Prechádzať zdrojové kódy

Now using pipelines for inner training loop.

Kristian Schultz 3 rokov pred
rodič
commit
40ac3848a1
1 zmenil súbory, kde vykonal 85 pridanie a 29 odobranie
  1. 85 29
      library/generators/NextConvGeN.py

+ 85 - 29
library/generators/NextConvGeN.py

@@ -104,7 +104,7 @@ class NextConvGeN(GanBaseClass):
             print(self.cg.summary())
             print('\n')
 
-    def train(self, data, discTrainCount=5, batchSize=8):
+    def train(self, data, discTrainCount=5, batchSize=32):
         """
         Trains the Network.
 
@@ -265,28 +265,43 @@ class NextConvGeN(GanBaseClass):
         ## trained discriminator network.
         discriminator.trainable = False
 
+        # Shape of data:  (batchSize, 2, gen, n_feat)
+        # Shape of labels: (batchSize, 2 * gen, 2) 
+
         ## input receives a neighbourhood minority batch
         ## and a proximal majority batch concatenated
         batch_data = Input(shape=(2, self.gen, self.n_feat,))
+        # batch_data: (batchSize, 2, gen, n_feat)
         
         ## extract minority batch
         min_batch = Lambda(lambda x: x[:, 0, : ,:], name="SplitForGen")(batch_data)
+        # min_batch: (batchSize, gen, n_feat)
         
         ## extract majority batch
         maj_batch = Lambda(lambda x: x[:, 1, :, :], name="SplitForDisc")(batch_data)
+        # maj_batch: (batchSize, gen, n_feat)
         maj_batch = tf.reshape(maj_batch, (-1, self.n_feat), name="ReshapeForDisc")
+        # maj_batch: (batchSize * gen, n_feat)
         
         ## pass minority batch into generator to obtain convex space transformation
         ## (synthetic samples) of the minority neighbourhood input batch
         conv_samples = generator(min_batch)
+        # conv_samples: (batchSize, gen, n_feat)
         conv_samples = tf.reshape(conv_samples, (-1, self.n_feat), name="ReshapeGenOutput")
+        # conv_samples: (batchSize * gen, n_feat)
 
         ## pass samples into the discriminator to know its decisions
         conv_samples = discriminator(conv_samples)
+        conv_samples = tf.reshape(conv_samples, (-1, self.gen, 2), name="ReshapeGenDiscOutput")
+        # conv_samples: (batchSize, gen, 2)
+
         maj_batch = discriminator(maj_batch)
+        maj_batch = tf.reshape(maj_batch, (-1, self.gen, 2), name="ReshapeGenDiscOutput")
+        # maj_batch: (batchSize, gen, 2)
         
         ## concatenate the decisions
-        output = tf.concat([conv_samples, maj_batch],axis=0)
+        output = tf.concat([conv_samples, maj_batch],axis=1)
+        # output: (batchSize, 2 * gen, 2)
         
         ## note that, the discriminator will not be traied but will make decisions based
         ## on its previous training while using this function
@@ -318,7 +333,7 @@ class NextConvGeN(GanBaseClass):
 
 
     # Training
-    def _rough_learning(self, data, discTrainCount, batchSize=8):
+    def _rough_learning(self, data, discTrainCount, batchSize=32):
         generator = self.conv_sample_generator
         discriminator = self.maj_min_discriminator
         convGeN = self.cg
@@ -329,8 +344,7 @@ class NextConvGeN(GanBaseClass):
         labels = np.array(create01Labels(nLabels, self.gen))
         labelsGeN = np.array([labels])
         
-        
-        def createSamples(min_idx):
+        def indexToBatches(min_idx):
             self.timing["NBH"].start()
             ## generate minority neighbourhood batch for every minority class sampls by index
             min_batch_indices = shuffle(self.nmbMin.neighbourhoodOfItem(min_idx))
@@ -340,6 +354,11 @@ class NextConvGeN(GanBaseClass):
             maj_batch = self._BMB(min_batch_indices)
             self.timing["NBH"].stop()
 
+            return (min_batch, maj_batch)
+
+        def createSamples(min_idx):
+            min_batch, maj_batch = indexToBatches(min_idx)
+
             self.timing["GenSamples"].start()
             ## generate synthetic samples from convex space
             ## of minority neighbourhood batch using generator
@@ -353,11 +372,11 @@ class NextConvGeN(GanBaseClass):
             self.timing["FixType"].stop()
 
             ## concatenate them with the majority batch
-            conv_samples = [conv_samples, maj_batch]
+            conv_samples = [conv_samples, maj_batch, min_batch]
             return conv_samples
 
         def trainDiscriminator(samples):
-            concat_samples = tf.concat(samples, axis=0)
+            concat_samples = tf.concat([samples[0], samples[1]], axis=0)
             self.timing["Fit"].start()
             ## switch on discriminator training
             discriminator.trainable = True
@@ -366,30 +385,40 @@ class NextConvGeN(GanBaseClass):
             ## switch off the discriminator training again
             discriminator.trainable = False
             self.timing["Fit"].stop()
+        
+        def genSamplesForDisc():
+            for min_idx in range(minSetSize):
+                yield createSamples(min_idx)
 
-        def genSamples():
+        def genSamplesForGeN():
             for min_idx in range(minSetSize):
-                samples = createSamples(min_idx)
-                for x in samples[0]:
-                    yield x
+                yield indexToBatches(min_idx)
 
-                for x in samples[1]:
-                    yield x
+        def unbatch(indices, rows):
+            def fn():
+                for arr in rows:
+                    for i in indices:
+                        for x in arr[i]:
+                            yield x
+            return fn
 
         def genLabels():
             for min_idx in range(minSetSize):
                 for x in labels:
                     yield x
-            
+        
+        padd = np.zeros((self.gen - self.neb, self.n_feat))
                 
 
         for neb_epoch_count in range(self.neb_epochs):
-            for n in range(max(0,discTrainCount)):
+            shape = (self.gen, self.n_feat)
+
+            for n in range(1 + max(0,discTrainCount)):
                 self.progressBar([(neb_epoch_count + 1) / self.neb_epochs, n / discTrainCount, 0.5])
-                samples = genSamples()
-                
-                a = tf.data.Dataset.from_generator(genSamples, output_types=tf.float32)
-                b = tf.data.Dataset.from_generator(genLabels, output_types=tf.float32)
+
+                a = tf.data.Dataset.from_generator(genSamplesForDisc, output_types=tf.float32)
+                a = tf.data.Dataset.from_generator(unbatch([0,1], a), output_types=tf.float32)
+                b = tf.data.Dataset.from_tensor_slices(labels).repeat()
                 samples = tf.data.Dataset.zip((a, b)).batch(batchSize * 2 * self.gen)
 
                 self.timing["Fit"].start()
@@ -401,19 +430,46 @@ class NextConvGeN(GanBaseClass):
                 discriminator.trainable = False
                 self.timing["Fit"].stop()
 
-            for min_idx in range(minSetSize):
-                self.progressBar([(neb_epoch_count + 1) / self.neb_epochs, 1.0, (min_idx + 1) / minSetSize])
+            # for min_idx in range(minSetSize):
+            #     self.progressBar([(neb_epoch_count + 1) / self.neb_epochs, 1.0, (min_idx + 1) / minSetSize])
+
+            #     samples = createSamples(min_idx)
+            #     trainDiscriminator(samples)
+
+            #     ## use the complete network to make the generator learn on the decisions
+            #     ## made by the previous discriminator training
+            #     samples = np.array([[samples[2], samples[1]]])
+            #     gen_loss_history = convGeN.fit(samples, y=labelsGeN, verbose=0, batch_size=nLabels)
+
+            #     ## store the loss for the step
+            #     loss_history.append(gen_loss_history.history['loss'])
+
+
+            # <<<<<<<<<<
+            src = tf.data.Dataset.from_generator(genSamplesForGeN, output_types=tf.float32)
+            #a = tf.data.Dataset.from_generator(unbatch([0,1], src), output_types=tf.float32)
+            #b = tf.data.Dataset.from_tensor_slices(labels).repeat()
+            #samples = tf.data.Dataset.zip((a, b)).batch(batchSize * 2 * self.gen)
+
+            #self.timing["Fit"].start()
+            ### switch on discriminator training
+            #discriminator.trainable = True
+            ### train the discriminator with the concatenated samples and the one-hot encoded labels
+            #discriminator.fit(x=samples, verbose=0)
+            ### switch off the discriminator training again
+            #discriminator.trainable = False
+            #self.timing["Fit"].stop()
 
-                samples = createSamples(min_idx)
-                trainDiscriminator(samples)
+            ## use the complete network to make the generator learn on the decisions
+            ## made by the previous discriminator training
+            a = src.map(lambda x: [[tf.concat([x[0], padd], axis=0), x[1]]])
+            b = tf.data.Dataset.from_tensor_slices(labelsGeN).repeat()
+            samples = tf.data.Dataset.zip((a, b)).batch(batchSize)
 
-                ## use the complete network to make the generator learn on the decisions
-                ## made by the previous discriminator training
-                samples = np.array([samples])
-                gen_loss_history = convGeN.fit(samples, y=labelsGeN, verbose=0, batch_size=nLabels)
+            gen_loss_history = convGeN.fit(samples, verbose=0, batch_size=batchSize)
+            loss_history.append(gen_loss_history.history['loss'])
+            # >>>>>>>>>>
 
-                ## store the loss for the step
-                loss_history.append(gen_loss_history.history['loss'])
 
         if self.debug:
             run_range = range(1, len(loss_history) + 1)