Explorar el Código

Corrected mixed set 0/1 and did some code cleaning.

Kristian Schultz hace 4 años
padre
commit
8c9e832def
Se han modificado 2 ficheros con 170 adiciones y 192 borrados
  1. 42 50
      LoGAN.py
  2. 128 142
      LoGAN_v0.3.ipynb

+ 42 - 50
LoGAN.py

@@ -1,14 +1,11 @@
 import math
 import numpy as np
-import pandas as pd
 from tqdm import tqdm
 from keras.layers import Dense, Dropout, Input
 from keras.models import Model,Sequential
 from keras.layers.advanced_activations import LeakyReLU
 from keras.optimizers import Adam
 from sklearn.neighbors import NearestNeighbors
-from sklearn.manifold import TSNE
-from imblearn.datasets import fetch_datasets
 
 def adam_optimizer():
     return Adam(lr=0.0002, beta_1=0.5)
@@ -85,7 +82,8 @@ class GAN:
     Class for GAN.
     """
 
-    def __init__(self, n_feat=1, noise=None):
+    def __init__(self, n_feat=1, noise=None,
+                 discriminatorMin=None, discriminatorMax=None, generator=None):
         self.n_feat = n_feat
 
         if noise is None:
@@ -93,10 +91,10 @@ class GAN:
         else:
             self.noise = noise
 
-        self.gan = self.create_gan(
-            self.create_discriminator_min(),
-            self.create_discriminator_maj(),
-            self.create_generator())
+        self.create_gan(
+            discriminatorMin or self.create_discriminator_min(),
+            discriminatorMax or self.create_discriminator_maj(),
+            generator or self.create_generator())
 
 
     def create_generator(self):
@@ -158,17 +156,20 @@ class GAN:
         return discriminator
 
     def create_gan(self, discriminator_min, discriminator_maj, generator):
-        discriminator_min.trainable=False
-        discriminator_maj.trainable=False
+        discriminator_min.trainable = False
+        discriminator_maj.trainable = False
         gan_input = Input(shape=(self.n_feat,))
         x = generator(gan_input)
         gan_output_min= discriminator_min(x)
         gan_output_maj= discriminator_maj(x)
         gan = Model(inputs=gan_input, outputs=[gan_output_min,gan_output_maj])
         gan.compile(loss=['binary_crossentropy','binary_crossentropy'], optimizer='adam')
+
+        # store the parts for later usage.
         self.generator = generator
         self.discriminator_min = discriminator_min
         self.discriminator_maj = discriminator_maj
+        self.gan = gan
         return gan
 
     def train(self, parameters):
@@ -232,6 +233,35 @@ class GAN:
 
 
 
class DataSet:
    """Holds a data matrix together with its 0/1 class labels.

    A DataSet can be built either from an explicit ``(data, labels)`` pair
    or from the per-class arrays ``data0`` (label 0) and ``data1`` (label 1).
    """

    def __init__(self, data=None, labels=None, data0=None, data1=None):
        """Store ``data``/``labels`` directly, or derive them from ``data0``/``data1``.

        Raises:
            ValueError: if ``data`` is given without ``labels``, or if
                neither ``data`` nor any per-class array is given.
        """
        if data is None:
            self.fromData01(data0, data1)
        elif labels is None:
            # The old code did `raise "..."`, which in Python 3 just raises
            # "TypeError: exceptions must derive from BaseException" and
            # loses the intended message — raise a real exception instead.
            raise ValueError("expected labels to be a numpy.array")
        else:
            self.data = data
            self.labels = labels

    def fromData01(self, data0=None, data1=None):
        """Build ``self.data``/``self.labels`` from the per-class arrays.

        Class-1 samples come first in the concatenated data, matching the
        order of the generated label vector.
        """
        if data0 is None and data1 is None:
            raise ValueError("Expected data, data0 or data1 to be a numpy.array")
        if data0 is None:
            self.data = data1
            self.labels = np.ones(len(data1))
        elif data1 is None:
            self.data = data0
            self.labels = np.zeros(len(data0))
        else:
            self.data = np.concatenate((data1, data0))
            self.labels = np.concatenate((np.ones(len(data1)),
                                          np.zeros(len(data0))))
+
+
+
 class TrainTestData:
     """
     Stores features, data and labels for class 0 and class 1.
@@ -244,8 +274,8 @@ class TrainTestData:
         self.features_0_trn, self.features_0_tst = self.splitUpData(features0, trainFactor)
         self.features_1_trn, self.features_1_tst = self.splitUpData(features1, trainFactor)
 
-        self.testData, self.testLabels = self.joinData(self.features_1_tst, self.features_0_tst)
-        self.trainData, self.trainLabels = self.joinData(self.features_1_trn, self.features_0_trn)
+        self.test = DataSet(data1=self.features_1_tst, data0=self.features_0_tst)
+        self.train = DataSet(data1=self.features_1_trn, data0=self.features_0_trn)
 
     def splitUpData(self, data, trainFactor=0.9):
         size = len(data)
@@ -253,41 +283,3 @@ class TrainTestData:
         trn = data[list(range(0, trainSize))]
         tst = data[list(range(trainSize, size))]
         return trn, tst
-
-    def joinData(self, data0, data1):
-        data = np.concatenate((data1, data0))
-        labels = np.concatenate(( np.zeros(len(data1)) + 1, np.zeros(len(data0)) ))
-        return data, labels
-
-
-
-if __name__ == "__main__":
-    def createTrainParameters():
-        data = fetch_datasets()['yeast_me2']
-        labels = data.target
-        features = data.data
-        label_1 = list(np.where(labels == 1)[0])
-        label_0 = list(np.where(labels == -1)[0])
-        features_1 = features[label_1]
-        features_0 = features[label_0]
-        features_1_trn = features_1[list(range(0,math.ceil(len(features_1)*2/3)))]
-        data_embedded_min = TSNE(perplexity=.1).fit_transform(features_1_trn)
-
-        result_min = pd.DataFrame(data=data_embedded_min, columns=['t-SNE0', 't-SNE1'])
-        min_t = np.asmatrix(result_min)
-        min_t = min_t[0:len(features_1_trn)]
-        min_t = min_t[:, [0,1]]
-
-        return GanTrainParameters(
-            n_feat=len(features[1]),
-            batch_size=30,
-            min_t=min_t,
-            features_0_trn=features_0[list(range(0,math.ceil(len(features_0)*2/3)))],
-            features_1_trn=features_1_trn
-            )
-
-
-    gtp = createTrainParameters()
-
-    cGan = GAN(n_feat=gtp.n_feat)
-    cGan.train(parameters=gtp)

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 128 - 142
LoGAN_v0.3.ipynb


Algunos archivos no se mostraron porque hay demasiados archivos modificados en esta diferencia