Jelajahi Sumber

Style cleanup.

Kristian Schultz 4 tahun lalu
induk
melakukan
31f274beed
1 mengubah file dengan 52 tambahan dan 53 penghapusan
  1. 52 53
      library/convGAN.py

+ 52 - 53
library/convGAN.py

@@ -28,6 +28,16 @@ from tensorflow.keras.layers import Lambda
 import warnings
 import warnings
 warnings.filterwarnings("ignore")
 warnings.filterwarnings("ignore")
 
 
+
+
def repeat(x, times):
    """Return a list containing *x* repeated *times* times."""
    return [x for _i in range(times)]

def create01Labels(totalSize, sizeFirstHalf):
    """Build a (totalSize, 2) one-hot label array.

    The first ``sizeFirstHalf`` rows are ``[1, 0]`` (minority/positive
    class) and the remaining ``totalSize - sizeFirstHalf`` rows are
    ``[0, 1]`` (majority/negative class), matching the loop this helper
    replaced.

    Parameters
    ----------
    totalSize : int
        Total number of label rows to produce.
    sizeFirstHalf : int
        Number of leading ``[1, 0]`` rows; must be <= totalSize.

    Returns
    -------
    numpy.ndarray of shape (totalSize, 2)
    """
    labels = repeat(np.array([1, 0]), sizeFirstHalf)
    # BUG FIX: the tail must hold totalSize - sizeFirstHalf entries, not
    # totalSize, otherwise the result has sizeFirstHalf + totalSize rows
    # and no longer lines up with the dataset it labels.
    labels.extend(repeat(np.array([0, 1]), totalSize - sizeFirstHalf))
    return np.array(labels)
+
 class ConvGAN(GanBaseClass):
 class ConvGAN(GanBaseClass):
     """
     """
     This is a toy example of a GAN.
     This is a toy example of a GAN.
@@ -247,19 +257,11 @@ class ConvGAN(GanBaseClass):
         generator = self.conv_sample_generator
         generator = self.conv_sample_generator
         discriminator = self.maj_min_discriminator
         discriminator = self.maj_min_discriminator
         GAN = self.cg
         GAN = self.cg
-        loss_history=[] ## this is for stroring the loss for every run
+        loss_history = [] ## this is for stroring the loss for every run
         min_idx = 0
         min_idx = 0
         neb_epoch_count = 1
         neb_epoch_count = 1
 
 
-        labels = []
-        for i in range(2 * self.gen):
-            if i < self.gen:
-                labels.append(np.array([1,0]))
-            else:
-                labels.append(np.array([0,1]))
-        labels = np.array(labels)
-        labels = tf.convert_to_tensor(labels)
-
+        labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
 
 
         for step in range(neb_epochs * len(data_min)):
         for step in range(neb_epochs * len(data_min)):
             ## generate minority neighbourhood batch for every minority class sampls by index
             ## generate minority neighbourhood batch for every minority class sampls by index
@@ -365,18 +367,20 @@ class ConvGAN(GanBaseClass):
 
 
 
 
 def rough_learning_predictions(discriminator,test_data_numpy,test_labels_numpy):
 def rough_learning_predictions(discriminator,test_data_numpy,test_labels_numpy):
-
-    ## after the first phase of training the discriminator can be used for classification
-    ## it already learns to differentiate the convex minority points with majority points during the first training phase
-    y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
+    """
+    after the first phase of training the discriminator can be used for classification
+    it already learns to differentiate the convex minority points with majority points
+    during the first training phase
+    """
+    y_pred_2d = discriminator.predict(tf.convert_to_tensor(test_data_numpy))
     ## discretisation of the labels
     ## discretisation of the labels
-    y_pred=np.digitize(y_pred_2d[:,0], [.5])
+    y_pred = np.digitize(y_pred_2d[:,0], [.5])
     ## prediction shows a model with good recall and less precision
     ## prediction shows a model with good recall and less precision
-    c=confusion_matrix(test_labels_numpy, y_pred)
-    f=f1_score(test_labels_numpy, y_pred)
-    pr=precision_score(test_labels_numpy, y_pred)
-    rc=recall_score(test_labels_numpy, y_pred)
-    k=cohen_kappa_score(test_labels_numpy, y_pred)
+    c = confusion_matrix(test_labels_numpy, y_pred)
+    f = f1_score(test_labels_numpy, y_pred)
+    pr = precision_score(test_labels_numpy, y_pred)
+    rc = recall_score(test_labels_numpy, y_pred)
+    k = cohen_kappa_score(test_labels_numpy, y_pred)
     print('Rough learning confusion matrix:', c)
     print('Rough learning confusion matrix:', c)
     print('Rough learning f1 score', f)
     print('Rough learning f1 score', f)
     print('Rough learning precision score', pr)
     print('Rough learning precision score', pr)
@@ -386,38 +390,35 @@ def rough_learning_predictions(discriminator,test_data_numpy,test_labels_numpy):
 
 
 
 
 
 
-
 def generate_synthetic_data(gan, data_min, data_maj):
 def generate_synthetic_data(gan, data_min, data_maj):
-    ## roughly claculate the upper bound of the synthetic samples to be generated from each neighbourhood
-    synth_num=((len(data_maj)-len(data_min))//len(data_min))+1
+    ## roughly claculate the upper bound of the synthetic samples
+    ## to be generated from each neighbourhood
+    synth_num = ((len(data_maj) - len(data_min)) // len(data_min)) + 1
 
 
     ## generate synth_num synthetic samples from each minority neighbourhood
     ## generate synth_num synthetic samples from each minority neighbourhood
     synth_set = gan.generateData(synth_num)
     synth_set = gan.generateData(synth_num)
 
 
-    ovs_min_class=np.concatenate((data_min,synth_set),axis=0)
-    ovs_training_dataset=np.concatenate((ovs_min_class,data_maj),axis=0)
-    ovs_pca_labels=np.concatenate((np.zeros(len(data_min)),np.zeros(len(synth_set))+1,np.zeros(len(data_maj))+2))
-    # TODO ovs_training_labels=np.concatenate((np.zeros(len(ovs_min_class))+1,np.zeros(len(data_maj))+0))
-    ovs_training_labels_oh=[]
-    for i in range(len(ovs_training_dataset)):
-        if i<len(ovs_min_class):
-            ovs_training_labels_oh.append(np.array([1,0]))
-        else:
-            ovs_training_labels_oh.append(np.array([0,1]))
-    ovs_training_labels_oh=np.array(ovs_training_labels_oh)
-    ovs_training_labels_oh=tf.convert_to_tensor(ovs_training_labels_oh)
-
+    ovs_min_class = np.concatenate((data_min,synth_set), axis=0)
+    ovs_training_dataset = np.concatenate((ovs_min_class,data_maj), axis=0)
+    ovs_pca_labels = np.concatenate((
+        np.zeros(len(data_min)),
+        np.zeros(len(synth_set)) + 1,
+        np.zeros(len(data_maj)) + 2
+        ))
+    
+    ovs_training_labels_oh = create01Labels(len(ovs_training_dataset), len(ovs_min_class))
+    ovs_training_labels_oh = tf.convert_to_tensor(ovs_training_labels_oh)
 
 
     ## PCA visualization of the synthetic sata
     ## PCA visualization of the synthetic sata
-    ## observe how the minority samples from convex space have optimal variance and avoids overlap with the majority
+    ## observe how the minority samples from convex space have optimal variance
+    ## and avoids overlap with the majority
     pca = PCA(n_components=2)
     pca = PCA(n_components=2)
     pca.fit(ovs_training_dataset)
     pca.fit(ovs_training_dataset)
-    data_pca= pca.transform(ovs_training_dataset)
+    data_pca = pca.transform(ovs_training_dataset)
 
 
     ## plot PCA
     ## plot PCA
     plt.rcParams["figure.figsize"] = (12,12)
     plt.rcParams["figure.figsize"] = (12,12)
 
 
-    # TODO colors=['r', 'b', 'g']
     plt.xticks(fontsize=20)
     plt.xticks(fontsize=20)
     plt.yticks(fontsize=20)
     plt.yticks(fontsize=20)
     plt.xlabel('PCA1',fontsize=25)
     plt.xlabel('PCA1',fontsize=25)
@@ -439,11 +440,11 @@ def final_learning(discriminator, ovs_training_dataset, ovs_training_labels_oh,
     print('\n')
     print('\n')
     ## second phase training of the discriminator with balanced data
     ## second phase training of the discriminator with balanced data
 
 
-    history_second_learning=discriminator.fit(x=ovs_training_dataset,y=ovs_training_labels_oh, batch_size=20, epochs=num_epochs)
+    history_second_learning = discriminator.fit(x=ovs_training_dataset, y=ovs_training_labels_oh, batch_size=20, epochs=num_epochs)
 
 
     ## loss of the second phase learning smoothly decreses
     ## loss of the second phase learning smoothly decreses
     ## this is because now the data is fixed and diverse convex combinations are no longer fed into the discriminator at every training step
     ## this is because now the data is fixed and diverse convex combinations are no longer fed into the discriminator at every training step
-    run_range=range(1,num_epochs+1)
+    run_range = range(1, num_epochs + 1)
     plt.rcParams["figure.figsize"] = (16,10)
     plt.rcParams["figure.figsize"] = (16,10)
     plt.xticks(fontsize=20)
     plt.xticks(fontsize=20)
     plt.yticks(fontsize=20)
     plt.yticks(fontsize=20)
@@ -458,22 +459,22 @@ def final_learning(discriminator, ovs_training_dataset, ovs_training_labels_oh,
     ## the recall decreases but the precision improves
     ## the recall decreases but the precision improves
     print('\n')
     print('\n')
 
 
-    y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
-    y_pred=np.digitize(y_pred_2d[:,0], [.5])
-    c=confusion_matrix(test_labels_numpy, y_pred)
-    f=f1_score(test_labels_numpy, y_pred)
-    pr=precision_score(test_labels_numpy, y_pred)
-    rc=recall_score(test_labels_numpy, y_pred)
-    k=cohen_kappa_score(test_labels_numpy, y_pred)
+    y_pred_2d = discriminator.predict(tf.convert_to_tensor(test_data_numpy))
+    y_pred = np.digitize(y_pred_2d[:,0], [.5])
+    c = confusion_matrix(test_labels_numpy, y_pred)
+    f = f1_score(test_labels_numpy, y_pred)
+    pr = precision_score(test_labels_numpy, y_pred)
+    rc = recall_score(test_labels_numpy, y_pred)
+    k = cohen_kappa_score(test_labels_numpy, y_pred)
     print('Final learning confusion matrix:', c)
     print('Final learning confusion matrix:', c)
     print('Final learning f1 score', f)
     print('Final learning f1 score', f)
     print('Final learning precision score', pr)
     print('Final learning precision score', pr)
     print('Final learning recall score', rc)
     print('Final learning recall score', rc)
     print('Final learning kappa score', k)
     print('Final learning kappa score', k)
-    return c,f,pr,rc,k
+    return c, f, pr, rc, k
 
 
 
 
-def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels, neb, gen, neb_epochs,epochs_retrain_disc):
+def convGAN_train_end_to_end(training_data, training_labels, test_data, test_labels, neb, gen, neb_epochs, epochs_retrain_disc):
 
 
     ##minority class
     ##minority class
     data_min=training_data[np.where(training_labels == 1)[0]]
     data_min=training_data[np.where(training_labels == 1)[0]]
@@ -516,7 +517,7 @@ def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels
     print('\n')
     print('\n')
 
 
     ## final training results
     ## final training results
-    c,f,pr,rc,k=final_learning(gan.maj_min_discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data, test_labels, epochs_retrain_disc)
+    c,f,pr,rc,k = final_learning(gan.maj_min_discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data, test_labels, epochs_retrain_disc)
 
 
     return ((c_r,f_r,pr_r,rc_r,k_r),(c,f,pr,rc,k))
     return ((c_r,f_r,pr_r,rc_r,k_r),(c,f,pr,rc,k))
 
 
@@ -551,8 +552,6 @@ def runTest():
     neb=gen=5 ##neb=gen required
     neb=gen=5 ##neb=gen required
     neb_epochs=10
     neb_epochs=10
     epochs_retrain_disc=50
     epochs_retrain_disc=50
-    # TODO n_feat=len(features_x[1]) ## number of features
-
 
 
     ## Training
     ## Training
     np.random.seed(42)
     np.random.seed(42)