Kaynağa Gözat

Merge branch 'master' with 'master' from server.

Kristian Schultz 3 yıl önce
ebeveyn
işleme
0ceeba2b69

Dosya farkı çok büyük olduğundan ihmal edildi
+ 132 - 0
convGAN-predict.ipynb


+ 36 - 5
library/NNSearch.py

@@ -3,6 +3,7 @@ import math
 import tensorflow as tf
 import numpy as np
 from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import shuffle
 from library.timing import timing
 
 
@@ -11,6 +12,7 @@ class NNSearch:
         self.nebSize = nebSize
         self.neighbourhoods = []
         self.timingDict = timingDict
+        self.basePoints = []
 
 
     def timerStart(self, name):
@@ -28,21 +30,50 @@ class NNSearch:
     def neighbourhoodOfItem(self, i):
         return self.neighbourhoods[i]
 
+    def getNbhPointsOfItem(self, index):
+        return self.getPointsFromIndices(self.neighbourhoodOfItem(index))
 
-    def fit(self, X, nebSize=None):
+    def getPointsFromIndices(self, indices):
+        nmbi = shuffle(np.array([indices]))
+        nmb = self.basePoints[nmbi]
+        return tf.convert_to_tensor(nmb[0])
+
+    def neighbourhoodOfItemList(self, items, maxCount=None):
+        nbhIndices = set()
+        duplicates = []
+        for i in items:
+            for x in self.neighbourhoodOfItem(i):
+                if x in nbhIndices:
+                    duplicates.append(x)
+                else:
+                    nbhIndices.add(x)
+
+        nbhIndices = list(nbhIndices)
+        if maxCount is not None:
+            if len(nbhIndices) < maxCount:
+                nbhIndices.extend(duplicates)
+            nbhIndices = nbhIndices[0:maxCount]
+
+        return self.getPointsFromIndices(nbhIndices)
+
+
+    def fit(self, haystack, needles=None, nebSize=None):
         self.timerStart("NN_fit_chained_init")
         if nebSize == None:
             nebSize = self.nebSize
 
-        nPoints = len(X)
-        nFeatures = len(X[0])
+        if needles is None:
+            needles = haystack
+
+        self.basePoints = haystack
 
         neigh = NearestNeighbors(n_neighbors=nebSize)
-        neigh.fit(X)
+        neigh.fit(haystack)
         self.timerStop("NN_fit_chained_init")
         self.timerStart("NN_fit_chained_toList")
         self.neighbourhoods = [
                 (neigh.kneighbors([x], nebSize, return_distance=False))[0]
-                for (i, x) in enumerate(X)
+                for (i, x) in enumerate(needles)
                 ]
         self.timerStop("NN_fit_chained_toList")
+        return self

+ 11 - 9
library/analysis.py

@@ -260,12 +260,14 @@ def runAllTestSets(dataSetList):
 
 
 
-generators = [ ("ProWRAS",       lambda _data: ProWRAS())
-             , ("Repeater",      lambda _data: Repeater())
-             #, ("SpheredNoise",  lambda _data: SpheredNoise())
-             , ("SimpleGAN",     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
-             , ("ctGAN",         lambda data: CtGAN(data.data0.shape[1]))
-             , ("CTAB-GAN",      lambda _data: CtabGan())
-             , ("convGAN",       lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5))
-             , ("convGAN-full",  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1]))
-             ]
+generators = { "ProWRAS":                 lambda _data: ProWRAS()
+             , "Repeater":                lambda _data: Repeater()
+             , "SpheredNoise":            lambda _data: SpheredNoise()
+             , "SimpleGAN":               lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
+             , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
+             , "CTAB-GAN":                lambda _data: CtabGan()
+             , "convGAN":                 lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5)
+             , "convGAN-full":            lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1])
+             , "convGAN-proximary-5":     lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
+             , "convGAN-proxymary-full":  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], withMajorhoodNbSearch=True)
+             }

+ 10 - 1
library/exercise.py

@@ -11,7 +11,7 @@ from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 
 from library.dataset import DataSet, TrainTestData
-from library.testers import lr,knn, gb, TestResult
+from library.testers import lr, knn, gb, TestResult, runTester
 
 
 class Exercise:
@@ -85,6 +85,9 @@ class Exercise:
         # Reset results array.
         self.results = { name: [] for name in self.testFunctions }
 
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.results["GAN"] = []
+
         # If a shuffle function is given then shuffle the data before the
         # exercise starts.
         if self.shuffleFunction is not None:
@@ -184,6 +187,12 @@ class Exercise:
 
         # Test this dataset with every given test-function.
         # The results are printed out and stored to the results dictionary.
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.debug(f"-> test with GAN.predict")
+            testResult = runTester(dataSlice, gan)
+            self.debug(str(testResult))
+            self.results["GAN"].append(testResult)
+
         for testerName in self.testFunctions:
             self.debug(f"-> test with '{testerName}'")
             testResult = (self.testFunctions[testerName])(dataSlice)

+ 96 - 91
library/generators/convGAN.py

@@ -1,21 +1,9 @@
 import numpy as np
-from numpy.random import seed
-import pandas as pd
 import matplotlib.pyplot as plt
 
 from library.interfaces import GanBaseClass
 from library.dataset import DataSet
 
-from sklearn.decomposition import PCA
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import f1_score
-from sklearn.metrics import cohen_kappa_score
-from sklearn.metrics import precision_score
-from sklearn.metrics import recall_score
-from sklearn.neighbors import NearestNeighbors
-from sklearn.utils import shuffle
-from imblearn.datasets import fetch_datasets
-
 from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
 from keras.models import Model
 from keras import backend as K
@@ -45,7 +33,7 @@ class ConvGAN(GanBaseClass):
     This is a toy example of a GAN.
     It repeats the first point of the training-data-set.
     """
-    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, debug=True):
+    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, withMajorhoodNbSearch=False, debug=False):
         self.isTrained = False
         self.n_feat = n_feat
         self.neb = neb
@@ -53,10 +41,12 @@ class ConvGAN(GanBaseClass):
         self.neb_epochs = 10
         self.loss_history = None
         self.debug = debug
-        self.dataSet = None
+        self.minSetSize = 0
         self.conv_sample_generator = None
         self.maj_min_discriminator = None
+        self.withMajorhoodNbSearch = withMajorhoodNbSearch
         self.cg = None
+        self.canPredict = True
 
         if neb > gen:
             raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
@@ -85,7 +75,7 @@ class ConvGAN(GanBaseClass):
             print(self.cg.summary())
             print('\n')
 
-    def train(self, dataSet):
+    def train(self, dataSet, discTrainCount=5):
         """
         Trains the GAN.
 
@@ -97,9 +87,21 @@ class ConvGAN(GanBaseClass):
         if dataSet.data1.shape[0] <= 0:
             raise AttributeError("Train: Expected data class 1 to contain at least one point.")
 
-        self.dataSet = dataSet
-        self.nmb = self._NMB_prepare(dataSet.data1)
-        self._rough_learning(dataSet.data1, dataSet.data0)
+        # Store size of minority class. This is needed during point generation.
+        self.minSetSize = dataSet.data1.shape[0]
+
+        # Precalculate neighborhoods
+        self.nmbMin = NNSearch(self.neb).fit(haystack=dataSet.data1)
+        if self.withMajorhoodNbSearch:
+            self.nmbMaj = NNSearch(self.neb).fit(haystack=dataSet.data0, needles=dataSet.data1)
+        else:
+            self.nmbMaj = None
+
+        # Do the training.
+        self._rough_learning(dataSet.data1, dataSet.data0, discTrainCount)
+        
+        # The neighborhood in the majority class is no longer needed, so drop it to save memory.
+        self.nmbMaj = None
         self.isTrained = True
 
     def generateDataPoint(self):
@@ -118,14 +120,12 @@ class ConvGAN(GanBaseClass):
         if not self.isTrained:
             raise ValueError("Try to generate data with untrained Re.")
 
-        data_min = self.dataSet.data1
-
         ## roughly calculate the upper bound of the synthetic samples to be generated from each neighbourhood
-        synth_num = (numOfSamples // len(data_min)) + 1
+        synth_num = (numOfSamples // self.minSetSize) + 1
 
         ## generate synth_num synthetic samples from each minority neighbourhood
         synth_set=[]
-        for i in range(len(data_min)):
+        for i in range(self.minSetSize):
             synth_set.extend(self._generate_data_for_min_point(i, synth_num))
 
         ## extract the exact number of synthetic samples needed to exactly balance the two classes
@@ -133,6 +133,10 @@ class ConvGAN(GanBaseClass):
 
         return synth_set
 
+    def predictReal(self, data):
+        prediction = self.maj_min_discriminator.predict(data)
+        return np.array([x[0] for x in prediction])
+
     # ###############################################################
     # Hidden internal functions
     # ###############################################################
@@ -200,6 +204,7 @@ class ConvGAN(GanBaseClass):
         ## passed through two dense layers
         y = Dense(250, activation='relu')(samples)
         y = Dense(125, activation='relu')(y)
+        y = Dense(75, activation='relu')(y)
         
         ## two output nodes. outputs have to be one-hot coded (see labels variable before)
         output = Dense(2, activation='sigmoid')(y)
@@ -265,7 +270,7 @@ class ConvGAN(GanBaseClass):
         runs = int(synth_num / self.neb) + 1
         synth_set = []
         for _run in range(runs):
-            batch = self._NMB_guided(index)
+            batch = self.nmbMin.getNbhPointsOfItem(index)
             synth_batch = self.conv_sample_generator.predict(batch)
             synth_set.extend(synth_batch)
 
@@ -274,52 +279,73 @@ class ConvGAN(GanBaseClass):
 
 
     # Training
-    def _rough_learning(self, data_min, data_maj):
+    def _rough_learning(self, data_min, data_maj, discTrainCount):
         generator = self.conv_sample_generator
         discriminator = self.maj_min_discriminator
         GAN = self.cg
         loss_history = [] ## this is for storing the loss for every run
-        min_idx = 0
-        neb_epoch_count = 1
+        step = 0
+        minSetSize = len(data_min)
 
         labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
 
-        for step in range(self.neb_epochs * len(data_min)):
-            ## generate minority neighbourhood batch for every minority class sampls by index
-            min_batch = self._NMB_guided(min_idx)
-            min_idx = min_idx + 1
-            ## generate random proximal majority batch
-            maj_batch = self._BMB(data_min, data_maj)
-
-            ## generate synthetic samples from convex space
-            ## of minority neighbourhood batch using generator
-            conv_samples = generator.predict(min_batch)
-            ## concatenate them with the majority batch
-            concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
-
-            ## switch on discriminator training
-            discriminator.trainable = True
-            ## train the discriminator with the concatenated samples and the one-hot encoded labels
-            discriminator.fit(x=concat_sample, y=labels, verbose=0)
-            ## switch off the discriminator training again
-            discriminator.trainable = False
-
-            ## use the GAN to make the generator learn on the decisions
-            ## made by the previous discriminator training
-            ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
-            gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
-
-            ## store the loss for the step
-            loss_history.append(gan_loss_history.history['loss'])
-
-            if self.debug and ((step + 1) % 10 == 0):
-                print(f"{step + 1} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
-
-            if min_idx == len(data_min) - 1:
-                if self.debug:
-                    print(f"Neighbourhood epoch {neb_epoch_count} complete")
-                neb_epoch_count = neb_epoch_count + 1
-                min_idx = 0
+        for neb_epoch_count in range(self.neb_epochs):
+            if discTrainCount > 0:
+                for n in range(discTrainCount):
+                    for min_idx in range(minSetSize):
+                        ## generate minority neighbourhood batch for every minority class sample by index
+                        min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                        min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                        ## generate random proximal majority batch
+                        maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                        ## generate synthetic samples from convex space
+                        ## of minority neighbourhood batch using generator
+                        conv_samples = generator.predict(min_batch)
+                        ## concatenate them with the majority batch
+                        concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                        ## switch on discriminator training
+                        discriminator.trainable = True
+                        ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                        discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                        ## switch off the discriminator training again
+                        discriminator.trainable = False
+
+            for min_idx in range(minSetSize):
+                ## generate minority neighbourhood batch for every minority class sample by index
+                min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                ## generate random proximal majority batch
+                maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                ## generate synthetic samples from convex space
+                ## of minority neighbourhood batch using generator
+                conv_samples = generator.predict(min_batch)
+                ## concatenate them with the majority batch
+                concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                ## switch on discriminator training
+                discriminator.trainable = True
+                ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                ## switch off the discriminator training again
+                discriminator.trainable = False
+
+                ## use the GAN to make the generator learn on the decisions
+                ## made by the previous discriminator training
+                ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
+                gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
+
+                ## store the loss for the step
+                loss_history.append(gan_loss_history.history['loss'])
+
+                step += 1
+                if self.debug and (step % 10 == 0):
+                    print(f"{step} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
+
+            if self.debug:
+                print(f"Neighbourhood epoch {neb_epoch_count + 1} complete")
 
         if self.debug:
             run_range = range(1, len(loss_history) + 1)
@@ -340,37 +366,16 @@ class ConvGAN(GanBaseClass):
 
 
     ## convGAN
-    def _BMB(self, data_min, data_maj):
+    def _BMB(self, data_maj, min_idxs):
 
         ## Generate a borderline majority batch
-        ## data_min -> minority class data
         ## data_maj -> majority class data
-        ## neb -> oversampling neighbourhood
+        ## min_idxs -> indices of points in minority class
         ## gen -> convex combinations generated from each neighbourhood
 
-        return tf.convert_to_tensor(
-            data_maj[np.random.randint(len(data_maj), size=self.gen)]
-            )
-
-    def _NMB_prepare(self, data_min):
-        neigh = NNSearch(self.neb)
-        neigh.fit(data_min)
-        return (data_min, neigh)
-
-
-    def _NMB_guided(self, index):
-
-        ## generate a minority neighbourhood batch for a particular minority sample
-        ## we need this for minority data generation
-        ## we will generate synthetic samples for each training data neighbourhood
-        ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
-        ## data_min -> minority class data
-        ## neb -> oversampling neighbourhood
-        (data_min, neigh) = self.nmb
-
-        nmbi = np.array([neigh.neighbourhoodOfItem(index)])
-        nmbi = shuffle(nmbi)
-        nmb = data_min[nmbi]
-        nmb = tf.convert_to_tensor(nmb[0])
-        return nmb
-
+        if self.nmbMaj is not None:
+            return self.nmbMaj.neighbourhoodOfItemList(min_idxs, maxCount=self.gen)
+        else:
+            return tf.convert_to_tensor(
+                data_maj[np.random.randint(len(data_maj), size=self.gen)]
+                )

+ 15 - 0
library/interfaces.py

@@ -1,6 +1,7 @@
 """
 This module contains used interfaces for testing the Generative Adversarial Networks.
 """
+import numpy as np
 
 
 class GanBaseClass:
@@ -13,6 +14,7 @@ class GanBaseClass:
         """
         Initializes the class.
         """
+        self.canPredict = False
 
     def reset(self):
         """
@@ -39,3 +41,16 @@ class GanBaseClass:
         *numOfSamples* is an integer > 0. It gives the number of generated samples.
         """
         raise NotImplementedError
+
+    def predict(self, data, limit=0.5):
+        """
+        Takes a list (numpy array) of data points.
+        Returns a list with real values in [0,1] for the probability
+        that a point is in the minority dataset. With:
+          0.0: point is in majority set
+          1.0: point is in minority set
+        """
+        return np.array([max(0, min(1, int(x + 1.0 - limit))) for x in self.predictReal(data)])
+
+    def predictReal(self, data):
+        raise NotImplemented

+ 4 - 5
library/testers.py

@@ -182,9 +182,7 @@ def knn(ttd):
     checkType(ttd)
     knnTester = KNeighborsClassifier(n_neighbors=10)
     knnTester.fit(ttd.train.data, ttd.train.labels)
-
-    prediction = knnTester.predict(ttd.test.data)
-    return TestResult("KNN", ttd.test.labels, prediction)
+    return runTester(ttd, knnTester, "KNN")
 
 
 def gb(ttd):
@@ -197,10 +195,11 @@ def gb(ttd):
     checkType(ttd)
     tester = GradientBoostingClassifier()
     tester.fit(ttd.train.data, ttd.train.labels)
+    return runTester(ttd, tester, "GB")
 
+def runTester(ttd, tester, name="GAN"):
     prediction = tester.predict(ttd.test.data)
-    return TestResult("GB", ttd.test.labels, prediction)
-
+    return TestResult(name, ttd.test.labels, prediction)
 
 def checkType(t):
     if str(type(t)) == "<class 'numpy.ndarray'>":

+ 4 - 4
run_all_exercises.ipynb

@@ -20,14 +20,14 @@
    "outputs": [],
    "source": [
     "for dataset in testSets:\n",
-    "    for f in generators:\n",
-    "        runExercise(dataset, None, name, f)"
+    "    for name in generators.keys():\n",
+    "        runExercise(dataset, None, name, generators[name])"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -41,7 +41,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,

+ 2 - 2
statistics.py

@@ -10,8 +10,8 @@ f1Score = "f1 score"
 
 ignoreSet = ["yeast_me2"]
 
-gans = [g[0] for g in generators]
-algs = {"LR", "GB", "KNN"}
+gans = generators.keys()
+algs = {"LR", "GB", "KNN", "GAN"}
 
 dataset  = [
     "folding_abalone9-18",

Bu fark içinde çok fazla dosya değişikliği olduğu için bazı dosyalar gösterilmiyor