Просмотр исходного кода

Merge branch 'master' with 'master' from server.

Kristian Schultz 3 лет назад
Родитель
Сommit
0ceeba2b69
9 измененных файлов с 310 добавлено и 117 удалено
  1. 132 0
      convGAN-predict.ipynb
  2. 36 5
      library/NNSearch.py
  3. 11 9
      library/analysis.py
  4. 10 1
      library/exercise.py
  5. 96 91
      library/generators/convGAN.py
  6. 15 0
      library/interfaces.py
  7. 4 5
      library/testers.py
  8. 4 4
      run_all_exercises.ipynb
  9. 2 2
      statistics.py

Разница между файлами не показана из-за своего большого размера
+ 132 - 0
convGAN-predict.ipynb


+ 36 - 5
library/NNSearch.py

@@ -3,6 +3,7 @@ import math
 import tensorflow as tf
 import tensorflow as tf
 import numpy as np
 import numpy as np
 from sklearn.neighbors import NearestNeighbors
 from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import shuffle
 from library.timing import timing
 from library.timing import timing
 
 
 
 
@@ -11,6 +12,7 @@ class NNSearch:
         self.nebSize = nebSize
         self.nebSize = nebSize
         self.neighbourhoods = []
         self.neighbourhoods = []
         self.timingDict = timingDict
         self.timingDict = timingDict
+        self.basePoints = []
 
 
 
 
     def timerStart(self, name):
     def timerStart(self, name):
@@ -28,21 +30,50 @@ class NNSearch:
     def neighbourhoodOfItem(self, i):
     def neighbourhoodOfItem(self, i):
         return self.neighbourhoods[i]
         return self.neighbourhoods[i]
 
 
+    def getNbhPointsOfItem(self, index):
+        return self.getPointsFromIndices(self.neighbourhoodOfItem(index))
 
 
-    def fit(self, X, nebSize=None):
+    def getPointsFromIndices(self, indices):
+        nmbi = shuffle(np.array([indices]))
+        nmb = self.basePoints[nmbi]
+        return tf.convert_to_tensor(nmb[0])
+
+    def neighbourhoodOfItemList(self, items, maxCount=None):
+        nbhIndices = set()
+        duplicates = []
+        for i in items:
+            for x in self.neighbourhoodOfItem(i):
+                if x in nbhIndices:
+                    duplicates.append(x)
+                else:
+                    nbhIndices.add(x)
+
+        nbhIndices = list(nbhIndices)
+        if maxCount is not None:
+            if len(nbhIndices) < maxCount:
+                nbhIndices.extend(duplicates)
+            nbhIndices = nbhIndices[0:maxCount]
+
+        return self.getPointsFromIndices(nbhIndices)
+
+
+    def fit(self, haystack, needles=None, nebSize=None):
         self.timerStart("NN_fit_chained_init")
         self.timerStart("NN_fit_chained_init")
         if nebSize == None:
         if nebSize == None:
             nebSize = self.nebSize
             nebSize = self.nebSize
 
 
-        nPoints = len(X)
-        nFeatures = len(X[0])
+        if needles is None:
+            needles = haystack
+
+        self.basePoints = haystack
 
 
         neigh = NearestNeighbors(n_neighbors=nebSize)
         neigh = NearestNeighbors(n_neighbors=nebSize)
-        neigh.fit(X)
+        neigh.fit(haystack)
         self.timerStop("NN_fit_chained_init")
         self.timerStop("NN_fit_chained_init")
         self.timerStart("NN_fit_chained_toList")
         self.timerStart("NN_fit_chained_toList")
         self.neighbourhoods = [
         self.neighbourhoods = [
                 (neigh.kneighbors([x], nebSize, return_distance=False))[0]
                 (neigh.kneighbors([x], nebSize, return_distance=False))[0]
-                for (i, x) in enumerate(X)
+                for (i, x) in enumerate(needles)
                 ]
                 ]
         self.timerStop("NN_fit_chained_toList")
         self.timerStop("NN_fit_chained_toList")
+        return self

+ 11 - 9
library/analysis.py

@@ -260,12 +260,14 @@ def runAllTestSets(dataSetList):
 
 
 
 
 
 
-generators = [ ("ProWRAS",       lambda _data: ProWRAS())
-             , ("Repeater",      lambda _data: Repeater())
-             #, ("SpheredNoise",  lambda _data: SpheredNoise())
-             , ("SimpleGAN",     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
-             , ("ctGAN",         lambda data: CtGAN(data.data0.shape[1]))
-             , ("CTAB-GAN",      lambda _data: CtabGan())
-             , ("convGAN",       lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5))
-             , ("convGAN-full",  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1]))
-             ]
+generators = { "ProWRAS":                 lambda _data: ProWRAS()
+             , "Repeater":                lambda _data: Repeater()
+             , "SpheredNoise":            lambda _data: SpheredNoise()
+             , "SimpleGAN":               lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
+             , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
+             , "CTAB-GAN":                lambda _data: CtabGan()
+             , "convGAN":                 lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5)
+             , "convGAN-full":            lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1])
+             , "convGAN-proximary-5":     lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
+             , "convGAN-proxymary-full":  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], withMajorhoodNbSearch=True)
+             }

+ 10 - 1
library/exercise.py

@@ -11,7 +11,7 @@ from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
 
 
 from library.dataset import DataSet, TrainTestData
 from library.dataset import DataSet, TrainTestData
-from library.testers import lr,knn, gb, TestResult
+from library.testers import lr, knn, gb, TestResult, runTester
 
 
 
 
 class Exercise:
 class Exercise:
@@ -85,6 +85,9 @@ class Exercise:
         # Reset results array.
         # Reset results array.
         self.results = { name: [] for name in self.testFunctions }
         self.results = { name: [] for name in self.testFunctions }
 
 
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.results["GAN"] = []
+
         # If a shuffle function is given then shuffle the data before the
         # If a shuffle function is given then shuffle the data before the
         # exercise starts.
         # exercise starts.
         if self.shuffleFunction is not None:
         if self.shuffleFunction is not None:
@@ -184,6 +187,12 @@ class Exercise:
 
 
         # Test this dataset with every given test-function.
         # Test this dataset with every given test-function.
         # The results are printed out and stored to the results dictionary.
         # The results are printed out and stored to the results dictionary.
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.debug(f"-> test with GAN.predict")
+            testResult = runTester(dataSlice, gan)
+            self.debug(str(testResult))
+            self.results["GAN"].append(testResult)
+
         for testerName in self.testFunctions:
         for testerName in self.testFunctions:
             self.debug(f"-> test with '{testerName}'")
             self.debug(f"-> test with '{testerName}'")
             testResult = (self.testFunctions[testerName])(dataSlice)
             testResult = (self.testFunctions[testerName])(dataSlice)

+ 96 - 91
library/generators/convGAN.py

@@ -1,21 +1,9 @@
 import numpy as np
 import numpy as np
-from numpy.random import seed
-import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
 
 
 from library.interfaces import GanBaseClass
 from library.interfaces import GanBaseClass
 from library.dataset import DataSet
 from library.dataset import DataSet
 
 
-from sklearn.decomposition import PCA
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import f1_score
-from sklearn.metrics import cohen_kappa_score
-from sklearn.metrics import precision_score
-from sklearn.metrics import recall_score
-from sklearn.neighbors import NearestNeighbors
-from sklearn.utils import shuffle
-from imblearn.datasets import fetch_datasets
-
 from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
 from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
 from keras.models import Model
 from keras.models import Model
 from keras import backend as K
 from keras import backend as K
@@ -45,7 +33,7 @@ class ConvGAN(GanBaseClass):
     Convolutional GAN for minority-class oversampling.
     Convolutional GAN for minority-class oversampling.
     It generates synthetic minority samples from convex combinations of minority neighbourhoods.
     It generates synthetic minority samples from convex combinations of minority neighbourhoods.
     """
     """
-    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, debug=True):
+    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, withMajorhoodNbSearch=False, debug=False):
         self.isTrained = False
         self.isTrained = False
         self.n_feat = n_feat
         self.n_feat = n_feat
         self.neb = neb
         self.neb = neb
@@ -53,10 +41,12 @@ class ConvGAN(GanBaseClass):
         self.neb_epochs = 10
         self.neb_epochs = 10
         self.loss_history = None
         self.loss_history = None
         self.debug = debug
         self.debug = debug
-        self.dataSet = None
+        self.minSetSize = 0
         self.conv_sample_generator = None
         self.conv_sample_generator = None
         self.maj_min_discriminator = None
         self.maj_min_discriminator = None
+        self.withMajorhoodNbSearch = withMajorhoodNbSearch
         self.cg = None
         self.cg = None
+        self.canPredict = True
 
 
         if neb > gen:
         if neb > gen:
             raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
             raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
@@ -85,7 +75,7 @@ class ConvGAN(GanBaseClass):
             print(self.cg.summary())
             print(self.cg.summary())
             print('\n')
             print('\n')
 
 
-    def train(self, dataSet):
+    def train(self, dataSet, discTrainCount=5):
         """
         """
         Trains the GAN.
         Trains the GAN.
 
 
@@ -97,9 +87,21 @@ class ConvGAN(GanBaseClass):
         if dataSet.data1.shape[0] <= 0:
         if dataSet.data1.shape[0] <= 0:
             raise AttributeError("Train: Expected data class 1 to contain at least one point.")
             raise AttributeError("Train: Expected data class 1 to contain at least one point.")
 
 
-        self.dataSet = dataSet
-        self.nmb = self._NMB_prepare(dataSet.data1)
-        self._rough_learning(dataSet.data1, dataSet.data0)
+        # Store size of minority class. This is needed during point generation.
+        self.minSetSize = dataSet.data1.shape[0]
+
+        # Precalculate neighborhoods
+        self.nmbMin = NNSearch(self.neb).fit(haystack=dataSet.data1)
+        if self.withMajorhoodNbSearch:
+            self.nmbMaj = NNSearch(self.neb).fit(haystack=dataSet.data0, needles=dataSet.data1)
+        else:
+            self.nmbMaj = None
+
+        # Do the training.
+        self._rough_learning(dataSet.data1, dataSet.data0, discTrainCount)
+        
+        # Neighborhood in majority class is no longer needed. So save memory.
+        self.nmbMaj = None
         self.isTrained = True
         self.isTrained = True
 
 
     def generateDataPoint(self):
     def generateDataPoint(self):
@@ -118,14 +120,12 @@ class ConvGAN(GanBaseClass):
         if not self.isTrained:
         if not self.isTrained:
             raise ValueError("Try to generate data with untrained Re.")
             raise ValueError("Try to generate data with untrained Re.")
 
 
-        data_min = self.dataSet.data1
-
         ## roughly calculate the upper bound of the synthetic samples to be generated from each neighbourhood
         ## roughly calculate the upper bound of the synthetic samples to be generated from each neighbourhood
-        synth_num = (numOfSamples // len(data_min)) + 1
+        synth_num = (numOfSamples // self.minSetSize) + 1
 
 
         ## generate synth_num synthetic samples from each minority neighbourhood
         ## generate synth_num synthetic samples from each minority neighbourhood
         synth_set=[]
         synth_set=[]
-        for i in range(len(data_min)):
+        for i in range(self.minSetSize):
             synth_set.extend(self._generate_data_for_min_point(i, synth_num))
             synth_set.extend(self._generate_data_for_min_point(i, synth_num))
 
 
         ## extract the exact number of synthetic samples needed to exactly balance the two classes
         ## extract the exact number of synthetic samples needed to exactly balance the two classes
@@ -133,6 +133,10 @@ class ConvGAN(GanBaseClass):
 
 
         return synth_set
         return synth_set
 
 
+    def predictReal(self, data):
+        prediction = self.maj_min_discriminator.predict(data)
+        return np.array([x[0] for x in prediction])
+
     # ###############################################################
     # ###############################################################
     # Hidden internal functions
     # Hidden internal functions
     # ###############################################################
     # ###############################################################
@@ -200,6 +204,7 @@ class ConvGAN(GanBaseClass):
         ## passed through two dense layers
         ## passed through two dense layers
         y = Dense(250, activation='relu')(samples)
         y = Dense(250, activation='relu')(samples)
         y = Dense(125, activation='relu')(y)
         y = Dense(125, activation='relu')(y)
+        y = Dense(75, activation='relu')(y)
         
         
         ## two output nodes. outputs have to be one-hot coded (see labels variable before)
         ## two output nodes. outputs have to be one-hot coded (see labels variable before)
         output = Dense(2, activation='sigmoid')(y)
         output = Dense(2, activation='sigmoid')(y)
@@ -265,7 +270,7 @@ class ConvGAN(GanBaseClass):
         runs = int(synth_num / self.neb) + 1
         runs = int(synth_num / self.neb) + 1
         synth_set = []
         synth_set = []
         for _run in range(runs):
         for _run in range(runs):
-            batch = self._NMB_guided(index)
+            batch = self.nmbMin.getNbhPointsOfItem(index)
             synth_batch = self.conv_sample_generator.predict(batch)
             synth_batch = self.conv_sample_generator.predict(batch)
             synth_set.extend(synth_batch)
             synth_set.extend(synth_batch)
 
 
@@ -274,52 +279,73 @@ class ConvGAN(GanBaseClass):
 
 
 
 
     # Training
     # Training
-    def _rough_learning(self, data_min, data_maj):
+    def _rough_learning(self, data_min, data_maj, discTrainCount):
         generator = self.conv_sample_generator
         generator = self.conv_sample_generator
         discriminator = self.maj_min_discriminator
         discriminator = self.maj_min_discriminator
         GAN = self.cg
         GAN = self.cg
         loss_history = [] ## this is for storing the loss for every run
         loss_history = [] ## this is for storing the loss for every run
-        min_idx = 0
-        neb_epoch_count = 1
+        step = 0
+        minSetSize = len(data_min)
 
 
         labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
         labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
 
 
-        for step in range(self.neb_epochs * len(data_min)):
-            ## generate minority neighbourhood batch for every minority class sampls by index
-            min_batch = self._NMB_guided(min_idx)
-            min_idx = min_idx + 1
-            ## generate random proximal majority batch
-            maj_batch = self._BMB(data_min, data_maj)
-
-            ## generate synthetic samples from convex space
-            ## of minority neighbourhood batch using generator
-            conv_samples = generator.predict(min_batch)
-            ## concatenate them with the majority batch
-            concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
-
-            ## switch on discriminator training
-            discriminator.trainable = True
-            ## train the discriminator with the concatenated samples and the one-hot encoded labels
-            discriminator.fit(x=concat_sample, y=labels, verbose=0)
-            ## switch off the discriminator training again
-            discriminator.trainable = False
-
-            ## use the GAN to make the generator learn on the decisions
-            ## made by the previous discriminator training
-            ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
-            gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
-
-            ## store the loss for the step
-            loss_history.append(gan_loss_history.history['loss'])
-
-            if self.debug and ((step + 1) % 10 == 0):
-                print(f"{step + 1} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
-
-            if min_idx == len(data_min) - 1:
-                if self.debug:
-                    print(f"Neighbourhood epoch {neb_epoch_count} complete")
-                neb_epoch_count = neb_epoch_count + 1
-                min_idx = 0
+        for neb_epoch_count in range(self.neb_epochs):
+            if discTrainCount > 0:
+                for n in range(discTrainCount):
+                    for min_idx in range(minSetSize):
+                        ## generate minority neighbourhood batch for every minority class sample by index
+                        min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                        min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                        ## generate random proximal majority batch
+                        maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                        ## generate synthetic samples from convex space
+                        ## of minority neighbourhood batch using generator
+                        conv_samples = generator.predict(min_batch)
+                        ## concatenate them with the majority batch
+                        concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                        ## switch on discriminator training
+                        discriminator.trainable = True
+                        ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                        discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                        ## switch off the discriminator training again
+                        discriminator.trainable = False
+
+            for min_idx in range(minSetSize):
+                ## generate minority neighbourhood batch for every minority class sample by index
+                min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                ## generate random proximal majority batch
+                maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                ## generate synthetic samples from convex space
+                ## of minority neighbourhood batch using generator
+                conv_samples = generator.predict(min_batch)
+                ## concatenate them with the majority batch
+                concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                ## switch on discriminator training
+                discriminator.trainable = True
+                ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                ## switch off the discriminator training again
+                discriminator.trainable = False
+
+                ## use the GAN to make the generator learn on the decisions
+                ## made by the previous discriminator training
+                ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
+                gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
+
+                ## store the loss for the step
+                loss_history.append(gan_loss_history.history['loss'])
+
+                step += 1
+                if self.debug and (step % 10 == 0):
+                    print(f"{step} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
+
+            if self.debug:
+                print(f"Neighbourhood epoch {neb_epoch_count + 1} complete")
 
 
         if self.debug:
         if self.debug:
             run_range = range(1, len(loss_history) + 1)
             run_range = range(1, len(loss_history) + 1)
@@ -340,37 +366,16 @@ class ConvGAN(GanBaseClass):
 
 
 
 
     ## convGAN
     ## convGAN
-    def _BMB(self, data_min, data_maj):
+    def _BMB(self, data_maj, min_idxs):
 
 
         ## Generate a borderline majority batch
         ## Generate a borderline majority batch
-        ## data_min -> minority class data
         ## data_maj -> majority class data
         ## data_maj -> majority class data
-        ## neb -> oversampling neighbourhood
+        ## min_idxs -> indices of points in minority class
         ## gen -> convex combinations generated from each neighbourhood
         ## gen -> convex combinations generated from each neighbourhood
 
 
-        return tf.convert_to_tensor(
-            data_maj[np.random.randint(len(data_maj), size=self.gen)]
-            )
-
-    def _NMB_prepare(self, data_min):
-        neigh = NNSearch(self.neb)
-        neigh.fit(data_min)
-        return (data_min, neigh)
-
-
-    def _NMB_guided(self, index):
-
-        ## generate a minority neighbourhood batch for a particular minority sample
-        ## we need this for minority data generation
-        ## we will generate synthetic samples for each training data neighbourhood
-        ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
-        ## data_min -> minority class data
-        ## neb -> oversampling neighbourhood
-        (data_min, neigh) = self.nmb
-
-        nmbi = np.array([neigh.neighbourhoodOfItem(index)])
-        nmbi = shuffle(nmbi)
-        nmb = data_min[nmbi]
-        nmb = tf.convert_to_tensor(nmb[0])
-        return nmb
-
+        if self.nmbMaj is not None:
+            return self.nmbMaj.neighbourhoodOfItemList(min_idxs, maxCount=self.gen)
+        else:
+            return tf.convert_to_tensor(
+                data_maj[np.random.randint(len(data_maj), size=self.gen)]
+                )

+ 15 - 0
library/interfaces.py

@@ -1,6 +1,7 @@
 """
 """
 This module contains used interfaces for testing the Generative Adversarial Networks.
 This module contains used interfaces for testing the Generative Adversarial Networks.
 """
 """
+import numpy as np
 
 
 
 
 class GanBaseClass:
 class GanBaseClass:
@@ -13,6 +14,7 @@ class GanBaseClass:
         """
         """
         Initializes the class.
         Initializes the class.
         """
         """
+        self.canPredict = False
 
 
     def reset(self):
     def reset(self):
         """
         """
@@ -39,3 +41,16 @@ class GanBaseClass:
         *numOfSamples* is an integer > 0. It gives the number of generated samples.
         *numOfSamples* is an integer > 0. It gives the number of generated samples.
         """
         """
         raise NotImplementedError
         raise NotImplementedError
+
+    def predict(self, data, limit=0.5):
+        """
+        Takes a list (numpy array) of data points.
+        Returns a list with real values in [0,1] for the probability
+        that a point is in the minority dataset. With:
+          0.0: point is in majority set
+          1.0: point is in minority set
+        """
+        return np.array([max(0, min(1, int(x + 1.0 - limit))) for x in self.predictReal(data)])
+
+    def predictReal(self, data):
+        raise NotImplementedError

+ 4 - 5
library/testers.py

@@ -182,9 +182,7 @@ def knn(ttd):
     checkType(ttd)
     checkType(ttd)
     knnTester = KNeighborsClassifier(n_neighbors=10)
     knnTester = KNeighborsClassifier(n_neighbors=10)
     knnTester.fit(ttd.train.data, ttd.train.labels)
     knnTester.fit(ttd.train.data, ttd.train.labels)
-
-    prediction = knnTester.predict(ttd.test.data)
-    return TestResult("KNN", ttd.test.labels, prediction)
+    return runTester(ttd, knnTester, "KNN")
 
 
 
 
 def gb(ttd):
 def gb(ttd):
@@ -197,10 +195,11 @@ def gb(ttd):
     checkType(ttd)
     checkType(ttd)
     tester = GradientBoostingClassifier()
     tester = GradientBoostingClassifier()
     tester.fit(ttd.train.data, ttd.train.labels)
     tester.fit(ttd.train.data, ttd.train.labels)
+    return runTester(ttd, tester, "GB")
 
 
+def runTester(ttd, tester, name="GAN"):
     prediction = tester.predict(ttd.test.data)
     prediction = tester.predict(ttd.test.data)
-    return TestResult("GB", ttd.test.labels, prediction)
-
+    return TestResult(name, ttd.test.labels, prediction)
 
 
 def checkType(t):
 def checkType(t):
     if str(type(t)) == "<class 'numpy.ndarray'>":
     if str(type(t)) == "<class 'numpy.ndarray'>":

+ 4 - 4
run_all_exercises.ipynb

@@ -20,14 +20,14 @@
    "outputs": [],
    "outputs": [],
    "source": [
    "source": [
     "for dataset in testSets:\n",
     "for dataset in testSets:\n",
-    "    for f in generators:\n",
-    "        runExercise(dataset, None, name, f)"
+    "    for name in generators.keys():\n",
+    "        runExercise(dataset, None, name, generators[name])"
    ]
    ]
   }
   }
  ],
  ],
  "metadata": {
  "metadata": {
   "kernelspec": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "language": "python",
    "name": "python3"
    "name": "python3"
   },
   },
@@ -41,7 +41,7 @@
    "name": "python",
    "name": "python",
    "nbconvert_exporter": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
   }
   }
  },
  },
  "nbformat": 4,
  "nbformat": 4,

+ 2 - 2
statistics.py

@@ -10,8 +10,8 @@ f1Score = "f1 score"
 
 
 ignoreSet = ["yeast_me2"]
 ignoreSet = ["yeast_me2"]
 
 
-gans = [g[0] for g in generators]
-algs = {"LR", "GB", "KNN"}
+gans = generators.keys()
+algs = {"LR", "GB", "KNN", "GAN"}
 
 
 dataset  = [
 dataset  = [
     "folding_abalone9-18",
     "folding_abalone9-18",

Некоторые файлы не были показаны из-за большого количества измененных файлов