Просмотр исходного кода

Merge branch 'classifyByDiscriminator' of fyrr/LoGAN into master

fyrr 4 года назад
Родитель
Commit
f520ffdea3
8 изменённых файлов: 246 добавлений и 65 удалений
  1. 132 0
      convGAN-predict.ipynb
  2. 11 11
      library/analysis.py
  3. 10 1
      library/exercise.py
  4. 68 42
      library/generators/convGAN.py
  5. 15 0
      library/interfaces.py
  6. 4 5
      library/testers.py
  7. 4 4
      run_all_exercises.ipynb
  8. 2 2
      statistics.py

Разница между файлами не показана из-за большого размера
+ 132 - 0
convGAN-predict.ipynb


+ 11 - 11
library/analysis.py

@@ -260,14 +260,14 @@ def runAllTestSets(dataSetList):
 
 
 
-generators = [ ("ProWRAS",       lambda _data: ProWRAS())
-             , ("Repeater",      lambda _data: Repeater())
-             #, ("SpheredNoise",  lambda _data: SpheredNoise())
-             , ("SimpleGAN",     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
-             , ("ctGAN",         lambda data: CtGAN(data.data0.shape[1]))
-             , ("CTAB-GAN",      lambda _data: CtabGan())
-             , ("convGAN",       lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5))
-             , ("convGAN-full",  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1]))
-             , ("convGAN-proximary-5",       lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True))
-             , ("convGAN-proxymary-full",  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], withMajorhoodNbSearch=True))
-             ]
+generators = { "ProWRAS":                 lambda _data: ProWRAS()
+             , "Repeater":                lambda _data: Repeater()
+             , "SpheredNoise":            lambda _data: SpheredNoise()
+             , "SimpleGAN":               lambda data: SimpleGan(numOfFeatures=data.data0.shape[1])
+             , "ctGAN":                   lambda data: CtGAN(data.data0.shape[1])
+             , "CTAB-GAN":                lambda _data: CtabGan()
+             , "convGAN":                 lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5)
+             , "convGAN-full":            lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1])
+             , "convGAN-proximary-5":     lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5, withMajorhoodNbSearch=True)
+             , "convGAN-proxymary-full":  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1], withMajorhoodNbSearch=True)
+             }

+ 10 - 1
library/exercise.py

@@ -11,7 +11,7 @@ from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 
 from library.dataset import DataSet, TrainTestData
-from library.testers import lr,knn, gb, TestResult
+from library.testers import lr, knn, gb, TestResult, runTester
 
 
 class Exercise:
@@ -85,6 +85,9 @@ class Exercise:
         # Reset results array.
         self.results = { name: [] for name in self.testFunctions }
 
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.results["GAN"] = []
+
         # If a shuffle function is given then shuffle the data before the
         # exercise starts.
         if self.shuffleFunction is not None:
@@ -184,6 +187,12 @@ class Exercise:
 
         # Test this dataset with every given test-function.
         # The results are printed out and stored to the results dictionary.
+        if gan.canPredict and "GAN" not in self.testFunctions.keys():
+            self.debug(f"-> test with GAN.predict")
+            testResult = runTester(dataSlice, gan)
+            self.debug(str(testResult))
+            self.results["GAN"].append(testResult)
+
         for testerName in self.testFunctions:
             self.debug(f"-> test with '{testerName}'")
             testResult = (self.testFunctions[testerName])(dataSlice)

+ 68 - 42
library/generators/convGAN.py

@@ -46,6 +46,7 @@ class ConvGAN(GanBaseClass):
         self.maj_min_discriminator = None
         self.withMajorhoodNbSearch = withMajorhoodNbSearch
         self.cg = None
+        self.canPredict = True
 
         if neb > gen:
             raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
@@ -74,7 +75,7 @@ class ConvGAN(GanBaseClass):
             print(self.cg.summary())
             print('\n')
 
-    def train(self, dataSet):
+    def train(self, dataSet, discTrainCount=5):
         """
         Trains the GAN.
 
@@ -97,7 +98,7 @@ class ConvGAN(GanBaseClass):
             self.nmbMaj = None
 
         # Do the training.
-        self._rough_learning(dataSet.data1, dataSet.data0)
+        self._rough_learning(dataSet.data1, dataSet.data0, discTrainCount)
         
         # Neighborhood in majority class is no longer needed. So save memory.
         self.nmbMaj = None
@@ -132,6 +133,10 @@ class ConvGAN(GanBaseClass):
 
         return synth_set
 
+    def predictReal(self, data):
+        prediction = self.maj_min_discriminator.predict(data)
+        return np.array([x[0] for x in prediction])
+
     # ###############################################################
     # Hidden internal functions
     # ###############################################################
@@ -199,6 +204,7 @@ class ConvGAN(GanBaseClass):
         ## passed through two dense layers
         y = Dense(250, activation='relu')(samples)
         y = Dense(125, activation='relu')(y)
+        y = Dense(75, activation='relu')(y)
         
         ## two output nodes. outputs have to be one-hot coded (see labels variable before)
         output = Dense(2, activation='sigmoid')(y)
@@ -273,53 +279,73 @@ class ConvGAN(GanBaseClass):
 
 
     # Training
-    def _rough_learning(self, data_min, data_maj):
+    def _rough_learning(self, data_min, data_maj, discTrainCount):
         generator = self.conv_sample_generator
         discriminator = self.maj_min_discriminator
         GAN = self.cg
         loss_history = [] ## this is for stroring the loss for every run
-        min_idx = 0
-        neb_epoch_count = 1
+        step = 0
+        minSetSize = len(data_min)
 
         labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
 
-        for step in range(self.neb_epochs * len(data_min)):
-            ## generate minority neighbourhood batch for every minority class sampls by index
-            min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
-            min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
-            min_idx = min_idx + 1
-            ## generate random proximal majority batch
-            maj_batch = self._BMB(data_maj, min_batch_indices)
-
-            ## generate synthetic samples from convex space
-            ## of minority neighbourhood batch using generator
-            conv_samples = generator.predict(min_batch)
-            ## concatenate them with the majority batch
-            concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
-
-            ## switch on discriminator training
-            discriminator.trainable = True
-            ## train the discriminator with the concatenated samples and the one-hot encoded labels
-            discriminator.fit(x=concat_sample, y=labels, verbose=0)
-            ## switch off the discriminator training again
-            discriminator.trainable = False
-
-            ## use the GAN to make the generator learn on the decisions
-            ## made by the previous discriminator training
-            ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
-            gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
-
-            ## store the loss for the step
-            loss_history.append(gan_loss_history.history['loss'])
-
-            if self.debug and ((step + 1) % 10 == 0):
-                print(f"{step + 1} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
-
-            if min_idx == len(data_min) - 1:
-                if self.debug:
-                    print(f"Neighbourhood epoch {neb_epoch_count} complete")
-                neb_epoch_count = neb_epoch_count + 1
-                min_idx = 0
+        for neb_epoch_count in range(self.neb_epochs):
+            if discTrainCount > 0:
+                for n in range(discTrainCount):
+                    for min_idx in range(minSetSize):
+                        ## generate minority neighbourhood batch for every minority class sampls by index
+                        min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                        min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                        ## generate random proximal majority batch
+                        maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                        ## generate synthetic samples from convex space
+                        ## of minority neighbourhood batch using generator
+                        conv_samples = generator.predict(min_batch)
+                        ## concatenate them with the majority batch
+                        concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                        ## switch on discriminator training
+                        discriminator.trainable = True
+                        ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                        discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                        ## switch off the discriminator training again
+                        discriminator.trainable = False
+
+            for min_idx in range(minSetSize):
+                ## generate minority neighbourhood batch for every minority class sampls by index
+                min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+                min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
+                ## generate random proximal majority batch
+                maj_batch = self._BMB(data_maj, min_batch_indices)
+
+                ## generate synthetic samples from convex space
+                ## of minority neighbourhood batch using generator
+                conv_samples = generator.predict(min_batch)
+                ## concatenate them with the majority batch
+                concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
+
+                ## switch on discriminator training
+                discriminator.trainable = True
+                ## train the discriminator with the concatenated samples and the one-hot encoded labels
+                discriminator.fit(x=concat_sample, y=labels, verbose=0)
+                ## switch off the discriminator training again
+                discriminator.trainable = False
+
+                ## use the GAN to make the generator learn on the decisions
+                ## made by the previous discriminator training
+                ##- print(f"concat sample shape: {concat_sample.shape}/{labels.shape}")
+                gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
+
+                ## store the loss for the step
+                loss_history.append(gan_loss_history.history['loss'])
+
+                step += 1
+                if self.debug and (step % 10 == 0):
+                    print(f"{step} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
+
+            if self.debug:
+                print(f"Neighbourhood epoch {neb_epoch_count + 1} complete")
 
         if self.debug:
             run_range = range(1, len(loss_history) + 1)

+ 15 - 0
library/interfaces.py

@@ -1,6 +1,7 @@
 """
 This module contains used interfaces for testing the Generative Adversarial Networks.
 """
+import numpy as np
 
 
 class GanBaseClass:
@@ -13,6 +14,7 @@ class GanBaseClass:
         """
         Initializes the class.
         """
+        self.canPredict = False
 
     def reset(self):
         """
@@ -39,3 +41,16 @@ class GanBaseClass:
         *numOfSamples* is an integer > 0. It gives the number of generated samples.
         """
         raise NotImplementedError
+
+    def predict(self, data, limit=0.5):
+        """
+        Takes a list (numpy array) of data points.
+        Returns a list with real values in [0,1] for the propapility
+        that a point is in the minority dataset. With:
+          0.0: point is in majority set
+          1.0: point is in minority set
+        """
+        return np.array([max(0, min(1, int(x + 1.0 - limit))) for x in self.predictReal(data)])
+
+    def predictReal(self, data):
+        raise NotImplemented

+ 4 - 5
library/testers.py

@@ -182,9 +182,7 @@ def knn(ttd):
     checkType(ttd)
     knnTester = KNeighborsClassifier(n_neighbors=10)
     knnTester.fit(ttd.train.data, ttd.train.labels)
-
-    prediction = knnTester.predict(ttd.test.data)
-    return TestResult("KNN", ttd.test.labels, prediction)
+    return runTester(ttd, knnTester, "KNN")
 
 
 def gb(ttd):
@@ -197,10 +195,11 @@ def gb(ttd):
     checkType(ttd)
     tester = GradientBoostingClassifier()
     tester.fit(ttd.train.data, ttd.train.labels)
+    return runTester(ttd, tester, "GB")
 
+def runTester(ttd, tester, name="GAN"):
     prediction = tester.predict(ttd.test.data)
-    return TestResult("GB", ttd.test.labels, prediction)
-
+    return TestResult(name, ttd.test.labels, prediction)
 
 def checkType(t):
     if str(type(t)) == "<class 'numpy.ndarray'>":

+ 4 - 4
run_all_exercises.ipynb

@@ -20,14 +20,14 @@
    "outputs": [],
    "source": [
     "for dataset in testSets:\n",
-    "    for f in generators:\n",
-    "        runExercise(dataset, None, name, f)"
+    "    for name in generators.keys():\n",
+    "        runExercise(dataset, None, name, generators[name])"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -41,7 +41,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,

+ 2 - 2
statistics.py

@@ -10,8 +10,8 @@ f1Score = "f1 score"
 
 ignoreSet = ["yeast_me2"]
 
-gans = [g[0] for g in generators]
-algs = {"LR", "GB", "KNN"}
+gans = generators.keys()
+algs = {"LR", "GB", "KNN", "GAN"}
 
 dataset  = [
     "folding_abalone9-18",

Некоторые файлы не были показаны из-за большого количества измененных файлов