4 سال پیش · 90d7ab576b
--- a/Exercise.ipynb
+++ b/Exercise.ipynb
--- a/library/GanExamples.py
+++ b/library/GanExamples.py
@@ -0,0 +1,158 @@
 
															+"""
														
 
															+This module contains some example Generative Adversarial Networks for testing.
														
 
															+
														
 
															+The classes StupidToyPointGan and StupidToyListGan are not really networks. These classes are used
														
 
															+for testing the interface. Hopefully your actual GAN will perform better than these two.
														
 
															+
														
 
															+The class SimpleGan (a simple standard Generative Adversarial Network) is currently commented out.
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+import numpy as np
														
 
															+
														
 
															+from library.interfaces import GanBaseClass
														
 
															+
														
 
															+
														
 
															+class StupidToyPointGan(GanBaseClass):
														
 
															+    """
														
 
															+    This is a toy example of a GAN.
														
 
															+    It repeats the first point of the training-data-set.
														
 
															+    """
														
 
															+
														
 
															+    def __init__(self):
														
 
															+        """
														
 
															+        Initializes the class and marks it as untrained.
														
 
															+        """
														
 
															+        self.isTrained = False
														
 
															+        self.exampleItem = None
														
 
															+
														
 
															+    def train(self, dataSet):
														
 
															+        """
														
 
															+        Trains the GAN.
														
 
															+
														
 
															+        It stores the first data-point in the training data-set and marks the GAN as trained.
														
 
															+
														
 
															+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
														
 
															+        We are only interested in the class 1.
														
 
															+        """
														
 
															+        if dataSet.data1.shape[0] <= 0:
														
 
															+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
														
 
															+
														
 
															+        self.isTrained = True
														
 
															+        self.exampleItem = dataSet.data1[0].copy()
														
 
															+
														
 
															+    def generateDataPoint(self):
														
 
															+        """
														
 
															+        Generates one synthetic data-point by copying the stored data point.
														
 
															+        """
														
 
															+        if not self.isTrained:
														
 
															+            raise ValueError("Try to generate data with untrained GAN.")
														
 
															+
														
 
															+        return self.exampleItem
														
 
															+
														
 
															+    def generateData(self, numOfSamples=1):
														
 
															+        """
														
 
															+        Generates a list of synthetic data-points.
														
 
															+
														
 
															+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
														
 
															+        """
														
 
															+        numOfSamples = int(numOfSamples)
														
 
															+        if numOfSamples < 1:
														
 
															+            raise AttributeError("Expected numOfSamples to be > 0")
														
 
															+
														
 
															+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
														
 
															+
														
 
															+
														
 
															+class StupidToyListGan(GanBaseClass):
														
 
															+    """
														
 
															+    This is a toy example of a GAN.
														
 
															+    It cycles through the first *maxListSize* points of class 1 in the training-data-set.
														
 
															+    """
														
 
															+    def __init__(self, maxListSize=100):
														
 
															+        self.isTrained = False
														
 
															+        self.exampleItems = None
														
 
															+        self.nextIndex = 0
														
 
															+        self.maxListSize = int(maxListSize)
														
 
															+        if self.maxListSize < 1:
														
 
															+            raise AttributeError("Expected maxListSize to be > 0 but got " + str(self.maxListSize))
														
 
															+
														
 
															+
														
 
															+    def train(self, dataSet):
														
 
															+        """
														
 
															+        Trains the GAN.
														
 
															+
														
 
															+        It stores the first *maxListSize* data-points of class 1 and marks the GAN as trained.
														
 
															+
														
 
															+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
														
 
															+        We are only interested in the first *maxListSize* points in class 1.
														
 
															+        """
														
 
															+        if dataSet.data1.shape[0] <= 0:
														
 
															+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
														
 
															+
														
 
															+        self.isTrained = True
														
 
															+        self.exampleItems = dataSet.data1[: self.maxListSize].copy()
														
 
															+
														
 
															+    def generateDataPoint(self):
														
 
															+        """
														
 
															+        Returns one synthetic data point by repeating the stored list.
														
 
															+        """
														
 
															+        if not self.isTrained:
														
 
															+            raise ValueError("Try to generate data with untrained GAN.")
														
 
															+
														
 
															+        i = self.nextIndex
														
 
															+        self.nextIndex += 1
														
 
															+        if self.nextIndex >= self.exampleItems.shape[0]:
														
 
															+            self.nextIndex = 0
														
 
															+
														
 
															+        return self.exampleItems[i]
														
 
															+
														
 
															+
														
 
															+    def generateData(self, numOfSamples=1):
														
 
															+        """
														
 
															+        Generates a list of synthetic data-points.
														
 
															+
														
 
															+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
														
 
															+        """
														
 
															+        numOfSamples = int(numOfSamples)
														
 
															+        if numOfSamples < 1:
														
 
															+            raise AttributeError("Expected numOfSamples to be > 0")
														
 
															+
														
 
															+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
														
 
															+
														
 
															+
														
 
															+# class SimpleGan(GanBaseClass):
														
 
															+#     def __init__(self, maxListSize=100):
														
 
															+#         self.isTrained = False
														
 
															+#         self.exampleItems = None
														
 
															+#         self.nextIndex = 0
														
 
															+#         self.maxListSize = int(maxListSize)
														
 
															+#         if self.maxListSize < 1:
														
 
															+#             raise AttributeError(f"Expected maxListSize to be > 0 but got {self.maxListSize}")
														
 
															+#
														
 
															+#
														
 
															+#     def train(self, dataSet):
														
 
															+#         if dataSet.data1.shape[0] <= 0:
														
 
															+#             raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
														
 
															+#
														
 
															+#         self.isTrained = True
														
 
															+#         self.exampleItems = dataSet.data1[: self.maxListSize].copy()
														
 
															+#
														
 
															+#     def generateDataPoint(self, numOfSamples=1):
														
 
															+#         if not self.isTrained:
														
 
															+#             raise ValueError("Try to generate data with untrained GAN.")
														
 
															+#
														
 
															+#         i = self.nextIndex
														
 
															+#         self.nextIndex += 1
														
 
															+#         if self.nextIndex >= self.exampleItems.shape[0]:
														
 
															+#             self.nextIndex = 0
														
 
															+#
														
 
															+#         return self.exampleItems[i]
														
 
															+#
														
 
															+#
														
 
															+#     def generateData(self, numOfSamples=1):
														
 
															+#         numOfSamples = int(numOfSamples)
														
 
															+#         if numOfSamples < 1:
														
 
															+#             raise AttributeError("Expected numOfSamples to be > 0")
														
 
															+#
														
 
															+#         return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
														
 
															+#
														
--- a/library/dataset.py
+++ b/library/dataset.py
@@ -1,11 +1,29 @@
 
															+"""
														
 
															+This module contains classes to collect data for testing and training.
														
 
															+"""
														
 
															+
														
 
															+
														
 
															 import math
														
 
															 import numpy as np
														
 
															+
														
 
															 class DataSet:
														
 
															     """
														
 
															-    Stores data and Labels.
														
 
															+    This class stores data and labels for a test or training dataset.
														
 
															+
														
 
															+    *data0*, *data1* are instances of /numpy.array/ containing the data for the class 0 (majority
														
 
															+    class) and the class 1 (minority class).
														
 
															+
														
 
															+    *size0*, *size1* are integers, giving the size of the classes 0 and 1.
														
 
															+
														
 
															+    *data* is an instance of /numpy.array/ containing the combined classes 0 and 1.
														
 
															+
														
 
															+    *labels* is a /numpy.array/ containing the labels for *data*.
														
 
															     """
														
 
															     def __init__(self, data0=None, data1=None):
														
 
															+        """
														
 
															+        Initializes one instance of this class and fills *data* and *labels*.
														
 
															+        """
														
 
															         self.data0 = data0
														
 
															         self.data1 = data1
														
 
															         self.size0 = len(data0) if data0 is not None else 0
														
@@ -24,6 +42,10 @@ class DataSet:
 
															             raise AttributeError("Expected data, data0 or data1 to be a numpy.array")
														
 
															     def shuffleWith(self, shuffleFn):
														
 
															+        """
														
 
															+        Shuffles the points in the classes 0 and 1 with the given function
														
 
															+        (numpy.array -> numpy.array). After that the *data* array will be regenerated.
														
 
															+        """
														
 
															         if self.data0 is not None:
														
 
															             self.data0 = shuffleFn(self.data0)
														
@@ -38,33 +60,68 @@ class DataSet:
 
															             self.data = np.concatenate((self.data1, self.data0))
														
 
															     def labels0(self):
														
 
															+        """
														
 
															+        Returns a /numpy.array/ with labels for class0.
														
 
															+        """
														
 
															         return np.zeros(self.size0)
														
 
															     def labels1(self):
														
 
															+        """
														
 
															+        Returns a /numpy.array/ with labels for class1.
														
 
															+        """
														
 
															         return np.zeros(self.size1) + 1
														
 
															 class TrainTestData:
														
 
															     """
														
 
															-    Stores features, data and labels for class 0 and class 1.
														
 
															+    Stores data and labels for class 0 and class 1.
														
 
															+
														
 
															+    *train* is a /DataSet/ containing the data for training.
														
 
															+
														
 
															+    *test* is a /DataSet/ containing the data for testing.
														
 
															     """
														
 
															     def __init__(self, train, test):
														
 
															+        """
														
 
															+        Initializes a new instance for this class and stores the given data.
														
 
															+        """
														
 
															         self.train = train
														
 
															         self.test = test
														
 
															-    @staticmethod
														
 
															-    def splitUpData(data, trainFactor=0.9):
														
 
															-        size = len(data)
														
 
															-        trainSize = math.ceil(size * trainFactor)
														
 
															-        trn = data[list(range(0, trainSize))]
														
 
															-        tst = data[list(range(trainSize, size))]
														
 
															-        return trn, tst
														
 
															-
														
 
															     @classmethod
														
 
															-    def splitDataByFactor(cls, features0, features1, trainFactor=0.9):
														
 
															-        features_0_trn, features_0_tst = cls.splitUpData(features0, trainFactor)
														
 
															-        features_1_trn, features_1_tst = cls.splitUpData(features1, trainFactor)
														
 
															+    def splitDataByFactor(cls, features0, features1, factor=0.9):
														
 
															+        """
														
 
															+        Creates a new instance of this class.
														
 
															+
														
 
															+        The first (factor * 100%) percent of the points in the given classes are stored for
														
 
															+        training. The remaining points are stored for testing.
														
 
															+
														
 
															+        *features0* and *features1* are /numpy.array/ instances containing the data for class 0
														
 
															+        and class 1.
														
 
															+
														
 
															+        *factor* is a real number > 0 and < 1 for the splitting point.
														
 
															+        """
														
 
															+
														
 
															+        if factor <= 0.0 or factor >= 1.0:
														
 
															+            raise AttributeError(f"Expected trainFactor to be between 0 and 1 but got {factor}.")
														
 
															+
														
 
															+        # ----------------------------------------------------------------------------------------
														
 
															+        # Supporting function:
														
 
															+        def splitUpData(data):
														
 
															+            """
														
 
															+            Splits a given /numpy.array/ in two /numpy.array/.
														
 
															+            The first array contains (factor * 100%) percent of the data points.
														
 
															+            The second array contains the remaining data points.
														
 
															+            """
														
 
															+            size = len(data)
														
 
															+            trainSize = math.ceil(size * factor)
														
 
															+            trn = data[list(range(0, trainSize))]
														
 
															+            tst = data[list(range(trainSize, size))]
														
 
															+            return trn, tst
														
 
															+        # ----------------------------------------------------------------------------------------
														
 
															+
														
 
															+        features_0_trn, features_0_tst = splitUpData(features0)
														
 
															+        features_1_trn, features_1_tst = splitUpData(features1)
														
 
															         return cls(
														
 
															             test=DataSet(data1=features_1_tst, data0=features_0_tst),
														
@@ -73,8 +130,44 @@ class TrainTestData:
 
															     @classmethod
														
 
															     def splitDataToSlices(cls, bigData, numOfSlices=5):
														
 
															-        data0slices = cls._arrayToSlices(bigData.data0, numOfSlices)
														
 
															-        data1slices = cls._arrayToSlices(bigData.data1, numOfSlices)
														
 
															+        """
														
 
															+        Creates new instances of this class, one per slice. They are yielded by a generator.
														
 
															+
														
 
															+        The given data is split into the given number of slices.
														
 
															+
														
 
															+        *bigData* is an instance of /DataSet/ containing the data to split.
														
 
															+
														
 
															+        *numOfSlices* is the number of generated slices.
														
 
															+        """
														
 
															+
														
 
															+        numOfSlices = int(numOfSlices)
														
 
															+        if numOfSlices < 1:
														
 
															+            raise AttributeError(f"Expected numOfSlices to be positive but got {numOfSlices}")
														
 
															+
														
 
															+        # ----------------------------------------------------------------------------------------
														
 
															+        # Supporting function:
														
 
															+        def arrayToSlices(data):
														
 
															+            """
														
 
															+            Takes a /numpy.array/ and splits it into *numOfSlices* slices.
														
 
															+            A list of the slices will be returned.
														
 
															+            """
														
 
															+            size = len(data)
														
 
															+            if size < numOfSlices:
														
 
															+                raise AttributeError(
														
 
															+                    f"Expected data set to contain at least {numOfSlices} points"
														
 
															+                    + f" but got {size} points."
														
 
															+                    )
														
 
															+
														
 
															+            sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
														
 
															+
														
 
															+            return [
														
 
															+                data[n * sliceSize : min(size, (n+1) * sliceSize)]
														
 
															+                for n in range(numOfSlices)
														
 
															+                ]
														
 
															+        # ----------------------------------------------------------------------------------------
														
 
															+
														
 
															+        data0slices = arrayToSlices(bigData.data0)
														
 
															+        data1slices = arrayToSlices(bigData.data1)
														
 
															         for n in range(numOfSlices):
														
 
															             data0 = np.concatenate([data0slices[k] for k in range(numOfSlices) if n != k])
														
@@ -82,19 +175,3 @@ class TrainTestData:
 
															             train = DataSet(data0=data0, data1=data1)
														
 
															             test = DataSet(data0=data0slices[n], data1=data1slices[n])
														
 
															             yield cls(train=train, test=test)
														
 
															-
														
 
															-    @staticmethod
														
 
															-    def _arrayToSlices(data, numOfSlices):
														
 
															-        size = len(data)
														
 
															-        if size < numOfSlices:
														
 
															-            raise AttributeError(
														
 
															-                f"Expected data set to contain at least {numOfSlices} points"
														
 
															-                + f" but got {size} points."
														
 
															-                )
														
 
															-
														
 
															-        sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
														
 
															-
														
 
															-        return [
														
 
															-            data[n * sliceSize : min(size, (n+1) * sliceSize)]
														
 
															-            for n in range(numOfSlices)
														
 
															-            ]
														
--- a/library/exercise.py
+++ b/library/exercise.py
@@ -1,23 +1,19 @@
 
															-import numpy as np
														
 
															-import pandas as pd
														
 
															+"""
														
 
															+Class for testing the performance of Generative Adversarial Networks
														
 
															+in generating synthetic samples for datasets with a minority class.
														
 
															+"""
														
 
															-import sklearn
														
 
															-# needed in function lr
														
 
															-from sklearn import metrics
														
 
															-from sklearn.neighbors import KNeighborsClassifier
														
 
															-from sklearn.linear_model import LogisticRegression
														
 
															-from sklearn.metrics import confusion_matrix
														
 
															-from sklearn.metrics import average_precision_score
														
 
															-from sklearn.metrics import f1_score
														
 
															-from sklearn.metrics import balanced_accuracy_score
														
 
															+import numpy as np
														
 
															+import pandas as pd
														
 
															-from sklearn.decomposition import PCA
														
 
															 import seaborn as sns
														
 
															+from sklearn.decomposition import PCA
														
 
															 from sklearn.preprocessing import StandardScaler
														
 
															 import matplotlib.pyplot as plt
														
 
															 from library.dataset import DataSet, TrainTestData
														
 
															+from library.testers import lr, svm, knn
														
 
															 class Exercise:
														
@@ -25,134 +21,167 @@ class Exercise:
 
															     Exercising a test for a minority class extension class.
														
 
															     """
														
 
															-    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
														
 
															-        self.numOfSlices = numOfSlices
														
 
															-        self.numOfShuffles = numOfShuffles
														
 
															-        self.testFunctions = testFunctions
														
 
															+    def __init__(self, testFunctions=None, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
														
 
															+        """
														
 
															+        Creates a instance of this class.
														
 
															+
														
 
															+        *testFunctions* is a dictionary /(String : Function)/ of functions for testing
														
 
															+        a generated dataset. The functions have the signature:
														
 
															+        /(TrainTestData, TrainTestData) -> TestResult/
														
 
															+
														
 
															+        *shuffleFunction* is either None or a function /numpy.array -> numpy.array/
														
 
															+        that shuffles a given array.
														
 
															+
														
 
															+        *numOfSlices* is an integer > 0. The dataset given for the run function
														
 
															+        will be divided in such many slices.
														
 
															+
														
 
															+        *numOfShuffles* is an integer > 0. It gives the number of exercised tests.
														
 
															+        The GAN will be trained and tested (numOfShuffles * numOfSlices) times.
														
 
															+        """
														
 
															+        self.numOfSlices = int(numOfSlices)
														
 
															+        self.numOfShuffles = int(numOfShuffles)
														
 
															         self.shuffleFunction = shuffleFunction
														
 
															         self.debug = print
														
 
															+        self.testFunctions = testFunctions
														
 
															+        if self.testFunctions is None:
														
 
															+            self.testFunctions = {
														
 
															+                "LR": lr,
														
 
															+                "SVM": svm,
														
 
															+                "KNN": knn
														
 
															+                }
														
 
															+
														
 
															+        self.results = { name: [] for name in self.testFunctions }
														
 
															+
														
 
															+        # Check if the given values are in valid range.
														
 
															+        if self.numOfSlices < 0:
														
 
															+            raise AttributeError(f"Expected numOfSlices to be > 0 but got {self.numOfSlices}")
														
 
															+
														
 
															+        if self.numOfShuffles < 0:
														
 
															+            raise AttributeError(f"Expected numOfShuffles to be > 0 but got {self.numOfShuffles}")
														
 
															+
														
 
															     def run(self, gan, dataset):
														
 
															+        """
														
 
															+        Exercise all tests for a given GAN.
														
 
															+
														
 
															+        *gan* is a implemention of library.interfaces.GanBaseClass.
														
 
															+        It defines the GAN to test.
														
 
															+
														
 
															+        *dataset* is a library.dataset.DataSet that contains the majority class
														
 
															+        (dataset.data0) and the minority class (dataset.data1) of data
														
 
															+        for training and testing.
														
 
															+        """
														
 
															+
														
 
															+        # Check if the given values are in valid range.
														
 
															         if len(dataset.data1) > len(dataset.data0):
														
 
															-            raise AttributeError("Expected class 1 to be the minority class but class 1 is bigger than class 0.")
														
 
															+            raise AttributeError(
														
 
															+                "Expected class 1 to be the minority class but class 1 is bigger than class 0.")
														
 
															+
														
 
															+        # Reset results array.
														
 
															+        self.results = { name: [] for name in self.testFunctions }
														
 
															+        # Repeat numOfShuffles times
														
 
															         self.debug("### Start exercise for synthetic point generator")
														
 
															         for shuffleStep in range(self.numOfShuffles):
														
 
															-            stepTitle = "Step {shuffleStep + 1}/{self.numOfShuffles}"
														
 
															+            stepTitle = f"Step {shuffleStep + 1}/{self.numOfShuffles}"
														
 
															             self.debug(f"\n====== {stepTitle} =======")
														
 
															+            # If a shuffle fuction is given then shuffle the data before the next
														
 
															+            # exercise starts.
														
 
															             if self.shuffleFunction is not None:
														
 
															                 self.debug("-> Shuffling data")
														
 
															                 dataset.shuffleWith(self.shuffleFunction)
														
 
															+
														
 
															+            # Split the (shuffled) data into numOfSlices slices.
														
 
															+            # dataSlices is a list of TrainTestData instances.
														
 
															+            #
														
 
															+            # If numOfSlices=3 then the data will be splited in D1, D2, D3.
														
 
															+            # dataSlices will contain:
														
 
															+            # [(train=D2+D3, test=D1), (train=D1+D3, test=D2), (train=D1+D2, test=D3)]
														
 
															             self.debug("-> Spliting data to slices")
														
 
															             dataSlices = TrainTestData.splitDataToSlices(dataset, self.numOfSlices)
														
 
															+            # Do a exercise for every slice.
														
 
															             for (sliceNr, sliceData) in enumerate(dataSlices):
														
 
															-                sliceTitle = "Slice {sliceNr + 1}/{self.numOfSlices}"
														
 
															+                sliceTitle = f"Slice {sliceNr + 1}/{self.numOfSlices}"
														
 
															                 self.debug(f"\n------ {stepTitle}: {sliceTitle} -------")
														
 
															                 self._exerciseWithDataSlice(gan, sliceData)
														
 
															+
														
 
															         self.debug("### Exercise is done.")
														
 
															     def _exerciseWithDataSlice(self, gan, dataSlice):
														
 
															+        """
														
 
															+        Runs one test for the given gan and dataSlice.
														
 
															+
														
 
															+        *gan* is a implemention of library.interfaces.GanBaseClass.
														
 
															+        It defines the GAN to test.
														
 
															+
														
 
															+        *dataSlice* is a library.dataset.TrainTestData instance that contains
														
 
															+        one data slice with training and testing data.
														
 
															+        """
														
 
															+
														
 
															+        # Train the gan so it can produce synthetic samples.
														
 
															         self.debug("-> Train generator for synthetic samples")
														
 
															         gan.train(dataSlice.train)
														
 
															+        # Count how many syhthetic samples are needed.
														
 
															         numOfNeededSamples = dataSlice.train.size0 - dataSlice.train.size1
														
 
															+        # Add synthetic samples (generated by the GAN) to the minority class.
														
 
															         if numOfNeededSamples > 0:
														
 
															             self.debug(f"-> create {numOfNeededSamples} synthetic samples")
														
 
															-            newSamples = np.asarray([gan.generateData() for _ in range(numOfNeededSamples)])
														
 
															-            train = DataSet(
														
 
															+            newSamples = gan.generateData(numOfNeededSamples)
														
 
															+            dataSlice.train = DataSet(
														
 
															                 data0=dataSlice.train.data0,
														
 
															                 data1=np.concatenate((dataSlice.train.data1, newSamples))
														
 
															                 )
														
 
															-        else:
														
 
															-            train = dataSlice.train
														
 
															-        plotCloud(train.data, train.labels)
														
 
															+        # Print out an overview of the new dataset.
														
 
															+        plotCloud(dataSlice.train)
														
 
															-        results = { name: [] for name in self.testFunctions }
														
 
															+        # Test this dataset with every given test-function.
														
 
															+        # The results are printed out and stored to the results dictionary.
														
 
															         for testerName in self.testFunctions:
														
 
															             self.debug(f"-> test with '{testerName}'")
														
 
															-            testResult = (self.testFunctions[testerName])(train, dataSlice.test)
														
 
															-            testResult.print()
														
 
															-            results[testerName].append(testResult)
														
 
															-
														
 
															-        self.debug("-> check results")
														
 
															-        self._checkResults(results, dataSlice.test.labels)
														
 
															-
														
 
															-    def _checkResults(self, results, expectedLabels):
														
 
															-        pass
														
 
															-
														
 
															-
														
 
															-class TestResult:
														
 
															-    def __init__(self, title, labels, prediction, aps=None):
														
 
															-        self.title = title
														
 
															-        self.con_mat = confusion_matrix(labels, prediction)
														
 
															-        self.bal_acc = balanced_accuracy_score(labels, prediction)
														
 
															-        self.f1 = f1_score(labels, prediction)
														
 
															-        self.aps = aps
														
 
															-
														
 
															-    def print(self):
														
 
															-        #tn, fp, fn, tp = con_mat.ravel()
														
 
															-        r = self.con_mat.ravel()
														
 
															-        print('tn, fp, fn, tp:', r)
														
 
															-
														
 
															-        if self.aps is not None:
														
 
															-            print('average_pr_score:', self.aps)
														
 
															-
														
 
															-        print(f'f1 score_{self.title}:', self.f1)
														
 
															-        print(f'balanced accuracy_{self.title}:', self.bal_acc)
														
 
															-        print(f'confusion matrix_{self.title}')
														
 
															-        print(self.con_mat)
														
 
															-
														
 
															+            testResult = (self.testFunctions[testerName])(dataSlice)
														
 
															+            self.debug(str(testResult))
														
 
															+            self.results[testerName].append(testResult)
														
 
															-def lr(train, test):
														
 
															-    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3})
														
 
															-    logreg.fit(train.data, train.labels)
														
 
															+    def saveResultsTo(self, fileName):
														
 
															+        with open(fileName, "w") as f:
														
 
															+            for name in self.results:
														
 
															+                f.write(name + "\n")
														
 
															+                isFirst = True
														
 
															+                for result in self.results[name]:
														
 
															+                    if isFirst:
														
 
															+                        isFirst = False
														
 
															+                        f.write(result.csvHeading() + "\n")
														
 
															+                    f.write(result.toCSV() + "\n")
														
 
															+            
														
 
															-    prediction = logreg.predict(test.data)
														
 
															-
														
 
															-    prob_lr = logreg.predict_proba(test.data)
														
 
															-    aps_lr = average_precision_score(test.labels, prob_lr[:,1]) 
														
 
															-    return TestResult("LR", test.labels, prediction, aps_lr)
														
 
															-
														
 
															-def svm(train, test):
														
 
															-    svm = sklearn.svm.SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.}, probability=True)
														
 
															-    svm.fit(train.data, train.labels)
														
 
															-
														
 
															-    prediction = svm.predict(test.data)
														
 
															-    return TestResult("SVM", test.labels, prediction)
														
 
															-
														
 
															-
														
 
															-def knn(train, test):
														
 
															-    knn = KNeighborsClassifier(n_neighbors=10)
														
 
															-    knn.fit(train.data, train.labels)
														
 
															-    
														
 
															-    prediction = knn.predict(test.data)
														
 
															-    return TestResult("KNN", test.labels, prediction)
														
 
															-
														
 
															-
														
 
															-allTesters = {
														
 
															-    "LR": lr,
														
 
															-    "SVM": svm,
														
 
															-    "KNN": knn
														
 
															-    }
														
 
															+def plotCloud(dataset):
														
 
															+    """
														
 
															+    Does a PCA analysis of the given data and plot the both important axis.
														
 
															+    """
														
 
															+    # Normalizes the data.
														
 
															+    data_t = StandardScaler().fit_transform(dataset.data)
														
 
															-def plotCloud(data, labels):
														
 
															-    data_t = StandardScaler().fit_transform(data)
														
 
															+    # Run the PCA analysis.
														
 
															     pca = PCA(n_components=2)
														
 
															     pc = pca.fit_transform(data_t)
														
 
															+
														
 
															+    # Create a DataFrame for plotting.
														
 
															     result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
														
 
															-    result['Cluster'] = labels
														
 
															-    
														
 
															+    result['Cluster'] = dataset.labels
														
 
															+
														
 
															+    # Plot the analysis results.
														
 
															     sns.set( font_scale=1.2)
														
 
															-    g=sns.lmplot( x="PCA0", y="PCA1",
														
 
															-      data=result, 
														
 
															-      fit_reg=False, 
														
 
															+    sns.lmplot( x="PCA0", y="PCA1",
														
 
															+      data=result,
														
 
															+      fit_reg=False,
														
 
															       hue='Cluster', # color by cluster
														
 
															       legend=False,
														
 
															       scatter_kws={"s": 3}, palette="Set1") # specify the point size
														
--- a/library/interfaces.py
+++ b/library/interfaces.py
@@ -1,41 +1,35 @@
 
															-import numpy as np
														
 
															+"""
														
 
															+This module contains the interfaces used for testing the Generative Adversarial Networks.
														
 
															+"""
														
 
															-class GanBaseClass:
														
 
															-    def __init__(self):
														
 
															-        self.isTrained = False
														
 
															-        self.exampleItems = None
														
 
															-        self.nextIndex = 0
														
 
															-        pass
														
 
															-
														
 
															-    def train(self, dataSet):
														
 
															-        if dataSet.data1.shape[0] <= 0:
														
 
															-            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
														
 
															-
														
 
															-        print(
														
 
															-            "Train GAN with |class 0|=%d, |class 1|=%d"
														
 
															-            % (dataSet.data0.shape[0], dataSet.data1.shape[0])
														
 
															-            )
														
 
															-        self.isTrained = True
														
 
															-        self.exampleItems = dataSet.data1.copy()
														
 
															-
														
 
															-    def generateData(self):
														
 
															-        if not self.isTrained:
														
 
															-            raise ValueError("Try to generate data with untrained GAN.")
														
 
															-
														
 
															-        i = self.nextIndex
														
 
															-        self.nextIndex += 1
														
 
															-        if self.nextIndex >= self.exampleItems.shape[0]:
														
 
															-            self.nextIndex = 0
														
 
															-
														
 
															-        return self.exampleItems[i]
														
 
															+class GanBaseClass:
														
 
															+    """
														
 
															+    Base class for the Generative Adversarial Network.
														
 
															+    It defines the interface used by the Exercise class.
														
 
															+    """
														
 
															-class TesterNetworkBaseClass:
														
 
															     def __init__(self):
														
 
															-        pass
														
 
															+        """
														
 
															+        Initializes the class.
														
 
															+        """
														
 
															-    def train(self, data, labels):
														
 
															-        pass
														
 
															-
														
 
															-    def predict(self, data):
														
 
															-        return np.zeros(data.shape[0])
														
 
															+    def train(self, dataSet):
														
 
															+        """
														
 
															+        Trains the GAN.
														
 
															+        """
														
 
															+        raise NotImplementedError
														
 
															+
														
 
															+    def generateDataPoint(self):
														
 
															+        """
														
 
															+        Generates one synthetic data-point.
														
 
															+        """
														
 
															+        return self.generateData(1)[0]
														
 
															+
														
 
															+    def generateData(self, numOfSamples=1):
														
 
															+        """
														
 
															+        Generates a list of synthetic data-points.
														
 
															+
														
 
															+        *numOfSamples* is an integer > 0. It gives the number of generated samples.
														
 
															+        """
														
 
															+        raise NotImplementedError
														
--- a/library/testers.py
+++ b/library/testers.py
@@ -0,0 +1,112 @@
 
															+"""
														
 
															+This module contains test functions for datasets using the logistic regression, the support vector
														
 
															+machine and the k-nearest-neighbours algorithm. Additionally it contains a class for storing the
														
 
															+results of the tests.
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+import sklearn
														
 
															+# needed in function lr
														
 
															+from sklearn.neighbors import KNeighborsClassifier
														
 
															+from sklearn.linear_model import LogisticRegression
														
 
															+from sklearn.metrics import confusion_matrix
														
 
															+from sklearn.metrics import average_precision_score
														
 
															+from sklearn.metrics import f1_score
														
 
															+from sklearn.metrics import balanced_accuracy_score
														
 
															+
														
 
															+
														
 
															+class TestResult:
														
 
															+    """
														
 
															+    This class represents the result of one test.
														
 
															+
														
 
															+    It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*)
														
 
															+    and the f1 score (*f1*). If given the average precision score is also stored (*aps*).
														
 
															+    """
														
 
															+    def __init__(self, title, labels, prediction, aps=None):
														
 
															+        """
														
 
															+        Creates an instance of this class. The stored data will be generated from the given values.
														
 
															+
														
 
															+        *title* is a text to identify this result.
														
 
															+
														
 
															+        *labels* is a /numpy.array/ containing the labels of the test-data-set.
														
 
															+
														
 
															+        *prediction* is a /numpy.array/ containing the done prediction for the test-data-set.
														
 
															+
														
 
															+        *aps* is a real number representing the average precision score.
														
 
															+        """
														
 
															+        self.title = title
														
 
															+        self.con_mat = confusion_matrix(labels, prediction)
														
 
															+        self.bal_acc = balanced_accuracy_score(labels, prediction)
														
 
															+        self.f1 = f1_score(labels, prediction)
														
 
															+        self.aps = aps
														
 
															+
														
 
															+    def __str__(self):
														
 
															+        """
														
 
															+        Generates a text representing this result.
														
 
															+        """
														
 
															+        #tn, fp, fn, tp = con_mat.ravel()
														
 
															+        r = self.con_mat.ravel()
														
 
															+        text = f"tn, fp, fn, tp: {r}"
														
 
															+
														
 
															+        if self.aps is not None:
														
 
															+            text += f"\naverage_pr_score: {self.aps}"
														
 
															+
														
 
															+        text += f"\nf1 score_{self.title}: {self.f1}"
														
 
															+        text += f"\nbalanced accuracy_{self.title}: {self.bal_acc}"
														
 
															+        text += f"\nconfusion matrix_{self.title}\n {self.con_mat}"
														
 
															+        return text
														
 
															+
														
 
															+
														
 
															+def lr(ttd):
														
 
															+    """
														
 
															+    Runs a test for a dataset with the logistic regression algorithm.
														
 
															+    It returns a /TestResult./
														
 
															+
														
 
															+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
														
 
															+    """
														
 
															+    logreg = LogisticRegression(
														
 
															+        C=1e5,
														
 
															+        solver='lbfgs',
														
 
															+        multi_class='multinomial',
														
 
															+        class_weight={0: 1, 1: 1.3}
														
 
															+        )
														
 
															+    logreg.fit(ttd.train.data, ttd.train.labels)
														
 
															+
														
 
															+    prediction = logreg.predict(ttd.test.data)
														
 
															+
														
 
															+    prob_lr = logreg.predict_proba(ttd.test.data)
														
 
															+    aps_lr = average_precision_score(ttd.test.labels, prob_lr[:,1])
														
 
															+    return TestResult("LR", ttd.test.labels, prediction, aps_lr)
														
 
															+
														
 
															+
														
 
															+def svm(ttd):
														
 
															+    """
														
 
															+    Runs a test for a dataset with the support vector machine algorithm.
														
 
															+    It returns a /TestResult./
														
 
															+
														
 
															+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
														
 
															+    """
														
 
															+    svmTester = sklearn.svm.SVC(
														
 
															+        kernel='linear',
														
 
															+        decision_function_shape='ovo',
														
 
															+        class_weight={0: 1., 1: 1.},
														
 
															+        probability=True
														
 
															+        )
														
 
															+    svmTester.fit(ttd.train.data, ttd.train.labels)
														
 
															+
														
 
															+    prediction = svmTester.predict(ttd.test.data)
														
 
															+    return TestResult("SVM", ttd.test.labels, prediction)
														
 
															+
														
 
															+
														
 
															+def knn(ttd):
														
 
															+    """
														
 
															+    Runs a test for a dataset with the k-next neighbourhood algorithm.
														
 
															+    It returns a /TestResult./
														
 
															+
														
 
															+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
														
 
															+    """
														
 
															+    knnTester = KNeighborsClassifier(n_neighbors=10)
														
 
															+    knnTester.fit(ttd.train.data, ttd.train.labels)
														
 
															+
														
 
															+    prediction = knnTester.predict(ttd.test.data)
														
 
															+    return TestResult("KNN", ttd.test.labels, prediction)