瀏覽代碼

Added tester functions (lr, svn, knn), added ToyGANs, added shuffler, updated the example

Kristian Schultz 4 年之前
父節點
當前提交
90d7ab576b
共有 6 個文件被更改,包括 629 次插入192 次删除
  1. 98 31
      Example Exercise.ipynb
  2. 158 0
      library/GanExamples.py
  3. 108 31
      library/dataset.py
  4. 123 94
      library/exercise.py
  5. 30 36
      library/interfaces.py
  6. 112 0
      library/testers.py

文件差異過大導致無法顯示
+ 98 - 31
Example Exercise.ipynb


+ 158 - 0
library/GanExamples.py

@@ -0,0 +1,158 @@
+"""
+This module contains some example Generative Adversarial Networks for testing.
+
+The classes StupidToyPointGan and StupidToyListGan are not really Networks. These classes are
+used for testing the interface. Hopefully your actual GAN will perform better than these two.
+
+The class SimpleGan is a simple standard Generative Adversarial Network.
+"""
+
+
+import numpy as np
+
+from library.interfaces import GanBaseClass
+
+
+class StupidToyPointGan(GanBaseClass):
+    """
+    This is a toy example of a GAN.
+    It repeats the first point of the training-data-set.
+    """
+
+    def __init__(self):
+        """
+        Initializes the class and marks it as untrained.
+        """
+        self.isTrained = False
+        self.exampleItem = None
+
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+
+        It stores the first data-point in the training data-set and marks the GAN as trained.
+
+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
+        We are only interested in class 1.
+        """
+        if dataSet.data1.shape[0] <= 0:
+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+
+        self.isTrained = True
+        self.exampleItem = dataSet.data1[0].copy()
+
+    def generateDataPoint(self):
+        """
+        Generates one synthetic data-point by copying the stored data point.
+        """
+        if not self.isTrained:
+            raise ValueError("Try to generate data with untrained GAN.")
+
+        return self.exampleItem
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
+        """
+        numOfSamples = int(numOfSamples)
+        if numOfSamples < 1:
+            raise AttributeError("Expected numOfSamples to be > 0")
+
+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+
+
+class StupidToyListGan(GanBaseClass):
+    """
+    This is a toy example of a GAN.
+    It repeats the first point of the training-data-set.
+    """
+    def __init__(self, maxListSize=100):
+        self.isTrained = False
+        self.exampleItems = None
+        self.nextIndex = 0
+        self.maxListSize = int(maxListSize)
+        if self.maxListSize < 1:
+            raise AttributeError("Expected maxListSize to be > 0 but got " + str(self.maxListSize))
+
+
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+
+        It stores the first data-point in the training data-set and marks the GAN as trained.
+
+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
+        We are only interested in the first *maxListSize* points in class 1.
+        """
+        if dataSet.data1.shape[0] <= 0:
+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+
+        self.isTrained = True
+        self.exampleItems = dataSet.data1[: self.maxListSize].copy()
+
+    def generateDataPoint(self):
+        """
+        Returns one synthetic data point by repeating the stored list.
+        """
+        if not self.isTrained:
+            raise ValueError("Try to generate data with untrained GAN.")
+
+        i = self.nextIndex
+        self.nextIndex += 1
+        if self.nextIndex >= self.exampleItems.shape[0]:
+            self.nextIndex = 0
+
+        return self.exampleItems[i]
+
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
+        """
+        numOfSamples = int(numOfSamples)
+        if numOfSamples < 1:
+            raise AttributeError("Expected numOfSamples to be > 0")
+
+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+
+
+# class SimpleGan(GanBaseClass):
+#     def __init__(self, maxListSize=100):
+#         self.isTrained = False
+#         self.exampleItems = None
+#         self.nextIndex = 0
+#         self.maxListSize = int(maxListSize)
+#         if self.maxListSize < 1:
+#             raise AttributeError(f"Expected maxListSize to be > 0 but got {self.maxListSize}")
+#
+#
+#     def train(self, dataSet):
+#         if dataSet.data1.shape[0] <= 0:
+#             raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+#
+#         self.isTrained = True
+#         self.exampleItems = dataSet.data1[: self.maxListSize].copy()
+#
+#     def generateDataPoint(self, numOfSamples=1):
+#         if not self.isTrained:
+#             raise ValueError("Try to generate data with untrained GAN.")
+#
+#         i = self.nextIndex
+#         self.nextIndex += 1
+#         if self.nextIndex >= self.exampleItems.shape[0]:
+#             self.nextIndex = 0
+#
+#         return self.exampleItems[i]
+#
+#
+#     def generateData(self, numOfSamples=1):
+#         numOfSamples = int(numOfSamples)
+#         if numOfSamples < 1:
+#             raise AttributeError("Expected numOfSamples to be > 0")
+#
+#         return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+#

+ 108 - 31
library/dataset.py

@@ -1,11 +1,29 @@
+"""
+This module contains classes to collect data for testing and training.
+"""
+
+
 import math
 import numpy as np
 
+
 class DataSet:
     """
-    Stores data and Labels.
+    This class stores data and labels for a test or training dataset.
+
+    *data0*, *data1* are instances of /numpy.array/ containing the data for class 0 (majority
+    class) and class 1 (minority class).
+
+    *size0*, *size1* are integers, giving the size of the classes 0 and 1.
+
+    *data* is an instance of /numpy.array/ containing the combined classes 0 and 1.
+
+    *labels* is a /numpy.array/ containing the labels for *data*.
     """
     def __init__(self, data0=None, data1=None):
+        """
+        Initializes one instance of this class and fills *data* and *labels*.
+        """
         self.data0 = data0
         self.data1 = data1
         self.size0 = len(data0) if data0 is not None else 0
@@ -24,6 +42,10 @@ class DataSet:
             raise AttributeError("Expected data, data0 or data1 to be a numpy.array")
 
     def shuffleWith(self, shuffleFn):
+        """
+        Shuffles the points in the classes 0 and 1 with the given function
+        (numpy.array -> numpy.array). After that the *data* array will be regenerated.
+        """
         if self.data0 is not None:
             self.data0 = shuffleFn(self.data0)
 
@@ -38,33 +60,68 @@ class DataSet:
             self.data = np.concatenate((self.data1, self.data0))
 
     def labels0(self):
+        """
+        Returns a /numpy.array/ with labels for class0.
+        """
         return np.zeros(self.size0)
 
     def labels1(self):
+        """
+        Returns a /numpy.array/ with labels for class1.
+        """
         return np.zeros(self.size1) + 1
 
 
 class TrainTestData:
     """
-    Stores features, data and labels for class 0 and class 1.
+    Stores data and labels for class 0 and class 1.
+
+    *train* is a /DataSet/ containing the data for training.
+
+    *test* is a /DataSet/ containing the data for testing.
     """
 
     def __init__(self, train, test):
+        """
+        Initializes a new instance for this class and stores the given data.
+        """
         self.train = train
         self.test = test
 
-    @staticmethod
-    def splitUpData(data, trainFactor=0.9):
-        size = len(data)
-        trainSize = math.ceil(size * trainFactor)
-        trn = data[list(range(0, trainSize))]
-        tst = data[list(range(trainSize, size))]
-        return trn, tst
-
     @classmethod
-    def splitDataByFactor(cls, features0, features1, trainFactor=0.9):
-        features_0_trn, features_0_tst = cls.splitUpData(features0, trainFactor)
-        features_1_trn, features_1_tst = cls.splitUpData(features1, trainFactor)
+    def splitDataByFactor(cls, features0, features1, factor=0.9):
+        """
+        Creates a new instance of this class.
+
+        The first (factor * 100%) percent of the points in the given classes are stored for
+        training. The remaining points are stored for testing.
+
+        *features0* and *features1* are /numpy.array/ instances containing the data for class 0
+        and class 1.
+
+        *factor* is a real number > 0 and < 1 for the splitting point.
+        """
+
+        if factor <= 0.0 or factor >= 1.0:
+            raise AttributeError(f"Expected trainFactor to be between 0 and 1 but got {factor}.")
+
+        # ----------------------------------------------------------------------------------------
+        # Supporting function:
+        def splitUpData(data):
+            """
+            Splits a given /numpy.array/ in two /numpy.array/.
+            The first array contains (factor * 100%) percent of the data points.
+            The second array contains the remaining data points.
+            """
+            size = len(data)
+            trainSize = math.ceil(size * factor)
+            trn = data[list(range(0, trainSize))]
+            tst = data[list(range(trainSize, size))]
+            return trn, tst
+        # ----------------------------------------------------------------------------------------
+
+        features_0_trn, features_0_tst = splitUpData(features0)
+        features_1_trn, features_1_tst = splitUpData(features1)
 
         return cls(
             test=DataSet(data1=features_1_tst, data0=features_0_tst),
@@ -73,8 +130,44 @@ class TrainTestData:
 
     @classmethod
     def splitDataToSlices(cls, bigData, numOfSlices=5):
-        data0slices = cls._arrayToSlices(bigData.data0, numOfSlices)
-        data1slices = cls._arrayToSlices(bigData.data1, numOfSlices)
+        """
+        Creates a list of new instances of this class. The list is returned as a generator.
+
+        The given data is split into the given number of slices.
+
+        *bigData* is an instance of /DataSet/ containing the data to split.
+
+        *numOfSlices* is the number of generated slices.
+        """
+
+        numOfSlices = int(numOfSlices)
+        if numOfSlices < 1:
+            raise AttributeError(f"Expected numOfSlices to be positive but got {numOfSlices}")
+
+        # ----------------------------------------------------------------------------------------
+        # Supporting function:
+        def arrayToSlices(data):
+            """
+            Takes a /numpy.array/ and splits it into *numOfSlices* slices.
+            A list of the slices will be returned.
+            """
+            size = len(data)
+            if size < numOfSlices:
+                raise AttributeError(
+                    f"Expected data set to contain at least {numOfSlices} points"
+                    + f" but got {size} points."
+                    )
+
+            sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
+
+            return [
+                data[n * sliceSize : min(size, (n+1) * sliceSize)]
+                for n in range(numOfSlices)
+                ]
+        # ----------------------------------------------------------------------------------------
+
+        data0slices = arrayToSlices(bigData.data0)
+        data1slices = arrayToSlices(bigData.data1)
 
         for n in range(numOfSlices):
             data0 = np.concatenate([data0slices[k] for k in range(numOfSlices) if n != k])
@@ -82,19 +175,3 @@ class TrainTestData:
             train = DataSet(data0=data0, data1=data1)
             test = DataSet(data0=data0slices[n], data1=data1slices[n])
             yield cls(train=train, test=test)
-
-    @staticmethod
-    def _arrayToSlices(data, numOfSlices):
-        size = len(data)
-        if size < numOfSlices:
-            raise AttributeError(
-                f"Expected data set to contain at least {numOfSlices} points"
-                + f" but got {size} points."
-                )
-
-        sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
-
-        return [
-            data[n * sliceSize : min(size, (n+1) * sliceSize)]
-            for n in range(numOfSlices)
-            ]

+ 123 - 94
library/exercise.py

@@ -1,23 +1,19 @@
-import numpy as np
-import pandas as pd
+"""
+Class for testing the performance of Generative Adversarial Networks
+in generating synthetic samples for datasets with a minority class.
+"""
 
-import sklearn
-# needed in function lr
-from sklearn import metrics
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import average_precision_score
-from sklearn.metrics import f1_score
-from sklearn.metrics import balanced_accuracy_score
 
+import numpy as np
+import pandas as pd
 
-from sklearn.decomposition import PCA
 import seaborn as sns
+from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 
 from library.dataset import DataSet, TrainTestData
+from library.testers import lr, svm, knn
 
 
 class Exercise:
@@ -25,134 +21,167 @@ class Exercise:
     Exercising a test for a minority class extension class.
     """
 
-    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
-        self.numOfSlices = numOfSlices
-        self.numOfShuffles = numOfShuffles
-        self.testFunctions = testFunctions
+    def __init__(self, testFunctions=None, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
+        """
+        Creates an instance of this class.
+
+        *testFunctions* is a dictionary /(String : Function)/ of functions for testing
+        a generated dataset. The functions have the signature:
+        /(TrainTestData, TrainTestData) -> TestResult/
+
+        *shuffleFunction* is either None or a function /numpy.array -> numpy.array/
+        that shuffles a given array.
+
+        *numOfSlices* is an integer > 0. The dataset given for the run function
+        will be divided into that many slices.
+
+        *numOfShuffles* is an integer > 0. It gives the number of exercised tests.
+        The GAN will be trained and tested (numOfShuffles * numOfSlices) times.
+        """
+        self.numOfSlices = int(numOfSlices)
+        self.numOfShuffles = int(numOfShuffles)
         self.shuffleFunction = shuffleFunction
         self.debug = print
 
+        self.testFunctions = testFunctions
+        if self.testFunctions is None:
+            self.testFunctions = {
+                "LR": lr,
+                "SVM": svm,
+                "KNN": knn
+                }
+
+        self.results = { name: [] for name in self.testFunctions }
+
+        # Check if the given values are in valid range.
+        if self.numOfSlices < 0:
+            raise AttributeError(f"Expected numOfSlices to be > 0 but got {self.numOfSlices}")
+
+        if self.numOfShuffles < 0:
+            raise AttributeError(f"Expected numOfShuffles to be > 0 but got {self.numOfShuffles}")
+
     def run(self, gan, dataset):
+        """
+        Exercise all tests for a given GAN.
+
+        *gan* is an implementation of library.interfaces.GanBaseClass.
+        It defines the GAN to test.
+
+        *dataset* is a library.dataset.DataSet that contains the majority class
+        (dataset.data0) and the minority class (dataset.data1) of data
+        for training and testing.
+        """
+
+        # Check if the given values are in valid range.
         if len(dataset.data1) > len(dataset.data0):
-            raise AttributeError("Expected class 1 to be the minority class but class 1 is bigger than class 0.")
+            raise AttributeError(
+                "Expected class 1 to be the minority class but class 1 is bigger than class 0.")
+
+        # Reset results array.
+        self.results = { name: [] for name in self.testFunctions }
 
+        # Repeat numOfShuffles times
         self.debug("### Start exercise for synthetic point generator")
         for shuffleStep in range(self.numOfShuffles):
-            stepTitle = "Step {shuffleStep + 1}/{self.numOfShuffles}"
+            stepTitle = f"Step {shuffleStep + 1}/{self.numOfShuffles}"
             self.debug(f"\n====== {stepTitle} =======")
 
+            # If a shuffle function is given then shuffle the data before the next
+            # exercise starts.
             if self.shuffleFunction is not None:
                 self.debug("-> Shuffling data")
                 dataset.shuffleWith(self.shuffleFunction)
 
+
+            # Split the (shuffled) data into numOfSlices slices.
+            # dataSlices is a list of TrainTestData instances.
+            #
+            # If numOfSlices=3 then the data will be split into D1, D2, D3.
+            # dataSlices will contain:
+            # [(train=D2+D3, test=D1), (train=D1+D3, test=D2), (train=D1+D2, test=D3)]
             self.debug("-> Spliting data to slices")
             dataSlices = TrainTestData.splitDataToSlices(dataset, self.numOfSlices)
 
+            # Do an exercise for every slice.
             for (sliceNr, sliceData) in enumerate(dataSlices):
-                sliceTitle = "Slice {sliceNr + 1}/{self.numOfSlices}"
+                sliceTitle = f"Slice {sliceNr + 1}/{self.numOfSlices}"
                 self.debug(f"\n------ {stepTitle}: {sliceTitle} -------")
                 self._exerciseWithDataSlice(gan, sliceData)
+
         self.debug("### Exercise is done.")
 
     def _exerciseWithDataSlice(self, gan, dataSlice):
+        """
+        Runs one test for the given gan and dataSlice.
+
+        *gan* is an implementation of library.interfaces.GanBaseClass.
+        It defines the GAN to test.
+
+        *dataSlice* is a library.dataset.TrainTestData instance that contains
+        one data slice with training and testing data.
+        """
+
+        # Train the gan so it can produce synthetic samples.
         self.debug("-> Train generator for synthetic samples")
         gan.train(dataSlice.train)
 
+        # Count how many synthetic samples are needed.
         numOfNeededSamples = dataSlice.train.size0 - dataSlice.train.size1
 
+        # Add synthetic samples (generated by the GAN) to the minority class.
         if numOfNeededSamples > 0:
             self.debug(f"-> create {numOfNeededSamples} synthetic samples")
-            newSamples = np.asarray([gan.generateData() for _ in range(numOfNeededSamples)])
-            train = DataSet(
+            newSamples = gan.generateData(numOfNeededSamples)
+            dataSlice.train = DataSet(
                 data0=dataSlice.train.data0,
                 data1=np.concatenate((dataSlice.train.data1, newSamples))
                 )
-        else:
-            train = dataSlice.train
 
-        plotCloud(train.data, train.labels)
+        # Print out an overview of the new dataset.
+        plotCloud(dataSlice.train)
 
-        results = { name: [] for name in self.testFunctions }
+        # Test this dataset with every given test-function.
+        # The results are printed out and stored to the results dictionary.
         for testerName in self.testFunctions:
             self.debug(f"-> test with '{testerName}'")
-            testResult = (self.testFunctions[testerName])(train, dataSlice.test)
-            testResult.print()
-            results[testerName].append(testResult)
-
-        self.debug("-> check results")
-        self._checkResults(results, dataSlice.test.labels)
-
-    def _checkResults(self, results, expectedLabels):
-        pass
-
-
-class TestResult:
-    def __init__(self, title, labels, prediction, aps=None):
-        self.title = title
-        self.con_mat = confusion_matrix(labels, prediction)
-        self.bal_acc = balanced_accuracy_score(labels, prediction)
-        self.f1 = f1_score(labels, prediction)
-        self.aps = aps
-
-    def print(self):
-        #tn, fp, fn, tp = con_mat.ravel()
-        r = self.con_mat.ravel()
-        print('tn, fp, fn, tp:', r)
-
-        if self.aps is not None:
-            print('average_pr_score:', self.aps)
-
-        print(f'f1 score_{self.title}:', self.f1)
-        print(f'balanced accuracy_{self.title}:', self.bal_acc)
-        print(f'confusion matrix_{self.title}')
-        print(self.con_mat)
-
+            testResult = (self.testFunctions[testerName])(dataSlice)
+            self.debug(str(testResult))
+            self.results[testerName].append(testResult)
 
 
-def lr(train, test):
-    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3})
-    logreg.fit(train.data, train.labels)
+    def saveResultsTo(self, fileName):
+        with open(fileName, "w") as f:
+            for name in self.results:
+                f.write(name + "\n")
+                isFirst = True
+                for result in self.results[name]:
+                    if isFirst:
+                        isFirst = False
+                        f.write(result.csvHeading() + "\n")
+                    f.write(result.toCSV() + "\n")
+            
 
-    prediction = logreg.predict(test.data)
-
-    prob_lr = logreg.predict_proba(test.data)
-    aps_lr = average_precision_score(test.labels, prob_lr[:,1]) 
-    return TestResult("LR", test.labels, prediction, aps_lr)
-
-def svm(train, test):
-    svm = sklearn.svm.SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.}, probability=True)
-    svm.fit(train.data, train.labels)
-
-    prediction = svm.predict(test.data)
-    return TestResult("SVM", test.labels, prediction)
-
-
-def knn(train, test):
-    knn = KNeighborsClassifier(n_neighbors=10)
-    knn.fit(train.data, train.labels)
-    
-    prediction = knn.predict(test.data)
-    return TestResult("KNN", test.labels, prediction)
-
-
-allTesters = {
-    "LR": lr,
-    "SVM": svm,
-    "KNN": knn
-    }
 
+def plotCloud(dataset):
+    """
+    Does a PCA analysis of the given data and plots the two most important axes.
+    """
+    # Normalizes the data.
+    data_t = StandardScaler().fit_transform(dataset.data)
 
-def plotCloud(data, labels):
-    data_t = StandardScaler().fit_transform(data)
+    # Run the PCA analysis.
     pca = PCA(n_components=2)
     pc = pca.fit_transform(data_t)
+
+    # Create a DataFrame for plotting.
     result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
-    result['Cluster'] = labels
-    
+    result['Cluster'] = dataset.labels
+
+    # Plot the analysis results.
     sns.set( font_scale=1.2)
-    g=sns.lmplot( x="PCA0", y="PCA1",
-      data=result, 
-      fit_reg=False, 
+    sns.lmplot( x="PCA0", y="PCA1",
+      data=result,
+      fit_reg=False,
       hue='Cluster', # color by cluster
       legend=False,
       scatter_kws={"s": 3}, palette="Set1") # specify the point size

+ 30 - 36
library/interfaces.py

@@ -1,41 +1,35 @@
-import numpy as np
+"""
+This module contains used interfaces for testing the Generative Adversarial Networks.
+"""
 
-class GanBaseClass:
-    def __init__(self):
-        self.isTrained = False
-        self.exampleItems = None
-        self.nextIndex = 0
-        pass
-
-    def train(self, dataSet):
-        if dataSet.data1.shape[0] <= 0:
-            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
-
-        print(
-            "Train GAN with |class 0|=%d, |class 1|=%d"
-            % (dataSet.data0.shape[0], dataSet.data1.shape[0])
-            )
-        self.isTrained = True
-        self.exampleItems = dataSet.data1.copy()
-
-    def generateData(self):
-        if not self.isTrained:
-            raise ValueError("Try to generate data with untrained GAN.")
-
-        i = self.nextIndex
-        self.nextIndex += 1
-        if self.nextIndex >= self.exampleItems.shape[0]:
-            self.nextIndex = 0
-
-        return self.exampleItems[i]
 
+class GanBaseClass:
+    """
+    Base class for the Generative Adversarial Network.
+    It defines the interface used by the Exercise class.
+    """
 
-class TesterNetworkBaseClass:
     def __init__(self):
-        pass
+        """
+        Initializes the class.
+        """
 
-    def train(self, data, labels):
-        pass
-
-    def predict(self, data):
-        return np.zeros(data.shape[0])
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+        """
+        raise NotImplementedError
+
+    def generateDataPoint(self):
+        """
+        Generates one synthetic data-point.
+        """
+        return self.generateData(1)[0]
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of generated samples.
+        """
+        raise NotImplementedError

+ 112 - 0
library/testers.py

@@ -0,0 +1,112 @@
+"""
+This module contains test functions for datasets using the logistic regression, the support vector
+machine and the k-nearest-neighbours algorithm. Additionally it contains a class for storing the
+results of the tests.
+"""
+
+
+import sklearn
+# needed in function lr
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import f1_score
+from sklearn.metrics import balanced_accuracy_score
+
+
+class TestResult:
+    """
+    This class represents the result of one test.
+
+    It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*)
+    and the f1 score (*f1*). If given the average precision score is also stored (*aps*).
+    """
+    def __init__(self, title, labels, prediction, aps=None):
+        """
+        Creates an instance of this class. The stored data will be generated from the given values.
+
+        *title* is a text to identify this result.
+
+        *labels* is a /numpy.array/ containing the labels of the test-data-set.
+
+        *prediction* is a /numpy.array/ containing the done prediction for the test-data-set.
+
+        *aps* is a real number representing the average precision score.
+        """
+        self.title = title
+        self.con_mat = confusion_matrix(labels, prediction)
+        self.bal_acc = balanced_accuracy_score(labels, prediction)
+        self.f1 = f1_score(labels, prediction)
+        self.aps = aps
+
+    def __str__(self):
+        """
+        Generates a text representing this result.
+        """
+        #tn, fp, fn, tp = con_mat.ravel()
+        r = self.con_mat.ravel()
+        text = f"tn, fp, fn, tp: {r}"
+
+        if self.aps is not None:
+            text += f"\naverage_pr_score: {self.aps}"
+
+        text += f"\nf1 score_{self.title}: {self.f1}"
+        text += f"\nbalanced accuracy_{self.title}: {self.bal_acc}"
+        text += f"\nconfusion matrix_{self.title}\n {self.con_mat}"
+        return text
+
+
+def lr(ttd):
+    """
+    Runs a test for a dataset with the logistic regression algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    logreg = LogisticRegression(
+        C=1e5,
+        solver='lbfgs',
+        multi_class='multinomial',
+        class_weight={0: 1, 1: 1.3}
+        )
+    logreg.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = logreg.predict(ttd.test.data)
+
+    prob_lr = logreg.predict_proba(ttd.test.data)
+    aps_lr = average_precision_score(ttd.test.labels, prob_lr[:,1])
+    return TestResult("LR", ttd.test.labels, prediction, aps_lr)
+
+
+def svm(ttd):
+    """
+    Runs a test for a dataset with the support vector machine algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    svmTester = sklearn.svm.SVC(
+        kernel='linear',
+        decision_function_shape='ovo',
+        class_weight={0: 1., 1: 1.},
+        probability=True
+        )
+    svmTester.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = svmTester.predict(ttd.test.data)
+    return TestResult("SVM", ttd.test.labels, prediction)
+
+
+def knn(ttd):
+    """
+    Runs a test for a dataset with the k-nearest neighbours algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    knnTester = KNeighborsClassifier(n_neighbors=10)
+    knnTester.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = knnTester.predict(ttd.test.data)
+    return TestResult("KNN", ttd.test.labels, prediction)

部分文件因文件數量過多而無法顯示