瀏覽代碼

Added tester functions (lr, svn, knn), added ToyGANs, added shuffler, updated the example

Kristian Schultz 4 年之前
父節點
當前提交
90d7ab576b
共有 6 個文件被更改,包括 629 次插入192 次删除
  1. 98 31
      Example Exercise.ipynb
  2. 158 0
      library/GanExamples.py
  3. 108 31
      library/dataset.py
  4. 123 94
      library/exercise.py
  5. 30 36
      library/interfaces.py
  6. 112 0
      library/testers.py

文件差異過大導致無法顯示
+ 98 - 31
Example Exercise.ipynb


+ 158 - 0
library/GanExamples.py

@@ -0,0 +1,158 @@
+"""
+This module contains some example Generative Adversarial Networks for testing.
+
+The classes StupidToyPointGan and StupidToyListGan are not really Networks. These classes are
+used for testing the interface. Hopefully your actual GAN will perform better than these two.
+
+The class SimpleGan is a simple standard Generative Adversarial Network.
+"""
+
+
+import numpy as np
+
+from library.interfaces import GanBaseClass
+
+
+class StupidToyPointGan(GanBaseClass):
+    """
+    This is a toy example of a GAN.
+    It repeats the first point of the training-data-set.
+    """
+
+    def __init__(self):
+        """
+        Initializes the class and marks it as untrained.
+        """
+        self.isTrained = False
+        self.exampleItem = None
+
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+
+        It stores the first data-point in the training data-set and marks the GAN as trained.
+
+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
+        We are only interested in class 1.
+        """
+        if dataSet.data1.shape[0] <= 0:
+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+
+        self.isTrained = True
+        self.exampleItem = dataSet.data1[0].copy()
+
+    def generateDataPoint(self):
+        """
+        Generates one synthetic data-point by copying the stored data point.
+        """
+        if not self.isTrained:
+            raise ValueError("Try to generate data with untrained GAN.")
+
+        return self.exampleItem
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
+        """
+        numOfSamples = int(numOfSamples)
+        if numOfSamples < 1:
+            raise AttributeError("Expected numOfSamples to be > 0")
+
+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+
+
+class StupidToyListGan(GanBaseClass):
+    """
+    This is a toy example of a GAN.
+    It repeats the first point of the training-data-set.
+    """
+    def __init__(self, maxListSize=100):
+        self.isTrained = False
+        self.exampleItems = None
+        self.nextIndex = 0
+        self.maxListSize = int(maxListSize)
+        if self.maxListSize < 1:
+            raise AttributeError("Expected maxListSize to be > 0 but got " + str(self.maxListSize))
+
+
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+
+        It stores the first data-point in the training data-set and marks the GAN as trained.
+
+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
+        We are only interested in the first *maxListSize* points in class 1.
+        """
+        if dataSet.data1.shape[0] <= 0:
+            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+
+        self.isTrained = True
+        self.exampleItems = dataSet.data1[: self.maxListSize].copy()
+
+    def generateDataPoint(self):
+        """
+        Returns one synthetic data point by repeating the stored list.
+        """
+        if not self.isTrained:
+            raise ValueError("Try to generate data with untrained GAN.")
+
+        i = self.nextIndex
+        self.nextIndex += 1
+        if self.nextIndex >= self.exampleItems.shape[0]:
+            self.nextIndex = 0
+
+        return self.exampleItems[i]
+
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of newly generated samples.
+        """
+        numOfSamples = int(numOfSamples)
+        if numOfSamples < 1:
+            raise AttributeError("Expected numOfSamples to be > 0")
+
+        return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+
+
+# class SimpleGan(GanBaseClass):
+#     def __init__(self, maxListSize=100):
+#         self.isTrained = False
+#         self.exampleItems = None
+#         self.nextIndex = 0
+#         self.maxListSize = int(maxListSize)
+#         if self.maxListSize < 1:
+#             raise AttributeError(f"Expected maxListSize to be > 0 but got {self.maxListSize}")
+#
+#
+#     def train(self, dataSet):
+#         if dataSet.data1.shape[0] <= 0:
+#             raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
+#
+#         self.isTrained = True
+#         self.exampleItems = dataSet.data1[: self.maxListSize].copy()
+#
+#     def generateDataPoint(self, numOfSamples=1):
+#         if not self.isTrained:
+#             raise ValueError("Try to generate data with untrained GAN.")
+#
+#         i = self.nextIndex
+#         self.nextIndex += 1
+#         if self.nextIndex >= self.exampleItems.shape[0]:
+#             self.nextIndex = 0
+#
+#         return self.exampleItems[i]
+#
+#
+#     def generateData(self, numOfSamples=1):
+#         numOfSamples = int(numOfSamples)
+#         if numOfSamples < 1:
+#             raise AttributeError("Expected numOfSamples to be > 0")
+#
+#         return np.array([self.generateDataPoint() for _ in range(numOfSamples)])
+#

+ 108 - 31
library/dataset.py

@@ -1,11 +1,29 @@
+"""
+This module contains classes to collect data for testing and training.
+"""
+
+
 import math
 import numpy as np
 
+
 class DataSet:
     """
-    Stores data and Labels.
+    This class stores data and labels for a test or training dataset.
+
+    *data0*, *data1* are instances of /numpy.array/ containing the data for class 0 (majority
+    class) and class 1 (minority class).
+
+    *size0*, *size1* are integers, giving the size of the classes 0 and 1.
+
+    *data* is an instance of /numpy.array/ containing the combined classes 0 and 1.
+
+    *labels* is a /numpy.array/ containing the labels for *data*.
     """
     def __init__(self, data0=None, data1=None):
+        """
+        Initializes one instance of this class and fills *data* and *labels*.
+        """
         self.data0 = data0
         self.data1 = data1
         self.size0 = len(data0) if data0 is not None else 0
@@ -24,6 +42,10 @@ class DataSet:
             raise AttributeError("Expected data, data0 or data1 to be a numpy.array")
 
     def shuffleWith(self, shuffleFn):
+        """
+        Shuffles the points in the classes 0 and 1 with the given function
+        (numpy.array -> numpy.array). After that the *data* array will be regenerated.
+        """
         if self.data0 is not None:
             self.data0 = shuffleFn(self.data0)
 
@@ -38,33 +60,68 @@ class DataSet:
             self.data = np.concatenate((self.data1, self.data0))
 
     def labels0(self):
+        """
+        Returns a /numpy.array/ with labels for class0.
+        """
         return np.zeros(self.size0)
 
     def labels1(self):
+        """
+        Returns a /numpy.array/ with labels for class1.
+        """
         return np.zeros(self.size1) + 1
 
 
 class TrainTestData:
     """
-    Stores features, data and labels for class 0 and class 1.
+    Stores data and labels for class 0 and class 1.
+
+    *train* is a /DataSet/ containing the data for training.
+
+    *test* is a /DataSet/ containing the data for testing.
     """
 
     def __init__(self, train, test):
+        """
+        Initializes a new instance for this class and stores the given data.
+        """
         self.train = train
         self.test = test
 
-    @staticmethod
-    def splitUpData(data, trainFactor=0.9):
-        size = len(data)
-        trainSize = math.ceil(size * trainFactor)
-        trn = data[list(range(0, trainSize))]
-        tst = data[list(range(trainSize, size))]
-        return trn, tst
-
     @classmethod
-    def splitDataByFactor(cls, features0, features1, trainFactor=0.9):
-        features_0_trn, features_0_tst = cls.splitUpData(features0, trainFactor)
-        features_1_trn, features_1_tst = cls.splitUpData(features1, trainFactor)
+    def splitDataByFactor(cls, features0, features1, factor=0.9):
+        """
+        Creates a new instance of this class.
+
+        The first (factor * 100%) percent of the points in the given classes are stored for
+        training. The remaining points are stored for testing.
+
+        *features0* and *features1* are /numpy.array/ instances containing the data for class 0
+        and class 1.
+
+        *factor* is a real number > 0 and < 1 for the splitting point.
+        """
+
+        if factor <= 0.0 or factor >= 1.0:
+            raise AttributeError(f"Expected trainFactor to be between 0 and 1 but got {factor}.")
+
+        # ----------------------------------------------------------------------------------------
+        # Supporting function:
+        def splitUpData(data):
+            """
+            Splits a given /numpy.array/ in two /numpy.array/.
+            The first array contains (factor * 100%) percent of the data points.
+            The second array contains the remaining data points.
+            """
+            size = len(data)
+            trainSize = math.ceil(size * factor)
+            trn = data[list(range(0, trainSize))]
+            tst = data[list(range(trainSize, size))]
+            return trn, tst
+        # ----------------------------------------------------------------------------------------
+
+        features_0_trn, features_0_tst = splitUpData(features0)
+        features_1_trn, features_1_tst = splitUpData(features1)
 
         return cls(
             test=DataSet(data1=features_1_tst, data0=features_0_tst),
@@ -73,8 +130,44 @@ class TrainTestData:
 
     @classmethod
     def splitDataToSlices(cls, bigData, numOfSlices=5):
-        data0slices = cls._arrayToSlices(bigData.data0, numOfSlices)
-        data1slices = cls._arrayToSlices(bigData.data1, numOfSlices)
+        """
+        Creates a list of new instances of this class. The list is returned as a generator.
+
+        The given data is split into the given number of slices.
+
+        *bigData* is an instance of /DataSet/ containing the data to split.
+
+        *numOfSlices* is the number of generated slices.
+        """
+
+        numOfSlices = int(numOfSlices)
+        if numOfSlices < 1:
+            raise AttributeError(f"Expected numOfSlices to be positive but got {numOfSlices}")
+
+        # ----------------------------------------------------------------------------------------
+        # Supporting function:
+        def arrayToSlices(data):
+            """
+            Takes a /numpy.array/ and splits it into *numOfSlices* slices.
+            A list of the slices will be returned.
+            """
+            size = len(data)
+            if size < numOfSlices:
+                raise AttributeError(
+                    f"Expected data set to contain at least {numOfSlices} points"
+                    + f" but got {size} points."
+                    )
+
+            sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
+
+            return [
+                data[n * sliceSize : min(size, (n+1) * sliceSize)]
+                for n in range(numOfSlices)
+                ]
+        # ----------------------------------------------------------------------------------------
+
+        data0slices = arrayToSlices(bigData.data0)
+        data1slices = arrayToSlices(bigData.data1)
 
         for n in range(numOfSlices):
             data0 = np.concatenate([data0slices[k] for k in range(numOfSlices) if n != k])
@@ -82,19 +175,3 @@ class TrainTestData:
             train = DataSet(data0=data0, data1=data1)
             test = DataSet(data0=data0slices[n], data1=data1slices[n])
             yield cls(train=train, test=test)
-
-    @staticmethod
-    def _arrayToSlices(data, numOfSlices):
-        size = len(data)
-        if size < numOfSlices:
-            raise AttributeError(
-                f"Expected data set to contain at least {numOfSlices} points"
-                + f" but got {size} points."
-                )
-
-        sliceSize = (size // numOfSlices) + (0 if size % numOfSlices == 0 else 1)
-
-        return [
-            data[n * sliceSize : min(size, (n+1) * sliceSize)]
-            for n in range(numOfSlices)
-            ]

+ 123 - 94
library/exercise.py

@@ -1,23 +1,19 @@
-import numpy as np
-import pandas as pd
+"""
+Class for testing the performance of Generative Adversarial Networks
+in generating synthetic samples for datasets with a minority class.
+"""
 
-import sklearn
-# needed in function lr
-from sklearn import metrics
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import confusion_matrix
-from sklearn.metrics import average_precision_score
-from sklearn.metrics import f1_score
-from sklearn.metrics import balanced_accuracy_score
 
+import numpy as np
+import pandas as pd
 
-from sklearn.decomposition import PCA
 import seaborn as sns
+from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
 
 from library.dataset import DataSet, TrainTestData
+from library.testers import lr, svm, knn
 
 
 class Exercise:
@@ -25,134 +21,167 @@ class Exercise:
     Exercising a test for a minority class extension class.
     """
 
-    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
-        self.numOfSlices = numOfSlices
-        self.numOfShuffles = numOfShuffles
-        self.testFunctions = testFunctions
+    def __init__(self, testFunctions=None, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
+        """
+        Creates an instance of this class.
+
+        *testFunctions* is a dictionary /(String : Function)/ of functions for testing
+        a generated dataset. The functions have the signature:
+        /(TrainTestData, TrainTestData) -> TestResult/
+
+        *shuffleFunction* is either None or a function /numpy.array -> numpy.array/
+        that shuffles a given array.
+
+        *numOfSlices* is an integer > 0. The dataset given for the run function
+        will be divided into that many slices.
+
+        *numOfShuffles* is an integer > 0. It gives the number of exercised tests.
+        The GAN will be trained and tested (numOfShuffles * numOfSlices) times.
+        """
+        self.numOfSlices = int(numOfSlices)
+        self.numOfShuffles = int(numOfShuffles)
         self.shuffleFunction = shuffleFunction
         self.debug = print
 
+        self.testFunctions = testFunctions
+        if self.testFunctions is None:
+            self.testFunctions = {
+                "LR": lr,
+                "SVM": svm,
+                "KNN": knn
+                }
+
+        self.results = { name: [] for name in self.testFunctions }
+
+        # Check if the given values are in valid range.
+        if self.numOfSlices < 0:
+            raise AttributeError(f"Expected numOfSlices to be > 0 but got {self.numOfSlices}")
+
+        if self.numOfShuffles < 0:
+            raise AttributeError(f"Expected numOfShuffles to be > 0 but got {self.numOfShuffles}")
+
     def run(self, gan, dataset):
+        """
+        Exercise all tests for a given GAN.
+
+        *gan* is an implementation of library.interfaces.GanBaseClass.
+        It defines the GAN to test.
+
+        *dataset* is a library.dataset.DataSet that contains the majority class
+        (dataset.data0) and the minority class (dataset.data1) of data
+        for training and testing.
+        """
+
+        # Check if the given values are in valid range.
         if len(dataset.data1) > len(dataset.data0):
-            raise AttributeError("Expected class 1 to be the minority class but class 1 is bigger than class 0.")
+            raise AttributeError(
+                "Expected class 1 to be the minority class but class 1 is bigger than class 0.")
+
+        # Reset results array.
+        self.results = { name: [] for name in self.testFunctions }
 
+        # Repeat numOfShuffles times
         self.debug("### Start exercise for synthetic point generator")
         for shuffleStep in range(self.numOfShuffles):
-            stepTitle = "Step {shuffleStep + 1}/{self.numOfShuffles}"
+            stepTitle = f"Step {shuffleStep + 1}/{self.numOfShuffles}"
             self.debug(f"\n====== {stepTitle} =======")
 
+            # If a shuffle function is given then shuffle the data before the next
+            # exercise starts.
             if self.shuffleFunction is not None:
                 self.debug("-> Shuffling data")
                 dataset.shuffleWith(self.shuffleFunction)
 
+
+            # Split the (shuffled) data into numOfSlices slices.
+            # dataSlices is a list of TrainTestData instances.
+            #
+            # If numOfSlices=3 then the data will be split into D1, D2, D3.
+            # dataSlices will contain:
+            # [(train=D2+D3, test=D1), (train=D1+D3, test=D2), (train=D1+D2, test=D3)]
             self.debug("-> Spliting data to slices")
             dataSlices = TrainTestData.splitDataToSlices(dataset, self.numOfSlices)
 
+            # Do an exercise for every slice.
             for (sliceNr, sliceData) in enumerate(dataSlices):
-                sliceTitle = "Slice {sliceNr + 1}/{self.numOfSlices}"
+                sliceTitle = f"Slice {sliceNr + 1}/{self.numOfSlices}"
                 self.debug(f"\n------ {stepTitle}: {sliceTitle} -------")
                 self._exerciseWithDataSlice(gan, sliceData)
+
         self.debug("### Exercise is done.")
 
     def _exerciseWithDataSlice(self, gan, dataSlice):
+        """
+        Runs one test for the given gan and dataSlice.
+
+        *gan* is an implementation of library.interfaces.GanBaseClass.
+        It defines the GAN to test.
+
+        *dataSlice* is a library.dataset.TrainTestData instance that contains
+        one data slice with training and testing data.
+        """
+
+        # Train the gan so it can produce synthetic samples.
         self.debug("-> Train generator for synthetic samples")
         gan.train(dataSlice.train)
 
+        # Count how many synthetic samples are needed.
         numOfNeededSamples = dataSlice.train.size0 - dataSlice.train.size1
 
+        # Add synthetic samples (generated by the GAN) to the minority class.
         if numOfNeededSamples > 0:
             self.debug(f"-> create {numOfNeededSamples} synthetic samples")
-            newSamples = np.asarray([gan.generateData() for _ in range(numOfNeededSamples)])
-            train = DataSet(
+            newSamples = gan.generateData(numOfNeededSamples)
+            dataSlice.train = DataSet(
                 data0=dataSlice.train.data0,
                 data1=np.concatenate((dataSlice.train.data1, newSamples))
                 )
-        else:
-            train = dataSlice.train
 
-        plotCloud(train.data, train.labels)
+        # Print out an overview of the new dataset.
+        plotCloud(dataSlice.train)
 
-        results = { name: [] for name in self.testFunctions }
+        # Test this dataset with every given test-function.
+        # The results are printed out and stored to the results dictionary.
         for testerName in self.testFunctions:
             self.debug(f"-> test with '{testerName}'")
-            testResult = (self.testFunctions[testerName])(train, dataSlice.test)
-            testResult.print()
-            results[testerName].append(testResult)
-
-        self.debug("-> check results")
-        self._checkResults(results, dataSlice.test.labels)
-
-    def _checkResults(self, results, expectedLabels):
-        pass
-
-
-class TestResult:
-    def __init__(self, title, labels, prediction, aps=None):
-        self.title = title
-        self.con_mat = confusion_matrix(labels, prediction)
-        self.bal_acc = balanced_accuracy_score(labels, prediction)
-        self.f1 = f1_score(labels, prediction)
-        self.aps = aps
-
-    def print(self):
-        #tn, fp, fn, tp = con_mat.ravel()
-        r = self.con_mat.ravel()
-        print('tn, fp, fn, tp:', r)
-
-        if self.aps is not None:
-            print('average_pr_score:', self.aps)
-
-        print(f'f1 score_{self.title}:', self.f1)
-        print(f'balanced accuracy_{self.title}:', self.bal_acc)
-        print(f'confusion matrix_{self.title}')
-        print(self.con_mat)
-
+            testResult = (self.testFunctions[testerName])(dataSlice)
+            self.debug(str(testResult))
+            self.results[testerName].append(testResult)
 
 
-def lr(train, test):
-    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3})
-    logreg.fit(train.data, train.labels)
+    def saveResultsTo(self, fileName):
+        with open(fileName, "w") as f:
+            for name in self.results:
+                f.write(name + "\n")
+                isFirst = True
+                for result in self.results[name]:
+                    if isFirst:
+                        isFirst = False
+                        f.write(result.csvHeading() + "\n")
+                    f.write(result.toCSV() + "\n")
+            
 
-    prediction = logreg.predict(test.data)
-
-    prob_lr = logreg.predict_proba(test.data)
-    aps_lr = average_precision_score(test.labels, prob_lr[:,1]) 
-    return TestResult("LR", test.labels, prediction, aps_lr)
-
-def svm(train, test):
-    svm = sklearn.svm.SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.}, probability=True)
-    svm.fit(train.data, train.labels)
-
-    prediction = svm.predict(test.data)
-    return TestResult("SVM", test.labels, prediction)
-
-
-def knn(train, test):
-    knn = KNeighborsClassifier(n_neighbors=10)
-    knn.fit(train.data, train.labels)
-    
-    prediction = knn.predict(test.data)
-    return TestResult("KNN", test.labels, prediction)
-
-
-allTesters = {
-    "LR": lr,
-    "SVM": svm,
-    "KNN": knn
-    }
 
+def plotCloud(dataset):
+    """
+    Does a PCA analysis of the given data and plots the two most important axes.
+    """
+    # Normalizes the data.
+    data_t = StandardScaler().fit_transform(dataset.data)
 
-def plotCloud(data, labels):
-    data_t = StandardScaler().fit_transform(data)
+    # Run the PCA analysis.
     pca = PCA(n_components=2)
     pc = pca.fit_transform(data_t)
+
+    # Create a DataFrame for plotting.
     result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
-    result['Cluster'] = labels
-    
+    result['Cluster'] = dataset.labels
+
+    # Plot the analysis results.
     sns.set( font_scale=1.2)
-    g=sns.lmplot( x="PCA0", y="PCA1",
-      data=result, 
-      fit_reg=False, 
+    sns.lmplot( x="PCA0", y="PCA1",
+      data=result,
+      fit_reg=False,
       hue='Cluster', # color by cluster
       legend=False,
       scatter_kws={"s": 3}, palette="Set1") # specify the point size

+ 30 - 36
library/interfaces.py

@@ -1,41 +1,35 @@
-import numpy as np
+"""
+This module contains used interfaces for testing the Generative Adversarial Networks.
+"""
 
-class GanBaseClass:
-    def __init__(self):
-        self.isTrained = False
-        self.exampleItems = None
-        self.nextIndex = 0
-        pass
-
-    def train(self, dataSet):
-        if dataSet.data1.shape[0] <= 0:
-            raise AttributeError("Train GAN: Expected data class 1 to contain at least one point.")
-
-        print(
-            "Train GAN with |class 0|=%d, |class 1|=%d"
-            % (dataSet.data0.shape[0], dataSet.data1.shape[0])
-            )
-        self.isTrained = True
-        self.exampleItems = dataSet.data1.copy()
-
-    def generateData(self):
-        if not self.isTrained:
-            raise ValueError("Try to generate data with untrained GAN.")
-
-        i = self.nextIndex
-        self.nextIndex += 1
-        if self.nextIndex >= self.exampleItems.shape[0]:
-            self.nextIndex = 0
-
-        return self.exampleItems[i]
 
+class GanBaseClass:
+    """
+    Base class for the Generative Adversarial Network.
+    It defines the interface used by the Exercise class.
+    """
 
-class TesterNetworkBaseClass:
     def __init__(self):
-        pass
+        """
+        Initializes the class.
+        """
 
-    def train(self, data, labels):
-        pass
-
-    def predict(self, data):
-        return np.zeros(data.shape[0])
+    def train(self, dataSet):
+        """
+        Trains the GAN.
+        """
+        raise NotImplementedError
+
+    def generateDataPoint(self):
+        """
+        Generates one synthetic data-point.
+        """
+        return self.generateData(1)[0]
+
+    def generateData(self, numOfSamples=1):
+        """
+        Generates a list of synthetic data-points.
+
+        *numOfSamples* is an integer > 0. It gives the number of generated samples.
+        """
+        raise NotImplementedError

+ 112 - 0
library/testers.py

@@ -0,0 +1,112 @@
+"""
+This module contains test functions for datasets using the logistic regression, the support vector
+machine and the k-nearest-neighbours algorithm. Additionally it contains a class for storing the
+results of the tests.
+"""
+
+
+import sklearn
+# needed in function lr
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import f1_score
+from sklearn.metrics import balanced_accuracy_score
+
+
+class TestResult:
+    """
+    This class represents the result of one test.
+
+    It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*)
+    and the f1 score (*f1*). If given the average precision score is also stored (*aps*).
+    """
+    def __init__(self, title, labels, prediction, aps=None):
+        """
+        Creates an instance of this class. The stored data will be generated from the given values.
+
+        *title* is a text to identify this result.
+
+        *labels* is a /numpy.array/ containing the labels of the test-data-set.
+
+        *prediction* is a /numpy.array/ containing the done prediction for the test-data-set.
+
+        *aps* is a real number representing the average precision score.
+        """
+        self.title = title
+        self.con_mat = confusion_matrix(labels, prediction)
+        self.bal_acc = balanced_accuracy_score(labels, prediction)
+        self.f1 = f1_score(labels, prediction)
+        self.aps = aps
+
+    def __str__(self):
+        """
+        Generates a text representing this result.
+        """
+        #tn, fp, fn, tp = con_mat.ravel()
+        r = self.con_mat.ravel()
+        text = f"tn, fp, fn, tp: {r}"
+
+        if self.aps is not None:
+            text += f"\naverage_pr_score: {self.aps}"
+
+        text += f"\nf1 score_{self.title}: {self.f1}"
+        text += f"\nbalanced accuracy_{self.title}: {self.bal_acc}"
+        text += f"\nconfusion matrix_{self.title}\n {self.con_mat}"
+        return text
+
+
+def lr(ttd):
+    """
+    Runs a test for a dataset with the logistic regression algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    logreg = LogisticRegression(
+        C=1e5,
+        solver='lbfgs',
+        multi_class='multinomial',
+        class_weight={0: 1, 1: 1.3}
+        )
+    logreg.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = logreg.predict(ttd.test.data)
+
+    prob_lr = logreg.predict_proba(ttd.test.data)
+    aps_lr = average_precision_score(ttd.test.labels, prob_lr[:,1])
+    return TestResult("LR", ttd.test.labels, prediction, aps_lr)
+
+
+def svm(ttd):
+    """
+    Runs a test for a dataset with the support vector machine algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    svmTester = sklearn.svm.SVC(
+        kernel='linear',
+        decision_function_shape='ovo',
+        class_weight={0: 1., 1: 1.},
+        probability=True
+        )
+    svmTester.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = svmTester.predict(ttd.test.data)
+    return TestResult("SVM", ttd.test.labels, prediction)
+
+
+def knn(ttd):
+    """
+    Runs a test for a dataset with the k-nearest neighbours algorithm.
+    It returns a /TestResult./
+
+    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
+    """
+    knnTester = KNeighborsClassifier(n_neighbors=10)
+    knnTester.fit(ttd.train.data, ttd.train.labels)
+
+    prediction = knnTester.predict(ttd.test.data)
+    return TestResult("KNN", ttd.test.labels, prediction)

部分文件因文件數量過多而無法顯示