|
@@ -1,4 +1,21 @@
|
|
|
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
# needed in function lr
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
|
|
from library.dataset import DataSet, TrainTestData
|
|
from library.dataset import DataSet, TrainTestData
|
|
|
|
|
|
|
@@ -8,10 +25,10 @@ class Exercise:
|
|
|
Exercising a test for a minority class extension class.
|
|
Exercising a test for a minority class extension class.
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
        """Configure the exercise run.

        testFunctions: mapping of tester name -> callable(train, test); each
            callable is invoked per data slice and its result collected.
        shuffleFunction: optional callable used to shuffle the data between
            runs (may be None).
        numOfSlices: number of data slices to exercise.
        numOfShuffles: number of shuffle repetitions.
        """
        self.numOfSlices = numOfSlices
        self.numOfShuffles = numOfShuffles
        self.testFunctions = testFunctions
        self.shuffleFunction = shuffleFunction
        # Debug output hook; defaults to print so it can be redirected or silenced.
        self.debug = print
|
|
|
|
|
|
|
@@ -53,18 +70,92 @@ class Exercise:
|
|
|
else:
|
|
else:
|
|
|
train = dataSlice.train
|
|
train = dataSlice.train
|
|
|
|
|
|
|
|
|
|
+ plotCloud(train.data, train.labels)
|
|
|
|
|
|
|
|
- self.debug("-> create network")
|
|
|
|
|
- testNetwork = self.createNetworkFunction()
|
|
|
|
|
-
|
|
|
|
|
- self.debug("-> train network")
|
|
|
|
|
- testNetwork.train(train.data, train.labels)
|
|
|
|
|
-
|
|
|
|
|
- self.debug("-> test network")
|
|
|
|
|
- results = testNetwork.predict(dataSlice.test.data)
|
|
|
|
|
|
|
+ results = { name: [] for name in self.testFunctions }
|
|
|
|
|
+ for testerName in self.testFunctions:
|
|
|
|
|
+ self.debug(f"-> test with '{testerName}'")
|
|
|
|
|
+ testResult = (self.testFunctions[testerName])(train, dataSlice.test)
|
|
|
|
|
+ testResult.print()
|
|
|
|
|
+ results[testerName].append(testResult)
|
|
|
|
|
|
|
|
self.debug("-> check results")
|
|
self.debug("-> check results")
|
|
|
self._checkResults(results, dataSlice.test.labels)
|
|
self._checkResults(results, dataSlice.test.labels)
|
|
|
|
|
|
|
|
    def _checkResults(self, results, expectedLabels):
        """Validate collected tester results against the expected labels.

        Currently a no-op placeholder — presumably intended to be filled in
        or overridden later; verify with the author before relying on it.
        """
        pass
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestResult:
    """Bundle of evaluation metrics computed for a single classifier run."""

    def __init__(self, title, labels, prediction, aps=None):
        """Compute and store metrics from true labels vs. predictions.

        title: short name of the classifier, used in the printed report.
        labels: ground-truth labels.
        prediction: predicted labels.
        aps: optional average-precision score, stored as given.
        """
        self.title = title
        self.aps = aps
        # All metrics are computed once, up front.
        self.con_mat = confusion_matrix(labels, prediction)
        self.f1 = f1_score(labels, prediction)
        self.bal_acc = balanced_accuracy_score(labels, prediction)

    def print(self):
        """Write every stored metric to stdout."""
        # For a binary problem the flattened confusion matrix reads (tn, fp, fn, tp).
        flat = self.con_mat.ravel()
        print('tn, fp, fn, tp:', flat)

        if self.aps is not None:
            print('average_pr_score:', self.aps)

        print(f'f1 score_{self.title}:', self.f1)
        print(f'balanced accuracy_{self.title}:', self.bal_acc)
        print(f'confusion matrix_{self.title}')
        print(self.con_mat)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def lr(train, test):
    """Fit a weighted logistic-regression classifier on *train*, score *test*.

    Returns a TestResult carrying the predictions plus the average-precision
    score computed from the positive-class probabilities.
    """
    # Class 1 is up-weighted (1.3) — consistent with a minority-class exercise.
    model = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3})
    model.fit(train.data, train.labels)

    predicted = model.predict(test.data)

    # Average precision uses the probability of the positive class (column 1).
    positive_probs = model.predict_proba(test.data)[:, 1]
    aps = average_precision_score(test.labels, positive_probs)
    return TestResult("LR", test.labels, predicted, aps)
|
|
|
|
|
+
|
|
|
|
|
def svm(train, test):
    """Train a linear SVM on *train* and evaluate it on *test*.

    Returns a TestResult with the predicted labels (no probability score).
    """
    # BUGFIX: the original accessed sklearn.svm.SVC, but `import sklearn`
    # alone does not load the svm submodule and nothing else in this file
    # imports it, so that attribute access raised AttributeError at runtime.
    # SVC is now imported explicitly at the top of the file.
    # Also renamed the local (was `svm`, shadowing this function's own name)
    # and dropped probability=True: predict_proba was never called, and that
    # flag only adds an internal cross-validation cost without affecting
    # predict() output.
    model = SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.})
    model.fit(train.data, train.labels)

    prediction = model.predict(test.data)
    return TestResult("SVM", test.labels, prediction)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def knn(train, test):
    """Fit a 10-nearest-neighbours classifier on *train* and score *test*."""
    # Local renamed from `knn` to avoid shadowing this function's own name.
    classifier = KNeighborsClassifier(n_neighbors=10)
    classifier.fit(train.data, train.labels)

    return TestResult("KNN", test.labels, classifier.predict(test.data))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Registry of every available tester, keyed by the display name that ends up
# in each TestResult's title.
allTesters = {
    "LR": lr,
    "SVM": svm,
    "KNN": knn
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def plotCloud(data, labels):
    """Project *data* onto its first two PCA components and scatter-plot it,
    coloured by *labels*.

    data: 2-D feature matrix (samples x features).
    labels: per-sample cluster/class labels used for colouring and the legend.
    """
    # Standardise first so PCA is not dominated by large-scale features.
    data_t = StandardScaler().fit_transform(data)
    pca = PCA(n_components=2)
    pc = pca.fit_transform(data_t)
    result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
    result['Cluster'] = labels

    sns.set(font_scale=1.2)
    sns.lmplot(x="PCA0", y="PCA1",
               data=result,
               fit_reg=False,
               hue='Cluster',  # color by cluster
               legend=False,
               scatter_kws={"s": 3}, palette="Set1")  # specify the point size

    # BUGFIX/generalisation: the legend labels were hard-coded to ['0', '1'],
    # which silently mislabels the plot for any other label set. Derive them
    # from the labels actually present instead.
    # NOTE(review): assumes seaborn orders hue categories in ascending sorted
    # order, matching sorted(set(labels)) — confirm against the seaborn version
    # in use.
    legend_labels = [str(value) for value in sorted(set(labels))]
    plt.legend(title='', loc='upper left', labels=legend_labels)
    plt.show()
|