Browse Source

Added network testers.

Kristian Schultz 4 năm trước cách đây
mục cha
commit
ee9e89dd8b
2 tập tin đã thay đổi với 117 bổ sung và 36 xóa
  1. 16 26
      Example Exercise.ipynb
  2. 101 10
      library/exercise.py

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 16 - 26
Example Exercise.ipynb


+ 101 - 10
library/exercise.py

@@ -1,4 +1,21 @@
 import numpy as np
+import pandas as pd
+
+import sklearn
+# needed in function lr
+from sklearn import metrics
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import average_precision_score
+from sklearn.metrics import f1_score
+from sklearn.metrics import balanced_accuracy_score
+
+
+from sklearn.decomposition import PCA
+import seaborn as sns
+from sklearn.preprocessing import StandardScaler
+import matplotlib.pyplot as plt
 
 from library.dataset import DataSet, TrainTestData
 
@@ -8,10 +25,10 @@ class Exercise:
     Exercising a test for a minority class extension class.
     """
 
-    def __init__(self, createNetworkFunction, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
+    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
         self.numOfSlices = numOfSlices
         self.numOfShuffles = numOfShuffles
-        self.createNetworkFunction = createNetworkFunction
+        self.testFunctions = testFunctions
         self.shuffleFunction = shuffleFunction
         self.debug = print
 
@@ -53,18 +70,92 @@ class Exercise:
         else:
             train = dataSlice.train
 
+        plotCloud(train.data, train.labels)
 
-        self.debug("-> create network")
-        testNetwork = self.createNetworkFunction()
-
-        self.debug("-> train network")
-        testNetwork.train(train.data, train.labels)
-
-        self.debug("-> test network")
-        results = testNetwork.predict(dataSlice.test.data)
+        results = { name: [] for name in self.testFunctions }
+        for testerName in self.testFunctions:
+            self.debug(f"-> test with '{testerName}'")
+            testResult = (self.testFunctions[testerName])(train, dataSlice.test)
+            testResult.print()
+            results[testerName].append(testResult)
 
         self.debug("-> check results")
         self._checkResults(results, dataSlice.test.labels)
 
     def _checkResults(self, results, expectedLabels):
+        """
+        Placeholder for checking ``results`` against ``expectedLabels``.
+
+        Currently does nothing.
+        """
         pass
+
+
+class TestResult:
+    """
+    Scores of one tester on one test set.
+
+    The constructor derives the confusion matrix, the balanced accuracy and
+    the F1 score from the expected and the predicted labels. The average
+    precision score is not computed here; it is stored as given.
+    """
+
+    def __init__(self, title, labels, prediction, aps=None):
+        """
+        title: name of the tester, used to tag the printed scores.
+        labels: expected labels of the test points.
+        prediction: labels predicted for the same points.
+        aps: optional, already computed average precision score.
+        """
+        self.title = title
+        self.aps = aps
+        self.con_mat = confusion_matrix(labels, prediction)
+        self.bal_acc = balanced_accuracy_score(labels, prediction)
+        self.f1 = f1_score(labels, prediction)
+
+    def print(self):
+        """Write all stored scores to stdout."""
+        # For binary labels the flattened matrix reads tn, fp, fn, tp.
+        print('tn, fp, fn, tp:', self.con_mat.ravel())
+
+        # The average precision score is optional.
+        if self.aps is not None:
+            print('average_pr_score:', self.aps)
+
+        print(f'f1 score_{self.title}:', self.f1)
+        print(f'balanced accuracy_{self.title}:', self.bal_acc)
+        print(f'confusion matrix_{self.title}')
+        print(self.con_mat)
+
+
+
+def lr(train, test):
+    """
+    Fit a logistic regression on ``train`` and score it on ``test``.
+
+    Both arguments must offer ``data`` and ``labels`` attributes. Returns a
+    TestResult titled "LR" that also carries the average precision score.
+    """
+    model = LogisticRegression(
+        C=1e5,
+        solver='lbfgs',
+        multi_class='multinomial',
+        class_weight={0: 1, 1: 1.3},
+    )
+    model.fit(train.data, train.labels)
+
+    # The second column of predict_proba is used as the score for the
+    # average precision (presumably class 1, given the 0/1 class weights).
+    positiveScores = model.predict_proba(test.data)[:, 1]
+    aps_lr = average_precision_score(test.labels, positiveScores)
+
+    return TestResult("LR", test.labels, model.predict(test.data), aps_lr)
+
+def svm(train, test):
+    """
+    Fit a linear support vector classifier on ``train`` and score it on ``test``.
+
+    Both arguments must offer ``data`` and ``labels`` attributes. Returns a
+    TestResult titled "SVM" without an average precision score.
+    """
+    # Import SVC explicitly: a bare ``import sklearn`` does not load the
+    # ``sklearn.svm`` submodule, so ``sklearn.svm.SVC`` only resolves when
+    # some other import happened to load that submodule first.
+    from sklearn.svm import SVC
+
+    # NOTE(review): probability=True slows down fit, yet predict_proba is
+    # never called here -- confirm whether the flag is still needed.
+    classifier = SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.}, probability=True)
+    classifier.fit(train.data, train.labels)
+
+    prediction = classifier.predict(test.data)
+    return TestResult("SVM", test.labels, prediction)
+
+
+def knn(train, test):
+    """
+    Fit a 10-nearest-neighbors classifier on ``train`` and score it on ``test``.
+
+    Both arguments must offer ``data`` and ``labels`` attributes. Returns a
+    TestResult titled "KNN" without an average precision score.
+    """
+    classifier = KNeighborsClassifier(n_neighbors=10)
+    classifier.fit(train.data, train.labels)
+    return TestResult("KNN", test.labels, classifier.predict(test.data))
+
+
+# All testers defined in this module, keyed by their display name. Each one
+# is called with (train, test) and returns a TestResult.
+allTesters = {"LR": lr, "SVM": svm, "KNN": knn}
+
+
+def plotCloud(data, labels):
+    """
+    Show a 2D scatter plot of ``data`` colored by ``labels``.
+
+    The data is standardized, reduced to two principal components and drawn
+    with seaborn; the figure is then displayed with ``plt.show()``.
+    """
+    scaled = StandardScaler().fit_transform(data)
+    components = PCA(n_components=2).fit_transform(scaled)
+
+    result = pd.DataFrame(data=components, columns=['PCA0', 'PCA1'])
+    result['Cluster'] = labels
+
+    sns.set(font_scale=1.2)
+    sns.lmplot(
+        x="PCA0",
+        y="PCA1",
+        data=result,
+        fit_reg=False,  # scatter only, no regression line
+        hue='Cluster',  # color by cluster
+        legend=False,  # the legend is added by hand below
+        scatter_kws={"s": 3},  # specify the point size
+        palette="Set1",
+    )
+
+    # The legend entries are fixed to '0' and '1'.
+    plt.legend(title='', loc='upper left', labels=['0', '1'])
+    plt.show()

Một số tệp đã không được hiển thị vì có quá nhiều tập tin thay đổi trong bản so sánh này