|
@@ -1,4 +1,21 @@
|
|
|
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
# needed in function lr
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
|
|
from library.dataset import DataSet, TrainTestData
|
|
from library.dataset import DataSet, TrainTestData
|
|
|
|
|
|
|
@@ -8,10 +25,10 @@ class Exercise:
|
|
|
Exercising a test for a minority class extension class.
|
|
Exercising a test for a minority class extension class.
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
    def __init__(self, testFunctions, shuffleFunction=None, numOfSlices=5, numOfShuffles=5):
        """Configure the exercise run.

        testFunctions: mapping of tester name -> callable(train, test); each
            callable is invoked per data slice and its result collected.
        shuffleFunction: optional callable used to shuffle the data between
            runs (may be None).
        numOfSlices: number of data slices to exercise.
        numOfShuffles: number of shuffle repetitions.
        """
        self.numOfSlices = numOfSlices
        self.numOfShuffles = numOfShuffles
        self.testFunctions = testFunctions
        self.shuffleFunction = shuffleFunction
        # Debug output hook; defaults to print so it can be redirected or silenced.
        self.debug = print
|
|
|
|
|
|
|
@@ -53,18 +70,92 @@ class Exercise:
|
|
|
else:
|
|
else:
|
|
|
train = dataSlice.train
|
|
train = dataSlice.train
|
|
|
|
|
|
|
|
|
|
+ plotCloud(train.data, train.labels)
|
|
|
|
|
|
|
|
- self.debug("-> create network")
|
|
|
|
|
- testNetwork = self.createNetworkFunction()
|
|
|
|
|
-
|
|
|
|
|
- self.debug("-> train network")
|
|
|
|
|
- testNetwork.train(train.data, train.labels)
|
|
|
|
|
-
|
|
|
|
|
- self.debug("-> test network")
|
|
|
|
|
- results = testNetwork.predict(dataSlice.test.data)
|
|
|
|
|
|
|
+ results = { name: [] for name in self.testFunctions }
|
|
|
|
|
+ for testerName in self.testFunctions:
|
|
|
|
|
+ self.debug(f"-> test with '{testerName}'")
|
|
|
|
|
+ testResult = (self.testFunctions[testerName])(train, dataSlice.test)
|
|
|
|
|
+ testResult.print()
|
|
|
|
|
+ results[testerName].append(testResult)
|
|
|
|
|
|
|
|
self.debug("-> check results")
|
|
self.debug("-> check results")
|
|
|
self._checkResults(results, dataSlice.test.labels)
|
|
self._checkResults(results, dataSlice.test.labels)
|
|
|
|
|
|
|
|
    def _checkResults(self, results, expectedLabels):
        """Validate collected tester results against the expected labels.

        Currently a no-op placeholder — presumably intended to be filled in
        or overridden later; verify with the author before relying on it.
        """
        pass
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class TestResult:
    """Bundle of evaluation metrics computed for a single classifier run."""

    def __init__(self, title, labels, prediction, aps=None):
        """Compute and store metrics from true labels vs. predictions.

        title: short name of the classifier, used in the printed report.
        labels: ground-truth labels.
        prediction: predicted labels.
        aps: optional average-precision score, stored as given.
        """
        self.title = title
        self.aps = aps
        # All metrics are computed once, up front.
        self.con_mat = confusion_matrix(labels, prediction)
        self.f1 = f1_score(labels, prediction)
        self.bal_acc = balanced_accuracy_score(labels, prediction)

    def print(self):
        """Write every stored metric to stdout."""
        # For a binary problem the flattened confusion matrix reads (tn, fp, fn, tp).
        flat = self.con_mat.ravel()
        print('tn, fp, fn, tp:', flat)

        if self.aps is not None:
            print('average_pr_score:', self.aps)

        print(f'f1 score_{self.title}:', self.f1)
        print(f'balanced accuracy_{self.title}:', self.bal_acc)
        print(f'confusion matrix_{self.title}')
        print(self.con_mat)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def lr(train, test):
    """Fit a weighted logistic-regression classifier on *train*, score *test*.

    Returns a TestResult carrying the predictions plus the average-precision
    score computed from the positive-class probabilities.
    """
    # Class 1 is up-weighted (1.3) — consistent with a minority-class exercise.
    model = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3})
    model.fit(train.data, train.labels)

    predicted = model.predict(test.data)

    # Average precision uses the probability of the positive class (column 1).
    positive_probs = model.predict_proba(test.data)[:, 1]
    aps = average_precision_score(test.labels, positive_probs)
    return TestResult("LR", test.labels, predicted, aps)
|
|
|
|
|
+
|
|
|
|
|
def svm(train, test):
    """Train a linear SVM on *train* and evaluate it on *test*.

    Returns a TestResult with the predicted labels (no probability score).
    """
    # BUGFIX: the original accessed sklearn.svm.SVC, but `import sklearn`
    # alone does not load the svm submodule and nothing else in this file
    # imports it, so that attribute access raised AttributeError at runtime.
    # SVC is now imported explicitly at the top of the file.
    # Also renamed the local (was `svm`, shadowing this function's own name)
    # and dropped probability=True: predict_proba was never called, and that
    # flag only adds an internal cross-validation cost without affecting
    # predict() output.
    model = SVC(kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.})
    model.fit(train.data, train.labels)

    prediction = model.predict(test.data)
    return TestResult("SVM", test.labels, prediction)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def knn(train, test):
    """Fit a 10-nearest-neighbours classifier on *train* and score *test*."""
    # Local renamed from `knn` to avoid shadowing this function's own name.
    classifier = KNeighborsClassifier(n_neighbors=10)
    classifier.fit(train.data, train.labels)

    return TestResult("KNN", test.labels, classifier.predict(test.data))
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Registry of every available tester, keyed by the display name that ends up
# in each TestResult's title.
allTesters = {
    "LR": lr,
    "SVM": svm,
    "KNN": knn
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def plotCloud(data, labels):
    """Project *data* onto its first two PCA components and scatter-plot it,
    coloured by *labels*.

    data: 2-D feature matrix (samples x features).
    labels: per-sample cluster/class labels used for colouring and the legend.
    """
    # Standardise first so PCA is not dominated by large-scale features.
    data_t = StandardScaler().fit_transform(data)
    pca = PCA(n_components=2)
    pc = pca.fit_transform(data_t)
    result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
    result['Cluster'] = labels

    sns.set(font_scale=1.2)
    sns.lmplot(x="PCA0", y="PCA1",
               data=result,
               fit_reg=False,
               hue='Cluster',  # color by cluster
               legend=False,
               scatter_kws={"s": 3}, palette="Set1")  # specify the point size

    # BUGFIX/generalisation: the legend labels were hard-coded to ['0', '1'],
    # which silently mislabels the plot for any other label set. Derive them
    # from the labels actually present instead.
    # NOTE(review): assumes seaborn orders hue categories in ascending sorted
    # order, matching sorted(set(labels)) — confirm against the seaborn version
    # in use.
    legend_labels = [str(value) for value in sorted(set(labels))]
    plt.legend(title='', loc='upper left', labels=legend_labels)
    plt.show()
|