""" This module contains test function for datasets using the logistic regression, the support vector machine and the k-next-neighbourhood algoritm. Additionally it contains a class for storing the results of the tests. """ import sklearn # needed in function lr from sklearn.neighbors import KNeighborsClassifier from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix from sklearn.metrics import average_precision_score from sklearn.metrics import f1_score from sklearn.metrics import balanced_accuracy_score class TestResult: """ This class represents the result of one test. It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*) and the f1 score (*f1*). If given the average precision score is also stored (*aps*). """ def __init__(self, title, labels, prediction, aps=None): """ Creates an instance of this class. The stored data will be generated from the given values. *title* is a text to identify this result. *labels* is a /numpy.array/ containing the labels of the test-data-set. *prediction* is a /numpy.array/ containing the done prediction for the test-data-set. *aps* is a real number representing the average precision score. """ self.title = title self.con_mat = confusion_matrix(labels, prediction) self.bal_acc = balanced_accuracy_score(labels, prediction) self.f1 = f1_score(labels, prediction) self.aps = aps def __str__(self): """ Generates a text representing this result. """ #tn, fp, fn, tp = con_mat.ravel() r = self.con_mat.ravel() text = f"tn, fp, fn, tp: {r}" if self.aps is not None: text += f"\naverage_pr_score: {self.aps}" text += f"\nf1 score_{self.title}: {self.f1}" text += f"\nbalanced accuracy_{self.title}: {self.bal_acc}" text += f"\nconfusion matrix_{self.title}\n {self.con_mat}" return text def csvHeading(self): r = [ "F1 score", "balanced accuracy", "TN", "FP", "FN", "TP" ] if self.aps is not None: r.append("Aps") return ";".join(r) def toCSV(self): r = map(str, [ self.f1, self.bal_acc, self.con_mat[0] if len(self.con_mat) > 0 else float(self.con_mat), self.con_mat[1] if len(self.con_mat) > 1 else 0, self.con_mat[2] if len(self.con_mat) > 2 else 0, self.con_mat[3] if len(self.con_mat) > 3 else 0 ]) if self.aps is not None: r.append(str(self.aps)) return ";".join(r) def lr(ttd): """ Runs a test for a dataset with the logistic regression algorithm. It returns a /TestResult./ *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. """ checkType(ttd) logreg = LogisticRegression( C=1e5, solver='lbfgs', multi_class='multinomial', class_weight={0: 1, 1: 1.3} ) logreg.fit(ttd.train.data, ttd.train.labels) prediction = logreg.predict(ttd.test.data) prob_lr = logreg.predict_proba(ttd.test.data) aps_lr = average_precision_score(ttd.test.labels, prob_lr[:,1]) return TestResult("LR", ttd.test.labels, prediction, aps_lr) def svm(ttd): """ Runs a test for a dataset with the support vector machine algorithm. It returns a /TestResult./ *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. """ checkType(ttd) svmTester = sklearn.svm.SVC( kernel='linear', decision_function_shape='ovo', class_weight={0: 1., 1: 1.}, probability=True ) svmTester.fit(ttd.train.data, ttd.train.labels) prediction = svmTester.predict(ttd.test.data) return TestResult("SVM", ttd.test.labels, prediction) def knn(ttd): """ Runs a test for a dataset with the k-next neighbourhood algorithm. It returns a /TestResult./ *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. """ checkType(ttd) knnTester = KNeighborsClassifier(n_neighbors=10) knnTester.fit(ttd.train.data, ttd.train.labels) prediction = knnTester.predict(ttd.test.data) return TestResult("KNN", ttd.test.labels, prediction) def checkType(t): if str(type(t)) == "": return t.shape[0] > 0 and all(map(checkType, t)) elif str(type(t)) == "": return len(t) > 0 and all(map(checkType, t)) elif str(type(t)) in ["", "", ""]: return True elif str(type(t)) == "": return checkType(t.data0) and checkType(t.data1) elif str(type(t)) == "": return checkType(t.train) and checkType(t.test) else: raise ValueError("expected int, float, or list, dataset of int, float but got " + str(type(t))) return False