"""
This module contains test functions for datasets using the logistic
regression, the support vector machine and the k-nearest-neighbours
algorithm. Additionally it contains a class for storing the results of the
tests.
"""

# Importing the submodule explicitly binds the name ``sklearn`` *and*
# guarantees that ``sklearn.svm`` is loaded; it is needed in function svm.
import sklearn.svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import balanced_accuracy_score


class TestResult:
    """
    This class represents the result of one test.

    It stores its *title*, a confusion matrix (*con_mat*), the balanced
    accuracy score (*bal_acc*) and the f1 score (*f1*). If given, the
    average precision score is also stored (*aps*).
    """

    def __init__(self, title, labels, prediction, aps=None):
        """
        Creates an instance of this class. The stored data will be generated
        from the given values.

        *title* is a text to identify this result.

        *labels* is a /numpy.array/ containing the labels of the
        test-data-set.

        *prediction* is a /numpy.array/ containing the prediction made for
        the test-data-set.

        *aps* is a real number representing the average precision score, or
        /None/ if no such score was computed.
        """
        self.title = title
        self.con_mat = confusion_matrix(labels, prediction)
        self.bal_acc = balanced_accuracy_score(labels, prediction)
        # f1_score is called with its default averaging.
        # NOTE(review): presumably the labels are binary (0/1) -- confirm.
        self.f1 = f1_score(labels, prediction)
        self.aps = aps

    def __str__(self) -> str:
        """
        Generates a text representing this result.

        The first line shows the flattened confusion matrix. The average
        precision score is only listed if it was given.
        """
        # For a 2x2 (binary) confusion matrix the flattened order is
        # tn, fp, fn, tp; for any other shape the heading does not apply.
        flat = self.con_mat.ravel()
        parts = [f"tn, fp, fn, tp: {flat}"]
        if self.aps is not None:
            parts.append(f"average_pr_score: {self.aps}")
        parts.append(f"f1 score_{self.title}: {self.f1}")
        parts.append(f"balanced accuracy_{self.title}: {self.bal_acc}")
        parts.append(f"confusion matrix_{self.title}\n {self.con_mat}")
        return "\n".join(parts)


def lr(ttd):
    """
    Runs a test for a dataset with the logistic regression algorithm.
    It returns a /TestResult/ including the average precision score.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to
    test. Its *train* and *test* members are read, each through its *data*
    and *labels* attributes.
    """
    # NOTE(review): recent scikit-learn releases deprecate ``multi_class``;
    # it is kept here so that the fitted model stays unchanged -- confirm
    # against the scikit-learn version this project pins.
    logreg = LogisticRegression(
        C=1e5,
        solver='lbfgs',
        multi_class='multinomial',
        class_weight={0: 1, 1: 1.3},
    )
    logreg.fit(ttd.train.data, ttd.train.labels)

    prediction = logreg.predict(ttd.test.data)

    # The second probability column is used as the score for the average
    # precision; with the class weights above this is the class labelled 1.
    probabilities = logreg.predict_proba(ttd.test.data)
    aps = average_precision_score(ttd.test.labels, probabilities[:, 1])
    return TestResult("LR", ttd.test.labels, prediction, aps)


def svm(ttd):
    """
    Runs a test for a dataset with the support vector machine algorithm.
    It returns a /TestResult/ without an average precision score.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to
    test. Its *train* and *test* members are read, each through its *data*
    and *labels* attributes.
    """
    # NOTE(review): ``probability=True`` is requested but no probabilities
    # are read in this function -- presumably reserved for a later average
    # precision score; confirm before removing it.
    svm_tester = sklearn.svm.SVC(
        kernel='linear',
        decision_function_shape='ovo',
        class_weight={0: 1., 1: 1.},
        probability=True,
    )
    svm_tester.fit(ttd.train.data, ttd.train.labels)

    prediction = svm_tester.predict(ttd.test.data)
    return TestResult("SVM", ttd.test.labels, prediction)


def knn(ttd):
    """
    Runs a test for a dataset with the k-nearest-neighbours algorithm
    (using 10 neighbours).
    It returns a /TestResult/ without an average precision score.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to
    test. Its *train* and *test* members are read, each through its *data*
    and *labels* attributes.
    """
    knn_tester = KNeighborsClassifier(n_neighbors=10)
    knn_tester.fit(ttd.train.data, ttd.train.labels)

    prediction = knn_tester.predict(ttd.test.data)
    return TestResult("KNN", ttd.test.labels, prediction)