| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163 |
- """
- This module contains test functions for datasets using logistic regression, the support vector
- machine and the k-nearest-neighbours algorithm. Additionally, it contains a class for storing the
- results of the tests.
- """
- import sklearn
- # needed in function lr
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.linear_model import LogisticRegression
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import average_precision_score
- from sklearn.metrics import f1_score
- from sklearn.metrics import balanced_accuracy_score
class TestResult:
    """
    This class represents the result of one test.

    It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*)
    and the f1 score (*f1*). If given, the average precision score is also stored (*aps*).
    """

    def __init__(self, title, labels, prediction, aps=None):
        """
        Creates an instance of this class. The stored data will be generated from the given values.

        *title* is a text to identify this result.
        *labels* is a /numpy.array/ containing the labels of the test-data-set.
        *prediction* is a /numpy.array/ containing the done prediction for the test-data-set.
        *aps* is a real number representing the average precision score (optional).
        """
        self.title = title
        self.con_mat = confusion_matrix(labels, prediction)
        self.bal_acc = balanced_accuracy_score(labels, prediction)
        self.f1 = f1_score(labels, prediction)
        self.aps = aps

    def __str__(self):
        """
        Generates a multi-line text representing this result.
        """
        # Flattened 2x2 matrix reads row by row: tn, fp, fn, tp.
        flat = self.con_mat.ravel()
        lines = [f"tn, fp, fn, tp: {flat}"]
        if self.aps is not None:
            lines.append(f"average_pr_score: {self.aps}")
        lines.append(f"f1 score_{self.title}: {self.f1}")
        lines.append(f"balanced accuracy_{self.title}: {self.bal_acc}")
        lines.append(f"confusion matrix_{self.title}\n {self.con_mat}")
        return "\n".join(lines)

    def csvHeading(self):
        """
        Returns the semicolon separated column titles matching the row produced by /toCSV./

        The column "Aps" is only present if an average precision score is stored.
        """
        columns = [
            "F1 score",
            "balanced accuracy",
            "TN",
            "FP",
            "FN",
            "TP",
        ]
        if self.aps is not None:
            columns.append("Aps")
        return ";".join(columns)

    def toCSV(self):
        """
        Returns the stored values as one semicolon separated row.

        The order of the values matches the titles returned by /csvHeading./
        """
        # The matrix is two-dimensional, so it has to be flattened to get the single counts
        # (tn, fp, fn, tp); indexing it directly would yield whole rows instead of numbers.
        counts = self.con_mat.ravel().tolist()
        # Always emit exactly four count columns so the row matches the heading. Missing
        # counts (e.g. a 1x1 matrix when only one class occurs) are filled with 0.
        # NOTE(review): for a 1x1 matrix the single count lands in the "TN" column
        # regardless of which class it belongs to.
        counts = (counts + [0] * 4)[:4]

        row = [self.f1, self.bal_acc, *counts]
        if self.aps is not None:
            row.append(self.aps)
        return ";".join(map(str, row))
def lr(ttd):
    """
    Tests a dataset with the logistic regression algorithm and returns a /TestResult./

    The result also carries the average precision score, computed from the predicted
    probabilities in the second column of the model's probability output.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
    """
    checkType(ttd)

    model = LogisticRegression(
        class_weight={0: 1, 1: 1.3},
        multi_class='multinomial',
        solver='lbfgs',
        C=1e5,
    )
    model.fit(ttd.train.data, ttd.train.labels)

    test_data = ttd.test.data
    predicted_labels = model.predict(test_data)
    # Second probability column; presumably the class labelled 1 - verify against the labels used.
    positive_scores = model.predict_proba(test_data)[:, 1]
    avg_precision = average_precision_score(ttd.test.labels, positive_scores)

    return TestResult("LR", ttd.test.labels, predicted_labels, avg_precision)
def svm(ttd):
    """
    Runs a test for a dataset with the support vector machine algorithm.
    It returns a /TestResult./ (without an average precision score).

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
    """
    # A plain `import sklearn` does not guarantee that the submodule `sklearn.svm` is loaded;
    # accessing `sklearn.svm.SVC` only works if some other import pulled it in. Import the
    # class explicitly so this function does not depend on that side effect.
    from sklearn.svm import SVC

    checkType(ttd)
    svmTester = SVC(
        kernel='linear',
        decision_function_shape='ovo',
        class_weight={0: 1., 1: 1.},
        # NOTE(review): the probability estimates are never read in this function; the flag
        # is kept to preserve the original model configuration.
        probability=True
    )
    svmTester.fit(ttd.train.data, ttd.train.labels)
    prediction = svmTester.predict(ttd.test.data)
    return TestResult("SVM", ttd.test.labels, prediction)
def knn(ttd):
    """
    Tests a dataset with the k-nearest-neighbours algorithm (10 neighbours) and returns
    a /TestResult./

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
    """
    checkType(ttd)

    classifier = KNeighborsClassifier(n_neighbors=10)
    classifier.fit(ttd.train.data, ttd.train.labels)
    predicted_labels = classifier.predict(ttd.test.data)

    return TestResult("KNN", ttd.test.labels, predicted_labels)
def checkType(t):
    """
    Checks recursively that *t* consists only of supported, non-empty data.

    Supported are real numbers (/int/, /float/ and numpy scalars registered as real numbers),
    non-empty lists and /numpy.ndarray/ instances of supported values, and the classes
    /library.dataset.DataSet/ (attributes *data0*, *data1*) and
    /library.dataset.TrainTestData/ (attributes *train*, *test*).

    It returns /True/ if everything is supported and non-empty, and /False/ if an empty
    list or array (or a zero-dimensional array) is found.
    It raises a /ValueError/ if a value of an unsupported type is found.
    """
    import numbers

    import numpy as np

    if isinstance(t, np.ndarray):
        if t.ndim == 0:
            # A zero-dimensional array is not a collection of samples.
            return False
        if t.dtype.kind in "iuf":
            # Every element of an integer/float array is a supported number, so only the
            # shape has to be inspected; no need to visit each element in Python.
            return all(dim > 0 for dim in t.shape)
        # Other dtypes (e.g. object arrays) may hold anything: inspect the elements.
        return t.shape[0] > 0 and all(checkType(item) for item in t)

    if isinstance(t, list):
        return len(t) > 0 and all(checkType(item) for item in t)

    if isinstance(t, numbers.Real):
        return True

    # The dataset classes are identified by their qualified name so that this module does
    # not have to import the project package.
    cls = type(t)
    qualified_name = f"{cls.__module__}.{cls.__qualname__}"
    if qualified_name == "library.dataset.DataSet":
        return checkType(t.data0) and checkType(t.data1)
    if qualified_name == "library.dataset.TrainTestData":
        return checkType(t.train) and checkType(t.test)

    raise ValueError("expected int, float, or list, dataset of int, float but got " + str(type(t)))
-
|