# fyrr / ConvGeNCode

"""
This module contains test functions for datasets using logistic regression, the support vector
machine and the k-nearest-neighbours algorithm. Additionally, it contains a class for storing the
results of the tests.
"""


import sklearn
# needed in function lr
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import balanced_accuracy_score


class TestResult:
    """
    Holds the evaluation metrics of a single classifier test run.

    The constructor stores the *title*, the confusion matrix (*con_mat*), the balanced accuracy
    score (*bal_acc*) and the f1 score (*f1*). The average precision score (*aps*) is stored as
    given and stays /None/ when it was not supplied.
    """
    def __init__(self, title, labels, prediction, aps=None):
        """
        Computes the metrics from the true and the predicted labels and stores them.

        *title* is a text to identify this result; it appears in the text representation.

        *labels* is a /numpy.array/ containing the true labels of the test-data-set.

        *prediction* is a /numpy.array/ containing the predicted labels for the test-data-set.

        *aps* is an optional real number representing the average precision score. It is not
        computed here, only stored.
        """
        self.title = title
        # All three metrics are derived from the same (labels, prediction) pair.
        truth_and_guess = (labels, prediction)
        self.con_mat = confusion_matrix(*truth_and_guess)
        self.bal_acc = balanced_accuracy_score(*truth_and_guess)
        self.f1 = f1_score(*truth_and_guess)
        self.aps = aps

    def __str__(self):
        """
        Builds a multi-line text with all stored metrics.

        The average precision score line is only included when *aps* is not /None/.
        """
        name = self.title
        # For a binary 2x2 confusion matrix the flattened order is tn, fp, fn, tp.
        flat_counts = self.con_mat.ravel()
        parts = [f"tn, fp, fn, tp: {flat_counts}"]

        if self.aps is not None:
            parts.append(f"average_pr_score: {self.aps}")

        parts.extend((
            f"f1 score_{name}: {self.f1}",
            f"balanced accuracy_{name}: {self.bal_acc}",
            f"confusion matrix_{name}\n {self.con_mat}",
        ))
        return "\n".join(parts)


def lr(ttd):
    """
    Trains a logistic regression on the training part of *ttd* and evaluates it on the test part.
    It returns a /TestResult/ titled "LR" that also carries the average precision score.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. Its *train* and
    *test* members are read for their *data* and *labels*.
    """
    classifier = LogisticRegression(
        class_weight={0: 1, 1: 1.3},
        multi_class='multinomial',
        solver='lbfgs',
        C=1e5,
    )
    train_part = ttd.train
    classifier.fit(train_part.data, train_part.labels)

    test_part = ttd.test
    predicted_labels = classifier.predict(test_part.data)

    # Column 1 of the probability matrix is used as the score for the average precision.
    scores_class_1 = classifier.predict_proba(test_part.data)[:, 1]
    avg_precision = average_precision_score(test_part.labels, scores_class_1)

    return TestResult("LR", test_part.labels, predicted_labels, avg_precision)


def svm(ttd):
    """
    Runs a test for a dataset with the support vector machine algorithm (linear kernel).
    It returns a /TestResult/ titled "SVM". No average precision score is stored.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. Its *train* and
    *test* members are read for their *data* and *labels*.
    """
    # Import the submodule explicitly. A bare "import sklearn" does not load "sklearn.svm";
    # accessing it as an attribute only works when some other import loaded it as a side effect.
    from sklearn.svm import SVC

    # Probability estimates are not requested: only predict() is used below and the fitted
    # model never leaves this function, so the extra internal cross-validation that
    # probability=True triggers during fit() would be wasted work.
    svm_tester = SVC(
        kernel='linear',
        decision_function_shape='ovo',
        class_weight={0: 1., 1: 1.},
    )
    svm_tester.fit(ttd.train.data, ttd.train.labels)

    prediction = svm_tester.predict(ttd.test.data)
    return TestResult("SVM", ttd.test.labels, prediction)


def knn(ttd):
    """
    Trains a k-nearest-neighbours classifier (10 neighbours) on the training part of *ttd* and
    evaluates it on the test part.
    It returns a /TestResult/ titled "KNN". No average precision score is stored.

    *ttd* is a /library.dataset.TrainTestData/ instance containing data to test. Its *train* and
    *test* members are read for their *data* and *labels*.
    """
    classifier = KNeighborsClassifier(n_neighbors=10)

    train_part = ttd.train
    classifier.fit(train_part.data, train_part.labels)

    test_part = ttd.test
    predicted_labels = classifier.predict(test_part.data)
    return TestResult("KNN", test_part.labels, predicted_labels)