|
|
@@ -13,7 +13,17 @@ from sklearn.metrics import confusion_matrix
|
|
|
from sklearn.metrics import average_precision_score
|
|
|
from sklearn.metrics import f1_score
|
|
|
from sklearn.metrics import balanced_accuracy_score
|
|
|
-
|
|
|
+from sklearn.metrics import cohen_kappa_score
|
|
|
+from sklearn.ensemble import GradientBoostingClassifier
|
|
|
+
|
|
|
+_tF1 = "f1 score"
|
|
|
+_tBalAcc = "balanced accuracy"
|
|
|
+_tTN = "TN"
|
|
|
+_tTP = "TP"
|
|
|
+_tFN = "FN"
|
|
|
+_tFP = "FP"
|
|
|
+_tAps = "average precision score"
|
|
|
+_tCks = "cohens kappa score"
|
|
|
|
|
|
class TestResult:
|
|
|
"""
|
|
|
@@ -22,7 +32,7 @@ class TestResult:
|
|
|
It stores its *title*, a confusion matrix (*con_mat*), the balanced accuracy score (*bal_acc*)
|
|
|
and the f1 score (*f1*). If given the average precision score is also stored (*aps*).
|
|
|
"""
|
|
|
- def __init__(self, title, labels, prediction, aps=None):
|
|
|
+ def __init__(self, title, labels=None, prediction=None, aps=None):
|
|
|
"""
|
|
|
Creates an instance of this class. The stored data will be generated from the given values.
|
|
|
|
|
|
@@ -35,57 +45,109 @@ class TestResult:
|
|
|
*aps* is a real number representing the average precision score.
|
|
|
"""
|
|
|
self.title = title
|
|
|
- self.con_mat = confusion_matrix(labels, prediction)
|
|
|
- self.bal_acc = balanced_accuracy_score(labels, prediction)
|
|
|
- self.f1 = f1_score(labels, prediction)
|
|
|
- self.aps = aps
|
|
|
+ self.heading = [_tTN, _tTP, _tFN, _tFP, _tF1, _tBalAcc, _tCks]
|
|
|
+ if aps is not None:
|
|
|
+ self.heading.append(_tAps)
|
|
|
+ self.data = { n: 0.0 for n in self.heading }
|
|
|
+
|
|
|
+ if labels is not None and prediction is not None:
|
|
|
+ self.data[_tBalAcc] = balanced_accuracy_score(labels, prediction)
|
|
|
+ self.data[_tF1] = f1_score(labels, prediction)
|
|
|
+ self.data[_tCks] = cohen_kappa_score(labels, prediction)
|
|
|
+ conMat = self._enshureConfusionMatrix(confusion_matrix(labels, prediction))
|
|
|
+ [[tn, fp], [fn, tp]] = conMat
|
|
|
+ self.data[_tTN] = tn
|
|
|
+ self.data[_tTP] = tp
|
|
|
+ self.data[_tFN] = fn
|
|
|
+ self.data[_tFP] = fp
|
|
|
+
|
|
|
+ if aps is not None:
|
|
|
+ self.data[_tAps] = aps
|
|
|
|
|
|
def __str__(self):
|
|
|
"""
|
|
|
Generates a text representing this result.
|
|
|
"""
|
|
|
- #tn, fp, fn, tp = con_mat.ravel()
|
|
|
- r = self.con_mat.ravel()
|
|
|
- text = f"tn, fp, fn, tp: {r}"
|
|
|
+ text = ""
|
|
|
|
|
|
- if self.aps is not None:
|
|
|
- text += f"\naverage_pr_score: {self.aps}"
|
|
|
+ tn = self.data[_tTN]
|
|
|
+ tp = self.data[_tTP]
|
|
|
+ fn = self.data[_tFN]
|
|
|
+ fp = self.data[_tFP]
|
|
|
+ text += f"{self.title} tn, fp: {tn}, {fp}\n"
|
|
|
+ text += f"{self.title} fn, tp: {fn}, {tp}\n"
|
|
|
|
|
|
- text += f"\nf1 score_{self.title}: {self.f1}"
|
|
|
- text += f"\nbalanced accuracy_{self.title}: {self.bal_acc}"
|
|
|
- text += f"\nconfusion matrix_{self.title}\n {self.con_mat}"
|
|
|
- return text
|
|
|
+ for k in self.heading:
|
|
|
+ if k not in [_tTP, _tTN, _tFP, _tFN]:
|
|
|
+ text += f"{self.title} {k}: {self.data[k]:.3f}\n"
|
|
|
|
|
|
+ return text
|
|
|
|
|
|
def csvHeading(self):
|
|
|
- r = [
|
|
|
- "F1 score",
|
|
|
- "balanced accuracy",
|
|
|
- "TN",
|
|
|
- "FP",
|
|
|
- "FN",
|
|
|
- "TP"
|
|
|
- ]
|
|
|
+ return ";".join(self.heading)
|
|
|
|
|
|
- if self.aps is not None:
|
|
|
- r.append("Aps")
|
|
|
+ def toCSV(self):
|
|
|
+ return ";".join(map(lambda k: f"{self.data[k]:0.3f}", self.heading))
|
|
|
|
|
|
- return ";".join(r)
|
|
|
+ @staticmethod
|
|
|
+ def _enshureConfusionMatrix(c):
|
|
|
+ c0 = [0.0, 0.0]
|
|
|
+ c1 = [0.0, 0.0]
|
|
|
|
|
|
- def toCSV(self):
|
|
|
- r = map(str, [
|
|
|
- self.f1,
|
|
|
- self.bal_acc,
|
|
|
- self.con_mat[0] if len(self.con_mat) > 0 else float(self.con_mat),
|
|
|
- self.con_mat[1] if len(self.con_mat) > 1 else 0,
|
|
|
- self.con_mat[2] if len(self.con_mat) > 2 else 0,
|
|
|
- self.con_mat[3] if len(self.con_mat) > 3 else 0
|
|
|
- ])
|
|
|
+ if len(c) > 0:
|
|
|
+ if len(c[0]) > 0:
|
|
|
+ c0[0] = c[0][0]
|
|
|
+
|
|
|
+ if len(c[0]) > 1:
|
|
|
+ c0[1] = c[0][1]
|
|
|
+
|
|
|
+ if len(c) > 1 and len(c[1]) > 1:
|
|
|
+ c1[0] = c[1][0]
|
|
|
+ c1[1] = c[1][1]
|
|
|
+
|
|
|
+ return [c0, c1]
|
|
|
+
|
|
|
+ def copy(self):
|
|
|
+ r = TestResult(self.title)
|
|
|
+ r.data = self.data.copy()
|
|
|
+ r.heading = self.heading.copy()
|
|
|
+ return r
|
|
|
+
|
|
|
+
|
|
|
+ def addMinMaxAvg(self, mma=None):
|
|
|
+ if mma is None:
|
|
|
+ return (1, self.copy(), self.copy(), self.copy())
|
|
|
+
|
|
|
+ (n, mi, mx, a) = mma
|
|
|
+
|
|
|
+ for k in a.heading:
|
|
|
+ if k in self.heading:
|
|
|
+ a.data[k] += self.data[k]
|
|
|
|
|
|
- if self.aps is not None:
|
|
|
- r.append(str(self.aps))
|
|
|
+ for k in mi.heading:
|
|
|
+ if k in self.heading:
|
|
|
+ mi.data[k] = min(mi.data[k], self.data[k])
|
|
|
|
|
|
- return ";".join(r)
|
|
|
+ for k in mx.heading:
|
|
|
+ if k in self.heading:
|
|
|
+ mx.data[k] = max(mx.data[k], self.data[k])
|
|
|
+
|
|
|
+ return (n + 1, mi, mx, a)
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def finishMinMaxAvg(mma):
|
|
|
+ if mma is None:
|
|
|
+ return (TestResult("?"), TestResult("?"), TestResult("?"))
|
|
|
+ else:
|
|
|
+ (n, mi, ma, a) = mma
|
|
|
+ for k in a.heading:
|
|
|
+ if n > 0:
|
|
|
+ a.data[k] = a.data[k] / n
|
|
|
+ else:
|
|
|
+ a.data[k] = 0.0
|
|
|
+ return (mi, ma, a)
|
|
|
+
|
|
|
+
|
|
|
|
|
|
|
|
|
def lr(ttd):
|
|
|
@@ -146,6 +208,21 @@ def knn(ttd):
|
|
|
return TestResult("KNN", ttd.test.labels, prediction)
|
|
|
|
|
|
|
|
|
+def gb(ttd):
|
|
|
+ """
|
|
|
+ Runs a test for a dataset with the gradient boosting algorithm.
|
|
|
+ It returns a /TestResult/.
|
|
|
+
|
|
|
+ *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
|
|
|
+ """
|
|
|
+ checkType(ttd)
|
|
|
+ tester = GradientBoostingClassifier()
|
|
|
+ tester.fit(ttd.train.data, ttd.train.labels)
|
|
|
+
|
|
|
+ prediction = tester.predict(ttd.test.data)
|
|
|
+ return TestResult("GB", ttd.test.labels, prediction)
|
|
|
+
|
|
|
+
|
|
|
def checkType(t):
|
|
|
if str(type(t)) == "<class 'numpy.ndarray'>":
|
|
|
return t.shape[0] > 0 and all(map(checkType, t))
|