Explorar o código

Added pre-shuffling and type checking.

Kristian Schultz hai 4 anos
pai
achega
ca98b3d8c7
Modificáronse 3 ficheiros con 82 adicións e 4 borrados
  1. 19 2
      Example Exercise.ipynb
  2. 12 2
      library/exercise.py
  3. 51 0
      library/testers.py

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 19 - 2
Example Exercise.ipynb


+ 12 - 2
library/exercise.py

@@ -80,13 +80,20 @@ class Exercise:
         # Reset results array.
         self.results = { name: [] for name in self.testFunctions }
 
+        # If a shuffle function is given then shuffle the data before the
+        # exercise starts.
+        if self.shuffleFunction is not None:
+            self.debug("-> Shuffling data")
+            for _n in range(3):
+                dataset.shuffleWith(self.shuffleFunction)
+
         # Repeat numOfShuffles times
         self.debug("### Start exercise for synthetic point generator")
         for shuffleStep in range(self.numOfShuffles):
             stepTitle = f"Step {shuffleStep + 1}/{self.numOfShuffles}"
             self.debug(f"\n====== {stepTitle} =======")
 
-            # If a shuffle fuction is given then shuffle the data before the next
+            # If a shuffle function is given then shuffle the data before the next
             # exercise starts.
             if self.shuffleFunction is not None:
                 self.debug("-> Shuffling data")
@@ -155,7 +162,10 @@ class Exercise:
 
     def saveResultsTo(self, fileName):
         with open(fileName, "w") as f:
-            for name in self.results:
+            for (n, name) in enumerate(self.results):
+                if n == 0:
+                    f.write("---")
+    
                 f.write(name + "\n")
                 isFirst = True
                 for result in self.results[name]:

+ 51 - 0
library/testers.py

@@ -57,6 +57,37 @@ class TestResult:
         return text
 
 
+    def csvHeading(self):
+        """
+        Returns the column titles matching the values produced by toCSV.
+
+        The titles are joined with ";". A trailing "aps" column is added only
+        when this result has an *aps* value, so heading and data row always
+        have the same number of columns.
+        """
+        r = [
+            "F1 score",
+            "balanced accuracy",
+            "TN",
+            "FP",
+            "FN",
+            "TP"
+            ]
+
+        # Append a column title, not the aps value itself: the value belongs
+        # in the data row and is not a string, so it cannot be joined here.
+        if self.aps is not None:
+            r.append("aps")
+
+        return ";".join(r)
+
+    def toCSV(self):
+        """
+        Returns this result as one ";"-separated data row.
+
+        The row holds *f1*, *bal_acc* and the first four entries of *con_mat*
+        (written under the TN, FP, FN, TP titles of csvHeading), followed by
+        *aps* when it is not None. Missing *con_mat* entries are written as 0.
+        """
+        # Pad to exactly four cells so the row keeps its column count even
+        # when con_mat has fewer than four entries.
+        cells = [
+            self.con_mat[i] if len(self.con_mat) > i else 0
+            for i in range(4)
+            ]
+
+        r = [self.f1, self.bal_acc] + cells
+
+        if self.aps is not None:
+            r.append(self.aps)
+
+        # str.join only accepts strings, so convert every value first.
+        return ";".join(str(v) for v in r)
+
+
 def lr(ttd):
     """
     Runs a test for a dataset with the logistic regression algorithm.
@@ -64,6 +95,7 @@ def lr(ttd):
 
     *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
     """
+    checkType(ttd)
     logreg = LogisticRegression(
         C=1e5,
         solver='lbfgs',
@@ -86,6 +118,7 @@ def svm(ttd):
 
     *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
     """
+    checkType(ttd)
     svmTester = sklearn.svm.SVC(
         kernel='linear',
         decision_function_shape='ovo',
@@ -105,8 +138,26 @@ def knn(ttd):
 
     *ttd* is a /library.dataset.TrainTestData/ instance containing data to test.
     """
+    checkType(ttd)
     knnTester = KNeighborsClassifier(n_neighbors=10)
     knnTester.fit(ttd.train.data, ttd.train.labels)
 
     prediction = knnTester.predict(ttd.test.data)
     return TestResult("KNN", ttd.test.labels, prediction)
+
+
+def checkType(t):
+    """
+    Recursively checks that *t* contains only numbers.
+
+    *t* may be a real number (Python or numpy scalar, but not bool), a list or
+    numpy array of accepted values, a /library.dataset.DataSet/ (its *data0*
+    and *data1* are checked) or a /library.dataset.TrainTestData/ (its *train*
+    and *test* are checked).
+
+    Returns True if everything is valid and False if a list or array is empty.
+    Raises a ValueError when a value of any other type is found.
+    """
+    # Local import so the function does not depend on the module's import block.
+    import numbers
+
+    # Project and numpy container types are matched by qualified name so that
+    # this function does not have to import them.
+    typeName = f"{type(t).__module__}.{type(t).__qualname__}"
+
+    # numbers.Real also covers the numpy integer and floating scalars that come
+    # out of iterating an ndarray. bool is a subclass of int but is not data.
+    if isinstance(t, numbers.Real) and not isinstance(t, bool):
+        return True
+
+    if typeName == "numpy.ndarray":
+        return t.shape[0] > 0 and all(map(checkType, t))
+
+    if isinstance(t, list):
+        return len(t) > 0 and all(map(checkType, t))
+
+    if typeName == "library.dataset.DataSet":
+        return checkType(t.data0) and checkType(t.data1)
+
+    if typeName == "library.dataset.TrainTestData":
+        return checkType(t.train) and checkType(t.test)
+
+    raise ValueError("expected int, float, or list, dataset of int, float but got " + str(type(t)))
+    

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio