Explorar el Código

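Return the average results from Exercise.saveResultsTo (optionally collected per dataset by runExerciseForSpheredNoise), and report points present in both classes in loadDataset.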

Kristian Schultz hace 4 años
padre
commit
4e36f789b2
Se han modificado 2 ficheros con 36 adiciones y 3 borrados
  1. 33 2
      library/analysis.py
  2. 3 1
      library/exercise.py

+ 33 - 2
library/analysis.py

@@ -12,6 +12,28 @@ from imblearn.datasets import fetch_datasets
 
 
 def loadDataset(datasetName):
+    def isSame(xs, ys):
+        for (x, y) in zip(xs, ys):
+            if x != y:
+                return False
+        return True
+    
+    def isIn(ys):
+        def f(x):
+            for y in ys:
+                if isSame(x,y):
+                    return True
+            return False
+        return f
+
+    def isNotIn(ys):
+        def f(x):
+            for y in ys:
+                if isSame(x,y):
+                    return False
+            return True
+        return f
+
     pickle_in = open(f"{datasetName}.pickle", "rb")
     pickle_dict = pickle.load(pickle_in)
 
@@ -24,6 +46,13 @@ def loadDataset(datasetName):
     label_0 = list(np.where(labels == 0)[0])
     features_1 = features[label_1]
     features_0 = features[label_0]
+    cut = np.array(list(filter(isIn(features_0), features_1)))
+    if len(cut) > 0:
+        print(f"non empty cut in {datasetName}! ({len(cut)} points)")
+    #    print(f"{len(features_0)}/{len(features_1)} point before")
+    #    features_0 = np.array(list(filter(isNotIn(cut), features_0)))
+    #    features_1 = np.array(list(filter(isNotIn(cut), features_1)))
+    #    print(f"{len(features_0)}/{len(features_1)} points after")
     
     return DataSet(data0=features_0, data1=features_1)
 
@@ -85,7 +114,7 @@ def runExerciseForRepeater(datasetName):
     exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
     exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
     
-def runExerciseForSpheredNoise(datasetName):
+def runExerciseForSpheredNoise(datasetName, resultList=None):
     ganName = "SpheredNoise"
     print()
     print()
@@ -99,8 +128,10 @@ def runExerciseForSpheredNoise(datasetName):
     shuffler = genShuffler()
     exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
     exercise.run(gan, data)
-    exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
+    avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
     exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
+    if resultList is not None:
+        resultList[datasetName] = avg
     
 testSets = [
     "folding_abalone_17_vs_7_8_9_10",

+ 3 - 1
library/exercise.py

@@ -179,6 +179,7 @@ class Exercise:
 
 
     def saveResultsTo(self, fileName):
+        avgResults = {}
         with open(fileName, "w") as f:
             for (n, name) in enumerate(self.results):
                 if n > 0:
@@ -200,7 +201,8 @@ class Exercise:
                 f.write(f"max;" + mx.toCSV() + "\n")
                 f.write(f"avg;" + avg.toCSV() + "\n")
                 f.write(f"min;" + mi.toCSV() + "\n")
-
+                avgResults[name] = avg
+        return avgResults
 
 
 def plotCloud(data0, data1, dataNew):