há 4 anos atrás · cfb2d687fd
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,14 @@
 
				 
			
 
				 benchmark: docker/container.ok
			
 
				-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark python3 --version
			
 
				-	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark
			
 
				+	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/run_all_exercises.py
			
 
				+	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
			
 
				 
			
 
				 
			
 
				+statistics: docker/container.ok
			
 
				+	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark python3 /benchmark/data/statistics.py
			
 
				+	docker container run --rm -it -v `(pwd)`:/benchmark/data convganbenchmark chown -R `(./getMyUid)` /benchmark/data/data_result
			
 
				+
			
 
				 docker/container.ok: docker/Dockerfile docker/run.sh docker/requirements.txt
			
 
				 	docker build -t convganbenchmark docker/.
			
 
				 	date > $@
			
 
				+
			
--- a/getMyUid
+++ b/getMyUid
@@ -0,0 +1,2 @@
 
				+#!/bin/bash
			
 
				+echo $UID
			
--- a/library/analysis.py
+++ b/library/analysis.py
@@ -1,6 +1,6 @@
 
				 from library.exercise import Exercise
			
 
				 from library.dataset import DataSet, TrainTestData
			
 
				-from library.generators import SimpleGan, Repeater, SpheredNoise, ConvGAN, StupidToyListGan, CtGAN
			
 
				+from library.generators import ProWRAS, SimpleGan, Repeater, SpheredNoise, ConvGAN, StupidToyListGan, CtGAN
			
 
				 
			
 
				 import pickle
			
 
				 import numpy as np
			
@@ -260,10 +260,11 @@ def runAllTestSets(dataSetList):
 
				 
			
 
				 
			
 
				 
			
 
				-generators = [ ("Repeater",      lambda _data: Repeater())
			
 
				+generators = [ ("ProWRAS",       lambda _data: ProWRAS())
			
 
				+             , ("Repeater",      lambda _data: Repeater())
			
 
				              #, ("SpheredNoise",  lambda _data: SpheredNoise())
			
 
				              , ("SimpleGAN",     lambda data: SimpleGan(numOfFeatures=data.data0.shape[1]))
			
 
				+             , ("ctGAN",         lambda data: CtGAN(data.data0.shape[1]))
			
 
				              , ("convGAN",       lambda data: ConvGAN(data.data0.shape[1], neb=5, gen=5))
			
 
				              , ("convGAN-full",  lambda data: ConvGAN(data.data0.shape[1], neb=data.data0.shape[1], gen=data.data0.shape[1]))
			
 
				-             , ("ctGAN",         lambda data: CtGAN(data.data0.shape[1]))
			
 
				              ]
			
--- a/statistics.py
+++ b/statistics.py
@@ -0,0 +1,220 @@
 
				+import numpy as np
			
 
				+import matplotlib.pyplot as plt
			
 
				+from library.analysis import testSets, generators
			
 
				+
			
 
				+
			
 
				+testSets.append("Average")
			
 
				+
			
 
				+kScore = "cohens kappa score"
			
 
				+f1Score = "f1 score"
			
 
				+
			
 
				+ignoreSet = ["yeast_me2"]
			
 
				+
			
 
				+gans = [g[0] for g in generators]
			
 
				+algs = {"LR", "GB", "KNN"}
			
 
				+
			
 
				+dataset  = [
			
 
				+    "folding_abalone9-18",
			
 
				+    "folding_abalone_17_vs_7_8_9_10",
			
 
				+    "folding_car-vgood",
			
 
				+    "folding_car_good",
			
 
				+    "folding_flare-F",
			
 
				+    "folding_hypothyroid",
			
 
				+    "folding_kddcup-guess_passwd_vs_satan",
			
 
				+    "folding_kr-vs-k-three_vs_eleven",
			
 
				+    "folding_kr-vs-k-zero-one_vs_draw",
			
 
				+    "folding_shuttle-2_vs_5",
			
 
				+    "folding_winequality-red-4",
			
 
				+    "folding_yeast4",
			
 
				+    "folding_yeast5",
			
 
				+    "folding_yeast6",
			
 
				+    "folding_ozone_level",
			
 
				+    "folding_yeast_me2",
			
 
				+    "Average"
			
 
				+    ]
			
 
				+
			
 
				+knn_ProWRAS_f1 = [0.384,0.347,0.818,0.641,0.301,0.553,1.0,0.94,0.9,1.0,0.141,0.308,0.714,0.545,0.556,0.339,0.538]
			
 
				+knn_ProWRAS_k = [0.35,0.328,0.81,0.622,0.263,0.528,1.0,0.938,0.896,1.0,0.093,0.268,0.704,0.531,0.526,0.305,0.515]
			
 
				+
			
 
				+lr_ProWRAS_f1 = [0.488,0.315,0.407,0.103,0.341,0.446,0.99,0.928,0.853,1.0,0.158,0.308,0.591,0.326,0.347,0.295,0.472]
			
 
				+lr_ProWRAS_k = [0.446,0.287,0.371,0.033,0.3,0.407,0.99,0.926,0.847,1.0,0.119,0.268,0.574,0.3,0.319,0.254,0.441]
			
 
				+
			
 
				+gb_ProWRAS_f1 = [0.385,0.335,0.959,0.863,0.320,0.803,0.998,0.995,0.969,1.0,0.156,0.335,0.735,0.514,0.329,0.225,0.600]
			
 
				+gb_ProWRAS_k = [0.341,0.310,0.957,0.857,0.291,0.794,0.998,0.995,0.967,1.0,0.115,0.303,0.726,0.501,0.303,0.328,0.589]
			
 
				+
			
 
				+
			
 
				+ProWrasPaper = "ProWRAS-paper"
			
 
				+
			
 
				+statistic = { ProWrasPaper: {} }
			
 
				+for (n, f1, k) in zip(dataset, lr_ProWRAS_f1, lr_ProWRAS_k):
			
 
				+    if n in ignoreSet:
			
 
				+        continue
			
 
				+    
			
 
				+    if n not in statistic[ProWrasPaper]:
			
 
				+        statistic[ProWrasPaper][n] = {}
			
 
				+    
			
 
				+    statistic[ProWrasPaper][n]["LR"] = { kScore: k, f1Score: f1 }
			
 
				+
			
 
				+
			
 
				+for (n, f1, k) in zip(dataset, gb_ProWRAS_f1, gb_ProWRAS_k):
			
 
				+    if n in ignoreSet:
			
 
				+        continue
			
 
				+    
			
 
				+    if n not in statistic[ProWrasPaper]:
			
 
				+        statistic[ProWrasPaper][n] = {}
			
 
				+    
			
 
				+    statistic[ProWrasPaper][n]["GB"] = { kScore: k, f1Score: f1 }
			
 
				+
			
 
				+    
			
 
				+for (n, f1, k) in zip(dataset, knn_ProWRAS_f1, knn_ProWRAS_k):
			
 
				+    if n in ignoreSet:
			
 
				+        continue
			
 
				+    
			
 
				+    if n not in statistic[ProWrasPaper]:
			
 
				+        statistic[ProWrasPaper][n] = {}
			
 
				+    
			
 
				+    statistic[ProWrasPaper][n]["KNN"] = { kScore: k, f1Score: f1 }
			
 
				+    
			
 
				+
			
 
				+dataset = list(filter(lambda n: n not in ignoreSet, dataset))
			
 
				+
			
 
				+
			
 
				+def loadDiagnoseData(ganType, datasetName):
			
 
				+    fileName = f"data_result/{ganType}/{datasetName}.csv"
			
 
				+    r = {}
			
 
				+    try:
			
 
				+        with open(fileName) as f:
			
 
				+            newBlock = True
			
 
				+            n = ""
			
 
				+            for line in f:
			
 
				+                line = line.strip()
			
 
				+                if newBlock:
			
 
				+                    n = line
			
 
				+                    newBlock = False
			
 
				+                elif line == "---":
			
 
				+                    newBlock = True
			
 
				+                else:
			
 
				+                    parts = line.split(";")
			
 
				+                    if parts[0] == "avg":
			
 
				+                        r[n] = { f1Score: float(parts[5]), kScore: float(parts[6]) }
			
 
				+    except FileNotFoundError:  # best effort: a missing result file is reported and yields an empty dict
			
 
				+        print(f"Missing file: {fileName}")
			
 
				+    return r
			
 
				+
			
 
				+
			
 
				+
			
 
				+for gan in gans:
			
 
				+    if gan not in statistic:
			
 
				+        statistic[gan] = {}
			
 
				+    
			
 
				+    for ds in testSets:
			
 
				+        if ds != "Average":
			
 
				+            statistic[gan][ds] = loadDiagnoseData(gan, ds)
			
 
				+
			
 
				+
			
 
				+ks = set()
			
 
				+
			
 
				+for gan in statistic.keys():
			
 
				+    f1 = { n: 0.0 for n in algs }
			
 
				+    k =  { n: 0.0 for n in algs }
			
 
				+    c = 0
			
 
				+
			
 
				+    for ds in statistic[gan].keys():
			
 
				+        ks.add(ds)
			
 
				+        if ds != "Average":
			
 
				+            c += 1
			
 
				+            for n in algs:
			
 
				+                if n in statistic[gan][ds].keys():
			
 
				+                    f1[n] += statistic[gan][ds][n][f1Score]
			
 
				+                    k[n] += statistic[gan][ds][n][kScore]
			
 
				+
			
 
				+    avg = {}
			
 
				+    for n in algs:
			
 
				+        avg[n] = { f1Score: f1[n] / c, kScore: k[n] / c }
			
 
				+    statistic[gan]["Average"] = avg
			
 
				+
			
 
				+print(ks)
			
 
				+
			
 
				+
			
 
				+def cleanupName(name):
			
 
				+    return name.replace("folding_", "").replace("imblearn_", "").replace("kaggle_", "")
			
 
				+
			
 
				+
			
 
				+
			
 
				+def showDiagnose(algo, score):
			
 
				+    def gr(n):
			
 
				+        if n in resultList:
			
 
				+            return resultList[n][algo].data[score]
			
 
				+        else:
			
 
				+            return 0.0
			
 
				+    
			
 
				+    print(f"{algo}: {score}")
			
 
				+    
			
 
				+    gans = list(statistic.keys())
			
 
				+
			
 
				+    w = 0.8 / len(gans)
			
 
				+    bar = list(range(len(testSets)))
			
 
				+    plt.figure(figsize=(20, 18))
			
 
				+    for g in gans:
			
 
				+        values = [
			
 
				+                 (statistic[g][d][algo][score] if algo in statistic[g][d].keys() else 0.0) if d in statistic[g] else 0.0
			
 
				+                 for d in testSets
			
 
				+                 ]
			
 
				+        plt.barh(bar, values, w, label=g)
			
 
				+        
			
 
				+        
			
 
				+        bar = [i - w for i in bar]
			
 
				+
			
 
				+    plt.xlabel(score)  # barh: bar lengths (the score values) run along the x axis
			
 
				+    plt.ylabel("Dataset")  # barh: one y position per entry of testSets
			
 
				+    plt.yticks(range(len(testSets)), [cleanupName(name) for name in testSets])
			
 
				+    #plt.yticks(rotation="vertical")
			
 
				+    plt.legend()
			
 
				+    plt.savefig(f"data_result/statistic-{algo}-{score}.pdf")
			
 
				+    plt.show()
			
 
				+
			
 
				+
			
 
				+def showDiagnoseAverage(score, onlyOneBar=False):
			
 
				+    def gr(n):
			
 
				+        if n in resultList:
			
 
				+            return resultList[n][algo].data[score]
			
 
				+        else:
			
 
				+            return 0.0
			
 
				+    
			
 
				+    print(f"Average: {score}")
			
 
				+    
			
 
				+    gans = list(statistic.keys())
			
 
				+
			
 
				+    w = 0.8 / len(gans)
			
 
				+    if onlyOneBar:
			
 
				+        barType = "O"
			
 
				+        bar = range(len(algs))
			
 
				+    else:
			
 
				+        barType = "M"
			
 
				+        bar = [0.8 + i - w for i in range(len(algs)) ]
			
 
				+    plt.figure(figsize=(20, 18))
			
 
				+    for g in gans:
			
 
				+        values = [
			
 
				+                 (statistic[g]["Average"][algo][score] if algo in statistic[g]["Average"].keys() else 0.0)
			
 
				+                 for algo in algs
			
 
				+                 ]
			
 
				+        plt.barh(bar, values, w, label=g)
			
 
				+        
			
 
				+        if not onlyOneBar:
			
 
				+            bar = [i - w for i in bar]
			
 
				+
			
 
				+    plt.xlabel(score)  # barh: bar lengths (the averaged score values) run along the x axis
			
 
				+    plt.ylabel("Algorithm")  # barh: y ticks are the classifier names taken from algs
			
 
				+    plt.yticks(range(len(algs)), algs)
			
 
				+    plt.legend()
			
 
				+    plt.savefig(f"data_result/statistic-Average-{score}-{barType}.pdf")
			
 
				+    plt.show()
			
 
				+
			
 
				+
			
 
				+for a in algs:
			
 
				+    showDiagnose(a, f1Score)
			
 
				+    showDiagnose(a, kScore)
			
 
				+    
			
 
				+showDiagnoseAverage(f1Score)
			
 
				+showDiagnoseAverage(kScore)
			
 
				+showDiagnoseAverage(kScore, True)