|
@@ -0,0 +1,220 @@
|
|
|
|
|
+import numpy as np
|
|
|
|
|
+import matplotlib.pyplot as plt
|
|
|
|
|
+from library.analysis import testSets, generators
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# "Average" is a pseudo data set: it is appended to the imported list so the
# plots, which iterate over testSets, get one extra row for the mean scores.
testSets.append("Average")

# Keys under which the two scores are stored in every result record.
f1Score = "f1 score"
kScore = "cohens kappa score"

# Data set names that are checked when the hard-coded ProWRAS scores are loaded.
ignoreSet = ["yeast_me2"]

# First element of each generator entry; it is used as the sub-directory of
# data_result/ from which that generator's result files are read.
gans = [entry[0] for entry in generators]

# Keys that are looked up in each result record (one per algorithm).
algs = {"LR", "GB", "KNN"}
|
|
|
|
|
+
|
|
|
|
|
# Names belonging to the hard-coded score tables below. Every table has one
# value per entry of this list, in the same order; the final "Average" entry
# is the mean that was supplied together with the individual scores.
dataset = [
    "folding_abalone9-18",
    "folding_abalone_17_vs_7_8_9_10",
    "folding_car-vgood",
    "folding_car_good",
    "folding_flare-F",
    "folding_hypothyroid",
    "folding_kddcup-guess_passwd_vs_satan",
    "folding_kr-vs-k-three_vs_eleven",
    "folding_kr-vs-k-zero-one_vs_draw",
    "folding_shuttle-2_vs_5",
    "folding_winequality-red-4",
    "folding_yeast4",
    "folding_yeast5",
    "folding_yeast6",
    "folding_ozone_level",
    "folding_yeast_me2",
    "Average",
]

# ProWRAS scores per algorithm: "_f1" tables hold the f1 score, "_k" tables
# the kappa score. Presumably transcribed from the ProWRAS publication (the
# file later stores them under the key "ProWRAS-paper") -- TODO confirm.
knn_ProWRAS_f1 = [
    0.384, 0.347, 0.818, 0.641, 0.301, 0.553, 1.0, 0.94, 0.9,
    1.0, 0.141, 0.308, 0.714, 0.545, 0.556, 0.339, 0.538,
]
knn_ProWRAS_k = [
    0.35, 0.328, 0.81, 0.622, 0.263, 0.528, 1.0, 0.938, 0.896,
    1.0, 0.093, 0.268, 0.704, 0.531, 0.526, 0.305, 0.515,
]

lr_ProWRAS_f1 = [
    0.488, 0.315, 0.407, 0.103, 0.341, 0.446, 0.99, 0.928, 0.853,
    1.0, 0.158, 0.308, 0.591, 0.326, 0.347, 0.295, 0.472,
]
lr_ProWRAS_k = [
    0.446, 0.287, 0.371, 0.033, 0.3, 0.407, 0.99, 0.926, 0.847,
    1.0, 0.119, 0.268, 0.574, 0.3, 0.319, 0.254, 0.441,
]

gb_ProWRAS_f1 = [
    0.385, 0.335, 0.959, 0.863, 0.320, 0.803, 0.998, 0.995, 0.969,
    1.0, 0.156, 0.335, 0.735, 0.514, 0.329, 0.225, 0.600,
]
# NOTE(review): the yeast_me2 kappa below (0.328) is larger than the matching
# f1 value (0.225); it is the only pair in these tables where that happens, so
# it may be a transcription slip -- please verify against the source.
gb_ProWRAS_k = [
    0.341, 0.310, 0.957, 0.857, 0.291, 0.794, 0.998, 0.995, 0.967,
    1.0, 0.115, 0.303, 0.726, 0.501, 0.303, 0.328, 0.589,
]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Key under which the hard-coded ProWRAS scores are stored in `statistic`.
ProWrasPaper = "ProWRAS-paper"

# ignoreSet lists bare names ("yeast_me2") while `dataset` uses the
# "folding_"-prefixed spelling, so a plain `n in ignoreSet` test never matched
# and nothing was ever ignored. Accept both spellings here.
ignoredNames = set(ignoreSet) | {"folding_" + name for name in ignoreSet}

# statistic[source][data set][algorithm] -> {kScore: ..., f1Score: ...}
statistic = {ProWrasPaper: {}}

# One (algorithm key, f1 table, kappa table) triple per algorithm; the tables
# are parallel to `dataset`.
paperScores = (
    ("LR", lr_ProWRAS_f1, lr_ProWRAS_k),
    ("GB", gb_ProWRAS_f1, gb_ProWRAS_k),
    ("KNN", knn_ProWRAS_f1, knn_ProWRAS_k),
)

for algName, f1Values, kValues in paperScores:
    for n, f1, k in zip(dataset, f1Values, kValues):
        if n in ignoredNames:
            continue
        statistic[ProWrasPaper].setdefault(n, {})[algName] = {kScore: k, f1Score: f1}

# Drop the ignored names from the list of data set names as well.
dataset = [n for n in dataset if n not in ignoredNames]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def loadDiagnoseData(ganType, datasetName):
    """Read the averaged scores stored in one result file.

    The file ``data_result/<ganType>/<datasetName>.csv`` is parsed as a
    sequence of blocks: the first line of a block is taken as the block's
    name, a line consisting of ``---`` ends the block, and every other line is
    split at ``;``. For a line whose first field is ``avg``, field 5 is stored
    as the f1 score and field 6 as the kappa score of the current block.

    Args:
        ganType: Name of the sub-directory below ``data_result/``.
        datasetName: File name without the ``.csv`` extension.

    Returns:
        A dict mapping each block name to ``{f1Score: float, kScore: float}``.
        The dict is empty when the file does not exist; in that case a
        "Missing file" message is printed.
    """
    fileName = f"data_result/{ganType}/{datasetName}.csv"
    averages = {}
    try:
        with open(fileName) as handle:
            blockName = ""
            expectName = True  # the next line read is a block name
            for rawLine in handle:
                text = rawLine.strip()
                if expectName:
                    blockName = text
                    expectName = False
                    continue
                if text == "---":
                    expectName = True
                    continue
                fields = text.split(";")
                if fields[0] == "avg":
                    averages[blockName] = {
                        f1Score: float(fields[5]),
                        kScore: float(fields[6]),
                    }
    except FileNotFoundError:
        print(f"Missing file: {fileName}")
    return averages
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Load the stored averages of every generator for every real data set; the
# "Average" pseudo data set has no result file and is skipped here.
for gan in gans:
    perDataset = statistic.setdefault(gan, {})
    for ds in testSets:
        if ds == "Average":
            continue
        perDataset[ds] = loadDiagnoseData(gan, ds)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# Every data set name seen in any source; printed below for inspection.
ks = set()

# Compute, per source, the mean f1 and kappa score of each algorithm over all
# real data sets and store it as that source's "Average" entry (replacing an
# "Average" entry that may already be there).
for gan in statistic:
    f1 = {n: 0.0 for n in algs}
    k = {n: 0.0 for n in algs}
    c = 0

    for ds in statistic[gan]:
        ks.add(ds)
        if ds == "Average":
            continue
        c += 1
        for n in algs:
            if n in statistic[gan][ds]:
                f1[n] += statistic[gan][ds][n][f1Score]
                k[n] += statistic[gan][ds][n][kScore]

    # NOTE(review): a data set without a result for an algorithm still counts
    # in the divisor, i.e. a missing result is averaged in as 0.0.
    # Guard against a source without any real data set, which used to raise
    # ZeroDivisionError; the sums are all 0.0 then, so the averages are too.
    divisor = c if c else 1
    statistic[gan]["Average"] = {
        n: {f1Score: f1[n] / divisor, kScore: k[n] / divisor}
        for n in algs
    }

print(ks)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def cleanupName(name):
    """Return *name* without its leading source prefixes.

    The prefixes ``folding_``, ``imblearn_`` and ``kaggle_`` are removed from
    the start of the name, repeatedly and in any order, until none of them is
    left in front. Only leading occurrences are removed, so a name that merely
    contains one of these tokens further inside is left intact.

    Args:
        name: Data set name, e.g. ``"folding_yeast4"``.

    Returns:
        The shortened name, e.g. ``"yeast4"``.
    """
    prefixes = ("folding_", "imblearn_", "kaggle_")
    stripped = True
    while stripped:
        stripped = False
        for prefix in prefixes:
            if name.startswith(prefix):
                name = name[len(prefix):]
                stripped = True
    return name
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def showDiagnose(algo, score):
    """Plot one score of one algorithm for all sources and data sets.

    A grouped horizontal bar chart is drawn with one row per entry of
    ``testSets`` and one bar per source in ``statistic``. A source that has no
    record for a data set or for ``algo`` gets a bar of length 0.0. The chart
    is saved as ``data_result/statistic-<algo>-<score>.pdf`` and then shown.

    Args:
        algo: Algorithm key to look up in the result records, e.g. ``"LR"``.
        score: Score key to plot (``f1Score`` or ``kScore``).
    """
    print(f"{algo}: {score}")

    sources = list(statistic.keys())

    # All bars of one data set together occupy 0.8 of the row height.
    w = 0.8 / len(sources)
    bar = list(range(len(testSets)))
    plt.figure(figsize=(20, 18))
    for g in sources:
        values = []
        for d in testSets:
            record = statistic[g].get(d, {}).get(algo)
            values.append(record[score] if record is not None else 0.0)
        plt.barh(bar, values, w, label=g)

        # Shift the next source's bars down by one bar width.
        bar = [i - w for i in bar]

    # barh puts the values on the x axis and the categories on the y axis.
    plt.xlabel(score)
    plt.ylabel("Dataset")
    plt.yticks(range(len(testSets)), [cleanupName(name) for name in testSets])
    plt.legend()
    plt.savefig(f"data_result/statistic-{algo}-{score}.pdf")
    plt.show()
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def showDiagnoseAverage(score, onlyOneBar=False):
    """Plot the "Average" entry of every source, one row per algorithm.

    A horizontal bar chart is drawn with one row per algorithm in ``algs`` and
    one bar per source in ``statistic``. The chart is saved as
    ``data_result/statistic-Average-<score>-<O|M>.pdf`` and then shown.

    Args:
        score: Score key to plot (``f1Score`` or ``kScore``).
        onlyOneBar: When true, the bars of all sources are drawn at the same
            position of a row, so they overlap (file suffix ``O``). Otherwise
            each source gets its own bar within the row (file suffix ``M``).
    """
    print(f"Average: {score}")

    sources = list(statistic.keys())
    # algs is a set; sort it so the rows and their tick labels have the same,
    # reproducible order on every run.
    algorithms = sorted(algs)

    # All bars of one algorithm together occupy 0.8 of the row height.
    w = 0.8 / len(sources)
    if onlyOneBar:
        barType = "O"
        bar = list(range(len(algorithms)))
    else:
        barType = "M"
        bar = [0.8 + i - w for i in range(len(algorithms))]
    plt.figure(figsize=(20, 18))
    for g in sources:
        average = statistic[g]["Average"]
        values = [
            average[algo][score] if algo in average else 0.0
            for algo in algorithms
        ]
        plt.barh(bar, values, w, label=g)

        if not onlyOneBar:
            # Shift the next source's bars down by one bar width.
            bar = [i - w for i in bar]

    # barh puts the values on the x axis and the categories on the y axis.
    plt.xlabel(score)
    plt.ylabel("Algorithm")
    plt.yticks(range(len(algorithms)), algorithms)
    plt.legend()
    plt.savefig(f"data_result/statistic-Average-{score}-{barType}.pdf")
    plt.show()
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
# One chart per algorithm and score, f1 first and kappa second.
for a in algs:
    for metric in (f1Score, kScore):
        showDiagnose(a, metric)

# Charts of the averages: separate bars per source for both scores, plus the
# overlapping single-bar variant for the kappa score.
showDiagnoseAverage(f1Score)
showDiagnoseAverage(kScore)
showDiagnoseAverage(kScore, True)
|