| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- import numpy as np
- import matplotlib.pyplot as plt
- from library.analysis import testSets, generators
# Metric names; they serve both as dictionary keys inside `statistic`
# and as axis labels / file name parts in the plots.
kScore = "cohens kappa score"
f1Score = "f1 score"

# "Average" is handled as a pseudo data set so that the per-data-set charts
# also get a row for the mean scores.  This mutates the imported list.
testSets.append("Average")

# Data sets to leave out of the paper reference values.
# NOTE(review): membership is tested by exact equality against the names in
# `dataset`, which all carry a "folding_" prefix, so "yeast_me2" currently
# matches nothing -- confirm whether "folding_yeast_me2" was intended.
ignoreSet = ["yeast_me2"]

# Names of all generators whose result files are loaded.
gans = generators.keys()

# Classifier labels expected as block names in the result files.
algs = {"LR", "GB", "KNN", "GAN"}
# Data sets for which reference scores are recorded below.  The last entry,
# "Average", holds the mean over the data sets.
dataset = [
    f"folding_{stem}"
    for stem in (
        "abalone9-18",
        "abalone_17_vs_7_8_9_10",
        "car-vgood",
        "car_good",
        "flare-F",
        "hypothyroid",
        "kddcup-guess_passwd_vs_satan",
        "kr-vs-k-three_vs_eleven",
        "kr-vs-k-zero-one_vs_draw",
        "shuttle-2_vs_5",
        "winequality-red-4",
        "yeast4",
        "yeast5",
        "yeast6",
        "ozone_level",
        "yeast_me2",
    )
] + ["Average"]

# Reference scores (presumably taken from the ProWRAS paper -- they are filed
# under the "ProWRAS-paper" key).  Every list is index-aligned with `dataset`:
# entry i belongs to dataset[i].  `_f1` lists hold f1 scores, `_k` lists hold
# Cohen's kappa scores; the prefix names the classifier (knn / lr / gb).
knn_ProWRAS_f1 = [
    0.384, 0.347, 0.818, 0.641, 0.301, 0.553,
    1.0, 0.94, 0.9, 1.0, 0.141, 0.308,
    0.714, 0.545, 0.556, 0.339, 0.538,
]
knn_ProWRAS_k = [
    0.35, 0.328, 0.81, 0.622, 0.263, 0.528,
    1.0, 0.938, 0.896, 1.0, 0.093, 0.268,
    0.704, 0.531, 0.526, 0.305, 0.515,
]
lr_ProWRAS_f1 = [
    0.488, 0.315, 0.407, 0.103, 0.341, 0.446,
    0.99, 0.928, 0.853, 1.0, 0.158, 0.308,
    0.591, 0.326, 0.347, 0.295, 0.472,
]
lr_ProWRAS_k = [
    0.446, 0.287, 0.371, 0.033, 0.3, 0.407,
    0.99, 0.926, 0.847, 1.0, 0.119, 0.268,
    0.574, 0.3, 0.319, 0.254, 0.441,
]
gb_ProWRAS_f1 = [
    0.385, 0.335, 0.959, 0.863, 0.320, 0.803,
    0.998, 0.995, 0.969, 1.0, 0.156, 0.335,
    0.735, 0.514, 0.329, 0.225, 0.600,
]
gb_ProWRAS_k = [
    0.341, 0.310, 0.957, 0.857, 0.291, 0.794,
    0.998, 0.995, 0.967, 1.0, 0.115, 0.303,
    0.726, 0.501, 0.303, 0.328, 0.589,
]

# Key under which the reference scores are stored in `statistic`.
ProWrasPaper = "ProWRAS-paper"

# statistic[generator][data set][classifier] -> {metric name: value}
statistic = {ProWrasPaper: {}}
# File the reference scores under statistic[ProWrasPaper][data set][classifier].
# The classifiers are processed in the order LR, GB, KNN; each score list is
# paired with `dataset` position by position.
paperStats = statistic[ProWrasPaper]
for alg, f1s, kappas in (
    ("LR", lr_ProWRAS_f1, lr_ProWRAS_k),
    ("GB", gb_ProWRAS_f1, gb_ProWRAS_k),
    ("KNN", knn_ProWRAS_f1, knn_ProWRAS_k),
):
    for (n, f1, k) in zip(dataset, f1s, kappas):
        if n in ignoreSet:
            continue
        # Create the per-data-set dict on first use, then add this classifier.
        paperStats.setdefault(n, {})[alg] = {kScore: k, f1Score: f1}

# From here on `dataset` only lists the data sets that were not ignored.
dataset = [n for n in dataset if n not in ignoreSet]
def loadDiagnoseData(ganType, datasetName):
    """Read the averaged scores from one result file.

    The file ``data_result/<ganType>/<datasetName>.csv`` is read as a sequence
    of blocks.  The first line of a block is its name (presumably the
    classifier label -- callers look the result up by the entries of `algs`),
    a line consisting of ``---`` ends the block, and every other line is a
    ``;``-separated row.  Only rows whose first field is ``avg`` are used:
    field 5 is stored as the f1 score and field 6 as the kappa score.

    Parameters:
        ganType: directory name below ``data_result``.
        datasetName: file name without the ``.csv`` extension.

    Returns:
        dict mapping block name -> ``{f1Score: float, kScore: float}``.
        If the file does not exist, a message is printed and an empty dict
        is returned.
    """
    fileName = f"data_result/{ganType}/{datasetName}.csv"
    results = {}
    try:
        with open(fileName) as handle:
            expectHeader = True
            blockName = ""
            for rawLine in handle:
                text = rawLine.strip()

                # The line right after a separator (or the very first line)
                # names the block that follows.
                if expectHeader:
                    blockName = text
                    expectHeader = False
                    continue

                if text == "---":
                    expectHeader = True
                    continue

                fields = text.split(";")
                if fields[0] != "avg":
                    continue

                results[blockName] = {
                    f1Score: float(fields[5]),
                    kScore: float(fields[6]),
                }
    except FileNotFoundError:
        print(f"Missing file: {fileName}")
    return results
# Load the result file of every generator for every real test set
# ("Average" is a pseudo data set and has no file).
for gan in gans:
    statistic.setdefault(gan, {})

    for ds in testSets:
        if ds == "Average":
            continue
        statistic[gan][ds] = loadDiagnoseData(gan, ds)

# (Re)compute the "Average" entry of every generator, including the
# reference values: per classifier, the scores of all data sets are summed
# and divided by the number of data sets.  A data set without a result for a
# classifier still counts in the divisor, i.e. it contributes a score of 0.
ks = set()  # every data set name seen, printed below for inspection
for gan, perDataset in statistic.items():
    f1 = dict.fromkeys(algs, 0.0)
    k = dict.fromkeys(algs, 0.0)
    c = 0
    for ds, results in perDataset.items():
        ks.add(ds)
        if ds == "Average":
            continue
        c += 1
        for n in algs:
            if n in results:
                f1[n] += results[n][f1Score]
                k[n] += results[n][kScore]
    avg = {n: {f1Score: f1[n] / c, kScore: k[n] / c} for n in algs}
    statistic[gan]["Average"] = avg
print(ks)
def cleanupName(name):
    """Return *name* with the data source markers removed.

    Every occurrence of "folding_", then "imblearn_", then "kaggle_" is
    deleted from the string (not only a leading one).
    """
    for marker in ("folding_", "imblearn_", "kaggle_"):
        name = name.replace(marker, "")
    return name
def showDiagnose(algo, score):
    """Plot one metric of one classifier for all test sets, grouped by generator.

    Draws a horizontal grouped bar chart with one group per entry of
    `testSets` and one bar per key of `statistic`.  A missing data set or
    classifier result is drawn as 0.0.  The chart is written to
    ``data_result/statistics/byAlgorithm/statistic-<algo>-<score>.pdf`` and
    then shown.

    Parameters:
        algo: classifier label used as key in ``statistic[g][d]``.
        score: metric name (`f1Score` or `kScore`); also used as x-axis label.
    """
    print(f"{algo}: {score}")

    gans = list(statistic.keys())
    w = 0.8 / len(gans)  # all bars of one group share 0.8 of the row height
    bar = list(range(len(testSets)))
    plt.figure(figsize=(20, 18))
    for g in gans:
        perDataset = statistic[g]
        values = [
            perDataset[d][algo][score]
            if d in perDataset and algo in perDataset[d]
            else 0.0
            for d in testSets
        ]
        plt.barh(bar, values, w, label=g)

        # Move down by one bar width so the next generator is drawn
        # beside this one instead of on top of it.
        bar = [i - w for i in bar]
    plt.ylabel("Dataset")
    plt.xlabel(score)
    plt.yticks(range(len(testSets)), [cleanupName(name) for name in testSets])
    plt.legend()
    plt.savefig(f"data_result/statistics/byAlgorithm/statistic-{algo}-{score}.pdf")
    plt.show()
def showDiagnoseAverage(score, onlyOneBar=False):
    """Plot the average of one metric per classifier, one bar series per generator.

    Reads ``statistic[g]["Average"]`` for every key ``g`` of `statistic`; a
    classifier without an average is drawn as 0.0.  The chart is written to
    ``data_result/statistics/average/statistic-Average-<score>-<M|O>.pdf``
    and then shown.

    Parameters:
        score: metric name (`f1Score` or `kScore`); also used as x-axis label.
        onlyOneBar: if False (default) the generators are drawn side by side
            and the file name ends in ``-M``; if True all generators are
            drawn at the same position and the file name ends in ``-O``.
    """
    print(f"Average: {score}")

    gans = list(statistic.keys())
    # Fix the order of the classifiers once, so that the bar values and the
    # tick labels are guaranteed to come from the same sequence.
    algos = list(algs)
    w = 0.8 / len(gans)
    if onlyOneBar:
        barType = "O"
        bar = range(len(algos))
    else:
        barType = "M"
        bar = [0.8 + i - w for i in range(len(algos))]
    plt.figure(figsize=(20, 18))
    for g in gans:
        average = statistic[g]["Average"]
        values = [
            average[algo][score] if algo in average else 0.0
            for algo in algos
        ]
        plt.barh(bar, values, w, label=g)

        if not onlyOneBar:
            # Next generator goes one bar width further down.
            bar = [i - w for i in bar]
    # The rows of this chart are classifiers, not data sets.
    plt.ylabel("Algorithm")
    plt.xlabel(score)
    plt.yticks(range(len(algos)), algos)
    plt.legend()
    plt.savefig(f"data_result/statistics/average/statistic-Average-{score}-{barType}.pdf")
    plt.show()
def showDiagnoseDataset(dataset):
    """Plot every classifier/metric combination of one data set per generator.

    Draws a horizontal grouped bar chart with one group per key of
    `statistic` and one bar per (classifier, metric) pair.  A generator
    without this data set, or without a result for the classifier, is drawn
    as 0.0.  The chart is written to
    ``data_result/statistics/byDataset/statistic-<dataset>.pdf`` and then
    shown.

    Parameters:
        dataset: data set name used as key in ``statistic[g]``.
    """
    print(f"{dataset}")

    gans = list(statistic.keys())
    scores = [(a, s) for a in algs for s in (f1Score, kScore)]
    w = 0.8 / len(scores)  # all bars of one group share 0.8 of the row height
    bar = list(range(len(gans)))
    plt.figure(figsize=(20, 18))
    for (algo, score) in scores:
        values = []
        for g in gans:
            if dataset in statistic[g] and algo in statistic[g][dataset]:
                values.append(statistic[g][dataset][algo][score])
            else:
                values.append(0.0)
        plt.barh(bar, values, w, label=f"{algo}: {score}")

        # Move down by one bar width so the next series is drawn beside
        # this one instead of on top of it.
        bar = [i - w for i in bar]
    plt.xlabel("score")
    plt.ylabel("Generator")
    plt.yticks(range(len(gans)), gans)
    plt.legend()
    plt.savefig(f"data_result/statistics/byDataset/statistic-{dataset}.pdf")
    plt.show()
# One chart per classifier and metric (f1 first, then kappa).
for a in algs:
    for metric in (f1Score, kScore):
        showDiagnose(a, metric)

# Charts of the averages: both metrics side by side per generator, plus the
# kappa score once more in the single-bar layout.
showDiagnoseAverage(f1Score)
showDiagnoseAverage(kScore)
showDiagnoseAverage(kScore, True)

# One chart per data set.
for t in testSets:
    showDiagnoseDataset(t)
# NOTE(review): "Average" was appended to testSets near the top of this file,
# so the loop above has already produced this chart; this call renders and
# saves it a second time.
showDiagnoseDataset("Average")
|