"""Compare classifier scores of several generators against the ProWRAS paper.

The script collects F1 and Cohen's kappa scores per generator, dataset and
classifier into the nested dict ``statistic``::

    statistic[generator][dataset][classifier] -> {f1Score: ..., kScore: ...}

It then adds an "Average" pseudo-dataset per generator and renders horizontal
bar charts (saved below ``data_result/`` and shown on screen).
"""
import numpy as np
import matplotlib.pyplot as plt

from library.analysis import testSets, generators

# "Average" is handled like one more dataset when plotting.
testSets.append("Average")

kScore = "cohens kappa score"
f1Score = "f1 score"

# NOTE(review): every name in `dataset` below carries a "folding_" prefix, so
# "yeast_me2" never matches and nothing is filtered out right now. Confirm
# whether "folding_yeast_me2" was intended before changing it, because that
# would alter which values end up in the plots.
ignoreSet = ["yeast_me2"]

gans = [g[0] for g in generators]

# An ordered sequence (instead of a set) keeps the bar and tick order of the
# plots identical from run to run.
algs = ["LR", "GB", "KNN"]

dataset = [
    "folding_abalone9-18",
    "folding_abalone_17_vs_7_8_9_10",
    "folding_car-vgood",
    "folding_car_good",
    "folding_flare-F",
    "folding_hypothyroid",
    "folding_kddcup-guess_passwd_vs_satan",
    "folding_kr-vs-k-three_vs_eleven",
    "folding_kr-vs-k-zero-one_vs_draw",
    "folding_shuttle-2_vs_5",
    "folding_winequality-red-4",
    "folding_yeast4",
    "folding_yeast5",
    "folding_yeast6",
    "folding_ozone_level",
    "folding_yeast_me2",
    "Average",
]

# Hard-coded reference scores; the i-th value belongs to dataset[i].
knn_ProWRAS_f1 = [0.384,0.347,0.818,0.641,0.301,0.553,1.0,0.94,0.9,1.0,0.141,0.308,0.714,0.545,0.556,0.339,0.538]
knn_ProWRAS_k = [0.35,0.328,0.81,0.622,0.263,0.528,1.0,0.938,0.896,1.0,0.093,0.268,0.704,0.531,0.526,0.305,0.515]

lr_ProWRAS_f1 = [0.488,0.315,0.407,0.103,0.341,0.446,0.99,0.928,0.853,1.0,0.158,0.308,0.591,0.326,0.347,0.295,0.472]
lr_ProWRAS_k = [0.446,0.287,0.371,0.033,0.3,0.407,0.99,0.926,0.847,1.0,0.119,0.268,0.574,0.3,0.319,0.254,0.441]

gb_ProWRAS_f1 = [0.385,0.335,0.959,0.863,0.320,0.803,0.998,0.995,0.969,1.0,0.156,0.335,0.735,0.514,0.329,0.225,0.600]
gb_ProWRAS_k = [0.341,0.310,0.957,0.857,0.291,0.794,0.998,0.995,0.967,1.0,0.115,0.303,0.726,0.501,0.303,0.328,0.589]

ProWrasPaper = "ProWRAS-paper"

statistic = {ProWrasPaper: {}}

# One table instead of three copy-pasted loops: classifier -> (f1, kappa).
_paperScores = {
    "LR": (lr_ProWRAS_f1, lr_ProWRAS_k),
    "GB": (gb_ProWRAS_f1, gb_ProWRAS_k),
    "KNN": (knn_ProWRAS_f1, knn_ProWRAS_k),
}

for alg, (f1Values, kValues) in _paperScores.items():
    for n, f1, k in zip(dataset, f1Values, kValues):
        if n in ignoreSet:
            continue
        statistic[ProWrasPaper].setdefault(n, {})[alg] = {kScore: k, f1Score: f1}

dataset = [n for n in dataset if n not in ignoreSet]


def loadDiagnoseData(ganType, datasetName):
    """Read the averaged scores from ``data_result/<ganType>/<datasetName>.csv``.

    The file is read as a sequence of blocks. The first line of a block is
    used as the key of the result, a line ``---`` ends the block, and a
    ``;``-separated line whose first field is ``avg`` supplies the F1 score
    (field 5) and the kappa score (field 6) for that key.

    Returns:
        dict mapping each block's first line to ``{f1Score: ..., kScore: ...}``.
        The dict is empty when the file does not exist; in that case a
        message naming the missing file is printed.
    """
    fileName = f"data_result/{ganType}/{datasetName}.csv"
    r = {}
    try:
        with open(fileName) as f:
            newBlock = True
            n = ""
            for line in f:
                line = line.strip()
                if newBlock:
                    # First line of a block names the entry being described.
                    n = line
                    newBlock = False
                elif line == "---":
                    newBlock = True
                else:
                    parts = line.split(";")
                    if parts[0] == "avg":
                        r[n] = {
                            f1Score: float(parts[5]),
                            kScore: float(parts[6]),
                        }
    except FileNotFoundError:
        print(f"Missing file: {fileName}")
    return r


for gan in gans:
    if gan not in statistic:
        statistic[gan] = {}
    for ds in testSets:
        if ds != "Average":
            statistic[gan][ds] = loadDiagnoseData(gan, ds)

# Recompute the "Average" entry of every generator from its datasets. For the
# paper entry this replaces the hard-coded "Average" values loaded above.
ks = set()
for gan, perDataset in statistic.items():
    f1Sum = {n: 0.0 for n in algs}
    kSum = {n: 0.0 for n in algs}
    c = 0
    for ds, perAlg in perDataset.items():
        ks.add(ds)
        if ds == "Average":
            continue
        # Every dataset counts, even when a classifier has no score for it
        # (e.g. missing result file); such gaps contribute 0 to the sum.
        c += 1
        for n in algs:
            if n in perAlg:
                f1Sum[n] += perAlg[n][f1Score]
                kSum[n] += perAlg[n][kScore]

    # Guard against a generator without any dataset instead of dividing by 0.
    statistic[gan]["Average"] = {
        n: {
            f1Score: f1Sum[n] / c if c else 0.0,
            kScore: kSum[n] / c if c else 0.0,
        }
        for n in algs
    }

print(ks)


def cleanupName(name):
    """Strip the "folding_", "imblearn_" and "kaggle_" markers from *name*."""
    return name.replace("folding_", "").replace("imblearn_", "").replace("kaggle_", "")


def _scoreOrZero(gan, ds, algo, score):
    """Return statistic[gan][ds][algo][score], or 0.0 if dataset/classifier is absent."""
    perAlg = statistic[gan].get(ds)
    if perAlg is None or algo not in perAlg:
        return 0.0
    return perAlg[algo][score]


def showDiagnose(algo, score):
    """Plot *score* of classifier *algo* for every entry of ``testSets``.

    One horizontal bar per generator and dataset is drawn; missing values are
    shown as 0.0. The figure is written to
    ``data_result/statistic-<algo>-<score>.pdf`` and then displayed.
    """
    print(f"{algo}: {score}")

    gans = list(statistic.keys())
    # All generators of one dataset share a band of height 0.8.
    w = 0.8 / len(gans)
    bar = list(range(len(testSets)))

    plt.figure(figsize=(20, 18))
    for g in gans:
        values = [_scoreOrZero(g, d, algo, score) for d in testSets]
        plt.barh(bar, values, w, label=g)
        # Shift the next generator's bars so they sit beside the current ones.
        bar = [i - w for i in bar]

    # barh puts the values on the x axis and the categories on the y axis.
    plt.xlabel(score)
    plt.ylabel("Dataset")
    plt.yticks(range(len(testSets)), [cleanupName(name) for name in testSets])
    plt.legend()
    plt.savefig(f"data_result/statistic-{algo}-{score}.pdf")
    plt.show()


def showDiagnoseAverage(score, onlyOneBar=False):
    """Plot the "Average" *score* of every generator, grouped by classifier.

    Args:
        score: key of the score to plot (``f1Score`` or ``kScore``).
        onlyOneBar: if true, the bars of all generators are drawn at the same
            position per classifier instead of side by side.

    The figure is written to
    ``data_result/statistic-Average-<score>-<O|M>.pdf`` ("O" for
    ``onlyOneBar``, "M" otherwise) and then displayed.
    """
    print(f"Average: {score}")

    gans = list(statistic.keys())
    w = 0.8 / len(gans)

    if onlyOneBar:
        barType = "O"
        bar = range(len(algs))
    else:
        barType = "M"
        bar = [0.8 + i - w for i in range(len(algs))]

    plt.figure(figsize=(20, 18))
    for g in gans:
        values = [_scoreOrZero(g, "Average", algo, score) for algo in algs]
        plt.barh(bar, values, w, label=g)
        if not onlyOneBar:
            bar = [i - w for i in bar]

    # barh puts the values on the x axis; the y axis lists the classifiers.
    plt.xlabel(score)
    plt.ylabel("Algorithm")
    plt.yticks(range(len(algs)), list(algs))
    plt.legend()
    plt.savefig(f"data_result/statistic-Average-{score}-{barType}.pdf")
    plt.show()


for a in algs:
    showDiagnose(a, f1Score)
    showDiagnose(a, kScore)

showDiagnoseAverage(f1Score)
showDiagnoseAverage(kScore)
showDiagnoseAverage(kScore, True)