# statistics.py
import numpy as np
import matplotlib.pyplot as plt
from library.analysis import testSets, generators
  4. testSets.append("Average")
  5. kScore = "cohens kappa score"
  6. f1Score = "f1 score"
  7. ignoreSet = ["yeast_me2"]
  8. gans = [g[0] for g in generators]
  9. algs = {"LR", "GB", "KNN"}
  10. dataset = [
  11. "folding_abalone9-18",
  12. "folding_abalone_17_vs_7_8_9_10",
  13. "folding_car-vgood",
  14. "folding_car_good",
  15. "folding_flare-F",
  16. "folding_hypothyroid",
  17. "folding_kddcup-guess_passwd_vs_satan",
  18. "folding_kr-vs-k-three_vs_eleven",
  19. "folding_kr-vs-k-zero-one_vs_draw",
  20. "folding_shuttle-2_vs_5",
  21. "folding_winequality-red-4",
  22. "folding_yeast4",
  23. "folding_yeast5",
  24. "folding_yeast6",
  25. "folding_ozone_level",
  26. "folding_yeast_me2",
  27. "Average"
  28. ]
  29. knn_ProWRAS_f1 = [0.384,0.347,0.818,0.641,0.301,0.553,1.0,0.94,0.9,1.0,0.141,0.308,0.714,0.545,0.556,0.339,0.538]
  30. knn_ProWRAS_k = [0.35,0.328,0.81,0.622,0.263,0.528,1.0,0.938,0.896,1.0,0.093,0.268,0.704,0.531,0.526,0.305,0.515]
  31. lr_ProWRAS_f1 = [0.488,0.315,0.407,0.103,0.341,0.446,0.99,0.928,0.853,1.0,0.158,0.308,0.591,0.326,0.347,0.295,0.472]
  32. lr_ProWRAS_k = [0.446,0.287,0.371,0.033,0.3,0.407,0.99,0.926,0.847,1.0,0.119,0.268,0.574,0.3,0.319,0.254,0.441]
  33. gb_ProWRAS_f1 = [0.385,0.335,0.959,0.863,0.320,0.803,0.998,0.995,0.969,1.0,0.156,0.335,0.735,0.514,0.329,0.225,0.600]
  34. gb_ProWRAS_k = [0.341,0.310,0.957,0.857,0.291,0.794,0.998,0.995,0.967,1.0,0.115,0.303,0.726,0.501,0.303,0.328,0.589]
  35. ProWrasPaper = "ProWRAS-paper"
  36. statistic = { ProWrasPaper: {} }
  37. for (n, f1, k) in zip(dataset, lr_ProWRAS_f1, lr_ProWRAS_k):
  38. if n in ignoreSet:
  39. continue
  40. if n not in statistic[ProWrasPaper]:
  41. statistic[ProWrasPaper][n] = {}
  42. statistic[ProWrasPaper][n]["LR"] = { kScore: k, f1Score: f1 }
  43. for (n, f1, k) in zip(dataset, gb_ProWRAS_f1, gb_ProWRAS_k):
  44. if n in ignoreSet:
  45. continue
  46. if n not in statistic[ProWrasPaper]:
  47. statistic[ProWrasPaper][n] = {}
  48. statistic[ProWrasPaper][n]["GB"] = { kScore: k, f1Score: f1 }
  49. for (n, f1, k) in zip(dataset, knn_ProWRAS_f1, knn_ProWRAS_k):
  50. if n in ignoreSet:
  51. continue
  52. if n not in statistic[ProWrasPaper]:
  53. statistic[ProWrasPaper][n] = {}
  54. statistic[ProWrasPaper][n]["KNN"] = { kScore: k, f1Score: f1 }
  55. dataset = list(filter(lambda n: n not in ignoreSet, dataset))
  56. def loadDiagnoseData(ganType, datasetName):
  57. fileName = f"data_result/{ganType}/{datasetName}.csv"
  58. r = {}
  59. try:
  60. with open(fileName) as f:
  61. newBlock = True
  62. n = ""
  63. for line in f:
  64. line = line.strip()
  65. if newBlock:
  66. n = line
  67. newBlock = False
  68. elif line == "---":
  69. newBlock = True
  70. else:
  71. parts = line.split(";")
  72. if parts[0] == "avg":
  73. r[n] = { f1Score: float(parts[5]), kScore: float(parts[6]) }
  74. except FileNotFoundError as e:
  75. print(f"Missing file: {fileName}")
  76. return r
  77. for gan in gans:
  78. if gan not in statistic:
  79. statistic[gan] = {}
  80. for ds in testSets:
  81. if ds != "Average":
  82. statistic[gan][ds] = loadDiagnoseData(gan, ds)
  83. ks = set()
  84. for gan in statistic.keys():
  85. f1 = { n: 0.0 for n in algs }
  86. k = { n: 0.0 for n in algs }
  87. c = 0
  88. for ds in statistic[gan].keys():
  89. ks.add(ds)
  90. if ds != "Average":
  91. c += 1
  92. for n in algs:
  93. if n in statistic[gan][ds].keys():
  94. f1[n] += statistic[gan][ds][n][f1Score]
  95. k[n] += statistic[gan][ds][n][kScore]
  96. avg = {}
  97. for n in algs:
  98. avg[n] = { f1Score: f1[n] / c, kScore: k[n] / c }
  99. statistic[gan]["Average"] = avg
  100. print(ks)
  101. def cleanupName(name):
  102. return name.replace("folding_", "").replace("imblearn_", "").replace("kaggle_", "")
  103. def showDiagnose(algo, score):
  104. def gr(n):
  105. if n in resultList:
  106. return resultList[n][algo].data[score]
  107. else:
  108. return 0.0
  109. print(f"{algo}: {score}")
  110. gans = list(statistic.keys())
  111. w = 0.8 / len(gans)
  112. bar = list(range(len(testSets)))
  113. plt.figure(figsize=(20, 18))
  114. for g in gans:
  115. values = [
  116. (statistic[g][d][algo][score] if algo in statistic[g][d].keys() else 0.0) if d in statistic[g] else 0.0
  117. for d in testSets
  118. ]
  119. plt.barh(bar, values, w, label=g)
  120. bar = [i - w for i in bar]
  121. plt.xlabel("Dataset")
  122. plt.ylabel(score)
  123. plt.yticks(range(len(testSets)), [cleanupName(name) for name in testSets])
  124. #plt.yticks(rotation="vertical")
  125. plt.legend()
  126. plt.savefig(f"data_result/statistic-{algo}-{score}.pdf")
  127. plt.show()
  128. def showDiagnoseAverage(score, onlyOneBar=False):
  129. def gr(n):
  130. if n in resultList:
  131. return resultList[n][algo].data[score]
  132. else:
  133. return 0.0
  134. print(f"Average: {score}")
  135. gans = list(statistic.keys())
  136. w = 0.8 / len(gans)
  137. if onlyOneBar:
  138. barType = "O"
  139. bar = range(len(algs))
  140. else:
  141. barType = "M"
  142. bar = [0.8 + i - w for i in range(len(algs)) ]
  143. plt.figure(figsize=(20, 18))
  144. for g in gans:
  145. values = [
  146. (statistic[g]["Average"][algo][score] if algo in statistic[g]["Average"].keys() else 0.0)
  147. for algo in algs
  148. ]
  149. plt.barh(bar, values, w, label=g)
  150. if not onlyOneBar:
  151. bar = [i - w for i in bar]
  152. plt.xlabel("Dataset")
  153. plt.ylabel(score)
  154. plt.yticks(range(len(algs)), algs)
  155. plt.legend()
  156. plt.savefig(f"data_result/statistic-Average-{score}-{barType}.pdf")
  157. plt.show()
  158. for a in algs:
  159. showDiagnose(a, f1Score)
  160. showDiagnose(a, kScore)
  161. showDiagnoseAverage(f1Score)
  162. showDiagnoseAverage(kScore)
  163. showDiagnoseAverage(kScore, True)