# calcSimilarity.py
  1. import json
  2. import math
  3. from library.analysis import testSets, generators
  4. def loadDataset(name):
  5. with open(name) as f:
  6. return json.load(f)
  7. return None
  8. def dist(x,y):
  9. s = 0.0
  10. for (a,b) in zip(x,y):
  11. s += (a - b) * (a - b)
  12. return math.sqrt(s)
  13. def distSet(s1, s2, compareSame=False):
  14. dSet = None
  15. for x in s1:
  16. dPoint = None
  17. for y in s2:
  18. d = dist(x, y)
  19. if d > 0 or not compareSame:
  20. if dPoint is None:
  21. dPoint = d
  22. else:
  23. dPoint = min(dPoint, d)
  24. if dSet is None:
  25. dSet = dPoint
  26. elif dPoint is not None:
  27. dSet = max(dSet, dPoint)
  28. return dSet
  29. def calcDistancesOfSlice(path, stepNr, sliceNr):
  30. data = loadDataset(f"{path}Step{stepNr}_Slice{sliceNr}.json")
  31. data_min = data['minority']
  32. data_maj = data['majority']
  33. data_syn = data['synthetic']
  34. data = None
  35. d_min = distSet(data_min, data_min, True)
  36. d_maj = distSet(data_min, data_maj) / d_min
  37. d_syn = distSet(data_min, data_syn) / d_min
  38. return (d_min, d_maj, d_syn)
  39. class Stat:
  40. def __init__(self, name="?"):
  41. self.mi = None
  42. self.mx = None
  43. self.s = 0.0
  44. self.n = 0.0
  45. self.name = name
  46. def add(self, x):
  47. if self.n == 0.0:
  48. self.mi = x
  49. self.mx = x
  50. self.s = x
  51. self.n = 1.0
  52. else:
  53. self.mi = min(self.mi, x)
  54. self.mx = max(self.mx, x)
  55. self.s += x
  56. self.n += 1.0
  57. def __str__(self):
  58. return f"{self.name} [{self.mi:.3f} .. {self.s / self.n:.3f} .. {self.mx:.3f}]"
  59. def value(self):
  60. return {
  61. "min": self.mi,
  62. "max": self.mx,
  63. "avg": self.s / self.n
  64. }
  65. class StatTriple:
  66. def __init__(self, title=""):
  67. self.s_min = Stat(title + " minority ")
  68. self.s_maj = Stat(title + " majority ")
  69. self.s_syn = Stat(title + " synthetic")
  70. def add(self, d):
  71. self.s_min.add(d[0])
  72. self.s_maj.add(d[1])
  73. self.s_syn.add(d[2])
  74. def print(self):
  75. print(self.s_min)
  76. print(self.s_maj)
  77. print(self.s_syn)
  78. def value(self):
  79. return {
  80. "minority": self.s_min.value(),
  81. "majority_factor": self.s_maj.value(),
  82. "synthetic_factor": self.s_syn.value()
  83. }
  84. def calcStatistic(path, title=""):
  85. s_triple = StatTriple(title)
  86. for stepNr in [1,2,3,4,5]:
  87. for sliceNr in [1,2,3,4,5]:
  88. d = calcDistancesOfSlice(path, stepNr, sliceNr)
  89. s_triple.add(d)
  90. s_triple.print()
  91. return s_triple.value()
  92. statistic = {}
  93. for g in generators.keys():
  94. statistic[g] = {}
  95. sAverage = StatTriple("Average")
  96. print(f"--------[ {g} ]--------")
  97. for s in testSets:
  98. st = calcStatistic(f"data_result/{g}/{s}/", s)
  99. statistic[g][s] = st
  100. sAverage.add( (st["minority"]["avg"], st["majority_factor"]["avg"], st["synthetic_factor"]["avg"]) )
  101. sAverage.print()
  102. statistic[g]["Average"] = sAverage.value()
  103. print()
  104. print(f"--------[ JSON ]--------")
  105. #print(json.dumps(statistic))
  106. with open("data_result/similarity.json", "w") as f:
  107. f.write(json.dumps(statistic))
  108. print(f"--------[ summary ]--------")
  109. for g in generators.keys():
  110. print(f"{g:32s}: {statistic[g]['Average']['synthetic_factor']['avg']}")
  111. print(f"--------[ done ]--------")