|
|
@@ -0,0 +1,149 @@
|
|
|
+import json
|
|
|
+import math
|
|
|
+from library.analysis import testSets, generators
|
|
|
+
|
|
|
+
|
|
|
def loadDataset(name):
    """Read the JSON file at *name* and return the decoded object.

    Args:
        name: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open(name, encoding="utf-8") as f:
        return json.load(f)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def dist(x, y):
    """Return the Euclidean distance between the points *x* and *y*.

    The coordinates are paired with ``zip``, so if the two sequences differ
    in length the surplus coordinates of the longer one are ignored.
    """
    squaredSum = 0.0
    for left, right in zip(x, y):
        delta = left - right
        squaredSum += delta * delta
    return math.sqrt(squaredSum)
|
|
|
+
|
|
|
def distSet(s1, s2, compareSame=False):
    """Return the largest nearest-neighbour distance from *s1* into *s2*.

    For every point of *s1* the distance to its closest point in *s2* is
    determined; the maximum of these values is returned (with
    ``compareSame=False`` this is the directed Hausdorff distance from
    *s1* to *s2*).

    Args:
        s1: Iterable of points whose nearest neighbours are searched.
        s2: Iterable of candidate neighbour points; it is traversed once per
            point of *s1*.
        compareSame: When true, distances of exactly zero are skipped so that
            a point is not matched with itself when both sets are the same.

    Returns:
        The maximum nearest-neighbour distance, or ``None`` when no point of
        *s1* has any admissible neighbour (for example an empty set).
    """
    farthest = None
    for point in s1:
        distances = [dist(point, other) for other in s2]
        if compareSame:
            # Drop zero distances: they stem from comparing a point with
            # itself (or an exact duplicate).
            distances = [d for d in distances if d > 0]

        nearest = min(distances, default=None)
        if nearest is None:
            continue
        if farthest is None or nearest > farthest:
            farthest = nearest

    return farthest
|
|
|
+
|
|
|
+
|
|
|
def calcDistancesOfSlice(path, stepNr, sliceNr):
    """Compute the distance figures for one stored step/slice result.

    Loads ``{path}Step{stepNr}_Slice{sliceNr}.json`` and reads its
    ``'minority'``, ``'majority'`` and ``'synthetic'`` entries
    (presumably lists of numeric feature vectors -- confirm against the
    code that writes these files).

    Args:
        path: Prefix placed directly in front of the file name (callers pass
            a directory ending in ``/``).
        stepNr: Step number used in the file name.
        sliceNr: Slice number used in the file name.

    Returns:
        A tuple ``(d_min, d_maj, d_syn)``:
        ``d_min`` is ``distSet`` of the minority set against itself with
        zero distances ignored; ``d_maj`` and ``d_syn`` are ``distSet`` of
        the minority set against the majority / synthetic set, each divided
        by ``d_min``.

    Raises:
        ValueError: If one of the distances cannot be determined because a
            set is empty or the minority set has no two distinct points.
    """
    fileName = f"{path}Step{stepNr}_Slice{sliceNr}.json"
    data = loadDataset(fileName)

    data_min = data['minority']
    data_maj = data['majority']
    data_syn = data['synthetic']
    # Drop the reference to the loaded container; only the three lists
    # extracted above are needed from here on.
    data = None

    d_min = distSet(data_min, data_min, True)
    if d_min is None:
        # distSet yields None when no non-zero distance exists, which would
        # otherwise surface as an opaque TypeError in the divisions below.
        raise ValueError(
            f"{fileName}: minority set needs at least two distinct points"
        )

    d_maj = distSet(data_min, data_maj)
    d_syn = distSet(data_min, data_syn)
    if d_maj is None or d_syn is None:
        raise ValueError(
            f"{fileName}: majority and synthetic sets must not be empty"
        )

    return (d_min, d_maj / d_min, d_syn / d_min)
|
|
|
+
|
|
|
+
|
|
|
class Stat:
    """Running minimum, maximum and mean of the values passed to ``add``.

    An instance without any added value is valid: ``str()`` reports that
    there is no data and ``value()`` returns ``None`` for all three figures
    instead of raising.
    """

    def __init__(self, name="?"):
        """Create an empty statistic labelled *name* (used by ``__str__``)."""
        self.mi = None    # smallest value seen so far, None while empty
        self.mx = None    # largest value seen so far, None while empty
        self.s = 0.0      # sum of all values
        self.n = 0.0      # number of values (kept as a float)
        self.name = name

    def add(self, x):
        """Include the value *x* in the statistic."""
        if self.n == 0.0:
            self.mi = x
            self.mx = x
            self.s = x
            self.n = 1.0
        else:
            self.mi = min(self.mi, x)
            self.mx = max(self.mx, x)
            self.s += x
            self.n += 1.0

    def __str__(self):
        """Return ``"<name> [min .. avg .. max]"`` with three decimals."""
        if self.n == 0.0:
            # Without data min/max are None and the mean is undefined.
            return f"{self.name} [no data]"
        return f"{self.name} [{self.mi:.3f} .. {self.s / self.n:.3f} .. {self.mx:.3f}]"

    def value(self):
        """Return the figures as a dict with keys ``min``, ``max``, ``avg``.

        All three entries are ``None`` while no value has been added.
        """
        return {
            "min": self.mi,
            "max": self.mx,
            "avg": self.s / self.n if self.n != 0.0 else None,
        }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class StatTriple:
    """Bundle of three ``Stat`` objects: minority, majority and synthetic."""

    def __init__(self, title=""):
        """Create the three statistics, each labelled with *title* as prefix."""
        self.s_min = Stat(title + " minority ")
        self.s_maj = Stat(title + " majority ")
        self.s_syn = Stat(title + " synthetic")

    def _members(self):
        """Return the three statistics in (minority, majority, synthetic) order."""
        return (self.s_min, self.s_maj, self.s_syn)

    def add(self, d):
        """Add ``d[0]``, ``d[1]`` and ``d[2]`` to the respective statistic."""
        for position, stat in enumerate(self._members()):
            stat.add(d[position])

    def print(self):
        """Print the three statistics, one per line."""
        for stat in self._members():
            print(stat)

    def value(self):
        """Return the ``value()`` of each statistic keyed by its role."""
        keys = ("minority", "majority_factor", "synthetic_factor")
        return {key: stat.value() for key, stat in zip(keys, self._members())}
|
|
|
+
|
|
|
+
|
|
|
def calcStatistic(path, title="", nSteps=5, nSlices=5):
    """Aggregate the distance figures of all step/slice files below *path*.

    Calls ``calcDistancesOfSlice`` for every step number ``1..nSteps``
    combined with every slice number ``1..nSlices``, collects the results in
    a ``StatTriple``, prints that triple and returns its values.

    Args:
        path: Prefix handed to ``calcDistancesOfSlice`` for every file.
        title: Label used for the printed statistics.
        nSteps: Number of steps to evaluate (default 5, the former fixed
            value).
        nSlices: Number of slices per step (default 5, the former fixed
            value).

    Returns:
        The dict produced by ``StatTriple.value()`` with the keys
        ``"minority"``, ``"majority_factor"`` and ``"synthetic_factor"``.

    Raises:
        ValueError: If *nSteps* or *nSlices* is smaller than 1, because no
            statistic can be built without data.
    """
    if nSteps < 1 or nSlices < 1:
        raise ValueError("nSteps and nSlices must both be at least 1")

    s_triple = StatTriple(title)

    for stepNr in range(1, nSteps + 1):
        for sliceNr in range(1, nSlices + 1):
            s_triple.add(calcDistancesOfSlice(path, stepNr, sliceNr))

    s_triple.print()
    return s_triple.value()
|
|
|
+
|
|
|
# Results per generator: statistic[generator][testSet] holds the dict
# returned by calcStatistic, plus an "Average" entry over all test sets.
statistic = {}

for g in generators.keys():
    statistic[g] = {}
    print(f"--------[ {g} ]--------")
    sAverage = StatTriple("Average")

    for s in testSets:
        st = statistic[g][s] = calcStatistic(f"data_result/{g}/{s}/", s)
        # Feed the per-set averages into the generator-wide average.
        sAverage.add(tuple(
            st[key]["avg"]
            for key in ("minority", "majority_factor", "synthetic_factor")
        ))

    sAverage.print()
    statistic[g]["Average"] = sAverage.value()
    print()


print("--------[ JSON ]--------")

# Persist the complete result structure for later evaluation.
with open("data_result/similarity.json", "w") as f:
    f.write(json.dumps(statistic))

print("--------[ summary ]--------")

# One line per generator: its average synthetic factor.
for g in generators.keys():
    print(f"{g:32s}: {statistic[g]['Average']['synthetic_factor']['avg']}")


print("--------[ done ]--------")
|