"""Compare minority, majority and synthetic point sets of stored result slices.

For every generator and every test set the script reads the files
``data_result/<generator>/<testSet>/Step<k>_Slice<j>.json``, measures set
distances relative to the minority set, prints min/avg/max statistics and
writes the collected numbers to ``data_result/similarity.json``.
"""

import json
import math

from library.analysis import testSets, generators


def loadDataset(name):
    """Return the parsed JSON document stored in the file *name*."""
    with open(name, encoding="utf-8") as f:
        return json.load(f)


def dist(x, y):
    """Return the Euclidean distance between the points *x* and *y*.

    Coordinates are paired with ``zip``, so surplus coordinates of the
    longer point are ignored.
    """
    # Explicit accumulation keeps the plain left-to-right float summation.
    s = 0.0
    for a, b in zip(x, y):
        diff = a - b
        s += diff * diff
    return math.sqrt(s)


def distSet(s1, s2, compareSame=False):
    """Return the largest nearest-neighbour distance from *s1* to *s2*.

    For each point of *s1* the distance to its closest point of *s2* is
    determined; the maximum of these per-point minima is returned (the
    directed Hausdorff distance from *s1* to *s2* when *compareSame* is
    false).

    Args:
        s1: Iterable of points to measure from.
        s2: Iterable of points to measure to. It is traversed once per
            point of *s1*, so it must be re-iterable (e.g. a list).
        compareSame: If true, distances that are not greater than 0 are
            skipped. This keeps a point from being matched with itself
            (or an exact duplicate) when a set is compared with itself.

    Returns:
        The distance, or ``None`` if no pair of points qualified.
    """
    dSet = None
    for x in s1:
        distances = (dist(x, y) for y in s2)
        if compareSame:
            distances = (d for d in distances if d > 0)
        dPoint = min(distances, default=None)
        # Points without any qualifying partner do not influence the result.
        if dPoint is not None and (dSet is None or dPoint > dSet):
            dSet = dPoint
    return dSet


def calcDistancesOfSlice(path, stepNr, sliceNr):
    """Compute the distance triple for one stored slice.

    The file ``{path}Step{stepNr}_Slice{sliceNr}.json`` is loaded and its
    ``minority``, ``majority`` and ``synthetic`` entries are compared.

    Returns:
        A tuple ``(d_min, d_maj, d_syn)`` where ``d_min`` is the set
        distance of the minority points among themselves and the other two
        are the minority-to-majority and minority-to-synthetic set
        distances divided by ``d_min``.

    Raises:
        ValueError: If one of the set distances is undefined, e.g. because
            a set is empty or the minority set has no two distinct points.
    """
    fileName = f"{path}Step{stepNr}_Slice{sliceNr}.json"
    data = loadDataset(fileName)
    data_min = data['minority']
    data_maj = data['majority']
    data_syn = data['synthetic']

    d_min = distSet(data_min, data_min, True)
    d_maj = distSet(data_min, data_maj)
    d_syn = distSet(data_min, data_syn)
    if d_min is None or d_maj is None or d_syn is None:
        # Without this check the division below fails with an opaque
        # TypeError on None.
        raise ValueError(f"cannot compute set distances for {fileName}")

    return (d_min, d_maj / d_min, d_syn / d_min)


class Stat:
    """Running minimum, maximum and average of a series of numbers."""

    def __init__(self, name="?"):
        self.mi = None      # smallest sample so far (None while empty)
        self.mx = None      # largest sample so far (None while empty)
        self.s = 0.0        # sum of all samples
        self.n = 0.0        # number of samples
        self.name = name    # label used by __str__

    def add(self, x):
        """Record the sample *x*."""
        if self.n == 0.0:
            self.mi = x
            self.mx = x
            self.s = x
        else:
            self.mi = min(self.mi, x)
            self.mx = max(self.mx, x)
            self.s += x
        self.n += 1.0

    def __str__(self):
        """Return ``name [min .. avg .. max]`` with three decimals."""
        if not self.n:
            # No samples: min/max are None and the average is undefined.
            return f"{self.name} [no samples]"
        return f"{self.name} [{self.mi:.3f} .. {self.s / self.n:.3f} .. {self.mx:.3f}]"

    def value(self):
        """Return min, max and average as a JSON-serialisable dict.

        All three entries are ``None`` while no sample has been added.
        """
        avg = self.s / self.n if self.n else None
        return {
            "min": self.mi,
            "max": self.mx,
            "avg": avg,
        }


class StatTriple:
    """Three ``Stat`` accumulators fed from (minority, majority, synthetic) triples."""

    def __init__(self, title=""):
        self.s_min = Stat(title + " minority ")
        self.s_maj = Stat(title + " majority ")
        self.s_syn = Stat(title + " synthetic")

    def add(self, d):
        """Record the triple *d* = (minority, majority, synthetic)."""
        self.s_min.add(d[0])
        self.s_maj.add(d[1])
        self.s_syn.add(d[2])

    def print(self):
        """Print the three statistics, one per line."""
        print(self.s_min)
        print(self.s_maj)
        print(self.s_syn)

    def value(self):
        """Return the three statistics as a nested, JSON-serialisable dict."""
        return {
            "minority": self.s_min.value(),
            "majority_factor": self.s_maj.value(),
            "synthetic_factor": self.s_syn.value(),
        }


def calcStatistic(path, title="", steps=range(1, 6), slices=range(1, 6)):
    """Collect the distance statistics over all slices found under *path*.

    Args:
        path: Prefix of the slice files (directory including the trailing
            separator).
        title: Label used for the printed statistics.
        steps: Step numbers to evaluate (default 1..5).
        slices: Slice numbers to evaluate for each step (default 1..5).

    Returns:
        The dict produced by ``StatTriple.value()``. The statistics are
        also printed to stdout.
    """
    s_triple = StatTriple(title)
    for stepNr in steps:
        for sliceNr in slices:
            s_triple.add(calcDistancesOfSlice(path, stepNr, sliceNr))

    s_triple.print()
    return s_triple.value()


# ---- script part: evaluate every generator on every test set ----
statistic = {}
for g in generators.keys():
    statistic[g] = {}
    sAverage = StatTriple("Average")
    print(f"--------[ {g} ]--------")
    for s in testSets:
        st = calcStatistic(f"data_result/{g}/{s}/", s)
        statistic[g][s] = st
        # The per-generator average is built from the test sets' averages.
        sAverage.add((
            st["minority"]["avg"],
            st["majority_factor"]["avg"],
            st["synthetic_factor"]["avg"],
        ))

    sAverage.print()
    statistic[g]["Average"] = sAverage.value()
    print()

print("--------[ JSON ]--------")
with open("data_result/similarity.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(statistic))

print("--------[ summary ]--------")
for g in generators.keys():
    print(f"{g:32s}: {statistic[g]['Average']['synthetic_factor']['avg']}")

print("--------[ done ]--------")