| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- import json
- import math
- from library.analysis import testSets, generators
def loadDataset(name):
    """Load and return the JSON document stored in the file *name*.

    Args:
        name: Path of the JSON file to read.

    Returns:
        The decoded JSON value, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(name, encoding="utf-8") as f:
        return json.load(f)
def dist(x, y):
    """Return the Euclidean distance between the points *x* and *y*.

    The coordinates are paired with ``zip``, so when the two points have a
    different number of coordinates the surplus ones are ignored.
    """
    squared = 0.0
    for a, b in zip(x, y):
        delta = a - b
        squared += delta * delta
    return math.sqrt(squared)
def distSet(s1, s2, compareSame=False):
    """Return the largest nearest-neighbour distance from *s1* to *s2*.

    For every point of ``s1`` the smallest ``dist`` to any point of ``s2``
    is determined; the result is the maximum of those per-point minima.

    Args:
        s1: Iterable of points whose nearest neighbours are searched.
        s2: Candidate neighbour points; iterated once per point of ``s1``.
        compareSame: When true, only distances greater than zero are
            considered, so a point is never matched with itself (or an
            exact duplicate) when both arguments are the same set.

    Returns:
        The maximum of the per-point minimum distances, or ``None`` when
        no point of ``s1`` has an admissible neighbour.
    """
    farthest = None
    for p in s1:
        admissible = [
            d
            for d in (dist(p, q) for q in s2)
            if not compareSame or d > 0
        ]
        if not admissible:
            # No usable neighbour for this point: it does not contribute.
            continue
        nearest = min(admissible)
        if farthest is None or nearest > farthest:
            farthest = nearest
    return farthest
def calcDistancesOfSlice(path, stepNr, sliceNr):
    """Compute the distance figures for one stored step/slice dataset.

    Loads ``{path}Step{stepNr}_Slice{sliceNr}.json`` and reads its
    ``minority``, ``majority`` and ``synthetic`` point lists.

    Args:
        path: Prefix of the dataset file name (everything before ``Step``).
        stepNr: Step number inserted into the file name.
        sliceNr: Slice number inserted into the file name.

    Returns:
        A tuple ``(d_min, d_maj, d_syn)``. ``d_min`` is ``distSet`` of the
        minority set against itself with zero distances ignored. ``d_maj``
        and ``d_syn`` are ``distSet`` from the minority set to the majority
        and synthetic sets, each divided by ``d_min``.

    Raises:
        ValueError: If one of the distances is undefined because ``distSet``
            returned ``None`` (e.g. an empty set, or no two distinct
            minority points).
    """
    fileName = f"{path}Step{stepNr}_Slice{sliceNr}.json"
    data = loadDataset(fileName)
    data_min = data['minority']
    data_maj = data['majority']
    data_syn = data['synthetic']
    # Drop the reference to the enclosing dict; only the three lists are used.
    del data

    d_min = distSet(data_min, data_min, True)
    if d_min is None:
        # Previously this surfaced as a TypeError from dividing by None.
        raise ValueError(
            f"{fileName}: minority set has no two distinct points, "
            "cannot normalise distances"
        )

    raw_maj = distSet(data_min, data_maj)
    raw_syn = distSet(data_min, data_syn)
    if raw_maj is None or raw_syn is None:
        raise ValueError(
            f"{fileName}: majority or synthetic set is empty, "
            "distance to the minority set is undefined"
        )

    return (d_min, raw_maj / d_min, raw_syn / d_min)
class Stat:
    """Running minimum, maximum and mean of a stream of numbers."""

    def __init__(self, name="?"):
        """Create an empty accumulator labelled *name*."""
        self.mi = None    # smallest sample so far; None while empty
        self.mx = None    # largest sample so far; None while empty
        self.s = 0.0      # sum of all samples
        self.n = 0.0      # number of samples (stored as a float)
        self.name = name  # label used by __str__

    def add(self, x):
        """Record the sample *x*."""
        if self.n == 0.0:
            # The first sample initialises every field.
            self.mi = x
            self.mx = x
            self.s = x
            self.n = 1.0
        else:
            self.mi = min(self.mi, x)
            self.mx = max(self.mx, x)
            self.s += x
            self.n += 1.0

    def _avg(self):
        """Return the mean of the samples, or None when there are none."""
        if self.n == 0.0:
            return None
        return self.s / self.n

    def __str__(self):
        """Return ``name [min .. avg .. max]`` with three decimals each.

        An empty accumulator is rendered as ``name [no samples]`` instead
        of raising while formatting ``None``.
        """
        if self.n == 0.0:
            return f"{self.name} [no samples]"
        return f"{self.name} [{self.mi:.3f} .. {self._avg():.3f} .. {self.mx:.3f}]"

    def value(self):
        """Return the statistic as a dict with keys ``min``, ``max``, ``avg``.

        All three entries are ``None`` while no sample has been added.
        """
        return {
            "min": self.mi,
            "max": self.mx,
            "avg": self._avg(),
        }
class StatTriple:
    """Three ``Stat`` accumulators: minority, majority and synthetic."""

    def __init__(self, title=""):
        """Create the accumulators, each labelled *title* plus a suffix."""
        self.s_min = Stat(title + " minority ")
        self.s_maj = Stat(title + " majority ")
        self.s_syn = Stat(title + " synthetic")

    def add(self, d):
        """Feed ``d[0]``, ``d[1]``, ``d[2]`` to the three accumulators in order."""
        accumulators = (self.s_min, self.s_maj, self.s_syn)
        for index, accumulator in enumerate(accumulators):
            accumulator.add(d[index])

    def print(self):
        """Print the three accumulators, one per line."""
        for accumulator in (self.s_min, self.s_maj, self.s_syn):
            print(accumulator)

    def value(self):
        """Return the ``value()`` of each accumulator keyed by its role."""
        summary = {}
        summary["minority"] = self.s_min.value()
        summary["majority_factor"] = self.s_maj.value()
        summary["synthetic_factor"] = self.s_syn.value()
        return summary
def calcStatistic(path, title="", nSteps=5, nSlices=5):
    """Accumulate, print and return the distance statistics below *path*.

    Calls ``calcDistancesOfSlice`` for every step/slice combination and
    feeds each result into a ``StatTriple``.

    Args:
        path: File-name prefix passed on to ``calcDistancesOfSlice``.
        title: Label used for the printed statistic lines.
        nSteps: Number of steps; step numbers run from 1 to ``nSteps``.
        nSlices: Number of slices per step; slice numbers run from 1 to
            ``nSlices``.

    Returns:
        The dict produced by ``StatTriple.value()``.
    """
    s_triple = StatTriple(title)
    for stepNr in range(1, nSteps + 1):
        for sliceNr in range(1, nSlices + 1):
            s_triple.add(calcDistancesOfSlice(path, stepNr, sliceNr))
    s_triple.print()
    return s_triple.value()
# Collect the distance statistics of every generator over all test sets.
statistic = {}
for g in generators.keys():
    statistic[g] = {}
    print(f"--------[ {g} ]--------")
    sAverage = StatTriple("Average")
    for s in testSets:
        st = calcStatistic(f"data_result/{g}/{s}/", s)
        statistic[g][s] = st
        # Average the per-test-set averages of the three figures.
        sAverage.add(
            tuple(
                st[key]["avg"]
                for key in ("minority", "majority_factor", "synthetic_factor")
            )
        )
    sAverage.print()
    statistic[g]["Average"] = sAverage.value()
    print()

# Persist the complete result as JSON.
print("--------[ JSON ]--------")
with open("data_result/similarity.json", "w") as f:
    f.write(json.dumps(statistic))

# One line per generator: its averaged synthetic factor.
print("--------[ summary ]--------")
for g in generators.keys():
    syntheticAvg = statistic[g]['Average']['synthetic_factor']['avg']
    print(f"{g:32s}: {syntheticAvg}")

print("--------[ done ]--------")
|