|
|
@@ -12,6 +12,28 @@ from imblearn.datasets import fetch_datasets
|
|
|
|
|
|
|
|
|
def loadDataset(datasetName):
|
|
|
+ def isSame(xs, ys):
|
|
|
+ for (x, y) in zip(xs, ys):
|
|
|
+ if x != y:
|
|
|
+ return False
|
|
|
+ return True
|
|
|
+
|
|
|
+ def isIn(ys):
|
|
|
+ def f(x):
|
|
|
+ for y in ys:
|
|
|
+ if isSame(x,y):
|
|
|
+ return True
|
|
|
+ return False
|
|
|
+ return f
|
|
|
+
|
|
|
+ def isNotIn(ys):
|
|
|
+ def f(x):
|
|
|
+ for y in ys:
|
|
|
+ if isSame(x,y):
|
|
|
+ return False
|
|
|
+ return True
|
|
|
+ return f
|
|
|
+
|
|
|
pickle_in = open(f"{datasetName}.pickle", "rb")
|
|
|
pickle_dict = pickle.load(pickle_in)
|
|
|
|
|
|
@@ -24,6 +46,13 @@ def loadDataset(datasetName):
|
|
|
label_0 = list(np.where(labels == 0)[0])
|
|
|
features_1 = features[label_1]
|
|
|
features_0 = features[label_0]
|
|
|
+ cut = np.array(list(filter(isIn(features_0), features_1)))
|
|
|
+ if len(cut) > 0:
|
|
|
+ print(f"non empty cut in {datasetName}! ({len(cut)} points)")
|
|
|
+ # print(f"{len(features_0)}/{len(features_1)} points before")
|
|
|
+ # features_0 = np.array(list(filter(isNotIn(cut), features_0)))
|
|
|
+ # features_1 = np.array(list(filter(isNotIn(cut), features_1)))
|
|
|
+ # print(f"{len(features_0)}/{len(features_1)} points after")
|
|
|
|
|
|
return DataSet(data0=features_0, data1=features_1)
|
|
|
|
|
|
@@ -85,7 +114,7 @@ def runExerciseForRepeater(datasetName):
|
|
|
exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
|
|
|
exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
|
|
|
|
|
|
-def runExerciseForSpheredNoise(datasetName):
|
|
|
+def runExerciseForSpheredNoise(datasetName, resultList=None):
|
|
|
ganName = "SpheredNoise"
|
|
|
print()
|
|
|
print()
|
|
|
@@ -99,8 +128,10 @@ def runExerciseForSpheredNoise(datasetName):
|
|
|
shuffler = genShuffler()
|
|
|
exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
|
|
|
exercise.run(gan, data)
|
|
|
- exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
|
|
|
+ avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
|
|
|
exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
|
|
|
+ if resultList is not None:
|
|
|
+ resultList[datasetName] = avg
|
|
|
|
|
|
testSets = [
|
|
|
"folding_abalone_17_vs_7_8_9_10",
|