analysis.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. from library.exercise import Exercise
  2. from library.dataset import DataSet, TrainTestData
  3. from library.GanExamples import StupidToyListGan
  4. from library.SimpleGan import SimpleGan
  5. from library.Repeater import Repeater
  6. from library.SpheredNoise import SpheredNoise
  7. from library.convGAN import ConvGAN
  8. import pickle
  9. import numpy as np
  10. import time
  11. import random
  12. from imblearn.datasets import fetch_datasets
  13. def loadDataset(datasetName):
  14. def isSame(xs, ys):
  15. for (x, y) in zip(xs, ys):
  16. if x != y:
  17. return False
  18. return True
  19. def isIn(ys):
  20. def f(x):
  21. for y in ys:
  22. if isSame(x,y):
  23. return True
  24. return False
  25. return f
  26. def isNotIn(ys):
  27. def f(x):
  28. for y in ys:
  29. if isSame(x,y):
  30. return False
  31. return True
  32. return f
  33. pickle_in = open(f"{datasetName}.pickle", "rb")
  34. pickle_dict = pickle.load(pickle_in)
  35. myData = pickle_dict["folding"]
  36. k = myData[0]
  37. labels = np.concatenate((k[1], k[3]), axis=0).astype(float)
  38. features = np.concatenate((k[0], k[2]), axis=0).astype(float)
  39. label_1 = list(np.where(labels == 1)[0])
  40. label_0 = list(np.where(labels == 0)[0])
  41. features_1 = features[label_1]
  42. features_0 = features[label_0]
  43. cut = np.array(list(filter(isIn(features_0), features_1)))
  44. if len(cut) > 0:
  45. print(f"non empty cut in {datasetName}! ({len(cut)} points)")
  46. # print(f"{len(features_0)}/{len(features_1)} point before")
  47. # features_0 = np.array(list(filter(isNotIn(cut), features_0)))
  48. # features_1 = np.array(list(filter(isNotIn(cut), features_1)))
  49. # print(f"{len(features_0)}/{len(features_1)} points after")
  50. return DataSet(data0=features_0, data1=features_1)
  51. def getRandGen(initValue, incValue=257, multValue=101, modulus=65537):
  52. value = initValue
  53. while True:
  54. value = ((multValue * value) + incValue) % modulus
  55. yield value
  56. def genShuffler():
  57. randGen = getRandGen(2021)
  58. def shuffler(data):
  59. data = list(data)
  60. size = len(data)
  61. shuffled = []
  62. while size > 0:
  63. p = next(randGen) % size
  64. size -= 1
  65. shuffled.append(data[p])
  66. data = data[0:p] + data[(p + 1):]
  67. return np.array(shuffled)
  68. return shuffler
  69. def runExerciseForSimpleGAN(datasetName):
  70. ganName = "SimpleGAN"
  71. print()
  72. print()
  73. print("///////////////////////////////////////////")
  74. print(f"// Running {ganName} on {datasetName}")
  75. print("///////////////////////////////////////////")
  76. print()
  77. data = loadDataset(f"data_input/{datasetName}")
  78. gan = SimpleGan(numOfFeatures=data.data0.shape[1])
  79. random.seed(2021)
  80. shuffler = genShuffler()
  81. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  82. exercise.run(gan, data)
  83. exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  84. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  85. def runExerciseForRepeater(datasetName):
  86. ganName = "Repeater"
  87. print()
  88. print()
  89. print("///////////////////////////////////////////")
  90. print(f"// Running {ganName} on {datasetName}")
  91. print("///////////////////////////////////////////")
  92. print()
  93. data = loadDataset(f"data_input/{datasetName}")
  94. gan = Repeater()
  95. random.seed(2021)
  96. shuffler = genShuffler()
  97. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  98. exercise.run(gan, data)
  99. exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  100. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  101. def runExerciseForSpheredNoise(datasetName, resultList=None):
  102. ganName = "SpheredNoise"
  103. print()
  104. print()
  105. print("///////////////////////////////////////////")
  106. print(f"// Running {ganName} on {datasetName}")
  107. print("///////////////////////////////////////////")
  108. print()
  109. data = loadDataset(f"data_input/{datasetName}")
  110. gan = SpheredNoise()
  111. random.seed(2021)
  112. shuffler = genShuffler()
  113. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  114. exercise.run(gan, data)
  115. avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  116. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  117. if resultList is not None:
  118. resultList[datasetName] = avg
  119. def runExerciseForConvGAN(datasetName, resultList=None):
  120. ganName = "convGAN"
  121. print()
  122. print()
  123. print("///////////////////////////////////////////")
  124. print(f"// Running {ganName} on {datasetName}")
  125. print("///////////////////////////////////////////")
  126. print()
  127. data = loadDataset(f"data_input/{datasetName}")
  128. gan = ConvGAN(data.data0.shape[1])
  129. random.seed(2021)
  130. shuffler = genShuffler()
  131. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  132. exercise.run(gan, data)
  133. avg = exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  134. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  135. if resultList is not None:
  136. resultList[datasetName] = avg
  137. def runSpeedTestForConvGan(datasetName, ganGenerator):
  138. ganName = "convGAN"
  139. print()
  140. print()
  141. print("///////////////////////////////////////////")
  142. print(f"// Running speed test for {ganName} on {datasetName}")
  143. print("///////////////////////////////////////////")
  144. print()
  145. d = []
  146. t1 = time.time()
  147. data = loadDataset(f"data_input/{datasetName}")
  148. gan = ganGenerator(data.data0.shape[1])
  149. random.seed(2021)
  150. shuffler = genShuffler()
  151. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=3, numOfSlices=3)
  152. exercise.debug = (lambda _x: None)
  153. t2 = time.time()
  154. exercise.run(gan, data)
  155. t3 = time.time()
  156. d = (t3 - t1, t2 - t1, t3 - t2)
  157. print(f"Total Time: {d[0]}")
  158. print(f"Preparation Time: {d[1]}")
  159. print(f"Test Time: {d[2]}")
  160. return d, gan
  161. testSets = [
  162. "folding_abalone_17_vs_7_8_9_10",
  163. "folding_abalone9-18",
  164. "folding_car_good",
  165. "folding_car-vgood",
  166. "folding_flare-F",
  167. "folding_hypothyroid",
  168. "folding_kddcup-guess_passwd_vs_satan",
  169. "folding_kr-vs-k-three_vs_eleven",
  170. "folding_kr-vs-k-zero-one_vs_draw",
  171. "folding_shuttle-2_vs_5",
  172. "folding_winequality-red-4",
  173. "folding_yeast4",
  174. "folding_yeast5",
  175. "folding_yeast6"
  176. ]
  177. def runAllTestSets(dataSetList):
  178. for dsFileName in dataSetList:
  179. runExerciseForSimpleGAN(dataSetList)
  180. runExerciseForRepeater(dataSetList)