analysis.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. from library.exercise import Exercise
  2. from library.dataset import DataSet, TrainTestData
  3. from library.GanExamples import StupidToyListGan
  4. from library.SimpleGan import SimpleGan
  5. from library.Repeater import Repeater
  6. import pickle
  7. import numpy as np
  8. import random
  9. from imblearn.datasets import fetch_datasets
  10. def loadDataset(datasetName):
  11. pickle_in = open(f"{datasetName}.pickle", "rb")
  12. pickle_dict = pickle.load(pickle_in)
  13. myData = pickle_dict["folding"]
  14. k = myData[0]
  15. labels = np.concatenate((k[1], k[3]), axis=0).astype(float)
  16. features = np.concatenate((k[0], k[2]), axis=0).astype(float)
  17. label_1 = list(np.where(labels == 1)[0])
  18. label_0 = list(np.where(labels == 0)[0])
  19. features_1 = features[label_1]
  20. features_0 = features[label_0]
  21. return DataSet(data0=features_0, data1=features_1)
  22. def getRandGen(initValue, incValue=257, multValue=101, modulus=65537):
  23. value = initValue
  24. while True:
  25. value = ((multValue * value) + incValue) % modulus
  26. yield value
  27. def genShuffler():
  28. randGen = getRandGen(2021)
  29. def shuffler(data):
  30. data = list(data)
  31. size = len(data)
  32. shuffled = []
  33. while size > 0:
  34. p = next(randGen) % size
  35. size -= 1
  36. shuffled.append(data[p])
  37. data = data[0:p] + data[(p + 1):]
  38. return np.array(shuffled)
  39. return shuffler
  40. def runExerciseForSimpleGAN(datasetName):
  41. ganName = "SimpleGAN"
  42. print()
  43. print()
  44. print("///////////////////////////////////////////")
  45. print(f"// Running {ganName} on {datasetName}")
  46. print("///////////////////////////////////////////")
  47. print()
  48. data = loadDataset(f"data_input/{datasetName}")
  49. gan = SimpleGan(numOfFeatures=data.data0.shape[1])
  50. random.seed(2021)
  51. shuffler = genShuffler()
  52. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  53. exercise.run(gan, data)
  54. exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  55. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  56. def runExerciseForRepeater(datasetName):
  57. ganName = "Repeater"
  58. print()
  59. print()
  60. print("///////////////////////////////////////////")
  61. print(f"// Running {ganName} on {datasetName}")
  62. print("///////////////////////////////////////////")
  63. print()
  64. data = loadDataset(f"data_input/{datasetName}")
  65. gan = Repeater()
  66. random.seed(2021)
  67. shuffler = genShuffler()
  68. exercise = Exercise(shuffleFunction=shuffler, numOfShuffles=5, numOfSlices=5)
  69. exercise.run(gan, data)
  70. exercise.saveResultsTo(f"data_result/{datasetName}-{ganName}.csv")
  71. exercise.saveResultsTo(f"data_result/{ganName}-{datasetName}.csv")
  72. testSets = [
  73. "folding_abalone_17_vs_7_8_9_10",
  74. "folding_abalone9-18",
  75. "folding_car_good",
  76. "folding_car-vgood",
  77. "folding_flare-F",
  78. "folding_hypothyroid",
  79. "folding_kddcup-guess_passwd_vs_satan",
  80. "folding_kr-vs-k-three_vs_eleven",
  81. "folding_kr-vs-k-zero-one_vs_draw",
  82. "folding_shuttle-2_vs_5",
  83. "folding_winequality-red-4",
  84. "folding_yeast4",
  85. "folding_yeast5",
  86. "folding_yeast6"
  87. ]
  88. def runAllTestSets(dataSetList):
  89. for dsFileName in dataSetList:
  90. runExerciseForSimpleGAN(dataSetList)
  91. runExerciseForRepeater(dataSetList)