# autoencoder.py
  1. import numpy as np
  2. from library.interfaces import GanBaseClass
  3. from library.dataset import DataSet
  4. from sklearn.decomposition import PCA
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import f1_score
  7. from sklearn.metrics import cohen_kappa_score
  8. from sklearn.metrics import precision_score
  9. from sklearn.metrics import recall_score
  10. from sklearn.neighbors import NearestNeighbors
  11. from sklearn.utils import shuffle
  12. from imblearn.datasets import fetch_datasets
  13. from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
  14. from keras.models import Model
  15. from keras import backend as K
  16. from tqdm import tqdm
  17. import tensorflow as tf
  18. from tensorflow.keras.optimizers import Adam
  19. from tensorflow.keras.layers import Lambda
  20. from library.NNSearch import NNSearch
  21. import warnings
  22. warnings.filterwarnings("ignore")
  23. lossFunction = "mean_squared_logarithmic_error"
  24. #lossFunction = "mse"
  25. def newDense(size, activation="relu"): # softsign
  26. initializer = tf.keras.initializers.RandomUniform(minval=0.00001, maxval=float(size))
  27. initializer = "glorot_uniform"
  28. return Dense(int(size)
  29. , activation=activation
  30. , kernel_initializer=initializer
  31. , bias_initializer=initializer
  32. )
  33. class Autoencoder(GanBaseClass):
  34. """
  35. This is a toy example of a GAN.
  36. It repeats the first point of the training-data-set.
  37. """
  38. def __init__(self, n_feat, middleSize=4, eps=0.0001, debug=True):
  39. self.canPredict = False
  40. self.isTrained = False
  41. self.n_feat = n_feat
  42. self.middleSize = middleSize
  43. self.eps = eps
  44. self.debug = debug
  45. self.dataSet = None
  46. self.decoder = None
  47. self.encoder = None
  48. self.autoencoder = None
  49. self.cg = None
  50. self.scaler = 1.0
  51. self.lossFn = lossFunction #"mse"
  52. self.lossFn = "mean_squared_logarithmic_error"
  53. def reset(self, _dataSet):
  54. """
  55. Resets the trained GAN to an random state.
  56. """
  57. self.isTrained = False
  58. self.scaler = 1.0
  59. ## instanciate discriminator network and visualize architecture
  60. self.encoder = self._createEncoder()
  61. ## instanciate generator network and visualize architecture
  62. self.decoder = self._createDecoder()
  63. ## instanciate network and visualize architecture
  64. self.autoencoder = self._createAutoencoder(self.encoder, self.decoder)
  65. def train(self, dataSet):
  66. """
  67. Trains the GAN.
  68. It stores the data points in the training data set and mark as trained.
  69. *dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
  70. We are only interested in the first *maxListSize* points in class 1.
  71. """
  72. if dataSet.data1.shape[0] <= 0:
  73. raise AttributeError("Train: Expected data class 1 to contain at least one point.")
  74. d = dataSet.data1
  75. self.data1 = d
  76. self.scaler = 1.5 * tf.reduce_max(tf.abs(d)).numpy()
  77. scaleDown = 1.0 / self.scaler
  78. lastLoss = 0.0
  79. print(f"scaler: {self.scaler}")
  80. dScaled = scaleDown * d
  81. for epoch in range(1000):
  82. h = self.autoencoder.fit(d, dScaled, epochs=1, shuffle=True)
  83. #print(str(d[0]) + " →")
  84. #print(self.scaler * self.autoencoder.predict(np.array([d[0]])))
  85. loss = h.history["loss"][-1]
  86. if loss < self.eps:
  87. print(f"done in {epoch} rounds")
  88. break
  89. if epoch == 0:
  90. lastLoss = loss
  91. else:
  92. print(f"Loss: {lastLoss} → {loss}")
  93. if abs(lastLoss - loss) < (0.1 * self.eps) and epoch > 10:
  94. print(f"converged in {epoch} rounds")
  95. break
  96. else:
  97. lastLoss = loss
  98. code = self.encoder.predict(d)
  99. center = np.zeros(self.middleSize)
  100. for c in code:
  101. center = center + c
  102. center = (1.0 / float(d.shape[0])) * center
  103. d = 0.0
  104. for c in code:
  105. d = max(d, tf.reduce_max(tf.abs(c - center)).numpy())
  106. self.noise = (center, d)
  107. self.isTrained = True
  108. def generateDataPoint(self):
  109. """
  110. Returns one synthetic data point by repeating the stored list.
  111. """
  112. return (self.generateData(1))[0]
  113. def generateData(self, numOfSamples=1):
  114. """
  115. Generates a list of synthetic data-points.
  116. *numOfSamples* is a integer > 0. It gives the number of new generated samples.
  117. """
  118. if not self.isTrained:
  119. raise ValueError("Try to generate data with untrained Re.")
  120. noise = self.noise[0] + np.random.normal(0.0, self.noise[1], [numOfSamples, self.middleSize])
  121. syntheticPoints = self.decoder.predict(noise)
  122. # syntheticPoints = []
  123. # while len(syntheticPoints) < numOfSamples:
  124. # nRest = max(0, numOfSamples - len(syntheticPoints))
  125. # nBatch = min(nRest, len(self.data1))
  126. # syntheticPoints.extend(self.autoencoder.predict(self.data1[:nBatch]))
  127. return self.scaler * np.array(syntheticPoints)
  128. # ###############################################################
  129. # Hidden internal functions
  130. # ###############################################################
  131. # Creating the GAN
  132. def _createEncoder(self):
  133. """
  134. the generator network to generate synthetic samples from the convex space
  135. of arbitrary minority neighbourhoods
  136. """
  137. ## takes minority batch as input
  138. dataIn = Input(shape=(self.n_feat,))
  139. x = dataIn
  140. x = newDense(self.n_feat)(x)
  141. ##
  142. n = self.n_feat // 2
  143. x = newDense(max(n, self.middleSize))(x)
  144. x = newDense(self.middleSize)(x)
  145. model = Model(inputs=dataIn, outputs=x)
  146. opt = Adam(learning_rate=0.01)
  147. model.compile(loss=lossFunction, optimizer=opt)
  148. print("encoder")
  149. model.summary()
  150. return model
  151. def _createDecoder(self):
  152. """
  153. the generator network to generate synthetic samples from the convex space
  154. of arbitrary minority neighbourhoods
  155. """
  156. ## takes minority batch as input
  157. dataIn = Input(shape=(self.middleSize,))
  158. x = dataIn
  159. ##
  160. n = self.n_feat // 2
  161. #x = newDense(max(n, self.middleSize))(x)
  162. #x = newDense(self.n_feat)(x)
  163. x = newDense(self.n_feat)(x)
  164. model = Model(inputs=dataIn, outputs=x)
  165. opt = Adam(learning_rate=0.01)
  166. model.compile(loss=lossFunction, optimizer=opt)
  167. print("decoder")
  168. model.summary()
  169. return model
  170. def _createAutoencoder(self, encoder, decoder):
  171. """
  172. for joining the generator and the discriminator
  173. conv_coeff_generator-> generator network instance
  174. maj_min_discriminator -> discriminator network instance
  175. """
  176. #encoder.trainable = False
  177. ## input receives a neighbourhood minority batch
  178. ## and a proximal majority batch concatenated
  179. dataIn = Input(shape=(self.n_feat,))
  180. #x = newDense(self.middleSize)(dataIn)
  181. #x = newDense(self.n_feat)(x)
  182. #x = newDense(self.n_feat)(x)
  183. x = encoder(dataIn )
  184. x = decoder(x)
  185. ## note that, the discriminator will not be traied but will make decisions based
  186. ## on its previous training while using this function
  187. model = Model(inputs=dataIn, outputs=x)
  188. opt = Adam(learning_rate=0.01)
  189. model.compile(loss=self.lossFn, optimizer=opt)
  190. print("autoencoder")
  191. model.summary()
  192. return model