autoencoder.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. import numpy as np
  2. from library.interfaces import GanBaseClass
  3. from library.dataset import DataSet
  4. from sklearn.decomposition import PCA
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import f1_score
  7. from sklearn.metrics import cohen_kappa_score
  8. from sklearn.metrics import precision_score
  9. from sklearn.metrics import recall_score
  10. from sklearn.neighbors import NearestNeighbors
  11. from sklearn.utils import shuffle
  12. from imblearn.datasets import fetch_datasets
  13. from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
  14. from keras.models import Model
  15. from keras import backend as K
  16. from tqdm import tqdm
  17. import tensorflow as tf
  18. from tensorflow.keras.optimizers import Adam
  19. from tensorflow.keras.layers import Lambda
  20. from library.NNSearch import NNSearch
  21. import warnings
  22. warnings.filterwarnings("ignore")
  23. def newDense(size, activation="softsign"):
  24. initializer = tf.keras.initializers.RandomUniform(minval=0.00001, maxval=float(size))
  25. #initializer = "glorot_uniform"
  26. return Dense(int(size)
  27. , activation=activation
  28. #, kernel_initializer=initializer
  29. , bias_initializer=initializer
  30. )
  31. class Autoencoder(GanBaseClass):
  32. """
  33. This is a toy example of a GAN.
  34. It repeats the first point of the training-data-set.
  35. """
  36. def __init__(self, n_feat, middleSize=4, eps=0.0001, debug=True):
  37. self.isTrained = False
  38. self.n_feat = n_feat
  39. self.middleSize = middleSize
  40. self.eps = eps
  41. self.debug = debug
  42. self.dataSet = None
  43. self.decoder = None
  44. self.encoder = None
  45. self.autoencoder = None
  46. self.cg = None
  47. self.scaler = 1.0
  48. self.lossFn = "mse"
  49. self.lossFn = "mean_squared_logarithmic_error"
  50. def reset(self):
  51. """
  52. Resets the trained GAN to an random state.
  53. """
  54. self.isTrained = False
  55. self.scaler = 1.0
  56. ## instanciate discriminator network and visualize architecture
  57. self.encoder = self._createEncoder()
  58. ## instanciate generator network and visualize architecture
  59. self.decoder = self._createDecoder()
  60. ## instanciate network and visualize architecture
  61. self.autoencoder = self._createAutoencoder(self.encoder, self.decoder)
  62. def train(self, dataSet):
  63. """
  64. Trains the GAN.
  65. It stores the data points in the training data set and mark as trained.
  66. *dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
  67. We are only interested in the first *maxListSize* points in class 1.
  68. """
  69. if dataSet.data1.shape[0] <= 0:
  70. raise AttributeError("Train: Expected data class 1 to contain at least one point.")
  71. d = dataSet.data1
  72. self.data1 = d
  73. self.scaler = 1.1 * tf.reduce_max(tf.abs(d)).numpy()
  74. scaleDown = 1.0 / self.scaler
  75. lastLoss = 0.0
  76. print(f"scaler: {self.scaler}")
  77. for epoch in range(100):
  78. h = self.autoencoder.fit(d, scaleDown * d, epochs=10, shuffle=True)
  79. print(str(d[0]) + " →")
  80. print(self.scaler * self.autoencoder.predict(np.array([d[0]])))
  81. loss = h.history["loss"][-1]
  82. if loss < self.eps:
  83. print(f"done in {epoch} rounds")
  84. break
  85. if epoch == 0:
  86. lastLoss = loss
  87. else:
  88. print(f"Loss: {lastLoss} → {loss}")
  89. if abs(lastLoss - loss) < (0.1 * self.eps) and epoch > 10:
  90. print(f"converged in {epoch} rounds")
  91. break
  92. else:
  93. lastLoss = loss
  94. code = self.encoder.predict(d)
  95. center = np.zeros(self.middleSize)
  96. for c in code:
  97. center = center + c
  98. center = (1.0 / float(d.shape[0])) * center
  99. d = 0.0
  100. for c in code:
  101. d = max(d, tf.reduce_max(tf.abs(c - center)).numpy())
  102. self.noise = (center, d)
  103. self.isTrained = True
  104. def generateDataPoint(self):
  105. """
  106. Returns one synthetic data point by repeating the stored list.
  107. """
  108. return (self.generateData(1))[0]
  109. def generateData(self, numOfSamples=1):
  110. """
  111. Generates a list of synthetic data-points.
  112. *numOfSamples* is a integer > 0. It gives the number of new generated samples.
  113. """
  114. if not self.isTrained:
  115. raise ValueError("Try to generate data with untrained Re.")
  116. noise = self.noise[0] + np.random.normal(0.0, self.noise[1], [numOfSamples, self.middleSize])
  117. syntheticPoints = self.decoder.predict(noise)
  118. # syntheticPoints = []
  119. # while len(syntheticPoints) < numOfSamples:
  120. # nRest = max(0, numOfSamples - len(syntheticPoints))
  121. # nBatch = min(nRest, len(self.data1))
  122. # syntheticPoints.extend(self.autoencoder.predict(self.data1[:nBatch]))
  123. return self.scaler * np.array(syntheticPoints)
  124. # ###############################################################
  125. # Hidden internal functions
  126. # ###############################################################
  127. # Creating the GAN
  128. def _createEncoder(self):
  129. """
  130. the generator network to generate synthetic samples from the convex space
  131. of arbitrary minority neighbourhoods
  132. """
  133. ## takes minority batch as input
  134. dataIn = Input(shape=(self.n_feat,))
  135. x = dataIn
  136. ##
  137. n = self.n_feat // 2
  138. #x = newDense(max(n, self.middleSize))(x)
  139. x = newDense(self.n_feat)(x)
  140. x = newDense(self.middleSize)(x)
  141. model = Model(inputs=dataIn, outputs=x)
  142. opt = Adam(learning_rate=0.01)
  143. model.compile(loss='mean_squared_logarithmic_error', optimizer=opt)
  144. print("encoder")
  145. model.summary()
  146. return model
  147. def _createDecoder(self):
  148. """
  149. the generator network to generate synthetic samples from the convex space
  150. of arbitrary minority neighbourhoods
  151. """
  152. ## takes minority batch as input
  153. dataIn = Input(shape=(self.middleSize,))
  154. x = dataIn
  155. ##
  156. n = self.n_feat // 2
  157. x = newDense(max(n, self.middleSize))(x)
  158. #x = newDense(self.n_feat)(x)
  159. x = newDense(self.n_feat)(x)
  160. model = Model(inputs=dataIn, outputs=x)
  161. opt = Adam(learning_rate=0.01)
  162. model.compile(loss='mean_squared_logarithmic_error', optimizer=opt)
  163. print("decoder")
  164. model.summary()
  165. return model
  166. def _createAutoencoder(self, encoder, decoder):
  167. """
  168. for joining the generator and the discriminator
  169. conv_coeff_generator-> generator network instance
  170. maj_min_discriminator -> discriminator network instance
  171. """
  172. #encoder.trainable = False
  173. ## input receives a neighbourhood minority batch
  174. ## and a proximal majority batch concatenated
  175. dataIn = Input(shape=(self.n_feat,))
  176. #x = newDense(self.middleSize)(dataIn)
  177. #x = newDense(self.n_feat)(x)
  178. #x = newDense(self.n_feat)(x)
  179. x = encoder(dataIn )
  180. x = decoder(x)
  181. ## note that, the discriminator will not be traied but will make decisions based
  182. ## on its previous training while using this function
  183. model = Model(inputs=dataIn, outputs=x)
  184. opt = Adam(learning_rate=0.01)
  185. model.compile(loss=self.lossFn, optimizer=opt)
  186. print("autoencoder")
  187. model.summary()
  188. return model