# autoencoder.py
  1. import numpy as np
  2. from library.interfaces import GanBaseClass
  3. from library.dataset import DataSet
  4. from sklearn.decomposition import PCA
  5. from sklearn.metrics import confusion_matrix
  6. from sklearn.metrics import f1_score
  7. from sklearn.metrics import cohen_kappa_score
  8. from sklearn.metrics import precision_score
  9. from sklearn.metrics import recall_score
  10. from sklearn.neighbors import NearestNeighbors
  11. from sklearn.utils import shuffle
  12. from imblearn.datasets import fetch_datasets
  13. from keras.layers import Dense, Input, Multiply, Flatten, Conv1D, Reshape
  14. from keras.models import Model
  15. from keras import backend as K
  16. from tqdm import tqdm
  17. import tensorflow as tf
  18. from tensorflow.keras.optimizers import Adam
  19. from tensorflow.keras.layers import Lambda
  20. from library.NNSearch import NNSearch
  21. import warnings
  22. warnings.filterwarnings("ignore")
  23. lossFunction = "mean_squared_logarithmic_error"
  24. #lossFunction = "mse"
  25. def newDense(size, activation="relu"): # softsign
  26. initializer = tf.keras.initializers.RandomUniform(minval=0.00001, maxval=float(size))
  27. initializer = "glorot_uniform"
  28. return Dense(int(size)
  29. , activation=activation
  30. , kernel_initializer=initializer
  31. , bias_initializer=initializer
  32. )
  33. class Autoencoder(GanBaseClass):
  34. """
  35. This is a toy example of a GAN.
  36. It repeats the first point of the training-data-set.
  37. """
  38. def __init__(self, n_feat, middleSize=4, eps=0.0001, debug=True):
  39. self.isTrained = False
  40. self.n_feat = n_feat
  41. self.middleSize = middleSize
  42. self.eps = eps
  43. self.debug = debug
  44. self.dataSet = None
  45. self.decoder = None
  46. self.encoder = None
  47. self.autoencoder = None
  48. self.cg = None
  49. self.scaler = 1.0
  50. self.lossFn = lossFunction #"mse"
  51. self.lossFn = "mean_squared_logarithmic_error"
  52. def reset(self):
  53. """
  54. Resets the trained GAN to an random state.
  55. """
  56. self.isTrained = False
  57. self.scaler = 1.0
  58. ## instanciate discriminator network and visualize architecture
  59. self.encoder = self._createEncoder()
  60. ## instanciate generator network and visualize architecture
  61. self.decoder = self._createDecoder()
  62. ## instanciate network and visualize architecture
  63. self.autoencoder = self._createAutoencoder(self.encoder, self.decoder)
  64. def train(self, dataSet):
  65. """
  66. Trains the GAN.
  67. It stores the data points in the training data set and mark as trained.
  68. *dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
  69. We are only interested in the first *maxListSize* points in class 1.
  70. """
  71. if dataSet.data1.shape[0] <= 0:
  72. raise AttributeError("Train: Expected data class 1 to contain at least one point.")
  73. d = dataSet.data1
  74. self.data1 = d
  75. self.scaler = 1.5 * tf.reduce_max(tf.abs(d)).numpy()
  76. scaleDown = 1.0 / self.scaler
  77. lastLoss = 0.0
  78. print(f"scaler: {self.scaler}")
  79. dScaled = scaleDown * d
  80. for epoch in range(1000):
  81. h = self.autoencoder.fit(d, dScaled, epochs=1, shuffle=True)
  82. #print(str(d[0]) + " →")
  83. #print(self.scaler * self.autoencoder.predict(np.array([d[0]])))
  84. loss = h.history["loss"][-1]
  85. if loss < self.eps:
  86. print(f"done in {epoch} rounds")
  87. break
  88. if epoch == 0:
  89. lastLoss = loss
  90. else:
  91. print(f"Loss: {lastLoss} → {loss}")
  92. if abs(lastLoss - loss) < (0.1 * self.eps) and epoch > 10:
  93. print(f"converged in {epoch} rounds")
  94. break
  95. else:
  96. lastLoss = loss
  97. code = self.encoder.predict(d)
  98. center = np.zeros(self.middleSize)
  99. for c in code:
  100. center = center + c
  101. center = (1.0 / float(d.shape[0])) * center
  102. d = 0.0
  103. for c in code:
  104. d = max(d, tf.reduce_max(tf.abs(c - center)).numpy())
  105. self.noise = (center, d)
  106. self.isTrained = True
  107. def generateDataPoint(self):
  108. """
  109. Returns one synthetic data point by repeating the stored list.
  110. """
  111. return (self.generateData(1))[0]
  112. def generateData(self, numOfSamples=1):
  113. """
  114. Generates a list of synthetic data-points.
  115. *numOfSamples* is a integer > 0. It gives the number of new generated samples.
  116. """
  117. if not self.isTrained:
  118. raise ValueError("Try to generate data with untrained Re.")
  119. noise = self.noise[0] + np.random.normal(0.0, self.noise[1], [numOfSamples, self.middleSize])
  120. syntheticPoints = self.decoder.predict(noise)
  121. # syntheticPoints = []
  122. # while len(syntheticPoints) < numOfSamples:
  123. # nRest = max(0, numOfSamples - len(syntheticPoints))
  124. # nBatch = min(nRest, len(self.data1))
  125. # syntheticPoints.extend(self.autoencoder.predict(self.data1[:nBatch]))
  126. return self.scaler * np.array(syntheticPoints)
  127. # ###############################################################
  128. # Hidden internal functions
  129. # ###############################################################
  130. # Creating the GAN
  131. def _createEncoder(self):
  132. """
  133. the generator network to generate synthetic samples from the convex space
  134. of arbitrary minority neighbourhoods
  135. """
  136. ## takes minority batch as input
  137. dataIn = Input(shape=(self.n_feat,))
  138. x = dataIn
  139. x = newDense(self.n_feat)(x)
  140. ##
  141. n = self.n_feat // 2
  142. x = newDense(max(n, self.middleSize))(x)
  143. x = newDense(self.middleSize)(x)
  144. model = Model(inputs=dataIn, outputs=x)
  145. opt = Adam(learning_rate=0.01)
  146. model.compile(loss=lossFunction, optimizer=opt)
  147. print("encoder")
  148. model.summary()
  149. return model
  150. def _createDecoder(self):
  151. """
  152. the generator network to generate synthetic samples from the convex space
  153. of arbitrary minority neighbourhoods
  154. """
  155. ## takes minority batch as input
  156. dataIn = Input(shape=(self.middleSize,))
  157. x = dataIn
  158. ##
  159. n = self.n_feat // 2
  160. #x = newDense(max(n, self.middleSize))(x)
  161. #x = newDense(self.n_feat)(x)
  162. x = newDense(self.n_feat)(x)
  163. model = Model(inputs=dataIn, outputs=x)
  164. opt = Adam(learning_rate=0.01)
  165. model.compile(loss=lossFunction, optimizer=opt)
  166. print("decoder")
  167. model.summary()
  168. return model
  169. def _createAutoencoder(self, encoder, decoder):
  170. """
  171. for joining the generator and the discriminator
  172. conv_coeff_generator-> generator network instance
  173. maj_min_discriminator -> discriminator network instance
  174. """
  175. #encoder.trainable = False
  176. ## input receives a neighbourhood minority batch
  177. ## and a proximal majority batch concatenated
  178. dataIn = Input(shape=(self.n_feat,))
  179. #x = newDense(self.middleSize)(dataIn)
  180. #x = newDense(self.n_feat)(x)
  181. #x = newDense(self.n_feat)(x)
  182. x = encoder(dataIn )
  183. x = decoder(x)
  184. ## note that, the discriminator will not be traied but will make decisions based
  185. ## on its previous training while using this function
  186. model = Model(inputs=dataIn, outputs=x)
  187. opt = Adam(learning_rate=0.01)
  188. model.compile(loss=self.lossFn, optimizer=opt)
  189. print("autoencoder")
  190. model.summary()
  191. return model