# LoGAN.py — GAN with t-LoRAS noise for oversampling imbalanced data.
import math

import numpy as np
import pandas as pd
from imblearn.datasets import fetch_datasets
from keras.layers import Dense, Dropout, Input
from keras.layers.advanced_activations import LeakyReLU
from keras.models import Model, Sequential
from keras.optimizers import Adam
from sklearn.manifold import TSNE
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm
  12. def adam_optimizer():
  13. return Adam(lr=0.0002, beta_1=0.5)
  14. def Neb_grps(data, near_neb):
  15. nbrs = NearestNeighbors(n_neighbors=near_neb, algorithm='ball_tree').fit(data)
  16. _distances, indices = nbrs.kneighbors(data)
  17. neb_class = list(indices)
  18. return np.asarray(neb_class)
  19. class GanTrainParameters:
  20. """
  21. Parameters for Training the GAN Network.
  22. """
  23. def __init__(self, n_feat, batch_size, min_t, features_0_trn, features_1_trn):
  24. self.batch_size = batch_size
  25. self.n_feat = n_feat
  26. self.min_t = min_t
  27. self.features_0_trn = features_0_trn
  28. self.features_1_trn = features_1_trn
  29. def im_batch_creator_min(self):
  30. nbd = Neb_grps(self.min_t, self.batch_size)
  31. rand = np.random.randint(low=0, high=self.features_1_trn.shape[0], size=1)
  32. idx = tuple(list(nbd[rand]))
  33. image_batch = self.features_1_trn[idx]
  34. return image_batch
  35. def im_batch_creator_maj(self):
  36. rand = np.random.randint(low=0, high=self.features_0_trn.shape[0], size=self.batch_size)
  37. image_batch = np.reshape(self.features_0_trn[rand[:,None]], (self.batch_size, self.n_feat))
  38. return image_batch
  39. class TLoRasNoise:
  40. """
  41. Noise function
  42. """
  43. def __init__(self, shadow=50, sigma=.005, num_afcomb=7):
  44. self.shadow = shadow
  45. self.sigma = sigma
  46. self.num_afcomb = num_afcomb
  47. def tLoRAS(self, data, num_samples, num_RACOS):
  48. np.random.seed(42)
  49. data_shadow = np.asarray([
  50. d + np.random.normal(0, self.sigma)
  51. for d in data[:num_samples]
  52. for _c in range(self.shadow)
  53. ])
  54. return np.asarray([
  55. self.shadowLcDataPoint(num_samples, data_shadow)
  56. for _i in range(num_RACOS)
  57. ])
  58. def shadowLcDataPoint(self, num_samples, data_shadow):
  59. idx = np.random.randint(self.shadow * num_samples, size=self.num_afcomb)
  60. w = np.random.randint(100, size=len(idx))
  61. aff_w = np.asarray(w/sum(w))
  62. data_tsl = np.array(data_shadow)[idx,:]
  63. return np.dot(aff_w, data_tsl)
  64. def noise(self, data, batch_size):
  65. return self.tLoRAS(data=data, num_samples=batch_size, num_RACOS=batch_size)
  66. class GAN:
  67. """
  68. Class for GAN.
  69. """
  70. def __init__(self, n_feat=1, noise=None):
  71. self.n_feat = n_feat
  72. if noise is None:
  73. self.noise = TLoRasNoise()
  74. else:
  75. self.noise = noise
  76. self.gan = self.create_gan(
  77. self.create_discriminator_min(),
  78. self.create_discriminator_maj(),
  79. self.create_generator())
  80. def create_generator(self):
  81. generator=Sequential()
  82. generator.add(Dense(units=25, input_dim=self.n_feat))
  83. generator.add(LeakyReLU(0.2))
  84. generator.add(Dense(units=256))
  85. generator.add(LeakyReLU(0.2))
  86. generator.add(Dense(units=512))
  87. generator.add(LeakyReLU(0.2))
  88. generator.add(Dense(units=256))
  89. generator.add(LeakyReLU(0.2))
  90. generator.add(Dense(units=25))
  91. generator.add(LeakyReLU(0.2))
  92. generator.add(Dense(units=self.n_feat))
  93. generator.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
  94. return generator
  95. def create_discriminator_min(self):
  96. discriminator=Sequential()
  97. discriminator.add(Dense(units=1024,input_dim=self.n_feat))
  98. discriminator.add(LeakyReLU(0.2))
  99. discriminator.add(Dropout(0.3))
  100. discriminator.add(Dense(units=512))
  101. discriminator.add(LeakyReLU(0.2))
  102. discriminator.add(Dropout(0.3))
  103. discriminator.add(Dense(units=256))
  104. discriminator.add(LeakyReLU(0.2))
  105. discriminator.add(Dense(units=1, activation='sigmoid'))
  106. discriminator.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
  107. return discriminator
  108. def create_discriminator_maj(self):
  109. discriminator=Sequential()
  110. discriminator.add(Dense(units=1024,input_dim=self.n_feat))
  111. discriminator.add(LeakyReLU(0.2))
  112. discriminator.add(Dropout(0.3))
  113. discriminator.add(Dense(units=512))
  114. discriminator.add(LeakyReLU(0.2))
  115. discriminator.add(Dropout(0.3))
  116. discriminator.add(Dense(units=256))
  117. discriminator.add(LeakyReLU(0.2))
  118. discriminator.add(Dense(units=1, activation='sigmoid'))
  119. discriminator.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
  120. return discriminator
  121. def create_gan(self, discriminator_min, discriminator_maj, generator):
  122. discriminator_min.trainable=False
  123. discriminator_maj.trainable=False
  124. gan_input = Input(shape=(self.n_feat,))
  125. x = generator(gan_input)
  126. gan_output_min= discriminator_min(x)
  127. gan_output_maj= discriminator_maj(x)
  128. gan = Model(inputs=gan_input, outputs=[gan_output_min,gan_output_maj])
  129. gan.compile(loss=['binary_crossentropy','binary_crossentropy'], optimizer='adam')
  130. self.generator = generator
  131. self.discriminator_min = discriminator_min
  132. self.discriminator_maj = discriminator_maj
  133. return gan
  134. def train(self, parameters):
  135. for e in range(1,30+1 ):
  136. print(e)
  137. for _i in tqdm(range(parameters.batch_size)):
  138. # Get a random set of real images
  139. image_batch = parameters.im_batch_creator_min()
  140. #generate random noise as an input to initialize the generator
  141. noise_min = self.noise.noise(image_batch, parameters.batch_size)
  142. # Generate fake samples from noised input
  143. generated_images = self.generator.predict(noise_min)
  144. #Construct different batches of real and fake data
  145. X = np.concatenate((image_batch, generated_images))
  146. # Labels for generated and real data
  147. y_dis = np.zeros(2* parameters.batch_size)
  148. y_dis[: parameters.batch_size]=0.9
  149. #Pre train discriminator_min on fake and real data before starting the gan.
  150. self.discriminator_min.trainable = True
  151. _d_loss_min = self.discriminator_min.train_on_batch(X, y_dis)
  152. if e==0 or e>15:
  153. image_batch_maj = parameters.im_batch_creator_maj()
  154. X_maj = np.concatenate((image_batch_maj, generated_images))
  155. y_dis_maj=np.ones(2* parameters.batch_size)+1
  156. y_dis_maj[: parameters.batch_size]=0
  157. #Pre train discriminator_maj on fake and real data before starting the gan.
  158. self.discriminator_maj.trainable = True
  159. _d_loss_maj = self.discriminator_maj.train_on_batch(X_maj, y_dis_maj)
  160. #Tricking the noised input of the Generator as real data
  161. noise = self.noise.noise(image_batch, parameters.batch_size)
  162. y_gen_min = np.ones(parameters.batch_size)
  163. # During the training of gan,
  164. # the weights of discriminator should be fixed.
  165. #We can enforce that by setting the trainable flag
  166. self.discriminator_min.trainable = False
  167. self.discriminator_maj.trainable = False
  168. #training the GAN by alternating the training of the Discriminator
  169. #and training the chained GAN model with Discriminator’s weights freezed.
  170. _g_loss_min = self.gan.train_on_batch(noise, [y_gen_min, y_gen_min])
  171. def genFeat(self, parameters):
  172. im_batch = parameters.im_batch_creator_min()
  173. noise = self.noise.noise(im_batch, parameters.batch_size)
  174. return self.generator.predict(noise)
  175. def predict(self, data):
  176. y_pred = self.discriminator_maj.predict(data)
  177. return np.reshape(y_pred, len(data))
  178. class TrainTestData:
  179. """
  180. Stores features, data and labels for class 0 and class 1.
  181. """
  182. def __init__(self, features0, features1, trainFactor=0.9):
  183. self.nFeatures0 = len(features0)
  184. self.nFeatures1 = len(features1)
  185. self.features_0_trn, self.features_0_tst = self.splitUpData(features0, trainFactor)
  186. self.features_1_trn, self.features_1_tst = self.splitUpData(features1, trainFactor)
  187. self.testData, self.testLabels = self.joinData(self.features_1_tst, self.features_0_tst)
  188. self.trainData, self.trainLabels = self.joinData(self.features_1_trn, self.features_0_trn)
  189. def splitUpData(self, data, trainFactor=0.9):
  190. size = len(data)
  191. trainSize = math.ceil(size * trainFactor)
  192. trn = data[list(range(0, trainSize))]
  193. tst = data[list(range(trainSize, size))]
  194. return trn, tst
  195. def joinData(self, data0, data1):
  196. data = np.concatenate((data1, data0))
  197. labels = np.concatenate(( np.zeros(len(data1)) + 1, np.zeros(len(data0)) ))
  198. return data, labels
  199. if __name__ == "__main__":
  200. def createTrainParameters():
  201. data = fetch_datasets()['yeast_me2']
  202. labels = data.target
  203. features = data.data
  204. label_1 = list(np.where(labels == 1)[0])
  205. label_0 = list(np.where(labels == -1)[0])
  206. features_1 = features[label_1]
  207. features_0 = features[label_0]
  208. features_1_trn = features_1[list(range(0,math.ceil(len(features_1)*2/3)))]
  209. data_embedded_min = TSNE(perplexity=.1).fit_transform(features_1_trn)
  210. result_min = pd.DataFrame(data=data_embedded_min, columns=['t-SNE0', 't-SNE1'])
  211. min_t = np.asmatrix(result_min)
  212. min_t = min_t[0:len(features_1_trn)]
  213. min_t = min_t[:, [0,1]]
  214. return GanTrainParameters(
  215. n_feat=len(features[1]),
  216. batch_size=30,
  217. min_t=min_t,
  218. features_0_trn=features_0[list(range(0,math.ceil(len(features_0)*2/3)))],
  219. features_1_trn=features_1_trn
  220. )
  221. gtp = createTrainParameters()
  222. cGan = GAN(n_feat=gtp.n_feat)
  223. cGan.train(parameters=gtp)