# convGAN.py
  1. import os
  2. import math
  3. import random
  4. import numpy as np
  5. import pandas as pd
  6. import matplotlib.pyplot as plt
  7. import random
  8. from scipy import ndarray
  9. from sklearn.neighbors import NearestNeighbors
  10. from sklearn.decomposition import PCA
  11. from sklearn.metrics import confusion_matrix
  12. from sklearn.metrics import f1_score
  13. from sklearn.metrics import cohen_kappa_score
  14. from sklearn.metrics import precision_score
  15. from sklearn.metrics import recall_score
  16. from collections import Counter
  17. from imblearn.datasets import fetch_datasets
  18. from sklearn.preprocessing import StandardScaler
  19. import keras
  20. from keras.layers import Dense, Dropout, Input
  21. from keras.models import Model,Sequential
  22. from tqdm import tqdm
  23. from keras.layers.advanced_activations import LeakyReLU
  24. from tensorflow.keras.optimizers import Adam
  25. from keras import losses
  26. from keras import backend as K
  27. import tensorflow as tf
  28. import warnings
  29. warnings.filterwarnings("ignore")
  30. from sklearn.neighbors import KNeighborsClassifier
  31. from sklearn.ensemble import RandomForestClassifier
  32. from sklearn.ensemble import GradientBoostingClassifier
  33. from numpy.random import seed
  34. seed_num=1
  35. seed(seed_num)
  36. tf.random.set_seed(seed_num)
  37. from library.interfaces import GanBaseClass
  38. ## Import dataset
  39. data = fetch_datasets()['yeast_me2']
  40. ## Creating label and feature matrices
  41. labels_x=data.target ## labels of the data
  42. labels_x.shape
  43. features_x=data.data ## features of the data
  44. features_x.shape
# Until now we have obtained the data. We divided it into training and test sets and
# obtained separate variables for the majority and minority classes, and their labels, for both sets.
  46. class ConvGAN(GanBaseClass):
  47. """
  48. This is a toy example of a GAN.
  49. It repeats the first point of the training-data-set.
  50. """
  51. def __init__(self, neb, gen, debug=False):
  52. self.isTrained = False
  53. self.neb = neb
  54. self.gen = gen
  55. self.loss_history = None
  56. self.debug = debug
  57. def reset(self):
  58. """
  59. Resets the trained GAN to an random state.
  60. """
  61. self.isTrained = False
  62. ## instanciate generator network and visualize architecture
  63. self.conv_sample_generator = conv_sample_gen()
  64. ## instanciate discriminator network and visualize architecture
  65. self.maj_min_discriminator = maj_min_disc()
  66. ## instanciate network and visualize architecture
  67. self.cg = convGAN(self.conv_sample_generator, self.maj_min_discriminator)
  68. def train(self, dataSet, neb_epochs=5):
  69. """
  70. Trains the GAN.
  71. It stores the data points in the training data set and mark as trained.
  72. *dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
  73. We are only interested in the first *maxListSize* points in class 1.
  74. """
  75. if dataSet.data1.shape[0] <= 0:
  76. raise AttributeError("Train: Expected data class 1 to contain at least one point.")
  77. # TODO: do actually training
  78. self.rough_learning(neb_epochs, dataSet.data1, dataSet.data0)
  79. self.isTrained = True
  80. def generateDataPoint(self):
  81. """
  82. Returns one synthetic data point by repeating the stored list.
  83. """
  84. return (self.generateData(1))[0]
  85. def generateData(self, numOfSamples=1):
  86. """
  87. Generates a list of synthetic data-points.
  88. *numOfSamples* is a integer > 0. It gives the number of new generated samples.
  89. """
  90. if not self.isTrained:
  91. raise ValueError("Try to generate data with untrained Re.")
  92. syntheticPoints = [] # TODO
  93. return np.array(syntheticPoints)
  94. # Hidden internal functions
  95. # Training
  96. def _rough_learning(self, neb_epochs, data_min):
  97. generator = self.conv_sample_generator
  98. discriminator = self.maj_min_discriminator
  99. GAN = self.cg
  100. loss_history=[] ## this is for stroring the loss for every run
  101. min_idx = 0
  102. neb_epoch_count = 1
  103. labels = []
  104. for i in range(2 * self.gen):
  105. if i < gen:
  106. labels.append(np.array([1,0]))
  107. else:
  108. labels.append(np.array([0,1]))
  109. labels = np.array(labels)
  110. labels = tf.convert_to_tensor(labels)
  111. for step in range(neb_epochs * len(data_min)):
  112. min_batch = NMB_guided(data_min, self.neb, min_idx) ## generate minority neighbourhood batch for every minority class sampls by index
  113. min_idx = min_idx + 1
  114. maj_batch = BMB(data_min,data_maj, self.neb, self.gen) ## generate random proximal majority batch
  115. conv_samples = generator.predict(min_batch) ## generate synthetic samples from convex space of minority neighbourhood batch using generator
  116. concat_sample = tf.concat([conv_samples, maj_batch], axis=0) ## concatenate them with the majority batch
  117. discriminator.trainable = True ## switch on discriminator training
  118. discriminator.fit(x=concat_sample, y=labels, verbose=0) ## train the discriminator with the concatenated samples and the one-hot encoded labels
  119. discriminator.trainable = False ## switch off the discriminator training again
  120. gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0) ## use the GAN to make the generator learn on the decisions made by the previous discriminator training
  121. loss_history.append(gan_loss_history.history['loss']) ## store the loss for the step
  122. if self.debug and ((step + 1) % 10 == 0):
  123. print(f"{step + 1} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
  124. if min_idx == len(data_min) - 1:
  125. if self.debug:
  126. print(f"Neighbourhood epoch {neb_epoch_count} complete")
  127. neb_epoch_count = neb_epoch_count + 1
  128. min_idx = 0
  129. if self.debug:
  130. run_range = range(1, len(loss_history) + 1)
  131. plt.rcParams["figure.figsize"] = (16,10)
  132. plt.xticks(fontsize=20)
  133. plt.yticks(fontsize=20)
  134. plt.xlabel('runs', fontsize=25)
  135. plt.ylabel('loss', fontsize=25)
  136. plt.title('Rough learning loss for discriminator', fontsize=25)
  137. plt.plot(run_range, loss_history)
  138. plt.show()
  139. self.conv_sample_generator = generator
  140. self.maj_min_discriminator = discriminator
  141. self.cg = GAN
  142. self.loss_history = loss_history
  143. ## convGAN
  144. def unison_shuffled_copies(a, b,seed_perm):
  145. 'Shuffling the feature matrix along with the labels with same order'
  146. np.random.seed(seed_perm)##change seed 1,2,3,4,5
  147. assert len(a) == len(b)
  148. p = np.random.permutation(len(a))
  149. return a[p], b[p]
  150. def BMB(data_min,data_maj, neb, gen):
  151. ## Generate a borderline majority batch
  152. ## data_min -> minority class data
  153. ## data_maj -> majority class data
  154. ## neb -> oversampling neighbourhood
  155. ## gen -> convex combinations generated from each neighbourhood
  156. from sklearn.neighbors import NearestNeighbors
  157. from sklearn.utils import shuffle
  158. neigh = NearestNeighbors(neb)
  159. n_feat=data_min.shape[1]
  160. neigh.fit(data_maj)
  161. bmbi=[]
  162. for i in range(len(data_min)):
  163. indices=neigh.kneighbors([data_min[i]],neb,return_distance=False)
  164. bmbi.append(indices)
  165. bmbi=np.unique(np.array(bmbi).flatten())
  166. bmbi=shuffle(bmbi)
  167. bmb=data_maj[np.random.randint(len(data_maj),size=gen)]
  168. bmb=tf.convert_to_tensor(bmb)
  169. return bmb
  170. def NMB_guided(data_min, neb, index):
  171. ## generate a minority neighbourhood batch for a particular minority sample
  172. ## we need this for minority data generation
  173. ## we will generate synthetic samples for each training data neighbourhood
  174. ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
  175. ## data_min -> minority class data
  176. ## neb -> oversampling neighbourhood
  177. from sklearn.neighbors import NearestNeighbors
  178. from sklearn.utils import shuffle
  179. neigh = NearestNeighbors(neb)
  180. neigh.fit(data_min)
  181. ind=index
  182. nmbi=neigh.kneighbors([data_min[ind]],neb,return_distance=False)
  183. nmbi=shuffle(nmbi)
  184. nmb=data_min[nmbi]
  185. nmb=tf.convert_to_tensor(nmb[0])
  186. return (nmb)
def conv_sample_gen():
    """
    Build the generator network.

    Maps a minority neighbourhood batch (neb rows of n_feat features) to gen
    synthetic samples lying in the convex hull of that batch: the network
    produces a (neb, gen) matrix of non-negative coefficients normalised so
    each coefficient set sums to 1, and the outputs are the corresponding
    convex combinations of the input rows.

    Relies on the module-level globals n_feat, neb and gen (neb must equal gen).
    """
    ## Takes a minority neighbourhood batch as input.
    min_neb_batch = keras.layers.Input(shape=(n_feat,))
    ## Reshape the 2-D tensor to 3-D; 1-D convolution won't work otherwise.
    x=tf.reshape(min_neb_batch, (1,neb,n_feat), name=None)
    ## 1-D convolution; the feature dimension remains the same.
    x= keras.layers.Conv1D(n_feat, 3, activation='relu')(x)
    x= keras.layers.Flatten()(x) ## flatten after convolution
    ## Dense layer to transform the vector to a convenient dimension.
    x= keras.layers.Dense(neb*gen, activation='relu')(x)
    x= keras.layers.Reshape((neb,gen))(x) ## back to a (neb, gen) tensor once we have the convenient size
    s=K.sum(x,axis=1) ## sums over the neb axis (one sum per coefficient set)
    ## Add a tiny constant so the sums are never exactly zero — at
    ## initialisation a sum can be zero, which would break the reciprocal below.
    s_non_zero=tf.keras.layers.Lambda(lambda x: x+.000001)(s)
    sinv=tf.math.reciprocal(s_non_zero) ## reciprocals of the approximated sums
    ## Normalise so every coefficient set in x sums to 1, i.e. each is a set
    ## of convex coefficients.
    x=keras.layers.Multiply()([sinv,x])
    aff=tf.transpose(x[0]) ## transpose: each row is now one set of convex coefficients
    ## Convex combinations of the original minority batch -> synthetic samples.
    synth=tf.matmul(aff,min_neb_batch)
    ## Compile: input is an arbitrary minority neighbourhood batch, output is
    ## a convex-space transformation with the same number of samples.
    model = Model(inputs=min_neb_batch, outputs=synth)
    opt = Adam(learning_rate=0.001)
    model.compile(loss='mean_squared_logarithmic_error', optimizer=opt)
    return model
  205. def maj_min_disc():
  206. ## the discriminator is trained intwo phase:
  207. ## first phase: while training GAN the discriminator learns to differentiate synthetic minority samples generated from convex minority data space against the borderline majority samples
  208. ## second phase: after the GAN generator learns to create synthetic samples, it can be used to generate synthetic samples to balance the dataset
  209. ## and then rettrain the discriminator with the balanced dataset
  210. samples=keras.layers.Input(shape=(n_feat,)) ## takes as input synthetic sample generated as input stacked upon a batch of borderline majority samples
  211. y= keras.layers.Dense(250, activation='relu')(samples) ## passed through two dense layers
  212. y= keras.layers.Dense(125, activation='relu')(y)
  213. output= keras.layers.Dense(2, activation='sigmoid')(y) ## two output nodes. outputs have to be one-hot coded (see labels variable before)
  214. model = Model(inputs=samples, outputs=output) ## compile model
  215. opt = Adam(learning_rate=0.0001)
  216. model.compile(loss='binary_crossentropy', optimizer=opt)
  217. return model
  218. def convGAN(generator,discriminator):
  219. ## for joining the generator and the discriminator
  220. ## conv_coeff_generator-> generator network instance
  221. ## maj_min_discriminator -> discriminator network instance
  222. maj_min_disc.trainable=False ## by default the discriminator trainability is switched off.
  223. ## Thus training the GAN means training the generator network as per previously trained discriminator network.
  224. batch_data = keras.layers.Input(shape=(n_feat,)) ## input receives a neighbourhood minority batch and a proximal majority batch concatenated
  225. min_batch = tf.keras.layers.Lambda(lambda x: x[:neb])(batch_data) ## extract minority batch
  226. maj_batch = tf.keras.layers.Lambda(lambda x: x[neb:])(batch_data) ## extract majority batch
  227. conv_samples=generator(min_batch) ## pass minority batch into generator to obtain convex space transformation (synthetic samples) of the minority neighbourhood input batch
  228. new_samples=tf.concat([conv_samples,maj_batch],axis=0) ## concatenate the synthetic samples with the majority samples
  229. output=discriminator(new_samples) ## pass the concatenated vector into the discriminator to know its decisions
  230. ## note that, the discriminator will not be traied but will make decisions based on its previous training while using this function
  231. model = Model(inputs=batch_data, outputs=output)
  232. opt = Adam(learning_rate=0.0001)
  233. model.compile(loss='mse', optimizer=opt)
  234. return model
  235. ## this is the main training process where the GAn learns to generate appropriate samples from the convex space
  236. ## this is the first training phase for the discriminator and the only training phase for the generator.
  237. def rough_learning_predictions(discriminator,test_data_numpy,test_labels_numpy):
  238. ## after the first phase of training the discriminator can be used for classification
  239. ## it already learns to differentiate the convex minority points with majority points during the first training phase
  240. y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
  241. ## discretisation of the labels
  242. y_pred=np.digitize(y_pred_2d[:,0], [.5])
  243. ## prediction shows a model with good recall and less precision
  244. c=confusion_matrix(test_labels_numpy, y_pred)
  245. f=f1_score(test_labels_numpy, y_pred)
  246. pr=precision_score(test_labels_numpy, y_pred)
  247. rc=recall_score(test_labels_numpy, y_pred)
  248. k=cohen_kappa_score(test_labels_numpy, y_pred)
  249. print('Rough learning confusion matrix:', c)
  250. print('Rough learning f1 score', f)
  251. print('Rough learning precision score', pr)
  252. print('Rough learning recall score', rc)
  253. print('Rough learning kappa score', k)
  254. return c,f,pr,rc,k
  255. def generate_data_for_min_point(data_min,neb,index,synth_num,generator):
  256. ## generate synth_num synthetic points for a particular minoity sample
  257. ## synth_num -> required number of data points that can be generated from a neighbourhood
  258. ## data_min -> minority class data
  259. ## neb -> oversampling neighbourhood
  260. ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
  261. runs=int(synth_num/neb)+1
  262. synth_set=[]
  263. for run in range(runs):
  264. batch=NMB_guided(data_min, neb, index)
  265. synth_batch=generator.predict(batch)
  266. for i in range(len(synth_batch)):
  267. synth_set.append(synth_batch[i])
  268. synth_set=synth_set[:synth_num]
  269. synth_set=np.array(synth_set)
  270. return(synth_set)
def generate_synthetic_data(data_min,data_maj,neb,generator):
    """
    Balance the dataset by generating synthetic minority samples with the
    trained generator, then show a PCA plot of the oversampled data.

    data_min  -> minority class data
    data_maj  -> majority class data
    neb       -> oversampling neighbourhood size
    generator -> trained generator network

    Returns (ovs_training_dataset, ovs_pca_labels, ovs_training_labels_oh):
    the oversampled feature matrix, 3-class labels for the PCA plot
    (0 = minority, 1 = synthetic minority, 2 = majority), and one-hot
    training labels as a tf tensor.
    """
    ## Roughly calculate the upper bound of synthetic samples needed per neighbourhood.
    synth_num=((len(data_maj)-len(data_min))//len(data_min))+1
    ## Generate synth_num synthetic samples from each minority neighbourhood.
    synth_set=[]
    for i in range(len(data_min)):
        synth_i=generate_data_for_min_point(data_min,neb,i,synth_num,generator)
        for k in range(len(synth_i)):
            synth_set.append(synth_i[k])
    ## Keep exactly the number of synthetic samples needed to balance the two classes.
    synth_set=synth_set[:(len(data_maj)-len(data_min))]
    synth_set=np.array(synth_set)
    ovs_min_class=np.concatenate((data_min,synth_set),axis=0)
    ovs_training_dataset=np.concatenate((ovs_min_class,data_maj),axis=0)
    ## 3-class labels for the PCA plot: 0=real minority, 1=synthetic, 2=majority.
    ovs_pca_labels=np.concatenate((np.zeros(len(data_min)),np.zeros(len(synth_set))+1,np.zeros(len(data_maj))+2))
    # NOTE(review): ovs_training_labels is computed but never used or returned;
    # callers consume the one-hot labels built below instead.
    ovs_training_labels=np.concatenate((np.zeros(len(ovs_min_class))+1,np.zeros(len(data_maj))+0))
    ## One-hot labels: minority (real + synthetic) -> [1,0], majority -> [0,1].
    ovs_training_labels_oh=[]
    for i in range(len(ovs_training_dataset)):
        if i<len(ovs_min_class):
            ovs_training_labels_oh.append(np.array([1,0]))
        else:
            ovs_training_labels_oh.append(np.array([0,1]))
    ovs_training_labels_oh=np.array(ovs_training_labels_oh)
    ovs_training_labels_oh=tf.convert_to_tensor(ovs_training_labels_oh)
    ## PCA visualisation of the synthetic data: observe how the convex-space
    ## minority samples keep their variance and avoid overlap with the majority.
    pca = PCA(n_components=2)
    pca.fit(ovs_training_dataset)
    data_pca= pca.transform(ovs_training_dataset)
    ## Plot PCA.
    plt.rcParams["figure.figsize"] = (12,12)
    colors=['r', 'b', 'g']
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.xlabel('PCA1',fontsize=25)
    plt.ylabel('PCA2', fontsize=25)
    plt.title('PCA plot of oversampled data',fontsize=25)
    classes = ['minority', 'synthetic minority', 'majority']
    scatter=plt.scatter(data_pca[:,0], data_pca[:,1], c=ovs_pca_labels, cmap='Set1')
    plt.legend(handles=scatter.legend_elements()[0], labels=classes, fontsize=20)
    plt.show()
    return ovs_training_dataset, ovs_pca_labels, ovs_training_labels_oh
def final_learning(discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data_numpy, test_labels_numpy, num_epochs):
    """
    Second-phase training: retrain the discriminator on the balanced
    (oversampled) dataset, plot its loss curve, and evaluate on the test set.

    discriminator          -> discriminator after rough GAN training
    ovs_training_dataset   -> oversampled (balanced) feature matrix
    ovs_training_labels_oh -> one-hot training labels
    test_data_numpy        -> test feature matrix
    test_labels_numpy      -> binary test labels (1 = minority)
    num_epochs             -> number of retraining epochs

    Returns (confusion_matrix, f1, precision, recall, kappa).
    """
    print('\n')
    print('Final round training of the discrminator as a majority-minority classifier')
    print('\n')
    ## Second-phase training of the discriminator with balanced data.
    history_second_learning=discriminator.fit(x=ovs_training_dataset,y=ovs_training_labels_oh, batch_size=20, epochs=num_epochs)
    ## The loss now decreases smoothly: the data is fixed, so diverse convex
    ## combinations are no longer fed to the discriminator at every step.
    run_range=range(1,num_epochs+1)
    plt.rcParams["figure.figsize"] = (16,10)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.xlabel('runs',fontsize=25)
    plt.ylabel('loss', fontsize=25)
    plt.title('Final learning loss for discriminator', fontsize=25)
    plt.plot(run_range, history_second_learning.history['loss'])
    plt.show()
    ## After second-phase training the classifier is more balanced (better
    ## F1-Score): recall decreases somewhat but precision improves.
    print('\n')
    y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
    ## Discretise: the first output column is the minority score, threshold 0.5.
    y_pred=np.digitize(y_pred_2d[:,0], [.5])
    c=confusion_matrix(test_labels_numpy, y_pred)
    f=f1_score(test_labels_numpy, y_pred)
    pr=precision_score(test_labels_numpy, y_pred)
    rc=recall_score(test_labels_numpy, y_pred)
    k=cohen_kappa_score(test_labels_numpy, y_pred)
    print('Final learning confusion matrix:', c)
    print('Final learning f1 score', f)
    print('Final learning precision score', pr)
    print('Final learning recall score', rc)
    print('Final learning kappa score', k)
    return c,f,pr,rc,k
  346. def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels, neb, gen, neb_epochs,epochs_retrain_disc):
  347. ##minority class
  348. data_min=training_data[np.where(training_labels == 1)[0]]
  349. ##majority class
  350. data_maj=training_data[np.where(training_labels == 0)[0]]
  351. dataSet = DataSet(data0=data_maj, data1=data_min)
  352. gan = ConvGAN(neb, gen)
  353. gan.reset()
  354. ## instanciate generator network and visualize architecture
  355. conv_sample_generator = gan.conv_sample_generator
  356. print(conv_sample_generator.summary())
  357. print('\n')
  358. ## instanciate discriminator network and visualize architecture
  359. maj_min_discriminator = self.maj_min_discriminator
  360. print(maj_min_discriminator.summary())
  361. print('\n')
  362. ## instanciate network and visualize architecture
  363. cg = self.cg
  364. print(cg.summary())
  365. print('\n')
  366. print('Training the GAN, first round training of the discrminator as a majority-minority classifier')
  367. print('\n')
  368. ## train gan generator ## rough_train_discriminator
  369. gan.train(dataSet, neb_epochs)
  370. print('\n')
  371. ## rough learning results
  372. c_r,f_r,pr_r,rc_r,k_r=rough_learning_predictions(gan.maj_min_discriminator_r, test_data, test_labels)
  373. print('\n')
  374. ## generate synthetic data
  375. ovs_training_dataset, ovs_pca_labels, ovs_training_labels_oh=generate_synthetic_data(data_min, data_maj, gan.neb, gan.conv_sample_generator)
  376. print('\n')
  377. ## final training results
  378. c,f,pr,rc,k=final_learning(gan.maj_min_discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data, test_labels, epochs_retrain_disc)
  379. return ((c_r,f_r,pr_r,rc_r,k_r),(c,f,pr,rc,k))
## specify parameters
neb=gen=5 ## neighbourhood size and samples generated per neighbourhood; neb=gen required by the generator architecture
neb_epochs=10  # passes over the minority class during rough GAN training
epochs_retrain_disc=50  # epochs for the final discriminator retraining
n_feat=len(features_x[1]) ## number of features (width of one sample row)
## Training
np.random.seed(42)
strata=5  # number of repeated shuffled 5-fold CV runs (one per shuffle seed)
results=[]  # collects one (rough, final) metric pair per executed fold
## Repeated, shuffled 5-fold cross-validation over the whole pipeline.
for seed_perm in range(strata):
    # Re-shuffle features and labels in unison with a fresh seed per stratum.
    features_x,labels_x=unison_shuffled_copies(features_x,labels_x,seed_perm)
    ### Extracting all features and labels
    print('Extracting all features and labels for seed:'+ str(seed_perm)+'\n')
    ## Dividing data into training and testing datasets for 10-fold CV
    # NOTE(review): the printed message says 10-fold but the split below is 5-fold.
    print('Dividing data into training and testing datasets for 10-fold CV for seed:'+ str(seed_perm)+'\n')
    ## Stratified split: separate minority (label 1) and majority rows first.
    label_1=list(np.where(labels_x == 1)[0])
    features_1=features_x[label_1]
    label_0=list(np.where(labels_x != 1)[0])
    features_0=features_x[label_0]
    # a/b = per-fold minority/majority fold sizes (fold 5 takes the remainder).
    a=len(features_1)//5
    b=len(features_0)//5
    ## Build the five test folds (each = one minority slice + one majority slice).
    fold_1_min=features_1[0:a]
    fold_1_maj=features_0[0:b]
    fold_1_tst=np.concatenate((fold_1_min,fold_1_maj))
    lab_1_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
    fold_2_min=features_1[a:2*a]
    fold_2_maj=features_0[b:2*b]
    fold_2_tst=np.concatenate((fold_2_min,fold_2_maj))
    # NOTE(review): labels for folds 2-4 reuse len(fold_1_min)/len(fold_1_maj);
    # folds 1-4 all hold exactly a minority and b majority rows, so the counts
    # match, but this is fragile if the slicing ever changes.
    lab_2_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
    fold_3_min=features_1[2*a:3*a]
    fold_3_maj=features_0[2*b:3*b]
    fold_3_tst=np.concatenate((fold_3_min,fold_3_maj))
    lab_3_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
    fold_4_min=features_1[3*a:4*a]
    fold_4_maj=features_0[3*b:4*b]
    fold_4_tst=np.concatenate((fold_4_min,fold_4_maj))
    lab_4_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
    # Fold 5 takes whatever remains (may be slightly larger than a/b rows).
    fold_5_min=features_1[4*a:]
    fold_5_maj=features_0[4*b:]
    fold_5_tst=np.concatenate((fold_5_min,fold_5_maj))
    lab_5_tst=np.concatenate((np.zeros(len(fold_5_min))+1, np.zeros(len(fold_5_maj))))
    ## Training folds: for fold k, concatenate the other four folds
    ## (minority slices first, then majority slices); labels follow the same order.
    fold_1_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_5_min, fold_2_maj,fold_3_maj,fold_4_maj,fold_5_maj))
    lab_1_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
    fold_2_trn=np.concatenate((fold_1_min,fold_3_min,fold_4_min,fold_5_min,fold_1_maj,fold_3_maj,fold_4_maj,fold_5_maj))
    lab_2_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
    fold_3_trn=np.concatenate((fold_2_min,fold_1_min,fold_4_min,fold_5_min,fold_2_maj,fold_1_maj,fold_4_maj,fold_5_maj))
    lab_3_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
    fold_4_trn=np.concatenate((fold_2_min,fold_3_min,fold_1_min,fold_5_min,fold_2_maj,fold_3_maj,fold_1_maj,fold_5_maj))
    lab_4_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
    fold_5_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_1_min,fold_2_maj,fold_3_maj,fold_4_maj,fold_1_maj))
    lab_5_trn=np.concatenate((np.zeros(4*a)+1,np.zeros(4*b)))
    training_folds_feats=[fold_1_trn,fold_2_trn,fold_3_trn,fold_4_trn,fold_5_trn]
    testing_folds_feats=[fold_1_tst,fold_2_tst,fold_3_tst,fold_4_tst,fold_5_tst]
    training_folds_labels=[lab_1_trn,lab_2_trn,lab_3_trn,lab_4_trn,lab_5_trn]
    testing_folds_labels=[lab_1_tst,lab_2_tst,lab_3_tst,lab_4_tst,lab_5_tst]
    ## Run the full pipeline once per fold.
    for i in range(5):
        print('\n')
        print('Executing fold: '+str(i+1))
        print('\n')
        r1,r2=convGAN_train_end_to_end(training_folds_feats[i],training_folds_labels[i],testing_folds_feats[i],testing_folds_labels[i], neb, gen, neb_epochs, epochs_retrain_disc)
        # Drop the confusion matrix (index 0); keep (f1, precision, recall, kappa).
        results.append(np.array([list(r1[1:]),list(r2[1:])]))
  441. results=np.array(results)
  442. ## Benchmark
  443. mean_rough=np.mean(results[:,0], axis=0)
  444. data_r={'F1-Score_r':[mean_rough[0]], 'Precision_r' : [mean_rough[1]], 'Recall_r' : [mean_rough[2]], 'Kappa_r': [mean_rough[3]]}
  445. df_r=pd.DataFrame(data=data_r)
  446. print('Rough training results:')
  447. print('\n')
  448. print(df_r)
  449. mean_final=np.mean(results[:,1], axis=0)
  450. data_f={'F1-Score_f':[mean_final[0]], 'Precision_f' : [mean_final[1]], 'Recall_f' : [mean_final[2]], 'Kappa_f': [mean_final[3]]}
  451. df_f=pd.DataFrame(data=data_f)
  452. print('Final training results:')
  453. print('\n')
  454. print(df_f)