# convGAN.py
# Convex-space GAN oversampling for imbalanced binary classification.
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from imblearn.datasets import fetch_datasets
from keras import backend as K
from keras.layers import Conv1D, Dense, Flatten, Input, Multiply, Reshape
from keras.models import Model
from numpy.random import seed
from sklearn.decomposition import PCA
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import shuffle
from tensorflow.keras.layers import Lambda
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

from library.dataset import DataSet
from library.interfaces import GanBaseClass

warnings.filterwarnings("ignore")
  25. class ConvGAN(GanBaseClass):
  26. """
  27. This is a toy example of a GAN.
  28. It repeats the first point of the training-data-set.
  29. """
  30. def __init__(self, n_feat, neb, gen, debug=True):
  31. self.isTrained = False
  32. self.n_feat = n_feat
  33. self.neb = neb
  34. self.gen = gen
  35. self.loss_history = None
  36. self.debug = debug
  37. self.dataSet = None
  38. self.conv_sample_generator = None
  39. self.maj_min_discriminator = None
  40. self.cg = None
  41. def reset(self):
  42. """
  43. Resets the trained GAN to an random state.
  44. """
  45. self.isTrained = False
  46. ## instanciate generator network and visualize architecture
  47. self.conv_sample_generator = self._conv_sample_gen()
  48. ## instanciate discriminator network and visualize architecture
  49. self.maj_min_discriminator = self._maj_min_disc()
  50. ## instanciate network and visualize architecture
  51. self.cg = self._convGAN(self.conv_sample_generator, self.maj_min_discriminator)
  52. def train(self, dataSet, neb_epochs=5):
  53. """
  54. Trains the GAN.
  55. It stores the data points in the training data set and mark as trained.
  56. *dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
  57. We are only interested in the first *maxListSize* points in class 1.
  58. """
  59. if dataSet.data1.shape[0] <= 0:
  60. raise AttributeError("Train: Expected data class 1 to contain at least one point.")
  61. self.dataSet = dataSet
  62. self._rough_learning(neb_epochs, dataSet.data1, dataSet.data0)
  63. self.isTrained = True
  64. def generateDataPoint(self):
  65. """
  66. Returns one synthetic data point by repeating the stored list.
  67. """
  68. return (self.generateData(1))[0]
  69. def generateData(self, numOfSamples=1):
  70. """
  71. Generates a list of synthetic data-points.
  72. *numOfSamples* is a integer > 0. It gives the number of new generated samples.
  73. """
  74. if not self.isTrained:
  75. raise ValueError("Try to generate data with untrained Re.")
  76. data_min = self.dataSet.data1
  77. ## roughly claculate the upper bound of the synthetic samples to be generated from each neighbourhood
  78. synth_num = (numOfSamples // len(data_min)) + 1
  79. ## generate synth_num synthetic samples from each minority neighbourhood
  80. synth_set=[]
  81. for i in range(len(data_min)):
  82. synth_set.extend(self.generate_data_for_min_point(data_min, i, synth_num))
  83. synth_set = synth_set[:numOfSamples] ## extract the exact number of synthetic samples needed to exactly balance the two classes
  84. return np.array(synth_set)
  85. # ###############################################################
  86. # Hidden internal functions
  87. # ###############################################################
  88. # Creating the GAN
  89. def _conv_sample_gen(self):
  90. """
  91. the generator network to generate synthetic samples from the convex space
  92. of arbitrary minority neighbourhoods
  93. """
  94. ## takes minority batch as input
  95. min_neb_batch = Input(shape=(self.n_feat,))
  96. ## reshaping the 2D tensor to 3D for using 1-D convolution,
  97. ## otherwise 1-D convolution won't work.
  98. x = tf.reshape(min_neb_batch, (1, self.neb, self.n_feat), name=None)
  99. ## using 1-D convolution, feature dimension remains the same
  100. x = Conv1D(self.n_feat, 3, activation='relu')(x)
  101. ## flatten after convolution
  102. x = Flatten()(x)
  103. ## add dense layer to transform the vector to a convenient dimension
  104. x = Dense(self.neb * self.gen, activation='relu')(x)
  105. ## again, witching to 2-D tensor once we have the convenient shape
  106. x = Reshape((self.neb, self.gen))(x)
  107. ## row wise sum
  108. s = K.sum(x, axis=1)
  109. ## adding a small constant to always ensure the row sums are non zero.
  110. ## if this is not done then during initialization the sum can be zero.
  111. s_non_zero = Lambda(lambda x: x + .000001)(s)
  112. ## reprocals of the approximated row sum
  113. sinv = tf.math.reciprocal(s_non_zero)
  114. ## At this step we ensure that row sum is 1 for every row in x.
  115. ## That means, each row is set of convex co-efficient
  116. x = Multiply()([sinv, x])
  117. ## Now we transpose the matrix. So each column is now a set of convex coefficients
  118. aff=tf.transpose(x[0])
  119. ## We now do matrix multiplication of the affine combinations with the original
  120. ## minority batch taken as input. This generates a convex transformation
  121. ## of the input minority batch
  122. synth=tf.matmul(aff, min_neb_batch)
  123. ## finally we compile the generator with an arbitrary minortiy neighbourhood batch
  124. ## as input and a covex space transformation of the same number of samples as output
  125. model = Model(inputs=min_neb_batch, outputs=synth)
  126. opt = Adam(learning_rate=0.001)
  127. model.compile(loss='mean_squared_logarithmic_error', optimizer=opt)
  128. return model
  129. def _maj_min_disc(self):
  130. """
  131. the discriminator is trained intwo phase:
  132. first phase: while training GAN the discriminator learns to differentiate synthetic
  133. minority samples generated from convex minority data space against
  134. the borderline majority samples
  135. second phase: after the GAN generator learns to create synthetic samples,
  136. it can be used to generate synthetic samples to balance the dataset
  137. and then rettrain the discriminator with the balanced dataset
  138. """
  139. ## takes as input synthetic sample generated as input stacked upon a batch of
  140. ## borderline majority samples
  141. samples = Input(shape=(self.n_feat,))
  142. ## passed through two dense layers
  143. y = Dense(250, activation='relu')(samples)
  144. y = Dense(125, activation='relu')(y)
  145. ## two output nodes. outputs have to be one-hot coded (see labels variable before)
  146. output = Dense(2, activation='sigmoid')(y)
  147. ## compile model
  148. model = Model(inputs=samples, outputs=output)
  149. opt = Adam(learning_rate=0.0001)
  150. model.compile(loss='binary_crossentropy', optimizer=opt)
  151. return model
  152. def _convGAN(self, generator, discriminator):
  153. """
  154. for joining the generator and the discriminator
  155. conv_coeff_generator-> generator network instance
  156. maj_min_discriminator -> discriminator network instance
  157. """
  158. ## by default the discriminator trainability is switched off.
  159. ## Thus training the GAN means training the generator network as per previously
  160. ## trained discriminator network.
  161. discriminator.trainable = False
  162. ## input receives a neighbourhood minority batch
  163. ## and a proximal majority batch concatenated
  164. batch_data = Input(shape=(self.n_feat,))
  165. ## extract minority batch
  166. min_batch = Lambda(lambda x: x[:self.neb])(batch_data)
  167. ## extract majority batch
  168. maj_batch = Lambda(lambda x: x[self.neb:])(batch_data)
  169. ## pass minority batch into generator to obtain convex space transformation
  170. ## (synthetic samples) of the minority neighbourhood input batch
  171. conv_samples = generator(min_batch)
  172. ## concatenate the synthetic samples with the majority samples
  173. new_samples = tf.concat([conv_samples, maj_batch],axis=0)
  174. ## pass the concatenated vector into the discriminator to know its decisions
  175. output = discriminator(new_samples)
  176. ## note that, the discriminator will not be traied but will make decisions based
  177. ## on its previous training while using this function
  178. model = Model(inputs=batch_data, outputs=output)
  179. opt = Adam(learning_rate=0.0001)
  180. model.compile(loss='mse', optimizer=opt)
  181. return model
  182. # Create synthetic points
  183. def _generate_data_for_min_point(self, data_min, index, synth_num):
  184. """
  185. generate synth_num synthetic points for a particular minoity sample
  186. synth_num -> required number of data points that can be generated from a neighbourhood
  187. data_min -> minority class data
  188. neb -> oversampling neighbourhood
  189. index -> index of the minority sample in a training data whose neighbourhood we want to obtain
  190. """
  191. runs = int(synth_num / self.neb) + 1
  192. synth_set = []
  193. for _run in range(runs):
  194. batch = self._NMB_guided(data_min, index)
  195. synth_batch = self.conv_sample_generator.predict(batch)
  196. for x in synth_batch:
  197. synth_set.append(x)
  198. return synth_set[:synth_num]
  199. # Training
  200. def _rough_learning(self, neb_epochs, data_min, data_maj):
  201. generator = self.conv_sample_generator
  202. discriminator = self.maj_min_discriminator
  203. GAN = self.cg
  204. loss_history=[] ## this is for stroring the loss for every run
  205. min_idx = 0
  206. neb_epoch_count = 1
  207. labels = []
  208. for i in range(2 * self.gen):
  209. if i < self.gen:
  210. labels.append(np.array([1,0]))
  211. else:
  212. labels.append(np.array([0,1]))
  213. labels = np.array(labels)
  214. labels = tf.convert_to_tensor(labels)
  215. for step in range(neb_epochs * len(data_min)):
  216. ## generate minority neighbourhood batch for every minority class sampls by index
  217. min_batch = self._NMB_guided(data_min, min_idx)
  218. min_idx = min_idx + 1
  219. ## generate random proximal majority batch
  220. maj_batch = self._BMB(data_min, data_maj)
  221. ## generate synthetic samples from convex space
  222. ## of minority neighbourhood batch using generator
  223. conv_samples = generator.predict(min_batch)
  224. ## concatenate them with the majority batch
  225. concat_sample = tf.concat([conv_samples, maj_batch], axis=0)
  226. ## switch on discriminator training
  227. discriminator.trainable = True
  228. ## train the discriminator with the concatenated samples and the one-hot encoded labels
  229. discriminator.fit(x=concat_sample, y=labels, verbose=0)
  230. ## switch off the discriminator training again
  231. discriminator.trainable = False
  232. ## use the GAN to make the generator learn on the decisions
  233. ## made by the previous discriminator training
  234. gan_loss_history = GAN.fit(concat_sample, y=labels, verbose=0)
  235. ## store the loss for the step
  236. loss_history.append(gan_loss_history.history['loss'])
  237. if self.debug and ((step + 1) % 10 == 0):
  238. print(f"{step + 1} neighbourhood batches trained; running neighbourhood epoch {neb_epoch_count}")
  239. if min_idx == len(data_min) - 1:
  240. if self.debug:
  241. print(f"Neighbourhood epoch {neb_epoch_count} complete")
  242. neb_epoch_count = neb_epoch_count + 1
  243. min_idx = 0
  244. if self.debug:
  245. run_range = range(1, len(loss_history) + 1)
  246. plt.rcParams["figure.figsize"] = (16,10)
  247. plt.xticks(fontsize=20)
  248. plt.yticks(fontsize=20)
  249. plt.xlabel('runs', fontsize=25)
  250. plt.ylabel('loss', fontsize=25)
  251. plt.title('Rough learning loss for discriminator', fontsize=25)
  252. plt.plot(run_range, loss_history)
  253. plt.show()
  254. self.conv_sample_generator = generator
  255. self.maj_min_discriminator = discriminator
  256. self.cg = GAN
  257. self.loss_history = loss_history
  258. ## convGAN
  259. def _BMB(self, data_min, data_maj):
  260. ## Generate a borderline majority batch
  261. ## data_min -> minority class data
  262. ## data_maj -> majority class data
  263. ## neb -> oversampling neighbourhood
  264. ## gen -> convex combinations generated from each neighbourhood
  265. neigh = NearestNeighbors(self.neb)
  266. neigh.fit(data_maj)
  267. bmbi = [
  268. neigh.kneighbors([data_min[i]], self.neb, return_distance=False)
  269. for i in range(len(data_min))
  270. ]
  271. bmbi = np.unique(np.array(bmbi).flatten())
  272. bmbi = shuffle(bmbi)
  273. return tf.convert_to_tensor(
  274. data_maj[np.random.randint(len(data_maj), size=self.gen)]
  275. )
  276. def _NMB_guided(self, data_min, index):
  277. ## generate a minority neighbourhood batch for a particular minority sample
  278. ## we need this for minority data generation
  279. ## we will generate synthetic samples for each training data neighbourhood
  280. ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
  281. ## data_min -> minority class data
  282. ## neb -> oversampling neighbourhood
  283. neigh = NearestNeighbors(self.neb)
  284. neigh.fit(data_min)
  285. nmbi = neigh.kneighbors([data_min[index]], self.neb, return_distance=False)
  286. nmbi = shuffle(nmbi)
  287. nmb = data_min[nmbi]
  288. nmb = tf.convert_to_tensor(nmb[0])
  289. return nmb
## This (the class above) is the main training process, where the GAN learns to generate appropriate samples from the convex space.
## It is the first training phase for the discriminator and the only training phase for the generator.
  292. def rough_learning_predictions(discriminator,test_data_numpy,test_labels_numpy):
  293. ## after the first phase of training the discriminator can be used for classification
  294. ## it already learns to differentiate the convex minority points with majority points during the first training phase
  295. y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
  296. ## discretisation of the labels
  297. y_pred=np.digitize(y_pred_2d[:,0], [.5])
  298. ## prediction shows a model with good recall and less precision
  299. c=confusion_matrix(test_labels_numpy, y_pred)
  300. f=f1_score(test_labels_numpy, y_pred)
  301. pr=precision_score(test_labels_numpy, y_pred)
  302. rc=recall_score(test_labels_numpy, y_pred)
  303. k=cohen_kappa_score(test_labels_numpy, y_pred)
  304. print('Rough learning confusion matrix:', c)
  305. print('Rough learning f1 score', f)
  306. print('Rough learning precision score', pr)
  307. print('Rough learning recall score', rc)
  308. print('Rough learning kappa score', k)
  309. return c,f,pr,rc,k
  310. def generate_synthetic_data(gan, data_min, data_maj):
  311. ## roughly claculate the upper bound of the synthetic samples to be generated from each neighbourhood
  312. synth_num=((len(data_maj)-len(data_min))//len(data_min))+1
  313. ## generate synth_num synthetic samples from each minority neighbourhood
  314. synth_set = gan.generateData(synth_num)
  315. ovs_min_class=np.concatenate((data_min,synth_set),axis=0)
  316. ovs_training_dataset=np.concatenate((ovs_min_class,data_maj),axis=0)
  317. ovs_pca_labels=np.concatenate((np.zeros(len(data_min)),np.zeros(len(synth_set))+1,np.zeros(len(data_maj))+2))
  318. # TODO ovs_training_labels=np.concatenate((np.zeros(len(ovs_min_class))+1,np.zeros(len(data_maj))+0))
  319. ovs_training_labels_oh=[]
  320. for i in range(len(ovs_training_dataset)):
  321. if i<len(ovs_min_class):
  322. ovs_training_labels_oh.append(np.array([1,0]))
  323. else:
  324. ovs_training_labels_oh.append(np.array([0,1]))
  325. ovs_training_labels_oh=np.array(ovs_training_labels_oh)
  326. ovs_training_labels_oh=tf.convert_to_tensor(ovs_training_labels_oh)
  327. ## PCA visualization of the synthetic sata
  328. ## observe how the minority samples from convex space have optimal variance and avoids overlap with the majority
  329. pca = PCA(n_components=2)
  330. pca.fit(ovs_training_dataset)
  331. data_pca= pca.transform(ovs_training_dataset)
  332. ## plot PCA
  333. plt.rcParams["figure.figsize"] = (12,12)
  334. # TODO colors=['r', 'b', 'g']
  335. plt.xticks(fontsize=20)
  336. plt.yticks(fontsize=20)
  337. plt.xlabel('PCA1',fontsize=25)
  338. plt.ylabel('PCA2', fontsize=25)
  339. plt.title('PCA plot of oversampled data',fontsize=25)
  340. classes = ['minority', 'synthetic minority', 'majority']
  341. scatter=plt.scatter(data_pca[:,0], data_pca[:,1], c=ovs_pca_labels, cmap='Set1')
  342. plt.legend(handles=scatter.legend_elements()[0], labels=classes, fontsize=20)
  343. plt.show()
  344. return ovs_training_dataset, ovs_pca_labels, ovs_training_labels_oh
  345. def final_learning(discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data_numpy, test_labels_numpy, num_epochs):
  346. print('\n')
  347. print('Final round training of the discrminator as a majority-minority classifier')
  348. print('\n')
  349. ## second phase training of the discriminator with balanced data
  350. history_second_learning=discriminator.fit(x=ovs_training_dataset,y=ovs_training_labels_oh, batch_size=20, epochs=num_epochs)
  351. ## loss of the second phase learning smoothly decreses
  352. ## this is because now the data is fixed and diverse convex combinations are no longer fed into the discriminator at every training step
  353. run_range=range(1,num_epochs+1)
  354. plt.rcParams["figure.figsize"] = (16,10)
  355. plt.xticks(fontsize=20)
  356. plt.yticks(fontsize=20)
  357. plt.xlabel('runs',fontsize=25)
  358. plt.ylabel('loss', fontsize=25)
  359. plt.title('Final learning loss for discriminator', fontsize=25)
  360. plt.plot(run_range, history_second_learning.history['loss'])
  361. plt.show()
  362. ## finally after second phase training the discriminator classifier has a more balanced performance
  363. ## meaning better F1-Score
  364. ## the recall decreases but the precision improves
  365. print('\n')
  366. y_pred_2d=discriminator.predict(tf.convert_to_tensor(test_data_numpy))
  367. y_pred=np.digitize(y_pred_2d[:,0], [.5])
  368. c=confusion_matrix(test_labels_numpy, y_pred)
  369. f=f1_score(test_labels_numpy, y_pred)
  370. pr=precision_score(test_labels_numpy, y_pred)
  371. rc=recall_score(test_labels_numpy, y_pred)
  372. k=cohen_kappa_score(test_labels_numpy, y_pred)
  373. print('Final learning confusion matrix:', c)
  374. print('Final learning f1 score', f)
  375. print('Final learning precision score', pr)
  376. print('Final learning recall score', rc)
  377. print('Final learning kappa score', k)
  378. return c,f,pr,rc,k
  379. def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels, neb, gen, neb_epochs,epochs_retrain_disc):
  380. ##minority class
  381. data_min=training_data[np.where(training_labels == 1)[0]]
  382. ##majority class
  383. data_maj=training_data[np.where(training_labels == 0)[0]]
  384. dataSet = DataSet(data0=data_maj, data1=data_min)
  385. gan = ConvGAN(data_min.shape[1], neb, gen)
  386. gan.reset()
  387. ## instanciate generator network and visualize architecture
  388. conv_sample_generator = gan.conv_sample_generator
  389. print(conv_sample_generator.summary())
  390. print('\n')
  391. ## instanciate discriminator network and visualize architecture
  392. maj_min_discriminator = gan.maj_min_discriminator
  393. print(maj_min_discriminator.summary())
  394. print('\n')
  395. ## instanciate network and visualize architecture
  396. cg = gan.cg
  397. print(cg.summary())
  398. print('\n')
  399. print('Training the GAN, first round training of the discrminator as a majority-minority classifier')
  400. print('\n')
  401. ## train gan generator ## rough_train_discriminator
  402. gan.train(dataSet, neb_epochs)
  403. print('\n')
  404. ## rough learning results
  405. c_r,f_r,pr_r,rc_r,k_r = rough_learning_predictions(gan.maj_min_discriminator_r, test_data, test_labels)
  406. print('\n')
  407. ## generate synthetic data
  408. ovs_training_dataset, ovs_pca_labels, ovs_training_labels_oh = generate_synthetic_data(gan, data_min, data_maj)
  409. print('\n')
  410. ## final training results
  411. c,f,pr,rc,k=final_learning(gan.maj_min_discriminator, ovs_training_dataset, ovs_training_labels_oh, test_data, test_labels, epochs_retrain_disc)
  412. return ((c_r,f_r,pr_r,rc_r,k_r),(c,f,pr,rc,k))
  413. def unison_shuffled_copies(a, b,seed_perm):
  414. 'Shuffling the feature matrix along with the labels with same order'
  415. np.random.seed(seed_perm)##change seed 1,2,3,4,5
  416. assert len(a) == len(b)
  417. p = np.random.permutation(len(a))
  418. return a[p], b[p]
def runTest():
    """
    Benchmark driver: 5 reshuffles x 5-fold CV of the ConvGAN oversampling
    pipeline on the imblearn 'yeast_me2' dataset; prints averaged metrics
    for the rough and the final discriminator training phases.
    """
    ## fix all random seeds for reproducibility
    seed_num=1
    seed(seed_num)
    tf.random.set_seed(seed_num)
    ## Import dataset
    data = fetch_datasets()['yeast_me2']
    ## Creating label and feature matrices
    labels_x = data.target ## labels of the data
    features_x = data.data ## features of the data
    # Until now we have obtained the data. Below it is divided into training and test
    # sets, with separate variables for the majority and minority classes and their
    # labels for both sets.
    ## specify parameters
    neb=gen=5 ##neb=gen required
    neb_epochs=10
    epochs_retrain_disc=50
    # TODO n_feat=len(features_x[1]) ## number of features
    ## Training
    np.random.seed(42)
    strata=5
    results=[]
    for seed_perm in range(strata):
        ## reshuffle the whole dataset with a per-stratum seed
        features_x,labels_x=unison_shuffled_copies(features_x,labels_x,seed_perm)
        ### Extracting all features and labels
        print('Extracting all features and labels for seed:'+ str(seed_perm)+'\n')
        ## Dividing data into training and testing datasets
        print('Dividing data into training and testing datasets for 10-fold CV for seed:'+ str(seed_perm)+'\n')
        ## split the shuffled data by class
        label_1=list(np.where(labels_x == 1)[0])
        features_1=features_x[label_1]
        label_0=list(np.where(labels_x != 1)[0])
        features_0=features_x[label_0]
        ## per-fold class sizes for a stratified 5-fold split
        a=len(features_1)//5
        b=len(features_0)//5
        ## fold k test set = k-th slice of each class; label 1 = minority
        fold_1_min=features_1[0:a]
        fold_1_maj=features_0[0:b]
        fold_1_tst=np.concatenate((fold_1_min,fold_1_maj))
        lab_1_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
        fold_2_min=features_1[a:2*a]
        fold_2_maj=features_0[b:2*b]
        fold_2_tst=np.concatenate((fold_2_min,fold_2_maj))
        ## NOTE(review): lab_2..4_tst reuse len(fold_1_min)/len(fold_1_maj);
        ## folds 1-4 all contain exactly a/b rows, so the lengths coincide here.
        lab_2_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
        fold_3_min=features_1[2*a:3*a]
        fold_3_maj=features_0[2*b:3*b]
        fold_3_tst=np.concatenate((fold_3_min,fold_3_maj))
        lab_3_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
        fold_4_min=features_1[3*a:4*a]
        fold_4_maj=features_0[3*b:4*b]
        fold_4_tst=np.concatenate((fold_4_min,fold_4_maj))
        lab_4_tst=np.concatenate((np.zeros(len(fold_1_min))+1, np.zeros(len(fold_1_maj))))
        ## the last fold absorbs the remainder rows of each class
        fold_5_min=features_1[4*a:]
        fold_5_maj=features_0[4*b:]
        fold_5_tst=np.concatenate((fold_5_min,fold_5_maj))
        lab_5_tst=np.concatenate((np.zeros(len(fold_5_min))+1, np.zeros(len(fold_5_maj))))
        ## fold k training set = the other four folds (minority first, then majority)
        fold_1_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_5_min, fold_2_maj,fold_3_maj,fold_4_maj,fold_5_maj))
        lab_1_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
        fold_2_trn=np.concatenate((fold_1_min,fold_3_min,fold_4_min,fold_5_min,fold_1_maj,fold_3_maj,fold_4_maj,fold_5_maj))
        lab_2_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
        fold_3_trn=np.concatenate((fold_2_min,fold_1_min,fold_4_min,fold_5_min,fold_2_maj,fold_1_maj,fold_4_maj,fold_5_maj))
        lab_3_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
        fold_4_trn=np.concatenate((fold_2_min,fold_3_min,fold_1_min,fold_5_min,fold_2_maj,fold_3_maj,fold_1_maj,fold_5_maj))
        lab_4_trn=np.concatenate((np.zeros(3*a+len(fold_5_min))+1,np.zeros(3*b+len(fold_5_maj))))
        fold_5_trn=np.concatenate((fold_2_min,fold_3_min,fold_4_min,fold_1_min,fold_2_maj,fold_3_maj,fold_4_maj,fold_1_maj))
        lab_5_trn=np.concatenate((np.zeros(4*a)+1,np.zeros(4*b)))
        training_folds_feats=[fold_1_trn,fold_2_trn,fold_3_trn,fold_4_trn,fold_5_trn]
        testing_folds_feats=[fold_1_tst,fold_2_tst,fold_3_tst,fold_4_tst,fold_5_tst]
        training_folds_labels=[lab_1_trn,lab_2_trn,lab_3_trn,lab_4_trn,lab_5_trn]
        testing_folds_labels=[lab_1_tst,lab_2_tst,lab_3_tst,lab_4_tst,lab_5_tst]
        ## run the end-to-end pipeline on every fold; keep (f1, precision,
        ## recall, kappa) for both phases (confusion matrix is dropped)
        for i in range(5):
            print('\n')
            print('Executing fold: '+str(i+1))
            print('\n')
            r1,r2=convGAN_train_end_to_end(training_folds_feats[i],training_folds_labels[i],testing_folds_feats[i],testing_folds_labels[i], neb, gen, neb_epochs, epochs_retrain_disc)
            results.append(np.array([list(r1[1:]),list(r2[1:])]))
    results=np.array(results)
    ## Benchmark: average the metrics over all strata and folds
    mean_rough=np.mean(results[:,0], axis=0)
    data_r={'F1-Score_r':[mean_rough[0]], 'Precision_r' : [mean_rough[1]], 'Recall_r' : [mean_rough[2]], 'Kappa_r': [mean_rough[3]]}
    df_r=pd.DataFrame(data=data_r)
    print('Rough training results:')
    print('\n')
    print(df_r)
    mean_final=np.mean(results[:,1], axis=0)
    data_f={'F1-Score_f':[mean_final[0]], 'Precision_f' : [mean_final[1]], 'Recall_f' : [mean_final[2]], 'Kappa_f': [mean_final[3]]}
    df_f=pd.DataFrame(data=data_f)
    print('Final training results:')
    print('\n')
    print(df_f)
## Script entry point: run the full benchmark when executed directly.
if __name__ == "__main__":
    runTest()