|
|
@@ -40,7 +40,9 @@ seed(seed_num)
|
|
|
tf.random.set_seed(seed_num)
|
|
|
|
|
|
from library.interfaces import GanBaseClass
|
|
|
+from library.dataset import DataSet
|
|
|
|
|
|
+from sklearn.utils import shuffle
|
|
|
|
|
|
## Import dataset
|
|
|
data = fetch_datasets()['yeast_me2']
|
|
|
@@ -60,7 +62,7 @@ class ConvGAN(GanBaseClass):
|
|
|
This is a toy example of a GAN.
|
|
|
It repeats the first point of the training-data-set.
|
|
|
"""
|
|
|
- def __init__(self, neb, gen, debug=False):
|
|
|
+ def __init__(self, neb, gen, debug=True):  # NOTE(review): default flipped from False to True — confirm debug-on is intended to ship
|
|
|
self.isTrained = False
|
|
|
self.neb = neb
|
|
|
self.gen = gen
|
|
|
@@ -94,7 +96,7 @@ class ConvGAN(GanBaseClass):
|
|
|
raise AttributeError("Train: Expected data class 1 to contain at least one point.")
|
|
|
|
|
|
# TODO: do actually training
|
|
|
- self.rough_learning(neb_epochs, dataSet.data1, dataSet.data0)
|
|
|
+ self._rough_learning(neb_epochs, dataSet.data1, dataSet.data0)
|
|
|
|
|
|
self.isTrained = True
|
|
|
|
|
|
@@ -123,7 +125,7 @@ class ConvGAN(GanBaseClass):
|
|
|
# Hidden internal functions
|
|
|
|
|
|
# Training
|
|
|
- def _rough_learning(self, neb_epochs, data_min):
|
|
|
+ def _rough_learning(self, neb_epochs, data_min, data_maj):
|
|
|
generator = self.conv_sample_generator
|
|
|
discriminator = self.maj_min_discriminator
|
|
|
GAN = self.cg
|
|
|
@@ -142,9 +144,9 @@ class ConvGAN(GanBaseClass):
|
|
|
|
|
|
|
|
|
for step in range(neb_epochs * len(data_min)):
|
|
|
- min_batch = NMB_guided(data_min, self.neb, min_idx) ## generate minority neighbourhood batch for every minority class sampls by index
|
|
|
+ min_batch = self._NMB_guided(data_min, min_idx) ## generate minority neighbourhood batch for every minority class samples by index
|
|
|
min_idx = min_idx + 1
|
|
|
- maj_batch = BMB(data_min,data_maj, self.neb, self.gen) ## generate random proximal majority batch
|
|
|
+ maj_batch = self._BMB(data_min, data_maj) ## generate random proximal majority batch
|
|
|
|
|
|
conv_samples = generator.predict(min_batch) ## generate synthetic samples from convex space of minority neighbourhood batch using generator
|
|
|
concat_sample = tf.concat([conv_samples, maj_batch], axis=0) ## concatenate them with the majority batch
|
|
|
@@ -184,58 +186,46 @@ class ConvGAN(GanBaseClass):
|
|
|
|
|
|
|
|
|
|
|
|
-## convGAN
|
|
|
-def unison_shuffled_copies(a, b,seed_perm):
|
|
|
- 'Shuffling the feature matrix along with the labels with same order'
|
|
|
- np.random.seed(seed_perm)##change seed 1,2,3,4,5
|
|
|
- assert len(a) == len(b)
|
|
|
- p = np.random.permutation(len(a))
|
|
|
- return a[p], b[p]
|
|
|
-
|
|
|
-
|
|
|
-def BMB(data_min,data_maj, neb, gen):
|
|
|
-
|
|
|
- ## Generate a borderline majority batch
|
|
|
- ## data_min -> minority class data
|
|
|
- ## data_maj -> majority class data
|
|
|
- ## neb -> oversampling neighbourhood
|
|
|
- ## gen -> convex combinations generated from each neighbourhood
|
|
|
-
|
|
|
- from sklearn.neighbors import NearestNeighbors
|
|
|
- from sklearn.utils import shuffle
|
|
|
- neigh = NearestNeighbors(neb)
|
|
|
- n_feat=data_min.shape[1]
|
|
|
- neigh.fit(data_maj)
|
|
|
- bmbi=[]
|
|
|
- for i in range(len(data_min)):
|
|
|
- indices=neigh.kneighbors([data_min[i]],neb,return_distance=False)
|
|
|
- bmbi.append(indices)
|
|
|
- bmbi=np.unique(np.array(bmbi).flatten())
|
|
|
- bmbi=shuffle(bmbi)
|
|
|
- bmb=data_maj[np.random.randint(len(data_maj),size=gen)]
|
|
|
- bmb=tf.convert_to_tensor(bmb)
|
|
|
- return bmb
|
|
|
-
|
|
|
-
|
|
|
-def NMB_guided(data_min, neb, index):
|
|
|
-
|
|
|
- ## generate a minority neighbourhood batch for a particular minority sample
|
|
|
- ## we need this for minority data generation
|
|
|
- ## we will generate synthetic samples for each training data neighbourhood
|
|
|
- ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
|
- ## data_min -> minority class data
|
|
|
- ## neb -> oversampling neighbourhood
|
|
|
-
|
|
|
- from sklearn.neighbors import NearestNeighbors
|
|
|
- from sklearn.utils import shuffle
|
|
|
- neigh = NearestNeighbors(neb)
|
|
|
- neigh.fit(data_min)
|
|
|
- ind=index
|
|
|
- nmbi=neigh.kneighbors([data_min[ind]],neb,return_distance=False)
|
|
|
- nmbi=shuffle(nmbi)
|
|
|
- nmb=data_min[nmbi]
|
|
|
- nmb=tf.convert_to_tensor(nmb[0])
|
|
|
- return (nmb)
|
|
|
+ ## convGAN
|
|
|
+ def _BMB(self, data_min, data_maj):
|
|
|
+
|
|
|
+ ## Generate a borderline majority batch
|
|
|
+ ## data_min -> minority class data
|
|
|
+ ## data_maj -> majority class data
|
|
|
+ ## self.neb -> oversampling neighbourhood size (set in __init__)
|
|
|
+ ## self.gen -> number of convex combinations generated from each neighbourhood (set in __init__)
|
|
|
+
|
|
|
+ neigh = NearestNeighbors(n_neighbors=self.neb)  # keyword-only in sklearn >= 1.0; positional raises TypeError
|
|
|
+ n_feat = data_min.shape[1]  # NOTE(review): unused — carried over from the removed BMB(); candidate for removal in a follow-up
|
|
|
+ neigh.fit(data_maj)
|
|
|
+ bmbi = [
|
|
|
+ neigh.kneighbors([data_min[i]], self.neb, return_distance=False)
|
|
|
+ for i in range(len(data_min))
|
|
|
+ ]
|
|
|
+ bmbi = np.unique(np.array(bmbi).flatten())
|
|
|
+ bmbi = shuffle(bmbi)  # NOTE(review): bmbi is never used below — the batch is drawn uniformly from data_maj, not from these borderline indices; confirm intent
|
|
|
+ return tf.convert_to_tensor(
|
|
|
+ data_maj[np.random.randint(len(data_maj), size=self.gen)]
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
+ def _NMB_guided(self, data_min, index):
|
|
|
+
|
|
|
+ ## generate a minority neighbourhood batch for a particular minority sample
|
|
|
+ ## we need this for minority data generation
|
|
|
+ ## we will generate synthetic samples for each training data neighbourhood
|
|
|
+ ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
|
+ ## data_min -> minority class data
|
|
|
+ ## self.neb -> oversampling neighbourhood size (set in __init__)
|
|
|
+
|
|
|
+ neigh = NearestNeighbors(n_neighbors=self.neb)  # keyword-only in sklearn >= 1.0; positional raises TypeError
|
|
|
+ neigh.fit(data_min)
|
|
|
+ ind = index
|
|
|
+ nmbi = neigh.kneighbors([data_min[ind]], self.neb, return_distance=False)
|
|
|
+ nmbi = shuffle(nmbi)
|
|
|
+ nmb = data_min[nmbi]
|
|
|
+ nmb = tf.convert_to_tensor(nmb[0])
|
|
|
+ return (nmb)
|
|
|
|
|
|
def conv_sample_gen():
|
|
|
|
|
|
@@ -454,12 +444,12 @@ def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels
|
|
|
print('\n')
|
|
|
|
|
|
## instanciate discriminator network and visualize architecture
|
|
|
- maj_min_discriminator = self.maj_min_discriminator
|
|
|
+ maj_min_discriminator = gan.maj_min_discriminator
|
|
|
print(maj_min_discriminator.summary())
|
|
|
print('\n')
|
|
|
|
|
|
## instanciate network and visualize architecture
|
|
|
- cg = self.cg
|
|
|
+ cg = gan.cg
|
|
|
print(cg.summary())
|
|
|
print('\n')
|
|
|
|
|
|
@@ -484,6 +474,14 @@ def convGAN_train_end_to_end(training_data,training_labels,test_data,test_labels
|
|
|
return ((c_r,f_r,pr_r,rc_r,k_r),(c,f,pr,rc,k))
|
|
|
|
|
|
|
|
|
+def unison_shuffled_copies(a, b,seed_perm):
|
|
|
+ 'Shuffling the feature matrix along with the labels with same order'
|
|
|
+ np.random.seed(seed_perm)##change seed 1,2,3,4,5
|
|
|
+ assert len(a) == len(b)
|
|
|
+ p = np.random.permutation(len(a))
|
|
|
+ return a[p], b[p]
|
|
|
+
|
|
|
+
|
|
|
## specify parameters
|
|
|
|
|
|
neb=gen=5 ##neb=gen required
|