|
@@ -25,6 +25,11 @@ import tensorflow as tf
|
|
|
from tensorflow.keras.optimizers import Adam
|
|
from tensorflow.keras.optimizers import Adam
|
|
|
from tensorflow.keras.layers import Lambda
|
|
from tensorflow.keras.layers import Lambda
|
|
|
|
|
|
|
|
|
|
+import time
|
|
|
|
|
+
|
|
|
|
|
+from library.NNSearch import NNSearch
|
|
|
|
|
+from library.timing import timing
|
|
|
|
|
+
|
|
|
import warnings
|
|
import warnings
|
|
|
warnings.filterwarnings("ignore")
|
|
warnings.filterwarnings("ignore")
|
|
|
|
|
|
|
@@ -55,6 +60,11 @@ class ConvGAN2(GanBaseClass):
|
|
|
self.conv_sample_generator = None
|
|
self.conv_sample_generator = None
|
|
|
self.maj_min_discriminator = None
|
|
self.maj_min_discriminator = None
|
|
|
self.cg = None
|
|
self.cg = None
|
|
|
|
|
+ self.tNbhFit = 0.0
|
|
|
|
|
+ self.tNbhSearch = 0.0
|
|
|
|
|
+ self.nNbhFit = 0
|
|
|
|
|
+ self.nNbhSearch = 0
|
|
|
|
|
+ self.timing = { name: timing(name) for name in ["reset", "train", "create points", "NMB", "BMB", "_generate_data_for_min_point","predict"]}
|
|
|
|
|
|
|
|
if neb > gen:
|
|
if neb > gen:
|
|
|
raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
|
|
raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
|
|
@@ -63,6 +73,7 @@ class ConvGAN2(GanBaseClass):
|
|
|
"""
|
|
"""
|
|
|
Resets the trained GAN to an random state.
|
|
Resets the trained GAN to an random state.
|
|
|
"""
|
|
"""
|
|
|
|
|
+ self.timing["reset"].start()
|
|
|
self.isTrained = False
|
|
self.isTrained = False
|
|
|
## instanciate generator network and visualize architecture
|
|
## instanciate generator network and visualize architecture
|
|
|
self.conv_sample_generator = self._conv_sample_gen()
|
|
self.conv_sample_generator = self._conv_sample_gen()
|
|
@@ -72,6 +83,7 @@ class ConvGAN2(GanBaseClass):
|
|
|
|
|
|
|
|
## instanciate network and visualize architecture
|
|
## instanciate network and visualize architecture
|
|
|
self.cg = self._convGAN(self.conv_sample_generator, self.maj_min_discriminator)
|
|
self.cg = self._convGAN(self.conv_sample_generator, self.maj_min_discriminator)
|
|
|
|
|
+ self.timing["reset"].stop()
|
|
|
|
|
|
|
|
if self.debug:
|
|
if self.debug:
|
|
|
print(self.conv_sample_generator.summary())
|
|
print(self.conv_sample_generator.summary())
|
|
@@ -92,12 +104,14 @@ class ConvGAN2(GanBaseClass):
|
|
|
*dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
|
|
*dataSet* is a instance of /library.dataset.DataSet/. It contains the training dataset.
|
|
|
We are only interested in the first *maxListSize* points in class 1.
|
|
We are only interested in the first *maxListSize* points in class 1.
|
|
|
"""
|
|
"""
|
|
|
|
|
+ self.timing["train"].start()
|
|
|
if dataSet.data1.shape[0] <= 0:
|
|
if dataSet.data1.shape[0] <= 0:
|
|
|
raise AttributeError("Train: Expected data class 1 to contain at least one point.")
|
|
raise AttributeError("Train: Expected data class 1 to contain at least one point.")
|
|
|
|
|
|
|
|
self.dataSet = dataSet
|
|
self.dataSet = dataSet
|
|
|
self._rough_learning(dataSet.data1, dataSet.data0)
|
|
self._rough_learning(dataSet.data1, dataSet.data0)
|
|
|
self.isTrained = True
|
|
self.isTrained = True
|
|
|
|
|
+ self.timing["train"].stop()
|
|
|
|
|
|
|
|
def generateDataPoint(self):
|
|
def generateDataPoint(self):
|
|
|
"""
|
|
"""
|
|
@@ -112,6 +126,7 @@ class ConvGAN2(GanBaseClass):
|
|
|
|
|
|
|
|
*numOfSamples* is a integer > 0. It gives the number of new generated samples.
|
|
*numOfSamples* is a integer > 0. It gives the number of new generated samples.
|
|
|
"""
|
|
"""
|
|
|
|
|
+ self.timing["create points"].start()
|
|
|
if not self.isTrained:
|
|
if not self.isTrained:
|
|
|
raise ValueError("Try to generate data with untrained Re.")
|
|
raise ValueError("Try to generate data with untrained Re.")
|
|
|
|
|
|
|
@@ -122,12 +137,14 @@ class ConvGAN2(GanBaseClass):
|
|
|
|
|
|
|
|
## generate synth_num synthetic samples from each minority neighbourhood
|
|
## generate synth_num synthetic samples from each minority neighbourhood
|
|
|
synth_set=[]
|
|
synth_set=[]
|
|
|
|
|
+ nmb = self._NMB_prepare(data_min)
|
|
|
for i in range(len(data_min)):
|
|
for i in range(len(data_min)):
|
|
|
- synth_set.extend(self._generate_data_for_min_point(data_min, i, synth_num))
|
|
|
|
|
|
|
+ synth_set.extend(self._generate_data_for_min_point(nmb, i, synth_num))
|
|
|
|
|
|
|
|
- synth_set = synth_set[:numOfSamples] ## extract the exact number of synthetic samples needed to exactly balance the two classes
|
|
|
|
|
|
|
+ synth_set = np.array(synth_set[:numOfSamples]) ## extract the exact number of synthetic samples needed to exactly balance the two classes
|
|
|
|
|
+ self.timing["create points"].stop()
|
|
|
|
|
|
|
|
- return np.array(synth_set)
|
|
|
|
|
|
|
+ return synth_set
|
|
|
|
|
|
|
|
# ###############################################################
|
|
# ###############################################################
|
|
|
# Hidden internal functions
|
|
# Hidden internal functions
|
|
@@ -249,7 +266,7 @@ class ConvGAN2(GanBaseClass):
|
|
|
return model
|
|
return model
|
|
|
|
|
|
|
|
# Create synthetic points
|
|
# Create synthetic points
|
|
|
- def _generate_data_for_min_point(self, data_min, index, synth_num):
|
|
|
|
|
|
|
+ def _generate_data_for_min_point(self, nmb, index, synth_num):
|
|
|
"""
|
|
"""
|
|
|
generate synth_num synthetic points for a particular minoity sample
|
|
generate synth_num synthetic points for a particular minoity sample
|
|
|
synth_num -> required number of data points that can be generated from a neighbourhood
|
|
synth_num -> required number of data points that can be generated from a neighbourhood
|
|
@@ -258,13 +275,19 @@ class ConvGAN2(GanBaseClass):
|
|
|
index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
|
|
+ self.timing["_generate_data_for_min_point"].start()
|
|
|
runs = int(synth_num / self.neb) + 1
|
|
runs = int(synth_num / self.neb) + 1
|
|
|
synth_set = []
|
|
synth_set = []
|
|
|
for _run in range(runs):
|
|
for _run in range(runs):
|
|
|
- batch = self._NMB_guided(data_min, index)
|
|
|
|
|
|
|
+ batch = self._NMB_guided(nmb, index)
|
|
|
|
|
+ self.timing["predict"].start()
|
|
|
synth_batch = self.conv_sample_generator.predict(batch)
|
|
synth_batch = self.conv_sample_generator.predict(batch)
|
|
|
- for x in synth_batch:
|
|
|
|
|
- synth_set.append(x)
|
|
|
|
|
|
|
+ self.timing["predict"].stop()
|
|
|
|
|
+ synth_set.extend(synth_batch)
|
|
|
|
|
+ #for x in synth_batch:
|
|
|
|
|
+ # synth_set.append(x)
|
|
|
|
|
+
|
|
|
|
|
+ self.timing["_generate_data_for_min_point"].stop()
|
|
|
|
|
|
|
|
return synth_set[:synth_num]
|
|
return synth_set[:synth_num]
|
|
|
|
|
|
|
@@ -281,9 +304,10 @@ class ConvGAN2(GanBaseClass):
|
|
|
|
|
|
|
|
labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
|
|
labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
|
|
|
|
|
|
|
|
|
|
+ nmb = self._NMB_prepare(data_min)
|
|
|
for step in range(self.neb_epochs * len(data_min)):
|
|
for step in range(self.neb_epochs * len(data_min)):
|
|
|
## generate minority neighbourhood batch for every minority class sampls by index
|
|
## generate minority neighbourhood batch for every minority class sampls by index
|
|
|
- min_batch = self._NMB_guided(data_min, min_idx)
|
|
|
|
|
|
|
+ min_batch = self._NMB_guided(nmb, min_idx)
|
|
|
min_idx = min_idx + 1
|
|
min_idx = min_idx + 1
|
|
|
## generate random proximal majority batch
|
|
## generate random proximal majority batch
|
|
|
maj_batch = self._BMB(data_min, data_maj)
|
|
maj_batch = self._BMB(data_min, data_maj)
|
|
@@ -345,20 +369,26 @@ class ConvGAN2(GanBaseClass):
|
|
|
## neb -> oversampling neighbourhood
|
|
## neb -> oversampling neighbourhood
|
|
|
## gen -> convex combinations generated from each neighbourhood
|
|
## gen -> convex combinations generated from each neighbourhood
|
|
|
|
|
|
|
|
- #neigh = NearestNeighbors(self.neb)
|
|
|
|
|
- #neigh.fit(data_maj)
|
|
|
|
|
- # bmbi = [
|
|
|
|
|
- # neigh.kneighbors([data_min[i]], self.neb, return_distance=False)
|
|
|
|
|
- # for i in range(len(data_min))
|
|
|
|
|
- # ]
|
|
|
|
|
- # bmbi = np.unique(np.array(bmbi).flatten())
|
|
|
|
|
- # bmbi = shuffle(bmbi)
|
|
|
|
|
- return tf.convert_to_tensor(
|
|
|
|
|
|
|
+ self.timing["BMB"].start()
|
|
|
|
|
+ result = tf.convert_to_tensor(
|
|
|
data_maj[np.random.randint(len(data_maj), size=self.gen)]
|
|
data_maj[np.random.randint(len(data_maj), size=self.gen)]
|
|
|
)
|
|
)
|
|
|
|
|
+ self.timing["BMB"].stop()
|
|
|
|
|
+ return result
|
|
|
|
|
+
|
|
|
|
|
+ def _NMB_prepare(self, data_min):
|
|
|
|
|
+ self.timing["NMB"].start()
|
|
|
|
|
+ t = time.time()
|
|
|
|
|
+ neigh = NNSearch(self.neb)
|
|
|
|
|
+ #neigh = NearestNeighbors(self.neb)
|
|
|
|
|
+ neigh.fit(data_min)
|
|
|
|
|
+ self.tNbhFit += (time.time() - t)
|
|
|
|
|
+ self.nNbhFit += 1
|
|
|
|
|
+ self.timing["NMB"].stop()
|
|
|
|
|
+ return (data_min, neigh)
|
|
|
|
|
|
|
|
|
|
|
|
|
- def _NMB_guided(self, data_min, index):
|
|
|
|
|
|
|
+ def _NMB_guided(self, nmb, index):
|
|
|
|
|
|
|
|
## generate a minority neighbourhood batch for a particular minority sample
|
|
## generate a minority neighbourhood batch for a particular minority sample
|
|
|
## we need this for minority data generation
|
|
## we need this for minority data generation
|
|
@@ -366,13 +396,18 @@ class ConvGAN2(GanBaseClass):
|
|
|
## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
|
|
|
## data_min -> minority class data
|
|
## data_min -> minority class data
|
|
|
## neb -> oversampling neighbourhood
|
|
## neb -> oversampling neighbourhood
|
|
|
-
|
|
|
|
|
- neigh = NearestNeighbors(self.neb)
|
|
|
|
|
- neigh.fit(data_min)
|
|
|
|
|
- nmbi = neigh.kneighbors([data_min[index]], self.neb, return_distance=False)
|
|
|
|
|
|
|
+ self.timing["NMB"].start()
|
|
|
|
|
+ (data_min, neigh) = nmb
|
|
|
|
|
+
|
|
|
|
|
+ t = time.time()
|
|
|
|
|
+ #nmbi = neigh.kneighbors([data_min[index]], self.neb, return_distance=False)
|
|
|
|
|
+ nmbi = np.array([neigh.neighbourhoodOfItem(index)])
|
|
|
|
|
+ self.tNbhSearch += (time.time() - t)
|
|
|
|
|
+ self.nNbhSearch += 1
|
|
|
nmbi = shuffle(nmbi)
|
|
nmbi = shuffle(nmbi)
|
|
|
nmb = data_min[nmbi]
|
|
nmb = data_min[nmbi]
|
|
|
nmb = tf.convert_to_tensor(nmb[0])
|
|
nmb = tf.convert_to_tensor(nmb[0])
|
|
|
|
|
+ self.timing["NMB"].stop()
|
|
|
return nmb
|
|
return nmb
|
|
|
|
|
|
|
|
|
|
|