Parcourir la source

Added proximity search in the majority class.

Kristian Schultz il y a 4 ans
Parent
commit
76f2f4e2a5
2 fichiers modifiés avec 54 ajouts et 37 suppressions
  1. 36 5
      library/NNSearch.py
  2. 18 32
      library/generators/convGAN.py

+ 36 - 5
library/NNSearch.py

@@ -3,6 +3,7 @@ import math
 import tensorflow as tf
 import numpy as np
 from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import shuffle
 from library.timing import timing
 
 
@@ -11,6 +12,7 @@ class NNSearch:
         self.nebSize = nebSize
         self.neighbourhoods = []
         self.timingDict = timingDict
+        self.basePoints = []
 
 
     def timerStart(self, name):
@@ -28,21 +30,50 @@ class NNSearch:
     def neighbourhoodOfItem(self, i):
         return self.neighbourhoods[i]
 
+    def getNbhPointsOfItem(self, index):
+        return self.getPointsFromIndices(self.neighbourhoodOfItem(index))
 
-    def fit(self, X, nebSize=None):
+    def getPointsFromIndices(self, indices):
+        nmbi = shuffle(np.array([indices]))
+        nmb = self.basePoints[nmbi]
+        return tf.convert_to_tensor(nmb[0])
+
+    def neighbourhoodOfItemList(self, items, maxCount=None):
+        nbhIndices = set()
+        duplicates = []
+        for i in items:
+            for x in self.neighbourhoodOfItem(i):
+                if x in nbhIndices:
+                    duplicates.append(x)
+                else:
+                    nbhIndices.add(x)
+
+        nbhIndices = list(nbhIndices)
+        if maxCount is not None:
+            if len(nbhIndices) < maxCount:
+                nbhIndices.extend(duplicates)
+            nbhIndices = nbhIndices[0:maxCount]
+
+        return self.getPointsFromIndices(nbhIndices)
+
+
+    def fit(self, haystack, needles=None, nebSize=None):
         self.timerStart("NN_fit_chained_init")
         if nebSize == None:
             nebSize = self.nebSize
 
-        nPoints = len(X)
-        nFeatures = len(X[0])
+        if needles is None:
+            needles = haystack
+
+        self.basePoints = haystack
 
         neigh = NearestNeighbors(n_neighbors=nebSize)
-        neigh.fit(X)
+        neigh.fit(haystack)
         self.timerStop("NN_fit_chained_init")
         self.timerStart("NN_fit_chained_toList")
         self.neighbourhoods = [
                 (neigh.kneighbors([x], nebSize, return_distance=False))[0]
-                for (i, x) in enumerate(X)
+                for (i, x) in enumerate(needles)
                 ]
         self.timerStop("NN_fit_chained_toList")
+        return self

+ 18 - 32
library/generators/convGAN.py

@@ -45,7 +45,7 @@ class ConvGAN(GanBaseClass):
     This is a toy example of a GAN.
     It repeats the first point of the training-data-set.
     """
-    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, debug=True):
+    def __init__(self, n_feat, neb=5, gen=5, neb_epochs=10, withMajorhoodNbSearch=False, debug=False):
         self.isTrained = False
         self.n_feat = n_feat
         self.neb = neb
@@ -56,6 +56,7 @@ class ConvGAN(GanBaseClass):
         self.dataSet = None
         self.conv_sample_generator = None
         self.maj_min_discriminator = None
+        self.withMajorhoodNbSearch = withMajorhoodNbSearch
         self.cg = None
 
         if neb > gen:
@@ -98,7 +99,11 @@ class ConvGAN(GanBaseClass):
             raise AttributeError("Train: Expected data class 1 to contain at least one point.")
 
         self.dataSet = dataSet
-        self.nmb = self._NMB_prepare(dataSet.data1)
+        self.nmbMin = NNSearch(self.neb).fit(haystack=dataSet.data1)
+        if self.withMajorhoodNbSearch:
+            self.nmbMaj = NNSearch(self.neb).fit(haystack=dataSet.data0, needles=dataSet.data1)
+        else:
+            self.nmbMaj = None
         self._rough_learning(dataSet.data1, dataSet.data0)
         self.isTrained = True
 
@@ -265,7 +270,7 @@ class ConvGAN(GanBaseClass):
         runs = int(synth_num / self.neb) + 1
         synth_set = []
         for _run in range(runs):
-            batch = self._NMB_guided(index)
+            batch = self.nmbMin.getNbhPointsOfItem(index)
             synth_batch = self.conv_sample_generator.predict(batch)
             synth_set.extend(synth_batch)
 
@@ -286,10 +291,11 @@ class ConvGAN(GanBaseClass):
 
         for step in range(self.neb_epochs * len(data_min)):
             ## generate minority neighbourhood batch for every minority class sampls by index
-            min_batch = self._NMB_guided(min_idx)
+            min_batch_indices = self.nmbMin.neighbourhoodOfItem(min_idx)
+            min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
             min_idx = min_idx + 1
             ## generate random proximal majority batch
-            maj_batch = self._BMB(data_min, data_maj)
+            maj_batch = self._BMB(data_maj, min_batch_indices)
 
             ## generate synthetic samples from convex space
             ## of minority neighbourhood batch using generator
@@ -340,7 +346,7 @@ class ConvGAN(GanBaseClass):
 
 
     ## convGAN
-    def _BMB(self, data_min, data_maj):
+    def _BMB(self, data_maj, min_idxs):
 
         ## Generate a borderline majority batch
         ## data_min -> minority class data
@@ -348,29 +354,9 @@ class ConvGAN(GanBaseClass):
         ## neb -> oversampling neighbourhood
         ## gen -> convex combinations generated from each neighbourhood
 
-        return tf.convert_to_tensor(
-            data_maj[np.random.randint(len(data_maj), size=self.gen)]
-            )
-
-    def _NMB_prepare(self, data_min):
-        neigh = NNSearch(self.neb)
-        neigh.fit(data_min)
-        return (data_min, neigh)
-
-
-    def _NMB_guided(self, index):
-
-        ## generate a minority neighbourhood batch for a particular minority sample
-        ## we need this for minority data generation
-        ## we will generate synthetic samples for each training data neighbourhood
-        ## index -> index of the minority sample in a training data whose neighbourhood we want to obtain
-        ## data_min -> minority class data
-        ## neb -> oversampling neighbourhood
-        (data_min, neigh) = self.nmb
-
-        nmbi = np.array([neigh.neighbourhoodOfItem(index)])
-        nmbi = shuffle(nmbi)
-        nmb = data_min[nmbi]
-        nmb = tf.convert_to_tensor(nmb[0])
-        return nmb
-
+        if self.nmbMaj is not None:
+            return self.nmbMaj.neighbourhoodOfItemList(min_idxs, maxCount=self.gen)
+        else:
+            return tf.convert_to_tensor(
+                data_maj[np.random.randint(len(data_maj), size=self.gen)]
+                )