|
|
@@ -30,11 +30,11 @@ def create01Labels(totalSize, sizeFirstHalf):
|
|
|
labels.extend(repeat(np.array([0,1]), totalSize - sizeFirstHalf))
|
|
|
return np.array(labels)
|
|
|
|
|
|
-class ConvGeN(GanBaseClass):
|
|
|
+class NextConvGeN(GanBaseClass):
|
|
|
"""
|
|
|
This is the ConvGeN class. ConvGeN is a synthetic point generator for imbalanced datasets.
|
|
|
"""
|
|
|
- def __init__(self, n_feat, neb=5, gen=None, neb_epochs=10, maj_proximal=False, debug=False):
|
|
|
+ def __init__(self, n_feat, neb=5, gen=None, neb_epochs=10, fdc=None, maj_proximal=False, debug=False):
|
|
|
self.isTrained = False
|
|
|
self.n_feat = n_feat
|
|
|
self.neb = neb
|
|
|
@@ -50,11 +50,12 @@ class ConvGeN(GanBaseClass):
|
|
|
self.maj_proximal = maj_proximal
|
|
|
self.cg = None
|
|
|
self.canPredict = True
|
|
|
+ self.fdc = fdc
|
|
|
|
|
|
if self.neb is not None and self.gen is not None and self.neb > self.gen:
|
|
|
raise ValueError(f"Expected neb <= gen but got neb={neb} and gen={gen}.")
|
|
|
|
|
|
- def reset(self, dataSet):
|
|
|
+ def reset(self, data):
|
|
|
"""
|
|
|
Creates the network.
|
|
|
|
|
|
@@ -64,8 +65,8 @@ class ConvGeN(GanBaseClass):
|
|
|
"""
|
|
|
self.isTrained = False
|
|
|
|
|
|
- if dataSet is not None:
|
|
|
- nMinoryPoints = dataSet.data1.shape[0]
|
|
|
+ if data is not None:
|
|
|
+ nMinoryPoints = data.shape[0]
|
|
|
if self.nebInitial is None:
|
|
|
self.neb = nMinoryPoints
|
|
|
else:
|
|
|
@@ -96,7 +97,7 @@ class ConvGeN(GanBaseClass):
|
|
|
print(self.cg.summary())
|
|
|
print('\n')
|
|
|
|
|
|
- def train(self, dataSet, discTrainCount=5):
|
|
|
+ def train(self, data, discTrainCount=5):
|
|
|
"""
|
|
|
Trains the Network.
|
|
|
|
|
|
@@ -104,24 +105,24 @@ class ConvGeN(GanBaseClass):
|
|
|
|
|
|
*discTrainCount* gives the number of extra training for the discriminator for each epoch. (>= 0)
|
|
|
"""
|
|
|
- if dataSet.data1.shape[0] <= 0:
|
|
|
+ if data.shape[0] <= 0:
|
|
|
raise AttributeError("Train: Expected data class 1 to contain at least one point.")
|
|
|
|
|
|
# Store size of minority class. This is needed during point generation.
|
|
|
- self.minSetSize = dataSet.data1.shape[0]
|
|
|
+ self.minSetSize = data.shape[0]
|
|
|
|
|
|
+ normalizedData = data
|
|
|
+ if self.fdc is not None:
|
|
|
+ normalizedData = self.fdc.normalize(data)
|
|
|
+
|
|
|
# Precalculate neighborhoods
|
|
|
- self.nmbMin = NNSearch(self.neb).fit(haystack=dataSet.data1)
|
|
|
- if self.maj_proximal:
|
|
|
- self.nmbMaj = NNSearch(self.neb).fit(haystack=dataSet.data0, needles=dataSet.data1)
|
|
|
- else:
|
|
|
- self.nmbMaj = None
|
|
|
+ self.nmbMin = NNSearch(self.neb).fit(haystack=normalizedData)
|
|
|
+ self.nmbMin.basePoints = data
|
|
|
|
|
|
# Do the training.
|
|
|
- self._rough_learning(dataSet.data1, dataSet.data0, discTrainCount)
|
|
|
+ self._rough_learning(data, discTrainCount)
|
|
|
|
|
|
# Neighborhood in majority class is no longer needed. So save memory.
|
|
|
- self.nmbMaj = None
|
|
|
self.isTrained = True
|
|
|
|
|
|
def generateDataPoint(self):
|
|
|
@@ -149,7 +150,10 @@ class ConvGeN(GanBaseClass):
|
|
|
synth_set.extend(self._generate_data_for_min_point(i, synth_num))
|
|
|
|
|
|
## extract the exact number of synthetic samples needed to exactly balance the two classes
|
|
|
- synth_set = np.array(synth_set[:numOfSamples])
|
|
|
+ synth_set = np.array(synth_set[:numOfSamples])
|
|
|
+
|
|
|
+    if self.fdc is not None:
|
|
|
+        synth_set = self.fdc.fixPointsToDataset(synth_set)
|
|
|
|
|
|
return synth_set
|
|
|
|
|
|
@@ -302,13 +306,13 @@ class ConvGeN(GanBaseClass):
|
|
|
|
|
|
|
|
|
# Training
|
|
|
- def _rough_learning(self, data_min, data_maj, discTrainCount):
|
|
|
+ def _rough_learning(self, data, discTrainCount):
|
|
|
generator = self.conv_sample_generator
|
|
|
discriminator = self.maj_min_discriminator
|
|
|
convGeN = self.cg
|
|
|
loss_history = [] ## this is for stroring the loss for every run
|
|
|
step = 0
|
|
|
- minSetSize = len(data_min)
|
|
|
+ minSetSize = len(data)
|
|
|
|
|
|
labels = tf.convert_to_tensor(create01Labels(2 * self.gen, self.gen))
|
|
|
nLabels = 2 * self.gen
|
|
|
@@ -321,7 +325,7 @@ class ConvGeN(GanBaseClass):
|
|
|
min_batch_indices = shuffle(self.nmbMin.neighbourhoodOfItem(min_idx))
|
|
|
min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
|
|
|
## generate random proximal majority batch
|
|
|
- maj_batch = self._BMB(data_maj, min_batch_indices)
|
|
|
+ maj_batch = self._BMB(min_batch_indices)
|
|
|
|
|
|
## generate synthetic samples from convex space
|
|
|
## of minority neighbourhood batch using generator
|
|
|
@@ -342,7 +346,7 @@ class ConvGeN(GanBaseClass):
|
|
|
min_batch = self.nmbMin.getPointsFromIndices(min_batch_indices)
|
|
|
|
|
|
## generate random proximal majority batch
|
|
|
- maj_batch = self._BMB(data_maj, min_batch_indices)
|
|
|
+ maj_batch = self._BMB(min_batch_indices)
|
|
|
|
|
|
## generate synthetic samples from convex space
|
|
|
## of minority neighbourhood batch using generator
|
|
|
@@ -389,17 +393,15 @@ class ConvGeN(GanBaseClass):
|
|
|
self.loss_history = loss_history
|
|
|
|
|
|
|
|
|
- def _BMB(self, data_maj, min_idxs):
|
|
|
+ def _BMB(self, min_idxs):
|
|
|
|
|
|
## Generate a borderline majority batch
|
|
|
## data_maj -> majority class data
|
|
|
## min_idxs -> indices of points in minority class
|
|
|
## gen -> convex combinations generated from each neighbourhood
|
|
|
|
|
|
- if self.nmbMaj is not None:
|
|
|
- return self.nmbMaj.neighbourhoodOfItemList(shuffle(min_idxs), maxCount=self.gen)
|
|
|
- else:
|
|
|
- return tf.convert_to_tensor(data_maj[np.random.randint(len(data_maj), size=self.gen)])
|
|
|
+ indices = [i for i in range(self.minSetSize) if i not in min_idxs]
|
|
|
+ return self.nmbMin.neighbourhoodOfItemList(shuffle(indices), maxCount=self.gen)
|
|
|
|
|
|
|
|
|
def retrainDiscriminitor(self, data, labels):
|