Kristian Schultz 3 лет назад
Родитель
Commit
1dee8f0aaf
2 изменённых файла: 214 добавлений и 86 удалений
  1. 133 19
      XConvGeN-Example.ipynb
  2. 81 67
      library/generators/XConvGeN.py

Разница между файлами не показана из-за большого размера изменений
+ 133 - 19
XConvGeN-Example.ipynb


+ 81 - 67
library/generators/XConvGeN.py

@@ -172,20 +172,21 @@ class XConvGeN(GanBaseClass):
             .from_generator(neighborhoodGenerator, output_types=tf.float32)
             .from_generator(neighborhoodGenerator, output_types=tf.float32)
             .repeat()
             .repeat()
             )
             )
-        batch = neighborhoods.take(runs * self.minSetSize).batch(32)
+        batch = neighborhoods.take(runs * self.minSetSize)
 
 
-        synth_batch = self.conv_sample_generator.predict(batch)
+        synth_batch = self.conv_sample_generator.predict(batch.batch(32), verbose=0)
 
 
-        n = 0
-        synth_set = []
-        for (x,y) in zip(neighborhoods, synth_batch):
-            synth_set.extend(self.correct_feature_types(x.numpy(), y))
-            n += len(y)
-            if n >= numOfSamples:
-                break
+        pairs = tf.data.Dataset.zip(
+            ( batch
+            , tf.data.Dataset.from_tensor_slices(synth_batch)
+            ))
+
+        corrected = pairs.map(self.correct_feature_types())
 
 
         ## extract the exact number of synthetic samples needed to exactly balance the two classes
         ## extract the exact number of synthetic samples needed to exactly balance the two classes
-        return np.array(synth_set[:numOfSamples])
+        r = np.concatenate(np.array(list(corrected.take(1 + (numOfSamples // self.gen)))), axis=0)[:numOfSamples]
+
+        return r
 
 
     def predictReal(self, data):
     def predictReal(self, data):
         """
         """
@@ -370,7 +371,11 @@ class XConvGeN(GanBaseClass):
         nLabels = 2 * self.gen
         nLabels = 2 * self.gen
         labels = np.array(create01Labels(nLabels, self.gen))
         labels = np.array(create01Labels(nLabels, self.gen))
         labelsGeN = np.array([labels])
         labelsGeN = np.array([labels])
-        
+
+        def getNeighborhoods():
+            for index in range(self.minSetSize):
+                yield indexToBatches(index)
+
         def indexToBatches(min_idx):
         def indexToBatches(min_idx):
             self.timing["NBH"].start()
             self.timing["NBH"].start()
             ## generate minority neighbourhood batch for every minority class sampls by index
             ## generate minority neighbourhood batch for every minority class sampls by index
@@ -383,33 +388,6 @@ class XConvGeN(GanBaseClass):
 
 
             return (min_batch, maj_batch)
             return (min_batch, maj_batch)
 
 
-        def createSamples(min_idx):
-            min_batch, maj_batch = indexToBatches(min_idx)
-
-            self.timing["GenSamples"].start()
-            ## generate synthetic samples from convex space
-            ## of minority neighbourhood batch using generator
-            conv_samples = generator.predict(np.array([min_batch]), batch_size=self.neb, verbose=0)
-            conv_samples = tf.reshape(conv_samples, shape=(self.gen, self.n_feat))
-            self.timing["GenSamples"].stop()
-
-            self.timing["FixType"].start()
-            ## Fix feature types
-            conv_samples = self.correct_feature_types(min_batch.numpy(), conv_samples)
-            self.timing["FixType"].stop()
-
-            ## concatenate them with the majority batch
-            conv_samples = [conv_samples, maj_batch]
-            return conv_samples
-
-        def genSamplesForDisc():
-            for min_idx in range(minSetSize):
-                yield createSamples(min_idx)
-
-        def genSamplesForGeN():
-            for min_idx in range(minSetSize):
-                yield indexToBatches(min_idx)
-
         def unbatch(rows):
         def unbatch(rows):
             def fn():
             def fn():
                 for row in rows:
                 for row in rows:
@@ -432,8 +410,19 @@ class XConvGeN(GanBaseClass):
             ## Training of the discriminator.
             ## Training of the discriminator.
             #
             #
             # Get all neighborhoods and synthetic points as data stream.
             # Get all neighborhoods and synthetic points as data stream.
-            a = tf.data.Dataset.from_generator(genSamplesForDisc, output_types=tf.float32).repeat().take(discTrainCount * self.minSetSize)
-            a = tf.data.Dataset.from_generator(unbatch(a), output_types=tf.float32)
+            nbhPairs = tf.data.Dataset.from_generator(getNeighborhoods, output_types=tf.float32).repeat().take(discTrainCount * self.minSetSize)
+            nbhMin = nbhPairs.map(lambda x: x[0])
+            batchMaj = nbhPairs.map(lambda x: x[1])
+
+            fnCt = self.correct_feature_types()
+            synth_batch = self.conv_sample_generator.predict(nbhMin.batch(32), verbose=0)
+            pairMinMaj = tf.data.Dataset.zip(
+                ( nbhMin
+                , tf.data.Dataset.from_tensor_slices(synth_batch)
+                , batchMaj
+                )).map(lambda x, y, z: [fnCt(x,y), z])
+            
+            a = tf.data.Dataset.from_generator(unbatch(pairMinMaj), output_types=tf.float32)
 
 
             # Get all labels as data stream.
             # Get all labels as data stream.
             b = tf.data.Dataset.from_tensor_slices(labels).repeat()
             b = tf.data.Dataset.from_tensor_slices(labels).repeat()
@@ -453,7 +442,7 @@ class XConvGeN(GanBaseClass):
             #
             #
             # Get all neighborhoods as data stream.
             # Get all neighborhoods as data stream.
             a = (tf.data.Dataset
             a = (tf.data.Dataset
-                .from_generator(genSamplesForGeN, output_types=tf.float32)
+                .from_generator(getNeighborhoods, output_types=tf.float32)
                 .map(lambda x: [[tf.concat([x[0], padd], axis=0), x[1]]]))
                 .map(lambda x: [[tf.concat([x[0], padd], axis=0), x[1]]]))
 
 
             # Get all labels as data stream.
             # Get all labels as data stream.
@@ -519,33 +508,58 @@ class XConvGeN(GanBaseClass):
         s = [bar(v) for v in x]
         s = [bar(v) for v in x]
         print(f"[{s[0]}] [{s[1]}] [{s[2]}]", end="\r")
         print(f"[{s[0]}] [{s[1]}] [{s[2]}]", end="\r")
         
         
-    def correct_feature_types(self, batch, synth_batch):
+    def correct_feature_types(self):
+        # batch[0] = original points (gen x n_feat)
+        # batch[1] = synthetic points (gen x n_feat)
+        
+        @tf.function
+        def voidFunction(reference, synth):
+            return synth
+    
         if self.fdc is None:
         if self.fdc is None:
-            return synth_batch
+            return voidFunction
         
         
-        def bestMatchOf(referenceValues, value):
-            if referenceValues is not None:
-                best = referenceValues[0]
-                d = abs(best - value)
-                for x in referenceValues:
-                    dx = abs(x - value)
-                    if dx < d:
-                        best = x
-                        d = dx
-                return best
-            else:
-                return value
+        columns = set(self.fdc.nom_list or [])
+        for y in (self.fdc.ord_list or []):
+            columns.add(y)
+        columns = list(columns)
+        
+        if len(columns) == 0:
+            return voidFunction
         
         
-        def correctVector(referenceLists, v):
-            return np.array([bestMatchOf(referenceLists[i], v[i]) for i in range(len(v))])
+        neb = self.neb
+        n_feat = self.n_feat
+        nn = tf.constant([(1.0 if x in columns else 0.0) for x in range(n_feat)])
+        if n_feat is None:
+            print("ERRROR n_feat is None")
+
+        if nn is None:
+            print("ERRROR nn is None")
+
+        @tf.function
+        def bestMatchOf(vi):
+            value = vi[0]
+            c = vi[1][0]
+            r = vi[2]
+            if c != 0.0:
+                d = tf.abs(value - r)
+                return r[tf.math.argmin(d)]
+            else:
+                return value[0]
             
             
-        referenceLists = [None for _ in range(self.n_feat)]
-        for i in (self.fdc.nom_list or []):
-            referenceLists[i] = list(set(list(batch[:, i])))
-
-        for i in (self.fdc.ord_list or []):
-            referenceLists[i] = list(set(list(batch[:, i])))
-
-        # print(batch.shape, synth_batch.shape)
-
-        return Lambda(lambda x: np.array([correctVector(referenceLists, y) for y in x]))(synth_batch)
+        @tf.function
+        def indexted(v, rt):
+            vv = tf.reshape(tf.repeat([v], neb, axis=1), (n_feat, neb))
+            vn = tf.reshape(tf.repeat([nn], neb, axis=1), (n_feat, neb))
+            return tf.stack((vv, vn, rt), axis=1)
+        
+        @tf.function
+        def correctVector(v, rt):
+            return tf.map_fn(lambda x: bestMatchOf(x), indexted(v, rt))
+
+        @tf.function
+        def fn(reference, synth):
+            rt = tf.transpose(reference)
+            return tf.map_fn(lambda x: correctVector(x, rt), synth)
+        
+        return fn

Некоторые файлы не были показаны из-за большого количества измененных файлов