Explorar o código

Merged new submodules from master branch.

Kristian Schultz hai 4 anos
pai
achega
d8fed6d73e

+ 6 - 0
.gitmodules

@@ -0,0 +1,6 @@
+[submodule "external/ProWRAS"]
+	path = external/ProWRAS
+	url = https://github.com/COSPOV/ProWRAS.git
+[submodule "external/LoRAS"]
+	path = external/LoRAS
+	url = https://github.com/COSPOV/LoRAS.git

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 57 - 17
Statistics.ipynb


A diferenza do arquivo foi suprimida porque é demasiado grande
+ 190 - 0
SyntheticPointDistribution.ipynb


+ 1 - 0
external/LoRAS

@@ -0,0 +1 @@
+Subproject commit d11d6f301200a94e9157c2bfd6ac7200c369af3b

+ 1 - 0
external/ProWRAS

@@ -0,0 +1 @@
+Subproject commit 3f8c72485c78b020acab54681f3dcd57f8ea4761

BIN=BIN
images/example_ProWRAS.pdf


BIN=BIN
images/example_basisData.pdf


BIN=BIN
images/example_convGAN.pdf


BIN=BIN
images/example_folding_car_good_ProWRAS.pdf


BIN=BIN
images/example_folding_car_good_basisData.pdf


BIN=BIN
images/example_folding_car_good_convGAN.pdf


BIN=BIN
images/example_folding_car_good_simpleGAN.pdf


BIN=BIN
images/example_folding_yeast4_ProWRAS.pdf


BIN=BIN
images/example_folding_yeast4_basisData.pdf


BIN=BIN
images/example_folding_yeast4_convGAN.pdf


BIN=BIN
images/example_folding_yeast4_simpleGAN.pdf


BIN=BIN
images/example_imblearn_ozone_level_ProWRAS.pdf


BIN=BIN
images/example_imblearn_ozone_level_basisData.pdf


BIN=BIN
images/example_imblearn_ozone_level_convGAN.pdf


BIN=BIN
images/example_imblearn_ozone_level_simpleGAN.pdf


BIN=BIN
images/example_simpleGAN.pdf


BIN=BIN
images/example_x_folding_car_good_ProWRAS.pdf


BIN=BIN
images/example_x_folding_car_good_basisData.pdf


BIN=BIN
images/example_x_folding_car_good_simpleGAN.pdf


+ 82 - 0
library/LoRAS_ProWRAS.py

@@ -0,0 +1,82 @@
+from library.ext_prowras import ProWRAS_gen
+from library.interfaces import GanBaseClass
+
+
+class ProWRAS(GanBaseClass):
+    """
+    Wraps the external ProWRAS_gen sample generator behind the GanBaseClass interface.
+    It stores the training data-set and passes it to ProWRAS_gen on generation.
+    """
+
+    def __init__(self
+        , max_levels = 5
+        , convex_nbd = 5
+        , n_neighbors = 5
+        , max_concov = None
+        , theta = 1.0
+        , shadow = 100
+        , sigma = 0.000001
+        , n_jobs = 1
+        , debug = False
+        ):
+        """
+        Stores the ProWRAS_gen parameters and marks the instance as untrained (data = None).
+        """
+        self.data = None
+        self.max_levels = max_levels
+        self.convex_nbd = convex_nbd
+        self.n_neighbors = n_neighbors
+        self.max_concov = max_concov
+        self.theta = theta
+        self.shadow = shadow
+        self.sigma = sigma
+        self.n_jobs = n_jobs
+        self.debug = debug
+
+    def reset(self):
+        """
+        Does nothing: no state is reset here and the stored data-set is kept.
+        """
+        pass
+
+    def train(self, dataSet):
+        """
+        Stores the training data-set; no model is fitted at this point.
+
+        The data-set is only kept for the later ProWRAS_gen call in generateData.
+
+        *dataSet* is an instance of /library.dataset.DataSet/. It contains the training dataset.
+        Its *data* and *labels* attributes are both passed on to ProWRAS_gen.
+        """
+        self.data = dataSet
+
+    def generateDataPoint(self):
+        """
+        Generates one synthetic data-point: the first element of generateData(1).
+        """
+        return self.generateData(1)[0]
+
+    def generateData(self, numOfSamples=1):
+        """
+        Returns the first numOfSamples entries of the first item returned by ProWRAS_gen.
+        If max_concov is None, self.data.data.shape[0] is passed to ProWRAS_gen instead.
+        *numOfSamples* is an integer > 0. It gives the number of new generated samples.
+        """
+        if self.max_concov is not None:
+            max_concov = self.max_concov
+        else:
+            max_concov = self.data.data.shape[0]
+
+        return ProWRAS_gen(
+            data = self.data.data,
+            labels = self.data.labels,
+            max_levels = self.max_levels,
+            convex_nbd = self.convex_nbd,
+            n_neighbors = self.n_neighbors,
+            max_concov = max_concov,
+            num_samples_to_generate = numOfSamples,
+            theta = self.theta,
+            shadow = self.shadow,
+            sigma = self.sigma,
+            n_jobs = self.n_jobs,
+            enableDebug = self.debug)[0][:numOfSamples]

+ 1 - 0
library/analysis.py

@@ -228,6 +228,7 @@ testSets = [
     "imblearn_webpage",
     "imblearn_mammography",
     "imblearn_protein_homo",
+    "imblearn_ozone_level",
     "kaggle_creditcard"
     ]
 

+ 37 - 23
library/exercise.py

@@ -5,9 +5,6 @@ in generating synthetic samples for datasets with a minority class.
 
 
 import numpy as np
-import pandas as pd
-
-import seaborn as sns
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 import matplotlib.pyplot as plt
@@ -205,33 +202,50 @@ class Exercise:
         return avgResults
 
 
-def plotCloud(data0, data1, dataNew):
+def plotCloud(data0, data1, dataNew=None, outputFile=None, title=""):
     """
     Does a PCA analysis of the given data and plot the both important axis.
     """
+
     # Normalizes the data.
-    data_t = StandardScaler().fit_transform(np.concatenate([data0, data1, dataNew]))
+    if dataNew is None:
+        data_t = StandardScaler().fit_transform(np.concatenate([data0, data1]))
+    else:
+        data_t = StandardScaler().fit_transform(np.concatenate([data0, data1, dataNew]))
+
 
     # Run the PCA analysis.
     pca = PCA(n_components=2)
     pc = pca.fit_transform(data_t)
 
-    # Create a DataFrame for plotting.
-    result = pd.DataFrame(data=pc, columns=['PCA0', 'PCA1'])
-    result['Cluster'] = np.concatenate([
-        np.zeros(len(data0)),
-        np.zeros(len(data1)) + 1,
-        np.zeros(len(dataNew)) + 2
-        ])
-
-    # Plot the analysis results.
-    sns.set( font_scale=1.2)
-    sns.lmplot( x="PCA0", y="PCA1",
-      data=result,
-      fit_reg=False,
-      hue='Cluster', # color by cluster
-      legend=False,
-      scatter_kws={"s": 3}, palette="Set1") # specify the point size
-
-    plt.legend(title='', loc='upper left', labels=['0', '1', '2'])
+    fig, ax = plt.subplots(sharex=True, sharey=True)
+    fig.set_dpi(600)
+    fig.set_figwidth(10)
+    fig.set_figheight(10)
+    fig.set_facecolor("white")
+    ax.set_title(title)
+    # Scatter the rows pc[m : m + n] (both PCA components) in the colour c.
+    def doSubplot(m, n, c):
+        pca0 = [x[0] for x in pc[m : m + n]]
+        pca1 = [x[1] for x in pc[m : m + n]]
+        s = ax.scatter(pca0, pca1, c=c)
+    # The groups are plotted in the concatenation order: data0, data1, then dataNew.
+    m = 0
+    n = len(data0)
+    doSubplot(m, n, "gray")
+    # Advance the offset past the data0 rows before plotting data1.
+    m += n
+    n = len(data1)
+    doSubplot(m, n, "red")
+
+    if dataNew is not None:
+        m += n
+        n = len(dataNew)
+        doSubplot(m, n, "blue")
+
+    ax.legend(title="", loc='upper left', labels=['majority', 'minority', 'synthetic minority'])
+    ax.set_xlabel("PCA0")
+    ax.set_ylabel("PCA1")
     plt.show()
+    if outputFile is not None:
+        fig.savefig(outputFile)

+ 1 - 0
library/ext_prowras.py

@@ -0,0 +1 @@
+../external/ProWRAS/Library/prowras/prowras.py

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 8 - 11
run_all_with_convGan.ipynb


Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio