|
@@ -5,9 +5,6 @@ in generating synthetic samples for datasets with a minority class.
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
import numpy as np
|
|
|
-import pandas as pd
|
|
|
|
|
-
|
|
|
|
|
-import seaborn as sns
|
|
|
|
|
from sklearn.decomposition import PCA
|
|
from sklearn.decomposition import PCA
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.pyplot as plt
|
|
@@ -205,33 +202,50 @@ class Exercise:
|
|
|
return avgResults
|
|
return avgResults
|
|
|
|
|
|
|
|
|
|
|
|
|
def plotCloud(data0, data1, dataNew=None, outputFile=None, title=""):
    """
    Run a PCA on the given point sets and plot the first two components.

    All sets are concatenated row-wise, standardized with ``StandardScaler``
    and reduced with a two-component ``PCA``. Each set is then drawn as its
    own scatter series on one shared axes.

    Parameters:
        data0: Points drawn in gray and labeled "majority".
        data1: Points drawn in red and labeled "minority".
        dataNew: Optional points drawn in blue and labeled
            "synthetic minority". Left out of the scaling, the PCA and the
            plot when ``None``.
        outputFile: Optional target passed to ``fig.savefig``. Nothing is
            written when ``None``.
        title: Title shown above the axes.

    Because the sets are concatenated along the first axis, they must all
    have the same number of columns.
    """
    # One entry per plotted series: (points, color, legend label).
    groups = [
        (data0, "gray", "majority"),
        (data1, "red", "minority"),
    ]
    if dataNew is not None:
        groups.append((dataNew, "blue", "synthetic minority"))

    # Normalizes the data.
    data_t = StandardScaler().fit_transform(
        np.concatenate([points for points, _, _ in groups])
    )

    # Run the PCA analysis.
    pca = PCA(n_components=2)
    pc = pca.fit_transform(data_t)

    fig, ax = plt.subplots(figsize=(10, 10), dpi=600, facecolor="white")
    ax.set_title(title)

    # The rows of `pc` are in the same order as the concatenated input, so
    # each series occupies a contiguous slice starting where the last ended.
    start = 0
    for points, color, label in groups:
        stop = start + len(points)
        ax.scatter(pc[start:stop, 0], pc[start:stop, 1], c=color, label=label)
        start = stop

    # Labels come from the scatter calls above, so the legend only lists the
    # series that were actually drawn.
    ax.legend(title="", loc='upper left')
    ax.set_xlabel("PCA0")
    ax.set_ylabel("PCA1")

    # Save before showing: plt.show() may block until the window is closed,
    # and the file should not depend on the figure's state after that.
    if outputFile is not None:
        fig.savefig(outputFile)

    plt.show()
|