|
|
@@ -3,6 +3,8 @@ import numpy as np
|
|
|
from numba import jit
|
|
|
import umap.umap_ as umap
|
|
|
|
|
|
+from fdc.tools import Timing
|
|
|
+
|
|
|
# --[ Known to be used but can we avoid it? ]----
|
|
|
import pandas as pd
|
|
|
from fdc.visualize import plotMapping
|
|
|
@@ -107,6 +109,8 @@ class FDC:
|
|
|
visual=None
|
|
|
):
|
|
|
|
|
|
+ timing = Timing("FDC.normalize")
|
|
|
+
|
|
|
# Take instance value if parameter was not given.
|
|
|
visual = value(visual, self.visual)
|
|
|
with_2d_embedding = value(with_2d_embedding, self.with_2d_embedding)
|
|
|
@@ -115,6 +119,8 @@ class FDC:
|
|
|
np.random.seed(42)
|
|
|
concat_column_names = []
|
|
|
concat_lists = []
|
|
|
+
|
|
|
+ timing.step("init")
|
|
|
|
|
|
# Reducing features into 2dim or 1dim
|
|
|
actions = [
|
|
|
@@ -129,12 +135,14 @@ class FDC:
|
|
|
concat_lists.append(emb)
|
|
|
for n in range(emb.shape[1]):
|
|
|
concat_column_names.append(f"{name}_UMAP_{n}")
|
|
|
+ timing.step(f"clustering {name}")
|
|
|
|
|
|
# Merge results
|
|
|
if concat_lists == []:
|
|
|
raise ValueError("Expected at least one non empty column list.")
|
|
|
|
|
|
result_concat = np.concatenate(concat_lists, axis=1)
|
|
|
+ timing.step("concat")
|
|
|
|
|
|
# Create 2d embedding from 5d embedding
|
|
|
if with_2d_embedding or visual:
|
|
|
@@ -145,10 +153,13 @@ class FDC:
|
|
|
, metric='euclidean'
|
|
|
, random_state=42
|
|
|
).fit_transform(result_concat)
|
|
|
+
|
|
|
+ timing.step("umap 5 -> 2")
|
|
|
|
|
|
if self.use_pandas_output:
|
|
|
result_reduced = pd.DataFrame(
|
|
|
data=result_reduced, columns=['UMAP_0', 'UMAP_1'])
|
|
|
+ timing.step("array -> DataFrame")
|
|
|
|
|
|
# Show mapping if needed
|
|
|
if visual:
|
|
|
@@ -157,11 +168,15 @@ class FDC:
|
|
|
else:
|
|
|
plotMapping(pd.DataFrame(
|
|
|
data=result_reduced, columns=['UMAP_0', 'UMAP_1']))
|
|
|
+ timing.step("plotting")
|
|
|
|
|
|
# Transform to pandas DataFrame if needed.
|
|
|
if self.use_pandas_output:
|
|
|
result_concat = pd.DataFrame(
|
|
|
data=result_concat, columns=concat_column_names)
|
|
|
+ timing.step("array -> DataFrame")
|
|
|
+
|
|
|
+ timing.step("total")
|
|
|
|
|
|
if with_2d_embedding:
|
|
|
#returns both 5D and 2D embeddings
|