clustering.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. import numpy as np
  2. from sklearn.cluster import AgglomerativeClustering
  3. from sklearn.cluster import KMeans
  4. from sklearn.cluster import DBSCAN
  5. from fdc.visualize import plotCluster
  6. def aglo_clustering(number_of_clusters, affinity, linkage
  7. , five_d_embedding, two_d_embedding
  8. , visual=False):
  9. np.random.seed(42)
  10. ag_cluster = AgglomerativeClustering(
  11. n_clusters=number_of_clusters
  12. , affinity=affinity
  13. , linkage=linkage
  14. )
  15. clusters = ag_cluster.fit_predict(five_d_embedding)
  16. (values, counts) = np.unique(clusters, return_counts=True)
  17. two_d_embedding['Cluster'] = clusters
  18. if visual:
  19. plotCluster(two_d_embedding, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  20. return two_d_embedding.Cluster.to_list(), counts
  21. class Clustering:
  22. def __init__(self,high_dim,low_dim,visual):
  23. self.high_dim=high_dim
  24. self.low_dim=low_dim
  25. self.visual=visual
  26. def Agglomerative(self,number_of_clusters, affinity, linkage):
  27. self.number_of_clusters=number_of_clusters
  28. self.affinity=affinity
  29. self.linkage=linkage
  30. ag_cluster = AgglomerativeClustering(n_clusters=number_of_clusters, affinity=affinity, linkage=linkage)
  31. clusters = ag_cluster.fit_predict(self.high_dim)
  32. (values, counts) = np.unique(clusters, return_counts=True)
  33. self.low_dim['Cluster'] = clusters
  34. if self.visual:
  35. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  36. return self.low_dim.Cluster.to_list(), counts
  37. def DBSCAN(self,eps,min_samples):
  38. self.eps=eps
  39. self.min_samples=min_samples
  40. dbscan = DBSCAN(eps=eps, min_samples = min_samples)
  41. clusters = dbscan.fit_predict(self.high_dim)
  42. (values, counts) = np.unique(clusters, return_counts=True)
  43. self.low_dim['Cluster'] = clusters
  44. if self.visual:
  45. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  46. return self.low_dim.Cluster.to_list(), counts
  47. def K_means(self,no_of_clusters):
  48. self.no_of_clusters=no_of_clusters
  49. kmeans = KMeans(n_clusters=no_of_clusters)
  50. clusters = kmeans.fit_predict(self.high_dim)
  51. (values, counts) = np.unique(clusters, return_counts=True)
  52. self.low_dim['Cluster'] = clusters
  53. if self.visual:
  54. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  55. return self.low_dim.Cluster.to_list(), counts