clustering.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import numpy as np
  2. from sklearn.cluster import AgglomerativeClustering
  3. from sklearn.cluster import KMeans
  4. from sklearn.cluster import DBSCAN
  5. from fdc.visualize import plotCluster
  6. class Clustering:
  7. def __init__(self, high_dim, low_dim, visual):
  8. self.high_dim = high_dim
  9. self.low_dim = low_dim
  10. self.visual = visual
  11. def Agglomerative(self, number_of_clusters, affinity, linkage):
  12. ag_cluster = AgglomerativeClustering(n_clusters=number_of_clusters, affinity=affinity, linkage=linkage)
  13. clusters = ag_cluster.fit_predict(self.high_dim)
  14. (values, counts) = np.unique(clusters, return_counts=True)
  15. self.low_dim['Cluster'] = clusters
  16. if self.visual:
  17. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  18. return self.low_dim.Cluster.to_list(), counts
  19. def DBSCAN(self, eps, min_samples):
  20. dbscan = DBSCAN(eps=eps, min_samples=min_samples)
  21. clusters = dbscan.fit_predict(self.high_dim)
  22. (values, counts) = np.unique(clusters, return_counts=True)
  23. self.low_dim['Cluster'] = clusters
  24. if self.visual:
  25. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  26. return self.low_dim.Cluster.to_list(), counts
  27. def K_means(self, no_of_clusters):
  28. kmeans = KMeans(n_clusters=no_of_clusters)
  29. clusters = kmeans.fit_predict(self.high_dim)
  30. (values, counts) = np.unique(clusters, return_counts=True)
  31. self.low_dim['Cluster'] = clusters
  32. if self.visual:
  33. plotCluster(self.low_dim, clusterName="Cluster", xName="UMAP_0", yName="UMAP_1", stroke=3)
  34. return self.low_dim.Cluster.to_list(), counts