3 年之前 · a6b730fafa
--- a/fdcTool.py
+++ b/fdcTool.py
@@ -13,87 +13,6 @@ from fdc.fdc import canberra_modified, FDC, Clustering
 
															 from fdc.dataSheet import DataSheet
														
 
															-
														
 
															-
														
 
															-# class FdcToolbox:
														
 
															-# 
														
 
															-#   def __init__(self, file_name, index_col=0):
														
 
															-#     data = pd.read_csv(file_name, index_col=0)
														
 
															-#     self.data = data.sample(frac=1)
														
 
															-# 
														
 
															-#     self.value_dict = {}
														
 
															-#     self.value_dict_rev = {}
														
 
															-#     self.cols_cont = []
														
 
															-#     self.cols_ord = []
														
 
															-#     self.cols_nom = []
														
 
															-# 
														
 
															-#     for k in self.data.dtypes.keys():
														
 
															-#       t = str(self.data.dtypes[k])
														
 
															-#       if t[:3] == "int":
														
 
															-#         self.cols_ord.append(k)
														
 
															-#       elif t == "object":
														
 
															-#         self.cols_nom.append(k)
														
 
															-#       else:
														
 
															-#         self.cols_cont.append(k)
														
 
															-# 
														
 
															-#     self.has_missing_values = False
														
 
															-#     self.updateMissingValuesState()
														
 
															-# 
														
 
															-#   def updateMissingValuesState(self):
														
 
															-#     self.has_missing_values = False
														
 
															-#     for k in self.data.isna().sum():
														
 
															-#       if k > 0:
														
 
															-#         self.has_missing_values = True
														
 
															-#         break
														
 
															-# 
														
 
															-#   def showStatistic(self):
														
 
															-#     print(f"Fratures: {self.data.shape[1]}")
														
 
															-#     print(f"Points:   {self.data.shape[0]}")
														
 
															-#     print(f"Columns:")
														
 
															-# 
														
 
															-#     for k in self.data.dtypes.keys():
														
 
															-#       t = str(self.data.dtypes[k])
														
 
															-#       e = " c"
														
 
															-#       if k in self.cols_ord:
														
 
															-#         e = " o"
														
 
															-#       if k in self.cols_nom:
														
 
															-#         e = " n"
														
 
															-#       indentPair(k, t, e)
														
 
															-#     print()
														
 
															-#     print(f"Missing values:")
														
 
															-# 
														
 
															-#     n = 0
														
 
															-#     d = self.data.isna().sum()
														
 
															-#     for k in d.keys():
														
 
															-#       if d[k] > 0:
														
 
															-#         indentPair(k, str(d[k]))
														
 
															-#         n += 1
														
 
															-#     if n == 0:
														
 
															-#       print("  none")
														
 
															-# 
														
 
															-# 
														
 
															-#   def fixDatatypes(self):
														
 
															-#     columnsToFix = []
														
 
															-#     for k in self.data.dtypes.keys():
														
 
															-#       if str(self.data.dtypes[k]) == "object":
														
 
															-#         columnsToFix.append(k)
														
 
															-# 
														
 
															-#     self.value_dict = {}
														
 
															-#     self.value_dict_rev = {}
														
 
															-#     for c in columnsToFix:
														
 
															-#       histogram = self.data[c].value_counts()
														
 
															-#       self.value_dict[c] = { k : n for n, k in enumerate(histogram.keys()) }
														
 
															-#       self.value_dict_rev[c] = { n : k for n, k in enumerate(histogram.keys()) }
														
 
															-#     
														
 
															-#     if len(self.value_dict.keys()) > 0:
														
 
															-#       self.data.replace(self.value_dict, inplace=True)
														
 
															-# 
														
 
															-#   def fix_missing_values(self):
														
 
															-#     self.data = fix_missing_values(self.data, 4)
														
 
															-#     self.updateMissingValuesState()
														
 
															-# 
														
 
															-
														
 
															-
														
 
															 filename='healthcare-dataset-stroke-data.csv'
														
 
															 np.random.seed(42)
														
 
															 tb = DataSheet(filename)