| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
- from tkinter import *
- from tkinter import ttk
- from fdc.dataSheet import DataSheet
- from fdc.fdc import canberra_modified, FDC, Clustering
class MessageBox:
    """Small window that shows a message above an OK button.

    Intended use is ``MessageBox("text").run()``: ``run`` blocks in a Tk
    event loop until the window is dismissed with OK or closed through the
    window manager.
    """

    def __init__(self, message):
        """Create the window and its widgets.

        Args:
            message: Text displayed above the OK button.
        """
        self.root = Tk()
        self.root.geometry("300x100")
        # True only while run() is inside its event loop, so that dismissing
        # the box never stops an event loop this object did not start.
        self._running = False

        Label(self.root, text=message).pack(side=TOP)
        Button(self.root, text="OK", command=self.onClickOk).pack(side=TOP)
        # Closing through the window manager takes the same path as OK.
        self.root.protocol("WM_DELETE_WINDOW", self.onClickOk)

    def onClickOk(self):
        """Dismiss the box and let a pending ``run`` call return."""
        if self._running:
            # Tk's main loop only ends by itself once *no* main window is
            # left in the thread. When this box is opened from another Tk
            # application, destroying it is therefore not enough to make the
            # nested loop in run() return; it has to be stopped explicitly.
            self._running = False
            self.root.quit()
        self.root.destroy()

    def run(self):
        """Process events until the box is dismissed."""
        self._running = True
        try:
            self.root.mainloop()
        finally:
            self._running = False
-
class SpreadSheet:
    """Window that displays a table in a scrollable ``ttk.Treeview``."""

    def __init__(self, data):
        """Create the window and fill the table.

        Args:
            data: Table to display. The code reads ``data.columns``,
                ``data.index`` and ``data.values``; presumably a pandas
                DataFrame (callers in this file pass ``DataSheet.data``) --
                TODO confirm.
        """
        self.root = Tk()
        self.root.geometry("600x400")
        # True only while run() is inside its event loop.
        self._running = False
        self.root.protocol("WM_DELETE_WINDOW", self._onClose)

        yscroll = Scrollbar(self.root, orient=VERTICAL)
        yscroll.pack(side=RIGHT, fill=Y)
        xscroll = Scrollbar(self.root, orient=HORIZONTAL)
        xscroll.pack(side=BOTTOM, fill=X)

        columns = list(data.columns)
        table = ttk.Treeview(
            self.root,
            columns=columns,
            xscrollcommand=xscroll.set,
            yscrollcommand=yscroll.set,
        )
        table.pack(fill=BOTH, expand=True)
        xscroll.config(command=table.xview)
        yscroll.config(command=table.yview)

        for name in columns:
            table.heading(name, text=name)

        for label, row in zip(data.index, data.values):
            # No explicit item id: Treeview ids must be unique, but index
            # labels need not be, so the widget generates the ids itself.
            table.insert('', 'end', text=label, values=[str(v) for v in row])

    def _onClose(self):
        """Close the window and let a pending ``run`` call return."""
        if self._running:
            # Tk's main loop keeps running while any main window exists in
            # the thread, so a loop nested inside another Tk application
            # must be stopped explicitly when this window goes away.
            self._running = False
            self.root.quit()
        self.root.destroy()

    def run(self):
        """Process events until the window is closed."""
        self._running = True
        try:
            self.root.mainloop()
        finally:
            self._running = False
-
class UI:
    """Main application window.

    Shows a row of action buttons above a table that lists, for every column
    of the loaded data, its dtype and its number of missing values.
    """

    def __init__(self):
        """Create the main window and its widgets; no data is loaded yet."""
        self.root = Tk()
        self.root.geometry("640x400")
        self.root.title("Test")
        # Item ids of the rows currently shown in the column overview.
        self.columnTableItems = []
        # Assigned by the "Load data" and "do FDC" handlers; None until then.
        self.dataSheet = None
        self.createButtonFrame()
        self.columnFrame = self.createColumnOverview()

    # ---------------------------------------------------------------------------
    # Event loop
    # ---------------------------------------------------------------------------
    def run(self):
        """Enter the Tk event loop of the main window."""
        self.root.mainloop()

    # ---------------------------------------------------------------------------
    # Create UI parts
    # ---------------------------------------------------------------------------
    def createButtonFrame(self, side=TOP):
        """Create the button bar and return its frame.

        Args:
            side: Side of the main window the bar is packed against.
        """
        frame = Frame(self.root)
        frame.pack(side=side)
        buttons = (
            ("Load data", self.onClickedLoadData),
            ("Save data", self.onClickedSaveData),
            ("Show data", self.onClickedShowData),
            ("Fix data types", self.onClickedFixDataTypes),
            ("Fix missing values", self.onClickedFixMissingValues),
            ("do FDC", self.onClickedDoFdc),
        )
        for text, command in buttons:
            button = Button(frame, text=text, command=command)
            button.pack(side=LEFT, padx=3, pady=3)
        return frame

    def createColumnOverview(self, side=TOP):
        """Create the scrollable column overview and return the Treeview.

        Args:
            side: Side of the main window the table is packed against.
        """
        scrollbar = Scrollbar(self.root)
        scrollbar.pack(side=RIGHT, fill=Y)
        table = ttk.Treeview(
            self.root,
            columns=("type", "missing"),
            yscrollcommand=scrollbar.set,
        )
        table.pack(side=side, fill=BOTH, expand=True)
        scrollbar.config(command=table.yview)
        table.heading("type", text="Type")
        table.heading("missing", text="# Missing Values")
        return table

    # ---------------------------------------------------------------------------
    # Button actions
    # ---------------------------------------------------------------------------
    def _hasData(self):
        """Return True if data is loaded; otherwise tell the user and return False."""
        if self.dataSheet is None:
            MessageBox("No data loaded.").run()
            return False
        return True

    def onClickedLoadData(self, filename='healthcare-dataset-stroke-data.csv'):
        """Load a data sheet and refresh the column overview.

        Args:
            filename: File handed to ``DataSheet``. The default keeps the
                behaviour of the "Load data" button, which calls this
                handler without arguments.
        """
        print("Clicked 'load data'")
        try:
            sheet = DataSheet(filename)
        except OSError as error:
            # Assumes DataSheet surfaces file-access failures (e.g. a missing
            # file) as OSError -- TODO confirm. Other exceptions propagate.
            MessageBox("Could not load data: " + str(error)).run()
            return
        self.dataSheet = sheet
        self.updateColumnTable()
        MessageBox("Data loaded.").run()

    def onClickedSaveData(self):
        """Handle the "Save data" button; currently this only logs the click."""
        print("Clicked 'save data'")

    def onClickedShowData(self):
        """Open the loaded data in a ``SpreadSheet`` window."""
        print("Clicked 'show data'")
        if self._hasData():
            SpreadSheet(self.dataSheet.data).run()

    def onClickedFixDataTypes(self):
        """Run ``fixDatatypes`` on the loaded sheet and refresh the overview."""
        print("Fix data types")
        if not self._hasData():
            return
        self.dataSheet.fixDatatypes()
        self.updateColumnTable()
        MessageBox("Done.").run()

    def onClickedFixMissingValues(self):
        """Run ``fix_missing_values`` on the loaded sheet and refresh the overview."""
        print("Fix missing values")
        if not self._hasData():
            return
        self.dataSheet.fix_missing_values()
        self.updateColumnTable()
        MessageBox("Done.").run()

    def onClickedDoFdc(self):
        """Replace the loaded sheet with the result of ``FDC.normalize`` on its data."""
        print("Clicked 'do fdc'")
        # Without this guard the attribute accesses below fail on None when
        # the button is pressed before any data has been loaded.
        if not self._hasData():
            return
        fdc = FDC(
            clustering_cont=Clustering('euclidean'),
            clustering_ord=Clustering(canberra_modified),
            clustering_nom=Clustering('hamming', max_components=1),
            visual=False,
            use_pandas_output=True,
            with_2d_embedding=False,
        )
        # Keyword spellings below are those of the FDC API as used here.
        fdc.selectFeatures(
            continueous=self.dataSheet.cols_cont,
            nomial=self.dataSheet.cols_nom,
            ordinal=self.dataSheet.cols_ord,
        )
        fdcData = fdc.normalize(self.dataSheet.data)
        self.dataSheet = DataSheet(dataFrame=fdcData)
        self.updateColumnTable()
        MessageBox("Done.").run()

    # ---------------------------------------------------------------------------
    # Update View
    # ---------------------------------------------------------------------------
    def updateColumnTable(self):
        """Rebuild the column overview from ``self.dataSheet``.

        Removes all rows previously added by this method, then adds one row
        per column showing its name, dtype and count of missing values.
        With no data loaded the table is left empty.
        """
        for item in self.columnTableItems:
            self.columnFrame.delete(item)
        self.columnTableItems = []
        if self.dataSheet is None:
            return

        data = self.dataSheet.data
        missingCounts = data.isna().sum()
        for (name, dtype), missing in zip(data.dtypes.items(), missingCounts):
            item = self.columnFrame.insert(
                '', 'end', text=name, values=(str(dtype), str(missing))
            )
            self.columnTableItems.append(item)
- # ---------------------------------------------------------------------------
- # Other Stuff
- # ---------------------------------------------------------------------------
# Script entry point: build the main window, then hand control to its Tk
# event loop. The module-level name `ui` is kept for anything that refers
# to it.
ui = UI()
ui.run()
|