ds_statistic.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. from library.analysis import *
  2. def padLeft(n, text, p=" "):
  3. while len(text) < n:
  4. text = text + p
  5. return text
  6. class Table:
  7. def __init__(self, cols):
  8. self.rows = []
  9. self.cols = cols
  10. self.colSize = [len(c) for c in cols]
  11. def showHead(self):
  12. h = ""
  13. b = ""
  14. for (i,c) in enumerate(self.cols):
  15. if h != "":
  16. h += "|"
  17. b += "|"
  18. h += padLeft(2 + self.colSize[i], " " + c)
  19. b += padLeft(2 + self.colSize[i], "", "-")
  20. print(h)
  21. print(b)
  22. def showRow(self, row):
  23. r = ""
  24. for (i,c) in enumerate(row):
  25. if r != "":
  26. r += "|"
  27. r += padLeft(2 + self.colSize[i], " " + c)
  28. print(r)
  29. def show(self):
  30. self.showHead()
  31. for r in self.rows:
  32. self.showRow(r)
  33. def addRow(self, row):
  34. for i in range(len(row)):
  35. row[i] = str(row[i])
  36. self.colSize[i] = max(self.colSize[i], len(row[i]))
  37. self.rows.append(row)
  38. class CheckTree:
  39. def __init__(self, data=None):
  40. self.tree = {}
  41. if data is not None:
  42. for x in data:
  43. self.add(x)
  44. def add(self, xs):
  45. t = self.tree
  46. for x in xs:
  47. if x not in t:
  48. t[x] = {}
  49. t = t[x]
  50. def isIn(self, xs):
  51. t = self.tree
  52. for x in xs:
  53. if x not in t:
  54. return False
  55. t = t[x]
  56. return True
  57. def isSame(xs, ys):
  58. for (x, y) in zip(xs, ys):
  59. if x != y:
  60. return False
  61. return True
  62. def countCommon(setA, setB):
  63. n = 0
  64. print("->")
  65. tree = CheckTree(setB)
  66. for x in setA:
  67. if tree.isIn(x):
  68. n += 1
  69. print("<-")
  70. tree = CheckTree(setA)
  71. for x in setB:
  72. if tree.isIn(x):
  73. n += 1
  74. return n
  75. table = Table(["dataset", "features", "points total", "majority", "minority", "common"])
  76. if __name__ == "__main__":
  77. for ds in testSets:
  78. d = loadDataset("data_input/" + ds)
  79. print((d.data0.shape[0], d.data1.shape[0]))
  80. table.addRow(
  81. [ ds
  82. , d.data0.shape[1]
  83. , d.data0.shape[0] + d.data1.shape[0]
  84. , d.data0.shape[0]
  85. , d.data1.shape[0]
  86. , countCommon(d.data0, d.data1)
  87. ])
  88. table.show()