NNSearch_experimental.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. import math
  2. import tensorflow as tf
  3. import numpy as np
  4. from sklearn.neighbors import NearestNeighbors
  5. from library.timing import timing
  6. def dist(x,y):
  7. return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
  8. def maxby(data, fn, startValue=0.0):
  9. m = startValue
  10. for v in data:
  11. m = max(m, fn(v))
  12. return m
  13. def distancesToPoint(p, points):
  14. w = np.array(np.repeat([p], len(points), axis=0))
  15. d = tf.keras.layers.Subtract()([w, np.array(points)])
  16. t = tf.keras.layers.Dot(axes=(1,1))([d,d])
  17. # As the concrete distance is not needed and sqrt(x) is strict monotone
  18. # we avoid here unneccessary calculating of expensive roots.
  19. return t.numpy()
  20. def calculateCenter(points):
  21. if points.shape[0] == 1:
  22. return points[0]
  23. return tf.keras.layers.Average()(list(points)).numpy()
  24. class Ball:
  25. def __init__(self, points=None, indices=None, parent=None, center=None):
  26. if center is not None:
  27. self.center = center
  28. elif points is not None:
  29. self.center = calculateCenter(np.array(points))
  30. else:
  31. raise ParameterError("Missing points or center")
  32. self.radius = 0
  33. self.points = []
  34. self.indices = set()
  35. self.childs = []
  36. self.parent = parent
  37. if points is not None and indices is not None:
  38. for (i, r) in zip(indices, distancesToPoint(self.center, points)):
  39. self.add(i, r)
  40. elif points is not None:
  41. raise ParameterError("Missing indices")
  42. def findIt(self, r):
  43. if r == self.points[0][1]:
  44. return 0
  45. upper = len(self.points) - 1
  46. lower = 0
  47. while upper > lower + 1:
  48. h = (upper + lower) // 2
  49. if self.points[h][1] >= r:
  50. upper = h
  51. else:
  52. lower = h
  53. return upper
  54. def add(self, xi, r):
  55. if xi in self.indices:
  56. return False
  57. # Here we know, that x is not in points.
  58. newEntry = (xi, r)
  59. self.indices.add(xi)
  60. # Special case: empty list or new element will extend the radius:
  61. # Here we can avoid the search
  62. if self.points == [] or r >= self.radius:
  63. self.points.append(newEntry)
  64. self.radius = r
  65. return True
  66. # Special case: r <= min radius
  67. # Here we can avoid the search
  68. if self.points[0][1] >= r:
  69. self.points = [newEntry] + self.points
  70. return True
  71. # Here we know that min radius < r < max radius.
  72. # So len(points) >= 2.
  73. pos = self.findIt(r)
  74. # here shoul be: r(pos) >= r > r(pos - 1)
  75. self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
  76. return True
  77. def remove(self, xi, r):
  78. if xi not in self.indices:
  79. return False
  80. # special case: remove the element with the heighest radius:
  81. if self.points[-1][0] == xi:
  82. self.indices.remove(xi)
  83. self.points = self.points[: -1]
  84. if self.points == []:
  85. self.radius = 0.0
  86. else:
  87. self.radius = self.points[-1][1]
  88. return True
  89. pos = self.findIt(r)
  90. nPoints = len(self.points)
  91. # pos is the smalest position with:
  92. # r(pos - 1) < r <= r(pos)
  93. # Just in case: check that we remove the correct index.
  94. while pos < nPoints and r == self.points[pos]:
  95. if self.points[pos][0] == xi:
  96. self.indices.remove(xi)
  97. self.points = self.points[:pos] + self.points[(pos + 1): ]
  98. return True
  99. pos += 1
  100. return False
  101. def divideBall(self, X):
  102. indices = [i for (i, _r) in self.points]
  103. points = np.array([X[i] for i in indices])
  104. distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
  105. ball = Ball(points, indices, center=np.zeros(len(points[0])))
  106. h = len(ball) // 2
  107. indicesA = [i for (i, _) in ball.points[:h]]
  108. indicesB = [i for (i, _) in ball.points[h:]]
  109. pointsA = np.array([X[i] for i in indicesA])
  110. pointsB = np.array([X[i] for i in indicesB])
  111. self.childs = []
  112. print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
  113. self.childs.append(Ball(pointsA, indicesA, self))
  114. self.childs.append(Ball(pointsB, indicesB, self))
  115. return self.childs
  116. def __len__(self):
  117. return len(self.points)
  118. def smalestBallFor(self, i):
  119. if i not in self.indices:
  120. return None
  121. for c in self.childs:
  122. b = c.smalestBallFor(i)
  123. if b is not None:
  124. return b
  125. return self
  126. class NNSearch:
  127. def __init__(self, nebSize=5, timingDict=None):
  128. self.nebSize = nebSize
  129. self.neighbourhoods = []
  130. self.timingDict = timingDict
  131. def timerStart(self, name):
  132. if self.timingDict is not None:
  133. if name not in self.timingDict:
  134. self.timingDict[name] = timing(name)
  135. self.timingDict[name].start()
  136. def timerStop(self, name):
  137. if self.timingDict is not None:
  138. if name in self.timingDict:
  139. self.timingDict[name].stop()
  140. def neighbourhoodOfItem(self, i):
  141. return self.neighbourhoods[i]
  142. def fit(self, X, nebSize=None):
  143. self.timerStart("NN_fit_all")
  144. self.fit_chained(X, nebSize)
  145. #self.fit_bruteForce_np(X, nebSize)
  146. self.timerStop("NN_fit_all")
  147. def fit_optimized(self, X, nebSize=None):
  148. self.timerStart("NN_fit_all")
  149. if nebSize == None:
  150. nebSize = self.nebSize
  151. nPoints = len(X)
  152. nFeatures = len(X[0])
  153. if nFeatures > 15 or nebSize >= (nPoints // 2):
  154. print("Using brute force")
  155. self.fit_bruteForce_np(X, nebSize)
  156. else:
  157. print("Using chained")
  158. self.fit_chained(X, nebSize)
  159. self.timerStop("NN_fit_all")
  160. def fit_bruteForce(self, X, nebSize=None):
  161. if nebSize == None:
  162. nebSize = self.nebSize
  163. self.timerStart("NN_fit_bf_init")
  164. isGreaterThan = lambda x, y: x[1] > y[1]
  165. self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  166. self.timerStop("NN_fit_bf_init")
  167. self.timerStart("NN_fit_bf_loop")
  168. for (i, x) in enumerate(X):
  169. nbh = self.neighbourhoods[i]
  170. for (j, y) in enumerate(X[i+1:]):
  171. j += i + 1
  172. self.timerStart("NN_fit_bf_dist")
  173. d = dist(x,y)
  174. self.timerStop("NN_fit_bf_dist")
  175. self.timerStart("NN_fit_bf_insert")
  176. nbh.insert((j,d))
  177. self.neighbourhoods[j].insert((i,d))
  178. self.timerStop("NN_fit_bf_insert")
  179. self.timerStart("NN_fit_bf_toList")
  180. self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
  181. self.timerStop("NN_fit_bf_toList")
  182. self.timerStop("NN_fit_bf_loop")
  183. def fit_bruteForce_np(self, X, nebSize=None):
  184. self.timerStart("NN_fit_bfnp_init")
  185. numOfPoints = len(X)
  186. nFeatures = len(X[0])
  187. tX = tf.convert_to_tensor(X)
  188. def distancesTo(x):
  189. w = np.repeat([x], numOfPoints, axis=0)
  190. d = tf.keras.layers.Subtract()([w,tX])
  191. t = tf.keras.layers.Dot(axes=(1,1))([d,d])
  192. return t.numpy()
  193. if nebSize == None:
  194. nebSize = self.nebSize
  195. isGreaterThan = lambda x, y: x[1] > y[1]
  196. self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  197. self.timerStop("NN_fit_bfnp_init")
  198. self.timerStart("NN_fit_bfnp_loop")
  199. for (i, x) in enumerate(X):
  200. self.timerStart("NN_fit_bfnp_dist")
  201. distances = distancesTo(x)
  202. self.timerStop("NN_fit_bfnp_dist")
  203. nbh = self.neighbourhoods[i]
  204. for (j, y) in enumerate(X[i+1:]):
  205. j += i + 1
  206. d = distances[j]
  207. self.timerStart("NN_fit_bfnp_insert")
  208. nbh.insert((j,d))
  209. self.neighbourhoods[j].insert((i,d))
  210. self.timerStop("NN_fit_bfnp_insert")
  211. self.timerStart("NN_fit_bfnp_toList")
  212. self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
  213. self.timerStop("NN_fit_bfnp_toList")
  214. self.timerStop("NN_fit_bfnp_loop")
  215. def fit_chained(self, X, nebSize=None):
  216. self.timerStart("NN_fit_chained_init")
  217. if nebSize == None:
  218. nebSize = self.nebSize
  219. nPoints = len(X)
  220. nFeatures = len(X[0])
  221. neigh = NearestNeighbors(n_neighbors=nebSize)
  222. neigh.fit(X)
  223. self.timerStop("NN_fit_chained_init")
  224. self.timerStart("NN_fit_chained_toList")
  225. self.neighbourhoods = [
  226. (neigh.kneighbors([x], nebSize, return_distance=False))[0]
  227. for (i, x) in enumerate(X)
  228. ]
  229. self.timerStop("NN_fit_chained_toList")
  230. # ===============================================================
  231. # Heuristic search
  232. # ===============================================================
  233. def fit_heuristic(self, X, nebSize=None):
  234. if nebSize == None:
  235. nebSize = self.nebSize
  236. nPoints = len(X)
  237. def walkUp(nbh, ball, x, i):
  238. while ball.parent is not None:
  239. print(f"{i}: up (r: {nbh.getMax()})")
  240. oldBall = ball
  241. ball = ball.parent
  242. for c in ball.childs:
  243. if c != oldBall:
  244. walkDown(nbh, c, x)
  245. def walkDown(nbh, ball, x):
  246. if ball is None:
  247. return
  248. print(f"{i}: down (r: {nbh.getMax()})")
  249. if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
  250. if ball.childs == []:
  251. for (j, _) in ball.points:
  252. nbh.insert((j, dist(x, X[j])))
  253. else:
  254. for c in ball.childs:
  255. walkDown(nbh, c, x)
  256. def countBoles(b):
  257. if b is None:
  258. return 0
  259. return 1 + sum(map(countBoles, b.childs))
  260. root = Ball(X, range(len(X)))
  261. queue = [root]
  262. while queue != []:
  263. ball = queue[0]
  264. queue = queue[1:]
  265. if len(ball) <= nebSize:
  266. continue
  267. queue = ball.divideBall(X) + queue
  268. isGreaterThan = lambda x, y: x[1] > y[1]
  269. self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  270. print("#B: " + str(countBoles(root)))
  271. exit()
  272. z = X[0]
  273. for (i, x) in enumerate(X):
  274. nbh = self.neighbourhoods[i]
  275. b = root.smalestBallFor(i)
  276. if b.parent is not None:
  277. b = b.parent
  278. for (j, _) in b.points:
  279. d = dist(x, X[j])
  280. nbh.insert((j, d))
  281. walkUp(nbh, b, x, i)