NNSearch_experimental.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. import math
  2. from ctypes import cdll, c_uint, c_double, c_void_p
  3. import tensorflow as tf
  4. import numpy as np
  5. import numpy.ctypeslib as npct
  6. array_2d_uint = npct.ndpointer(dtype=np.uint, ndim=2, flags='CONTIGUOUS')
  7. array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='CONTIGUOUS')
  8. array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='CONTIGUOUS')
  9. from sklearn.neighbors import NearestNeighbors
  10. from library.timing import timing
  11. from library.MaxHeap import MaxHeap
  12. nbhLib = cdll.LoadLibrary("./library/c/libNeighborhood.so")
  13. nbhLib.Neighborhood.rettype = None
  14. nbhLib.Neighborhood.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
  15. nbhLib.NeighborhoodHeuristic.rettype = None
  16. nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
  17. def dist(x,y):
  18. return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
  19. def maxby(data, fn, startValue=0.0):
  20. m = startValue
  21. for v in data:
  22. m = max(m, fn(v))
  23. return m
  24. def distancesToPoint(p, points):
  25. w = np.array(np.repeat([p], len(points), axis=0))
  26. d = tf.keras.layers.Subtract()([w, np.array(points)])
  27. t = tf.keras.layers.Dot(axes=(1,1))([d,d])
  28. # As the concrete distance is not needed and sqrt(x) is strict monotone
  29. # we avoid here unneccessary calculating of expensive roots.
  30. return t.numpy()
  31. def calculateCenter(points):
  32. if points.shape[0] == 1:
  33. return points[0]
  34. return tf.keras.layers.Average()(list(points)).numpy()
  35. class Ball:
  36. def __init__(self, points=None, indices=None, parent=None, center=None):
  37. if center is not None:
  38. self.center = center
  39. elif points is not None:
  40. self.center = calculateCenter(np.array(points))
  41. else:
  42. raise ParameterError("Missing points or center")
  43. self.radius = 0
  44. self.points = []
  45. self.indices = set()
  46. self.childs = []
  47. self.parent = parent
  48. if points is not None and indices is not None:
  49. for (i, r) in zip(indices, distancesToPoint(self.center, points)):
  50. self.add(i, r)
  51. elif points is not None:
  52. raise ParameterError("Missing indices")
  53. def findIt(self, r):
  54. if r == self.points[0][1]:
  55. return 0
  56. upper = len(self.points) - 1
  57. lower = 0
  58. while upper > lower + 1:
  59. h = (upper + lower) // 2
  60. if self.points[h][1] >= r:
  61. upper = h
  62. else:
  63. lower = h
  64. return upper
  65. def add(self, xi, r):
  66. if xi in self.indices:
  67. return False
  68. # Here we know, that x is not in points.
  69. newEntry = (xi, r)
  70. self.indices.add(xi)
  71. # Special case: empty list or new element will extend the radius:
  72. # Here we can avoid the search
  73. if self.points == [] or r >= self.radius:
  74. self.points.append(newEntry)
  75. self.radius = r
  76. return True
  77. # Special case: r <= min radius
  78. # Here we can avoid the search
  79. if self.points[0][1] >= r:
  80. self.points = [newEntry] + self.points
  81. return True
  82. # Here we know that min radius < r < max radius.
  83. # So len(points) >= 2.
  84. pos = self.findIt(r)
  85. # here shoul be: r(pos) >= r > r(pos - 1)
  86. self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
  87. return True
  88. def remove(self, xi, r):
  89. if xi not in self.indices:
  90. return False
  91. # special case: remove the element with the heighest radius:
  92. if self.points[-1][0] == xi:
  93. self.indices.remove(xi)
  94. self.points = self.points[: -1]
  95. if self.points == []:
  96. self.radius = 0.0
  97. else:
  98. self.radius = self.points[-1][1]
  99. return True
  100. pos = self.findIt(r)
  101. nPoints = len(self.points)
  102. # pos is the smalest position with:
  103. # r(pos - 1) < r <= r(pos)
  104. # Just in case: check that we remove the correct index.
  105. while pos < nPoints and r == self.points[pos]:
  106. if self.points[pos][0] == xi:
  107. self.indices.remove(xi)
  108. self.points = self.points[:pos] + self.points[(pos + 1): ]
  109. return True
  110. pos += 1
  111. return False
  112. def divideBall(self, X):
  113. indices = [i for (i, _r) in self.points]
  114. points = np.array([X[i] for i in indices])
  115. distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
  116. ball = Ball(points, indices, center=np.zeros(len(points[0])))
  117. h = len(ball) // 2
  118. indicesA = [i for (i, _) in ball.points[:h]]
  119. indicesB = [i for (i, _) in ball.points[h:]]
  120. pointsA = np.array([X[i] for i in indicesA])
  121. pointsB = np.array([X[i] for i in indicesB])
  122. self.childs = []
  123. print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
  124. self.childs.append(Ball(pointsA, indicesA, self))
  125. self.childs.append(Ball(pointsB, indicesB, self))
  126. return self.childs
  127. def __len__(self):
  128. return len(self.points)
  129. def smalestBallFor(self, i):
  130. if i not in self.indices:
  131. return None
  132. for c in self.childs:
  133. b = c.smalestBallFor(i)
  134. if b is not None:
  135. return b
  136. return self
  137. class NNSearch:
  138. def __init__(self, nebSize=5, timingDict=None):
  139. self.nebSize = nebSize
  140. self.neighbourhoods = []
  141. self.timingDict = timingDict
  142. def timerStart(self, name):
  143. if self.timingDict is not None:
  144. if name not in self.timingDict:
  145. self.timingDict[name] = timing(name)
  146. self.timingDict[name].start()
  147. def timerStop(self, name):
  148. if self.timingDict is not None:
  149. if name in self.timingDict:
  150. self.timingDict[name].stop()
  151. def neighbourhoodOfItem(self, i):
  152. return self.neighbourhoods[i]
  153. def fit(self, X, nebSize=None):
  154. self.timerStart("NN_fit_all")
  155. self.fit_chained(X, nebSize)
  156. #self.fit_bruteForce_np(X, nebSize)
  157. self.timerStop("NN_fit_all")
  158. def fit_optimized(self, X, nebSize=None):
  159. self.timerStart("NN_fit_all")
  160. if nebSize == None:
  161. nebSize = self.nebSize
  162. nPoints = len(X)
  163. nFeatures = len(X[0])
  164. if nFeatures > 15 or nebSize >= (nPoints // 2):
  165. print("Using brute force")
  166. self.fit_bruteForce_np(X, nebSize)
  167. else:
  168. print("Using chained")
  169. self.fit_chained(X, nebSize)
  170. self.timerStop("NN_fit_all")
  171. def fit_bruteForce(self, X, nebSize=None):
  172. if nebSize == None:
  173. nebSize = self.nebSize
  174. self.timerStart("NN_fit_bf_init")
  175. isGreaterThan = lambda x, y: x[1] > y[1]
  176. self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  177. self.timerStop("NN_fit_bf_init")
  178. self.timerStart("NN_fit_bf_loop")
  179. for (i, x) in enumerate(X):
  180. nbh = self.neighbourhoods[i]
  181. for (j, y) in enumerate(X[i+1:]):
  182. j += i + 1
  183. self.timerStart("NN_fit_bf_dist")
  184. d = dist(x,y)
  185. self.timerStop("NN_fit_bf_dist")
  186. self.timerStart("NN_fit_bf_insert")
  187. nbh.insert((j,d))
  188. self.neighbourhoods[j].insert((i,d))
  189. self.timerStop("NN_fit_bf_insert")
  190. self.timerStart("NN_fit_bf_toList")
  191. self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
  192. self.timerStop("NN_fit_bf_toList")
  193. self.timerStop("NN_fit_bf_loop")
  194. def fit_bruteForce_np(self, X, nebSize=None):
  195. self.timerStart("NN_fit_bfnp_init")
  196. numOfPoints = len(X)
  197. nFeatures = len(X[0])
  198. tX = tf.convert_to_tensor(X)
  199. def distancesTo(x):
  200. w = np.repeat([x], numOfPoints, axis=0)
  201. d = tf.keras.layers.Subtract()([w,tX])
  202. t = tf.keras.layers.Dot(axes=(1,1))([d,d])
  203. return t.numpy()
  204. if nebSize == None:
  205. nebSize = self.nebSize
  206. isGreaterThan = lambda x, y: x[1] > y[1]
  207. self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  208. self.timerStop("NN_fit_bfnp_init")
  209. self.timerStart("NN_fit_bfnp_loop")
  210. for (i, x) in enumerate(X):
  211. self.timerStart("NN_fit_bfnp_dist")
  212. distances = distancesTo(x)
  213. self.timerStop("NN_fit_bfnp_dist")
  214. nbh = self.neighbourhoods[i]
  215. for (j, y) in enumerate(X[i+1:]):
  216. j += i + 1
  217. d = distances[j]
  218. self.timerStart("NN_fit_bfnp_insert")
  219. nbh.insert((j,d))
  220. self.neighbourhoods[j].insert((i,d))
  221. self.timerStop("NN_fit_bfnp_insert")
  222. self.timerStart("NN_fit_bfnp_toList")
  223. self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
  224. self.timerStop("NN_fit_bfnp_toList")
  225. self.timerStop("NN_fit_bfnp_loop")
  226. def fit_chained(self, X, nebSize=None):
  227. self.timerStart("NN_fit_chained_init")
  228. if nebSize == None:
  229. nebSize = self.nebSize
  230. nPoints = len(X)
  231. nFeatures = len(X[0])
  232. neigh = NearestNeighbors(n_neighbors=nebSize)
  233. neigh.fit(X)
  234. self.timerStop("NN_fit_chained_init")
  235. self.timerStart("NN_fit_chained_toList")
  236. self.neighbourhoods = [
  237. (neigh.kneighbors([x], nebSize, return_distance=False))[0]
  238. for (i, x) in enumerate(X)
  239. ]
  240. self.timerStop("NN_fit_chained_toList")
  241. def fit_cLib(self, X, nebSize=None):
  242. self.timerStart("NN_fit_cLib_init")
  243. if nebSize == None:
  244. nebSize = self.nebSize
  245. nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
  246. self.timerStop("NN_fit_cLib_init")
  247. self.timerStart("NN_fit_cLib_call")
  248. nbhLib.Neighborhood(nebSize, X.shape[0], X.shape[1], X, nbh)
  249. self.timerStop("NN_fit_cLib_call")
  250. self.timerStart("NN_fit_cLib_list")
  251. self.neighbourhoods = list(nbh)
  252. self.timerStop("NN_fit_cLib_list")
  253. def fit_cLibHeuristic(self, X, nebSize=None):
  254. self.timerStart("NN_fit_cLib_init")
  255. if nebSize == None:
  256. nebSize = self.nebSize
  257. nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
  258. self.timerStop("NN_fit_cLib_init")
  259. self.timerStart("NN_fit_cLib_call")
  260. nbhLib.NeighborhoodHeuristic(nebSize, X.shape[0], X.shape[1], X, nbh)
  261. self.timerStop("NN_fit_cLib_call")
  262. self.timerStart("NN_fit_cLib_list")
  263. self.neighbourhoods = list(nbh)
  264. self.timerStop("NN_fit_cLib_list")
  265. # ===============================================================
  266. # Heuristic search
  267. # ===============================================================
  268. def fit_heuristic(self, X, nebSize=None):
  269. if nebSize == None:
  270. nebSize = self.nebSize
  271. nPoints = len(X)
  272. def walkUp(nbh, ball, x, i):
  273. while ball.parent is not None:
  274. print(f"{i}: up (r: {nbh.getMax()})")
  275. oldBall = ball
  276. ball = ball.parent
  277. for c in ball.childs:
  278. if c != oldBall:
  279. walkDown(nbh, c, x)
  280. def walkDown(nbh, ball, x):
  281. if ball is None:
  282. return
  283. print(f"{i}: down (r: {nbh.getMax()})")
  284. if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
  285. if ball.childs == []:
  286. for (j, _) in ball.points:
  287. nbh.insert((j, dist(x, X[j])))
  288. else:
  289. for c in ball.childs:
  290. walkDown(nbh, c, x)
  291. def countBoles(b):
  292. if b is None:
  293. return 0
  294. return 1 + sum(map(countBoles, b.childs))
  295. root = Ball(X, range(len(X)))
  296. queue = [root]
  297. while queue != []:
  298. ball = queue[0]
  299. queue = queue[1:]
  300. if len(ball) <= nebSize:
  301. continue
  302. queue = ball.divideBall(X) + queue
  303. isGreaterThan = lambda x, y: x[1] > y[1]
  304. self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
  305. print("#B: " + str(countBoles(root)))
  306. exit()
  307. z = X[0]
  308. for (i, x) in enumerate(X):
  309. nbh = self.neighbourhoods[i]
  310. b = root.smalestBallFor(i)
  311. if b.parent is not None:
  312. b = b.parent
  313. for (j, _) in b.points:
  314. d = dist(x, X[j])
  315. nbh.insert((j, d))
  316. walkUp(nbh, b, x, i)