import math import tensorflow as tf import numpy as np from sklearn.neighbors import NearestNeighbors from library.timing import timing def dist(x,y): return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y))) def maxby(data, fn, startValue=0.0): m = startValue for v in data: m = max(m, fn(v)) return m def distancesToPoint(p, points): w = np.array(np.repeat([p], len(points), axis=0)) d = tf.keras.layers.Subtract()([w, np.array(points)]) t = tf.keras.layers.Dot(axes=(1,1))([d,d]) # As the concrete distance is not needed and sqrt(x) is strict monotone # we avoid here unneccessary calculating of expensive roots. return t.numpy() def calculateCenter(points): if points.shape[0] == 1: return points[0] return tf.keras.layers.Average()(list(points)).numpy() class MaxHeap: def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=(-1,0.0)): self.heap = [] self.size = 0 self.maxSize = maxSize self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b) self.smalestValue = smalestValue self.indices = set() self.wasChanged = False self.insert(smalestValue) def insert(self, v): if self.maxSize is not None and self.size >= self.maxSize: return self.replaceMax(v) if v[0] in self.indices: return False self.indices.add(v[0]) pos = self.size self.size += 1 self.heap.append(v) while pos > 0: w = self.heap[pos // 2] if not self.isGreaterThan(v, w): break self.heap[pos] = w pos = pos // 2 self.heap[pos] = v self.wasChanged = True return True def childPos(self, pos): c = (pos + 1) * 2 return (c - 1, c) def removeMax(self): if self.heap == []: self.size = 0 return False x = self.heap[0] self.indices.remove(x[0]) self.heap[0] = self.heap[-1] self.heap = self.heap[:-1] self.size -= 1 x = self.heap[0] pos = 0 size = self.size while pos < size: (left, right) = self.childPos(pos) if left >= size: break y = self.heap[left] if right >= size: if self.isGreaterThan(y, x): self.heap[pos] = y self.heap[left] = x break z = self.heap[right] (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z) if not self.isGreaterThan(v, x): break self.heap[pos] = v self.heap[best] = x pos = best self.wasChanged = True return True def replaceMax(self, x): if self.heap == []: self.heap = [x] self.size = 1 self.indices.add(x[0]) self.wasChanged = True return True if x[0] in self.indices: return False if self.isGreaterThan(x, self.heap[0]): return False self.indices.remove((self.heap[0])[0]) self.indices.add(x[0]) self.heap[0] = x pos = 0 size = len(self.heap) while pos < size: (left, right) = self.childPos(pos) if left >= size: break y = self.heap[left] if right >= size: if self.isGreaterThan(y, x): self.heap[pos] = y self.heap[left] = x break z = self.heap[right] (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z) if not self.isGreaterThan(v, x): break self.heap[pos] = v self.heap[best] = x pos = best self.wasChanged = True return True def getMax(self): if self.heap == []: return self.smalestValue return self.heap[0] def setMaxSize(self, maxSize): self.maxSize = maxSize while self.size > maxSize: self.removeMax() def toArray(self, mapFn=None): return list(self.indices) def length(self): return self.size class Ball: def __init__(self, points=None, indices=None, parent=None, center=None): if center is not None: self.center = center elif points is not None: self.center = calculateCenter(np.array(points)) else: raise ParameterError("Missing points or center") self.radius = 0 self.points = [] self.indices = set() self.childs = [] self.parent = parent if points is not None and indices is not None: for (i, r) in zip(indices, distancesToPoint(self.center, points)): self.add(i, r) elif points is not None: raise ParameterError("Missing indices") def findIt(self, r): if r == self.points[0][1]: return 0 upper = len(self.points) - 1 lower = 0 while upper > lower + 1: h = (upper + lower) // 2 if self.points[h][1] >= r: upper = h else: lower = h return upper def add(self, xi, r): if xi in self.indices: return False # Here we know, that x is not in points. newEntry = (xi, r) self.indices.add(xi) # Special case: empty list or new element will extend the radius: # Here we can avoid the search if self.points == [] or r >= self.radius: self.points.append(newEntry) self.radius = r return True # Special case: r <= min radius # Here we can avoid the search if self.points[0][1] >= r: self.points = [newEntry] + self.points return True # Here we know that min radius < r < max radius. # So len(points) >= 2. pos = self.findIt(r) # here shoul be: r(pos) >= r > r(pos - 1) self.points = self.points[:pos] + [newEntry] + self.points[pos: ] return True def remove(self, xi, r): if xi not in self.indices: return False # special case: remove the element with the heighest radius: if self.points[-1][0] == xi: self.indices.remove(xi) self.points = self.points[: -1] if self.points == []: self.radius = 0.0 else: self.radius = self.points[-1][1] return True pos = self.findIt(r) nPoints = len(self.points) # pos is the smalest position with: # r(pos - 1) < r <= r(pos) # Just in case: check that we remove the correct index. while pos < nPoints and r == self.points[pos]: if self.points[pos][0] == xi: self.indices.remove(xi) self.points = self.points[:pos] + self.points[(pos + 1): ] return True pos += 1 return False def divideBall(self, X): indices = [i for (i, _r) in self.points] points = np.array([X[i] for i in indices]) distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy() ball = Ball(points, indices, center=np.zeros(len(points[0]))) h = len(ball) // 2 indicesA = [i for (i, _) in ball.points[:h]] indicesB = [i for (i, _) in ball.points[h:]] pointsA = np.array([X[i] for i in indicesA]) pointsB = np.array([X[i] for i in indicesB]) self.childs = [] print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>") self.childs.append(Ball(pointsA, indicesA, self)) self.childs.append(Ball(pointsB, indicesB, self)) return self.childs def __len__(self): return len(self.points) def smalestBallFor(self, i): if i not in self.indices: return None for c in self.childs: b = c.smalestBallFor(i) if b is not None: return b return self class NNSearch: def __init__(self, nebSize=5, timingDict=None): self.nebSize = nebSize self.neighbourhoods = [] self.timingDict = timingDict def timerStart(self, name): if self.timingDict is not None: if name not in self.timingDict: self.timingDict[name] = timing(name) self.timingDict[name].start() def timerStop(self, name): if self.timingDict is not None: if name in self.timingDict: self.timingDict[name].stop() def neighbourhoodOfItem(self, i): return self.neighbourhoods[i] def fit(self, X, nebSize=None): self.timerStart("NN_fit_all") self.fit_chained(X, nebSize) #self.fit_bruteForce_np(X, nebSize) self.timerStop("NN_fit_all") def fit_optimized(self, X, nebSize=None): self.timerStart("NN_fit_all") if nebSize == None: nebSize = self.nebSize nPoints = len(X) nFeatures = len(X[0]) if nFeatures > 15 or nebSize >= (nPoints // 2): print("Using brute force") self.fit_bruteForce_np(X, nebSize) else: print("Using chained") self.fit_chained(X, nebSize) self.timerStop("NN_fit_all") def fit_bruteForce(self, X, nebSize=None): if nebSize == None: nebSize = self.nebSize self.timerStart("NN_fit_bf_init") isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))] self.timerStop("NN_fit_bf_init") self.timerStart("NN_fit_bf_loop") for (i, x) in enumerate(X): nbh = self.neighbourhoods[i] for (j, y) in enumerate(X[i+1:]): j += i + 1 self.timerStart("NN_fit_bf_dist") d = dist(x,y) self.timerStop("NN_fit_bf_dist") self.timerStart("NN_fit_bf_insert") nbh.insert((j,d)) self.neighbourhoods[j].insert((i,d)) self.timerStop("NN_fit_bf_insert") self.timerStart("NN_fit_bf_toList") self.neighbourhoods[i] = nbh.toArray(lambda v: v[0]) self.timerStop("NN_fit_bf_toList") self.timerStop("NN_fit_bf_loop") def fit_bruteForce_np(self, X, nebSize=None): self.timerStart("NN_fit_bfnp_init") numOfPoints = len(X) nFeatures = len(X[0]) tX = tf.convert_to_tensor(X) def distancesTo(x): w = np.repeat([x], numOfPoints, axis=0) d = tf.keras.layers.Subtract()([w,tX]) t = tf.keras.layers.Dot(axes=(1,1))([d,d]) return t.numpy() if nebSize == None: nebSize = self.nebSize isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))] self.timerStop("NN_fit_bfnp_init") self.timerStart("NN_fit_bfnp_loop") for (i, x) in enumerate(X): self.timerStart("NN_fit_bfnp_dist") distances = distancesTo(x) self.timerStop("NN_fit_bfnp_dist") nbh = self.neighbourhoods[i] for (j, y) in enumerate(X[i+1:]): j += i + 1 d = distances[j] self.timerStart("NN_fit_bfnp_insert") nbh.insert((j,d)) self.neighbourhoods[j].insert((i,d)) self.timerStop("NN_fit_bfnp_insert") self.timerStart("NN_fit_bfnp_toList") self.neighbourhoods[i] = nbh.toArray(lambda v: v[0]) self.timerStop("NN_fit_bfnp_toList") self.timerStop("NN_fit_bfnp_loop") def fit_chained(self, X, nebSize=None): self.timerStart("NN_fit_chained_init") if nebSize == None: nebSize = self.nebSize nPoints = len(X) nFeatures = len(X[0]) neigh = NearestNeighbors(n_neighbors=nebSize) neigh.fit(X) self.timerStop("NN_fit_chained_init") self.timerStart("NN_fit_chained_toList") self.neighbourhoods = [ (neigh.kneighbors([x], nebSize, return_distance=False))[0] for (i, x) in enumerate(X) ] self.timerStop("NN_fit_chained_toList") # =============================================================== # Heuristic search # =============================================================== def fit_heuristic(self, X, nebSize=None): if nebSize == None: nebSize = self.nebSize nPoints = len(X) def walkUp(nbh, ball, x, i): while ball.parent is not None: print(f"{i}: up (r: {nbh.getMax()})") oldBall = ball ball = ball.parent for c in ball.childs: if c != oldBall: walkDown(nbh, c, x) def walkDown(nbh, ball, x): if ball is None: return print(f"{i}: down (r: {nbh.getMax()})") if dist(x, ball.center) - ball.radius < nbh.getMax()[1]: if ball.childs == []: for (j, _) in ball.points: nbh.insert((j, dist(x, X[j]))) else: for c in ball.childs: walkDown(nbh, c, x) def countBoles(b): if b is None: return 0 return 1 + sum(map(countBoles, b.childs)) root = Ball(X, range(len(X))) queue = [root] while queue != []: ball = queue[0] queue = queue[1:] if len(ball) <= nebSize: continue queue = ball.divideBall(X) + queue isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))] print("#B: " + str(countBoles(root))) exit() z = X[0] for (i, x) in enumerate(X): nbh = self.neighbourhoods[i] b = root.smalestBallFor(i) if b.parent is not None: b = b.parent for (j, _) in b.points: d = dist(x, X[j]) nbh.insert((j, d)) walkUp(nbh, b, x, i)