| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- import math
- import tensorflow as tf
- import numpy as np
- from sklearn.neighbors import NearestNeighbors
- from library.timing import timing
- def dist(x,y):
- return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
- def maxby(data, fn, startValue=0.0):
- m = startValue
- for v in data:
- m = max(m, fn(v))
- return m
- def distancesToPoint(p, points):
- w = np.array(np.repeat([p], len(points), axis=0))
- d = tf.keras.layers.Subtract()([w, np.array(points)])
- t = tf.keras.layers.Dot(axes=(1,1))([d,d])
- # As the concrete distance is not needed and sqrt(x) is strict monotone
- # we avoid here unneccessary calculating of expensive roots.
- return t.numpy()
- def calculateCenter(points):
- if points.shape[0] == 1:
- return points[0]
- return tf.keras.layers.Average()(list(points)).numpy()
- class Ball:
- def __init__(self, points=None, indices=None, parent=None, center=None):
- if center is not None:
- self.center = center
- elif points is not None:
- self.center = calculateCenter(np.array(points))
- else:
- raise ParameterError("Missing points or center")
- self.radius = 0
- self.points = []
- self.indices = set()
- self.childs = []
- self.parent = parent
- if points is not None and indices is not None:
- for (i, r) in zip(indices, distancesToPoint(self.center, points)):
- self.add(i, r)
- elif points is not None:
- raise ParameterError("Missing indices")
- def findIt(self, r):
- if r == self.points[0][1]:
- return 0
- upper = len(self.points) - 1
- lower = 0
- while upper > lower + 1:
- h = (upper + lower) // 2
- if self.points[h][1] >= r:
- upper = h
- else:
- lower = h
- return upper
- def add(self, xi, r):
- if xi in self.indices:
- return False
- # Here we know, that x is not in points.
- newEntry = (xi, r)
- self.indices.add(xi)
- # Special case: empty list or new element will extend the radius:
- # Here we can avoid the search
- if self.points == [] or r >= self.radius:
- self.points.append(newEntry)
- self.radius = r
- return True
- # Special case: r <= min radius
- # Here we can avoid the search
- if self.points[0][1] >= r:
- self.points = [newEntry] + self.points
- return True
- # Here we know that min radius < r < max radius.
- # So len(points) >= 2.
- pos = self.findIt(r)
- # here shoul be: r(pos) >= r > r(pos - 1)
- self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
- return True
- def remove(self, xi, r):
- if xi not in self.indices:
- return False
- # special case: remove the element with the heighest radius:
- if self.points[-1][0] == xi:
- self.indices.remove(xi)
- self.points = self.points[: -1]
- if self.points == []:
- self.radius = 0.0
- else:
- self.radius = self.points[-1][1]
- return True
- pos = self.findIt(r)
- nPoints = len(self.points)
- # pos is the smalest position with:
- # r(pos - 1) < r <= r(pos)
- # Just in case: check that we remove the correct index.
- while pos < nPoints and r == self.points[pos]:
- if self.points[pos][0] == xi:
- self.indices.remove(xi)
- self.points = self.points[:pos] + self.points[(pos + 1): ]
- return True
- pos += 1
- return False
- def divideBall(self, X):
- indices = [i for (i, _r) in self.points]
- points = np.array([X[i] for i in indices])
-
- distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
- ball = Ball(points, indices, center=np.zeros(len(points[0])))
- h = len(ball) // 2
- indicesA = [i for (i, _) in ball.points[:h]]
- indicesB = [i for (i, _) in ball.points[h:]]
- pointsA = np.array([X[i] for i in indicesA])
- pointsB = np.array([X[i] for i in indicesB])
- self.childs = []
- print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
- self.childs.append(Ball(pointsA, indicesA, self))
- self.childs.append(Ball(pointsB, indicesB, self))
- return self.childs
- def __len__(self):
- return len(self.points)
- def smalestBallFor(self, i):
- if i not in self.indices:
- return None
- for c in self.childs:
- b = c.smalestBallFor(i)
- if b is not None:
- return b
- return self
- class NNSearch:
- def __init__(self, nebSize=5, timingDict=None):
- self.nebSize = nebSize
- self.neighbourhoods = []
- self.timingDict = timingDict
- def timerStart(self, name):
- if self.timingDict is not None:
- if name not in self.timingDict:
- self.timingDict[name] = timing(name)
- self.timingDict[name].start()
- def timerStop(self, name):
- if self.timingDict is not None:
- if name in self.timingDict:
- self.timingDict[name].stop()
- def neighbourhoodOfItem(self, i):
- return self.neighbourhoods[i]
- def fit(self, X, nebSize=None):
- self.timerStart("NN_fit_all")
- self.fit_chained(X, nebSize)
- #self.fit_bruteForce_np(X, nebSize)
- self.timerStop("NN_fit_all")
-
- def fit_optimized(self, X, nebSize=None):
- self.timerStart("NN_fit_all")
- if nebSize == None:
- nebSize = self.nebSize
- nPoints = len(X)
- nFeatures = len(X[0])
- if nFeatures > 15 or nebSize >= (nPoints // 2):
- print("Using brute force")
- self.fit_bruteForce_np(X, nebSize)
- else:
- print("Using chained")
- self.fit_chained(X, nebSize)
- self.timerStop("NN_fit_all")
- def fit_bruteForce(self, X, nebSize=None):
- if nebSize == None:
- nebSize = self.nebSize
- self.timerStart("NN_fit_bf_init")
- isGreaterThan = lambda x, y: x[1] > y[1]
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
- self.timerStop("NN_fit_bf_init")
- self.timerStart("NN_fit_bf_loop")
- for (i, x) in enumerate(X):
- nbh = self.neighbourhoods[i]
- for (j, y) in enumerate(X[i+1:]):
- j += i + 1
- self.timerStart("NN_fit_bf_dist")
- d = dist(x,y)
- self.timerStop("NN_fit_bf_dist")
- self.timerStart("NN_fit_bf_insert")
- nbh.insert((j,d))
- self.neighbourhoods[j].insert((i,d))
- self.timerStop("NN_fit_bf_insert")
- self.timerStart("NN_fit_bf_toList")
- self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
- self.timerStop("NN_fit_bf_toList")
- self.timerStop("NN_fit_bf_loop")
- def fit_bruteForce_np(self, X, nebSize=None):
- self.timerStart("NN_fit_bfnp_init")
- numOfPoints = len(X)
- nFeatures = len(X[0])
- tX = tf.convert_to_tensor(X)
- def distancesTo(x):
- w = np.repeat([x], numOfPoints, axis=0)
- d = tf.keras.layers.Subtract()([w,tX])
- t = tf.keras.layers.Dot(axes=(1,1))([d,d])
- return t.numpy()
- if nebSize == None:
- nebSize = self.nebSize
- isGreaterThan = lambda x, y: x[1] > y[1]
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
- self.timerStop("NN_fit_bfnp_init")
- self.timerStart("NN_fit_bfnp_loop")
- for (i, x) in enumerate(X):
- self.timerStart("NN_fit_bfnp_dist")
- distances = distancesTo(x)
- self.timerStop("NN_fit_bfnp_dist")
- nbh = self.neighbourhoods[i]
- for (j, y) in enumerate(X[i+1:]):
- j += i + 1
- d = distances[j]
- self.timerStart("NN_fit_bfnp_insert")
- nbh.insert((j,d))
- self.neighbourhoods[j].insert((i,d))
- self.timerStop("NN_fit_bfnp_insert")
- self.timerStart("NN_fit_bfnp_toList")
- self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
- self.timerStop("NN_fit_bfnp_toList")
- self.timerStop("NN_fit_bfnp_loop")
- def fit_chained(self, X, nebSize=None):
- self.timerStart("NN_fit_chained_init")
- if nebSize == None:
- nebSize = self.nebSize
- nPoints = len(X)
- nFeatures = len(X[0])
- neigh = NearestNeighbors(n_neighbors=nebSize)
- neigh.fit(X)
- self.timerStop("NN_fit_chained_init")
- self.timerStart("NN_fit_chained_toList")
- self.neighbourhoods = [
- (neigh.kneighbors([x], nebSize, return_distance=False))[0]
- for (i, x) in enumerate(X)
- ]
- self.timerStop("NN_fit_chained_toList")
- # ===============================================================
- # Heuristic search
- # ===============================================================
- def fit_heuristic(self, X, nebSize=None):
- if nebSize == None:
- nebSize = self.nebSize
- nPoints = len(X)
- def walkUp(nbh, ball, x, i):
- while ball.parent is not None:
- print(f"{i}: up (r: {nbh.getMax()})")
- oldBall = ball
- ball = ball.parent
- for c in ball.childs:
- if c != oldBall:
- walkDown(nbh, c, x)
- def walkDown(nbh, ball, x):
- if ball is None:
- return
- print(f"{i}: down (r: {nbh.getMax()})")
- if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
- if ball.childs == []:
- for (j, _) in ball.points:
- nbh.insert((j, dist(x, X[j])))
- else:
- for c in ball.childs:
- walkDown(nbh, c, x)
- def countBoles(b):
- if b is None:
- return 0
- return 1 + sum(map(countBoles, b.childs))
- root = Ball(X, range(len(X)))
- queue = [root]
- while queue != []:
- ball = queue[0]
- queue = queue[1:]
- if len(ball) <= nebSize:
- continue
- queue = ball.divideBall(X) + queue
- isGreaterThan = lambda x, y: x[1] > y[1]
- self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
- print("#B: " + str(countBoles(root)))
- exit()
- z = X[0]
- for (i, x) in enumerate(X):
- nbh = self.neighbourhoods[i]
- b = root.smalestBallFor(i)
- if b.parent is not None:
- b = b.parent
- for (j, _) in b.points:
- d = dist(x, X[j])
- nbh.insert((j, d))
- walkUp(nbh, b, x, i)
|