|
|
@@ -1,8 +1,12 @@
|
|
|
import math
|
|
|
|
|
|
+import tensorflow as tf
|
|
|
+import numpy as np
|
|
|
+from sklearn.neighbors import NearestNeighbors
|
|
|
+
|
|
|
|
|
|
def dist(x,y):
|
|
|
- return math.sqrt(sum(map(lambda z: (z[0] - z[1])**2, zip(x, y))))
|
|
|
+ return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
|
|
|
|
|
|
def maxby(data, fn, startValue=0.0):
|
|
|
m = startValue
|
|
|
@@ -10,20 +14,40 @@ def maxby(data, fn, startValue=0.0):
|
|
|
m = max(m, fn(v))
|
|
|
return m
|
|
|
|
|
|
+def distancesToPoint(p, points):
|
|
|
+ w = np.array(np.repeat([p], len(points), axis=0))
|
|
|
+ d = tf.keras.layers.Subtract()([w, np.array(points)])
|
|
|
+ t = tf.keras.layers.Dot(axes=(1,1))([d,d])
|
|
|
+ # As the concrete distance is not needed and sqrt(x) is strict monotone
|
|
|
+ # we avoid here unneccessary calculating of expensive roots.
|
|
|
+ return t.numpy()
|
|
|
+
|
|
|
+
|
|
|
+def calculateCenter(points):
|
|
|
+ if points.shape[0] == 1:
|
|
|
+ return points[0]
|
|
|
+ return tf.keras.layers.Average()(list(points)).numpy()
|
|
|
+
|
|
|
|
|
|
class MaxHeap:
|
|
|
- def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=0.0):
|
|
|
+ def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=(-1,0.0)):
|
|
|
self.heap = []
|
|
|
self.size = 0
|
|
|
self.maxSize = maxSize
|
|
|
self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b)
|
|
|
self.smalestValue = smalestValue
|
|
|
+ self.indices = set()
|
|
|
+ self.wasChanged = False
|
|
|
+ self.insert(smalestValue)
|
|
|
|
|
|
def insert(self, v):
|
|
|
if self.maxSize is not None and self.size >= self.maxSize:
|
|
|
- self.replaceMax(v)
|
|
|
- return
|
|
|
+ return self.replaceMax(v)
|
|
|
+
|
|
|
+ if v[0] in self.indices:
|
|
|
+ return False
|
|
|
|
|
|
+ self.indices.add(v[0])
|
|
|
pos = self.size
|
|
|
self.size += 1
|
|
|
self.heap.append(v)
|
|
|
@@ -34,6 +58,8 @@ class MaxHeap:
|
|
|
self.heap[pos] = w
|
|
|
pos = pos // 2
|
|
|
self.heap[pos] = v
|
|
|
+ self.wasChanged = True
|
|
|
+ return True
|
|
|
|
|
|
|
|
|
def childPos(self, pos):
|
|
|
@@ -44,8 +70,11 @@ class MaxHeap:
|
|
|
def removeMax(self):
|
|
|
if self.heap == []:
|
|
|
self.size = 0
|
|
|
- return
|
|
|
+ return False
|
|
|
|
|
|
+ x = self.heap[0]
|
|
|
+ self.indices.remove(x[0])
|
|
|
+
|
|
|
self.heap[0] = self.heap[-1]
|
|
|
self.heap = self.heap[:-1]
|
|
|
self.size -= 1
|
|
|
@@ -77,17 +106,26 @@ class MaxHeap:
|
|
|
self.heap[best] = x
|
|
|
pos = best
|
|
|
|
|
|
+ self.wasChanged = True
|
|
|
+ return True
|
|
|
+
|
|
|
|
|
|
def replaceMax(self, x):
|
|
|
if self.heap == []:
|
|
|
self.heap = [x]
|
|
|
self.size = 1
|
|
|
- return
|
|
|
+ self.indices.add(x[0])
|
|
|
+ self.wasChanged = True
|
|
|
+ return True
|
|
|
|
|
|
+ if x[0] in self.indices:
|
|
|
+ return False
|
|
|
|
|
|
if self.isGreaterThan(x, self.heap[0]):
|
|
|
- return
|
|
|
+ return False
|
|
|
|
|
|
+ self.indices.remove((self.heap[0])[0])
|
|
|
+ self.indices.add(x[0])
|
|
|
self.heap[0] = x
|
|
|
pos = 0
|
|
|
size = len(self.heap)
|
|
|
@@ -115,17 +153,22 @@ class MaxHeap:
|
|
|
self.heap[best] = x
|
|
|
pos = best
|
|
|
|
|
|
+ self.wasChanged = True
|
|
|
+ return True
|
|
|
+
|
|
|
def getMax(self):
|
|
|
if self.heap == []:
|
|
|
return self.smalestValue
|
|
|
return self.heap[0]
|
|
|
|
|
|
|
|
|
+ def setMaxSize(self, maxSize):
|
|
|
+ self.maxSize = maxSize
|
|
|
+ while self.size > maxSize:
|
|
|
+ self.removeMax()
|
|
|
+
|
|
|
def toArray(self, mapFn=None):
|
|
|
- if mapFn is None:
|
|
|
- return self.heap.copy()
|
|
|
- else:
|
|
|
- return [mapFn(x) for x in self.heap]
|
|
|
+ return list(self.indices)
|
|
|
|
|
|
|
|
|
def length(self):
|
|
|
@@ -134,22 +177,176 @@ class MaxHeap:
|
|
|
|
|
|
|
|
|
|
|
|
+class Ball:
|
|
|
+ def __init__(self, points=None, indices=None, parent=None, center=None):
|
|
|
+ if center is not None:
|
|
|
+ self.center = center
|
|
|
+ elif points is not None:
|
|
|
+ self.center = calculateCenter(np.array(points))
|
|
|
+ else:
|
|
|
+ raise ParameterError("Missing points or center")
|
|
|
+ self.radius = 0
|
|
|
+ self.points = []
|
|
|
+ self.indices = set()
|
|
|
+ self.childs = []
|
|
|
+ self.parent = parent
|
|
|
+
|
|
|
+ if points is not None and indices is not None:
|
|
|
+ for (i, r) in zip(indices, distancesToPoint(self.center, points)):
|
|
|
+ self.add(i, r)
|
|
|
+ elif points is not None:
|
|
|
+ raise ParameterError("Missing indices")
|
|
|
+
|
|
|
+ def findIt(self, r):
|
|
|
+ if r == self.points[0][1]:
|
|
|
+ return 0
|
|
|
+
|
|
|
+ upper = len(self.points) - 1
|
|
|
+ lower = 0
|
|
|
+ while upper > lower + 1:
|
|
|
+ h = (upper + lower) // 2
|
|
|
+ if self.points[h][1] >= r:
|
|
|
+ upper = h
|
|
|
+ else:
|
|
|
+ lower = h
|
|
|
+ return upper
|
|
|
+
|
|
|
+
|
|
|
+ def add(self, xi, r):
|
|
|
+ if xi in self.indices:
|
|
|
+ return False
|
|
|
+
|
|
|
+ # Here we know, that x is not in points.
|
|
|
+ newEntry = (xi, r)
|
|
|
+ self.indices.add(xi)
|
|
|
+
|
|
|
+ # Special case: empty list or new element will extend the radius:
|
|
|
+ # Here we can avoid the search
|
|
|
+ if self.points == [] or r >= self.radius:
|
|
|
+ self.points.append(newEntry)
|
|
|
+ self.radius = r
|
|
|
+ return True
|
|
|
+
|
|
|
+ # Special case: r <= min radius
|
|
|
+ # Here we can avoid the search
|
|
|
+ if self.points[0][1] >= r:
|
|
|
+ self.points = [newEntry] + self.points
|
|
|
+ return True
|
|
|
+
|
|
|
+ # Here we know that min radius < r < max radius.
|
|
|
+ # So len(points) >= 2.
|
|
|
+
|
|
|
+ pos = self.findIt(r)
|
|
|
+
|
|
|
+ # here shoul be: r(pos) >= r > r(pos - 1)
|
|
|
+ self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
|
|
|
+
|
|
|
+ return True
|
|
|
+
|
|
|
+ def remove(self, xi, r):
|
|
|
+ if xi not in self.indices:
|
|
|
+ return False
|
|
|
+
|
|
|
+
|
|
|
+ # special case: remove the element with the heighest radius:
|
|
|
+ if self.points[-1][0] == xi:
|
|
|
+ self.indices.remove(xi)
|
|
|
+ self.points = self.points[: -1]
|
|
|
+ if self.points == []:
|
|
|
+ self.radius = 0.0
|
|
|
+ else:
|
|
|
+ self.radius = self.points[-1][1]
|
|
|
+ return True
|
|
|
+
|
|
|
+ pos = self.findIt(r)
|
|
|
+ nPoints = len(self.points)
|
|
|
+ # pos is the smalest position with:
|
|
|
+ # r(pos - 1) < r <= r(pos)
|
|
|
+
|
|
|
+ # Just in case: check that we remove the correct index.
|
|
|
+ while pos < nPoints and r == self.points[pos]:
|
|
|
+ if self.points[pos][0] == xi:
|
|
|
+ self.indices.remove(xi)
|
|
|
+ self.points = self.points[:pos] + self.points[(pos + 1): ]
|
|
|
+ return True
|
|
|
+ pos += 1
|
|
|
+
|
|
|
+ return False
|
|
|
+
|
|
|
+ def divideBall(self, X):
|
|
|
+ indices = [i for (i, _r) in self.points]
|
|
|
+ points = np.array([X[i] for i in indices])
|
|
|
+
|
|
|
+ distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
|
|
|
+ ball = Ball(points, indices, center=np.zeros(len(points[0])))
|
|
|
+
|
|
|
+ h = len(ball) // 2
|
|
|
+ indicesA = [i for (i, _) in ball.points[:h]]
|
|
|
+ indicesB = [i for (i, _) in ball.points[h:]]
|
|
|
+ pointsA = np.array([X[i] for i in indicesA])
|
|
|
+ pointsB = np.array([X[i] for i in indicesB])
|
|
|
+
|
|
|
+ self.childs = []
|
|
|
+
|
|
|
+ print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
|
|
|
+ self.childs.append(Ball(pointsA, indicesA, self))
|
|
|
+ self.childs.append(Ball(pointsB, indicesB, self))
|
|
|
+
|
|
|
+ return self.childs
|
|
|
+
|
|
|
+ def __len__(self):
|
|
|
+ return len(self.points)
|
|
|
+
|
|
|
+ def smalestBallFor(self, i):
|
|
|
+ if i not in self.indices:
|
|
|
+ return None
|
|
|
+
|
|
|
+ for c in self.childs:
|
|
|
+ b = c.smalestBallFor(i)
|
|
|
+ if b is not None:
|
|
|
+ return b
|
|
|
+
|
|
|
+ return self
|
|
|
+
|
|
|
+
|
|
|
|
|
|
class NNSearch:
|
|
|
def __init__(self, nebSize=5):
|
|
|
self.nebSize = nebSize
|
|
|
self.neighbourhoods = []
|
|
|
|
|
|
+
|
|
|
+ def neighbourhoodOfItem(self, i):
|
|
|
+ return self.neighbourhoods[i]
|
|
|
+
|
|
|
+
|
|
|
def fit(self, X, nebSize=None):
|
|
|
+ self.fit_bruteForce_np(X, nebSize)
|
|
|
+
|
|
|
+ def fit_optimized(self, X, nebSize=None):
|
|
|
+ if nebSize == None:
|
|
|
+ nebSize = self.nebSize
|
|
|
+
|
|
|
+ nPoints = len(X)
|
|
|
+ nFeatures = len(X[0])
|
|
|
+
|
|
|
+ if nFeatures > 15 or nebSize >= (nPoints // 2):
|
|
|
+ print("Using brute force")
|
|
|
+ self.fit_bruteForce_np(X, nebSize)
|
|
|
+ else:
|
|
|
+ print("Using chained")
|
|
|
+ self.fit_chained(X, nebSize)
|
|
|
+
|
|
|
+
|
|
|
+ def fit_bruteForce(self, X, nebSize=None):
|
|
|
if nebSize == None:
|
|
|
nebSize = self.nebSize
|
|
|
|
|
|
isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (None, 0.0)) for _i in range(len(X))]
|
|
|
+ self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
|
|
|
|
|
|
for (i, x) in enumerate(X):
|
|
|
nbh = self.neighbourhoods[i]
|
|
|
- nbh.insert((i, 0.0))
|
|
|
|
|
|
for (j, y) in enumerate(X[i+1:]):
|
|
|
j += i + 1
|
|
|
@@ -160,5 +357,121 @@ class NNSearch:
|
|
|
self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
|
|
|
|
|
|
|
|
|
- def neighbourhoodOfItem(self, i):
|
|
|
- return self.neighbourhoods[i]
|
|
|
+ def fit_bruteForce_np(self, X, nebSize=None):
|
|
|
+ numOfPoints = len(X)
|
|
|
+ nFeatures = len(X[0])
|
|
|
+
|
|
|
+ tX = tf.convert_to_tensor(X)
|
|
|
+
|
|
|
+ def distancesTo(x):
|
|
|
+ w = np.repeat([x], numOfPoints, axis=0)
|
|
|
+ d = tf.keras.layers.Subtract()([w,tX])
|
|
|
+ t = tf.keras.layers.Dot(axes=(1,1))([d,d])
|
|
|
+ return t.numpy()
|
|
|
+
|
|
|
+ if nebSize == None:
|
|
|
+ nebSize = self.nebSize
|
|
|
+
|
|
|
+ isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
+ self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
|
|
|
+
|
|
|
+ for (i, x) in enumerate(X):
|
|
|
+ distances = distancesTo(x)
|
|
|
+ nbh = self.neighbourhoods[i]
|
|
|
+
|
|
|
+ for (j, y) in enumerate(X[i+1:]):
|
|
|
+ j += i + 1
|
|
|
+ d = distances[j]
|
|
|
+ nbh.insert((j,d))
|
|
|
+ self.neighbourhoods[j].insert((i,d))
|
|
|
+
|
|
|
+ self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
|
|
|
+
|
|
|
+
|
|
|
+ def fit_chained(self, X, nebSize=None):
|
|
|
+ if nebSize == None:
|
|
|
+ nebSize = self.nebSize
|
|
|
+
|
|
|
+ nPoints = len(X)
|
|
|
+ nFeatures = len(X[0])
|
|
|
+
|
|
|
+ neigh = NearestNeighbors(n_neighbors=nebSize)
|
|
|
+ neigh.fit(X)
|
|
|
+ self.neighbourhoods = [
|
|
|
+ (neigh.kneighbors([x], nebSize, return_distance=False))[0]
|
|
|
+ for (i, x) in enumerate(X)
|
|
|
+ ]
|
|
|
+
|
|
|
+
|
|
|
+ # ===============================================================
|
|
|
+ # Heuristic search
|
|
|
+ # ===============================================================
|
|
|
+ def fit_heuristic(self, X, nebSize=None):
|
|
|
+ if nebSize == None:
|
|
|
+ nebSize = self.nebSize
|
|
|
+
|
|
|
+ nPoints = len(X)
|
|
|
+
|
|
|
+
|
|
|
+ def walkUp(nbh, ball, x, i):
|
|
|
+ while ball.parent is not None:
|
|
|
+ print(f"{i}: up (r: {nbh.getMax()})")
|
|
|
+ oldBall = ball
|
|
|
+ ball = ball.parent
|
|
|
+ for c in ball.childs:
|
|
|
+ if c != oldBall:
|
|
|
+ walkDown(nbh, c, x)
|
|
|
+
|
|
|
+ def walkDown(nbh, ball, x):
|
|
|
+ if ball is None:
|
|
|
+ return
|
|
|
+
|
|
|
+ print(f"{i}: down (r: {nbh.getMax()})")
|
|
|
+
|
|
|
+ if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
|
|
|
+ if ball.childs == []:
|
|
|
+ for (j, _) in ball.points:
|
|
|
+ nbh.insert((j, dist(x, X[j])))
|
|
|
+ else:
|
|
|
+ for c in ball.childs:
|
|
|
+ walkDown(nbh, c, x)
|
|
|
+
|
|
|
+
|
|
|
+ def countBoles(b):
|
|
|
+ if b is None:
|
|
|
+ return 0
|
|
|
+
|
|
|
+ return 1 + sum(map(countBoles, b.childs))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ root = Ball(X, range(len(X)))
|
|
|
+ queue = [root]
|
|
|
+ while queue != []:
|
|
|
+ ball = queue[0]
|
|
|
+ queue = queue[1:]
|
|
|
+ if len(ball) <= nebSize:
|
|
|
+ continue
|
|
|
+
|
|
|
+ queue = ball.divideBall(X) + queue
|
|
|
+
|
|
|
+
|
|
|
+ isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
+ self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
|
|
|
+
|
|
|
+ print("#B: " + str(countBoles(root)))
|
|
|
+
|
|
|
+ exit()
|
|
|
+ z = X[0]
|
|
|
+ for (i, x) in enumerate(X):
|
|
|
+ nbh = self.neighbourhoods[i]
|
|
|
+
|
|
|
+ b = root.smalestBallFor(i)
|
|
|
+ if b.parent is not None:
|
|
|
+ b = b.parent
|
|
|
+
|
|
|
+ for (j, _) in b.points:
|
|
|
+ d = dist(x, X[j])
|
|
|
+ nbh.insert((j, d))
|
|
|
+
|
|
|
+ walkUp(nbh, b, x, i)
|