4 yıl önce · f74901bc3c
--- a/SpeedTest.ipynb
+++ b/SpeedTest.ipynb
--- a/library/NNSearch.py
+++ b/library/NNSearch.py
@@ -1,8 +1,12 @@
 
				 import math
			
 
				 
			
 
				+import tensorflow as tf
			
 
				+import numpy as np
			
 
				+from sklearn.neighbors import NearestNeighbors
			
 
				+
			
 
				 
			
 
				 def dist(x,y):
			
 
				-    return math.sqrt(sum(map(lambda z: (z[0] - z[1])**2, zip(x, y))))
			
 
				+    return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
			
 
				 
			
 
				 def maxby(data, fn, startValue=0.0):
			
 
				     m = startValue
			
@@ -10,20 +14,40 @@ def maxby(data, fn, startValue=0.0):
 
				         m = max(m, fn(v))
			
 
				     return m
			
 
				 
			
 
				+def distancesToPoint(p, points):
			
 
				+    w = np.array(np.repeat([p], len(points), axis=0))
			
 
				+    d = tf.keras.layers.Subtract()([w, np.array(points)])
			
 
				+    t = tf.keras.layers.Dot(axes=(1,1))([d,d])
			
 
				+    # As the concrete distance is not needed and sqrt(x) is strict monotone
			
 
				+    # we avoid here unneccessary calculating of expensive roots.
			
 
				+    return t.numpy()
			
 
				+
			
 
				+
			
 
				+def calculateCenter(points):
			
 
				+    if points.shape[0] == 1:
			
 
				+        return points[0]
			
 
				+    return tf.keras.layers.Average()(list(points)).numpy()
			
 
				+
			
 
				 
			
 
				 class MaxHeap:
			
 
				-    def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=0.0):
			
 
				+    def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=(-1,0.0)):
			
 
				         self.heap = []
			
 
				         self.size = 0
			
 
				         self.maxSize = maxSize
			
 
				         self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b)
			
 
				         self.smalestValue = smalestValue
			
 
				+        self.indices = set()
			
 
				+        self.wasChanged = False
			
 
				+        self.insert(smalestValue)
			
 
				 
			
 
				     def insert(self, v):
			
 
				         if self.maxSize is not None and self.size >= self.maxSize:
			
 
				-            self.replaceMax(v)
			
 
				-            return
			
 
				+            return self.replaceMax(v)
			
 
				+
			
 
				+        if v[0] in self.indices:
			
 
				+            return False
			
 
				 
			
 
				+        self.indices.add(v[0])
			
 
				         pos = self.size
			
 
				         self.size += 1
			
 
				         self.heap.append(v)
			
@@ -34,6 +58,8 @@ class MaxHeap:
 
				             self.heap[pos] = w
			
 
				             pos = pos // 2
			
 
				             self.heap[pos] = v
			
 
				+        self.wasChanged = True
			
 
				+        return True
			
 
				 
			
 
				 
			
 
				     def childPos(self, pos):
			
@@ -44,8 +70,11 @@ class MaxHeap:
 
				     def removeMax(self):
			
 
				         if self.heap == []:
			
 
				             self.size = 0
			
 
				-            return
			
 
				+            return False
			
 
				         
			
 
				+        x = self.heap[0]
			
 
				+        self.indices.remove(x[0])
			
 
				+
			
 
				         self.heap[0] = self.heap[-1]
			
 
				         self.heap = self.heap[:-1]
			
 
				         self.size -= 1
			
@@ -77,17 +106,26 @@ class MaxHeap:
 
				             self.heap[best] = x
			
 
				             pos = best
			
 
				 
			
 
				+        self.wasChanged = True
			
 
				+        return True
			
 
				+
			
 
				 
			
 
				     def replaceMax(self, x):
			
 
				         if self.heap == []:
			
 
				             self.heap = [x]
			
 
				             self.size = 1
			
 
				-            return
			
 
				+            self.indices.add(x[0])
			
 
				+            self.wasChanged = True
			
 
				+            return True
			
 
				         
			
 
				+        if x[0] in self.indices:
			
 
				+            return False
			
 
				 
			
 
				         if self.isGreaterThan(x, self.heap[0]):
			
 
				-            return
			
 
				+            return False
			
 
				 
			
 
				+        self.indices.remove((self.heap[0])[0])
			
 
				+        self.indices.add(x[0])
			
 
				         self.heap[0] = x
			
 
				         pos = 0
			
 
				         size = len(self.heap)
			
@@ -115,17 +153,22 @@ class MaxHeap:
 
				             self.heap[best] = x
			
 
				             pos = best
			
 
				 
			
 
				+        self.wasChanged = True
			
 
				+        return True
			
 
				+
			
 
				     def getMax(self):
			
 
				         if self.heap == []:
			
 
				             return self.smalestValue
			
 
				         return self.heap[0]
			
 
				 
			
 
				 
			
 
				+    def setMaxSize(self, maxSize):
			
 
				+        self.maxSize = maxSize
			
 
				+        while self.size > maxSize:
			
 
				+            self.removeMax()
			
 
				+
			
 
				     def toArray(self, mapFn=None):
			
 
				-        if mapFn is None:
			
 
				-            return self.heap.copy()
			
 
				-        else:
			
 
				-            return [mapFn(x) for x in self.heap]
			
 
				+            return list(self.indices)
			
 
				 
			
 
				 
			
 
				     def length(self):
			
@@ -134,22 +177,176 @@ class MaxHeap:
 
				 
			
 
				 
			
 
				 
			
 
				+class Ball:
			
 
				+    def __init__(self, points=None, indices=None, parent=None, center=None):
			
 
				+        if center is not None:
			
 
				+            self.center = center
			
 
				+        elif points is not None:
			
 
				+            self.center = calculateCenter(np.array(points))
			
 
				+        else:
			
 
				+            raise ParameterError("Missing points or center")
			
 
				+        self.radius = 0
			
 
				+        self.points = []
			
 
				+        self.indices = set()
			
 
				+        self.childs = []
			
 
				+        self.parent = parent
			
 
				+
			
 
				+        if points is not None and indices is not None:
			
 
				+            for (i, r) in zip(indices, distancesToPoint(self.center, points)):
			
 
				+                self.add(i, r)
			
 
				+        elif points is not None:
			
 
				+            raise ParameterError("Missing indices")
			
 
				+
			
 
				+    def findIt(self, r):
			
 
				+        if r == self.points[0][1]:
			
 
				+            return 0
			
 
				+
			
 
				+        upper = len(self.points) - 1
			
 
				+        lower = 0
			
 
				+        while upper > lower + 1:
			
 
				+            h = (upper + lower) // 2
			
 
				+            if self.points[h][1] >= r:
			
 
				+                upper = h
			
 
				+            else:
			
 
				+                lower = h
			
 
				+        return upper
			
 
				+
			
 
				+
			
 
				+    def add(self, xi, r):
			
 
				+        if xi in self.indices:
			
 
				+            return False
			
 
				+
			
 
				+        # Here we know, that x is not in points.
			
 
				+        newEntry = (xi, r)
			
 
				+        self.indices.add(xi)
			
 
				+
			
 
				+        # Special case: empty list or new element will extend the radius:
			
 
				+        # Here we can avoid the search
			
 
				+        if self.points == [] or r >= self.radius:
			
 
				+            self.points.append(newEntry)
			
 
				+            self.radius = r
			
 
				+            return True
			
 
				+
			
 
				+        # Special case: r <= min radius
			
 
				+        # Here we can avoid the search
			
 
				+        if self.points[0][1] >= r:
			
 
				+            self.points = [newEntry] + self.points
			
 
				+            return True
			
 
				+
			
 
				+        # Here we know that min radius < r < max radius.
			
 
				+        # So len(points) >= 2.
			
 
				+
			
 
				+        pos = self.findIt(r)
			
 
				+
			
 
				+        # here shoul be: r(pos) >= r > r(pos - 1)
			
 
				+        self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
			
 
				+
			
 
				+        return True
			
 
				+
			
 
				+    def remove(self, xi, r):
			
 
				+        if xi not in self.indices:
			
 
				+            return False
			
 
				+
			
 
				+
			
 
				+        # special case: remove the element with the heighest radius:
			
 
				+        if self.points[-1][0] == xi:
			
 
				+            self.indices.remove(xi)
			
 
				+            self.points = self.points[: -1]
			
 
				+            if self.points == []:
			
 
				+                self.radius = 0.0
			
 
				+            else:
			
 
				+                self.radius = self.points[-1][1]
			
 
				+            return True
			
 
				+
			
 
				+        pos = self.findIt(r)
			
 
				+        nPoints = len(self.points)
			
 
				+        # pos is the smalest position with:
			
 
				+        # r(pos - 1) < r <= r(pos)
			
 
				+
			
 
				+        # Just in case: check that we remove the correct index.
			
 
				+        while pos < nPoints and r == self.points[pos]:
			
 
				+            if self.points[pos][0] == xi:
			
 
				+                self.indices.remove(xi)
			
 
				+                self.points = self.points[:pos] + self.points[(pos + 1): ]
			
 
				+                return True
			
 
				+            pos += 1
			
 
				+
			
 
				+        return False
			
 
				+
			
 
				+    def divideBall(self, X):
			
 
				+        indices = [i for (i, _r) in self.points]
			
 
				+        points = np.array([X[i] for i in indices])
			
 
				+        
			
 
				+        distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
			
 
				+        ball = Ball(points, indices, center=np.zeros(len(points[0])))
			
 
				+
			
 
				+        h = len(ball) // 2
			
 
				+        indicesA = [i for (i, _) in ball.points[:h]] 
			
 
				+        indicesB = [i for (i, _) in ball.points[h:]] 
			
 
				+        pointsA = np.array([X[i] for i in indicesA])
			
 
				+        pointsB = np.array([X[i] for i in indicesB])
			
 
				+
			
 
				+        self.childs = []
			
 
				+
			
 
				+        print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
			
 
				+        self.childs.append(Ball(pointsA, indicesA, self))
			
 
				+        self.childs.append(Ball(pointsB, indicesB, self))
			
 
				+
			
 
				+        return self.childs
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return len(self.points)
			
 
				+
			
 
				+    def smalestBallFor(self, i):
			
 
				+        if i not in self.indices:
			
 
				+            return None
			
 
				+
			
 
				+        for c in self.childs:
			
 
				+            b = c.smalestBallFor(i)
			
 
				+            if b is not None:
			
 
				+                return b
			
 
				+
			
 
				+        return self
			
 
				+
			
 
				+
			
 
				 
			
 
				 class NNSearch:
			
 
				     def __init__(self, nebSize=5):
			
 
				         self.nebSize = nebSize
			
 
				         self.neighbourhoods = []
			
 
				 
			
 
				+
			
 
				+    def neighbourhoodOfItem(self, i):
			
 
				+        return self.neighbourhoods[i]
			
 
				+
			
 
				+
			
 
				     def fit(self, X, nebSize=None):
			
 
				+        self.fit_bruteForce_np(X, nebSize)
			
 
				+       
			
 
				+    def fit_optimized(self, X, nebSize=None):
			
 
				+        if nebSize == None:
			
 
				+            nebSize = self.nebSize
			
 
				+
			
 
				+        nPoints = len(X)
			
 
				+        nFeatures = len(X[0])
			
 
				+
			
 
				+        if nFeatures > 15 or nebSize >= (nPoints // 2):
			
 
				+            print("Using brute force")
			
 
				+            self.fit_bruteForce_np(X, nebSize)
			
 
				+        else:
			
 
				+            print("Using chained")
			
 
				+            self.fit_chained(X, nebSize)
			
 
				+
			
 
				+
			
 
				+    def fit_bruteForce(self, X, nebSize=None):
			
 
				         if nebSize == None:
			
 
				             nebSize = self.nebSize
			
 
				 
			
 
				         isGreaterThan = lambda x, y: x[1] > y[1]
			
 
				-        self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (None, 0.0)) for _i in range(len(X))]
			
 
				+        self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
			
 
				 
			
 
				         for (i, x) in enumerate(X):
			
 
				             nbh = self.neighbourhoods[i]
			
 
				-            nbh.insert((i, 0.0))
			
 
				 
			
 
				             for (j, y) in enumerate(X[i+1:]):
			
 
				                 j += i + 1
			
@@ -160,5 +357,121 @@ class NNSearch:
 
				             self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
			
 
				 
			
 
				 
			
 
				-    def neighbourhoodOfItem(self, i):
			
 
				-        return self.neighbourhoods[i]
			
 
				+    def fit_bruteForce_np(self, X, nebSize=None):
			
 
				+        numOfPoints = len(X)
			
 
				+        nFeatures = len(X[0])
			
 
				+
			
 
				+        tX = tf.convert_to_tensor(X)
			
 
				+
			
 
				+        def distancesTo(x):
			
 
				+            w = np.repeat([x], numOfPoints, axis=0)
			
 
				+            d = tf.keras.layers.Subtract()([w,tX])
			
 
				+            t = tf.keras.layers.Dot(axes=(1,1))([d,d])
			
 
				+            return t.numpy()
			
 
				+
			
 
				+        if nebSize == None:
			
 
				+            nebSize = self.nebSize
			
 
				+
			
 
				+        isGreaterThan = lambda x, y: x[1] > y[1]
			
 
				+        self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
			
 
				+
			
 
				+        for (i, x) in enumerate(X):
			
 
				+            distances = distancesTo(x)
			
 
				+            nbh = self.neighbourhoods[i]
			
 
				+
			
 
				+            for (j, y) in enumerate(X[i+1:]):
			
 
				+                j += i + 1
			
 
				+                d = distances[j]
			
 
				+                nbh.insert((j,d))
			
 
				+                self.neighbourhoods[j].insert((i,d))
			
 
				+
			
 
				+            self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
			
 
				+
			
 
				+
			
 
				+    def fit_chained(self, X, nebSize=None):
			
 
				+        if nebSize == None:
			
 
				+            nebSize = self.nebSize
			
 
				+
			
 
				+        nPoints = len(X)
			
 
				+        nFeatures = len(X[0])
			
 
				+
			
 
				+        neigh = NearestNeighbors(n_neighbors=nebSize)
			
 
				+        neigh.fit(X)
			
 
				+        self.neighbourhoods = [
			
 
				+                (neigh.kneighbors([x], nebSize, return_distance=False))[0]
			
 
				+                for (i, x) in enumerate(X)
			
 
				+                ]
			
 
				+
			
 
				+
			
 
				+    # ===============================================================
			
 
				+    # Heuristic search
			
 
				+    # ===============================================================
			
 
				+    def fit_heuristic(self, X, nebSize=None):
			
 
				+        if nebSize == None:
			
 
				+            nebSize = self.nebSize
			
 
				+
			
 
				+        nPoints = len(X)
			
 
				+
			
 
				+
			
 
				+        def walkUp(nbh, ball, x, i):
			
 
				+            while ball.parent is not None:
			
 
				+                print(f"{i}: up (r: {nbh.getMax()})")
			
 
				+                oldBall = ball
			
 
				+                ball = ball.parent
			
 
				+                for c in ball.childs:
			
 
				+                    if c != oldBall:
			
 
				+                        walkDown(nbh, c, x)
			
 
				+
			
 
				+        def walkDown(nbh, ball, x):
			
 
				+            if ball is None:
			
 
				+                return
			
 
				+
			
 
				+            print(f"{i}: down (r: {nbh.getMax()})")
			
 
				+
			
 
				+            if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
			
 
				+                if ball.childs == []:
			
 
				+                    for (j, _) in ball.points:
			
 
				+                        nbh.insert((j, dist(x, X[j])))
			
 
				+                else:
			
 
				+                    for c in ball.childs:
			
 
				+                        walkDown(nbh, c, x)
			
 
				+
			
 
				+
			
 
				+        def countBoles(b):
			
 
				+            if b is None:
			
 
				+                return 0
			
 
				+
			
 
				+            return 1 + sum(map(countBoles, b.childs))
			
 
				+
			
 
				+
			
 
				+
			
 
				+        root = Ball(X, range(len(X)))
			
 
				+        queue = [root]
			
 
				+        while queue != []:
			
 
				+            ball = queue[0]
			
 
				+            queue = queue[1:]
			
 
				+            if len(ball) <= nebSize:
			
 
				+                continue
			
 
				+
			
 
				+            queue = ball.divideBall(X) + queue
			
 
				+
			
 
				+
			
 
				+        isGreaterThan = lambda x, y: x[1] > y[1]
			
 
				+        self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
			
 
				+
			
 
				+        print("#B: " + str(countBoles(root)))
			
 
				+
			
 
				+        exit()
			
 
				+        z = X[0]
			
 
				+        for (i, x) in enumerate(X):
			
 
				+            nbh = self.neighbourhoods[i]
			
 
				+
			
 
				+            b = root.smalestBallFor(i)
			
 
				+            if b.parent is not None:
			
 
				+                b = b.parent
			
 
				+
			
 
				+            for (j, _) in b.points:
			
 
				+                d = dist(x, X[j])
			
 
				+                nbh.insert((j, d))
			
 
				+
			
 
				+            walkUp(nbh, b, x, i)