4 år sedan · 66588d2558
--- a/library/NNSearch_experimental.py
+++ b/library/NNSearch_experimental.py
@@ -2,6 +2,7 @@ import math
 
				 from ctypes import cdll, c_uint, c_double, c_void_p
			
 
				 
			
 
				 import tensorflow as tf
			
 
				+import tensorflow.keras.layers as L
			
 
				 import numpy as np
			
 
				 import numpy.ctypeslib as npct
			
 
				 
			
@@ -23,8 +24,15 @@ nbhLib.NeighborhoodHeuristic.rettype = None
 
				 nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
			
 
				 
			
 
				 
			
 
				+
			
 
				+def scalarP(a, b):
			
 
				+    return sum(map(lambda c: c[0] * c[1], zip(a, b)))
			
 
				+
			
 
				+def norm2(v):
			
 
				+    return sum(map(lambda z: z*z, v))
			
 
				+
			
 
				 def dist(x,y):
			
 
				-    return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
			
 
				+    return norm2(x - y)
			
 
				 
			
 
				 def maxby(data, fn, startValue=0.0):
			
 
				     m = startValue
			
@@ -34,8 +42,8 @@ def maxby(data, fn, startValue=0.0):
 
				 
			
 
				 def distancesToPoint(p, points):
			
 
				     w = np.array(np.repeat([p], len(points), axis=0))
			
 
				-    d = tf.keras.layers.Subtract()([w, np.array(points)])
			
 
				-    t = tf.keras.layers.Dot(axes=(1,1))([d,d])
			
 
				+    d = L.Subtract()([w, np.array(points)])
			
 
				+    t = L.Dot(axes=(1,1))([d,d])
			
 
				     # As the concrete distance is not needed and sqrt(x) is strict monotone
			
 
				     # we avoid here unneccessary calculating of expensive roots.
			
 
				     return t.numpy()
			
@@ -44,11 +52,45 @@ def distancesToPoint(p, points):
 
				 def calculateCenter(points):
			
 
				     if points.shape[0] == 1:
			
 
				         return points[0]
			
 
				-    return tf.keras.layers.Average()(list(points)).numpy()
			
 
				+    return tf.keras.layers.Average()(np.array(points)).numpy()
			
 
				+
			
 
				+
			
 
				+def centerPoints(points):
			
 
				+    points = np.array(points)
			
 
				+    center = L.Average()(list(points)).numpy()
			
 
				+    ctr = np.array(np.repeat([center], points.shape[0], axis=0))
			
 
				+    return L.Subtract()([ctr, points]).numpy()
			
 
				+    
			
 
				+
			
 
				+def maxNormPoints(points):
			
 
				+    points = np.array(points)
			
 
				+    a = L.Lambda(lambda x: np.abs(x))(points)
			
 
				+    a = L.Reshape((points.shape[1], 1))(a)
			
 
				+    m = L.GlobalMaxPooling1D()(a)
			
 
				+    m = L.Reshape((1,))
			
 
				+    return m.numpy()
			
 
				+
			
 
				 
			
 
				+def twoNormSquaredPoints(points):
			
 
				+    points = np.array(points)
			
 
				+    return L.Dot(axes=(1,1))([points,points]).numpy()
			
 
				+    
			
 
				 
			
 
				+def twoNormPoints(points):
			
 
				+    points = np.array(points)
			
 
				+    nsq = L.Dot(axes=(1,1))([points,points])
			
 
				+    return L.Lambda(lambda x: np.sqrt(x))(points).numpy()
			
 
				 
			
 
				 
			
 
				+def norms(points):
			
 
				+    points = np.array(points)
			
 
				+    a = L.Lambda(lambda x: np.abs(x))(points)
			
 
				+    a = L.Reshape((points.shape[1], 1))(a)
			
 
				+    m = L.GlobalMaxPooling1D()(a)
			
 
				+    m = L.Reshape((1,))(m)
			
 
				+    nsq = L.Dot(axes=(1,1))([points,points])
			
 
				+    return L.Concatenate()([m, nsq]).numpy()
			
 
				+
			
 
				 
			
 
				 
			
 
				 class Ball:
			
@@ -351,72 +393,110 @@ class NNSearch:
 
				     # ===============================================================
			
 
				     # Heuristic search
			
 
				     # ===============================================================
			
 
    def fit_heuristic(self, X, nebSize=None, debugLayer=0, withDouble=True):
        """Fill ``self.neighbourhoods`` using a line-projection heuristic.

        Every point is described by two numbers relative to a line through
        two far-apart points of ``X``: the distance of its projection from
        the start of the line and its distance from the line. Candidates are
        visited in order of their distance in this 2-D description, and the
        search for a point stops as soon as its heap is full and the
        heuristic value of the next candidate exceeds the largest distance
        stored in the heap.

        Parameters:
            X: indexable collection of points; rows must support ``-`` and
               scalar multiplication (e.g. numpy arrays).
            nebSize: neighbourhood size; defaults to ``self.nebSize``.
            debugLayer: not read by this method; kept for compatibility.
            withDouble: not read by this method; kept for compatibility.

        Side effects: sets ``self.neighbourhoods`` (one entry per point),
        records timings through ``self.timerStart`` / ``self.timerStop`` and
        prints the number of performed insertions.
        """
        if nebSize is None:
            nebSize = self.nebSize

        nPoints = len(X)
        if nPoints == 0:
            # Nothing to search; X[0] below would raise IndexError.
            self.neighbourhoods = []
            return

        def farthestIndex(ref, fallback):
            # Index of the point with the largest dist() to ref; the first
            # one wins on ties, fallback is returned if no distance is > 0.
            best = fallback
            bestDist = 0.0
            for (i, x) in enumerate(X):
                d = dist(x, ref)
                if d > bestDist:
                    best = i
                    bestDist = d
            return best

        self.timerStart("NN_fit_heuristic_init")
        isGreaterThan = lambda x, y: x[1] > y[1]
        self.neighbourhoods = [MaxHeap(maxSize=nebSize, isGreaterThan=isGreaterThan, smalestValue=(i, 0.0)) for i in range(nPoints)]

        # Start of the line: the point farthest away from X[0].
        self.timerStart("NN_fit_heuristic_lineStart")
        lineStart = farthestIndex(X[0], 0)
        origin = X[lineStart]
        self.timerStop("NN_fit_heuristic_lineStart")

        # End of the line: the point farthest away from the start point.
        self.timerStart("NN_fit_heuristic_lineEnd")
        lineEnd = farthestIndex(origin, lineStart)
        self.timerStop("NN_fit_heuristic_lineEnd")

        self.timerStart("NN_fit_heuristic_line")
        u = (X[lineEnd] - origin)
        uLength = math.sqrt(norm2(u))
        if uLength > 0:
            u = (1 / uLength) * u
        # else: all points coincide with the start point. u stays the zero
        # vector, every projection collapses onto the start point and the
        # division by zero is avoided.

        def heuristic(i, x):
            # p is the projection of x onto the line; the result holds the
            # index, the distance start -> p and the distance x -> p.
            p = origin + (scalarP(u, x - origin) * u)
            dz = math.sqrt(dist(origin, p))
            dx = math.sqrt(dist(x, p))
            return (i, dz, dx)

        line = [heuristic(i, x) for (i, x) in enumerate(X)]
        line.sort(key=lambda a: a[1])
        self.timerStop("NN_fit_heuristic_line")
        self.timerStop("NN_fit_heuristic_init")

        self.timerStart("NN_fit_heuristic_loop")
        inserted = 0
        for (i, (xi, di, dix)) in enumerate(line):
            self.timerStart("NN_fit_heuristic_loop_init")
            h = self.neighbourhoods[xi]
            current = X[xi]
            self.timerStop("NN_fit_heuristic_loop_init")

            self.timerStart("NN_fit_heuristic_loop_distance")
            # Only points at or after position i of the sorted line are
            # considered; earlier points were paired with xi when they were
            # processed themselves.
            # NOTE(review): line[i:] contains xi itself, so (xi, 0) is
            # inserted into its own heap twice below unless MaxHeap.insert
            # de-duplicates -- confirm this is intended.
            candidates = [(xj, norm2([dj - di, djx - dix])) for (xj, dj, djx) in line[i:]]
            candidates.sort(key=lambda a: a[1])
            exact = distancesToPoint(current, [X[j] for (j, _) in candidates])
            self.timerStop("NN_fit_heuristic_loop_distance")

            for (d, (xj, djx)) in zip(exact, candidates):
                # Heap is full and the heuristic value of this candidate
                # already exceeds the largest stored distance: stop, the
                # candidates are sorted by heuristic value.
                if h.size >= nebSize and djx > h.getMax()[1]:
                    break

                self.timerStart("NN_fit_heuristic_insert")
                inserted += 1
                h.insert((xj, d))
                k = self.neighbourhoods[xj]
                # Entries that were already replaced by h.toArray() are
                # skipped (presumably toArray returns a list -- verify).
                if not isinstance(k, list):
                    k.insert((xi, d))
                self.timerStop("NN_fit_heuristic_insert")

            self.timerStart("NN_fit_heuristic_toArray")
            self.neighbourhoods[xi] = h.toArray()
            self.timerStop("NN_fit_heuristic_toArray")
        self.timerStop("NN_fit_heuristic_loop")
        print(f"calculated distances: {inserted} / {nPoints * (nPoints - 1)}")
			
 
				+