Prechádzať zdrojové kódy

Experimented a bit more with the heuristic ... still not happy with it.

Kristian Schultz pred 4 rokmi
rodič
commit
66588d2558
1 zmenený súbor: 141 pridaní a 61 odobraní
  1. 141 61
      library/NNSearch_experimental.py

+ 141 - 61
library/NNSearch_experimental.py

@@ -2,6 +2,7 @@ import math
 from ctypes import cdll, c_uint, c_double, c_void_p
 
 import tensorflow as tf
+import tensorflow.keras.layers as L
 import numpy as np
 import numpy.ctypeslib as npct
 
@@ -23,8 +24,15 @@ nbhLib.NeighborhoodHeuristic.rettype = None
 nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
 
 
+
+def scalarP(a, b):
+    return sum(map(lambda c: c[0] * c[1], zip(a, b)))
+
+def norm2(v):
+    return sum(map(lambda z: z*z, v))
+
 def dist(x,y):
-    return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
+    return norm2(x - y)
 
 def maxby(data, fn, startValue=0.0):
     m = startValue
@@ -34,8 +42,8 @@ def maxby(data, fn, startValue=0.0):
 
 def distancesToPoint(p, points):
     w = np.array(np.repeat([p], len(points), axis=0))
-    d = tf.keras.layers.Subtract()([w, np.array(points)])
-    t = tf.keras.layers.Dot(axes=(1,1))([d,d])
+    d = L.Subtract()([w, np.array(points)])
+    t = L.Dot(axes=(1,1))([d,d])
     # As the concrete distance is not needed and sqrt(x) is strict monotone
     # we avoid here unneccessary calculating of expensive roots.
     return t.numpy()
@@ -44,11 +52,45 @@ def distancesToPoint(p, points):
 def calculateCenter(points):
     if points.shape[0] == 1:
         return points[0]
-    return tf.keras.layers.Average()(list(points)).numpy()
+    return tf.keras.layers.Average()(np.array(points)).numpy()
+
+
+def centerPoints(points):
+    points = np.array(points)
+    center = L.Average()(list(points)).numpy()
+    ctr = np.array(np.repeat([center], points.shape[0], axis=0))
+    return L.Subtract()([ctr, points]).numpy()
+    
+
+def maxNormPoints(points):
+    points = np.array(points)
+    a = L.Lambda(lambda x: np.abs(x))(points)
+    a = L.Reshape((points.shape[1], 1))(a)
+    m = L.GlobalMaxPooling1D()(a)
+    m = L.Reshape((1,))
+    return m.numpy()
+
 
+def twoNormSquaredPoints(points):
+    points = np.array(points)
+    return L.Dot(axes=(1,1))([points,points]).numpy()
+    
 
+def twoNormPoints(points):
+    points = np.array(points)
+    nsq = L.Dot(axes=(1,1))([points,points])
+    return L.Lambda(lambda x: np.sqrt(x))(points).numpy()
 
 
+def norms(points):
+    points = np.array(points)
+    a = L.Lambda(lambda x: np.abs(x))(points)
+    a = L.Reshape((points.shape[1], 1))(a)
+    m = L.GlobalMaxPooling1D()(a)
+    m = L.Reshape((1,))(m)
+    nsq = L.Dot(axes=(1,1))([points,points])
+    return L.Concatenate()([m, nsq]).numpy()
+
 
 
 class Ball:
@@ -351,72 +393,110 @@ class NNSearch:
     # ===============================================================
     # Heuristic search
     # ===============================================================
-    def fit_heuristic(self, X, nebSize=None):
+    def fit_heuristic(self, X, nebSize=None, debugLayer=0, withDouble=True):
         if nebSize == None:
             nebSize = self.nebSize
 
+        self.timerStart("NN_fit_heuristic_init")
         nPoints = len(X)
-
-
-        def walkUp(nbh, ball, x, i):
-            while ball.parent is not None:
-                print(f"{i}: up (r: {nbh.getMax()})")
-                oldBall = ball
-                ball = ball.parent
-                for c in ball.childs:
-                    if c != oldBall:
-                        walkDown(nbh, c, x)
-
-        def walkDown(nbh, ball, x):
-            if ball is None:
-                return
-
-            print(f"{i}: down (r: {nbh.getMax()})")
-
-            if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
-                if ball.childs == []:
-                    for (j, _) in ball.points:
-                        nbh.insert((j, dist(x, X[j])))
-                else:
-                    for c in ball.childs:
-                        walkDown(nbh, c, x)
-
-
-        def countBoles(b):
-            if b is None:
-                return 0
-
-            return 1 + sum(map(countBoles, b.childs))
-
-
-
-        root = Ball(X, range(len(X)))
-        queue = [root]
-        while queue != []:
-            ball = queue[0]
-            queue = queue[1:]
-            if len(ball) <= nebSize:
-                continue
-
-            queue = ball.divideBall(X) + queue
-
-
+        nFeatures = len(X[0])
+        nHeuristic = max(1, int(math.log(nFeatures)))
         isGreaterThan = lambda x, y: x[1] > y[1]
-        self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
+        self.neighbourhoods = [MaxHeap(maxSize=nebSize, isGreaterThan=isGreaterThan, smalestValue=(i, 0.0)) for i in range(len(X))]
 
-        print("#B: " + str(countBoles(root)))
 
-        exit()
+        self.timerStart("NN_fit_heuristic_lineStart")
         z = X[0]
+        farest = 0
+        bestDist = 0.0
         for (i, x) in enumerate(X):
-            nbh = self.neighbourhoods[i]
+            d = dist(x, z)
+            if d > bestDist:
+                farest = i
+                bestDist = d
 
-            b = root.smalestBallFor(i)
-            if b.parent is not None:
-                b = b.parent
+        lineStart = farest
+        z = X[lineStart]
+        self.timerStop("NN_fit_heuristic_lineStart")
 
-            for (j, _) in b.points:
-                d = dist(x, X[j])
-                nbh.insert((j, d))
+        # print(f"lineStart: {lineStart}@{z} ... {bestDist}")
 
-            walkUp(nbh, b, x, i)
+        self.timerStart("NN_fit_heuristic_lineEnd")
+        bestDist = 0.0
+        for (i, x) in enumerate(X):
+            d = dist(x, z)
+            if d > bestDist:
+                farest = i
+                bestDist = d
+
+        lineEnd = farest
+        self.timerStop("NN_fit_heuristic_lineEnd")
+
+        self.timerStart("NN_fit_heuristic_line")
+        # print(f"lineEnd: {lineEnd}@{X[lineEnd]} ... {bestDist}")
+        u = (X[lineEnd] - z)
+        uFactor = (1 / math.sqrt(norm2(u)))
+        u = uFactor * u
+        # print(f"u: {u} ... {norm2(u)}")
+
+        def heuristic(i,x):
+            p = z + (scalarP(u, x - z) * u)
+            dz = math.sqrt(dist(z, p))
+            dx = math.sqrt(dist(x, p)) 
+            return (i, dz, dx)
+
+        line = [heuristic(i, x) for (i,x) in enumerate(X) ]
+        line.sort(key= lambda a: a[1])
+        self.timerStop("NN_fit_heuristic_line")
+        self.timerStop("NN_fit_heuristic_init")
+
+        self.timerStart("NN_fit_heuristic_loop")
+        s = 0
+        ff = False
+        ptsDone = set()
+        for (i,(xi, di, dix)) in enumerate(line):
+            self.timerStart("NN_fit_heuristic_loop_init")
+            h = self.neighbourhoods[xi]
+            z = X[xi]
+            self.timerStop("NN_fit_heuristic_loop_init")
+            ptsDone.add(xi)
+
+            self.timerStart("NN_fit_heuristic_loop_distance")
+            ll = [(xj, norm2([dj - di, djx - dix])) for (xj, dj, djx) in line[i:]]
+            # ll = [(xj, dist(
+            #     np.array([di, dix] + list(X[xi][0:nHeuristic])),
+            #     np.array([dj, djx] + list(X[xj][0:nHeuristic]))))
+            #     for (xj, dj, djx) in line[1:]
+            #     ]
+            ll.sort(key = lambda a: a[1])
+            kk = distancesToPoint(z, [X[j] for (j, _) in ll])
+            self.timerStop("NN_fit_heuristic_loop_distance")
+
+            for (d, (xj, djx)) in zip(kk, ll):
+                ign = h.size >= nebSize and djx > h.getMax()[1]
+                if ign:
+                    break
+                else:
+                    #d = dist(X[xj], z)
+                    self.timerStart("NN_fit_heuristic_insert")
+                    s += 1
+                    h.insert((xj, d))
+                    k = self.neighbourhoods[xj]
+                    if not isinstance(k, list):
+                        k.insert((xi, d))
+                    self.timerStop("NN_fit_heuristic_insert")
+    
+                # if xi == debugLayer:
+                #     d = dist(X[xj], z)
+                #     hint = ""
+                #     if djx > d:
+                #         hint += "!!"
+                #     if ign:
+                #         hint += "*"
+                #     print(f"xj:{xj}   dx:{h.getMax()[1]:0.1f}   djx:{djx:0.1f}  d:{d:0.1f}" + hint)
+            self.timerStart("NN_fit_heuristic_toArray")
+            self.neighbourhoods[xi] = h.toArray()
+            self.timerStop("NN_fit_heuristic_toArray")
+        self.timerStop("NN_fit_heuristic_loop")
+        print(f"calculated distances: {s} / {nPoints * (nPoints - 1)}")
+