Browse Source

Cleaned up: NNSearch.py

Kristian Schultz 4 years ago
parent
commit
9a8d6bffd6
1 changed files with 0 additions and 472 deletions
  1. 0 472
      library/NNSearch.py

+ 0 - 472
library/NNSearch.py

@@ -6,311 +6,6 @@ from sklearn.neighbors import NearestNeighbors
 from library.timing import timing
 
 
-def dist(x,y):
-    return sum(map(lambda z: (z[0] - z[1])*(z[0] - z[1]), zip(x, y)))
-
-def maxby(data, fn, startValue=0.0):
-    m = startValue
-    for v in data:
-        m = max(m, fn(v))
-    return m
-
-def distancesToPoint(p, points):
-    w = np.array(np.repeat([p], len(points), axis=0))
-    d = tf.keras.layers.Subtract()([w, np.array(points)])
-    t = tf.keras.layers.Dot(axes=(1,1))([d,d])
-    # As the concrete distance is not needed and sqrt(x) is strict monotone
-    # we avoid here unneccessary calculating of expensive roots.
-    return t.numpy()
-
-
-def calculateCenter(points):
-    if points.shape[0] == 1:
-        return points[0]
-    return tf.keras.layers.Average()(list(points)).numpy()
-
-
-class MaxHeap:
-    def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=(-1,0.0)):
-        self.heap = []
-        self.size = 0
-        self.maxSize = maxSize
-        self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b)
-        self.smalestValue = smalestValue
-        self.indices = set()
-        self.wasChanged = False
-        self.insert(smalestValue)
-
-    def insert(self, v):
-        if self.maxSize is not None and self.size >= self.maxSize:
-            return self.replaceMax(v)
-
-        if v[0] in self.indices:
-            return False
-
-        self.indices.add(v[0])
-        pos = self.size
-        self.size += 1
-        self.heap.append(v)
-        while pos > 0:
-            w = self.heap[pos // 2]
-            if not self.isGreaterThan(v, w):
-                break
-            self.heap[pos] = w
-            pos = pos // 2
-            self.heap[pos] = v
-        self.wasChanged = True
-        return True
-
-
-    def childPos(self, pos):
-        c = (pos + 1) * 2
-        return (c - 1, c)
-
-
-    def removeMax(self):
-        if self.heap == []:
-            self.size = 0
-            return False
-        
-        x = self.heap[0]
-        self.indices.remove(x[0])
-
-        self.heap[0] = self.heap[-1]
-        self.heap = self.heap[:-1]
-        self.size -= 1
-
-        x = self.heap[0]
-        pos = 0
-        size = self.size
-
-        while pos < size:
-            (left, right) = self.childPos(pos)
-
-            if left >= size:
-                break
-
-            y = self.heap[left]
-            if right >= size:
-                if self.isGreaterThan(y, x):
-                    self.heap[pos] = y
-                    self.heap[left] = x
-                break
-
-            z = self.heap[right]
-            (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z)
-
-            if not self.isGreaterThan(v, x):
-                break
-
-            self.heap[pos] = v
-            self.heap[best] = x
-            pos = best
-
-        self.wasChanged = True
-        return True
-
-
-    def replaceMax(self, x):
-        if self.heap == []:
-            self.heap = [x]
-            self.size = 1
-            self.indices.add(x[0])
-            self.wasChanged = True
-            return True
-        
-        if x[0] in self.indices:
-            return False
-
-        if self.isGreaterThan(x, self.heap[0]):
-            return False
-
-        self.indices.remove((self.heap[0])[0])
-        self.indices.add(x[0])
-        self.heap[0] = x
-        pos = 0
-        size = len(self.heap)
-
-        while pos < size:
-            (left, right) = self.childPos(pos)
-
-            if left >= size:
-                break
-
-            y = self.heap[left]
-            if right >= size:
-                if self.isGreaterThan(y, x):
-                    self.heap[pos] = y
-                    self.heap[left] = x
-                break
-
-            z = self.heap[right]
-            (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z)
-
-            if not self.isGreaterThan(v, x):
-                break
-
-            self.heap[pos] = v
-            self.heap[best] = x
-            pos = best
-
-        self.wasChanged = True
-        return True
-
-    def getMax(self):
-        if self.heap == []:
-            return self.smalestValue
-        return self.heap[0]
-
-
-    def setMaxSize(self, maxSize):
-        self.maxSize = maxSize
-        while self.size > maxSize:
-            self.removeMax()
-
-    def toArray(self, mapFn=None):
-            return list(self.indices)
-
-
-    def length(self):
-        return self.size
-
-
-
-
-class Ball:
-    def __init__(self, points=None, indices=None, parent=None, center=None):
-        if center is not None:
-            self.center = center
-        elif points is not None:
-            self.center = calculateCenter(np.array(points))
-        else:
-            raise ParameterError("Missing points or center")
-        self.radius = 0
-        self.points = []
-        self.indices = set()
-        self.childs = []
-        self.parent = parent
-
-        if points is not None and indices is not None:
-            for (i, r) in zip(indices, distancesToPoint(self.center, points)):
-                self.add(i, r)
-        elif points is not None:
-            raise ParameterError("Missing indices")
-
-    def findIt(self, r):
-        if r == self.points[0][1]:
-            return 0
-
-        upper = len(self.points) - 1
-        lower = 0
-        while upper > lower + 1:
-            h = (upper + lower) // 2
-            if self.points[h][1] >= r:
-                upper = h
-            else:
-                lower = h
-        return upper
-
-
-    def add(self, xi, r):
-        if xi in self.indices:
-            return False
-
-        # Here we know, that x is not in points.
-        newEntry = (xi, r)
-        self.indices.add(xi)
-
-        # Special case: empty list or new element will extend the radius:
-        # Here we can avoid the search
-        if self.points == [] or r >= self.radius:
-            self.points.append(newEntry)
-            self.radius = r
-            return True
-
-        # Special case: r <= min radius
-        # Here we can avoid the search
-        if self.points[0][1] >= r:
-            self.points = [newEntry] + self.points
-            return True
-
-        # Here we know that min radius < r < max radius.
-        # So len(points) >= 2.
-
-        pos = self.findIt(r)
-
-        # here shoul be: r(pos) >= r > r(pos - 1)
-        self.points = self.points[:pos] + [newEntry] + self.points[pos: ]
-
-        return True
-
-    def remove(self, xi, r):
-        if xi not in self.indices:
-            return False
-
-
-        # special case: remove the element with the heighest radius:
-        if self.points[-1][0] == xi:
-            self.indices.remove(xi)
-            self.points = self.points[: -1]
-            if self.points == []:
-                self.radius = 0.0
-            else:
-                self.radius = self.points[-1][1]
-            return True
-
-        pos = self.findIt(r)
-        nPoints = len(self.points)
-        # pos is the smalest position with:
-        # r(pos - 1) < r <= r(pos)
-
-        # Just in case: check that we remove the correct index.
-        while pos < nPoints and r == self.points[pos]:
-            if self.points[pos][0] == xi:
-                self.indices.remove(xi)
-                self.points = self.points[:pos] + self.points[(pos + 1): ]
-                return True
-            pos += 1
-
-        return False
-
-    def divideBall(self, X):
-        indices = [i for (i, _r) in self.points]
-        points = np.array([X[i] for i in indices])
-        
-        distances = tf.keras.layers.Dot(axes=(1,1))([points, points]).numpy()
-        ball = Ball(points, indices, center=np.zeros(len(points[0])))
-
-        h = len(ball) // 2
-        indicesA = [i for (i, _) in ball.points[:h]] 
-        indicesB = [i for (i, _) in ball.points[h:]] 
-        pointsA = np.array([X[i] for i in indicesA])
-        pointsB = np.array([X[i] for i in indicesB])
-
-        self.childs = []
-
-        print(f"{len(points)} -> <{len(pointsA)}|{len(pointsB)}>")
-        self.childs.append(Ball(pointsA, indicesA, self))
-        self.childs.append(Ball(pointsB, indicesB, self))
-
-        return self.childs
-
-    def __len__(self):
-        return len(self.points)
-
-    def smalestBallFor(self, i):
-        if i not in self.indices:
-            return None
-
-        for c in self.childs:
-            b = c.smalestBallFor(i)
-            if b is not None:
-                return b
-
-        return self
-
-
-
 class NNSearch:
     def __init__(self, nebSize=5, timingDict=None):
         self.nebSize = nebSize
@@ -335,99 +30,6 @@ class NNSearch:
 
 
     def fit(self, X, nebSize=None):
-        self.timerStart("NN_fit_all")
-        self.fit_chained(X, nebSize)
-        #self.fit_bruteForce_np(X, nebSize)
-        self.timerStop("NN_fit_all")
-       
-    def fit_optimized(self, X, nebSize=None):
-        self.timerStart("NN_fit_all")
-        if nebSize == None:
-            nebSize = self.nebSize
-
-        nPoints = len(X)
-        nFeatures = len(X[0])
-
-        if nFeatures > 15 or nebSize >= (nPoints // 2):
-            print("Using brute force")
-            self.fit_bruteForce_np(X, nebSize)
-        else:
-            print("Using chained")
-            self.fit_chained(X, nebSize)
-        self.timerStop("NN_fit_all")
-
-
-    def fit_bruteForce(self, X, nebSize=None):
-        if nebSize == None:
-            nebSize = self.nebSize
-
-        self.timerStart("NN_fit_bf_init")
-        isGreaterThan = lambda x, y: x[1] > y[1]
-        self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
-        self.timerStop("NN_fit_bf_init")
-
-        self.timerStart("NN_fit_bf_loop")
-        for (i, x) in enumerate(X):
-            nbh = self.neighbourhoods[i]
-
-            for (j, y) in enumerate(X[i+1:]):
-                j += i + 1
-                self.timerStart("NN_fit_bf_dist")
-                d = dist(x,y)
-                self.timerStop("NN_fit_bf_dist")
-                self.timerStart("NN_fit_bf_insert")
-                nbh.insert((j,d))
-                self.neighbourhoods[j].insert((i,d))
-                self.timerStop("NN_fit_bf_insert")
-
-            self.timerStart("NN_fit_bf_toList")
-            self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
-            self.timerStop("NN_fit_bf_toList")
-        self.timerStop("NN_fit_bf_loop")
-
-
-    def fit_bruteForce_np(self, X, nebSize=None):
-        self.timerStart("NN_fit_bfnp_init")
-        numOfPoints = len(X)
-        nFeatures = len(X[0])
-
-        tX = tf.convert_to_tensor(X)
-
-        def distancesTo(x):
-            w = np.repeat([x], numOfPoints, axis=0)
-            d = tf.keras.layers.Subtract()([w,tX])
-            t = tf.keras.layers.Dot(axes=(1,1))([d,d])
-            return t.numpy()
-
-        if nebSize == None:
-            nebSize = self.nebSize
-
-        isGreaterThan = lambda x, y: x[1] > y[1]
-        self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
-        self.timerStop("NN_fit_bfnp_init")
-
-        self.timerStart("NN_fit_bfnp_loop")
-        for (i, x) in enumerate(X):
-            self.timerStart("NN_fit_bfnp_dist")
-            distances = distancesTo(x)
-            self.timerStop("NN_fit_bfnp_dist")
-            nbh = self.neighbourhoods[i]
-
-            for (j, y) in enumerate(X[i+1:]):
-                j += i + 1
-                d = distances[j]
-                self.timerStart("NN_fit_bfnp_insert")
-                nbh.insert((j,d))
-                self.neighbourhoods[j].insert((i,d))
-                self.timerStop("NN_fit_bfnp_insert")
-
-            self.timerStart("NN_fit_bfnp_toList")
-            self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
-            self.timerStop("NN_fit_bfnp_toList")
-        self.timerStop("NN_fit_bfnp_loop")
-
-
-    def fit_chained(self, X, nebSize=None):
         self.timerStart("NN_fit_chained_init")
         if nebSize == None:
             nebSize = self.nebSize
@@ -444,77 +46,3 @@ class NNSearch:
                 for (i, x) in enumerate(X)
                 ]
         self.timerStop("NN_fit_chained_toList")
-
-
-    # ===============================================================
-    # Heuristic search
-    # ===============================================================
-    def fit_heuristic(self, X, nebSize=None):
-        if nebSize == None:
-            nebSize = self.nebSize
-
-        nPoints = len(X)
-
-
-        def walkUp(nbh, ball, x, i):
-            while ball.parent is not None:
-                print(f"{i}: up (r: {nbh.getMax()})")
-                oldBall = ball
-                ball = ball.parent
-                for c in ball.childs:
-                    if c != oldBall:
-                        walkDown(nbh, c, x)
-
-        def walkDown(nbh, ball, x):
-            if ball is None:
-                return
-
-            print(f"{i}: down (r: {nbh.getMax()})")
-
-            if dist(x, ball.center) - ball.radius < nbh.getMax()[1]:
-                if ball.childs == []:
-                    for (j, _) in ball.points:
-                        nbh.insert((j, dist(x, X[j])))
-                else:
-                    for c in ball.childs:
-                        walkDown(nbh, c, x)
-
-
-        def countBoles(b):
-            if b is None:
-                return 0
-
-            return 1 + sum(map(countBoles, b.childs))
-
-
-
-        root = Ball(X, range(len(X)))
-        queue = [root]
-        while queue != []:
-            ball = queue[0]
-            queue = queue[1:]
-            if len(ball) <= nebSize:
-                continue
-
-            queue = ball.divideBall(X) + queue
-
-
-        isGreaterThan = lambda x, y: x[1] > y[1]
-        self.neighbourhoods = [MaxHeap(nPoints, isGreaterThan, (i, 0.0)) for i in range(len(X))]
-
-        print("#B: " + str(countBoles(root)))
-
-        exit()
-        z = X[0]
-        for (i, x) in enumerate(X):
-            nbh = self.neighbourhoods[i]
-
-            b = root.smalestBallFor(i)
-            if b.parent is not None:
-                b = b.parent
-
-            for (j, _) in b.points:
-                d = dist(x, X[j])
-                nbh.insert((j, d))
-
-            walkUp(nbh, b, x, i)