|
@@ -1,368 +0,0 @@
|
|
|
-import math
|
|
|
|
|
-from ctypes import cdll, c_uint, c_double, c_void_p
|
|
|
|
|
-
|
|
|
|
|
-import tensorflow as tf
|
|
|
|
|
-import tensorflow.keras.layers as L
|
|
|
|
|
-import numpy as np
|
|
|
|
|
-import numpy.ctypeslib as npct
|
|
|
|
|
-
|
|
|
|
|
-array_2d_uint = npct.ndpointer(dtype=np.uint, ndim=2, flags='CONTIGUOUS')
|
|
|
|
|
-array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='CONTIGUOUS')
|
|
|
|
|
-array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='CONTIGUOUS')
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
-from library.timing import timing
|
|
|
|
|
-from library.MaxHeap import MaxHeap
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-nbhLib = cdll.LoadLibrary("./library/c/libNeighborhood.so")
|
|
|
|
|
-nbhLib.Neighborhood.rettype = None
|
|
|
|
|
-nbhLib.Neighborhood.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
|
|
|
|
|
-
|
|
|
|
|
-#nbhLib.NeighborhoodHeuristic.rettype = None
|
|
|
|
|
-#nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def scalarP(a, b):
|
|
|
|
|
- return sum(map(lambda c: c[0] * c[1], zip(a, b)))
|
|
|
|
|
-
|
|
|
|
|
-def norm2(v):
|
|
|
|
|
- return sum(map(lambda z: z*z, v))
|
|
|
|
|
-
|
|
|
|
|
-def dist(x,y):
|
|
|
|
|
- return norm2(x - y)
|
|
|
|
|
-
|
|
|
|
|
-def maxby(data, fn, startValue=0.0):
|
|
|
|
|
- m = startValue
|
|
|
|
|
- for v in data:
|
|
|
|
|
- m = max(m, fn(v))
|
|
|
|
|
- return m
|
|
|
|
|
-
|
|
|
|
|
-def distancesToPoint(p, points):
|
|
|
|
|
- w = np.array(np.repeat([p], len(points), axis=0))
|
|
|
|
|
- d = L.Subtract()([w, np.array(points)])
|
|
|
|
|
- t = L.Dot(axes=(1,1))([d,d])
|
|
|
|
|
- # As the concrete distance is not needed and sqrt(x) is strict monotone
|
|
|
|
|
- # we avoid here unneccessary calculating of expensive roots.
|
|
|
|
|
- return t.numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def calculateCenter(points):
|
|
|
|
|
- if points.shape[0] == 1:
|
|
|
|
|
- return points[0]
|
|
|
|
|
- return tf.keras.layers.Average()(np.array(points)).numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def centerPoints(points):
|
|
|
|
|
- points = np.array(points)
|
|
|
|
|
- center = L.Average()(list(points)).numpy()
|
|
|
|
|
- ctr = np.array(np.repeat([center], points.shape[0], axis=0))
|
|
|
|
|
- return L.Subtract()([ctr, points]).numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def maxNormPoints(points):
|
|
|
|
|
- points = np.array(points)
|
|
|
|
|
- a = L.Lambda(lambda x: np.abs(x))(points)
|
|
|
|
|
- a = L.Reshape((points.shape[1], 1))(a)
|
|
|
|
|
- m = L.GlobalMaxPooling1D()(a)
|
|
|
|
|
- m = L.Reshape((1,))
|
|
|
|
|
- return m.numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def twoNormSquaredPoints(points):
|
|
|
|
|
- points = np.array(points)
|
|
|
|
|
- return L.Dot(axes=(1,1))([points,points]).numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def twoNormPoints(points):
|
|
|
|
|
- points = np.array(points)
|
|
|
|
|
- nsq = L.Dot(axes=(1,1))([points,points])
|
|
|
|
|
- return L.Lambda(lambda x: np.sqrt(x))(points).numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-def norms(points):
|
|
|
|
|
- points = np.array(points)
|
|
|
|
|
- a = L.Lambda(lambda x: np.abs(x))(points)
|
|
|
|
|
- a = L.Reshape((points.shape[1], 1))(a)
|
|
|
|
|
- m = L.GlobalMaxPooling1D()(a)
|
|
|
|
|
- m = L.Reshape((1,))(m)
|
|
|
|
|
- nsq = L.Dot(axes=(1,1))([points,points])
|
|
|
|
|
- return L.Concatenate()([m, nsq]).numpy()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-class NNSearch:
|
|
|
|
|
- def __init__(self, nebSize=5, timingDict=None):
|
|
|
|
|
- self.nebSize = nebSize
|
|
|
|
|
- self.neighbourhoods = []
|
|
|
|
|
- self.timingDict = timingDict
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def timerStart(self, name):
|
|
|
|
|
- if self.timingDict is not None:
|
|
|
|
|
- if name not in self.timingDict:
|
|
|
|
|
- self.timingDict[name] = timing(name)
|
|
|
|
|
-
|
|
|
|
|
- self.timingDict[name].start()
|
|
|
|
|
-
|
|
|
|
|
- def timerStop(self, name):
|
|
|
|
|
- if self.timingDict is not None:
|
|
|
|
|
- if name in self.timingDict:
|
|
|
|
|
- self.timingDict[name].stop()
|
|
|
|
|
-
|
|
|
|
|
- def neighbourhoodOfItem(self, i):
|
|
|
|
|
- return self.neighbourhoods[i]
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def fit(self, X, nebSize=None):
|
|
|
|
|
- self.timerStart("NN_fit_all")
|
|
|
|
|
- self.fit_chained(X, nebSize)
|
|
|
|
|
- #self.fit_bruteForce_np(X, nebSize)
|
|
|
|
|
- self.timerStop("NN_fit_all")
|
|
|
|
|
-
|
|
|
|
|
- def fit_optimized(self, X, nebSize=None):
|
|
|
|
|
- self.timerStart("NN_fit_all")
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- nPoints = len(X)
|
|
|
|
|
- nFeatures = len(X[0])
|
|
|
|
|
-
|
|
|
|
|
- if nFeatures > 15 or nebSize >= (nPoints // 2):
|
|
|
|
|
- print("Using brute force")
|
|
|
|
|
- self.fit_bruteForce_np(X, nebSize)
|
|
|
|
|
- else:
|
|
|
|
|
- print("Using chained")
|
|
|
|
|
- self.fit_chained(X, nebSize)
|
|
|
|
|
- self.timerStop("NN_fit_all")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def fit_bruteForce(self, X, nebSize=None):
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_bf_init")
|
|
|
|
|
- isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
|
|
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
|
|
|
|
|
- self.timerStop("NN_fit_bf_init")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_bf_loop")
|
|
|
|
|
- for (i, x) in enumerate(X):
|
|
|
|
|
- nbh = self.neighbourhoods[i]
|
|
|
|
|
-
|
|
|
|
|
- for (j, y) in enumerate(X[i+1:]):
|
|
|
|
|
- j += i + 1
|
|
|
|
|
- self.timerStart("NN_fit_bf_dist")
|
|
|
|
|
- d = dist(x,y)
|
|
|
|
|
- self.timerStop("NN_fit_bf_dist")
|
|
|
|
|
- self.timerStart("NN_fit_bf_insert")
|
|
|
|
|
- nbh.insert((j,d))
|
|
|
|
|
- self.neighbourhoods[j].insert((i,d))
|
|
|
|
|
- self.timerStop("NN_fit_bf_insert")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_bf_toList")
|
|
|
|
|
- self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
|
|
|
|
|
- self.timerStop("NN_fit_bf_toList")
|
|
|
|
|
- self.timerStop("NN_fit_bf_loop")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def fit_bruteForce_np(self, X, nebSize=None):
|
|
|
|
|
- self.timerStart("NN_fit_bfnp_init")
|
|
|
|
|
- numOfPoints = len(X)
|
|
|
|
|
- nFeatures = len(X[0])
|
|
|
|
|
-
|
|
|
|
|
- tX = tf.convert_to_tensor(X)
|
|
|
|
|
-
|
|
|
|
|
- def distancesTo(x):
|
|
|
|
|
- w = np.repeat([x], numOfPoints, axis=0)
|
|
|
|
|
- d = tf.keras.layers.Subtract()([w,tX])
|
|
|
|
|
- t = tf.keras.layers.Dot(axes=(1,1))([d,d])
|
|
|
|
|
- return t.numpy()
|
|
|
|
|
-
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
|
|
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))]
|
|
|
|
|
- self.timerStop("NN_fit_bfnp_init")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_bfnp_loop")
|
|
|
|
|
- for (i, x) in enumerate(X):
|
|
|
|
|
- self.timerStart("NN_fit_bfnp_dist")
|
|
|
|
|
- distances = distancesTo(x)
|
|
|
|
|
- self.timerStop("NN_fit_bfnp_dist")
|
|
|
|
|
- nbh = self.neighbourhoods[i]
|
|
|
|
|
-
|
|
|
|
|
- for (j, y) in enumerate(X[i+1:]):
|
|
|
|
|
- j += i + 1
|
|
|
|
|
- d = distances[j]
|
|
|
|
|
- self.timerStart("NN_fit_bfnp_insert")
|
|
|
|
|
- nbh.insert((j,d))
|
|
|
|
|
- self.neighbourhoods[j].insert((i,d))
|
|
|
|
|
- self.timerStop("NN_fit_bfnp_insert")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_bfnp_toList")
|
|
|
|
|
- self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
|
|
|
|
|
- self.timerStop("NN_fit_bfnp_toList")
|
|
|
|
|
- self.timerStop("NN_fit_bfnp_loop")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def fit_chained(self, X, nebSize=None):
|
|
|
|
|
- self.timerStart("NN_fit_chained_init")
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- nPoints = len(X)
|
|
|
|
|
- nFeatures = len(X[0])
|
|
|
|
|
-
|
|
|
|
|
- neigh = NearestNeighbors(n_neighbors=nebSize)
|
|
|
|
|
- neigh.fit(X)
|
|
|
|
|
- self.timerStop("NN_fit_chained_init")
|
|
|
|
|
- self.timerStart("NN_fit_chained_toList")
|
|
|
|
|
- self.neighbourhoods = [
|
|
|
|
|
- (neigh.kneighbors([x], nebSize, return_distance=False))[0]
|
|
|
|
|
- for (i, x) in enumerate(X)
|
|
|
|
|
- ]
|
|
|
|
|
- self.timerStop("NN_fit_chained_toList")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- def fit_cLib(self, X, nebSize=None):
|
|
|
|
|
- self.timerStart("NN_fit_cLib_init")
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
|
|
|
|
|
- self.timerStop("NN_fit_cLib_init")
|
|
|
|
|
- self.timerStart("NN_fit_cLib_call")
|
|
|
|
|
- nbhLib.Neighborhood(nebSize, X.shape[0], X.shape[1], X, nbh)
|
|
|
|
|
- self.timerStop("NN_fit_cLib_call")
|
|
|
|
|
- self.timerStart("NN_fit_cLib_list")
|
|
|
|
|
- self.neighbourhoods = list(nbh)
|
|
|
|
|
- self.timerStop("NN_fit_cLib_list")
|
|
|
|
|
-
|
|
|
|
|
- # def fit_cLibHeuristic(self, X, nebSize=None):
|
|
|
|
|
- # self.timerStart("NN_fit_cLib_init")
|
|
|
|
|
- # if nebSize == None:
|
|
|
|
|
- # nebSize = self.nebSize
|
|
|
|
|
- #
|
|
|
|
|
- # nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
|
|
|
|
|
- # self.timerStop("NN_fit_cLib_init")
|
|
|
|
|
- # self.timerStart("NN_fit_cLib_call")
|
|
|
|
|
- # nbhLib.NeighborhoodHeuristic(nebSize, X.shape[0], X.shape[1], X, nbh)
|
|
|
|
|
- # self.timerStop("NN_fit_cLib_call")
|
|
|
|
|
- # self.timerStart("NN_fit_cLib_list")
|
|
|
|
|
- # self.neighbourhoods = list(nbh)
|
|
|
|
|
- # self.timerStop("NN_fit_cLib_list")
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- # ===============================================================
|
|
|
|
|
- # Heuristic search
|
|
|
|
|
- # ===============================================================
|
|
|
|
|
- def fit_heuristic(self, X, nebSize=None, debugLayer=0, withDouble=True):
|
|
|
|
|
- if nebSize == None:
|
|
|
|
|
- nebSize = self.nebSize
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_init")
|
|
|
|
|
- nPoints = len(X)
|
|
|
|
|
- nFeatures = len(X[0])
|
|
|
|
|
- nHeuristic = max(1, int(math.log(nFeatures)))
|
|
|
|
|
- isGreaterThan = lambda x, y: x[1] > y[1]
|
|
|
|
|
- self.neighbourhoods = [MaxHeap(maxSize=nebSize, isGreaterThan=isGreaterThan, smalestValue=(i, 0.0)) for i in range(len(X))]
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_lineStart")
|
|
|
|
|
- z = X[0]
|
|
|
|
|
- farest = 0
|
|
|
|
|
- bestDist = 0.0
|
|
|
|
|
- for (i, x) in enumerate(X):
|
|
|
|
|
- d = dist(x, z)
|
|
|
|
|
- if d > bestDist:
|
|
|
|
|
- farest = i
|
|
|
|
|
- bestDist = d
|
|
|
|
|
-
|
|
|
|
|
- lineStart = farest
|
|
|
|
|
- z = X[lineStart]
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_lineStart")
|
|
|
|
|
-
|
|
|
|
|
- # print(f"lineStart: {lineStart}@{z} ... {bestDist}")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_lineEnd")
|
|
|
|
|
- bestDist = 0.0
|
|
|
|
|
- for (i, x) in enumerate(X):
|
|
|
|
|
- d = dist(x, z)
|
|
|
|
|
- if d > bestDist:
|
|
|
|
|
- farest = i
|
|
|
|
|
- bestDist = d
|
|
|
|
|
-
|
|
|
|
|
- lineEnd = farest
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_lineEnd")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_line")
|
|
|
|
|
- # print(f"lineEnd: {lineEnd}@{X[lineEnd]} ... {bestDist}")
|
|
|
|
|
- u = (X[lineEnd] - z)
|
|
|
|
|
- uFactor = (1 / math.sqrt(norm2(u)))
|
|
|
|
|
- u = uFactor * u
|
|
|
|
|
- # print(f"u: {u} ... {norm2(u)}")
|
|
|
|
|
-
|
|
|
|
|
- def heuristic(i,x):
|
|
|
|
|
- p = z + (scalarP(u, x - z) * u)
|
|
|
|
|
- dz = math.sqrt(dist(z, p))
|
|
|
|
|
- dx = math.sqrt(dist(x, p))
|
|
|
|
|
- return (i, dz, dx)
|
|
|
|
|
-
|
|
|
|
|
- line = [heuristic(i, x) for (i,x) in enumerate(X) ]
|
|
|
|
|
- line.sort(key= lambda a: a[1])
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_line")
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_init")
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_loop")
|
|
|
|
|
- s = 0
|
|
|
|
|
- ff = False
|
|
|
|
|
- ptsDone = set()
|
|
|
|
|
- for (i,(xi, di, dix)) in enumerate(line):
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_loop_init")
|
|
|
|
|
- h = self.neighbourhoods[xi]
|
|
|
|
|
- z = X[xi]
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_loop_init")
|
|
|
|
|
- ptsDone.add(xi)
|
|
|
|
|
-
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_loop_distance")
|
|
|
|
|
- ll = [(xj, norm2([dj - di, djx - dix])) for (xj, dj, djx) in line[i:]]
|
|
|
|
|
- # ll = [(xj, dist(
|
|
|
|
|
- # np.array([di, dix] + list(X[xi][0:nHeuristic])),
|
|
|
|
|
- # np.array([dj, djx] + list(X[xj][0:nHeuristic]))))
|
|
|
|
|
- # for (xj, dj, djx) in line[1:]
|
|
|
|
|
- # ]
|
|
|
|
|
- ll.sort(key = lambda a: a[1])
|
|
|
|
|
- kk = distancesToPoint(z, [X[j] for (j, _) in ll])
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_loop_distance")
|
|
|
|
|
-
|
|
|
|
|
- for (d, (xj, djx)) in zip(kk, ll):
|
|
|
|
|
- ign = h.size >= nebSize and djx > h.getMax()[1]
|
|
|
|
|
- if ign:
|
|
|
|
|
- break
|
|
|
|
|
- else:
|
|
|
|
|
- #d = dist(X[xj], z)
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_insert")
|
|
|
|
|
- s += 1
|
|
|
|
|
- h.insert((xj, d))
|
|
|
|
|
- k = self.neighbourhoods[xj]
|
|
|
|
|
- if not isinstance(k, list):
|
|
|
|
|
- k.insert((xi, d))
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_insert")
|
|
|
|
|
-
|
|
|
|
|
- # if xi == debugLayer:
|
|
|
|
|
- # d = dist(X[xj], z)
|
|
|
|
|
- # hint = ""
|
|
|
|
|
- # if djx > d:
|
|
|
|
|
- # hint += "!!"
|
|
|
|
|
- # if ign:
|
|
|
|
|
- # hint += "*"
|
|
|
|
|
- # print(f"xj:{xj} dx:{h.getMax()[1]:0.1f} djx:{djx:0.1f} d:{d:0.1f}" + hint)
|
|
|
|
|
- self.timerStart("NN_fit_heuristic_toArray")
|
|
|
|
|
- self.neighbourhoods[xi] = h.toArray()
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_toArray")
|
|
|
|
|
- self.timerStop("NN_fit_heuristic_loop")
|
|
|
|
|
- print(f"calculated distances: {s} / {nPoints * (nPoints - 1)}")
|
|
|
|
|
-
|
|
|