import math from ctypes import cdll, c_uint, c_double, c_void_p import tensorflow as tf import tensorflow.keras.layers as L import numpy as np import numpy.ctypeslib as npct array_2d_uint = npct.ndpointer(dtype=np.uint, ndim=2, flags='CONTIGUOUS') array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='CONTIGUOUS') array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='CONTIGUOUS') from sklearn.neighbors import NearestNeighbors from library.timing import timing from library.MaxHeap import MaxHeap nbhLib = cdll.LoadLibrary("./library/c/libNeighborhood.so") nbhLib.Neighborhood.rettype = None nbhLib.Neighborhood.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint] nbhLib.NeighborhoodHeuristic.rettype = None nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint] def scalarP(a, b): return sum(map(lambda c: c[0] * c[1], zip(a, b))) def norm2(v): return sum(map(lambda z: z*z, v)) def dist(x,y): return norm2(x - y) def maxby(data, fn, startValue=0.0): m = startValue for v in data: m = max(m, fn(v)) return m def distancesToPoint(p, points): w = np.array(np.repeat([p], len(points), axis=0)) d = L.Subtract()([w, np.array(points)]) t = L.Dot(axes=(1,1))([d,d]) # As the concrete distance is not needed and sqrt(x) is strict monotone # we avoid here unneccessary calculating of expensive roots. return t.numpy() def calculateCenter(points): if points.shape[0] == 1: return points[0] return tf.keras.layers.Average()(np.array(points)).numpy() def centerPoints(points): points = np.array(points) center = L.Average()(list(points)).numpy() ctr = np.array(np.repeat([center], points.shape[0], axis=0)) return L.Subtract()([ctr, points]).numpy() def maxNormPoints(points): points = np.array(points) a = L.Lambda(lambda x: np.abs(x))(points) a = L.Reshape((points.shape[1], 1))(a) m = L.GlobalMaxPooling1D()(a) m = L.Reshape((1,)) return m.numpy() def twoNormSquaredPoints(points): points = np.array(points) return L.Dot(axes=(1,1))([points,points]).numpy() def twoNormPoints(points): points = np.array(points) nsq = L.Dot(axes=(1,1))([points,points]) return L.Lambda(lambda x: np.sqrt(x))(points).numpy() def norms(points): points = np.array(points) a = L.Lambda(lambda x: np.abs(x))(points) a = L.Reshape((points.shape[1], 1))(a) m = L.GlobalMaxPooling1D()(a) m = L.Reshape((1,))(m) nsq = L.Dot(axes=(1,1))([points,points]) return L.Concatenate()([m, nsq]).numpy() class NNSearch: def __init__(self, nebSize=5, timingDict=None): self.nebSize = nebSize self.neighbourhoods = [] self.timingDict = timingDict def timerStart(self, name): if self.timingDict is not None: if name not in self.timingDict: self.timingDict[name] = timing(name) self.timingDict[name].start() def timerStop(self, name): if self.timingDict is not None: if name in self.timingDict: self.timingDict[name].stop() def neighbourhoodOfItem(self, i): return self.neighbourhoods[i] def fit(self, X, nebSize=None): self.timerStart("NN_fit_all") self.fit_chained(X, nebSize) #self.fit_bruteForce_np(X, nebSize) self.timerStop("NN_fit_all") def fit_optimized(self, X, nebSize=None): self.timerStart("NN_fit_all") if nebSize == None: nebSize = self.nebSize nPoints = len(X) nFeatures = len(X[0]) if nFeatures > 15 or nebSize >= (nPoints // 2): print("Using brute force") self.fit_bruteForce_np(X, nebSize) else: print("Using chained") self.fit_chained(X, nebSize) self.timerStop("NN_fit_all") def fit_bruteForce(self, X, nebSize=None): if nebSize == None: nebSize = self.nebSize self.timerStart("NN_fit_bf_init") isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))] self.timerStop("NN_fit_bf_init") self.timerStart("NN_fit_bf_loop") for (i, x) in enumerate(X): nbh = self.neighbourhoods[i] for (j, y) in enumerate(X[i+1:]): j += i + 1 self.timerStart("NN_fit_bf_dist") d = dist(x,y) self.timerStop("NN_fit_bf_dist") self.timerStart("NN_fit_bf_insert") nbh.insert((j,d)) self.neighbourhoods[j].insert((i,d)) self.timerStop("NN_fit_bf_insert") self.timerStart("NN_fit_bf_toList") self.neighbourhoods[i] = nbh.toArray(lambda v: v[0]) self.timerStop("NN_fit_bf_toList") self.timerStop("NN_fit_bf_loop") def fit_bruteForce_np(self, X, nebSize=None): self.timerStart("NN_fit_bfnp_init") numOfPoints = len(X) nFeatures = len(X[0]) tX = tf.convert_to_tensor(X) def distancesTo(x): w = np.repeat([x], numOfPoints, axis=0) d = tf.keras.layers.Subtract()([w,tX]) t = tf.keras.layers.Dot(axes=(1,1))([d,d]) return t.numpy() if nebSize == None: nebSize = self.nebSize isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (i, 0.0)) for i in range(len(X))] self.timerStop("NN_fit_bfnp_init") self.timerStart("NN_fit_bfnp_loop") for (i, x) in enumerate(X): self.timerStart("NN_fit_bfnp_dist") distances = distancesTo(x) self.timerStop("NN_fit_bfnp_dist") nbh = self.neighbourhoods[i] for (j, y) in enumerate(X[i+1:]): j += i + 1 d = distances[j] self.timerStart("NN_fit_bfnp_insert") nbh.insert((j,d)) self.neighbourhoods[j].insert((i,d)) self.timerStop("NN_fit_bfnp_insert") self.timerStart("NN_fit_bfnp_toList") self.neighbourhoods[i] = nbh.toArray(lambda v: v[0]) self.timerStop("NN_fit_bfnp_toList") self.timerStop("NN_fit_bfnp_loop") def fit_chained(self, X, nebSize=None): self.timerStart("NN_fit_chained_init") if nebSize == None: nebSize = self.nebSize nPoints = len(X) nFeatures = len(X[0]) neigh = NearestNeighbors(n_neighbors=nebSize) neigh.fit(X) self.timerStop("NN_fit_chained_init") self.timerStart("NN_fit_chained_toList") self.neighbourhoods = [ (neigh.kneighbors([x], nebSize, return_distance=False))[0] for (i, x) in enumerate(X) ] self.timerStop("NN_fit_chained_toList") def fit_cLib(self, X, nebSize=None): self.timerStart("NN_fit_cLib_init") if nebSize == None: nebSize = self.nebSize nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])]) self.timerStop("NN_fit_cLib_init") self.timerStart("NN_fit_cLib_call") nbhLib.Neighborhood(nebSize, X.shape[0], X.shape[1], X, nbh) self.timerStop("NN_fit_cLib_call") self.timerStart("NN_fit_cLib_list") self.neighbourhoods = list(nbh) self.timerStop("NN_fit_cLib_list") def fit_cLibHeuristic(self, X, nebSize=None): self.timerStart("NN_fit_cLib_init") if nebSize == None: nebSize = self.nebSize nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])]) self.timerStop("NN_fit_cLib_init") self.timerStart("NN_fit_cLib_call") nbhLib.NeighborhoodHeuristic(nebSize, X.shape[0], X.shape[1], X, nbh) self.timerStop("NN_fit_cLib_call") self.timerStart("NN_fit_cLib_list") self.neighbourhoods = list(nbh) self.timerStop("NN_fit_cLib_list") # =============================================================== # Heuristic search # =============================================================== def fit_heuristic(self, X, nebSize=None, debugLayer=0, withDouble=True): if nebSize == None: nebSize = self.nebSize self.timerStart("NN_fit_heuristic_init") nPoints = len(X) nFeatures = len(X[0]) nHeuristic = max(1, int(math.log(nFeatures))) isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(maxSize=nebSize, isGreaterThan=isGreaterThan, smalestValue=(i, 0.0)) for i in range(len(X))] self.timerStart("NN_fit_heuristic_lineStart") z = X[0] farest = 0 bestDist = 0.0 for (i, x) in enumerate(X): d = dist(x, z) if d > bestDist: farest = i bestDist = d lineStart = farest z = X[lineStart] self.timerStop("NN_fit_heuristic_lineStart") # print(f"lineStart: {lineStart}@{z} ... {bestDist}") self.timerStart("NN_fit_heuristic_lineEnd") bestDist = 0.0 for (i, x) in enumerate(X): d = dist(x, z) if d > bestDist: farest = i bestDist = d lineEnd = farest self.timerStop("NN_fit_heuristic_lineEnd") self.timerStart("NN_fit_heuristic_line") # print(f"lineEnd: {lineEnd}@{X[lineEnd]} ... {bestDist}") u = (X[lineEnd] - z) uFactor = (1 / math.sqrt(norm2(u))) u = uFactor * u # print(f"u: {u} ... {norm2(u)}") def heuristic(i,x): p = z + (scalarP(u, x - z) * u) dz = math.sqrt(dist(z, p)) dx = math.sqrt(dist(x, p)) return (i, dz, dx) line = [heuristic(i, x) for (i,x) in enumerate(X) ] line.sort(key= lambda a: a[1]) self.timerStop("NN_fit_heuristic_line") self.timerStop("NN_fit_heuristic_init") self.timerStart("NN_fit_heuristic_loop") s = 0 ff = False ptsDone = set() for (i,(xi, di, dix)) in enumerate(line): self.timerStart("NN_fit_heuristic_loop_init") h = self.neighbourhoods[xi] z = X[xi] self.timerStop("NN_fit_heuristic_loop_init") ptsDone.add(xi) self.timerStart("NN_fit_heuristic_loop_distance") ll = [(xj, norm2([dj - di, djx - dix])) for (xj, dj, djx) in line[i:]] # ll = [(xj, dist( # np.array([di, dix] + list(X[xi][0:nHeuristic])), # np.array([dj, djx] + list(X[xj][0:nHeuristic])))) # for (xj, dj, djx) in line[1:] # ] ll.sort(key = lambda a: a[1]) kk = distancesToPoint(z, [X[j] for (j, _) in ll]) self.timerStop("NN_fit_heuristic_loop_distance") for (d, (xj, djx)) in zip(kk, ll): ign = h.size >= nebSize and djx > h.getMax()[1] if ign: break else: #d = dist(X[xj], z) self.timerStart("NN_fit_heuristic_insert") s += 1 h.insert((xj, d)) k = self.neighbourhoods[xj] if not isinstance(k, list): k.insert((xi, d)) self.timerStop("NN_fit_heuristic_insert") # if xi == debugLayer: # d = dist(X[xj], z) # hint = "" # if djx > d: # hint += "!!" # if ign: # hint += "*" # print(f"xj:{xj} dx:{h.getMax()[1]:0.1f} djx:{djx:0.1f} d:{d:0.1f}" + hint) self.timerStart("NN_fit_heuristic_toArray") self.neighbourhoods[xi] = h.toArray() self.timerStop("NN_fit_heuristic_toArray") self.timerStop("NN_fit_heuristic_loop") print(f"calculated distances: {s} / {nPoints * (nPoints - 1)}")