Prechádzať zdrojové kódy

Added c library for faster neighborhood search.

Kristian Schultz 4 rokov pred
rodič
commit
8f5d156ae1

+ 46 - 0
library/NNSearch_experimental.py

@@ -1,9 +1,26 @@
 import math
+from ctypes import cdll, c_uint, c_double, c_void_p
 
 import tensorflow as tf
 import numpy as np
+import numpy.ctypeslib as npct
+
+array_2d_uint = npct.ndpointer(dtype=np.uint, ndim=2, flags='CONTIGUOUS')
+array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags='CONTIGUOUS')
+array_2d_double = npct.ndpointer(dtype=np.double, ndim=2, flags='CONTIGUOUS')
+
+
 from sklearn.neighbors import NearestNeighbors
 from library.timing import timing
+from library.MaxHeap import MaxHeap
+
+
+nbhLib = cdll.LoadLibrary("./library/c/libNeighborhood.so")
+nbhLib.Neighborhood.rettype = None
+nbhLib.Neighborhood.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
+
+nbhLib.NeighborhoodHeuristic.rettype = None
+nbhLib.NeighborhoodHeuristic.argtypes = [c_uint, c_uint, c_uint, array_2d_double, array_2d_uint]
 
 
 def dist(x,y):
@@ -302,6 +319,35 @@ class NNSearch:
         self.timerStop("NN_fit_chained_toList")
 
 
+    def fit_cLib(self, X, nebSize=None):
+        self.timerStart("NN_fit_cLib_init")
+        if nebSize == None:
+            nebSize = self.nebSize
+        
+        nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
+        self.timerStop("NN_fit_cLib_init")
+        self.timerStart("NN_fit_cLib_call")
+        nbhLib.Neighborhood(nebSize, X.shape[0], X.shape[1], X, nbh)
+        self.timerStop("NN_fit_cLib_call")
+        self.timerStart("NN_fit_cLib_list")
+        self.neighbourhoods = list(nbh)
+        self.timerStop("NN_fit_cLib_list")
+
+    def fit_cLibHeuristic(self, X, nebSize=None):
+        self.timerStart("NN_fit_cLib_init")
+        if nebSize == None:
+            nebSize = self.nebSize
+        
+        nbh = np.array([np.zeros(nebSize, dtype=np.uint) for i in range(X.shape[0])])
+        self.timerStop("NN_fit_cLib_init")
+        self.timerStart("NN_fit_cLib_call")
+        nbhLib.NeighborhoodHeuristic(nebSize, X.shape[0], X.shape[1], X, nbh)
+        self.timerStop("NN_fit_cLib_call")
+        self.timerStart("NN_fit_cLib_list")
+        self.neighbourhoods = list(nbh)
+        self.timerStop("NN_fit_cLib_list")
+        
+
     # ===============================================================
     # Heuristic search
     # ===============================================================

+ 106 - 0
library/c/Heuristic.c

@@ -0,0 +1,106 @@
+#include "MaxHeap.h"
+#include "utils.h"
+
+#define Point(i)    (&(data[i * nFeatures]))
+
+
+pyWord findFarestPointTo(SearchParams params, pyReal * point) {
+    pyReal d, bestDist = 0.0;
+    pyWord i, bestIdx = 0 - 1;
+
+    for(i = 0 ; i < params->nPoints ; i ++ ) {
+        d = distSquared(point, Point(i));
+        if (d > bestDist) {
+            bestDist = d;
+            bestIdx = i;
+        }
+    }
+
+    return bestIdx;
+}
+
+
+
+void NeighborhoodHeuristic(const pyWord nbhSize, const pyWord nPoints, const pyWord nFeatures, const pyReal * data, pyWord * neighborhoods) {
+    SearchParams params = {
+        .nbhSize = nbhSize,
+        .nPoints = nPoints,
+        .nFeatures = nFeatures,
+        .data = data,
+        .neighborhoods = neighborhoods
+    };
+
+    int success = 0;
+    pyWord i, j;
+    pyReal d;
+    pyReal lineVector = NULL;
+    pyReal accu = NULL;
+
+    if(initNbhHeaps(&params)) {
+        return;
+    }
+
+    do {
+
+        const pyWord lineStartIdx = findFarestPointTo(params, Point(0));
+        if(lineStartIdx >= params->nPoints) {
+            break;
+        }
+
+        pyReal * lineStartPoint = Point(lineStartIdx);
+        const pyWord lineEndIdx = findFarestPointTo(params, lineStartPoint);
+        pyReal * lineEndPoint = Point(lineEndIdx);
+
+
+        pyReal lineVector * = malloc(params->nFeatures * sizeof(pyReal));
+        if(!lineVector) {
+            break;
+        }
+        lineVector = vecSub(nFeatures, lineEndPoint, lineStartPoint); 
+        d = vecNorm(lineVector);
+        if(d == 0.0) {
+            break;
+        }
+        vecScale(nFeatures, lineVector, 1.0 / d, lineVector);
+
+
+        pyReal accu * = malloc(params->nFeatures * sizeof(pyReal));
+        if(!accu) {
+            break;
+        }
+
+        // Berechne alle Distanzen
+        for(i = 0; i < nPoints; i ++) {
+            const pyReal *x = Point(i);
+
+            vecSub(nFeatures, accu, x, lineStartPoint);
+            d = vecScalarProduct(nFeatures, lineVector, accu);
+            vecScale(nFeatures, accu, d, lineVector);
+            vecAdd(nFeatures, accu, accu, lineStartPoint);
+
+            const pyReal di1 = sqrt(distSquared(accu, lineStartPoint, nFeatures));
+            const pyReal di2 = sqrt(distSquared(accu, x, nFeatures));
+
+            
+            for(j = i + 1; j < nPoints; j ++) {
+                const pyReal * y = Point(j);
+                d = distSquared(x, y, nFeatures);
+                
+                maxHeap_insert(&(params.nbhHeaps.heaps[i]), j, d);
+                maxHeap_insert(&(params.nbhHeaps.heaps[j]), i, d);
+            }
+        }
+    } while(0);
+
+    if(lineVector) {
+        free(lineVector);
+    }
+
+    if(!success) {
+        nbhSearchBruteForce(&params);
+    }
+    
+    freeNbhHeaps(&params);
+}
+
+

+ 50 - 0
library/c/LinAlgebra.c

@@ -0,0 +1,50 @@
+#include "types.h"
+#include <math.h>
+
+pyReal vecScalarProduct(const pyWord size, const pyReal * a, const pyReal * b) {
+    pyWord i;
+    pyReal s = 0.0;
+
+    for(i = 0; i < size ; i ++) {
+        s += a[i] * b[i];
+    }
+
+    return s;
+}
+
+pyReal vecNorm(const pyWord size, const pyReal * a) {
+    pyWord i;
+    pyReal s = 0.0;
+
+    for(i = 0; i < size ; i ++) {
+        s += a[i] * a[i];
+    }
+
+    return sqrt(s);
+}
+
+pyReal vecDistSquared(const pyWord size, const pyReal * a, const pyReal * b) {
+    pyWord i;
+    pyReal s = 0.0, d;
+
+    for(i = 0; i < size ; i ++) {
+        d = a[i] - b[i]
+        s += d * d;
+    }
+
+    return s;
+}
+
+void vecSubst(const pyWord size, pyReal * dst, const pyReal * a, const pyReal * b ) {
+    pyWord i;
+    for(i = 0; i < size ; i ++) {
+        dst[i] = a[i] - b[i];
+    }
+}
+
+void vecScale(const pyWord size, pyReal * dst, pyReal s, const pyReal * a) {
+    pyWord i;
+    for(i = 0; i < size ; i ++) {
+        dst[i] = s * a[i];
+    }
+}

+ 12 - 0
library/c/LinAlgebra.h

@@ -0,0 +1,12 @@
+#ifndef _LIN_ALGEBRA_H_
+#define _LIN_ALGEBRA_H_
+
+#include "types.h"
+
+pyReal vecScalarProduct(const pyWord size, const pyReal * a, const pyReal * b);
+pyReal vecNorm(const pyWord size, const pyReal * a);
+pyReal vecDistSquared(const pyWord size, const pyReal * a, const pyReal * b);
+void vecSubst(const pyWord size, pyReal * dst, const pyReal * a, const pyReal * b );
+
+
+#endif

+ 11 - 0
library/c/Makefile

@@ -0,0 +1,11 @@
+c_files = Neighborhood.c Heuristic.c utils.c MaxHeap.c
+h_files = Neighborhood.h utils.h MaxHeap.h types.h
+
+
+libNeighborhood.so: $(c_files) $(h_files)
+	gcc -shared -Wall -O2 $(c_files) -o $@
+
+
+test: test.c $(c_files)
+	gcc -Wall -O2 $< -o test.exe -DDoTest
+	./test.exe

+ 61 - 0
library/c/MaxHeap.c

@@ -0,0 +1,61 @@
+#include "MaxHeap.h"
+
+static inline void setItem(MaxHeap * heap, const pyWord pos, const pyWord x, const pyWord d) {
+    heap->indices[pos] = x;
+    heap->distances[pos] = d;
+}
+
+static inline void copyItem(MaxHeap * heap, const pyWord dst, const pyWord src) {
+    heap->indices[dst] = heap->indices[src];
+    heap->distances[dst] = heap->distances[src];
+}
+
+
+void maxHeap_insert(MaxHeap * heap, const pyWord id, const pyReal distance) {
+    if(!heap || heap->maxItems <= 0) {
+        return;
+    }
+
+    if(heap->size < heap->maxItems) {
+        pyWord pos = heap->size;
+        heap->size ++;
+        setItem(heap, pos, id, distance);
+        while(pos > 0) {
+            const pyWord p = pos / 2;
+            if (heap->distances[p] >= heap->distances[pos]) {
+                break;
+            }
+            copyItem(heap, pos, p);
+            pos = p;
+            setItem(heap, pos, id, distance);
+        }
+        return;
+    }
+
+    if(distance > heap->distances[0]) {
+        return;
+    }
+
+    setItem(heap, 0, id, distance);
+
+    pyWord pos = 0;
+    while(pos < heap->size) {
+        const pyWord right = (pos + 1) * 2;
+        const pyWord left = right - 1;
+
+        if(left >= heap->size) {
+            break;
+        }
+
+        const pyWord v = (right >= heap->size || heap->distances[right] < heap->distances[left]) ? left : right;
+
+        if(distance >= heap->distances[v]) {
+            break;
+        }
+
+        copyItem(heap, pos, v);
+        setItem(heap, v, id, distance);
+        pos = v;
+    }
+}
+

+ 8 - 0
library/c/MaxHeap.h

@@ -0,0 +1,8 @@
+#ifndef _MAX_HEAP_H_
+#define _MAX_HEAP_H_
+
+#include "types.h"
+
+void maxHeap_insert(MaxHeap * heap, const pyWord id, const pyReal distance);
+
+#endif

+ 46 - 0
library/c/Neighborhood.c

@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "MaxHeap.h"
+#include "utils.h"
+
+#define Point(i)    (&(data[i * nFeatures]))
+
+
+void nbhSearchBruteForce(SearchParams * params) {
+    pyWord i, j;
+    pyReal d;
+
+    // Berechne alle Distanzen
+    for(i = 0; i < nPoints; i ++) {
+        const pyReal *x = Point(i);
+        
+        for(j = i + 1; j < nPoints; j ++) {
+            const pyReal * y = Point(j);
+            d = distSquared(x, y, nFeatures);
+            
+            maxHeap_insert(&(params.nbhHeaps.heaps[i]), j, d);
+            maxHeap_insert(&(params.nbhHeaps.heaps[j]), i, d);
+        }
+    }
+}
+
+
+void Neighborhood(const pyWord nbhSize, const pyWord nPoints, const pyWord nFeatures, const pyReal * data, pyWord * neighborhoods) {
+    SearchParams params = {
+        .nbhSize = nbhSize,
+        .nPoints = nPoints,
+        .nFeatures = nFeatures,
+        .data = data,
+        .neighborhoods = neighborhoods
+    };
+
+    if(initNbhHeaps(&params)) {
+        return;
+    }
+
+    nbhSearchBruteForce(params);
+    
+    freeNbhHeaps(&params);
+}
+
+

+ 10 - 0
library/c/Neighborhood.h

@@ -0,0 +1,10 @@
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+#include "types.h"
+
+void nbhSearchBruteForce(SearchParams * params);
+
+void Neighborhood(const pyWord nbhSize, const pyWord nPoints, const pyWord nFeatures, const pyReal * data, pyWord * neighborhoods);
+
+#endif

+ 72 - 0
library/c/test.c

@@ -0,0 +1,72 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "Neighborhood.h"
+#include "MaxHeap.h"
+
+
+
+#ifdef DoTest
+void printHeap(const MaxHeap * heap) {
+    pyWord p;
+
+    printf("%u / %u\n", (unsigned int)heap->size, (unsigned int)heap->maxItems);
+    for(p = 0; p < heap->size; p ++) {
+        printf("[%u] %u -> %lf\n", (unsigned int) p, (unsigned int)heap->indices[p], heap->distances[p]);
+    }
+}
+
+
+int main(void) {
+#define testcases 5
+    pyReal distances[testcases];
+    pyWord idxs[testcases];
+    MaxHeap heap = {
+        .distances = distances,
+        .indices = idxs,
+        .maxItems = testcases,
+        .size = 1
+    };
+
+    distances[0] = 0.0;
+    idxs[0] = 999;
+
+    printHeap(&heap);
+    maxHeap_insert(&heap, 6, 6.0);
+    maxHeap_insert(&heap, 1, 1.0);
+    maxHeap_insert(&heap, 4, 4.0);
+    maxHeap_insert(&heap, 3, 3.0);
+    maxHeap_insert(&heap, 2, 2.0);
+    maxHeap_insert(&heap, 5, 5.0);
+    printHeap(&heap);
+
+
+
+#define nPoints 9
+#define nbhSize 5
+    pyWord nbh[nPoints * nbhSize];
+    pyReal points[2 * nPoints];
+    int x, y, p;
+    p = 0;
+    for(y = 0; y < 3 ; y ++) {
+        for(x = 0; x < 3 ; x ++) {
+            points[p] = x;
+            points[p + 1] = y;
+            printf("%d: (%d,%d)\n", p/2, x, y);
+            p += 2;
+        }
+    }
+
+    Neighborhood(nbhSize, nPoints, 2, points, nbh);
+    for(y = 0; y < nPoints; y++) {
+        pyWord * p = &nbh[y * nbhSize];
+        printf("%d [%d,%d] ", y, y % 3, y / 3);
+        for(x = 0; x < nbhSize ; x ++) {
+            printf(" (%u|%u,%u)", (unsigned int)p[x], (unsigned int)(p[x] % 3), (unsigned int)(p[x] / 3));
+        }
+        printf("\n");
+    }
+}
+#endif
+
+

+ 28 - 0
library/c/types.h

@@ -0,0 +1,28 @@
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+typedef unsigned long long pyWord;
+typedef double pyReal;
+
+typedef struct s_MaxHeap {
+    pyReal * distances;
+    pyWord * indices;
+    pyWord maxItems;
+    pyWord size;
+} MaxHeap;
+
+typedef struct s_NbhHeaps {
+    MaxHeap * heaps;
+    pyReal * distances;
+} NbhHeaps;
+
+typedef struct s_SearchParams {
+    const pyWord nbhSize;
+    const pyWord nPoints;
+    const pyWord nFeatures;
+    const pyReal * data;
+    pyWord * neighborhoods;
+    NbhHeaps nbhHeaps;
+} SearchParams;
+
+#endif

+ 73 - 0
library/c/utils.c

@@ -0,0 +1,73 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+
+pyReal distSquared(const pyReal * u, const pyReal * v, const pyWord nFeatures) {
+    pyWord n;
+    pyReal s = 0.0, t;
+    
+    for( n = 0 ; n < nFeatures ; n ++) {
+        t = u[n] - v[n];
+        s += t * t;
+    }
+    
+    return s;
+}
+
+
+void freeNbhHeaps(SearchParams * params) {
+    if(params->nbhHeaps.heaps) {
+       free(params->nbhHeaps.heaps);
+        params->nbhHeaps.heaps = NULL;
+    }
+    
+    if(params->nbhHeaps.distances) {
+        free(params->nbhHeaps.distances);
+        params->nbhHeaps.distances = NULL;
+    }
+}
+
+
+int initNbhHeaps(SearchParams * params) {
+    pyWord i, j;
+    
+    pyWord * nbhI;
+    pyReal * distsI;
+
+    params->nbhHeaps.heaps = NULL;
+    params->nbhHeaps.distances = NULL;
+
+    params->nbhHeaps.distances = (pyReal*) malloc((params->nPoints + 1) * params->nbhSize * sizeof(pyReal));
+    if(params->nbhHeaps.distances == NULL) {
+        puts("Out of memory!");
+        return 1;
+    }
+
+    params->nbhHeaps.heaps = (MaxHeap*) malloc((params->nPoints + 1) * sizeof(MaxHeap));
+    if(params->nbhHeaps.heaps == NULL) {
+        free(params->nbhHeaps.distances);
+        params->nbhHeaps.distances = NULL;
+        puts("Out of memory!");
+        return 1;
+    }
+    
+    // Initialisiere Distanzen und Nachbarschaften
+    for(i = 0; i < params->nPoints; i ++) {
+        distsI = &(params->nbhHeaps.distances[i * params->nbhSize]);
+        nbhI = &(params->neighborhoods[i * params->nbhSize]);
+        params->nbhHeaps.heaps[i].distances = distsI;
+        params->nbhHeaps.heaps[i].indices = nbhI;
+        params->nbhHeaps.heaps[i].maxItems = params->nbhSize;
+        params->nbhHeaps.heaps[i].size = 1;
+
+        for(j = 0; j < params->nbhSize; j ++) {
+            nbhI[j] = i;
+            distsI[j] = 0.0;
+        }
+    }
+
+    return 0;
+}
+
+

+ 11 - 0
library/c/utils.h

@@ -0,0 +1,11 @@
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+#include "types.h"
+
+pyReal distSquared(const pyReal * u, const pyReal * v, const pyWord nFeatures);
+
+int initNbhHeaps(SearchParams * params);
+void freeNbhHeaps(SearchParams * params);
+
+#endif