import math def dist(x,y): return math.sqrt(sum(map(lambda z: (z[0] - z[1])**2, zip(x, y)))) def maxby(data, fn, startValue=0.0): m = startValue for v in data: m = max(m, fn(v)) return m class MaxHeap: def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=0.0): self.heap = [] self.size = 0 self.maxSize = maxSize self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b) self.smalestValue = smalestValue def insert(self, v): if self.maxSize is not None and self.size >= self.maxSize: self.replaceMax(v) return pos = self.size self.size += 1 self.heap.append(v) while pos > 0: w = self.heap[pos // 2] if not self.isGreaterThan(v, w): break self.heap[pos] = w pos = pos // 2 self.heap[pos] = v def childPos(self, pos): c = (pos + 1) * 2 return (c - 1, c) def removeMax(self): if self.heap == []: self.size = 0 return self.heap[0] = self.heap[-1] self.heap = self.heap[:-1] self.size -= 1 x = self.heap[0] pos = 0 size = self.size while pos < size: (left, right) = self.childPos(pos) if left >= size: break y = self.heap[left] if right >= size: if self.isGreaterThan(y, x): self.heap[pos] = y self.heap[left] = x break z = self.heap[right] (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z) if not self.isGreaterThan(v, x): break self.heap[pos] = v self.heap[best] = x pos = best def replaceMax(self, x): if self.heap == []: self.heap = [x] self.size = 1 return if self.isGreaterThan(x, self.heap[0]): return self.heap[0] = x pos = 0 size = len(self.heap) while pos < size: (left, right) = self.childPos(pos) if left >= size: break y = self.heap[left] if right >= size: if self.isGreaterThan(y, x): self.heap[pos] = y self.heap[left] = x break z = self.heap[right] (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z) if not self.isGreaterThan(v, x): break self.heap[pos] = v self.heap[best] = x pos = best def getMax(self): if self.heap == []: return self.smalestValue return self.heap[0] def toArray(self, mapFn=None): if mapFn is None: return self.heap.copy() else: return [mapFn(x) for x in self.heap] def length(self): return self.size class NNSearch: def __init__(self, nebSize=5): self.nebSize = nebSize self.neighbourhoods = [] def fit(self, X, nebSize=None): if nebSize == None: nebSize = self.nebSize isGreaterThan = lambda x, y: x[1] > y[1] self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (None, 0.0)) for _i in range(len(X))] for (i, x) in enumerate(X): nbh = self.neighbourhoods[i] nbh.insert((i, 0.0)) for (j, y) in enumerate(X[i+1:]): j += i + 1 d = dist(x,y) nbh.insert((j,d)) self.neighbourhoods[j].insert((i,d)) self.neighbourhoods[i] = nbh.toArray(lambda v: v[0]) def neighbourhoodOfItem(self, i): return self.neighbourhoods[i]