| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- import math
- def dist(x,y):
- return math.sqrt(sum(map(lambda z: (z[0] - z[1])**2, zip(x, y))))
- def maxby(data, fn, startValue=0.0):
- m = startValue
- for v in data:
- m = max(m, fn(v))
- return m
- class MaxHeap:
- def __init__(self, maxSize=None, isGreaterThan=None, smalestValue=0.0):
- self.heap = []
- self.size = 0
- self.maxSize = maxSize
- self.isGreaterThan = isGreaterThan if isGreaterThan is not None else (lambda a, b: a > b)
- self.smalestValue = smalestValue
- def insert(self, v):
- if self.maxSize is not None and self.size >= self.maxSize:
- self.replaceMax(v)
- return
- pos = self.size
- self.size += 1
- self.heap.append(v)
- while pos > 0:
- w = self.heap[pos // 2]
- if not self.isGreaterThan(v, w):
- break
- self.heap[pos] = w
- pos = pos // 2
- self.heap[pos] = v
- def childPos(self, pos):
- c = (pos + 1) * 2
- return (c - 1, c)
- def removeMax(self):
- if self.heap == []:
- self.size = 0
- return
-
- self.heap[0] = self.heap[-1]
- self.heap = self.heap[:-1]
- self.size -= 1
- x = self.heap[0]
- pos = 0
- size = self.size
- while pos < size:
- (left, right) = self.childPos(pos)
- if left >= size:
- break
- y = self.heap[left]
- if right >= size:
- if self.isGreaterThan(y, x):
- self.heap[pos] = y
- self.heap[left] = x
- break
- z = self.heap[right]
- (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z)
- if not self.isGreaterThan(v, x):
- break
- self.heap[pos] = v
- self.heap[best] = x
- pos = best
- def replaceMax(self, x):
- if self.heap == []:
- self.heap = [x]
- self.size = 1
- return
-
- if self.isGreaterThan(x, self.heap[0]):
- return
- self.heap[0] = x
- pos = 0
- size = len(self.heap)
- while pos < size:
- (left, right) = self.childPos(pos)
- if left >= size:
- break
- y = self.heap[left]
- if right >= size:
- if self.isGreaterThan(y, x):
- self.heap[pos] = y
- self.heap[left] = x
- break
- z = self.heap[right]
- (best, v) = (left, y) if self.isGreaterThan(y, z) else (right, z)
- if not self.isGreaterThan(v, x):
- break
- self.heap[pos] = v
- self.heap[best] = x
- pos = best
- def getMax(self):
- if self.heap == []:
- return self.smalestValue
- return self.heap[0]
- def toArray(self, mapFn=None):
- if mapFn is None:
- return self.heap.copy()
- else:
- return [mapFn(x) for x in self.heap]
- def length(self):
- return self.size
- class NNSearch:
- def __init__(self, nebSize=5):
- self.nebSize = nebSize
- self.neighbourhoods = []
- def fit(self, X, nebSize=None):
- if nebSize == None:
- nebSize = self.nebSize
- isGreaterThan = lambda x, y: x[1] > y[1]
- self.neighbourhoods = [MaxHeap(nebSize, isGreaterThan, (None, 0.0)) for _i in range(len(X))]
- for (i, x) in enumerate(X):
- nbh = self.neighbourhoods[i]
- nbh.insert((i, 0.0))
- for (j, y) in enumerate(X[i+1:]):
- j += i + 1
- d = dist(x,y)
- nbh.insert((j,d))
- self.neighbourhoods[j].insert((i,d))
- self.neighbourhoods[i] = nbh.toArray(lambda v: v[0])
- def neighbourhoodOfItem(self, i):
- return self.neighbourhoods[i]
|