Prechádzať zdrojové kódy

More speedup for distance matrix.

Kristian Schultz pred 3 rokmi
rodič
commit
fc2b935434
1 zmenený súbor, 3 pridania a 15 odobraní
  1. 3 15
      fdc/missingValues.py

+ 3 - 15
fdc/missingValues.py

@@ -63,22 +63,10 @@ def create_distance_matrix(dense_data):
 
     matrix = [[ None for i in range(size)] for j in range(size)]
 
-    # Calculate the distances. As the distance matrix is symmetric we can do
-    # that with at most n*n/2 distance evaluations.
+    # Calculate the squared Euclidean distances.
     for nx, x in enumerate(dense_data):
-        for ny, y in enumerate(dense_data):
-            # Same index so distance is 0.0
-            if nx == ny:
-                matrix[nx][ny] = 0.0
-            
-            # As the matrix is symmetric we can copy already calculated values.
-            # As distance computation is expensive we should do that.
-            elif nx > ny:
-                matrix[nx][ny] = matrix[ny][nx]
-            
-            # Unseen pair so calculate the distance.
-            else:
-                matrix[nx][ny] = distance.euclidean(x, y)
+        b = dense_data - x
+        matrix[nx] = np.sum(b*b, axis=1)
 
     # Calculate the indices and replace the distance rows.
     # So we create our result matrix and do cleanup at the same time.