|
@@ -44,7 +44,7 @@ def create_total_impute(data, distance_matrix, missing_value_list):
|
|
|
]
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
-def create_distance_matrix(dense_data):
|
|
|
|
|
|
|
+def create_distance_matrix_old(dense_data):
|
|
|
dense_data_index = np.array(dense_data.index)
|
|
dense_data_index = np.array(dense_data.index)
|
|
|
dense_data = np.array(dense_data)
|
|
dense_data = np.array(dense_data)
|
|
|
|
|
|
|
@@ -56,6 +56,39 @@ def create_distance_matrix(dense_data):
|
|
|
])
|
|
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_distance_matrix(dense_data):
    """Rank the rows of ``dense_data`` by Euclidean distance to each row.

    Args:
        dense_data: Table-like object that exposes ``.index`` and converts
            with ``np.array`` to one numeric vector per entry. The callers
            are not visible here; presumably a pandas DataFrame without
            missing values -- confirm against the call sites.

    Returns:
        ``np.ndarray`` in which row ``n`` holds the index labels of
        ``dense_data`` ordered by ascending distance from entry ``n``.
    """
    dense_data_index = np.array(dense_data.index)
    dense_data = np.array(dense_data)
    size = len(dense_data)

    # The diagonal is left at 0.0: the distance between an entry and itself.
    distances = np.zeros((size, size))

    # The distance matrix is symmetric and evaluating a distance is the
    # expensive step, so every unordered pair is evaluated exactly once and
    # the value is mirrored across the diagonal.
    for nx in range(size):
        for ny in range(nx + 1, size):
            pair_distance = distance.euclidean(dense_data[nx], dense_data[ny])
            distances[nx, ny] = pair_distance
            distances[ny, nx] = pair_distance

    # Translate each row of distances into the index labels sorted by
    # ascending distance; this is the structure handed back to the caller.
    return np.array([dense_data_index[np.argsort(row)] for row in distances])
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
|
|
|
def fix_missing_values(data, limit=4):
|
|
def fix_missing_values(data, limit=4):
|
|
|
timing = tools.Timing("fix_missing_values")
|
|
timing = tools.Timing("fix_missing_values")
|