diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..516b516 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2025-05-15 - Vectorizing Euclidean Distance with Expansion Formula +**Learning:** Using the expansion formula $||a-b||^2 = ||a||^2 + ||b||^2 - 2ab$ for vectorized distance calculation provides a significant speedup (~7x-17x) compared to per-query loops. However, it can introduce small negative values due to floating-point precision errors (subtractive cancellation), which must be handled with `np.maximum(dists_sq, 0)`. +**Action:** Always use `np.maximum(dists_sq, 0)` when using the expansion formula and allow for small `rtol`/`atol` in tests. diff --git a/face_engine/models/basic_estimator.py b/face_engine/models/basic_estimator.py index fbbf2b9..e4aad75 100644 --- a/face_engine/models/basic_estimator.py +++ b/face_engine/models/basic_estimator.py @@ -18,23 +18,39 @@ class BasicEstimator(Estimator, name="basic"): def __init__(self): self.embeddings = None self.class_names = None + self.fitted_norms_sq = None def fit(self, embeddings, class_names, **kwargs): - self.embeddings = embeddings + self.embeddings = np.asarray(embeddings) self.class_names = class_names + # Pre-calculate squared norms of fitted embeddings for faster distance calculation + self.fitted_norms_sq = np.sum(self.embeddings**2, axis=1) def predict(self, embeddings): if self.class_names is None: raise TrainError("Model is not fitted yet!") - scores = [] - class_names = [] - for embedding in embeddings: - distances = np.linalg.norm(self.embeddings - embedding, axis=1) - index = np.argmin(distances) - score = np.exp(-0.5 * distances[index] ** 2) - scores.append(score) - class_names.append(self.class_names[index]) + embeddings = np.asarray(embeddings) + if embeddings.size == 0: + return [], [] + + # Vectorized Euclidean distance using the expansion: ||a-b||^2 = ||a||^2 + ||b||^2 - 2ab + # A: self.embeddings (N x D), B: embeddings (M x D) + # Resulting distances matrix: (M x N) + query_norms_sq = np.sum(embeddings**2, axis=1) + dot_product = np.dot(embeddings, self.embeddings.T) + + # dists_sq = query_norms_sq[:, None] + fitted_norms_sq[None, :] - 2 * dot_product + dists_sq = query_norms_sq[:, np.newaxis] + self.fitted_norms_sq - 2 * dot_product + # Handle potential negative values due to floating point precision + dists_sq = np.maximum(dists_sq, 0) + + indices = np.argmin(dists_sq, axis=1) + min_dists_sq = dists_sq[np.arange(len(indices)), indices] + + scores = np.exp(-0.5 * min_dists_sq).tolist() + class_names = [self.class_names[i] for i in indices] + return scores, class_names def save(self, dirname): @@ -46,3 +62,7 @@ def load(self, dirname): name = "%s.estimator.%s" % (self.name, "p") with open(os.path.join(dirname, name), "rb") as file: self.__dict__.update(pickle.load(file)) + + # Re-calculate fitted_norms_sq for backward compatibility + if self.embeddings is not None and self.fitted_norms_sq is None: + self.fitted_norms_sq = np.sum(self.embeddings**2, axis=1)