Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2025-05-21 - [Vectorized Distance Numerical Precision]
**Learning:** Using the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2(a · b) for vectorized distance calculation provides a significant speedup, but subtractive cancellation can introduce small floating-point errors, including slightly negative squared distances.
**Action:** Always apply np.maximum(dists_sq, 0) when using this formula so that squared distances are never negative.
40 changes: 31 additions & 9 deletions face_engine/models/basic_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,43 @@ def __init__(self):
self.class_names = None

def fit(self, embeddings, class_names, **kwargs):
    """Store the reference embeddings and their class names.

    Args:
        embeddings: array-like with one reference embedding per row
            (it is reduced along ``axis=1``, so it must be at least 2-D).
        class_names: labels stored as given; ``predict`` indexes into
            them by the row index of the nearest reference embedding.
        **kwargs: accepted for interface compatibility and ignored here.
    """
    # Convert once so later vectorized arithmetic works on an ndarray
    # even when the caller passes a list of lists.
    self.embeddings = np.asarray(embeddings)
    self.class_names = class_names
    # Cache the squared L2 norm of every reference row; predict() reuses
    # it in the ||a||^2 + ||b||^2 - 2 a.b distance expansion.
    self.norms_sq = np.sum(np.square(self.embeddings), axis=1)

def predict(self, embeddings):
    """Label each input embedding with its nearest fitted embedding.

    Args:
        embeddings: array-like with one query embedding per row.

    Returns:
        A ``(scores, class_names)`` tuple of two lists with one entry per
        query row. Each score is ``exp(-0.5 * d**2)`` where ``d`` is the
        Euclidean distance to the nearest fitted embedding, and each
        class name is the label stored for that fitted embedding.

    Raises:
        TrainError: if the model has not been fitted yet.
    """
    if self.class_names is None:
        raise TrainError("Model is not fitted yet!")

    embeddings = np.asarray(embeddings)

    # Nothing to classify. Returning early also avoids an axis error from
    # reducing a 1-D empty array along axis=1 below.
    if embeddings.size == 0:
        return [], []

    # Squared norms of the fitted embeddings are normally cached by fit();
    # recompute them for instances that were fitted before the cache existed.
    norms_sq_fitted = getattr(self, "norms_sq", None)
    if norms_sq_fitted is None:
        norms_sq_fitted = np.sum(np.square(self.embeddings), axis=1)

    # Pairwise squared distances via ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b,
    # giving an (n_input, n_fitted) matrix without a Python-level loop.
    norms_sq_input = np.sum(np.square(embeddings), axis=1)
    dot_product = np.dot(embeddings, self.embeddings.T)
    dists_sq = norms_sq_input[:, np.newaxis] + norms_sq_fitted - 2 * dot_product

    # Cancellation in the expansion can leave tiny negative values; clip
    # them so a score never exceeds exp(0) == 1.
    dists_sq = np.maximum(dists_sq, 0)

    indices = np.argmin(dists_sq, axis=1)
    min_dists_sq = dists_sq[np.arange(len(embeddings)), indices]

    scores = np.exp(-0.5 * min_dists_sq).tolist()
    class_names = [self.class_names[i] for i in indices]

    return scores, class_names

def save(self, dirname):
Expand Down