Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2025-05-15 - [Numerical Precision in Vectorized Distance Calculation]
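**Learning:** Using the expansion formula ||a-b||^2 = ||a||^2 + ||b||^2 - 2(a·b) for vectorized Euclidean distance is much faster than a per-sample loop, but it can produce small negative values due to floating-point precision limits (for example, when a and b are nearly identical the three terms cancel almost exactly).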
**Action:** Always use np.maximum(dists_sq, 0) when calculating squared distances with this formula to ensure numerical stability.
37 changes: 28 additions & 9 deletions face_engine/models/basic_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,40 @@ def __init__(self):
self.class_names = None

def fit(self, embeddings, class_names, **kwargs):
    """Store the reference embeddings and their labels.

    Args:
        embeddings: Reference embeddings, one row per sample. Converted
            with ``np.asarray`` so that ``predict`` can use array
            arithmetic on them.
        class_names: Labels stored as given; ``predict`` indexes them by
            the row index of the nearest reference embedding.
        **kwargs: Accepted for interface compatibility; ignored here.
    """
    # Convert once up front; a second raw assignment would only be a dead store.
    self.embeddings = np.asarray(embeddings)
    self.class_names = class_names
    # Pre-calculate squared norms for faster distance calculation in predict.
    self.norms_sq = np.sum(self.embeddings**2, axis=1)

def predict(self, embeddings):
    """Label each query embedding with its nearest stored embedding.

    Args:
        embeddings: Query embeddings, one row per sample.

    Returns:
        A ``(scores, class_names)`` tuple of equal-length lists. Each score
        is ``exp(-0.5 * d**2)`` where ``d`` is the Euclidean distance to the
        nearest stored embedding, and each class name is the label stored
        at that embedding's index. Both lists are empty for empty input.

    Raises:
        TrainError: If ``self.class_names`` is ``None`` (model not fitted).
    """
    if self.class_names is None:
        raise TrainError("Model is not fitted yet!")

    embeddings = np.asarray(embeddings)
    if len(embeddings) == 0:
        return [], []

    # The stored embeddings may not be an ndarray if the model state was
    # restored without going through fit(); normalise before array math.
    fitted = np.asarray(self.embeddings)

    # Reuse the norms cached by fit() when present and consistent with the
    # stored embeddings; otherwise recompute them.
    fitted_norms_sq = getattr(self, "norms_sq", None)
    if fitted_norms_sq is None or len(fitted_norms_sq) != len(fitted):
        fitted_norms_sq = np.sum(fitted**2, axis=1)

    # Expansion formula: ||a-b||^2 = ||a||^2 + ||b||^2 - 2(a.b)
    # Broadcasting: (M, 1) + (N,) - 2 * (M, N) -> (M, N)
    input_norms_sq = np.sum(embeddings**2, axis=1, keepdims=True)
    dists_sq = input_norms_sq + fitted_norms_sq - 2 * np.dot(embeddings, fitted.T)

    # Cancellation can leave tiny negative values; clamp them to zero.
    dists_sq = np.maximum(dists_sq, 0)

    indices = np.argmin(dists_sq, axis=1)
    min_dists_sq = dists_sq[np.arange(len(embeddings)), indices]

    scores = np.exp(-0.5 * min_dists_sq).tolist()
    class_names = [self.class_names[i] for i in indices]

    return scores, class_names

def save(self, dirname):
Expand Down