Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions methods/EverCore/src/agentic_layer/rerank_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,19 @@ def _create_service_from_config(
)
return DeepInfraRerankService(config)
elif provider.lower() == "voyage":
# Codex P1 / CodeRabbit major: the shared RERANK_MODEL default in
# env.template is "Qwen/Qwen3-Reranker-4B", which Voyage rejects.
# When the model is unset or still on the non-Voyage default, fall
# back to VoyageRerankConfig's provider-specific default ("rerank-2.5").
voyage_default_model = VoyageRerankConfig.__dataclass_fields__["model"].default
non_voyage_defaults = {"", None, "Qwen/Qwen3-Reranker-4B"}
resolved_model = (
model if model not in non_voyage_defaults else voyage_default_model
)
config = VoyageRerankConfig(
api_key=api_key,
base_url=base_url or 'https://api.voyageai.com/v1/rerank',
model=model,
base_url=base_url or "https://api.voyageai.com/v1/rerank",
model=resolved_model,
timeout=timeout,
max_retries=max_retries,
batch_size=batch_size,
Expand Down
62 changes: 52 additions & 10 deletions methods/EverCore/src/agentic_layer/rerank_voyage.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,30 +144,68 @@ def _parse_response(
async def rerank_documents(
self, query: str, documents: List[str], instruction: Optional[str] = None
) -> Dict[str, Any]:
"""Low-level reranking; ``instruction`` is ignored — Voyage uses the
query/documents pair directly."""
"""Low-level reranking.

Voyage's `/v1/rerank` endpoint accepts only `query` + `documents`; it
has no separate ``instruction`` field. To preserve the call-site
contract (search_mem_service passes skill-specific instructions), the
instruction is prepended to the query when provided, matching the
behaviour of vLLM/DeepInfra implementations.
"""
if not documents:
return {"results": []}

batch_size = self.config.batch_size or 100
# Codex R2 P2 + CodeRabbit PR8 follow-up: validate batch_size before
# slicing so a misconfigured zero/negative RERANK_BATCH_SIZE fails loudly
# with a RerankError. Left unchecked, a negative value makes the range()
# below yield no batches at all (silently empty results), while the
# previous `or 100` short-circuit silently corrected batch_size=0 to 100,
# masking the fail-fast contract this code is meant to enforce.
batch_size = self.config.batch_size
if batch_size is None:
batch_size = 100
if batch_size <= 0:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Validate zero batch size before applying fallback default

The new fail-fast guard does not trigger for RERANK_BATCH_SIZE=0 because batch_size is still computed as self.config.batch_size or 100 just above this check, so zero is converted to 100 before validation. That means a zero misconfiguration still runs silently instead of raising, which defeats the intended “fail loudly” behavior for invalid batch sizes.

Useful? React with 👍 / 👎.

raise RerankError(
f"Invalid Voyage batch_size={batch_size}; must be > 0"
)

# Codex R2 P2: honour instruction (was silently dropped, breaking
# skill-biased reranking in search_mem_service).
effective_query = (
f"{instruction}\n{query}" if instruction else query
)

batches = [
documents[i : i + batch_size] for i in range(0, len(documents), batch_size)
]

batch_tasks = [
self._send_rerank_request_batch(query, batch) for batch in batches
self._send_rerank_request_batch(effective_query, batch) for batch in batches
]
batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)

# Codex R2 P1: fail-fast on batch errors. The previous implementation
# extended scores with synthetic -100.0 sentinels and continued, which
# silently degraded ordering when Voyage was unreachable. With
# RERANK_FALLBACK_PROVIDER=none in production this re-introduced the
# exact silent-fail pattern that took 3 days to detect in the
# original Qdrant-migration bug. Raise so the rerank_service factory
# / HybridRerankService can route to a configured fallback or
# propagate the outage to the caller.
failures = [
(i, r) for i, r in enumerate(batch_results) if isinstance(r, Exception)
]
if failures:
first_i, first_err = failures[0]
raise RerankError(
f"Voyage rerank failed for {len(failures)}/{len(batches)} batches; "
f"first failure: batch {first_i}: {first_err}"
)

all_scores: List[float] = []
total_input_tokens = 0
last_response = None

for i, result in enumerate(batch_results):
if isinstance(result, Exception):
logger.error(f"Voyage rerank batch {i} failed: {result}")
all_scores.extend([-100.0] * len(batches[i]))
continue
for result in batch_results:
all_scores.extend(result.get("scores", []))
total_input_tokens += result.get("input_tokens", 0)
last_response = result
Expand Down Expand Up @@ -221,8 +259,12 @@ async def rerank_memories(
return []

try:
# CodeRabbit security: avoid logging raw query (multi-tenant data
# leakage risk). Log only metadata.
logger.debug(
f"Voyage reranking, query: {query!r}, num_texts={len(all_texts)}"
"Voyage reranking: query_len=%d, num_texts=%d",
len(query),
len(all_texts),
)
rerank_result = await self.rerank_documents(query, all_texts, instruction)
if "results" not in rerank_result:
Expand Down
Loading