From a514d213ad40b87fe84a1b4242f6ff4c8170ecfb Mon Sep 17 00:00:00 2001 From: Thot Date: Sat, 16 May 2026 20:59:46 +0000 Subject: [PATCH 1/2] fix(everos): voyage rerank fail-fast + tenant-safe logging + provider-default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit + Codex review findings on #7 that were left open at merge: - rerank_voyage.rerank_documents: batches that raise no longer fabricate -100.0 sentinel scores. With RERANK_FALLBACK_PROVIDER=none in production the previous code silently degraded ranking when Voyage was unreachable — exactly the silent-fail pattern that took 3 days to detect in the original Qdrant-migration bug (Codex R2 P1). - rerank_voyage.rerank_documents: validate batch_size > 0 so a misconfigured RERANK_BATCH_SIZE fails loudly instead of producing zero batches and zero-filled scores (Codex R2 P2). - rerank_voyage.rerank_documents: honour the instruction argument by prepending it to the query, matching vLLM/DeepInfra behaviour. Voyage's endpoint has no separate instruction field, so the previous drop broke skill-biased reranking in search_mem_service (Codex R2 P2). - rerank_voyage.rerank_memories: stop logging the raw query string at debug level. Multi-tenant deployments treat the query as tenant data; metadata only (query_len, num_texts) is sufficient for diagnostics (CodeRabbit major). - rerank_service._create_service_from_config: for provider=voyage, fall back to VoyageRerankConfig's provider-specific default (rerank-2.5) when the shared RERANK_MODEL is unset or still on the non-Voyage default Qwen/Qwen3-Reranker-4B. Without this the new voyage branch is broken in the default configuration (Codex P1, CodeRabbit major). Verified: py_compile clean, ruff clean. --- .../src/agentic_layer/rerank_service.py | 13 ++++- .../src/agentic_layer/rerank_voyage.py | 56 ++++++++++++++++--- 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/methods/EverCore/src/agentic_layer/rerank_service.py b/methods/EverCore/src/agentic_layer/rerank_service.py index e51fe4cf..1eb10fda 100644 --- a/methods/EverCore/src/agentic_layer/rerank_service.py +++ b/methods/EverCore/src/agentic_layer/rerank_service.py @@ -175,10 +175,19 @@ def _create_service_from_config( ) return DeepInfraRerankService(config) elif provider.lower() == "voyage": + # Codex P1 / CodeRabbit major: the shared RERANK_MODEL default in + # env.template is "Qwen/Qwen3-Reranker-4B", which Voyage rejects. + # When the model is unset or still on the non-Voyage default, fall + # back to VoyageRerankConfig's provider-specific default ("rerank-2.5"). + voyage_default_model = VoyageRerankConfig.__dataclass_fields__["model"].default + non_voyage_defaults = {"", None, "Qwen/Qwen3-Reranker-4B"} + resolved_model = ( + model if model not in non_voyage_defaults else voyage_default_model + ) config = VoyageRerankConfig( api_key=api_key, - base_url=base_url or 'https://api.voyageai.com/v1/rerank', - model=model, + base_url=base_url or "https://api.voyageai.com/v1/rerank", + model=resolved_model, timeout=timeout, max_retries=max_retries, batch_size=batch_size, diff --git a/methods/EverCore/src/agentic_layer/rerank_voyage.py b/methods/EverCore/src/agentic_layer/rerank_voyage.py index cd0cfdb3..686394a0 100644 --- a/methods/EverCore/src/agentic_layer/rerank_voyage.py +++ b/methods/EverCore/src/agentic_layer/rerank_voyage.py @@ -144,30 +144,64 @@ def _parse_response( async def rerank_documents( self, query: str, documents: List[str], instruction: Optional[str] = None ) -> Dict[str, Any]: - """Low-level reranking; ``instruction`` is ignored — Voyage uses the - query/documents pair directly.""" + """Low-level reranking. + + Voyage's `/v1/rerank` endpoint accepts only `query` + `documents`; it + has no separate ``instruction`` field. To preserve the call-site + contract (search_mem_service passes skill-specific instructions), the + instruction is prepended to the query when provided, matching the + behaviour of vLLM/DeepInfra implementations. + """ if not documents: return {"results": []} + # Codex R2 P2: validate batch_size before slicing so a misconfigured + # negative/zero RERANK_BATCH_SIZE fails loudly instead of silently + # producing zero batches. batch_size = self.config.batch_size or 100 + if batch_size <= 0: + raise RerankError( + f"Invalid Voyage batch_size={batch_size}; must be > 0" + ) + + # Codex R2 P2: honour instruction (was silently dropped, breaking + # skill-biased reranking in search_mem_service). + effective_query = ( + f"{instruction}\n{query}" if instruction else query + ) + batches = [ documents[i : i + batch_size] for i in range(0, len(documents), batch_size) ] batch_tasks = [ - self._send_rerank_request_batch(query, batch) for batch in batches + self._send_rerank_request_batch(effective_query, batch) for batch in batches ] batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True) + # Codex R2 P1: fail-fast on batch errors. The previous implementation + # extended scores with synthetic -100.0 sentinels and continued, which + # silently degraded ordering when Voyage was unreachable. With + # RERANK_FALLBACK_PROVIDER=none in production this re-introduced the + # exact silent-fail pattern that took 3 days to detect in the + # original Qdrant-migration bug. Raise so the rerank_service factory + # / HybridRerankService can route to a configured fallback or + # propagate the outage to the caller. + failures = [ + (i, r) for i, r in enumerate(batch_results) if isinstance(r, Exception) + ] + if failures: + first_i, first_err = failures[0] + raise RerankError( + f"Voyage rerank failed for {len(failures)}/{len(batches)} batches; " + f"first failure: batch {first_i}: {first_err}" + ) + all_scores: List[float] = [] total_input_tokens = 0 last_response = None - for i, result in enumerate(batch_results): - if isinstance(result, Exception): - logger.error(f"Voyage rerank batch {i} failed: {result}") - all_scores.extend([-100.0] * len(batches[i])) - continue + for result in batch_results: all_scores.extend(result.get("scores", [])) total_input_tokens += result.get("input_tokens", 0) last_response = result @@ -221,8 +255,12 @@ async def rerank_memories( return [] try: + # CodeRabbit security: avoid logging raw query (multi-tenant data + # leakage risk). Log only metadata. logger.debug( - f"Voyage reranking, query: {query!r}, num_texts={len(all_texts)}" + "Voyage reranking: query_len=%d, num_texts=%d", + len(query), + len(all_texts), ) rerank_result = await self.rerank_documents(query, all_texts, instruction) if "results" not in rerank_result: From cb9244b38ee39be524f173546f954d35ae5900dd Mon Sep 17 00:00:00 2001 From: Thot Date: Sat, 16 May 2026 21:04:08 +0000 Subject: [PATCH 2/2] fix(everos): batch_size=0 must fail-fast, not silently become 100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses CodeRabbit finding on #8: the previous `batch_size = self.config.batch_size or 100` short-circuit silently corrected explicit `RERANK_BATCH_SIZE=0` to 100, contradicting the very fail-fast contract this code was added to enforce. Same silent-correction class as the original -100.0 sentinel pattern being fixed in this PR. Order of operations: bind raw config value → apply None-default → validate >0. --- methods/EverCore/src/agentic_layer/rerank_voyage.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/methods/EverCore/src/agentic_layer/rerank_voyage.py b/methods/EverCore/src/agentic_layer/rerank_voyage.py index 686394a0..19a2f896 100644 --- a/methods/EverCore/src/agentic_layer/rerank_voyage.py +++ b/methods/EverCore/src/agentic_layer/rerank_voyage.py @@ -155,10 +155,14 @@ async def rerank_documents( if not documents: return {"results": []} - # Codex R2 P2: validate batch_size before slicing so a misconfigured - # negative/zero RERANK_BATCH_SIZE fails loudly instead of silently - # producing zero batches. - batch_size = self.config.batch_size or 100 + # Codex R2 P2 + CodeRabbit PR8 follow-up: validate batch_size before + # slicing so a misconfigured negative/zero RERANK_BATCH_SIZE fails + # loudly instead of silently producing zero batches. The previous + # `or 100` short-circuit silently corrected batch_size=0 to 100, + # masking the fail-fast contract this code is meant to enforce. + batch_size = self.config.batch_size + if batch_size is None: + batch_size = 100 if batch_size <= 0: raise RerankError( f"Invalid Voyage batch_size={batch_size}; must be > 0"