From a514d213ad40b87fe84a1b4242f6ff4c8170ecfb Mon Sep 17 00:00:00 2001
From: Thot <thot@xinfty.space>
Date: Sat, 16 May 2026 20:59:46 +0000
Subject: [PATCH 1/2] fix(everos): voyage rerank fail-fast + tenant-safe
 logging + provider-default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses CodeRabbit + Codex review findings on #7 that were left open at
merge:

- rerank_voyage.rerank_documents: batches that raise no longer fabricate
  -100.0 sentinel scores. With RERANK_FALLBACK_PROVIDER=none in production
  the previous code silently degraded ranking when Voyage was unreachable —
  exactly the silent-fail pattern that took 3 days to detect in the
  original Qdrant-migration bug (Codex R2 P1).
- rerank_voyage.rerank_documents: validate batch_size > 0 so a misconfigured
  RERANK_BATCH_SIZE fails loudly instead of producing zero batches and
  zero-filled scores (Codex R2 P2).
- rerank_voyage.rerank_documents: honour the instruction argument by
  prepending it to the query, matching vLLM/DeepInfra behaviour. Voyage's
  endpoint has no separate instruction field, so the previous drop broke
  skill-biased reranking in search_mem_service (Codex R2 P2).
- rerank_voyage.rerank_memories: stop logging the raw query string at
  debug level. Multi-tenant deployments treat the query as tenant data;
  metadata only (query_len, num_texts) is sufficient for diagnostics
  (CodeRabbit major).
- rerank_service._create_service_from_config: for provider=voyage, fall
  back to VoyageRerankConfig's provider-specific default (rerank-2.5)
  when the shared RERANK_MODEL is unset or still on the non-Voyage
  default Qwen/Qwen3-Reranker-4B. Without this the new voyage branch
  is broken in the default configuration (Codex P1, CodeRabbit major).

Verified: py_compile clean, ruff clean.
---
 .../src/agentic_layer/rerank_service.py       | 13 ++++-
 .../src/agentic_layer/rerank_voyage.py        | 56 ++++++++++++++++---
 2 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/methods/EverCore/src/agentic_layer/rerank_service.py b/methods/EverCore/src/agentic_layer/rerank_service.py
index e51fe4cf..1eb10fda 100644
--- a/methods/EverCore/src/agentic_layer/rerank_service.py
+++ b/methods/EverCore/src/agentic_layer/rerank_service.py
@@ -175,10 +175,19 @@ def _create_service_from_config(
         )
         return DeepInfraRerankService(config)
     elif provider.lower() == "voyage":
+        # Codex P1 / CodeRabbit major: the shared RERANK_MODEL default in
+        # env.template is "Qwen/Qwen3-Reranker-4B", which Voyage rejects.
+        # When the model is unset or still on the non-Voyage default, fall
+        # back to VoyageRerankConfig's provider-specific default ("rerank-2.5").
+        voyage_default_model = VoyageRerankConfig.__dataclass_fields__["model"].default
+        non_voyage_defaults = {"", None, "Qwen/Qwen3-Reranker-4B"}
+        resolved_model = (
+            model if model not in non_voyage_defaults else voyage_default_model
+        )
         config = VoyageRerankConfig(
             api_key=api_key,
-            base_url=base_url or 'https://api.voyageai.com/v1/rerank',
-            model=model,
+            base_url=base_url or "https://api.voyageai.com/v1/rerank",
+            model=resolved_model,
             timeout=timeout,
             max_retries=max_retries,
             batch_size=batch_size,
diff --git a/methods/EverCore/src/agentic_layer/rerank_voyage.py b/methods/EverCore/src/agentic_layer/rerank_voyage.py
index cd0cfdb3..686394a0 100644
--- a/methods/EverCore/src/agentic_layer/rerank_voyage.py
+++ b/methods/EverCore/src/agentic_layer/rerank_voyage.py
@@ -144,30 +144,64 @@ def _parse_response(
     async def rerank_documents(
         self, query: str, documents: List[str], instruction: Optional[str] = None
     ) -> Dict[str, Any]:
-        """Low-level reranking; ``instruction`` is ignored — Voyage uses the
-        query/documents pair directly."""
+        """Low-level reranking.
+
+        Voyage's `/v1/rerank` endpoint accepts only `query` + `documents`; it
+        has no separate ``instruction`` field. To preserve the call-site
+        contract (search_mem_service passes skill-specific instructions), the
+        instruction is prepended to the query when provided, matching the
+        behaviour of vLLM/DeepInfra implementations.
+        """
         if not documents:
             return {"results": []}
 
+        # Codex R2 P2: validate batch_size before slicing so a misconfigured
+        # negative/zero RERANK_BATCH_SIZE fails loudly instead of silently
+        # producing zero batches.
         batch_size = self.config.batch_size or 100
+        if batch_size <= 0:
+            raise RerankError(
+                f"Invalid Voyage batch_size={batch_size}; must be > 0"
+            )
+
+        # Codex R2 P2: honour instruction (was silently dropped, breaking
+        # skill-biased reranking in search_mem_service).
+        effective_query = (
+            f"{instruction}\n{query}" if instruction else query
+        )
+
         batches = [
             documents[i : i + batch_size] for i in range(0, len(documents), batch_size)
         ]
 
         batch_tasks = [
-            self._send_rerank_request_batch(query, batch) for batch in batches
+            self._send_rerank_request_batch(effective_query, batch) for batch in batches
         ]
         batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
 
+        # Codex R2 P1: fail-fast on batch errors. The previous implementation
+        # extended scores with synthetic -100.0 sentinels and continued, which
+        # silently degraded ordering when Voyage was unreachable. With
+        # RERANK_FALLBACK_PROVIDER=none in production this re-introduced the
+        # exact silent-fail pattern that took 3 days to detect in the
+        # original Qdrant-migration bug. Raise so the rerank_service factory
+        # / HybridRerankService can route to a configured fallback or
+        # propagate the outage to the caller.
+        failures = [
+            (i, r) for i, r in enumerate(batch_results) if isinstance(r, Exception)
+        ]
+        if failures:
+            first_i, first_err = failures[0]
+            raise RerankError(
+                f"Voyage rerank failed for {len(failures)}/{len(batches)} batches; "
+                f"first failure: batch {first_i}: {first_err}"
+            )
+
         all_scores: List[float] = []
         total_input_tokens = 0
         last_response = None
 
-        for i, result in enumerate(batch_results):
-            if isinstance(result, Exception):
-                logger.error(f"Voyage rerank batch {i} failed: {result}")
-                all_scores.extend([-100.0] * len(batches[i]))
-                continue
+        for result in batch_results:
             all_scores.extend(result.get("scores", []))
             total_input_tokens += result.get("input_tokens", 0)
             last_response = result
@@ -221,8 +255,12 @@ async def rerank_memories(
             return []
 
         try:
+            # CodeRabbit security: avoid logging raw query (multi-tenant data
+            # leakage risk). Log only metadata.
             logger.debug(
-                f"Voyage reranking, query: {query!r}, num_texts={len(all_texts)}"
+                "Voyage reranking: query_len=%d, num_texts=%d",
+                len(query),
+                len(all_texts),
             )
             rerank_result = await self.rerank_documents(query, all_texts, instruction)
             if "results" not in rerank_result:

From cb9244b38ee39be524f173546f954d35ae5900dd Mon Sep 17 00:00:00 2001
From: Thot <thot@xinfty.space>
Date: Sat, 16 May 2026 21:04:08 +0000
Subject: [PATCH 2/2] fix(everos): batch_size=0 must fail-fast, not silently
 become 100
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses CodeRabbit finding on #8: the previous `batch_size = self.config.batch_size or 100` short-circuit silently corrected explicit `RERANK_BATCH_SIZE=0` to 100, contradicting the very fail-fast contract this code was added to enforce. Same silent-correction class as the original -100.0 sentinel pattern being fixed in this PR.

Order of operations: bind raw config value → apply None-default → validate >0.
---
 methods/EverCore/src/agentic_layer/rerank_voyage.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/methods/EverCore/src/agentic_layer/rerank_voyage.py b/methods/EverCore/src/agentic_layer/rerank_voyage.py
index 686394a0..19a2f896 100644
--- a/methods/EverCore/src/agentic_layer/rerank_voyage.py
+++ b/methods/EverCore/src/agentic_layer/rerank_voyage.py
@@ -155,10 +155,14 @@ async def rerank_documents(
         if not documents:
             return {"results": []}
 
-        # Codex R2 P2: validate batch_size before slicing so a misconfigured
-        # negative/zero RERANK_BATCH_SIZE fails loudly instead of silently
-        # producing zero batches.
-        batch_size = self.config.batch_size or 100
+        # Codex R2 P2 + CodeRabbit PR8 follow-up: validate batch_size before
+        # slicing so a misconfigured negative/zero RERANK_BATCH_SIZE fails
+        # loudly instead of silently producing zero batches. The previous
+        # `or 100` short-circuit silently corrected batch_size=0 to 100,
+        # masking the fail-fast contract this code is meant to enforce.
+        batch_size = self.config.batch_size
+        if batch_size is None:
+            batch_size = 100
         if batch_size <= 0:
             raise RerankError(
                 f"Invalid Voyage batch_size={batch_size}; must be > 0"