From 290600f7d96a0d6fcb75464289ccf218a3b6b530 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Thu, 14 May 2026 07:58:35 +0000 Subject: [PATCH 1/2] fix(qdrant): allow service to boot with VECTOR_STORE_BACKEND=qdrant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes that together unblock the Qdrant cutover. Without them the service crashes at FastAPI startup in a loop even though the Qdrant adapter itself is healthy. 1. Symmetric env-gate in MilvusLifespanProvider (core/lifespan/milvus_lifespan.py) QdrantLifespanProvider is already gated by VECTOR_STORE_BACKEND — when the value is anything other than 'qdrant' it is a no-op. The Milvus lifespan had no such gate: it always tried to connect to Milvus and raised on the first call when Milvus was offline. Because the Milvus lifespan runs at order=18 (before order=19 Qdrant lifespan), the Qdrant lifespan never got a chance to start. Added an early no-op return to startup when VECTOR_STORE_BACKEND=qdrant (this patch gates startup only; the shutdown path is not changed here). 2. Lazy collection resolution in BaseMilvusRepository (core/oxm/milvus/base_repository.py) BaseMilvusRepository.__init__ called model.async_collection() eagerly, which raises 'Collection instance not created' when the Milvus lifespan has not initialised the collection — exactly the case when the service runs in Qdrant mode. Because controllers like SearchMemoryService construct Milvus repositories unconditionally, this crashed the whole FastAPI startup. Replaced with a lazy @property: __init__ now only stashes the model class; the collection is resolved on first access. Result: Qdrant-mode boots cleanly, and Milvus-mode behaviour is unchanged (first repo call still resolves the collection the same way). 3. 
Corrected lazy import in QdrantConnectionCache key helper (core/tenants/tenantize/oxm/qdrant/config_utils.py) The lazy import inside get_tenant_qdrant_config was still pointing to the pre-rename module path core.tenants.tenantize.tenant_context; the actual module is core.tenants.tenant_contextvar. The error only fires when the Qdrant adapter tries to resolve a tenant — i.e. exactly at QdrantLifespanProvider.startup. Fixed the import string. Verified by booting the service with VECTOR_STORE_BACKEND=qdrant: lifespan order metrics -> mongodb -> milvus(no-op) -> elasticsearch -> qdrant (initialised) -> business -> longjob completes; /health returns 200; the Qdrant collections (101 today: 72 phase-3 sweep + 29 user_profile) are discovered and ready. --- .../EverCore/src/core/lifespan/milvus_lifespan.py | 13 +++++++++++++ .../src/core/oxm/milvus/base_repository.py | 15 ++++++++++++++- .../tenants/tenantize/oxm/qdrant/config_utils.py | 2 +- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/methods/EverCore/src/core/lifespan/milvus_lifespan.py b/methods/EverCore/src/core/lifespan/milvus_lifespan.py index 6d6f916d..af56dde5 100644 --- a/methods/EverCore/src/core/lifespan/milvus_lifespan.py +++ b/methods/EverCore/src/core/lifespan/milvus_lifespan.py @@ -2,6 +2,7 @@ Milvus lifespan provider implementation """ +import os from collections import defaultdict from fastapi import FastAPI from typing import Any @@ -41,6 +42,18 @@ async def startup(self, app: FastAPI) -> Any: Returns: Any: Milvus client information """ + # Symmetric env-gate (mirrors QdrantLifespanProvider): when running with + # the Qdrant adapter as the active vector backend, the Milvus lifespan + # must be a no-op. Without this gate it always tries to connect to + # Milvus and crashes the service when Milvus is offline (e.g. after + # cutover or during outage), preventing the Qdrant adapter — which + # runs at order=19, AFTER milvus order=18 — from ever starting. 
+ if os.getenv("VECTOR_STORE_BACKEND", "milvus") == "qdrant": + logger.info( + "VECTOR_STORE_BACKEND=qdrant — Milvus lifespan startup no-op" + ) + return None + logger.info("Initializing Milvus connection...") try: diff --git a/methods/EverCore/src/core/oxm/milvus/base_repository.py b/methods/EverCore/src/core/oxm/milvus/base_repository.py index cfa49e67..025e866f 100644 --- a/methods/EverCore/src/core/oxm/milvus/base_repository.py +++ b/methods/EverCore/src/core/oxm/milvus/base_repository.py @@ -40,10 +40,23 @@ def __init__(self, model: Type[T]): """ self.model = model self.model_name = model.__name__ - self.collection: Optional[AsyncCollection] = model.async_collection() + # Lazy collection resolution: the underlying Milvus collection is only + # available once `MilvusLifespanProvider` has run `ensure_loaded()`. + # When VECTOR_STORE_BACKEND=qdrant the Milvus lifespan is a no-op, so + # eager resolution here would crash the entire app startup at every + # repository `__init__`. We defer to first access of `self.collection` + # so the app boots and qdrant-mode never touches the Milvus pathway. 
+ self._collection_cache: Optional[AsyncCollection] = None self.schema = model._SCHEMA self.all_output_fields = [field.name for field in self.schema.fields] + @property + def collection(self) -> AsyncCollection: + """Lazy accessor for the underlying async Milvus collection.""" + if self._collection_cache is None: + self._collection_cache = self.model.async_collection() + return self._collection_cache + # ==================== Basic CRUD Operations ==================== async def insert(self, entity: T, flush: bool = False) -> str: diff --git a/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py b/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py index 4e857b57..2f806c9f 100644 --- a/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py +++ b/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py @@ -35,7 +35,7 @@ def get_tenant_qdrant_config() -> Optional[Dict[str, Any]]: oder ``None`` falls kein Tenant aktiv. """ # Lazy import vermeidet Circular-Dependency bei Adapter-Discovery-Time. - from core.tenants.tenantize.tenant_context import get_current_tenant + from core.tenants.tenant_contextvar import get_current_tenant # Fail-closed: an unexpected error during tenant resolution must not # degrade silently to the shared base-prefix path — that would route a From 774be68cce0c08c6a0b0f30d952971608a564b43 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Thu, 14 May 2026 08:14:10 +0000 Subject: [PATCH 2/2] fix(qdrant): normalize VECTOR_STORE_BACKEND in milvus_lifespan gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QdrantLifespanProvider already treats the env flag case-insensitively via .strip().lower(); MilvusLifespanProvider must do the same. 
Without the normalization a deployment that sets VECTOR_STORE_BACKEND=QDRANT (or the value with surrounding whitespace) would still start Qdrant — and also try to start Milvus, regressing the very crash this PR fixes. --- methods/EverCore/src/core/lifespan/milvus_lifespan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/methods/EverCore/src/core/lifespan/milvus_lifespan.py b/methods/EverCore/src/core/lifespan/milvus_lifespan.py index af56dde5..034569cf 100644 --- a/methods/EverCore/src/core/lifespan/milvus_lifespan.py +++ b/methods/EverCore/src/core/lifespan/milvus_lifespan.py @@ -48,7 +48,7 @@ async def startup(self, app: FastAPI) -> Any: # Milvus and crashes the service when Milvus is offline (e.g. after # cutover or during outage), preventing the Qdrant adapter — which # runs at order=19, AFTER milvus order=18 — from ever starting. - if os.getenv("VECTOR_STORE_BACKEND", "milvus") == "qdrant": + if os.getenv("VECTOR_STORE_BACKEND", "milvus").strip().lower() == "qdrant": logger.info( "VECTOR_STORE_BACKEND=qdrant — Milvus lifespan startup no-op" )