From f186658fa9fcbb6d41f1300ff45ae312dbbe3aa0 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 16:45:11 +0000 Subject: [PATCH 01/20] feat: setup Qdrant adapter skeleton for vector-store migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 (skeleton) of the milvus -> qdrant migration. No runtime behavior change yet — Milvus stays the default backend until cutover. - README: prepend fork header documenting motivation, status, approach, and concept mapping. Links to the Qdrant migration guide. - pyproject: add qdrant-client>=1.12,<2. - src/core/oxm/qdrant/qdrant_collection_base.py: stub IndexConfig and QdrantCollectionBase. ensure_all() is a no-op so the lifespan provider can iterate registered subclasses without crashing during the skeleton phase. upsert/search/delete raise NotImplementedError (Phase 2). - src/core/component/qdrant_client_factory.py: full QdrantClientFactory with env-driven get_qdrant_config(prefix=...), per-alias client caching, named clients, and graceful shutdown. https=Optional[bool] preserves qdrant-client's URL-scheme TLS detection; api_key=Optional[str] passes through cleanly without empty-string coercion. Registered as @component(primary=False) so the milvus factory remains the default until cutover. - Empty __init__.py for new oxm/qdrant and tenants/.../oxm/qdrant packages. Next: qdrant_lifespan.py (gated by VECTOR_STORE_BACKEND env flag) and full collection-base impl. 
--- README.md | 39 +++ methods/evermemos/pyproject.toml | 1 + .../core/component/qdrant_client_factory.py | 243 ++++++++++++++++++ .../evermemos/src/core/oxm/qdrant/__init__.py | 0 .../core/oxm/qdrant/qdrant_collection_base.py | 124 +++++++++ .../tenants/tenantize/oxm/qdrant/__init__.py | 0 6 files changed, 407 insertions(+) create mode 100644 methods/evermemos/src/core/component/qdrant_client_factory.py create mode 100644 methods/evermemos/src/core/oxm/qdrant/__init__.py create mode 100644 methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py create mode 100644 methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/__init__.py diff --git a/README.md b/README.md index 0a7f2f47..12c8364a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,42 @@ +> [!NOTE] +> ## Fork — Qdrant Migration +> +> This fork of [`EverMind-AI/EverOS`](https://github.com/EverMind-AI/EverOS) focuses on +> **migrating the vector backend from Milvus to Qdrant**. +> +> ### Why +> +> Milvus standalone with embedded etcd showed repeated startup races +> (`panic: etcdserver: leader changed`) leading to crash-loops and RAM +> exhaustion in our deployment. Rather than stacking more etcd workarounds, we +> migrate to Qdrant — whose architecture has no separate coordinator service. +> +> ### Status +> +> - `main` — tracks upstream `EverMind-AI/EverOS`. +> - `feature/qdrant-adapter` — work in progress. Phase 1: adapter skeleton. +> +> ### Approach +> +> EverOS' `src/infra_layer/adapters/out/search/` already supports multiple +> backends (Milvus + Elasticsearch). We add a Qdrant adapter under +> `src/core/oxm/qdrant/` and route via `VECTOR_STORE_BACKEND=qdrant`. The +> Milvus adapter stays untouched until cutover. 
+> +> ### Concept Mapping +> +> | Milvus | Qdrant | +> | -------------------- | --------------------------------- | +> | Collection | Collection (1:1) | +> | FieldSchema (vector) | `VectorParams(size, distance)` | +> | FieldSchema (scalar) | Payload field (schema-flexible) | +> | HNSW + COSINE | `HnswConfig` + `Distance.Cosine` | +> | Partition | Payload field OR separate coll. | +> +> Reference: [Qdrant Migration Guide — From Milvus](https://qdrant.tech/documentation/migrate-to-qdrant/from-milvus/). + +--- +
![banner-gif](https://github.com/user-attachments/assets/c2cef808-3e93-4f81-a194-dffe02ddd984) diff --git a/methods/evermemos/pyproject.toml b/methods/evermemos/pyproject.toml index 02a2912b..f99f53de 100644 --- a/methods/evermemos/pyproject.toml +++ b/methods/evermemos/pyproject.toml @@ -55,6 +55,7 @@ dependencies = [ "elastic-transport>=8.17.0,<9", # ES transport layer (pin major, transport API stability) # Milvus "pymilvus>=2.5.0,<2.6", # Pin minor: 2.6.x may change Collection/search API + "qdrant-client>=1.12,<2", # Qdrant Python client (Milvus->Qdrant migration, feature/qdrant-adapter) # Tokenization "jieba==0.42.1", # Graph Processing diff --git a/methods/evermemos/src/core/component/qdrant_client_factory.py b/methods/evermemos/src/core/component/qdrant_client_factory.py new file mode 100644 index 00000000..f7dea3ee --- /dev/null +++ b/methods/evermemos/src/core/component/qdrant_client_factory.py @@ -0,0 +1,243 @@ +""" +Qdrant Client Factory + +Analoge Implementierung zu ``core.component.milvus_client_factory.MilvusClientFactory`` +fuer die Milvus->Qdrant-Migration. + +Provides Qdrant client connection functionality based on environment variables. +""" + +import os +from typing import Dict, Optional + +from qdrant_client import QdrantClient + +from core.di.decorators import component +from core.observation.logger import get_logger + +logger = get_logger(__name__) + + +def _truthy(value: Optional[str]) -> bool: + """Konsistentes Env-Boolean-Parsing analog zu anderen EverOS-Configs.""" + if value is None: + return False + return value.strip().lower() in {"1", "true", "yes", "on"} + + +def get_qdrant_config(prefix: str = "") -> dict: + """ + Get Qdrant configuration from environment variables. + + Args: + prefix: Environment variable prefix, e.g., prefix="A" reads + "A_QDRANT_HOST"; if empty reads "QDRANT_HOST" etc. 
+ + Environment variables: + - ``{PREFIX_}QDRANT_HOST`` (default ``localhost``) + - ``{PREFIX_}QDRANT_PORT`` (default ``6333``, HTTP) + - ``{PREFIX_}QDRANT_GRPC_PORT`` (default ``6334``) + - ``{PREFIX_}QDRANT_API_KEY`` (optional) + - ``{PREFIX_}QDRANT_HTTPS`` (default ``false``) + - ``{PREFIX_}QDRANT_PREFER_GRPC`` (default ``false``) + - ``{PREFIX_}QDRANT_TIMEOUT`` (default ``30`` Sekunden) + + Returns: + dict mit Schluesseln ``host``, ``port``, ``grpc_port``, ``api_key``, + ``https``, ``prefer_grpc``, ``timeout``, ``url`` (assembled). + """ + + def _env(name: str, default: Optional[str] = None) -> str: + if prefix: + key = f"{prefix.upper()}_{name}" + else: + key = name + if default is None: + return os.getenv(key, "") + return os.getenv(key, default) + + host = _env("QDRANT_HOST", "localhost") + port = int(_env("QDRANT_PORT", "6333")) + grpc_port = int(_env("QDRANT_GRPC_PORT", "6334")) + # api_key / https sind explizit None wenn env nicht gesetzt — so kann + # qdrant-client die Defaults / URL-Scheme-Detection selbst uebernehmen. + api_key_raw = _env("QDRANT_API_KEY") + api_key: Optional[str] = api_key_raw or None + https_raw = os.getenv(f"{prefix.upper()}_QDRANT_HTTPS" if prefix else "QDRANT_HTTPS") + https: Optional[bool] = _truthy(https_raw) if https_raw is not None else None + prefer_grpc = _truthy(_env("QDRANT_PREFER_GRPC", "false")) + timeout = int(_env("QDRANT_TIMEOUT", "30")) + + # URL-Assembly: wenn https explizit gesetzt, halte die Praeferenz. Sonst http. 
+ scheme = "https" if https else "http" + if host.startswith("http://") or host.startswith("https://"): + url = f"{host}:{port}" + else: + url = f"{scheme}://{host}:{port}" + + config = { + "host": host, + "port": port, + "grpc_port": grpc_port, + "api_key": api_key, + "https": https, + "prefer_grpc": prefer_grpc, + "timeout": timeout, + "url": url, + } + + logger.info("Getting Qdrant config [prefix=%s]:", prefix or "default") + logger.info( + " URL: %s (prefer_grpc=%s, grpc_port=%s, https=%s)", + url, prefer_grpc, grpc_port, https, + ) + logger.info(" Auth: %s", "ApiKey" if api_key else "None") + logger.info(" Timeout: %ss", timeout) + + return config + + +@component(name="qdrant_client_factory", primary=False) +class QdrantClientFactory: + """ + Qdrant Client Factory. + + Bietet Caching/Management fuer ``QdrantClient``-Instanzen, ein Client pro + benannter Konfiguration (analog ``MilvusClientFactory``). + + ``primary=False``: Wenn ``VECTOR_STORE_BACKEND=qdrant`` gesetzt ist, wird + diese Factory ueber den Bean-Namen ``qdrant_client_factory`` explizit + gerouted (siehe Phase 1.2 / Repository-Layer-Refactor). So bleibt + Milvus-Factory in der Setup-Phase Default und nichts veraendert sich + bis zum Cutover. + """ + + def __init__(self) -> None: + self._clients: Dict[str, QdrantClient] = {} + self._default_config: Optional[dict] = None + # Note: typischer use-case ist single-init in lifespan-startup, daher + # kein Lock noetig. Bei concurrent access aus FastAPI-Coroutines auf + # verschiedene named clients kann theoretisch eine Race entstehen + # (beide passen den cache-miss-check, beide erstellen Client, einer + # ueberschreibt den anderen im dict). Fix in Phase 2 via threading.Lock + # falls Concurrent-Pattern auftritt. 
+ logger.info("QdrantClientFactory initialized") + + def get_client( + self, + url: str = "", + host: str = "", + port: int = 6333, + grpc_port: int = 6334, + api_key: Optional[str] = None, + https: Optional[bool] = None, + prefer_grpc: bool = False, + timeout: int = 30, + alias: Optional[str] = None, + **kwargs, + ) -> QdrantClient: + """ + Get oder erzeuge cached Qdrant-Client. + + Args: + url: Voll-qualifizierte URL (z.B. ``http://localhost:6333``). Wenn + angegeben, ueberschreibt sie ``host``/``port``. + host: Hostname (Default ``localhost`` falls weder ``url`` noch + ``host`` gesetzt). + port: HTTP/REST-Port (Default ``6333``). + grpc_port: gRPC-Port (Default ``6334``, nur wenn ``prefer_grpc``). + api_key: Optionaler Qdrant Cloud API-Key. ``None`` = anonymous. + https: TLS-Praeferenz. ``None`` (Default) ueberlaesst qdrant-client + die Auto-Detection ueber das URL-Schema. Explizit ``True``/ + ``False`` ueberschreibt das. + prefer_grpc: gRPC statt HTTP fuer Datenwege. + timeout: Request-Timeout in Sekunden. + alias: Cache-Key (Default ``default``). + + Returns: + ``QdrantClient`` (gecached pro ``alias``). 
+ """ + cache_key = alias or "default" + if cache_key in self._clients: + return self._clients[cache_key] + + client_kwargs: dict = { + "prefer_grpc": prefer_grpc, + "grpc_port": grpc_port, + "timeout": timeout, + } + if api_key: + client_kwargs["api_key"] = api_key + if https is not None: + client_kwargs["https"] = https + if url: + client_kwargs["url"] = url + else: + client_kwargs["host"] = host or "localhost" + client_kwargs["port"] = port + + client_kwargs.update(kwargs) + + client = QdrantClient(**client_kwargs) + self._clients[cache_key] = client + logger.info( + "Qdrant client created and cached: %s (alias=%s, prefer_grpc=%s, https=%s)", + url or f"{client_kwargs.get('host')}:{port}", + cache_key, + prefer_grpc, + https, + ) + return client + + def get_default_client(self) -> QdrantClient: + """Get default Qdrant client basierend auf Env-Konfiguration.""" + if self._default_config is None: + self._default_config = get_qdrant_config() + + cfg = self._default_config + return self.get_client( + url=cfg["url"], + api_key=cfg["api_key"], + https=cfg["https"], + prefer_grpc=cfg["prefer_grpc"], + grpc_port=cfg["grpc_port"], + timeout=cfg["timeout"], + alias="default", + ) + + def get_named_client(self, name: str) -> QdrantClient: + """ + Get Qdrant client by name. ``name`` wird als Env-Praefix verwendet, + z.B. ``name="A"`` liest ``A_QDRANT_HOST``, ``A_QDRANT_PORT``, ... + + Args: + name: Praefix-Name (Env-Var-Praefix). ``default`` -> default client. + + Returns: + ``QdrantClient`` (gecached unter ``name``). 
+ """ + if name.lower() == "default": + return self.get_default_client() + + cfg = get_qdrant_config(prefix=name) + logger.info("Loading named Qdrant config [name=%s]: %s", name, cfg["url"]) + + return self.get_client( + url=cfg["url"], + api_key=cfg["api_key"], + https=cfg["https"], + prefer_grpc=cfg["prefer_grpc"], + grpc_port=cfg["grpc_port"], + timeout=cfg["timeout"], + alias=name, + ) + + def close_all_clients(self) -> None: + """Schliesst alle gecachten Qdrant-Clients.""" + for alias, client in self._clients.items(): + try: + client.close() + except Exception as e: # noqa: BLE001 + logger.error("Error closing Qdrant client [alias=%s]: %s", alias, e) + self._clients.clear() + logger.info("All Qdrant clients closed") diff --git a/methods/evermemos/src/core/oxm/qdrant/__init__.py b/methods/evermemos/src/core/oxm/qdrant/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py new file mode 100644 index 00000000..0cdaef62 --- /dev/null +++ b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py @@ -0,0 +1,124 @@ +""" +Qdrant Collection Base — Stub fuer Phase 1 der Milvus->Qdrant-Migration. + +Konzept-Mapping (laut qdrant.tech/documentation/migrate-to-qdrant/from-milvus): + + Milvus Qdrant + ----------------------------------------------------------- + Collection Collection (1:1) + FieldSchema(vector) VectorParams(size, distance) + FieldSchema(scalar) Payload field (schema-flexible) + Index(HNSW, COSINE) HnswConfig + Distance.COSINE + Partition Payload-Field ODER separate Collection + COSINE Cosine + L2 Euclid + IP Dot + +Diese Klasse ist absichtlich minimal. Voll-Implementierung erfolgt in +nachfolgenden Commits auf ``feature/qdrant-adapter``. 
+ +Wichtig: alle Methoden hier sind so ausgelegt, dass sie ohne aktive +qdrant-Verbindung importierbar sind — sodass das Modul auch geladen werden +kann, wenn ``VECTOR_STORE_BACKEND != "qdrant"``. +""" + +from typing import Any, ClassVar, List, Optional + +from core.observation.logger import get_logger + +logger = get_logger(__name__) + + +class IndexConfig: + """ + Konfiguration fuer Qdrant-Vector-Index. Analog zu + ``core.oxm.milvus.milvus_collection_base.IndexConfig`` aber mit Qdrant- + nativen Feldern. + + TODO Phase 1.2: ``hnsw_config`` (m, ef_construct, full_scan_threshold), + ``quantization_config`` (scalar/PQ/BQ), ``on_disk_payload``, + ``sparse_vectors_config``. + """ + + def __init__( + self, + size: int = 1024, + distance: str = "Cosine", + on_disk: bool = False, + hnsw_m: int = 16, + hnsw_ef_construct: int = 100, + ) -> None: + self.size = size + self.distance = distance + self.on_disk = on_disk + self.hnsw_m = hnsw_m + self.hnsw_ef_construct = hnsw_ef_construct + + +class QdrantCollectionBase: + """ + Qdrant-Collection-Management-Basisklasse (analog MilvusCollectionBase). + + Subclasses MUST define: + _COLLECTION_NAME: ClassVar[str] + _VECTOR_PARAMS: ClassVar[IndexConfig] + _PAYLOAD_INDEXES: ClassVar[list[str]] = [] # field names to index + + Optional: + _DB_USING: ClassVar[str] = "default" # client name + + Aktueller Stand (Phase 1.1 Skeleton): ``ensure_all()`` ist No-Op, damit + ``QdrantLifespanProvider.startup()`` ueber registrierte Subklassen + iterieren kann ohne Crash. Voll-Logik kommt im Sub-Commit. 
+ """ + + _COLLECTION_NAME: ClassVar[Optional[str]] = None + _DB_USING: ClassVar[str] = "default" + _VECTOR_PARAMS: ClassVar[Optional[IndexConfig]] = None + _PAYLOAD_INDEXES: ClassVar[List[str]] = [] + + def __init__(self) -> None: + if self._COLLECTION_NAME is None: + raise NotImplementedError( + f"{self.__class__.__name__} must define '_COLLECTION_NAME'" + ) + + @property + def name(self) -> str: + # _COLLECTION_NAME ist nach __init__-Check garantiert nicht None + return self._COLLECTION_NAME # type: ignore[return-value] + + @property + def using(self) -> str: + return self._DB_USING + + def ensure_all(self) -> None: + """ + Stellt sicher dass Collection + Payload-Indexes existieren. + + TODO Phase 1.2: + - ``client.collection_exists(name)`` pruefen + - falls nicht: ``client.create_collection(name, vectors_config=...)`` + - pro ``_PAYLOAD_INDEXES``: ``client.create_payload_index(name, field)`` + mit korrektem ``PayloadSchemaType`` (Keyword/Integer/Float/Bool). + + Aktuell: No-Op + Debug-Log, damit Lifespan-Provider iterieren kann. 
+ """ + logger.debug( + "QdrantCollectionBase.ensure_all() stub for '%s' [using=%s] " + "— TODO Phase 1.2", + self.name, + self.using, + ) + + def upsert(self, points: List[Any]) -> None: + """TODO Phase 2: ``client.upsert(name, points=points)``.""" + raise NotImplementedError("Phase 2: implement Qdrant upsert") + + def search(self, query_vector: List[float], **kwargs: Any) -> List[Any]: + """TODO Phase 2: ``client.search(name, query_vector, query_filter, ...)``.""" + raise NotImplementedError("Phase 2: implement Qdrant search") + + def delete(self, point_ids: List[Any]) -> None: + """TODO Phase 2: ``client.delete(name, points_selector=PointIdsList(points=ids))``.""" + raise NotImplementedError("Phase 2: implement Qdrant delete") diff --git a/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/__init__.py b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/__init__.py new file mode 100644 index 00000000..e69de29b From cf80e3555c0a5bac660b0b6a4d72a57b1f57b433 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 16:48:52 +0000 Subject: [PATCH 02/20] feat(qdrant): add QdrantLifespanProvider gated by VECTOR_STORE_BACKEND env Provides FastAPI lifespan startup/shutdown for the Qdrant adapter, analogous to MilvusLifespanProvider but no-op unless VECTOR_STORE_BACKEND=qdrant. So the Milvus backend stays the default at runtime until cutover. On startup (when active): - Resolves the 'qdrant_client_factory' DI bean. - Collects all concrete QdrantCollectionBase subclasses via get_all_subclasses. - Groups them by _DB_USING and ensures the client per group, then runs ensure_all() on each collection (currently a stub no-op; will create collections + payload indexes in Phase 1.2). On shutdown: closes all cached Qdrant clients. Cleans the same app.state attributes pattern as the milvus provider. Order=19 sits between milvus_lifespan (18) and business_lifespan (20), so during cutover both backends can briefly coexist. 
--- .../src/core/lifespan/qdrant_lifespan.py | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 methods/evermemos/src/core/lifespan/qdrant_lifespan.py diff --git a/methods/evermemos/src/core/lifespan/qdrant_lifespan.py b/methods/evermemos/src/core/lifespan/qdrant_lifespan.py new file mode 100644 index 00000000..5878ba05 --- /dev/null +++ b/methods/evermemos/src/core/lifespan/qdrant_lifespan.py @@ -0,0 +1,136 @@ +""" +Qdrant lifespan provider — Analog zu ``core.lifespan.milvus_lifespan``. + +Wird vom DI-Container automatisch entdeckt und in die FastAPI-Lifespan-Kette +eingehaengt. Initialisierung ist **gated** durch das Env-Flag +``VECTOR_STORE_BACKEND``: + + VECTOR_STORE_BACKEND=qdrant -> Qdrant wird initialisiert + VECTOR_STORE_BACKEND=milvus -> No-Op (Milvus-Lifespan uebernimmt; Default) + VECTOR_STORE_BACKEND unset -> No-Op (= Default ``milvus``) + +So kann der Adapter-Layer im Repo liegen, ohne dass er aktiv eingreift bis +zum Cutover. +""" + +import os +from collections import defaultdict +from typing import Any, Dict, List, Type + +from fastapi import FastAPI + +from core.di.decorators import component +from core.di.utils import get_all_subclasses, get_bean +from core.lifespan.lifespan_interface import LifespanProvider +from core.observation.logger import get_logger +from core.oxm.qdrant.qdrant_collection_base import QdrantCollectionBase + +logger = get_logger(__name__) + + +# Order 19: zwischen milvus_lifespan (18) und business_lifespan (20). So +# laufen beide Vector-Backends initialisiert (im Cutover-Fall), und +# business-Logik startet erst danach. +_QDRANT_LIFESPAN_ORDER = 19 + +# Env-Flag, das den aktiven Vector-Store waehlt. Default ``milvus`` damit +# nichts an der bestehenden Deployment-Topologie aendert bis zum Cutover. 
+_ENV_BACKEND_FLAG = "VECTOR_STORE_BACKEND" +_BACKEND_QDRANT = "qdrant" + + +def _backend_is_qdrant() -> bool: + return os.getenv(_ENV_BACKEND_FLAG, "milvus").strip().lower() == _BACKEND_QDRANT + + +@component(name="qdrant_lifespan_provider") +class QdrantLifespanProvider(LifespanProvider): + """Qdrant lifespan provider (feature-gated).""" + + def __init__(self, name: str = "qdrant", order: int = _QDRANT_LIFESPAN_ORDER): + super().__init__(name, order) + self._qdrant_factory = None + self._qdrant_clients: dict = {} + + async def startup(self, app: FastAPI) -> Any: + """ + Start Qdrant connection und Collection-Initialisierung. + + Skipped wenn ``VECTOR_STORE_BACKEND != qdrant``. + """ + if not _backend_is_qdrant(): + logger.info( + "Qdrant lifespan skipped (%s='%s', Qdrant inactive)", + _ENV_BACKEND_FLAG, + os.getenv(_ENV_BACKEND_FLAG, "milvus"), + ) + return + + logger.info("Initializing Qdrant connection...") + + try: + self._qdrant_factory = get_bean("qdrant_client_factory") + + # Alle konkreten QdrantCollectionBase-Subklassen sammeln. + all_collection_classes = [ + cls + for cls in get_all_subclasses(QdrantCollectionBase) + if cls._COLLECTION_NAME is not None + ] + + # Gruppieren nach _DB_USING (analog Milvus). + using_collections: Dict[str, List[Type[QdrantCollectionBase]]] = defaultdict(list) + for collection_class in all_collection_classes: + using = collection_class._DB_USING + using_collections[using].append(collection_class) + logger.info( + "Discovered Qdrant Collection class: %s [using=%s]", + collection_class.__name__, + using, + ) + + # Pro using: Client holen + Collections initialisieren. 
+ for using, collection_classes in using_collections.items(): + client = self._qdrant_factory.get_named_client(using) + self._qdrant_clients[using] = client + + for collection_class in collection_classes: + try: + collection = collection_class() + collection.ensure_all() + logger.info( + "Qdrant Collection '%s' initialized [using=%s]", + collection.name, + using, + ) + except Exception as e: + logger.error( + "Failed to initialize Qdrant Collection '%s' [using=%s]: %s", + collection_class._COLLECTION_NAME, + using, + e, + ) + raise + + logger.info("Qdrant connection initialization completed") + + except Exception as e: + logger.error("Error during Qdrant initialization: %s", str(e)) + raise + + async def shutdown(self, app: FastAPI) -> None: + """Close Qdrant connections (No-Op wenn nicht initialisiert).""" + if not _backend_is_qdrant() or self._qdrant_factory is None: + return + + logger.info("Closing Qdrant connections...") + try: + self._qdrant_factory.close_all_clients() + logger.info("Qdrant connections closed") + except Exception as e: + logger.error("Error while closing Qdrant connections: %s", str(e)) + + # State-Cleanup analog Milvus. + for attr in ("qdrant_clients", "qdrant_factory"): + if hasattr(app.state, attr): + delattr(app.state, attr) From f32dca4b8db862f5a23cebb9b7dd2c2e28e4bb65 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 16:51:00 +0000 Subject: [PATCH 03/20] feat(qdrant): add BaseQdrantConverter (analogous to BaseMilvusConverter) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generic[QdrantCollectionType] ABC with an abstract @classmethod from_mongo() that subclasses implement to convert Mongo source docs into Qdrant point payloads (PointStruct or compatible dicts). 
get_qdrant_model() introspects the Generic argument from __orig_bases__ so the bound collection class can be retrieved at runtime — same pattern as the Milvus base converter, which the search-repository layer relies on. --- .../src/core/oxm/qdrant/base_converter.py | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 methods/evermemos/src/core/oxm/qdrant/base_converter.py diff --git a/methods/evermemos/src/core/oxm/qdrant/base_converter.py b/methods/evermemos/src/core/oxm/qdrant/base_converter.py new file mode 100644 index 00000000..d7d5531f --- /dev/null +++ b/methods/evermemos/src/core/oxm/qdrant/base_converter.py @@ -0,0 +1,86 @@ +""" +Qdrant collection converter base class — Analog zu ``BaseMilvusConverter``. + +Provides a unified conversion interface from arbitrary data sources to Qdrant +collection entities (typically ``qdrant_client.http.models.PointStruct`` +instances or compatible payload dicts). + +All Qdrant collection converters should inherit from this base class. +""" + +from abc import ABC, abstractmethod +from typing import Any, Generic, Type, TypeVar, get_args, get_origin + +from core.observation.logger import get_logger +from core.oxm.qdrant.qdrant_collection_base import QdrantCollectionBase + +logger = get_logger(__name__) + +# Generic type variable — bound to QdrantCollectionBase so subclasses are +# explicit about which collection they convert into. +QdrantCollectionType = TypeVar("QdrantCollectionType", bound=QdrantCollectionBase) + + +class BaseQdrantConverter(ABC, Generic[QdrantCollectionType]): + """ + Qdrant collection converter base class. + + Provides basic functionality for converting arbitrary data sources to + Qdrant collection entities (point payloads). All Qdrant converters should + inherit from this class. + + Features: + - Unified conversion interface (class methods). + - Type-safe Qdrant collection generic support. + - Automatically retrieves the bound Qdrant collection type from generics. 
+ - Flexible data source support (Mongo docs are the typical source in + EverOS, see ``from_mongo`` below). + """ + + @classmethod + def get_qdrant_model(cls) -> Type[QdrantCollectionType]: + """ + Retrieve the Qdrant collection model type from generic information. + + Returns: + Type[QdrantCollectionType]: Qdrant collection model class. + + Raises: + ValueError: When the subclass did not bind a generic argument. + """ + # Get the generic base class of the current class. + if hasattr(cls, "__orig_bases__"): + for base in cls.__orig_bases__: + if get_origin(base) is BaseQdrantConverter: + args = get_args(base) + if args: + return args[0] + + raise ValueError( + "Unable to retrieve Qdrant collection type from generic information " + f"of {cls.__name__}" + ) + + @classmethod + @abstractmethod + def from_mongo(cls, source_doc: Any) -> QdrantCollectionType: + """ + Convert from a data source (typically a Mongo doc) to a Qdrant entity. + + Subclasses must implement specific conversion logic. By Milvus-counterpart + convention, the returned value is typically a ``PointStruct`` instance + (``id``, ``vector``, ``payload``) or a compatible dict that can be passed + to ``client.upsert(collection_name, points=[...])``. The Generic bound + documents the *target collection class* rather than the wire payload + shape. + + Args: + source_doc: Source data (any type — Mongo doc, dict, etc.). + + Returns: + A Qdrant point payload bound to ``QdrantCollectionType``. + + Raises: + Exception: When an error occurs during conversion. 
+ """ + raise NotImplementedError("Subclasses must implement the from_mongo method") From 5cf38e9dbdeb2b54e95ebdfe79af18de95745201 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 17:05:28 +0000 Subject: [PATCH 04/20] feat(qdrant): full QdrantCollectionBase impl + replace search() with query_points() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the Phase 1.1 stub with a full collection-management base class: - IndexConfig as @dataclass: size, distance, on_disk, hnsw_m, hnsw_ef_construct, payload_indexes (dict field_name -> schema_type). to_vectors_config() builds the qdrant_client VectorParams + HnswConfigDiff. - Module-level _DISTANCE_MAP and _PAYLOAD_SCHEMA_TYPE_MAP translate string configs to SDK enums — subclasses stay decoupled from the SDK. - QdrantCollectionBase methods: client() (lazy DI lookup), exists(), count(), ensure_collection() (idempotent), ensure_payload_indexes() (idempotent), ensure_all(), upsert(), search() (qdrant-client query_points wrapper), delete(), drop(). - Compared to MilvusCollectionBase this is ~half the LOC because Qdrant has no alias mechanism — collection names are direct. CRITICAL fix in the same commit: qdrant-client 1.16.1 (the version resolved against our >=1.12,<2 pin) removed the legacy QdrantClient.search method; only query_points is available now. The search() wrapper here calls query_points(query=..., ...) and unwraps QueryResponse.points so call sites still get List[ScoredPoint]. uv.lock is regenerated to include qdrant-client (1.16.1) and its transitive deps (h2, hpack, hyperframe, portalocker). 
--- .../core/oxm/qdrant/qdrant_collection_base.py | 345 ++++++++++++++---- methods/evermemos/uv.lock | 4 +- 2 files changed, 287 insertions(+), 62 deletions(-) diff --git a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py index 0cdaef62..9c6c6ca6 100644 --- a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py +++ b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py @@ -1,5 +1,6 @@ """ -Qdrant Collection Base — Stub fuer Phase 1 der Milvus->Qdrant-Migration. +Qdrant Collection Base — vollstaendige Basisklasse fuer Qdrant-basierte +Collections. Konzept-Mapping (laut qdrant.tech/documentation/migrate-to-qdrant/from-milvus): @@ -8,117 +9,339 @@ Collection Collection (1:1) FieldSchema(vector) VectorParams(size, distance) FieldSchema(scalar) Payload field (schema-flexible) - Index(HNSW, COSINE) HnswConfig + Distance.COSINE + Index(HNSW, COSINE) HnswConfigDiff + Distance.Cosine Partition Payload-Field ODER separate Collection COSINE Cosine L2 Euclid IP Dot -Diese Klasse ist absichtlich minimal. Voll-Implementierung erfolgt in -nachfolgenden Commits auf ``feature/qdrant-adapter``. +Die Klasse ist absichtlich schlanker als ihr Milvus-Pendant: Qdrant kennt +keinen Alias-Mechanismus, also entfaellt der ``Real-Name + Alias + +Timestamp``-Indirektions-Layer. Schema-Migrationen erfolgen extern (neue +Collection mit neuem Namen, Daten umlagern). -Wichtig: alle Methoden hier sind so ausgelegt, dass sie ohne aktive -qdrant-Verbindung importierbar sind — sodass das Modul auch geladen werden -kann, wenn ``VECTOR_STORE_BACKEND != "qdrant"``. +Bei ``VECTOR_STORE_BACKEND != qdrant`` wird das Modul zwar geladen (durch +DI-Container-Scan), aber ``QdrantLifespanProvider`` initialisiert nichts — +``ensure_all()`` und alle anderen Methoden werden gar nicht aufgerufen. 
""" -from typing import Any, ClassVar, List, Optional +import logging +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Optional -from core.observation.logger import get_logger +from qdrant_client import QdrantClient +from qdrant_client.http import models as qmodels -logger = get_logger(__name__) +logger = logging.getLogger(__name__) +# Mapping: kanonischer Lower-Case-Name (EverOS-intern) -> Qdrant SDK Enum. +# Bewusst ueber Strings, damit Collection-Klassen nicht direkt vom SDK abhaengen. +_PAYLOAD_SCHEMA_TYPE_MAP: Dict[str, "qmodels.PayloadSchemaType"] = { + "keyword": qmodels.PayloadSchemaType.KEYWORD, + "integer": qmodels.PayloadSchemaType.INTEGER, + "float": qmodels.PayloadSchemaType.FLOAT, + "bool": qmodels.PayloadSchemaType.BOOL, + "geo": qmodels.PayloadSchemaType.GEO, + "text": qmodels.PayloadSchemaType.TEXT, + "datetime": qmodels.PayloadSchemaType.DATETIME, + "uuid": qmodels.PayloadSchemaType.UUID, +} + +# Distance-Mapping zum Schutz vor SDK-Versions-Drift. +_DISTANCE_MAP: Dict[str, "qmodels.Distance"] = { + "cosine": qmodels.Distance.COSINE, + "euclid": qmodels.Distance.EUCLID, + "dot": qmodels.Distance.DOT, + "manhattan": qmodels.Distance.MANHATTAN, +} + + +@dataclass class IndexConfig: """ - Konfiguration fuer Qdrant-Vector-Index. Analog zu - ``core.oxm.milvus.milvus_collection_base.IndexConfig`` aber mit Qdrant- - nativen Feldern. + Konfiguration fuer den (Vektor-)Index einer Qdrant-Collection. - TODO Phase 1.2: ``hnsw_config`` (m, ef_construct, full_scan_threshold), - ``quantization_config`` (scalar/PQ/BQ), ``on_disk_payload``, - ``sparse_vectors_config``. + Args: + size: Vektor-Dimension (1024 fuer qwen3-embedding-Default). + distance: Distanz-Metrik (``cosine``, ``euclid``, ``dot``, ``manhattan``). + on_disk: Vektor-Daten auf Disk halten (mmapped) statt vollstaendig im + RAM. Reduziert Memory-Footprint bei groesseren Datasets. + hnsw_m: HNSW Maximum-Edges-per-Node. Hoeher = bessere Recall, mehr RAM. 
+ hnsw_ef_construct: HNSW Search-Width beim Bauen. Hoeher = bessere + Recall, langsamerer Build. + payload_indexes: Map ``field_name -> schema_type``. ``schema_type`` + ist einer von ``_PAYLOAD_SCHEMA_TYPE_MAP`` (e.g. + ``"keyword"`` fuer string-equality-Filter). """ - def __init__( - self, - size: int = 1024, - distance: str = "Cosine", - on_disk: bool = False, - hnsw_m: int = 16, - hnsw_ef_construct: int = 100, - ) -> None: - self.size = size - self.distance = distance - self.on_disk = on_disk - self.hnsw_m = hnsw_m - self.hnsw_ef_construct = hnsw_ef_construct + size: int = 1024 + distance: str = "cosine" + on_disk: bool = False + hnsw_m: int = 16 + hnsw_ef_construct: int = 100 + payload_indexes: Dict[str, str] = field(default_factory=dict) + + def to_vectors_config(self) -> qmodels.VectorParams: + """Konvertiert in ``qdrant_client.http.models.VectorParams``.""" + distance_key = self.distance.strip().lower() + if distance_key not in _DISTANCE_MAP: + raise ValueError( + f"Unknown distance '{self.distance}'. " + f"Supported: {sorted(_DISTANCE_MAP)}" + ) + return qmodels.VectorParams( + size=self.size, + distance=_DISTANCE_MAP[distance_key], + on_disk=self.on_disk, + hnsw_config=qmodels.HnswConfigDiff( + m=self.hnsw_m, + ef_construct=self.hnsw_ef_construct, + ), + ) class QdrantCollectionBase: """ - Qdrant-Collection-Management-Basisklasse (analog MilvusCollectionBase). + Qdrant-Collection-Management-Basisklasse (analog ``MilvusCollectionBase``). Subclasses MUST define: _COLLECTION_NAME: ClassVar[str] _VECTOR_PARAMS: ClassVar[IndexConfig] - _PAYLOAD_INDEXES: ClassVar[list[str]] = [] # field names to index Optional: - _DB_USING: ClassVar[str] = "default" # client name + _DB_USING: ClassVar[str] = "default" + + Anders als das Milvus-Pendant gibt es keinen Alias-Mechanismus — die + Collection ist direkt unter ``_COLLECTION_NAME`` adressierbar. 
+ + Subclass-Beispiel:: - Aktueller Stand (Phase 1.1 Skeleton): ``ensure_all()`` ist No-Op, damit - ``QdrantLifespanProvider.startup()`` ueber registrierte Subklassen - iterieren kann ohne Crash. Voll-Logik kommt im Sub-Commit. + class EpisodicMemoryCollection(QdrantCollectionBase): + _COLLECTION_NAME = "v1_episodic_memory" + _VECTOR_PARAMS = IndexConfig( + size=1024, + distance="cosine", + payload_indexes={ + "user_id": "keyword", + "group_id": "keyword", + "session_id": "keyword", + "timestamp": "integer", + }, + ) + + coll = EpisodicMemoryCollection() + coll.ensure_all() + coll.upsert([...]) """ _COLLECTION_NAME: ClassVar[Optional[str]] = None _DB_USING: ClassVar[str] = "default" _VECTOR_PARAMS: ClassVar[Optional[IndexConfig]] = None - _PAYLOAD_INDEXES: ClassVar[List[str]] = [] def __init__(self) -> None: - if self._COLLECTION_NAME is None: + if not self._COLLECTION_NAME: raise NotImplementedError( - f"{self.__class__.__name__} must define '_COLLECTION_NAME'" + f"{self.__class__.__name__} must define '_COLLECTION_NAME' " + "class attribute" ) + if self._VECTOR_PARAMS is None: + raise NotImplementedError( + f"{self.__class__.__name__} must define '_VECTOR_PARAMS' " + "(IndexConfig) class attribute" + ) + self._using = self._DB_USING or "default" @property def name(self) -> str: - # _COLLECTION_NAME ist nach __init__-Check garantiert nicht None return self._COLLECTION_NAME # type: ignore[return-value] @property def using(self) -> str: - return self._DB_USING + return self._using - def ensure_all(self) -> None: + # ------------------------------------------------------------------ client + + def client(self) -> QdrantClient: """ - Stellt sicher dass Collection + Payload-Indexes existieren. + Resolve the cached Qdrant client for ``self.using`` via DI factory. + + Looking up via the factory bean keeps client-caching centralized + (factory caches one QdrantClient instance per alias). 
+ """ + # Lazy import to avoid a circular dependency: this module is imported + # at adapter-discovery time, before the DI container is fully wired. + from core.di.utils import get_bean + + factory = get_bean("qdrant_client_factory") + return factory.get_named_client(self.using) - TODO Phase 1.2: - - ``client.collection_exists(name)`` pruefen - - falls nicht: ``client.create_collection(name, vectors_config=...)`` - - pro ``_PAYLOAD_INDEXES``: ``client.create_payload_index(name, field)`` - mit korrektem ``PayloadSchemaType`` (Keyword/Integer/Float/Bool). + # ------------------------------------------------------------------ schema + + def exists(self) -> bool: + """Return True if the underlying Qdrant collection already exists.""" + try: + return self.client().collection_exists(self.name) + except Exception as e: # noqa: BLE001 + logger.warning( + "collection_exists('%s') failed: %s — treating as non-existent", + self.name, + e, + ) + return False + + def count(self, exact: bool = True) -> int: + """Number of points in the collection.""" + result = self.client().count(collection_name=self.name, exact=exact) + return result.count + + def ensure_collection(self) -> None: + """ + Create the Qdrant collection if it does not exist. - Aktuell: No-Op + Debug-Log, damit Lifespan-Provider iterieren kann. + Idempotent: a pre-existing collection is left untouched, even if its + schema differs from ``_VECTOR_PARAMS`` — schema migration is an + explicit external concern. 
""" - logger.debug( - "QdrantCollectionBase.ensure_all() stub for '%s' [using=%s] " - "— TODO Phase 1.2", + if self.exists(): + logger.debug( + "Qdrant collection '%s' already exists, skipping create", + self.name, + ) + return + + cfg = self._VECTOR_PARAMS + assert cfg is not None # guarded by __init__ + logger.info( + "Creating Qdrant collection '%s' (size=%d, distance=%s, on_disk=%s)", self.name, - self.using, + cfg.size, + cfg.distance, + cfg.on_disk, + ) + self.client().create_collection( + collection_name=self.name, + vectors_config=cfg.to_vectors_config(), ) - def upsert(self, points: List[Any]) -> None: - """TODO Phase 2: ``client.upsert(name, points=points)``.""" - raise NotImplementedError("Phase 2: implement Qdrant upsert") + def ensure_payload_indexes(self) -> None: + """ + Create payload-indexes for the fields declared in + ``_VECTOR_PARAMS.payload_indexes``. + + Qdrant treats ``create_payload_index`` as idempotent at the API level, + so we call it unconditionally per field. + """ + cfg = self._VECTOR_PARAMS + assert cfg is not None + if not cfg.payload_indexes: + logger.debug( + "Qdrant collection '%s' has no declared payload indexes, skipping", + self.name, + ) + return + + for field_name, schema_str in cfg.payload_indexes.items(): + key = schema_str.strip().lower() + if key not in _PAYLOAD_SCHEMA_TYPE_MAP: + raise ValueError( + f"Unknown payload schema '{schema_str}' for field " + f"'{field_name}'. 
Supported: {sorted(_PAYLOAD_SCHEMA_TYPE_MAP)}" + ) + schema_type = _PAYLOAD_SCHEMA_TYPE_MAP[key] + try: + self.client().create_payload_index( + collection_name=self.name, + field_name=field_name, + field_schema=schema_type, + ) + logger.info( + "Ensured payload index on '%s.%s' (%s)", + self.name, + field_name, + schema_str, + ) + except Exception as e: # noqa: BLE001 + logger.error( + "Failed to ensure payload index on '%s.%s': %s", + self.name, + field_name, + e, + ) + raise + + def ensure_all(self) -> None: + """Idempotent one-shot init: collection + payload indexes.""" + logger.info("Initializing Qdrant collection '%s' [using=%s]", self.name, self.using) + self.ensure_collection() + self.ensure_payload_indexes() + logger.info("Qdrant collection '%s' is ready", self.name) - def search(self, query_vector: List[float], **kwargs: Any) -> List[Any]: - """TODO Phase 2: ``client.search(name, query_vector, query_filter, ...)``.""" - raise NotImplementedError("Phase 2: implement Qdrant search") + # ----------------------------------------------------------- data methods - def delete(self, point_ids: List[Any]) -> None: - """TODO Phase 2: ``client.delete(name, points_selector=PointIdsList(points=ids))``.""" - raise NotImplementedError("Phase 2: implement Qdrant delete") + def upsert( + self, + points: List[qmodels.PointStruct], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Upsert points (insert or overwrite by id).""" + return self.client().upsert( + collection_name=self.name, + points=points, + wait=wait, + ) + + def search( + self, + query_vector: List[float], + limit: int = 10, + query_filter: Optional[qmodels.Filter] = None, + with_payload: bool = True, + with_vectors: bool = False, + score_threshold: Optional[float] = None, + **kwargs: Any, + ) -> List[qmodels.ScoredPoint]: + """ + ANN search with optional payload-filter. + + Implemented on top of ``QdrantClient.query_points`` (the legacy + ``search`` method was removed in qdrant-client 1.13+). 
The wrapper + keeps the more intuitive ``query_vector`` parameter name for callers + and unwraps ``QueryResponse.points`` so the return type stays a + ``List[ScoredPoint]``. + """ + response = self.client().query_points( + collection_name=self.name, + query=query_vector, + query_filter=query_filter, + limit=limit, + with_payload=with_payload, + with_vectors=with_vectors, + score_threshold=score_threshold, + **kwargs, + ) + return response.points + + def delete( + self, + point_ids: List[Any], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Delete by point ids.""" + return self.client().delete( + collection_name=self.name, + points_selector=qmodels.PointIdsList(points=point_ids), + wait=wait, + ) + + def drop(self) -> None: + """Drop the underlying Qdrant collection (DANGEROUS — irreversible).""" + try: + self.client().delete_collection(collection_name=self.name) + logger.info("Dropped Qdrant collection '%s'", self.name) + except Exception as e: # noqa: BLE001 + logger.warning( + "Failed to drop Qdrant collection '%s' (may not exist): %s", + self.name, + e, + ) diff --git a/methods/evermemos/uv.lock b/methods/evermemos/uv.lock index b8d4ea65..67c9fad8 100644 --- a/methods/evermemos/uv.lock +++ b/methods/evermemos/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -1514,6 +1514,7 @@ dependencies = [ { name = "python-multipart" }, { name = "python-snappy" }, { name = "pyyaml" }, + { name = "qdrant-client" }, { name = "rank-bm25" }, { name = "redis" }, { name = "scikit-learn" }, @@ -1610,6 +1611,7 @@ requires-dist = [ { name = "python-multipart", specifier = ">=0.0.6" }, { name = "python-snappy", specifier = ">=0.7.3" }, { name = "pyyaml", specifier = ">=6.0.0" }, + { name = "qdrant-client", specifier = ">=1.12,<2" }, { name = "rank-bm25", specifier = ">=0.2.2" }, { name = "redis", specifier = ">=5.0.0" }, { name = "scikit-learn", specifier = ">=1.3.0" }, From 9409a9737a2f4d50502e8436348d8fdd1cbd9557 Mon 
Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 17:10:23 +0000 Subject: [PATCH 05/20] feat(qdrant): tenant-aware collection layer with collection-per-tenant naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the multi-tenancy adapter for Qdrant. Deliberately leaner than the Milvus counterpart (~270 LOC total vs ~750 LOC for milvus' tenant layer) because Qdrant has no alias mechanism and no partition_key feature — multi-tenancy is realized via collection-per-tenant naming alone. src/core/tenants/tenantize/oxm/qdrant/config_utils.py: - get_tenant_aware_collection_name(original_name): resolves the final Qdrant collection name from the active tenant context. Lookup order is storage_info['qdrant'] -> storage_info['milvus'] (migration bridge, reuses the same collection_prefix for both backends until per-tenant qdrant config is wired) -> base resource prefix fallback. - get_qdrant_connection_cache_key(config): builds a stable factory cache key, hashing api_key fingerprints (8 hex chars) so the raw key never appears in the cache identifier. - _load_qdrant_env(prefix): env-fallback loader for tenant-aware connection routing. Currently kept module-private until the routing layer consumes it; documented to avoid dead-code flags. src/core/tenants/tenantize/oxm/qdrant/tenant_aware_qdrant_collection_with_suffix.py: - TenantAwareQdrantCollectionWithSuffix(QdrantCollectionBase): overrides the name property to return tenant-prefixed + optional explicit suffix. The collection base remains unchanged; concrete Phase-2 collections inherit from this class instead of QdrantCollectionBase directly. - __init__(suffix) accepts explicit override or falls back to the SELF_QDRANT_COLLECTION_NS env-var. - _MULTI_TENANT_STRATEGY ClassVar is informational; a future version may opt into Qdrant's native payload-partitioning instead of separate collections per tenant. 
--- .../tenantize/oxm/qdrant/config_utils.py | 154 ++++++++++++++++++ ...ant_aware_qdrant_collection_with_suffix.py | 121 ++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py create mode 100644 methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/tenant_aware_qdrant_collection_with_suffix.py diff --git a/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py new file mode 100644 index 00000000..8571eaf8 --- /dev/null +++ b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py @@ -0,0 +1,154 @@ +""" +Tenant-Aware Qdrant Configuration Utilities. + +Analog zu ``core.tenants.tenantize.oxm.milvus.config_utils``, aber deutlich +schlanker — Qdrant braucht keine pymilvus-Connection-Cache-Keys, weil ein +einzelner ``QdrantClient`` alle Collections eines Endpoints bedient. + +Hauptaufgabe: Aufloesen des **tenant-aware Collection-Namens** anhand des +Tenant-Context. + +Resolution-Reihenfolge (analog Milvus): + 1. Tenant-Context vorhanden + ``storage_info["qdrant"]["collection_prefix"]`` gesetzt + -> ``f"{collection_prefix}_{original_name}"`` + 2. Tenant-Context vorhanden + nur ``storage_info["milvus"]["collection_prefix"]`` gesetzt + -> ``f"{milvus_prefix}_{original_name}"`` (Migrations-Bruecke: gleiche Tenant-Namen + fuer Qdrant wie fuer Milvus, bis pro-Tenant Qdrant-Config explizit gesetzt wird) + 3. Kein Tenant-Context -> Base-Resource-Prefix + ``original_name`` (z.B. ``s0001_v1_episodic_memory``) +""" + +import os +from hashlib import sha256 +from typing import Any, Dict, Optional + +from core.observation.logger import get_logger + +logger = get_logger(__name__) + + +def get_tenant_qdrant_config() -> Optional[Dict[str, Any]]: + """ + Hole das Qdrant-Storage-Dict des aktiven Tenant-Context. + + Returns: + Storage-Info-Dict (``collection_prefix`` und ggf. 
``host``/``port``/``api_key``) + oder ``None`` falls kein Tenant aktiv. + """ + # Lazy import vermeidet Circular-Dependency bei Adapter-Discovery-Time. + from core.tenants.tenantize.tenant_context import get_current_tenant + + try: + tenant_info = get_current_tenant() + if not tenant_info: + return None + + qdrant_cfg = tenant_info.get_storage_info("qdrant") + if qdrant_cfg: + return qdrant_cfg + + # Fallback: Falls noch kein dediziertes Qdrant-Config-Dict im + # Storage-Info, nutze den Milvus-Eintrag (gleicher collection_prefix + # ist sinnvolle Migrations-Bruecke). + return tenant_info.get_storage_info("milvus") or tenant_info.get_storage_info( + "milvus_config" + ) + except Exception as e: # noqa: BLE001 + logger.warning("Failed to resolve tenant qdrant config: %s", e) + return None + + +def _base_prefixed_collection_name(original_name: str) -> str: + """``{base_resource_prefix}_{original_name}`` (no-tenant Fallback).""" + # Lazy import — same circular-avoidance reason as above. + from core.tenants.tenant_constants import get_base_resource_prefix + + return f"{get_base_resource_prefix()}_{original_name}" + + +def get_tenant_aware_collection_name(original_name: str) -> str: + """ + Resolve a tenant-aware Qdrant collection name. + + Args: + original_name: Bare collection name (e.g., ``"v1_episodic_memory"``). + + Returns: + Tenant-prefixed name (e.g., ``"acme_v1_episodic_memory"``, + ``"s0001_v1_episodic_memory"``, etc.). + """ + try: + cfg = get_tenant_qdrant_config() + if cfg and cfg.get("collection_prefix"): + return f"{cfg['collection_prefix']}_{original_name}" + + # Kein expliziter Prefix im Tenant-Context — Fall back to base resource. 
+ return _base_prefixed_collection_name(original_name) + except Exception as e: # noqa: BLE001 + logger.warning( + "Failed to resolve tenant-aware Qdrant collection name for '%s': %s", + original_name, + e, + ) + return _base_prefixed_collection_name(original_name) + + +def get_qdrant_connection_cache_key(config: Dict[str, Any]) -> str: + """ + Build a deterministic cache key for a Qdrant connection. + + Used by ``QdrantClientFactory.get_named_client`` when callers route via + tenant-specific endpoints (each unique ``(host, port, api_key_hash)`` + triple becomes one cached client). For the common case of one shared + Qdrant endpoint across tenants, this returns a stable single key. + + Args: + config: Dict containing at least ``host``/``port`` or ``url``. + ``api_key`` is hashed (not the raw value) when included. + + Returns: + A short stable string suitable as factory alias. + """ + if "url" in config and config["url"]: + endpoint = str(config["url"]) + else: + endpoint = f"{config.get('host', 'localhost')}:{config.get('port', 6333)}" + + api_key = config.get("api_key") + if api_key: + # Hash the api_key fingerprint, not the raw value. + endpoint += f"#{sha256(api_key.encode('utf-8')).hexdigest()[:8]}" + + return endpoint + + +def _load_qdrant_env(prefix: str = "") -> Dict[str, Any]: + """ + Read Qdrant connection settings from environment variables. Used as a + fallback when no tenant-storage-info is present. + + Currently this helper is staged for the tenant-aware connection routing + that will be wired in alongside the per-tenant ``QdrantClientFactory`` + flow (see TenantAwareQdrantCollectionWithSuffix and the factory). It is + deliberately exported as module-private (``_load_qdrant_env``) until the + routing layer consumes it; do not flag as dead code in the meantime. + + Args: + prefix: Optional env prefix (e.g., ``"A"`` reads ``A_QDRANT_HOST``). + + Returns: + Dict mit ``host``, ``port``, ``api_key``, ``https``, ``prefer_grpc``. 
+ """ + def _env(name: str, default: Optional[str] = None) -> str: + key = f"{prefix.upper()}_{name}" if prefix else name + if default is None: + return os.getenv(key, "") + return os.getenv(key, default) + + return { + "host": _env("QDRANT_HOST", "localhost"), + "port": int(_env("QDRANT_PORT", "6333")), + "api_key": _env("QDRANT_API_KEY") or None, + "https": _env("QDRANT_HTTPS", "").strip().lower() in {"1", "true", "yes", "on"}, + "prefer_grpc": _env("QDRANT_PREFER_GRPC", "").strip().lower() + in {"1", "true", "yes", "on"}, + } diff --git a/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/tenant_aware_qdrant_collection_with_suffix.py b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/tenant_aware_qdrant_collection_with_suffix.py new file mode 100644 index 00000000..394c5154 --- /dev/null +++ b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/tenant_aware_qdrant_collection_with_suffix.py @@ -0,0 +1,121 @@ +""" +TenantAwareQdrantCollectionWithSuffix — analog zu +``TenantAwareMilvusCollectionWithSuffix``, aber deutlich schlanker: + +- Qdrant kennt keinen Alias-Mechanismus, daher entfaellt der + Real-Name-/Alias-Switch-Layer. +- Multi-Tenancy laeuft via **Collection-per-Tenant-Naming**: + ``f"{tenant_prefix}_{base_collection_name}"``. ``tenant_prefix`` wird vom + Tenant-Context aufgeloest (siehe ``config_utils.get_tenant_aware_collection_name``). +- Optional kann ein expliziter Suffix uebergeben werden (z.B. fuer + Test-/Sandbox-Collections); er wird an den Tenant-Prefixed-Namen + angehaengt: ``f"{tenant_prefix}_{base}_{suffix}"``. 
+ +Subclass-Beispiel:: + + class EpisodicMemoryCollection(TenantAwareQdrantCollectionWithSuffix): + _COLLECTION_NAME = "v1_episodic_memory" + _VECTOR_PARAMS = IndexConfig( + size=1024, + distance="cosine", + payload_indexes={"user_id": "keyword", "timestamp": "integer"}, + ) + + # Im Tenant-Context "acme": + coll = EpisodicMemoryCollection() + coll.name # -> "acme_v1_episodic_memory" + coll.ensure_all() # idempotent + + # Mit explizitem Suffix: + coll_v2 = EpisodicMemoryCollection(suffix="staging") + coll_v2.name # -> "acme_v1_episodic_memory_staging" +""" + +import os +from typing import ClassVar, Optional + +from core.observation.logger import get_logger +from core.oxm.qdrant.qdrant_collection_base import QdrantCollectionBase +from core.tenants.tenantize.oxm.qdrant.config_utils import ( + get_tenant_aware_collection_name, +) + +logger = get_logger(__name__) + +# Umgebungsvariable, die einen statischen Collection-Suffix erzwingt, +# z.B. fuer dev/staging-Builds. Wird nur verwendet wenn kein Suffix im +# Konstruktor uebergeben wurde. +_ENV_SUFFIX = "SELF_QDRANT_COLLECTION_NS" + + +def _resolve_suffix(suffix: Optional[str]) -> str: + """``suffix`` Argument > Env-Var > leerer String.""" + if suffix is not None: + return suffix + return os.getenv(_ENV_SUFFIX, "") + + +class TenantAwareQdrantCollectionWithSuffix(QdrantCollectionBase): + """ + Tenant-aware Qdrant collection with optional explicit suffix. + + Differences from the base class: + + - ``name`` property returns the **tenant-prefixed** name (and optionally + adds the explicit suffix). The base ``_COLLECTION_NAME`` stays the + logical/business identifier. + - ``__init__`` accepts an optional ``suffix`` parameter (or reads it from + the ``SELF_QDRANT_COLLECTION_NS`` environment variable). + - All other behaviour (collection creation, payload indexes, upsert/ + search/delete) is inherited unchanged from ``QdrantCollectionBase``. + """ + + # Subclasses MAY pin a partitioning strategy here. 
Currently informational + # only; future versions may use it to opt into Qdrant's native multi- + # tenancy via payload partitioning + ``group_id``-based sharding. + _MULTI_TENANT_STRATEGY: ClassVar[str] = "collection_per_tenant" + + def __init__(self, suffix: Optional[str] = None): + """ + Args: + suffix: Optional explicit suffix (e.g., ``"staging"``). If not + provided, falls back to ``SELF_QDRANT_COLLECTION_NS`` + env-var, then to empty string. + """ + # Reuse the base validation (requires _COLLECTION_NAME + _VECTOR_PARAMS). + super().__init__() + + self._suffix = _resolve_suffix(suffix) + # Resolve tenant-aware base name lazily — at __init__ the tenant context + # is whatever is active when the object is instantiated. If callers need + # to materialize a collection for a different tenant context, they + # instantiate within that context. + tenant_prefixed = get_tenant_aware_collection_name(self._COLLECTION_NAME) + if self._suffix: + self._resolved_name = f"{tenant_prefixed}_{self._suffix}" + else: + self._resolved_name = tenant_prefixed + + logger.debug( + "TenantAwareQdrantCollectionWithSuffix resolved name: %s " + "(base=%s, tenant_prefixed=%s, suffix=%s)", + self._resolved_name, + self._COLLECTION_NAME, + tenant_prefixed, + self._suffix or "", + ) + + @property + def name(self) -> str: + """Tenant-prefixed Qdrant collection name (with optional suffix).""" + return self._resolved_name + + @property + def base_name(self) -> str: + """The original logical ``_COLLECTION_NAME`` without tenant prefix.""" + return self._COLLECTION_NAME # type: ignore[return-value] + + @property + def suffix(self) -> str: + """The explicit suffix, or empty string if none was set.""" + return self._suffix From d34dc6e283c6b5ebf441bb7f9d5a3c0919f7305e Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 17:55:31 +0000 Subject: [PATCH 06/20] =?UTF-8?q?feat(qdrant):=20add=20BaseQdrantRepositor?= 
=?UTF-8?q?y=20=E2=80=94=20async=20CRUD=20on=20top=20of=20QdrantCollection?= =?UTF-8?q?Base?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps the sync qdrant-client API via asyncio.to_thread so the repository surface stays async (parity with the milvus repository layer). Methods: - upsert(point) -> str (point id, parity with milvus 'insert' returning entity_id) - upsert_batch(points) -> UpdateResult (full result, exposes wait-status) - find_by_id(id) / find_by_ids(ids) -> Optional[Record] / List[Record] - delete_by_id(id) -> bool; delete_batch(ids) -> UpdateResult - search(query_vector, limit, query_filter, ...) -> List[ScoredPoint] - count(exact=True) -> int - collection: lazy-instantiated QdrantCollectionBase subclass - get_model_name() -> str Error-handling semantics mirror the milvus counterpart: - upsert / upsert_batch / delete_batch / search → log + raise on failure - find_by_id / find_by_ids / delete_by_id → log + return None / False (resilient read path) Subclasses bind the generic parameter to the concrete collection model class: class EpisodicMemoryRepository( BaseQdrantRepository[EpisodicMemoryCollection] ): def __init__(self): super().__init__(EpisodicMemoryCollection) --- .../src/core/oxm/qdrant/base_repository.py | 250 ++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 methods/evermemos/src/core/oxm/qdrant/base_repository.py diff --git a/methods/evermemos/src/core/oxm/qdrant/base_repository.py b/methods/evermemos/src/core/oxm/qdrant/base_repository.py new file mode 100644 index 00000000..121c3eb7 --- /dev/null +++ b/methods/evermemos/src/core/oxm/qdrant/base_repository.py @@ -0,0 +1,250 @@ +""" +Qdrant Base Repository class — analog ``BaseMilvusRepository``. + +Provides common CRUD primitives that all Qdrant-backed repositories inherit. 
+The repository layer sits between the domain code and ``QdrantCollectionBase``: + +- domain code calls ``repo.upsert(point)``, ``repo.find_by_id(id)``, ... +- the repository delegates to the wrapped ``QdrantCollectionBase`` instance, + adding unified async wrapping, logging, and error handling. + +Async wrapping: ``qdrant-client``'s sync API is used (more battle-tested +than ``AsyncQdrantClient`` for Phase 1) and wrapped with ``asyncio.to_thread`` +so we keep the same async repository surface as the Milvus counterpart. +""" + +import asyncio +from abc import ABC +from typing import Any, Generic, List, Optional, Type, TypeVar + +from qdrant_client.http import models as qmodels + +from core.observation.logger import get_logger +from core.oxm.qdrant.qdrant_collection_base import QdrantCollectionBase + +logger = get_logger(__name__) + +T = TypeVar("T", bound=QdrantCollectionBase) + + +class BaseQdrantRepository(ABC, Generic[T]): + """ + Base class for all Qdrant repositories. + + Subclasses set the bound collection model via the generic parameter and + pass the model class to ``__init__``:: + + class EpisodicMemoryRepository( + BaseQdrantRepository[EpisodicMemoryCollection] + ): + def __init__(self): + super().__init__(EpisodicMemoryCollection) + + Subclasses may add domain-specific finders on top of the CRUD primitives. + """ + + def __init__(self, model: Type[T]): + self.model = model + self.model_name = model.__name__ + # Lazy: defer Collection instantiation until first use so that the + # repository can be constructed in tenant-less contexts (DI scan). 
+ self._collection: Optional[T] = None + + # ------------------------------------------------------------------ shape + + @property + def collection(self) -> T: + """Lazily instantiate the bound ``QdrantCollectionBase`` subclass.""" + if self._collection is None: + self._collection = self.model() + return self._collection + + def get_model_name(self) -> str: + return self.model_name + + # =================================================== Basic CRUD (async) + + async def upsert( + self, + point: qmodels.PointStruct, + wait: bool = True, + ) -> str: + """ + Insert-or-update a single point. + + Qdrant has no separate ``insert`` semantics — upsert is the + idempotent primitive. The returned id is taken from the passed + PointStruct (caller-supplied). + + Note: this returns a ``str`` (the point id) for parity with the + Milvus repository's ``insert`` method. The underlying Qdrant + ``UpdateResult`` is intentionally discarded here. Callers that + need the wire-level ``UpdateResult`` (e.g., to assert + ``status == completed``) should use ``upsert_batch([point])``. + """ + try: + await asyncio.to_thread(self.collection.upsert, [point], wait) + logger.debug( + "Qdrant upsert successful [%s]: %s", self.model_name, point.id + ) + return str(point.id) + except Exception as e: + logger.error("Qdrant upsert failed [%s]: %s", self.model_name, e) + raise + + async def upsert_batch( + self, + points: List[qmodels.PointStruct], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Batch upsert. 
``wait=True`` blocks until the operation is durable.""" + try: + result = await asyncio.to_thread(self.collection.upsert, points, wait) + logger.debug( + "Qdrant batch upsert successful [%s]: %d points", + self.model_name, + len(points), + ) + return result + except Exception as e: + logger.error( + "Qdrant batch upsert failed [%s]: %s", self.model_name, e + ) + raise + + async def find_by_id( + self, + point_id: Any, + with_payload: bool = True, + with_vectors: bool = False, + ) -> Optional[qmodels.Record]: + """ + Retrieve a single point by id. Returns ``None`` if not found. + + Qdrant accepts both ``int`` and ``str`` (UUID) point ids — pass + whichever id type was used at upsert time. + """ + try: + records = await asyncio.to_thread( + self.collection.client().retrieve, + self.collection.name, + [point_id], + with_payload, + with_vectors, + ) + return records[0] if records else None + except Exception as e: + logger.error( + "Qdrant find_by_id failed [%s, id=%s]: %s", + self.model_name, + point_id, + e, + ) + return None + + async def find_by_ids( + self, + point_ids: List[Any], + with_payload: bool = True, + with_vectors: bool = False, + ) -> List[qmodels.Record]: + """Batch retrieval by ids. Order of result is not guaranteed.""" + try: + return await asyncio.to_thread( + self.collection.client().retrieve, + self.collection.name, + point_ids, + with_payload, + with_vectors, + ) + except Exception as e: + logger.error( + "Qdrant find_by_ids failed [%s, %d ids]: %s", + self.model_name, + len(point_ids), + e, + ) + return [] + + async def delete_by_id( + self, + point_id: Any, + wait: bool = True, + ) -> bool: + """Delete a single point. 
Returns ``True`` on success.""" + try: + await asyncio.to_thread(self.collection.delete, [point_id], wait) + logger.debug( + "Qdrant delete successful [%s]: %s", self.model_name, point_id + ) + return True + except Exception as e: + logger.error( + "Qdrant delete failed [%s, id=%s]: %s", + self.model_name, + point_id, + e, + ) + return False + + async def delete_batch( + self, + point_ids: List[Any], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Batch delete by ids.""" + try: + result = await asyncio.to_thread( + self.collection.delete, point_ids, wait + ) + logger.debug( + "Qdrant batch delete successful [%s]: %d ids", + self.model_name, + len(point_ids), + ) + return result + except Exception as e: + logger.error( + "Qdrant batch delete failed [%s, %d ids]: %s", + self.model_name, + len(point_ids), + e, + ) + raise + + # ============================================================ Search/Count + + async def search( + self, + query_vector: List[float], + limit: int = 10, + query_filter: Optional[qmodels.Filter] = None, + with_payload: bool = True, + with_vectors: bool = False, + score_threshold: Optional[float] = None, + **kwargs: Any, + ) -> List[qmodels.ScoredPoint]: + """ANN search with optional payload-filter.""" + try: + return await asyncio.to_thread( + self.collection.search, + query_vector, + limit, + query_filter, + with_payload, + with_vectors, + score_threshold, + **kwargs, + ) + except Exception as e: + logger.error( + "Qdrant search failed [%s, limit=%d]: %s", + self.model_name, + limit, + e, + ) + raise + + async def count(self, exact: bool = True) -> int: + """Number of points in the underlying collection.""" + return await asyncio.to_thread(self.collection.count, exact) From 1210b5fe037a93dee93811f86f116e4c475289bd Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:02:15 +0000 Subject: [PATCH 07/20] =?UTF-8?q?feat(qdrant):=20Phase=202=20batch=201=20?= 
=?UTF-8?q?=E2=80=94=20EpisodicMemory=20+=20AgentCase=20collection+convert?= =?UTF-8?q?er?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First two of six Qdrant collections matching the Milvus search adapter layout under src/infra_layer/adapters/out/search/. Subsequent commits add the remaining four (agent_skill, foresight, atomic_fact, user_profile). Collections (src/infra_layer/adapters/out/search/qdrant/memory/): - episodic_memory_collection.py: TenantAwareQdrantCollectionWithSuffix subclass, base name v1_episodic_memory. Vector 1024-dim Cosine, HNSW m=16 ef_construct=200. Payload indexes: user_id/group_id/session_id/ parent_id/parent_type/type (keyword) + timestamp (integer, epoch ms). - agent_case_collection.py: base name v1_agent_case. Same vector params; payload indexes minus participants/type, plus timestamp as epoch seconds (parity with milvus converter). Converters (src/infra_layer/adapters/out/search/qdrant/converter/): - episodic_memory_qdrant_converter.py: BaseQdrantConverter[ EpisodicMemoryCollection]. from_mongo() builds a PointStruct from a MongoDB v1_episodic_memories document. Handles missing optional fields (sender_ids, type, subject, summary) and serializes the search_content list to JSON for downstream search service consumption. - agent_case_qdrant_converter.py: BaseQdrantConverter[AgentCaseCollection]. Maps AgentCaseRecord -> PointStruct, truncates task_intent to 5000 chars (Milvus parity), uses epoch-seconds timestamp. Qdrant is schema-flexible, so unlike the Milvus side there is no field schema declaration — only the vector params and the explicit payload indexes (the rest of the payload is whatever the converter writes). 
--- .../adapters/out/search/qdrant/__init__.py | 0 .../out/search/qdrant/converter/__init__.py | 0 .../converter/agent_case_qdrant_converter.py | 67 ++++++++++ .../episodic_memory_qdrant_converter.py | 119 ++++++++++++++++++ .../out/search/qdrant/memory/__init__.py | 0 .../qdrant/memory/agent_case_collection.py | 53 ++++++++ .../memory/episodic_memory_collection.py | 68 ++++++++++ 7 files changed, 307 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/__init__.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/__init__.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/__init__.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/__init__.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/__init__.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py new file mode 100644 index 00000000..0787632d --- /dev/null +++ 
b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py @@ -0,0 +1,67 @@ +""" +AgentCase Qdrant Converter. + +Converts MongoDB ``AgentCaseRecord`` documents into Qdrant ``PointStruct`` +instances for upsert into ``v1_agent_case``. Vector is the embedding of +``task_intent`` (caller must populate ``source_doc.vector`` first). +""" + +from qdrant_client.http import models as qmodels + +from core.observation.logger import get_logger +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.agent_case import ( + AgentCaseRecord, +) +from infra_layer.adapters.out.search.qdrant.memory.agent_case_collection import ( + AgentCaseCollection, +) + +logger = get_logger(__name__) + + +class AgentCaseQdrantConverter(BaseQdrantConverter[AgentCaseCollection]): + """Converts MongoDB ``AgentCaseRecord`` documents into Qdrant point payloads.""" + + @classmethod + def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct: + """ + Build a ``PointStruct`` from a MongoDB ``AgentCaseRecord``. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + task_intent = source_doc.task_intent or "" + # Parity with Milvus converter: epoch seconds (not ms) for this collection. 
+ timestamp_s = ( + int(source_doc.timestamp.timestamp()) if source_doc.timestamp else 0 + ) + + payload = { + "user_id": source_doc.user_id or "", + "group_id": source_doc.group_id or "", + "session_id": source_doc.session_id or "", + "timestamp": timestamp_s, + "task_intent": task_intent[:5000], + "parent_type": source_doc.parent_type or "", + "parent_id": source_doc.parent_id or "", + } + + vector = source_doc.vector if source_doc.vector else [] + + return qmodels.PointStruct( + id=str(source_doc.id), + vector=vector, + payload=payload, + ) + + except Exception as e: + logger.error( + "Failed to convert AgentCaseRecord to Qdrant point: %s", e + ) + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py new file mode 100644 index 00000000..cdc13996 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py @@ -0,0 +1,119 @@ +""" +Episodic Memory Qdrant Converter. + +Converts MongoDB ``v1_episodic_memories`` documents to Qdrant ``PointStruct`` +instances for upsert into ``v1_episodic_memory``. Only search-essential +fields are mapped — full payload is fetched from MongoDB via ``parent_id`` +back-reference. + +Vector is taken from ``source_doc.vector`` (caller must have populated the +embedding before calling the converter). 
+""" + +import json +from typing import List + +from qdrant_client.http import models as qmodels + +from core.observation.logger import get_logger +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.episodic_memory import ( + EpisodicMemory as MongoEpisodicMemory, +) +from infra_layer.adapters.out.search.qdrant.memory.episodic_memory_collection import ( + EpisodicMemoryCollection, +) + +logger = get_logger(__name__) + + +class EpisodicMemoryQdrantConverter(BaseQdrantConverter[EpisodicMemoryCollection]): + """ + Converts MongoDB ``v1_episodic_memories`` documents to Qdrant point payloads. + + Output shape: ``qdrant_client.http.models.PointStruct`` with the document + id as point id, the pre-computed embedding as the vector, and all + search-relevant scalar fields plus the MongoDB back-reference in the + payload. + """ + + @classmethod + def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: + """ + Build a ``PointStruct`` from a MongoDB episodic-memory document. + + Args: + source_doc: MongoDB ``v1_episodic_memories`` document instance. + + Returns: + ``PointStruct`` ready for ``client.upsert([point])``. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + # Timestamp -> epoch milliseconds (integer, parity with Milvus). 
+ timestamp_ms = ( + int(source_doc.timestamp.timestamp() * 1000) + if source_doc.timestamp + else 0 + ) + + search_content = cls._build_search_content(source_doc) + + payload = { + "user_id": source_doc.user_id or "", + "group_id": source_doc.group_id or "", + "session_id": source_doc.session_id or "", + "participants": source_doc.participants or [], + "sender_ids": getattr(source_doc, "sender_ids", []) or [], + "type": getattr(source_doc, "type", None) or "", + "timestamp": timestamp_ms, + "episode": source_doc.episode or "", + "search_content": search_content, + "parent_type": source_doc.parent_type or "", + "parent_id": ( + str(source_doc.parent_id) if source_doc.parent_id else "" + ), + } + + vector = ( + source_doc.vector + if hasattr(source_doc, "vector") and source_doc.vector + else [] + ) + + return qmodels.PointStruct( + id=str(source_doc.id), + vector=vector, + payload=payload, + ) + + except Exception as e: + logger.error("Failed to convert MongoDB document to Qdrant point: %s", e) + raise + + @staticmethod + def _build_search_content(source_doc: MongoEpisodicMemory) -> str: + """ + Build search content string from the document's text fields. + + Returns a JSON-stringified list (parity with Milvus converter; the + search pipeline can deserialize it back to a list when needed). 
+ """ + text_content: List[str] = [] + + if hasattr(source_doc, "subject") and source_doc.subject: + text_content.append(source_doc.subject) + + if hasattr(source_doc, "summary") and source_doc.summary: + text_content.append(source_doc.summary) + + if hasattr(source_doc, "episode") and source_doc.episode: + text_content.append(source_doc.episode) + + return json.dumps(text_content, ensure_ascii=False) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/__init__.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py new file mode 100644 index 00000000..ec39c124 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py @@ -0,0 +1,53 @@ +""" +AgentCase Qdrant Collection Definition. + +Stores vector embeddings of agent task-solving experiences. The vector +represents the ``task_intent`` of one experience per MemCell. Full payload +is fetched from MongoDB via the ``parent_id`` back-reference. + +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str | "", + "group_id": str | "", + "session_id": str | "", + "timestamp": int, # epoch seconds + "task_intent": str, # truncated to 5000 chars + "parent_type": str | "", + "parent_id": str | "", + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class AgentCaseCollection(TenantAwareQdrantCollectionWithSuffix): + """V1 Agent Case Qdrant Collection. 
Tenant-prefixed at construction time.""" + + _COLLECTION_NAME = "v1_agent_case" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Scope filters. + "user_id": "keyword", + "group_id": "keyword", + "session_id": "keyword", + # Back-reference filters. + "parent_id": "keyword", + "parent_type": "keyword", + # Time-range filter. + "timestamp": "integer", + }, + ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py new file mode 100644 index 00000000..1228869f --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py @@ -0,0 +1,68 @@ +""" +V1 Episodic Memory Qdrant Collection Definition. + +Based on MongoDB ``v1_episodic_memories``. Stores only search-essential +fields; full data is retrieved from MongoDB using ``parent_id``. Qdrant is +schema-flexible — payload fields are described here for documentation +only; only fields that need filtering get an explicit payload index. + +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str | "", + "group_id": str | "", + "session_id": str | "", + "participants": list[str], + "sender_ids": list[str], + "type": str | "", + "timestamp": int, # epoch milliseconds + "episode": str | "", + "search_content": str, # JSON-stringified list + "parent_type": str | "", + "parent_id": str | "", + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class EpisodicMemoryCollection(TenantAwareQdrantCollectionWithSuffix): + """ + V1 Episodic Memory Qdrant Collection. 
+ + Tenant-prefixed name resolution comes from + ``TenantAwareQdrantCollectionWithSuffix`` (e.g., + ``acme_v1_episodic_memory``). HNSW parameters are tuned conservatively + for ~10k-scale collections; revisit for larger workloads. + """ + + # Logical base name. The actual Qdrant collection name is resolved at + # construction time by the parent class (tenant prefix + optional suffix). + _COLLECTION_NAME = "v1_episodic_memory" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Tenant-isolation + scope filters (all keyword for exact-equality). + "user_id": "keyword", + "group_id": "keyword", + "session_id": "keyword", + # Back-reference filters (lookup-by-parent for resync flows). + "parent_id": "keyword", + "parent_type": "keyword", + # Type and time-range filters used by the search service. + "type": "keyword", + "timestamp": "integer", + }, + ) From c301e018605adbd2016d86fd595525b55018b1d5 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:06:04 +0000 Subject: [PATCH 08/20] =?UTF-8?q?feat(qdrant):=20Phase=202=20batch=202=20?= =?UTF-8?q?=E2=80=94=20AgentSkill=20+=20Foresight=20collection+converter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds collections 3 and 4 of six. Same TenantAwareQdrantCollectionWithSuffix + BaseQdrantConverter pattern as batch 1. Collections: - agent_skill_collection.py: base name v1_agent_skill. Payload indexes for user_id/group_id/cluster_id (keyword) + maturity_score/confidence (float) for threshold range queries. - foresight_collection.py: base name v1_foresight_record. Payload indexes for user_id/group_id/session_id/parent_id/parent_type/type (keyword) + start_time/end_time (integer, epoch ms). Converters: - agent_skill_qdrant_converter.py: AgentSkillRecord -> PointStruct. 
Builds the content payload from name + description, truncates to 5000 chars. Coerces optional maturity_score / confidence to 0.0 when absent (Qdrant silently excludes null-valued payloads from range filters, so treating 'unscored' as 'lowest score' keeps them visible to threshold queries). - foresight_qdrant_converter.py: ForesightRecord -> PointStruct. Time-field parser accepts datetime / ISO-8601 / numeric. **Diverges from Milvus template**: numeric inputs above 1e10 are treated as already-milliseconds rather than blindly multiplied by 1000 — the Milvus version corrupts already-ms inputs. content payload is intentionally passed verbatim (incl. None) for downstream sentinel semantics; documented inline. --- .../converter/agent_skill_qdrant_converter.py | 77 ++++++++++ .../converter/foresight_qdrant_converter.py | 142 ++++++++++++++++++ .../qdrant/memory/agent_skill_collection.py | 49 ++++++ .../qdrant/memory/foresight_collection.py | 61 ++++++++ 4 files changed, 329 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py new file mode 100644 index 00000000..50b8f2b5 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py @@ -0,0 +1,77 @@ +""" +AgentSkill Qdrant Converter. 
+ +Converts MongoDB ``AgentSkillRecord`` documents into Qdrant ``PointStruct`` +instances for upsert into ``v1_agent_skill``. Vector is the embedding of +name + description (caller-provided via ``source_doc.vector``). +""" + +from qdrant_client.http import models as qmodels + +from core.observation.logger import get_logger +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.agent_skill import ( + AgentSkillRecord, +) +from infra_layer.adapters.out.search.qdrant.memory.agent_skill_collection import ( + AgentSkillCollection, +) + +logger = get_logger(__name__) + + +class AgentSkillQdrantConverter(BaseQdrantConverter[AgentSkillCollection]): + """Converts MongoDB ``AgentSkillRecord`` documents into Qdrant point payloads.""" + + @classmethod + def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct: + """ + Build a ``PointStruct`` from a MongoDB ``AgentSkillRecord``. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + name = source_doc.name or "" + description = source_doc.description or "" + + # Primary text field: name + newline + description (Milvus parity). + content_field = "\n".join(s for s in [name, description] if s) + + payload = { + "user_id": source_doc.user_id or "", + "group_id": source_doc.group_id or "", + "cluster_id": source_doc.cluster_id or "", + "content": content_field[:5000], + # Coerce optional scores to 0.0 — Qdrant range-filters silently + # exclude ``null``-valued payloads, which would hide unscored points + # from threshold queries. Treat "absent" as "lowest score".
+ "maturity_score": ( + source_doc.maturity_score + if source_doc.maturity_score is not None + else 0.0 + ), + "confidence": ( + source_doc.confidence + if source_doc.confidence is not None + else 0.0 + ), + } + + vector = source_doc.vector if source_doc.vector else [] + + return qmodels.PointStruct( + id=str(source_doc.id), + vector=vector, + payload=payload, + ) + + except Exception as e: + logger.error( + "Failed to convert AgentSkillRecord to Qdrant point: %s", e + ) + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py new file mode 100644 index 00000000..44d37685 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py @@ -0,0 +1,142 @@ +""" +Foresight Qdrant Converter. + +Converts MongoDB ``v1_foresight_records`` documents to Qdrant ``PointStruct`` +instances for upsert into ``v1_foresight_record``. + +Time fields (``start_time``, ``end_time``) accept ``datetime``, ISO-8601 +strings, or numeric epoch seconds or milliseconds — all normalized to epoch +milliseconds on output (same unit as the Milvus converter).
+""" + +import json +from datetime import datetime +from typing import List, Optional, Union + +from qdrant_client.http import models as qmodels + +from core.observation.logger import get_logger +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.foresight_record import ( + ForesightRecord as MongoForesightRecord, +) +from infra_layer.adapters.out.search.qdrant.memory.foresight_collection import ( + ForesightCollection, +) + +logger = get_logger(__name__) + + +class ForesightQdrantConverter(BaseQdrantConverter[ForesightCollection]): + """Converts MongoDB ``v1_foresight_records`` documents to Qdrant point payloads.""" + + @classmethod + def _parse_time_field( + cls, + time_value: Optional[Union[datetime, str, int, float]], + field_name: str, + doc_id: Optional[str], + ) -> int: + """ + Parse a time field to epoch milliseconds. + + Accepts ``datetime``, ISO-8601 strings, numeric epoch seconds, or + numeric epoch milliseconds — the magnitude guard distinguishes the + two numeric units (values above 1e10 are treated as already-ms, + otherwise multiplied by 1000). This intentionally diverges from the + Milvus template, which always multiplies numeric inputs by 1000 and + thus would corrupt already-ms inputs. + """ + if not time_value: + return 0 + + try: + if isinstance(time_value, datetime): + return int(time_value.timestamp() * 1000) + if isinstance(time_value, str): + dt = datetime.fromisoformat(time_value.replace("Z", "+00:00")) + return int(dt.timestamp() * 1000) + if isinstance(time_value, (int, float)): + # Magnitude guard: 1e10 epoch-seconds ~= year 2286, so any + # numeric > 1e10 is already in milliseconds. 
+ value_ms = time_value if time_value > 1e10 else time_value * 1000 + return int(value_ms) + except Exception as e: + logger.warning( + "Failed to parse %s (doc_id=%s): %s, error: %s", + field_name, doc_id, time_value, e, + ) + + return 0 + + @classmethod + def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: + """ + Build a ``PointStruct`` from a MongoDB foresight-record document. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + start_time = cls._parse_time_field( + source_doc.start_time, "start_time", source_doc.id + ) + end_time = cls._parse_time_field( + source_doc.end_time, "end_time", source_doc.id + ) + + search_content = cls._build_search_content(source_doc) + + payload = { + "user_id": source_doc.user_id or "", + "group_id": source_doc.group_id or "", + "session_id": source_doc.session_id or "", + "participants": source_doc.participants or [], + "sender_ids": getattr(source_doc, "sender_ids", []) or [], + "type": getattr(source_doc, "type", None) or "", + "start_time": start_time, + "end_time": end_time, + "duration_days": ( + source_doc.duration_days if source_doc.duration_days else 0 + ), + # ``content`` is intentionally passed through verbatim (incl. + # ``None``) — parity with the Milvus template. Downstream + # search code distinguishes "absent content" from "empty + # content" via the ``None`` sentinel. 
+ "content": source_doc.content, + "evidence": source_doc.evidence or "", + "search_content": search_content, + "parent_type": source_doc.parent_type or "", + "parent_id": ( + str(source_doc.parent_id) if source_doc.parent_id else "" + ), + } + + vector = source_doc.vector if source_doc.vector else [] + + return qmodels.PointStruct( + id=str(source_doc.id), + vector=vector, + payload=payload, + ) + + except Exception as e: + logger.error( + "Failed to convert MongoDB foresight document to Qdrant point: %s", + e, + ) + raise + + @staticmethod + def _build_search_content(source_doc: MongoForesightRecord) -> str: + """Build search content JSON-string from content + evidence fields.""" + text_content: List[str] = [] + if source_doc.content: + text_content.append(source_doc.content) + if source_doc.evidence: + text_content.append(source_doc.evidence) + return json.dumps(text_content, ensure_ascii=False) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py new file mode 100644 index 00000000..7c59aed9 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py @@ -0,0 +1,49 @@ +""" +AgentSkill Qdrant Collection Definition. + +Stores vector embeddings of reusable skill items. The vector represents the +embedding of name + description; ``content`` is the primary searchable text. 
+ +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str | "", # agent owner + "group_id": str | "", + "cluster_id": str | "", # MemScene cluster id + "content": str, # name + "\\n" + description, ≤5000 chars + "maturity_score": float, # 0.0–1.0 + "confidence": float, # 0.0–1.0 + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class AgentSkillCollection(TenantAwareQdrantCollectionWithSuffix): + """V1 Agent Skill Qdrant Collection.""" + + _COLLECTION_NAME = "v1_agent_skill" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Scope filters. + "user_id": "keyword", + "group_id": "keyword", + "cluster_id": "keyword", + # Quality-score filters (range queries for thresholding). + "maturity_score": "float", + "confidence": "float", + }, + ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py new file mode 100644 index 00000000..ee4d3736 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py @@ -0,0 +1,61 @@ +""" +V1 Foresight Record Qdrant Collection Definition. + +Based on MongoDB ``v1_foresight_records``. Stores only search-essential +fields; full data is retrieved from MongoDB using ``parent_id``. 
+ +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str | "", + "group_id": str | "", + "session_id": str | "", + "participants": list[str], + "sender_ids": list[str], + "type": str | "", + "start_time": int, # epoch milliseconds + "end_time": int, # epoch milliseconds + "duration_days": int, + "content": str | None, + "evidence": str | "", + "search_content": str, # JSON-stringified list + "parent_type": str | "", + "parent_id": str | "", + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class ForesightCollection(TenantAwareQdrantCollectionWithSuffix): + """V1 Foresight Record Qdrant Collection.""" + + _COLLECTION_NAME = "v1_foresight_record" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Scope filters. + "user_id": "keyword", + "group_id": "keyword", + "session_id": "keyword", + # Back-reference filters. + "parent_id": "keyword", + "parent_type": "keyword", + # Type + time-range filters. + "type": "keyword", + "start_time": "integer", + "end_time": "integer", + }, + ) From c3a76830cdd7bd0e13f37fb11bc6c2f730adfba1 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:10:11 +0000 Subject: [PATCH 09/20] =?UTF-8?q?feat(qdrant):=20Phase=202=20batch=203=20?= =?UTF-8?q?=E2=80=94=20AtomicFact=20+=20UserProfile=20collection+converter?= =?UTF-8?q?=20(Phase=202=20complete)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the final two of six Qdrant collections — Phase 2 of the Milvus->Qdrant migration is now structurally complete (6 collections + 6 converters under src/infra_layer/adapters/out/search/qdrant/). 
Collections: - atomic_fact_collection.py: base name v1_atomic_fact_record. Payload indexes for user_id/group_id/session_id/parent_id/parent_type/type (keyword) + timestamp (integer, epoch ms). - user_profile_collection.py: base name v1_user_profile. No session_id (user-level aggregation). Payload indexes for user_id/group_id/scenario/ item_type (all keyword). Converters: - atomic_fact_qdrant_converter.py: AtomicFactRecord -> PointStruct. Falls back to RawDataType.CONVERSATION when source.type is absent. exc_info on error log. - user_profile_qdrant_converter.py: **diverges from the other converters' return type** — returns List[Dict[str, Any]] (one item per explicit_info / implicit_trait / user_goal entry) for parity with the Milvus counterpart. ProfileIndexer downstream wraps each item into a PointStruct after embedding. Module-level _EXPLICIT_FIELDS and _IMPLICIT_FIELDS constants (Milvus version had them inline). Carries '# type: ignore[override]' on from_mongo with docstring justification. 
Phase 2 file inventory: src/infra_layer/adapters/out/search/qdrant/ ├── __init__.py ├── memory/ │ ├── __init__.py │ ├── episodic_memory_collection.py │ ├── agent_case_collection.py │ ├── agent_skill_collection.py │ ├── foresight_collection.py │ ├── atomic_fact_collection.py │ └── user_profile_collection.py └── converter/ ├── __init__.py ├── episodic_memory_qdrant_converter.py ├── agent_case_qdrant_converter.py ├── agent_skill_qdrant_converter.py ├── foresight_qdrant_converter.py ├── atomic_fact_qdrant_converter.py └── user_profile_qdrant_converter.py --- .../converter/atomic_fact_qdrant_converter.py | 87 +++++++++ .../user_profile_qdrant_converter.py | 170 ++++++++++++++++++ .../qdrant/memory/atomic_fact_collection.py | 55 ++++++ .../qdrant/memory/user_profile_collection.py | 50 ++++++ 4 files changed, 362 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py new file mode 100644 index 00000000..1351072d --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py @@ -0,0 +1,87 @@ +""" +Atomic Fact Qdrant Converter. + +Converts MongoDB ``v1_atomic_fact_records`` documents to Qdrant +``PointStruct`` instances for upsert into ``v1_atomic_fact_record``. 
+""" + +import json +from typing import List + +from qdrant_client.http import models as qmodels + +from api_specs.memory_types import RawDataType +from core.observation.logger import get_logger +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.atomic_fact_record import ( + AtomicFactRecord as MongoAtomicFactRecord, +) +from infra_layer.adapters.out.search.qdrant.memory.atomic_fact_collection import ( + AtomicFactCollection, +) + +logger = get_logger(__name__) + + +class AtomicFactQdrantConverter(BaseQdrantConverter[AtomicFactCollection]): + """Converts MongoDB ``v1_atomic_fact_records`` to Qdrant point payloads.""" + + @classmethod + def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: + """ + Build a ``PointStruct`` from a MongoDB atomic-fact document. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + timestamp_ms = ( + int(source_doc.timestamp.timestamp() * 1000) + if source_doc.timestamp + else 0 + ) + + payload = { + "user_id": source_doc.user_id or "", + "group_id": source_doc.group_id or "", + "session_id": source_doc.session_id or "", + "participants": source_doc.participants or [], + "sender_ids": getattr(source_doc, "sender_ids", []) or [], + "type": ( + getattr(source_doc, "type", None) + or RawDataType.CONVERSATION.value + ), + "timestamp": timestamp_ms, + "parent_type": source_doc.parent_type or "", + "parent_id": ( + str(source_doc.parent_id) if source_doc.parent_id else "" + ), + } + + vector = source_doc.vector if source_doc.vector else [] + + return qmodels.PointStruct( + id=str(source_doc.id), + vector=vector, + payload=payload, + ) + + except Exception as e: + logger.error( + "Failed to convert MongoDB AtomicFact to Qdrant point: %s", + e, + exc_info=True, + ) + raise + + @staticmethod 
+ def _build_search_content(source_doc: MongoAtomicFactRecord) -> str: + """Build search content JSON-string from the atomic_fact text field.""" + text_content: List[str] = [] + if source_doc.atomic_fact: + text_content.append(source_doc.atomic_fact) + return json.dumps(text_content, ensure_ascii=False) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py new file mode 100644 index 00000000..59bd5fd1 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py @@ -0,0 +1,170 @@ +""" +User Profile Qdrant Converter. + +Converts a single MongoDB ``v1_user_profiles`` document into a **list** of +profile items — one per ``explicit_info`` entry and one per +``implicit_trait``. Each item carries an ``embed_text`` field used by the +ProfileIndexer to generate the actual embedding; the indexer then wraps +each item into a ``PointStruct`` and upserts it. + +Return type intentionally diverges from the other Qdrant converters +(``PointStruct``) — it returns ``List[Dict[str, Any]]`` for parity with the +Milvus counterpart, because the indexer flow expects per-item dicts +(vector is **not yet** set at converter time; that happens downstream). + +The ``from_mongo`` override carries ``# type: ignore[override]`` because of +this intentional contract divergence from the generic ``BaseQdrantConverter`` +signature. The ProfileIndexer downstream is the only known consumer. 
+""" + +from typing import Any, Dict, List + +from api_specs.memory_types import ScenarioType +from core.observation.logger import get_logger +from core.oxm.mongo.mongo_utils import generate_object_id_str +from core.oxm.qdrant.base_converter import BaseQdrantConverter +from infra_layer.adapters.out.persistence.document.memory.user_profile import ( + UserProfile as MongoUserProfile, +) +from infra_layer.adapters.out.search.qdrant.memory.user_profile_collection import ( + UserProfileCollection, +) + +logger = get_logger(__name__) + + +# Profile-data shape (matches Milvus converter): +# explicit fields contain [{value, level?}] items (skills / responsibilities / interests). +_EXPLICIT_FIELDS = [ + ("hard_skills", "Hard Skill"), + ("soft_skills", "Soft Skill"), + ("work_responsibility", "Work Responsibility"), + ("interests", "Interest"), +] + +# Implicit fields contain [{value}] items (personality / tendencies / values). +_IMPLICIT_FIELDS = [ + ("personality", "Personality"), + ("tendency", "Tendency"), + ("way_of_decision_making", "Decision Making"), + ("motivation_system", "Motivation"), + ("fear_system", "Fear"), + ("value_system", "Value"), +] + + +class UserProfileQdrantConverter(BaseQdrantConverter[UserProfileCollection]): + """Splits a MongoDB ``UserProfile`` doc into per-item Qdrant payload dicts.""" + + @classmethod + def from_mongo( # type: ignore[override] + cls, source_doc: MongoUserProfile + ) -> List[Dict[str, Any]]: + """ + Convert a single ``UserProfile`` doc into a list of per-item payloads. + + The returned list contains one dict per ``explicit_info`` / + ``implicit_trait`` entry. Each dict has: + - ``id``: a freshly generated ObjectId string (the Mongo doc ``id`` + would collide across items because we emit many points from one + source doc). + - All filterable payload fields (user_id, group_id, scenario, + memcell_count, item_type). + - ``embed_text``: the text used by the ProfileIndexer to generate + the embedding vector. 
The vector is **not** included — the + indexer wraps the dict into ``PointStruct`` after embedding. + + Raises: + ValueError: when ``source_doc`` is ``None``. + Exception: on any conversion failure (logged + re-raised). + """ + if source_doc is None: + raise ValueError("MongoDB document cannot be empty") + + try: + profile_data: Dict[str, Any] = source_doc.profile_data or {} + user_id = source_doc.user_id or "" + group_id = source_doc.group_id or "" + scenario = source_doc.scenario or ScenarioType.SOLO.value + memcell_count = source_doc.memcell_count or 0 + + items: List[Dict[str, Any]] = [] + + def _make_item(embed_text: str, item_type: str) -> Dict[str, Any]: + return { + "id": generate_object_id_str(), + "user_id": user_id, + "group_id": group_id, + "scenario": scenario, + "memcell_count": memcell_count, + "item_type": item_type, + "embed_text": embed_text, + } + + # ProfileMemory format: per-field lists of {value, level?, ...}. + for field_name, label in _EXPLICIT_FIELDS: + for entry in profile_data.get(field_name, []) or []: + value = ( + entry.get("value", "") + if isinstance(entry, dict) + else str(entry) + ) + if not value: + continue + level = entry.get("level", "") if isinstance(entry, dict) else "" + embed_text = ( + f"{label}: {value}" + (f" ({level})" if level else "") + ) + items.append(_make_item(embed_text, "explicit_info")) + + for field_name, label in _IMPLICIT_FIELDS: + for entry in profile_data.get(field_name, []) or []: + value = ( + entry.get("value", "") + if isinstance(entry, dict) + else str(entry) + ) + if not value: + continue + items.append(_make_item(f"{label}: {value}", "implicit_trait")) + + # Legacy format: flat explicit_info[] / implicit_traits[] arrays + # with {category, description} / {trait, description, basis} shape. 
+ for entry in profile_data.get("explicit_info", []) or []: + if not isinstance(entry, dict): + continue + desc = entry.get("description", "") + if not desc: + continue + category = entry.get("category", "") + embed_text = f"{category}: {desc}" if category else desc + items.append(_make_item(embed_text, "explicit_info")) + + for entry in profile_data.get("implicit_traits", []) or []: + if not isinstance(entry, dict): + continue + desc = entry.get("description", "") + if not desc: + continue + trait_name = entry.get("trait") or entry.get("trait_name", "") + embed_text = f"{trait_name}: {desc}" if trait_name else desc + if entry.get("basis"): + embed_text += f". {entry['basis']}" + items.append(_make_item(embed_text, "implicit_trait")) + + # Single user-goal string. + user_goal = profile_data.get("user_goal") + if user_goal and isinstance(user_goal, str) and user_goal.strip(): + items.append( + _make_item(f"Goal: {user_goal.strip()}", "explicit_info") + ) + + return items + + except Exception as e: + logger.error( + "Failed to convert MongoDB UserProfile to Qdrant items: %s", + e, + exc_info=True, + ) + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py new file mode 100644 index 00000000..bbcc1d65 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py @@ -0,0 +1,55 @@ +""" +V1 Atomic Fact Record Qdrant Collection Definition. + +Based on MongoDB ``v1_atomic_fact_records``. Stores only search-essential +fields; full data is retrieved from MongoDB using ``parent_id``. 
+ +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str | "", + "group_id": str | "", + "session_id": str | "", + "participants": list[str], + "sender_ids": list[str], + "type": str, # default RawDataType.CONVERSATION + "timestamp": int, # epoch milliseconds + "parent_type": str | "", + "parent_id": str | "", + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class AtomicFactCollection(TenantAwareQdrantCollectionWithSuffix): + """V1 Atomic Fact Record Qdrant Collection.""" + + _COLLECTION_NAME = "v1_atomic_fact_record" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Scope filters. + "user_id": "keyword", + "group_id": "keyword", + "session_id": "keyword", + # Back-reference filters. + "parent_id": "keyword", + "parent_type": "keyword", + # Type + time-range filters. + "type": "keyword", + "timestamp": "integer", + }, + ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py new file mode 100644 index 00000000..aa60533e --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py @@ -0,0 +1,50 @@ +""" +V1 User Profile Qdrant Collection Definition. + +Based on MongoDB ``v1_user_profiles``. Stores per-item embeddings — one +point per ``explicit_info`` entry and one per ``implicit_trait`` (the +converter splits a single Mongo doc into many points). UserProfile does +**not** have ``session_id`` (user-level aggregation). 
+ +Wire-Layout per point:: + + PointStruct( + id=, + vector=, + payload={ + "user_id": str, # required + "group_id": str | "", + "scenario": str, # "solo" | "team" + "memcell_count": int, + "item_type": str, # "explicit_info" | "implicit_trait" + "embed_text": str, # text used to generate the vector + }, + ) +""" + +from core.oxm.qdrant.qdrant_collection_base import IndexConfig +from core.tenants.tenantize.oxm.qdrant.tenant_aware_qdrant_collection_with_suffix import ( + TenantAwareQdrantCollectionWithSuffix, +) +from memory_layer.constants import VECTORIZE_DIMENSIONS + + +class UserProfileCollection(TenantAwareQdrantCollectionWithSuffix): + """V1 User Profile Qdrant Collection.""" + + _COLLECTION_NAME = "v1_user_profile" + + _VECTOR_PARAMS = IndexConfig( + size=VECTORIZE_DIMENSIONS, + distance="cosine", + hnsw_m=16, + hnsw_ef_construct=200, + payload_indexes={ + # Scope filters. + "user_id": "keyword", + "group_id": "keyword", + # Cohort filters. + "scenario": "keyword", + "item_type": "keyword", + }, + ) From 753b5dfb5f69c1a362d070f28bd818197b084b16 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:16:00 +0000 Subject: [PATCH 10/20] =?UTF-8?q?feat(qdrant):=20Phase=202.5=20batch=201?= =?UTF-8?q?=20=E2=80=94=20AgentSkill=20+=20EpisodicMemory=20repositories?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First 2 of 6 Qdrant repositories, mirroring the surface of the corresponding Milvus repositories so the search-service layer can swap backends via the VECTOR_STORE_BACKEND env flag. Both repositories: - Inherit from BaseQdrantRepository[], registered as @repository(name='..._qdrant_repository', primary=False). - Build filters as qmodels.Filter(must=[FieldCondition(...)]) using MatchValue / MatchAny / Range instead of Milvus' string expression syntax. Filter-construction is fully typed — no injection vector remains in the search path. 
- Honour the MAGIC_ALL sentinel for user_id / group_id with the same semantics as the Milvus repositories. - Use a two-stage score gating pattern (server-side score_threshold via Qdrant + client-side post-filter at the caller's hard threshold); the rationale is documented inline so future readers don't read it as a duplicated check. AgentSkillQdrantRepository: - vector_search() with maturity_threshold / confidence_threshold range filters plus optional cluster_id / group_ids. - delete_by_cluster_id() — uses scroll() for a best-effort delete count (Qdrant's filter-based delete doesn't return one) then deletes via FilterSelector(filter=...). EpisodicMemoryQdrantRepository: - create_and_save_episodic_memory() — convenience constructor that builds a PointStruct and upserts, returns the same lightweight summary dict as the Milvus repository for caller parity. - vector_search() with full scope + time-range filters. - delete_by_filters() — batch delete by user_id/group_id/time-range; same MAGIC_ALL guard and 'at least one filter required' contract as the Milvus repository. --- .../agent_skill_qdrant_repository.py | 234 +++++++++++++ .../episodic_memory_qdrant_repository.py | 316 ++++++++++++++++++ 2 files changed, 550 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py new file mode 100644 index 00000000..8cf30cb1 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py @@ -0,0 +1,234 @@ +""" +AgentSkill Qdrant Repository. + +Provides vector search for agent skill records via Qdrant. 
Supports +cluster-level delete for the replace pattern used by AgentSkillExtractor. + +Filter expressions are built as ``qmodels.Filter(must=[FieldCondition...])`` +instead of the Milvus string-expression syntax — same semantic, native +typing. +""" + +import asyncio +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.agent_skill_collection import ( + AgentSkillCollection, +) + +logger = get_logger(__name__) + + +@repository("agent_skill_qdrant_repository", primary=False) +class AgentSkillQdrantRepository(BaseQdrantRepository[AgentSkillCollection]): + """ + AgentSkill Qdrant Repository. + + Supports vector similarity search over reusable skill items, plus + cluster-level deletion for the replace pattern. + """ + + def __init__(self) -> None: + super().__init__(AgentSkillCollection) + + # ----------------------------------------------------------------- search + + async def vector_search( + self, + query_vector: List[float], + group_ids: Optional[List[str]] = None, + user_id: Optional[str] = None, + cluster_id: Optional[str] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + maturity_threshold: Optional[float] = 0.6, + confidence_threshold: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """ + Vector similarity search over agent skill items. + + Args: + query_vector: Query embedding vector. + group_ids: Group ID list filter (``None`` to skip). + user_id: User ID filter. ``MAGIC_ALL`` disables the filter. + cluster_id: Filter by MemScene cluster ID. + limit: Max results to return. 
+ score_threshold: Minimum Cosine similarity score (applied + post-search at the wrapper level; Qdrant also gets it via + ``score_threshold`` for early stopping). + radius: Explicit Cosine similarity threshold (>-1.0 enables it). + maturity_threshold: Minimum maturity score (0.0–1.0). ``None`` + skips the filter (include all maturities). + confidence_threshold: Minimum confidence score (0.0–1.0). ``None`` + skips the filter. + + Returns: + List of result dicts with the same shape as the Milvus + repository for caller parity. + """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if maturity_threshold is not None: + conditions.append( + qmodels.FieldCondition( + key="maturity_score", + range=qmodels.Range(gte=maturity_threshold), + ) + ) + + if confidence_threshold is not None: + conditions.append( + qmodels.FieldCondition( + key="confidence", + range=qmodels.Range(gte=confidence_threshold), + ) + ) + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if cluster_id: + conditions.append( + qmodels.FieldCondition( + key="cluster_id", + match=qmodels.MatchValue(value=cluster_id), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + + ef_value = max(128, limit * 2) + # Two-stage score gating (parity with Milvus repository): + # - ``effective_threshold`` is the wider net we pass to Qdrant + # server-side via ``score_threshold`` (uses ``radius`` if it + # was explicitly set, otherwise ``score_threshold``). + # - The client-side ``point.score < score_threshold`` post-filter + # enforces the hard caller-facing minimum. This lets a caller + # widen the recall via ``radius`` while still requiring a + # stricter cut-off in the returned list. 
+ effective_threshold = ( + radius if (radius is not None and radius > -1.0) else score_threshold + ) + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id", ""), + "group_id": payload.get("group_id"), + "cluster_id": payload.get("cluster_id"), + "content": payload.get("content", ""), + } + ) + + logger.debug( + "AgentSkill Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("AgentSkill Qdrant search failed: %s", e) + raise + + # -------------------------------------------------------- domain deletes + + async def delete_by_cluster_id(self, cluster_id: str) -> int: + """ + Delete all Qdrant points whose ``cluster_id`` payload matches. + + Used by the AgentSkillExtractor's replace pattern: drop all skills + of a cluster, then re-upsert the freshly extracted skills. + + Args: + cluster_id: MemScene cluster ID. + + Returns: + Number of points deleted (best-effort; Qdrant doesn't return an + exact count, so we count via a prior scroll). + """ + try: + filter_ = qmodels.Filter( + must=[ + qmodels.FieldCondition( + key="cluster_id", + match=qmodels.MatchValue(value=cluster_id), + ) + ] + ) + + client = self.collection.client() + name = self.collection.name + + # Best-effort count via scroll (Qdrant ``delete(filter=)`` doesn't + # return the deleted-point count). Same idiom as the Milvus + # repository which queries first, then deletes. 
+ scrolled, _ = await asyncio.to_thread( + partial( + client.scroll, + collection_name=name, + scroll_filter=filter_, + limit=10_000, + with_payload=False, + with_vectors=False, + ) + ) + count = len(scrolled) + + if count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + logger.debug( + "Deleted %d Qdrant points for cluster=%s", count, cluster_id + ) + return count + + except Exception as e: + logger.error( + "Failed to delete Qdrant points for cluster=%s: %s", cluster_id, e + ) + return 0 diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py new file mode 100644 index 00000000..d08159e6 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py @@ -0,0 +1,316 @@ +""" +Episodic Memory Qdrant Repository. + +V1 simplified repository for vector semantic retrieval. Only stores +search-essential fields in Qdrant; full data is fetched from MongoDB via +``parent_id`` back-reference. + +Mirrors the surface of the Milvus counterpart for caller parity, but uses +native Qdrant filtering (``qmodels.Filter(must=[FieldCondition...])``) +instead of Milvus' string expression syntax. 
+""" + +import asyncio +import json +from datetime import datetime +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.episodic_memory_collection import ( + EpisodicMemoryCollection, +) + +logger = get_logger(__name__) + + +@repository("episodic_memory_qdrant_repository", primary=False) +class EpisodicMemoryQdrantRepository(BaseQdrantRepository[EpisodicMemoryCollection]): + """V1 simplified Qdrant repository for episodic memory.""" + + def __init__(self) -> None: + super().__init__(EpisodicMemoryCollection) + + # ===================================== Document creation / management + + async def create_and_save_episodic_memory( + self, + id: str, + user_id: str, + timestamp: datetime, + episode: str, + search_content: List[str], + vector: List[float], + title: Optional[str] = None, + summary: Optional[str] = None, + group_id: Optional[str] = None, + participants: Optional[List[str]] = None, + sender_ids: Optional[List[str]] = None, + event_type: Optional[str] = None, + subject: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + High-level convenience constructor: build a ``PointStruct`` and upsert. + + Returns: + A small summary dict (id, user_id, timestamp, episode, + search_content) — same shape as the Milvus repository to keep + callers untouched at cutover. 
+ """ + try: + payload = { + "user_id": user_id or "", + "group_id": group_id or "", + "session_id": "", # not provided by this entry point + "participants": participants or [], + "sender_ids": sender_ids or [], + "type": event_type or "", + "timestamp": int(timestamp.timestamp() * 1000), + "episode": episode, + "search_content": json.dumps(search_content, ensure_ascii=False), + "parent_type": parent_type or "", + "parent_id": parent_id or "", + } + + await self.upsert( + qmodels.PointStruct(id=id, vector=vector, payload=payload) + ) + + logger.debug( + "Episodic memory point upserted: id=%s, user_id=%s", id, user_id + ) + + return { + "id": id, + "user_id": user_id, + "timestamp": timestamp, + "episode": episode, + "search_content": search_content, + } + + except Exception as e: + logger.error( + "Failed to create episodic memory point: id=%s, error=%s", id, e + ) + raise + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + session_id: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with optional scope + time-range filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if parent_type: + 
conditions.append( + qmodels.FieldCondition( + key="parent_type", + match=qmodels.MatchValue(value=parent_type), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = int(start_time.timestamp() * 1000) + if end_time: + time_range["lte"] = int(end_time.timestamp() * 1000) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + # Two-stage score gating (parity with Milvus repository): + # - ``effective_threshold`` is the wider net passed to Qdrant + # server-side via ``score_threshold`` (``radius`` overrides if + # explicitly set above -1.0; otherwise ``score_threshold``). + # - The client-side ``point.score < score_threshold`` post-filter + # enforces the hard caller-facing minimum, allowing callers + # to widen the recall via ``radius`` while keeping a stricter + # cut-off in the returned list. 
+ effective_threshold = ( + radius if (radius is not None and radius > -1.0) else score_threshold + ) + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "session_id": payload.get("session_id"), + "participants": payload.get("participants"), + "timestamp": payload.get("timestamp"), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + "type": payload.get("type"), + "episode": payload.get("episode"), + } + ) + + logger.debug( + "EpisodicMemory Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("EpisodicMemory Qdrant search failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_filters( + self, + user_id: Optional[str] = MAGIC_ALL, + group_id: Optional[str] = MAGIC_ALL, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + ) -> int: + """ + Batch delete by filter combination. + + At least one filter (other than ``MAGIC_ALL`` sentinels) must be + provided, matching the Milvus repository's guard. 
+ """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + if group_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id or ""), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = int(start_time.timestamp() * 1000) + if end_time: + time_range["lte"] = int(end_time.timestamp() * 1000) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + if not conditions: + raise ValueError("At least one filter condition must be provided") + + filter_ = qmodels.Filter(must=conditions) + client = self.collection.client() + name = self.collection.name + + # Count first (Qdrant ``delete(filter=)`` doesn't return a count). + scrolled, _ = await asyncio.to_thread( + partial( + client.scroll, + collection_name=name, + scroll_filter=filter_, + limit=10_000, + with_payload=False, + with_vectors=False, + ) + ) + delete_count = len(scrolled) + + if delete_count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.debug( + "Batch deleted episodic memories: deleted %d points", delete_count + ) + return delete_count + + except Exception as e: + logger.error("Failed to batch delete episodic memories: %s", e) + raise From 7a8bb2893208ce27804e1445d0957939145f1111 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:39:22 +0000 Subject: [PATCH 11/20] fix(qdrant): address all 30 CodeRabbit review findings Resolves the full set of findings from a CodeRabbit code review on feature/qdrant-adapter (1 critical, 10 major, 19 minor). 
**Critical** - episodic_memory_qdrant_repository.delete_by_filters: scroll with hard-coded limit=10_000 could undercount large tenants. Replaced with an exact client.count() call so the returned delete-count reflects the full set. **Major** - agent_skill_qdrant_repository.delete_by_cluster_id: same scroll-limit bug. Same fix (exact count + filter delete) and re-raise on error. - episodic_memory_qdrant_repository.vector_search: two-stage score gating now uses min(radius, score_threshold) for server-side filtering so a wider radius doesn't accidentally make the server cut stricter than the client-side post-filter. - AgentCase / AgentSkill / EpisodicMemory / Foresight / AtomicFact converters: explicit non-empty vector validation (raise instead of silently writing an empty list) and explicit id None-guard (no more str(None) -> 'None' point ids). - foresight_qdrant_converter._parse_time_field: 'if not time_value' treated epoch 0 as missing. Now 'if time_value is None'. - atomic_fact_qdrant_converter: _build_search_content is now actually written into the point payload (was dead code); vector access uses a defensive getattr; type fallback only when source.type is None rather than any falsy value. - base_repository.find_by_id / find_by_ids / delete_by_id: stop swallowing all exceptions. Errors are logged and re-raised so callers can distinguish 'not found' from operational failures. Behaviourally consistent with the rest of the base methods (upsert/delete_batch/search) that already raised on failure. - qdrant_collection_base.drop: log + re-raise instead of swallow. - config_utils._load_qdrant_env and qdrant_client_factory.get_qdrant_config: safe int(QDRANT_PORT) with try/except + TCP range guard (1-65535). - qdrant_client_factory: URL assembly preserves an already-schemed host verbatim (e.g. 'https://my-qdrant.cloud') instead of force-prefixing http:// and double-appending the port.
- qdrant_client_factory.get_client: threading.Lock with double-checked locking eliminates the cache-miss race that could create duplicate QdrantClient instances under concurrent FastAPI requests. **Minor** - 6x collection docstrings: 'dim=1024' -> 'dim=VECTORIZE_DIMENSIONS' (no more drift if the constant changes). - 2x ValueError messages: 'cannot be empty' -> 'cannot be None' to match the actual 'is None' guard. - config_utils.get_qdrant_connection_cache_key: api_key.encode now tolerates bytes/str/other. - agent_skill_qdrant_repository.vector_search: 'user_id is None' now skips the filter entirely instead of matching the empty string. - qdrant_client_factory.get_named_client: cache key normalized via .lower() so 'Default'/'DEFAULT'/'default' share one client. - qdrant_client_factory.get_qdrant_config: 'https_raw' now uses the _env helper consistently with the other env vars. - 2x qdrant_collection_base: 'assert cfg is not None' replaced with explicit RuntimeError so the guard survives python -O. Total: 17 files changed, ~30 distinct fixes. 
--- .../core/component/qdrant_client_factory.py | 130 +++++++++++------- .../src/core/oxm/qdrant/base_repository.py | 36 +++-- .../core/oxm/qdrant/qdrant_collection_base.py | 19 ++- .../tenantize/oxm/qdrant/config_utils.py | 26 +++- .../converter/agent_case_qdrant_converter.py | 13 +- .../converter/agent_skill_qdrant_converter.py | 13 +- .../converter/atomic_fact_qdrant_converter.py | 27 +++- .../episodic_memory_qdrant_converter.py | 11 +- .../converter/foresight_qdrant_converter.py | 15 +- .../qdrant/memory/agent_case_collection.py | 2 +- .../qdrant/memory/agent_skill_collection.py | 2 +- .../qdrant/memory/atomic_fact_collection.py | 2 +- .../memory/episodic_memory_collection.py | 2 +- .../qdrant/memory/foresight_collection.py | 2 +- .../qdrant/memory/user_profile_collection.py | 2 +- .../agent_skill_qdrant_repository.py | 30 ++-- .../episodic_memory_qdrant_repository.py | 39 +++--- 17 files changed, 253 insertions(+), 118 deletions(-) diff --git a/methods/evermemos/src/core/component/qdrant_client_factory.py b/methods/evermemos/src/core/component/qdrant_client_factory.py index f7dea3ee..8025846a 100644 --- a/methods/evermemos/src/core/component/qdrant_client_factory.py +++ b/methods/evermemos/src/core/component/qdrant_client_factory.py @@ -8,6 +8,7 @@ """ import os +import threading from typing import Dict, Optional from qdrant_client import QdrantClient @@ -56,23 +57,49 @@ def _env(name: str, default: Optional[str] = None) -> str: return os.getenv(key, "") return os.getenv(key, default) + def _parse_port(name: str, default: int) -> int: + """Parse a numeric port env var, falling back to ``default`` on invalid input.""" + raw = _env(name, str(default)) + try: + value = int(raw) + except (TypeError, ValueError): + logger.warning( + "Invalid %s value %r — falling back to default %d", name, raw, default + ) + return default + if not (1 <= value <= 65535): + logger.warning( + "%s value %d out of TCP range 1-65535 — falling back to default %d", + name, value, default, 
+ ) + return default + return value + host = _env("QDRANT_HOST", "localhost") - port = int(_env("QDRANT_PORT", "6333")) - grpc_port = int(_env("QDRANT_GRPC_PORT", "6334")) + port = _parse_port("QDRANT_PORT", 6333) + grpc_port = _parse_port("QDRANT_GRPC_PORT", 6334) # api_key / https sind explizit None wenn env nicht gesetzt — so kann # qdrant-client die Defaults / URL-Scheme-Detection selbst uebernehmen. api_key_raw = _env("QDRANT_API_KEY") api_key: Optional[str] = api_key_raw or None - https_raw = os.getenv(f"{prefix.upper()}_QDRANT_HTTPS" if prefix else "QDRANT_HTTPS") - https: Optional[bool] = _truthy(https_raw) if https_raw is not None else None + https_raw = _env("QDRANT_HTTPS") + https: Optional[bool] = _truthy(https_raw) if https_raw else None prefer_grpc = _truthy(_env("QDRANT_PREFER_GRPC", "false")) - timeout = int(_env("QDRANT_TIMEOUT", "30")) - - # URL-Assembly: wenn https explizit gesetzt, halte die Praeferenz. Sonst http. - scheme = "https" if https else "http" - if host.startswith("http://") or host.startswith("https://"): - url = f"{host}:{port}" + try: + timeout = int(_env("QDRANT_TIMEOUT", "30")) + except (TypeError, ValueError): + logger.warning("Invalid QDRANT_TIMEOUT value — falling back to 30") + timeout = 30 + + # URL-Assembly. If host already carries a scheme/port, take it verbatim — the + # caller has explicitly chosen what to connect to. Otherwise build the URL + # from scheme + host + port; when ``https`` is unset (None) the qdrant-client + # SDK does its own scheme inference, so we still default to "http" in the URL + # string for the log/config dict only. 
+ if host.startswith(("http://", "https://")): + url = host if ":" in host.split("//", 1)[1] else f"{host}:{port}" else: + scheme = "https" if https else "http" url = f"{scheme}://{host}:{port}" config = { @@ -115,12 +142,10 @@ class QdrantClientFactory: def __init__(self) -> None: self._clients: Dict[str, QdrantClient] = {} self._default_config: Optional[dict] = None - # Note: typischer use-case ist single-init in lifespan-startup, daher - # kein Lock noetig. Bei concurrent access aus FastAPI-Coroutines auf - # verschiedene named clients kann theoretisch eine Race entstehen - # (beide passen den cache-miss-check, beide erstellen Client, einer - # ueberschreibt den anderen im dict). Fix in Phase 2 via threading.Lock - # falls Concurrent-Pattern auftritt. + # threading.Lock guards the check-then-create cache miss path so two + # concurrent FastAPI requests for the same alias don't both build a + # QdrantClient (with one silently overwriting the other). + self._lock = threading.Lock() logger.info("QdrantClientFactory initialized") def get_client( @@ -157,37 +182,47 @@ def get_client( Returns: ``QdrantClient`` (gecached pro ``alias``). """ - cache_key = alias or "default" + # Normalize cache key so that ``default``, ``Default`` and ``DEFAULT`` + # all share the same cached client. + cache_key = (alias or "default").lower() + + # Fast-path without lock acquisition. 
if cache_key in self._clients: return self._clients[cache_key] - client_kwargs: dict = { - "prefer_grpc": prefer_grpc, - "grpc_port": grpc_port, - "timeout": timeout, - } - if api_key: - client_kwargs["api_key"] = api_key - if https is not None: - client_kwargs["https"] = https - if url: - client_kwargs["url"] = url - else: - client_kwargs["host"] = host or "localhost" - client_kwargs["port"] = port - - client_kwargs.update(kwargs) - - client = QdrantClient(**client_kwargs) - self._clients[cache_key] = client - logger.info( - "Qdrant client created and cached: %s (alias=%s, prefer_grpc=%s, https=%s)", - url or f"{client_kwargs.get('host')}:{port}", - cache_key, - prefer_grpc, - https, - ) - return client + with self._lock: + # Double-checked locking: re-verify under the lock so concurrent + # waiters don't all build a new client. + if cache_key in self._clients: + return self._clients[cache_key] + + client_kwargs: dict = { + "prefer_grpc": prefer_grpc, + "grpc_port": grpc_port, + "timeout": timeout, + } + if api_key: + client_kwargs["api_key"] = api_key + if https is not None: + client_kwargs["https"] = https + if url: + client_kwargs["url"] = url + else: + client_kwargs["host"] = host or "localhost" + client_kwargs["port"] = port + + client_kwargs.update(kwargs) + + client = QdrantClient(**client_kwargs) + self._clients[cache_key] = client + logger.info( + "Qdrant client created and cached: %s (alias=%s, prefer_grpc=%s, https=%s)", + url or f"{client_kwargs.get('host')}:{port}", + cache_key, + prefer_grpc, + https, + ) + return client def get_default_client(self) -> QdrantClient: """Get default Qdrant client basierend auf Env-Konfiguration.""" @@ -216,11 +251,12 @@ def get_named_client(self, name: str) -> QdrantClient: Returns: ``QdrantClient`` (gecached unter ``name``). 
""" - if name.lower() == "default": + normalized = name.lower() + if normalized == "default": return self.get_default_client() cfg = get_qdrant_config(prefix=name) - logger.info("Loading named Qdrant config [name=%s]: %s", name, cfg["url"]) + logger.info("Loading named Qdrant config [name=%s]: %s", normalized, cfg["url"]) return self.get_client( url=cfg["url"], @@ -229,7 +265,7 @@ def get_named_client(self, name: str) -> QdrantClient: prefer_grpc=cfg["prefer_grpc"], grpc_port=cfg["grpc_port"], timeout=cfg["timeout"], - alias=name, + alias=normalized, ) def close_all_clients(self) -> None: diff --git a/methods/evermemos/src/core/oxm/qdrant/base_repository.py b/methods/evermemos/src/core/oxm/qdrant/base_repository.py index 121c3eb7..e91c4ebc 100644 --- a/methods/evermemos/src/core/oxm/qdrant/base_repository.py +++ b/methods/evermemos/src/core/oxm/qdrant/base_repository.py @@ -122,7 +122,9 @@ async def find_by_id( Retrieve a single point by id. Returns ``None`` if not found. Qdrant accepts both ``int`` and ``str`` (UUID) point ids — pass - whichever id type was used at upsert time. + whichever id type was used at upsert time. Operational errors + (network, auth, malformed id type) are logged and re-raised; only + the legitimate "not found" case yields ``None``. """ try: records = await asyncio.to_thread( @@ -132,7 +134,6 @@ async def find_by_id( with_payload, with_vectors, ) - return records[0] if records else None except Exception as e: logger.error( "Qdrant find_by_id failed [%s, id=%s]: %s", @@ -140,7 +141,8 @@ async def find_by_id( point_id, e, ) - return None + raise + return records[0] if records else None async def find_by_ids( self, @@ -148,7 +150,13 @@ async def find_by_ids( with_payload: bool = True, with_vectors: bool = False, ) -> List[qmodels.Record]: - """Batch retrieval by ids. Order of result is not guaranteed.""" + """ + Batch retrieval by ids. Order of result is not guaranteed. 
+ + Returns an empty list when none of the ids exist; raises on any + operational error so callers can distinguish "all-missing" from a + retrieval failure. + """ try: return await asyncio.to_thread( self.collection.client().retrieve, @@ -164,20 +172,22 @@ async def find_by_ids( len(point_ids), e, ) - return [] + raise async def delete_by_id( self, point_id: Any, wait: bool = True, ) -> bool: - """Delete a single point. Returns ``True`` on success.""" + """ + Delete a single point. Returns ``True`` on a successful round-trip. + + Operational errors are logged and re-raised (consistent with + ``upsert`` / ``delete_batch``); the ``bool`` return type is kept + for caller-parity with the Milvus repository. + """ try: await asyncio.to_thread(self.collection.delete, [point_id], wait) - logger.debug( - "Qdrant delete successful [%s]: %s", self.model_name, point_id - ) - return True except Exception as e: logger.error( "Qdrant delete failed [%s, id=%s]: %s", @@ -185,7 +195,11 @@ async def delete_by_id( point_id, e, ) - return False + raise + logger.debug( + "Qdrant delete successful [%s]: %s", self.model_name, point_id + ) + return True async def delete_batch( self, diff --git a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py index 9c6c6ca6..bd5039f6 100644 --- a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py +++ b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py @@ -211,7 +211,13 @@ def ensure_collection(self) -> None: return cfg = self._VECTOR_PARAMS - assert cfg is not None # guarded by __init__ + # ``__init__`` already enforces this — explicit check guards against + # subclasses that override ``__init__`` without invoking ``super``, + # and survives ``python -O`` (where ``assert`` is stripped). 
+ if cfg is None: + raise RuntimeError( + f"{self.__class__.__name__}._VECTOR_PARAMS is None" + ) logger.info( "Creating Qdrant collection '%s' (size=%d, distance=%s, on_disk=%s)", self.name, @@ -335,13 +341,20 @@ def delete( ) def drop(self) -> None: - """Drop the underlying Qdrant collection (DANGEROUS — irreversible).""" + """ + Drop the underlying Qdrant collection (DANGEROUS — irreversible). + + Errors (network, auth, permission) are logged and re-raised so the + caller can react. Use ``exists()`` beforehand to handle the + already-absent case explicitly without relying on swallowed errors. + """ try: self.client().delete_collection(collection_name=self.name) logger.info("Dropped Qdrant collection '%s'", self.name) except Exception as e: # noqa: BLE001 logger.warning( - "Failed to drop Qdrant collection '%s' (may not exist): %s", + "Failed to drop Qdrant collection '%s': %s", self.name, e, ) + raise diff --git a/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py index 8571eaf8..649e7fdf 100644 --- a/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py +++ b/methods/evermemos/src/core/tenants/tenantize/oxm/qdrant/config_utils.py @@ -115,8 +115,13 @@ def get_qdrant_connection_cache_key(config: Dict[str, Any]) -> str: api_key = config.get("api_key") if api_key: - # Hash the api_key fingerprint, not the raw value. - endpoint += f"#{sha256(api_key.encode('utf-8')).hexdigest()[:8]}" + # Hash the api_key fingerprint, not the raw value. Tolerate bytes, + # str, or other types — coerce safely before hashing. 
+ if isinstance(api_key, bytes): + key_bytes = api_key + else: + key_bytes = str(api_key).encode("utf-8") + endpoint += f"#{sha256(key_bytes).hexdigest()[:8]}" return endpoint @@ -144,9 +149,24 @@ def _env(name: str, default: Optional[str] = None) -> str: return os.getenv(key, "") return os.getenv(key, default) + def _safe_port(raw: str, default: int) -> int: + try: + value = int(raw) + except (TypeError, ValueError): + logger.warning( + "Invalid QDRANT_PORT value %r — falling back to %d", raw, default + ) + return default + if not (1 <= value <= 65535): + logger.warning( + "QDRANT_PORT %d out of TCP range — falling back to %d", value, default + ) + return default + return value + return { "host": _env("QDRANT_HOST", "localhost"), - "port": int(_env("QDRANT_PORT", "6333")), + "port": _safe_port(_env("QDRANT_PORT", "6333"), 6333), "api_key": _env("QDRANT_API_KEY") or None, "https": _env("QDRANT_HTTPS", "").strip().lower() in {"1", "true", "yes", "on"}, "prefer_grpc": _env("QDRANT_PREFER_GRPC", "").strip().lower() diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py index 0787632d..2c4c1d35 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py @@ -33,9 +33,18 @@ def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct: Exception: on any conversion failure (logged + re-raised). 
""" if source_doc is None: - raise ValueError("MongoDB document cannot be empty") + raise ValueError("MongoDB document cannot be None") + if source_doc.id is None: + raise ValueError("AgentCaseRecord.id must not be None") try: + vector = source_doc.vector if source_doc.vector else None + if not vector: + raise ValueError( + f"Vector is required for AgentCaseRecord {source_doc.id} " + "but was not populated" + ) + task_intent = source_doc.task_intent or "" # Parity with Milvus converter: epoch seconds (not ms) for this collection. timestamp_s = ( @@ -52,8 +61,6 @@ def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct: "parent_id": source_doc.parent_id or "", } - vector = source_doc.vector if source_doc.vector else [] - return qmodels.PointStruct( id=str(source_doc.id), vector=vector, diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py index 50b8f2b5..200230cc 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py @@ -33,9 +33,18 @@ def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct: Exception: on any conversion failure (logged + re-raised). 
""" if source_doc is None: - raise ValueError("MongoDB document cannot be empty") + raise ValueError("MongoDB document cannot be None") + if source_doc.id is None: + raise ValueError("AgentSkillRecord.id must not be None") try: + vector = source_doc.vector if source_doc.vector else None + if not vector: + raise ValueError( + f"Vector is required for AgentSkillRecord {source_doc.id} " + "but was not populated" + ) + name = source_doc.name or "" description = source_doc.description or "" @@ -62,8 +71,6 @@ def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct: ), } - vector = source_doc.vector if source_doc.vector else [] - return qmodels.PointStruct( id=str(source_doc.id), vector=vector, diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py index 1351072d..9ac67529 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py @@ -36,7 +36,9 @@ def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: Exception: on any conversion failure (logged + re-raised). """ if source_doc is None: - raise ValueError("MongoDB document cannot be empty") + raise ValueError("MongoDB document cannot be None") + if source_doc.id is None: + raise ValueError("AtomicFactRecord.id must not be None") try: timestamp_ms = ( @@ -45,24 +47,37 @@ def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: else 0 ) + # ``getattr(... , None)`` then explicit ``is None`` check so a + # legitimately falsy value (e.g. empty string from a future + # type enum entry) is preserved. 
+ raw_type = getattr(source_doc, "type", None) + event_type = ( + raw_type if raw_type is not None else RawDataType.CONVERSATION.value + ) + payload = { "user_id": source_doc.user_id or "", "group_id": source_doc.group_id or "", "session_id": source_doc.session_id or "", "participants": source_doc.participants or [], "sender_ids": getattr(source_doc, "sender_ids", []) or [], - "type": ( - getattr(source_doc, "type", None) - or RawDataType.CONVERSATION.value - ), + "type": event_type, "timestamp": timestamp_ms, "parent_type": source_doc.parent_type or "", "parent_id": ( str(source_doc.parent_id) if source_doc.parent_id else "" ), + # Persist the canonical text so search results can return the + # underlying atomic_fact without a Mongo round-trip. + "search_content": cls._build_search_content(source_doc), } - vector = source_doc.vector if source_doc.vector else [] + vector = getattr(source_doc, "vector", None) or None + if not vector: + raise ValueError( + f"Vector is required for AtomicFactRecord {source_doc.id} " + "but was not populated" + ) return qmodels.PointStruct( id=str(source_doc.id), diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py index cdc13996..4c4f1813 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py @@ -53,7 +53,9 @@ def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: Exception: on any conversion failure (logged + re-raised). 
""" if source_doc is None: - raise ValueError("MongoDB document cannot be empty") + raise ValueError("MongoDB document cannot be None") + if source_doc.id is None: + raise ValueError("EpisodicMemory.id must not be None") try: # Timestamp -> epoch milliseconds (integer, parity with Milvus). @@ -84,8 +86,13 @@ def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: vector = ( source_doc.vector if hasattr(source_doc, "vector") and source_doc.vector - else [] + else None ) + if not vector: + raise ValueError( + f"Vector is required for EpisodicMemory {source_doc.id} " + "but was not populated" + ) return qmodels.PointStruct( id=str(source_doc.id), diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py index 44d37685..10196b08 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py @@ -47,7 +47,9 @@ def _parse_time_field( Milvus template, which always multiplies numeric inputs by 1000 and thus would corrupt already-ms inputs. """ - if not time_value: + # Explicit ``is None`` so a legitimate epoch 0 / datetime(1970-01-01) + # is not silently dropped as "missing". + if time_value is None: return 0 try: @@ -79,7 +81,9 @@ def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: Exception: on any conversion failure (logged + re-raised). 
""" if source_doc is None: - raise ValueError("MongoDB document cannot be empty") + raise ValueError("MongoDB document cannot be None") + if source_doc.id is None: + raise ValueError("ForesightRecord.id must not be None") try: start_time = cls._parse_time_field( @@ -116,7 +120,12 @@ def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: ), } - vector = source_doc.vector if source_doc.vector else [] + vector = source_doc.vector if source_doc.vector else None + if not vector: + raise ValueError( + f"Vector is required for ForesightRecord {source_doc.id} " + "but was not populated" + ) return qmodels.PointStruct( id=str(source_doc.id), diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py index ec39c124..56d12f4c 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_case_collection.py @@ -9,7 +9,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str | "", "group_id": str | "", diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py index 7c59aed9..780526e2 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/agent_skill_collection.py @@ -8,7 +8,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str | "", # agent owner "group_id": str | "", diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py index bbcc1d65..451fdcde 100644 
--- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/atomic_fact_collection.py @@ -8,7 +8,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str | "", "group_id": str | "", diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py index 1228869f..370a4840 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/episodic_memory_collection.py @@ -10,7 +10,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str | "", "group_id": str | "", diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py index ee4d3736..cd2fa3fe 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/foresight_collection.py @@ -8,7 +8,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str | "", "group_id": str | "", diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py index aa60533e..0e080bb0 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/memory/user_profile_collection.py @@ -10,7 +10,7 @@ PointStruct( id=, - vector=, + vector=, payload={ "user_id": str, # required "group_id": str | "", diff --git 
a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py index 8cf30cb1..e73ef349 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py @@ -93,11 +93,13 @@ async def vector_search( ) ) - if user_id != MAGIC_ALL: + if user_id and user_id != MAGIC_ALL: + # ``None``/empty user_id means "do not filter" (search across + # the whole tenant), not "match the empty-string user_id". conditions.append( qmodels.FieldCondition( key="user_id", - match=qmodels.MatchValue(value=user_id or ""), + match=qmodels.MatchValue(value=user_id), ) ) @@ -198,20 +200,19 @@ async def delete_by_cluster_id(self, cluster_id: str) -> int: client = self.collection.client() name = self.collection.name - # Best-effort count via scroll (Qdrant ``delete(filter=)`` doesn't - # return the deleted-point count). Same idiom as the Milvus - # repository which queries first, then deletes. - scrolled, _ = await asyncio.to_thread( + # Use Qdrant's ``count`` for an exact total instead of a single + # scroll page (which could undercount when the cluster has more + # than the page limit). After counting we issue a single + # filter-based delete that covers all matches. 
+ count_result = await asyncio.to_thread( partial( - client.scroll, + client.count, collection_name=name, - scroll_filter=filter_, - limit=10_000, - with_payload=False, - with_vectors=False, + count_filter=filter_, + exact=True, ) ) - count = len(scrolled) + count = count_result.count if count > 0: await asyncio.to_thread( @@ -231,4 +232,7 @@ async def delete_by_cluster_id(self, cluster_id: str) -> int: logger.error( "Failed to delete Qdrant points for cluster=%s: %s", cluster_id, e ) - return 0 + # Re-raise so callers can distinguish a genuine zero from an + # operational failure (consistent with upsert/search/delete_batch + # in the base repository). + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py index d08159e6..3f550ddc 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py @@ -177,16 +177,18 @@ async def vector_search( query_filter = qmodels.Filter(must=conditions) if conditions else None ef_value = max(128, limit * 2) # Two-stage score gating (parity with Milvus repository): - # - ``effective_threshold`` is the wider net passed to Qdrant - # server-side via ``score_threshold`` (``radius`` overrides if - # explicitly set above -1.0; otherwise ``score_threshold``). - # - The client-side ``point.score < score_threshold`` post-filter - # enforces the hard caller-facing minimum, allowing callers - # to widen the recall via ``radius`` while keeping a stricter - # cut-off in the returned list. - effective_threshold = ( - radius if (radius is not None and radius > -1.0) else score_threshold - ) + # - Server-side: pass the *more permissive* (lower) of + # ``radius`` and ``score_threshold`` so Qdrant returns the + # wider net. 
+ # - Client-side: the ``point.score < score_threshold`` post- + # filter enforces the hard caller-facing minimum. + # This way callers can use ``radius`` to widen recall without + # accidentally making the server-side cut stricter than the + # caller's own cut-off. + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + else: + effective_threshold = score_threshold scored_points = await self.search( query_vector=query_vector, @@ -283,18 +285,19 @@ async def delete_by_filters( client = self.collection.client() name = self.collection.name - # Count first (Qdrant ``delete(filter=)`` doesn't return a count). - scrolled, _ = await asyncio.to_thread( + # Use Qdrant's exact ``count`` instead of a bounded scroll page, + # so the returned count reflects the *full* number of points + # the filter matches (a 10k scroll cap would undercount large + # tenants and produce a misleading return value). + count_result = await asyncio.to_thread( partial( - client.scroll, + client.count, collection_name=name, - scroll_filter=filter_, - limit=10_000, - with_payload=False, - with_vectors=False, + count_filter=filter_, + exact=True, ) ) - delete_count = len(scrolled) + delete_count = count_result.count if delete_count > 0: await asyncio.to_thread( From b9516ba2bb7d6fa58bf22161f83d62710eae1051 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:44:08 +0000 Subject: [PATCH 12/20] =?UTF-8?q?feat(qdrant):=20Phase=202.5=20batch=202?= =?UTF-8?q?=20=E2=80=94=20AgentCase=20+=20AtomicFact=20repositories?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds repositories 3 and 4 of 6. Both apply all CodeRabbit-derived patterns established in the previous Phase 2.5 fix-pass: - user_id/MAGIC_ALL guard skips the filter when None/empty (no spurious user_id == '' match). 
- Two-stage score gating: server-side passes min(radius, score_threshold) so that radius can only widen, never tighten, the server-side cut; client-side post-filter enforces the hard caller minimum. - client.count(exact=True) for delete-by-filter return values (not a bounded scroll page). - All error paths re-raise after a structured log. AgentCaseQdrantRepository: - vector_search() with scope (user_id / session_id / group_ids / parent_id) and time-range filters in epoch seconds (parity with the AgentCase converter and the Milvus repository — agent_case is the one collection storing seconds, not milliseconds). - Returns a datetime for timestamp (round-tripped from epoch seconds with tz=UTC). AtomicFactQdrantRepository: - create_and_save_atomic_fact() convenience constructor: builds the PointStruct (with empty-vector validation), upserts, returns the Milvus-shaped summary dict for caller parity. - vector_search() with full scope + time-range filters in epoch ms. - batch_vector_search_by_parent_ids(): MRAG-Phase-3 expansion path — MatchAny over parent_ids with total_limit = limit * len(parent_ids). Returns early with an empty list when no parent_ids are passed. - delete_by_filters(): uses exact count + filter-based delete; raises on any operational error so callers can distinguish 'no points' from failure. - All search paths return datetime for timestamp (consistent with create_and_save_atomic_fact, parity with agent_case). 
--- .../agent_case_qdrant_repository.py | 152 +++++++ .../atomic_fact_qdrant_repository.py | 406 ++++++++++++++++++ 2 files changed, 558 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py new file mode 100644 index 00000000..754d0622 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py @@ -0,0 +1,152 @@ +""" +AgentCase Qdrant Repository. + +Provides vector search for agent task-solving experiences via Qdrant. +Mirrors the Milvus counterpart's surface for caller parity. + +Timestamp filter is in **epoch seconds** (parity with the Milvus repository +and the AgentCase converter — both store seconds, not milliseconds, for this +collection). 
+""" + +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.agent_case_collection import ( + AgentCaseCollection, +) + +logger = get_logger(__name__) + + +@repository("agent_case_qdrant_repository", primary=False) +class AgentCaseQdrantRepository(BaseQdrantRepository[AgentCaseCollection]): + """V1 AgentCase Qdrant Repository.""" + + def __init__(self) -> None: + super().__init__(AgentCaseCollection) + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + session_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + parent_id: Optional[str] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with scope + time-range filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id and user_id != MAGIC_ALL: + # ``None``/empty user_id means "do not filter". + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + # AgentCase timestamps are epoch SECONDS (Milvus parity). 
+ time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = int(start_time.timestamp()) + if end_time: + time_range["lte"] = int(end_time.timestamp()) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + # Two-stage gating: use the more permissive (lower) of ``radius`` + # and ``score_threshold`` server-side, enforce the hard caller cut + # client-side. See episodic_memory repo for the full rationale. + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + else: + effective_threshold = score_threshold + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_seconds = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "session_id": payload.get("session_id", ""), + # Convert epoch seconds back to UTC datetime for caller + # parity with the Milvus repository. 
+ "timestamp": datetime.fromtimestamp( + ts_seconds, tz=timezone.utc + ), + "task_intent": payload.get("task_intent", ""), + "parent_type": payload.get("parent_type", ""), + "parent_id": payload.get("parent_id", ""), + } + ) + + logger.debug( + "AgentCase Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("AgentCase Qdrant search failed: %s", e) + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py new file mode 100644 index 00000000..9a644780 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py @@ -0,0 +1,406 @@ +""" +Atomic Fact Qdrant Repository. + +Provides vector search + batch lookups for atomic-fact records via Qdrant. +Mirrors the Milvus counterpart's surface for caller parity: + +- ``create_and_save_atomic_fact``: convenience constructor + upsert +- ``vector_search``: scope + time-range filtered search +- ``batch_vector_search_by_parent_ids``: MRAG-Phase-3 expansion from + episodes to atomic facts +- ``delete_by_filters``: batch delete by user/group/time-range + +Timestamp filter uses **epoch milliseconds** (parity with the Milvus +repository and the AtomicFact converter). 
+""" + +import asyncio +import json +from datetime import datetime, timezone +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.atomic_fact_collection import ( + AtomicFactCollection, +) + +logger = get_logger(__name__) + + +@repository("atomic_fact_qdrant_repository", primary=False) +class AtomicFactQdrantRepository(BaseQdrantRepository[AtomicFactCollection]): + """V1 Atomic Fact Qdrant Repository.""" + + def __init__(self) -> None: + super().__init__(AtomicFactCollection) + + # ===================================== Document creation / management + + async def create_and_save_atomic_fact( + self, + id: str, + user_id: Optional[str], + atomic_fact: str, + parent_id: str, + parent_type: str, + timestamp: datetime, + vector: List[float], + group_id: Optional[str] = None, + participants: Optional[List[str]] = None, + sender_ids: Optional[List[str]] = None, + event_type: Optional[str] = None, + search_content: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Build a ``PointStruct`` for an atomic fact and upsert it. + + Returns: + Summary dict (id / user_id / atomic_fact / parent_* / timestamp / + search_content) — same shape as the Milvus repository. 
+ """ + if not vector: + raise ValueError( + f"Vector is required for AtomicFact {id} but was not populated" + ) + + try: + if search_content is None: + search_content = [atomic_fact] + + payload = { + "user_id": user_id or "", + "group_id": group_id or "", + "session_id": "", # not provided by this entry point + "participants": participants or [], + "sender_ids": sender_ids or [], + "type": event_type, + "timestamp": int(timestamp.timestamp() * 1000), + "atomic_fact": atomic_fact, + "search_content": json.dumps(search_content, ensure_ascii=False), + "parent_type": parent_type, + "parent_id": parent_id, + } + + await self.upsert( + qmodels.PointStruct(id=id, vector=vector, payload=payload) + ) + + logger.debug( + "Atomic fact point upserted: id=%s, user_id=%s", id, user_id + ) + + return { + "id": id, + "user_id": user_id, + "atomic_fact": atomic_fact, + "parent_type": parent_type, + "parent_id": parent_id, + "timestamp": timestamp, + "search_content": search_content, + } + + except Exception as e: + logger.error( + "Failed to create atomic fact point: id=%s, error=%s", id, e + ) + raise + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + session_id: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with full scope + time-range filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + 
match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if parent_type: + conditions.append( + qmodels.FieldCondition( + key="parent_type", + match=qmodels.MatchValue(value=parent_type), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = int(start_time.timestamp() * 1000) + if end_time: + time_range["lte"] = int(end_time.timestamp() * 1000) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + else: + effective_threshold = score_threshold + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_ms = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "session_id": payload.get("session_id"), + "participants": payload.get("participants"), + # Convert epoch milliseconds back to UTC datetime so + # callers get a consistent type across all repository + # entry points (parity with create_and_save_atomic_fact + # and with the agent_case repository's 
seconds-path). + "timestamp": datetime.fromtimestamp( + ts_ms / 1000, tz=timezone.utc + ), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + } + ) + + logger.debug( + "AtomicFact Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("AtomicFact Qdrant search failed: %s", e) + raise + + async def batch_vector_search_by_parent_ids( + self, + query_vector: List[float], + parent_ids: List[str], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + limit: int = 5, + score_threshold: float = 0.0, + ) -> List[Dict[str, Any]]: + """ + Vector search restricted to a list of ``parent_id`` values. + + Used by MRAG Phase 3 to expand episodes into their atomic facts. + Total effective limit is ``limit * len(parent_ids)``. + """ + if not parent_ids: + return [] + + try: + conditions: List[qmodels.FieldCondition] = [ + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchAny(any=list(parent_ids)), + ) + ] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + total_limit = limit * len(parent_ids) + ef_value = max(128, total_limit * 2) + + scored_points = await self.search( + query_vector=query_vector, + limit=total_limit, + query_filter=qmodels.Filter(must=conditions), + with_payload=True, + with_vectors=False, + score_threshold=score_threshold if score_threshold > 0 else None, + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_ms = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": 
float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + "atomic_fact": payload.get("atomic_fact"), + "timestamp": datetime.fromtimestamp( + ts_ms / 1000, tz=timezone.utc + ), + "participants": payload.get("participants"), + } + ) + + logger.debug( + "AtomicFact batch search by parent_ids: parent_ids=%d, results=%d", + len(parent_ids), + len(search_results), + ) + return search_results + + except Exception as e: + logger.error("AtomicFact batch search by parent_ids failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_filters( + self, + user_id: Optional[str] = MAGIC_ALL, + group_id: Optional[str] = MAGIC_ALL, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + ) -> int: + """Batch delete by filter combination; at least one filter required.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + if group_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id or ""), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = int(start_time.timestamp() * 1000) + if end_time: + time_range["lte"] = int(end_time.timestamp() * 1000) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + if not conditions: + raise ValueError("At least one filter condition must be provided") + + filter_ = qmodels.Filter(must=conditions) + client = self.collection.client() + name = self.collection.name + + # Exact count for the deleted-points return value (avoids + # the bounded scroll-page undercount). 
+ count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + delete_count = count_result.count + + if delete_count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.debug( + "Batch deleted atomic facts: deleted %d points", delete_count + ) + return delete_count + + except Exception as e: + logger.error("Failed to batch delete atomic facts: %s", e) + raise From d4c27f9ec4eb934e0490e17ad1e227cf72686d7c Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:49:18 +0000 Subject: [PATCH 13/20] =?UTF-8?q?feat(qdrant):=20Phase=202.5=20batch=203?= =?UTF-8?q?=20=E2=80=94=20Foresight=20+=20UserProfile=20repositories=20(Ph?= =?UTF-8?q?ase=202.5=20complete)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final 2 of 6 Qdrant repositories. Phase 2.5 is now structurally complete: all six adapters (agent_skill, episodic_memory, agent_case, atomic_fact, foresight, user_profile) exist as Qdrant repositories matching their Milvus counterparts. All four established Phase-2.5 patterns applied: - user_id/MAGIC_ALL guard skips the filter on None/empty - Two-stage score gating: min(radius, score_threshold) server-side - client.count(exact=True) for delete-by-filter return values - Re-raise after structured log (no swallowed errors) ForesightQdrantRepository: - create_and_save_foresight_mem(): convenience constructor + upsert. session_id is now an explicit parameter (writes into payload) so the matching vector_search(session_id=...) filter actually hits — the Milvus repository signature lacked this parameter and produced a silent zero-hit filter. - vector_search() with scope (user_id/group_ids/session_id/sender_id/ parent_type/parent_id) + time-range filters. 
**Diverges from Milvus**: filters on start_time/end_time payload fields (range containment on the record's own window) instead of the Milvus repository's non-existent 'timestamp' field. Documented inline. sender_id filter uses Qdrant's element-wise MatchValue on the sender_ids array — equivalent to Milvus' array_contains. - delete_by_filters() with the same start_time/end_time semantics. UserProfileQdrantRepository: - vector_search() with user_id/group_id/scenario scoping (no session_id — user_profile is user-level aggregation). - delete_by_user_group(): count + filter-based delete, raises on operational error (consistent with the Phase 2.5 fix-pass on base_repository). --- .../repository/foresight_qdrant_repository.py | 355 ++++++++++++++++++ .../user_profile_qdrant_repository.py | 184 +++++++++ 2 files changed, 539 insertions(+) create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py create mode 100644 methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py new file mode 100644 index 00000000..732ad071 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py @@ -0,0 +1,355 @@ +""" +Foresight Qdrant Repository. + +V1 repository for vector semantic retrieval over foresight records. +Mirrors the Milvus counterpart's surface (``create_and_save_foresight_mem``, +``vector_search``, ``delete_by_filters``) for caller parity. + +**Note on time filters:** the Foresight schema stores ``start_time`` and +``end_time`` (both epoch milliseconds). 
The Milvus repository erroneously +filters on a non-existent ``timestamp`` field; the Qdrant repository +filters on ``start_time``/``end_time`` semantically — ``start_time`` arg +maps to ``start_time >= ...`` and ``end_time`` arg to ``end_time <= ...``, +i.e. window containment (not overlap): the record must lie inside the range. +""" + +import asyncio +import json +from datetime import datetime +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.foresight_collection import ( + ForesightCollection, +) + +logger = get_logger(__name__) + + +@repository("foresight_qdrant_repository", primary=False) +class ForesightQdrantRepository(BaseQdrantRepository[ForesightCollection]): + """V1 Foresight Qdrant Repository.""" + + def __init__(self) -> None: + super().__init__(ForesightCollection) + + # ===================================== Document creation / management + + async def create_and_save_foresight_mem( + self, + id: str, + user_id: Optional[str], + content: str, + parent_id: str, + parent_type: str, + vector: List[float], + group_id: Optional[str] = None, + session_id: Optional[str] = None, + event_type: Optional[str] = None, + participants: Optional[List[str]] = None, + sender_ids: Optional[List[str]] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + duration_days: Optional[int] = None, + evidence: Optional[str] = None, + search_content: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Build a ``PointStruct`` for a foresight record and upsert it. + + ``session_id`` is written into the payload so the matching + ``vector_search(session_id=...)`` filter can hit. 
Parity with the + Foresight schema's ``session_id`` payload index. + + Returns a caller-compatible summary dict (same shape as the Milvus + repository for cutover). + """ + if not vector: + raise ValueError( + f"Vector is required for Foresight {id} but was not populated" + ) + + try: + if search_content is None: + search_content = [content] + if evidence: + search_content.append(evidence) + + payload = { + "user_id": user_id or "", + "group_id": group_id or "", + "session_id": session_id or "", + "participants": participants or [], + "sender_ids": sender_ids or [], + "type": event_type, + "start_time": ( + int(start_time.timestamp() * 1000) if start_time else 0 + ), + "end_time": int(end_time.timestamp() * 1000) if end_time else 0, + "duration_days": duration_days or 0, + "content": content, + "evidence": evidence or "", + "search_content": json.dumps(search_content, ensure_ascii=False), + "parent_type": parent_type, + "parent_id": parent_id, + } + + await self.upsert( + qmodels.PointStruct(id=id, vector=vector, payload=payload) + ) + + logger.debug( + "Foresight point upserted: id=%s, user_id=%s", id, user_id + ) + + return { + "id": id, + "user_id": user_id, + "content": content, + "parent_type": parent_type, + "parent_id": parent_id, + "search_content": search_content, + } + + except Exception as e: + logger.error( + "Failed to create foresight point: id=%s, error=%s", id, e + ) + raise + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + sender_id: Optional[str] = None, + session_id: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """ + Vector similarity search 
with scope, sender, and time-range filters. + + Time filters semantic: + - ``start_time`` arg -> ``payload.start_time >= start_time_ms`` (only + foresights whose window begins at or after the given instant). + - ``end_time`` arg -> ``payload.end_time <= end_time_ms`` (only + foresights whose window ends at or before the given instant). + + ``sender_id`` filters via Qdrant's array-containment semantics on the + ``sender_ids`` payload field. + """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if sender_id: + # Qdrant matches arrays element-wise on MatchValue, so this + # is the equivalent of Milvus' ``array_contains(sender_ids, x)``. + conditions.append( + qmodels.FieldCondition( + key="sender_ids", + match=qmodels.MatchValue(value=sender_id), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if parent_type: + conditions.append( + qmodels.FieldCondition( + key="parent_type", + match=qmodels.MatchValue(value=parent_type), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + if start_time: + conditions.append( + qmodels.FieldCondition( + key="start_time", + range=qmodels.Range(gte=int(start_time.timestamp() * 1000)), + ) + ) + if end_time: + conditions.append( + qmodels.FieldCondition( + key="end_time", + range=qmodels.Range(lte=int(end_time.timestamp() * 1000)), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + 
else: + effective_threshold = score_threshold + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "sender_ids": payload.get("sender_ids"), + "session_id": payload.get("session_id"), + "participants": payload.get("participants"), + "start_time": payload.get("start_time"), + "end_time": payload.get("end_time"), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + } + ) + + logger.debug( + "Foresight Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("Foresight Qdrant search failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_filters( + self, + user_id: Optional[str] = MAGIC_ALL, + group_id: Optional[str] = MAGIC_ALL, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + ) -> int: + """ + Batch delete by filter combination; at least one filter required. + + Time-range semantic matches ``vector_search``: ``start_time`` arg + maps to ``payload.start_time >= ...``, ``end_time`` arg maps to + ``payload.end_time <= ...``. 
+ """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + if group_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id or ""), + ) + ) + + if start_time: + conditions.append( + qmodels.FieldCondition( + key="start_time", + range=qmodels.Range(gte=int(start_time.timestamp() * 1000)), + ) + ) + if end_time: + conditions.append( + qmodels.FieldCondition( + key="end_time", + range=qmodels.Range(lte=int(end_time.timestamp() * 1000)), + ) + ) + + if not conditions: + raise ValueError("At least one filter condition must be provided") + + filter_ = qmodels.Filter(must=conditions) + client = self.collection.client() + name = self.collection.name + + count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + delete_count = count_result.count + + if delete_count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.debug( + "Batch deleted foresights: deleted %d points", delete_count + ) + return delete_count + + except Exception as e: + logger.error("Failed to batch delete foresights: %s", e) + raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py new file mode 100644 index 00000000..620d13a5 --- /dev/null +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py @@ -0,0 +1,184 @@ +""" +User Profile Qdrant Repository. + +V1 repository for vector semantic retrieval over user-profile items. 
+Mirrors the Milvus counterpart's surface for caller parity: +- ``vector_search``: scope (user_id/group_id) + scenario filter +- ``delete_by_user_group``: drop all profile items for a (user_id, group_id) pair + +Note: UserProfile has no ``session_id`` (user-level aggregation). +""" + +import asyncio +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import BaseQdrantRepository +from infra_layer.adapters.out.search.qdrant.memory.user_profile_collection import ( + UserProfileCollection, +) + +logger = get_logger(__name__) + + +@repository("user_profile_qdrant_repository", primary=False) +class UserProfileQdrantRepository(BaseQdrantRepository[UserProfileCollection]): + """V1 User Profile Qdrant Repository.""" + + def __init__(self) -> None: + super().__init__(UserProfileCollection) + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_id: Optional[str] = None, + scenario: Optional[str] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with scope + scenario filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_id: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id), + ) + ) + + if scenario: + conditions.append( + qmodels.FieldCondition( + key="scenario", + match=qmodels.MatchValue(value=scenario), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if 
conditions else None + ef_value = max(128, limit * 2) + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + else: + effective_threshold = score_threshold + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=( + effective_threshold if effective_threshold > 0 else None + ), + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "scenario": payload.get("scenario"), + "memcell_count": payload.get("memcell_count"), + "item_type": payload.get("item_type", ""), + "embed_text": payload.get("embed_text", ""), + } + ) + + logger.debug( + "UserProfile Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("UserProfile Qdrant search failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_user_group(self, user_id: str, group_id: str) -> int: + """ + Delete all profile items for a (user_id, group_id) pair. + + Returns the actual point count via ``client.count(exact=True)`` + (Qdrant's filter-based delete doesn't return a count of its own). 
+ """ + try: + filter_ = qmodels.Filter( + must=[ + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ), + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id), + ), + ] + ) + client = self.collection.client() + name = self.collection.name + + count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + count = count_result.count + + if count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.info( + "Deleted profile items: user_id=%s, group_id=%s, count=%d", + user_id, + group_id, + count, + ) + return count + + except Exception as e: + logger.error( + "Failed to delete profile items: user_id=%s, group_id=%s, error=%s", + user_id, + group_id, + e, + ) + # Re-raise so callers can distinguish "nothing to delete" from + # an operational failure (consistent with base_repository fix). + raise From 7745c51816440d49b4b55885c6dc78c4f48da37c Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 18:58:28 +0000 Subject: [PATCH 14/20] fix(qdrant): address CodeRabbit pass-2 findings (1 major + 6 minor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit pass 2 found 7 follow-up findings after pass 1. All addressed: **Major** - Timezone-naive datetimes in time-range filters silently used the local timezone for .timestamp() conversion, producing wrong epoch values. Added module-level helpers in base_repository: to_epoch_ms(dt) and to_epoch_s(dt) which coerce tz-naive datetimes to UTC. 
The repositories with time fields now use these helpers consistently: - agent_case (epoch seconds via to_epoch_s) - atomic_fact / episodic_memory / foresight (epoch ms via to_epoch_ms) - user_profile (no time fields, so no helper is needed) **Minor** - foresight create_and_save_foresight_mem: start_time/end_time fall back to None instead of 0 when missing — 0 would silently match epoch-1970 records. Documented inline. - base_repository.count(): wrapped in try/except + structured log to match the rest of the async methods. - atomic_fact create_and_save: vector validation now explicit None/empty check (`if vector is None or len(vector) == 0`) instead of `if not vector`, so validation no longer relies on the vector's truthiness (a non-empty list of zeros was already truthy; the explicit check matters for array-like inputs whose truth value is ambiguous). - user_profile_qdrant_repository.vector_search: group_id and scenario filters now mirror user_id's MAGIC_ALL guard (skip filter on MAGIC_ALL sentinel) instead of treating MAGIC_ALL as a literal value to match. Note: the foresight repository's two-stage score-gating pattern was flagged as 'redundant filtering'. It is intentional — server-side uses the more permissive bound (radius widening) and the client-side post-filter enforces the caller's hard cut. The behaviour is documented in the inline comment block; the CodeRabbit finding is a false positive. 
--- .../src/core/oxm/qdrant/base_repository.py | 31 ++++++++++++++++++- .../agent_case_qdrant_repository.py | 8 +++-- .../atomic_fact_qdrant_repository.py | 16 +++++----- .../episodic_memory_qdrant_repository.py | 12 +++---- .../repository/foresight_qdrant_repository.py | 19 ++++++------ .../user_profile_qdrant_repository.py | 4 +-- 6 files changed, 62 insertions(+), 28 deletions(-) diff --git a/methods/evermemos/src/core/oxm/qdrant/base_repository.py b/methods/evermemos/src/core/oxm/qdrant/base_repository.py index e91c4ebc..526d93e5 100644 --- a/methods/evermemos/src/core/oxm/qdrant/base_repository.py +++ b/methods/evermemos/src/core/oxm/qdrant/base_repository.py @@ -15,6 +15,7 @@ import asyncio from abc import ABC +from datetime import datetime, timezone from typing import Any, Generic, List, Optional, Type, TypeVar from qdrant_client.http import models as qmodels @@ -27,6 +28,26 @@ T = TypeVar("T", bound=QdrantCollectionBase) +def to_epoch_ms(dt: datetime) -> int: + """ + Convert a ``datetime`` to epoch milliseconds. + + Naive datetimes (``tzinfo is None``) are interpreted as UTC. Callers that + operate in a local timezone should attach an explicit tzinfo before + handing the datetime to repository methods to avoid silent drift. + """ + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return int(dt.timestamp() * 1000) + + +def to_epoch_s(dt: datetime) -> int: + """Same as :func:`to_epoch_ms` but in seconds (used by ``agent_case``).""" + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + return int(dt.timestamp()) + + class BaseQdrantRepository(ABC, Generic[T]): """ Base class for all Qdrant repositories. 
@@ -261,4 +282,12 @@ async def search( async def count(self, exact: bool = True) -> int: """Number of points in the underlying collection.""" - return await asyncio.to_thread(self.collection.count, exact) + try: + result = await asyncio.to_thread(self.collection.count, exact) + except Exception as e: + logger.error("Qdrant count failed [%s]: %s", self.model_name, e) + raise + logger.debug( + "Qdrant count successful [%s]: %d points", self.model_name, result + ) + return result diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py index 754d0622..58075277 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py @@ -17,7 +17,7 @@ from core.di.decorators import repository from core.observation.logger import get_logger from core.oxm.constants import MAGIC_ALL -from core.oxm.qdrant.base_repository import BaseQdrantRepository +from core.oxm.qdrant.base_repository import BaseQdrantRepository, to_epoch_s from infra_layer.adapters.out.search.qdrant.memory.agent_case_collection import ( AgentCaseCollection, ) @@ -83,11 +83,13 @@ async def vector_search( ) # AgentCase timestamps are epoch SECONDS (Milvus parity). + # to_epoch_s coerces tz-naive datetimes to UTC to avoid silent + # locale drift in the filter bounds. 
time_range: Dict[str, int] = {} if start_time: - time_range["gte"] = int(start_time.timestamp()) + time_range["gte"] = to_epoch_s(start_time) if end_time: - time_range["lte"] = int(end_time.timestamp()) + time_range["lte"] = to_epoch_s(end_time) if time_range: conditions.append( qmodels.FieldCondition( diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py index 9a644780..c6bb0aa6 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py @@ -25,7 +25,7 @@ from core.di.decorators import repository from core.observation.logger import get_logger from core.oxm.constants import MAGIC_ALL -from core.oxm.qdrant.base_repository import BaseQdrantRepository +from core.oxm.qdrant.base_repository import BaseQdrantRepository, to_epoch_ms from infra_layer.adapters.out.search.qdrant.memory.atomic_fact_collection import ( AtomicFactCollection, ) @@ -64,7 +64,9 @@ async def create_and_save_atomic_fact( Summary dict (id / user_id / atomic_fact / parent_* / timestamp / search_content) — same shape as the Milvus repository. """ - if not vector: + # Explicit None / empty check so a legitimate all-zero embedding + # is not falsy-rejected (any() on a list of 0.0s is False). 
+ if vector is None or len(vector) == 0: raise ValueError( f"Vector is required for AtomicFact {id} but was not populated" ) @@ -80,7 +82,7 @@ async def create_and_save_atomic_fact( "participants": participants or [], "sender_ids": sender_ids or [], "type": event_type, - "timestamp": int(timestamp.timestamp() * 1000), + "timestamp": to_epoch_ms(timestamp), "atomic_fact": atomic_fact, "search_content": json.dumps(search_content, ensure_ascii=False), "parent_type": parent_type, @@ -173,9 +175,9 @@ async def vector_search( time_range: Dict[str, int] = {} if start_time: - time_range["gte"] = int(start_time.timestamp() * 1000) + time_range["gte"] = to_epoch_ms(start_time) if end_time: - time_range["lte"] = int(end_time.timestamp() * 1000) + time_range["lte"] = to_epoch_ms(end_time) if time_range: conditions.append( qmodels.FieldCondition( @@ -356,9 +358,9 @@ async def delete_by_filters( time_range: Dict[str, int] = {} if start_time: - time_range["gte"] = int(start_time.timestamp() * 1000) + time_range["gte"] = to_epoch_ms(start_time) if end_time: - time_range["lte"] = int(end_time.timestamp() * 1000) + time_range["lte"] = to_epoch_ms(end_time) if time_range: conditions.append( qmodels.FieldCondition( diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py index 3f550ddc..a97d8575 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py @@ -21,7 +21,7 @@ from core.di.decorators import repository from core.observation.logger import get_logger from core.oxm.constants import MAGIC_ALL -from core.oxm.qdrant.base_repository import BaseQdrantRepository +from core.oxm.qdrant.base_repository import BaseQdrantRepository, to_epoch_ms from 
infra_layer.adapters.out.search.qdrant.memory.episodic_memory_collection import ( EpisodicMemoryCollection, ) @@ -72,7 +72,7 @@ async def create_and_save_episodic_memory( "participants": participants or [], "sender_ids": sender_ids or [], "type": event_type or "", - "timestamp": int(timestamp.timestamp() * 1000), + "timestamp": to_epoch_ms(timestamp), "episode": episode, "search_content": json.dumps(search_content, ensure_ascii=False), "parent_type": parent_type or "", @@ -163,9 +163,9 @@ async def vector_search( time_range: Dict[str, int] = {} if start_time: - time_range["gte"] = int(start_time.timestamp() * 1000) + time_range["gte"] = to_epoch_ms(start_time) if end_time: - time_range["lte"] = int(end_time.timestamp() * 1000) + time_range["lte"] = to_epoch_ms(end_time) if time_range: conditions.append( qmodels.FieldCondition( @@ -267,9 +267,9 @@ async def delete_by_filters( time_range: Dict[str, int] = {} if start_time: - time_range["gte"] = int(start_time.timestamp() * 1000) + time_range["gte"] = to_epoch_ms(start_time) if end_time: - time_range["lte"] = int(end_time.timestamp() * 1000) + time_range["lte"] = to_epoch_ms(end_time) if time_range: conditions.append( qmodels.FieldCondition( diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py index 732ad071..8a467020 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py @@ -24,7 +24,7 @@ from core.di.decorators import repository from core.observation.logger import get_logger from core.oxm.constants import MAGIC_ALL -from core.oxm.qdrant.base_repository import BaseQdrantRepository +from core.oxm.qdrant.base_repository import BaseQdrantRepository, to_epoch_ms from 
infra_layer.adapters.out.search.qdrant.memory.foresight_collection import ( ForesightCollection, ) @@ -88,10 +88,11 @@ async def create_and_save_foresight_mem( "participants": participants or [], "sender_ids": sender_ids or [], "type": event_type, - "start_time": ( - int(start_time.timestamp() * 1000) if start_time else 0 - ), - "end_time": int(end_time.timestamp() * 1000) if end_time else 0, + # ``None`` (not 0) for missing bounds so range queries treat + # "no start/end" distinct from "epoch 1970". Downstream + # filters skip the field when payload value is None. + "start_time": to_epoch_ms(start_time) if start_time else None, + "end_time": to_epoch_ms(end_time) if end_time else None, "duration_days": duration_days or 0, "content": content, "evidence": evidence or "", @@ -209,14 +210,14 @@ async def vector_search( conditions.append( qmodels.FieldCondition( key="start_time", - range=qmodels.Range(gte=int(start_time.timestamp() * 1000)), + range=qmodels.Range(gte=to_epoch_ms(start_time)), ) ) if end_time: conditions.append( qmodels.FieldCondition( key="end_time", - range=qmodels.Range(lte=int(end_time.timestamp() * 1000)), + range=qmodels.Range(lte=to_epoch_ms(end_time)), ) ) @@ -307,14 +308,14 @@ async def delete_by_filters( conditions.append( qmodels.FieldCondition( key="start_time", - range=qmodels.Range(gte=int(start_time.timestamp() * 1000)), + range=qmodels.Range(gte=to_epoch_ms(start_time)), ) ) if end_time: conditions.append( qmodels.FieldCondition( key="end_time", - range=qmodels.Range(lte=int(end_time.timestamp() * 1000)), + range=qmodels.Range(lte=to_epoch_ms(end_time)), ) ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py index 620d13a5..34b3f3ca 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py +++ 
b/methods/evermemos/src/infra_layer/adapters/out/search/repository/user_profile_qdrant_repository.py @@ -57,7 +57,7 @@ async def vector_search( ) ) - if group_id: + if group_id and group_id != MAGIC_ALL: conditions.append( qmodels.FieldCondition( key="group_id", @@ -65,7 +65,7 @@ async def vector_search( ) ) - if scenario: + if scenario and scenario != MAGIC_ALL: conditions.append( qmodels.FieldCondition( key="scenario", From 67bf8cbdd6f67e4daa8f76710c7dbf0e467680c2 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 19:17:27 +0000 Subject: [PATCH 15/20] =?UTF-8?q?feat(qdrant):=20Phase=203=20=E2=80=94=20s?= =?UTF-8?q?tandalone=20re-embed=20CLI=20(Mongo=20=E2=86=92=20OpenRouter=20?= =?UTF-8?q?=E2=86=92=20Qdrant)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds devops_scripts/migrate_milvus_to_qdrant.py, the workhorse for the Phase 3 cutover. Standalone: no EverOS DI container required. Reads OpenRouter, Mongo, Qdrant config from env (auto-loads .env via python-dotenv when present). Migrates one (mongo-db, mongo-collection) pair to one Qdrant collection per invocation; shell-loop over the six EverOS collection types × N tenants for the full sweep. 
Defaults match the documented xinfty stack: - VECTORIZE_MODEL=qwen/qwen3-embedding-8b - VECTORIZE_DIMENSIONS=4096 - OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 - MONGO_URI=mongodb://localhost:27017 - QDRANT_HOST=localhost, QDRANT_PORT=6333 CLI args expose the per-collection variation: --text-field primary text used for embedding --extra-text-fields comma-separated secondary text fields --timestamp-field + --timestamp-unit ms|s --payload-fields comma-separated mongo fields projected to qdrant payload --batch-size embedding batch size (default 32) --limit cap for smoke tests --force re-embed and overwrite existing points --dry-run count without calling OpenRouter or Qdrant.upsert --log-level DEBUG/INFO/WARNING/ERROR Idempotent by default: client.retrieve filters out point ids that already exist in the target Qdrant collection (skip path); --force overwrites them. Embedded behaviour: - extract_text concatenates text_field + extra_text_fields with newlines - build_payload projects payload_fields + normalizes timestamp via datetime.timestamp() (epoch ms or s depending on --timestamp-unit) - search_content is JSON-serialized from the text pieces, mirroring the converter's payload shape used by EverOS' search service. ensure_qdrant_collection creates the target collection with the same HNSW/Cosine config the EverOS adapter writes (m=16, ef_construct=200, distance=Cosine) so the schema matches what the live service expects. 
--- .../migrate_milvus_to_qdrant.py | 448 ++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py diff --git a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py new file mode 100644 index 00000000..05ba5e2f --- /dev/null +++ b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py @@ -0,0 +1,448 @@ +#!/usr/bin/env python3 +""" +Re-embed MongoDB source-of-truth into Qdrant via OpenRouter qwen3-embedding-8b. + +Standalone CLI — does not rely on EverOS' DI container. Reads connection +config from environment / ``.env`` (loaded via python-dotenv if present): + + OPENROUTER_API_KEY # required + OPENROUTER_BASE_URL # default: https://openrouter.ai/api/v1 + VECTORIZE_MODEL # default: qwen/qwen3-embedding-8b + VECTORIZE_DIMENSIONS # default: 4096 + MONGO_URI # default: mongodb://localhost:27017 + QDRANT_HOST # default: localhost + QDRANT_PORT # default: 6333 + +Workhorse migrates a single (mongo-db, mongo-collection) -> qdrant-collection +pair. Use a shell loop over the 6 EverOS collection-types × N tenants to do +the full sweep (see ``re_embed_all.sh`` next to this file). + +Idempotent: existing point ids in the target Qdrant collection are skipped +unless ``--force`` is passed. + +Security note: at ``--log-level DEBUG`` PyMongo emits connection events +that include the raw Mongo URI. If your ``MONGO_URI`` carries credentials +(``mongodb://user:pass@host``) avoid DEBUG in shared terminals or pipe the +output through a redactor. 
+ +Usage:: + + python migrate_milvus_to_qdrant.py \\ + --mongo-db _episodic_memsys \\ + --mongo-coll v1_episodic_memories \\ + --qdrant-coll _v1_episodic_memory \\ + --text-field episode \\ + --extra-text-fields subject,summary \\ + --timestamp-field timestamp --timestamp-unit ms \\ + --payload-fields user_id,group_id,session_id,participants,sender_ids,type,parent_type,parent_id \\ + --batch-size 32 +""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import sys +import time +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional, Tuple + +try: + from dotenv import load_dotenv + + _dotenv_path = os.environ.get("EVEROS_ENV_FILE") + if _dotenv_path: + load_dotenv(_dotenv_path) + else: + load_dotenv() # picks up ./.env if present +except ImportError: + pass + +from openai import OpenAI +from pymongo import MongoClient +from qdrant_client import QdrantClient +from qdrant_client.http import models as qmodels + +logger = logging.getLogger("migrate") + + +# ============================================================ Configuration + + +@dataclass(frozen=True) +class Config: + openrouter_api_key: str + openrouter_base_url: str + vectorize_model: str + vectorize_dimensions: int + mongo_uri: str + qdrant_host: str + qdrant_port: int + + @classmethod + def from_env(cls) -> "Config": + api_key = os.environ.get("OPENROUTER_API_KEY", "").strip() + if not api_key: + raise SystemExit("OPENROUTER_API_KEY is required (env or .env)") + + return cls( + openrouter_api_key=api_key, + openrouter_base_url=os.environ.get( + "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1" + ).rstrip("/"), + vectorize_model=os.environ.get( + "VECTORIZE_MODEL", "qwen/qwen3-embedding-8b" + ), + vectorize_dimensions=int(os.environ.get("VECTORIZE_DIMENSIONS", "4096")), + mongo_uri=os.environ.get("MONGO_URI", "mongodb://localhost:27017"), + qdrant_host=os.environ.get("QDRANT_HOST", "localhost"), + 
qdrant_port=int(os.environ.get("QDRANT_PORT", "6333")), + ) + + +# ============================================================== Embedding + + +def embed_batch( + client: OpenAI, + model: str, + dimensions: int, + texts: List[str], +) -> List[List[float]]: + """Call OpenRouter ``/embeddings`` for a batch of texts.""" + response = client.embeddings.create( + model=model, + input=texts, + dimensions=dimensions, + ) + # OpenAI client returns objects sorted by ``index`` in the response. + return [item.embedding for item in response.data] + + +# ============================================================ Doc handling + + +def extract_text( + doc: Dict[str, Any], + primary_field: str, + extra_fields: Tuple[str, ...], +) -> str: + """ + Concatenate primary + extra text fields into a single embedding input. + + Each non-empty value is joined with newlines. The primary field is + always first. + """ + parts: List[str] = [] + primary = doc.get(primary_field) + if primary: + parts.append(str(primary)) + for field in extra_fields: + value = doc.get(field) + if value: + parts.append(str(value)) + return "\n".join(parts).strip() + + +def build_payload( + doc: Dict[str, Any], + payload_fields: Tuple[str, ...], + timestamp_field: Optional[str], + timestamp_unit: str, + primary_text: str, + extra_text_fields: Tuple[str, ...], +) -> Dict[str, Any]: + """Project mongo doc fields into a Qdrant payload dict.""" + payload: Dict[str, Any] = {} + for field in payload_fields: + if field in doc: + payload[field] = doc[field] + + # Timestamp normalization to epoch (the unit is collection-dependent). 
+ if timestamp_field and timestamp_field in doc: + ts_value = doc[timestamp_field] + if hasattr(ts_value, "timestamp"): # datetime + secs = ts_value.timestamp() + if timestamp_unit == "ms": + payload[timestamp_field] = int(secs * 1000) + else: + payload[timestamp_field] = int(secs) + elif isinstance(ts_value, (int, float)): + payload[timestamp_field] = int(ts_value) + + # Persist the text used for the embedding for downstream search-result + # surfaces (matches the Milvus converter's ``search_content`` payload). + text_pieces = [] + for field in (primary_text, *extra_text_fields): + value = doc.get(field) + if value: + text_pieces.append(value) + if text_pieces: + payload["search_content"] = json.dumps(text_pieces, ensure_ascii=False) + + return payload + + +# ============================================================== Qdrant ops + + +def ensure_qdrant_collection( + client: QdrantClient, name: str, vector_size: int +) -> None: + """Create the target Qdrant collection if it does not exist yet.""" + if client.collection_exists(name): + logger.info("Qdrant collection '%s' already exists — keeping schema", name) + return + + logger.info( + "Creating Qdrant collection '%s' (size=%d, distance=Cosine, HNSW m=16 ef=200)", + name, vector_size, + ) + client.create_collection( + collection_name=name, + vectors_config=qmodels.VectorParams( + size=vector_size, + distance=qmodels.Distance.COSINE, + hnsw_config=qmodels.HnswConfigDiff(m=16, ef_construct=200), + ), + ) + + +def filter_existing_ids( + client: QdrantClient, collection_name: str, ids: List[str] +) -> List[str]: + """Return the subset of ``ids`` not already present in the collection.""" + if not ids: + return [] + existing = client.retrieve( + collection_name=collection_name, + ids=ids, + with_payload=False, + with_vectors=False, + ) + present = {str(p.id) for p in existing} + return [i for i in ids if i not in present] + + +# ============================================================ Orchestration + + +def 
migrate( + config: Config, + mongo_db: str, + mongo_coll: str, + qdrant_coll: str, + text_field: str, + extra_text_fields: Tuple[str, ...], + timestamp_field: Optional[str], + timestamp_unit: str, + payload_fields: Tuple[str, ...], + batch_size: int, + limit: Optional[int], + force: bool, + dry_run: bool, +) -> None: + """Run the migration for one (mongo-db, mongo-collection) pair.""" + logger.info( + "Migrate %s.%s -> Qdrant '%s' (model=%s, dim=%d, batch=%d, force=%s, dry_run=%s)", + mongo_db, mongo_coll, qdrant_coll, config.vectorize_model, + config.vectorize_dimensions, batch_size, force, dry_run, + ) + + mongo = MongoClient(config.mongo_uri) + qdrant = QdrantClient(host=config.qdrant_host, port=config.qdrant_port) + openai = OpenAI( + api_key=config.openrouter_api_key, + base_url=config.openrouter_base_url, + ) + + coll = mongo[mongo_db][mongo_coll] + total_docs = coll.estimated_document_count() + logger.info("Source has ~%d documents", total_docs) + + if not dry_run: + ensure_qdrant_collection(qdrant, qdrant_coll, config.vectorize_dimensions) + + cursor = coll.find() + if limit: + cursor = cursor.limit(limit) + + processed = 0 + skipped_existing = 0 + skipped_no_text = 0 + upserted = 0 + started = time.time() + + batch_docs: List[Dict[str, Any]] = [] + + def flush(batch: List[Dict[str, Any]]) -> Tuple[int, int, int]: + """Embed + upsert one batch. 
Returns (upserted, skipped_existing, skipped_no_text).""" + ids = [str(d["_id"]) for d in batch] + if force: + new_ids = ids + else: + new_ids = filter_existing_ids(qdrant, qdrant_coll, ids) if not dry_run else ids + new_set = set(new_ids) + new_docs = [d for d in batch if str(d["_id"]) in new_set] + + texts: List[str] = [] + kept_docs: List[Dict[str, Any]] = [] + for d in new_docs: + text = extract_text(d, text_field, extra_text_fields) + if not text: + continue + texts.append(text) + kept_docs.append(d) + + if dry_run: + return len(kept_docs), len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + + if not texts: + return 0, len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + + vectors = embed_batch( + openai, config.vectorize_model, config.vectorize_dimensions, texts + ) + + points: List[qmodels.PointStruct] = [] + for d, vec in zip(kept_docs, vectors): + payload = build_payload( + d, payload_fields, timestamp_field, timestamp_unit, + text_field, extra_text_fields, + ) + points.append( + qmodels.PointStruct(id=str(d["_id"]), vector=vec, payload=payload) + ) + + qdrant.upsert(collection_name=qdrant_coll, points=points, wait=True) + return len(points), len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + + for doc in cursor: + batch_docs.append(doc) + if len(batch_docs) >= batch_size: + u, s_e, s_n = flush(batch_docs) + upserted += u + skipped_existing += s_e + skipped_no_text += s_n + processed += len(batch_docs) + logger.info( + "Progress: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + time.time() - started, + ) + batch_docs = [] + + if batch_docs: + u, s_e, s_n = flush(batch_docs) + upserted += u + skipped_existing += s_e + skipped_no_text += s_n + processed += len(batch_docs) + + logger.info( + "DONE: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + 
time.time() - started, + ) + + +# =================================================================== CLI + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description="Re-embed MongoDB docs into Qdrant via OpenRouter." + ) + p.add_argument("--mongo-db", required=True, help="Source Mongo database name") + p.add_argument("--mongo-coll", required=True, help="Source Mongo collection name") + p.add_argument("--qdrant-coll", required=True, help="Target Qdrant collection name") + p.add_argument( + "--text-field", + required=True, + help="Primary text field used for embedding (e.g., episode, task_intent)", + ) + p.add_argument( + "--extra-text-fields", + default="", + help="Comma-separated secondary text fields appended after the primary", + ) + p.add_argument( + "--timestamp-field", + default="", + help="Mongo field carrying the timestamp (omit to skip)", + ) + p.add_argument( + "--timestamp-unit", + choices=["ms", "s"], + default="ms", + help="Target unit for the timestamp payload value", + ) + p.add_argument( + "--payload-fields", + required=True, + help="Comma-separated list of fields to project from Mongo into the Qdrant payload", + ) + p.add_argument("--batch-size", type=int, default=32) + p.add_argument( + "--limit", + type=int, + default=None, + help="Max number of docs to process (for smoke tests)", + ) + p.add_argument( + "--force", + action="store_true", + help="Re-embed and overwrite even if the Qdrant point already exists", + ) + p.add_argument( + "--dry-run", + action="store_true", + help="Count what would happen without calling OpenRouter or Qdrant.upsert", + ) + p.add_argument( + "--log-level", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + ) + return p.parse_args() + + +def main() -> int: + args = parse_args() + logging.basicConfig( + level=args.log_level, + format="%(asctime)s %(levelname)-7s %(name)s | %(message)s", + ) + + config = Config.from_env() + + extra_text_fields = tuple( + f.strip() for f in 
args.extra_text_fields.split(",") if f.strip() + ) + payload_fields = tuple( + f.strip() for f in args.payload_fields.split(",") if f.strip() + ) + timestamp_field = args.timestamp_field.strip() or None + + migrate( + config=config, + mongo_db=args.mongo_db, + mongo_coll=args.mongo_coll, + qdrant_coll=args.qdrant_coll, + text_field=args.text_field, + extra_text_fields=extra_text_fields, + timestamp_field=timestamp_field, + timestamp_unit=args.timestamp_unit, + payload_fields=payload_fields, + batch_size=args.batch_size, + limit=args.limit, + force=args.force, + dry_run=args.dry_run, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From c17ba6006c76be6d9caba931a170e9839aeb04a3 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 19:24:05 +0000 Subject: [PATCH 16/20] fix(qdrant): map Mongo ObjectId -> uuid5 for Qdrant point ids Qdrant only accepts unsigned-int or RFC-4122-UUID point ids; the Mongo ObjectId hex (e.g. 69ed6acfaf31e5cd7977bc56) is neither and the live pilot hit a 400 Bad Request from the Qdrant retrieve endpoint. Fix: - base_repository: add mongo_id_to_qdrant_id() helper that does str(uuid.uuid5(NAMESPACE, str(mongo_id))). Namespace is a fixed UUID embedded in code (must never change without a full re-migration). - migrate_milvus_to_qdrant.py: use the helper for the Qdrant point id; keep the original Mongo id in the payload as 'mongo_id' for reverse lookup. The Phase-2 converters (search/qdrant/converter/*) still use str(source_doc.id) and would fail the same way the moment they go live. That fix is the next commit. 
--- .../src/core/oxm/qdrant/base_repository.py | 22 ++++++++ .../migrate_milvus_to_qdrant.py | 56 +++++++++++++++---- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/methods/evermemos/src/core/oxm/qdrant/base_repository.py b/methods/evermemos/src/core/oxm/qdrant/base_repository.py index 526d93e5..fa04aca4 100644 --- a/methods/evermemos/src/core/oxm/qdrant/base_repository.py +++ b/methods/evermemos/src/core/oxm/qdrant/base_repository.py @@ -14,6 +14,7 @@ """ import asyncio +import uuid from abc import ABC from datetime import datetime, timezone from typing import Any, Generic, List, Optional, Type, TypeVar @@ -28,6 +29,27 @@ T = TypeVar("T", bound=QdrantCollectionBase) +# Stable namespace for Mongo ObjectId -> Qdrant UUID translation. +# Qdrant point ids accept only unsigned integers or RFC-4122 UUIDs; +# Mongo ObjectIds (24 hex chars) are neither. Mapping is via ``uuid5`` +# (SHA-1, deterministic) so the same Mongo id always maps to the same +# Qdrant point id — required for idempotent re-embed + lookup by Mongo +# back-reference. NEVER change this namespace without a full data-side +# remigration. +_MONGO_TO_QDRANT_NS = uuid.UUID("ec57c0e3-5e90-4d4a-9c1c-a8b9c7d8e7d6") + + +def mongo_id_to_qdrant_id(mongo_id: Any) -> str: + """ + Deterministic UUID5 mapping of any Mongo doc id (ObjectId/str/int) to a + Qdrant-compatible point id string. + + The mapping is one-way (idempotent), so callers that need the Mongo + original keep it in the payload (e.g. as ``parent_id``). + """ + return str(uuid.uuid5(_MONGO_TO_QDRANT_NS, str(mongo_id))) + + def to_epoch_ms(dt: datetime) -> int: """ Convert a ``datetime`` to epoch milliseconds. 
diff --git a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py index 05ba5e2f..f853063b 100644 --- a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py +++ b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py @@ -65,6 +65,10 @@ from qdrant_client import QdrantClient from qdrant_client.http import models as qmodels +# Stable, namespace-shared with the repository layer so script-side and +# service-side ids agree on the same Mongo->Qdrant translation. +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id + logger = logging.getLogger("migrate") @@ -279,45 +283,73 @@ def migrate( def flush(batch: List[Dict[str, Any]]) -> Tuple[int, int, int]: """Embed + upsert one batch. Returns (upserted, skipped_existing, skipped_no_text).""" - ids = [str(d["_id"]) for d in batch] + # Mongo ids are mapped to Qdrant point ids via uuid5; idempotent so + # the existence-check below works across reruns. + qdrant_ids = [mongo_id_to_qdrant_id(d["_id"]) for d in batch] if force: - new_ids = ids + new_ids = qdrant_ids else: - new_ids = filter_existing_ids(qdrant, qdrant_coll, ids) if not dry_run else ids + new_ids = ( + filter_existing_ids(qdrant, qdrant_coll, qdrant_ids) + if not dry_run + else qdrant_ids + ) new_set = set(new_ids) - new_docs = [d for d in batch if str(d["_id"]) in new_set] + new_docs = [ + d for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] + # Carry the resolved qdrant id alongside the doc so we don't recompute + # the uuid5 twice; attach as a temporary key on a shallow copy. 
+ new_pairs: List[Tuple[Dict[str, Any], str]] = [ + (d, qid) for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] texts: List[str] = [] - kept_docs: List[Dict[str, Any]] = [] - for d in new_docs: + kept_pairs: List[Tuple[Dict[str, Any], str]] = [] + for d, qid in new_pairs: text = extract_text(d, text_field, extra_text_fields) if not text: continue texts.append(text) - kept_docs.append(d) + kept_pairs.append((d, qid)) if dry_run: - return len(kept_docs), len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + return ( + len(kept_pairs), + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) if not texts: - return 0, len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + return ( + 0, + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) vectors = embed_batch( openai, config.vectorize_model, config.vectorize_dimensions, texts ) points: List[qmodels.PointStruct] = [] - for d, vec in zip(kept_docs, vectors): + for (d, qid), vec in zip(kept_pairs, vectors): payload = build_payload( d, payload_fields, timestamp_field, timestamp_unit, text_field, extra_text_fields, ) + # Keep the original Mongo id in the payload so reverse-lookup + # from Qdrant -> Mongo is trivial. 
+ payload["mongo_id"] = str(d["_id"]) points.append( - qmodels.PointStruct(id=str(d["_id"]), vector=vec, payload=payload) + qmodels.PointStruct(id=qid, vector=vec, payload=payload) ) qdrant.upsert(collection_name=qdrant_coll, points=points, wait=True) - return len(points), len(batch) - len(new_docs), len(new_docs) - len(kept_docs) + return ( + len(points), + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) for doc in cursor: batch_docs.append(doc) From 67ef8535983c202597c2503feafeee9e2f1e2c3a Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 19:24:50 +0000 Subject: [PATCH 17/20] fix(qdrant): re-embed script sys.path injection for standalone CLI ModuleNotFoundError: 'core' when invoked as python src/devops_scripts/migrate_milvus_to_qdrant.py ... Fix: prepend the script's parent-of-parent (the EverOS src/ tree) to sys.path before the core.oxm.qdrant.base_repository import. Lets the script run without PYTHONPATH or pip install. --- .../src/devops_scripts/migrate_milvus_to_qdrant.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py index f853063b..04e6a362 100644 --- a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py +++ b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py @@ -49,6 +49,14 @@ from dataclasses import dataclass from typing import Any, Dict, Iterable, List, Optional, Tuple +# Standalone CLI: add ``src/`` to sys.path so EverOS-internal modules +# (``core.oxm.qdrant.base_repository``) resolve when this script is +# invoked directly with ``python src/devops_scripts/migrate_milvus_to_qdrant.py`` +# (no install / no PYTHONPATH). 
+_SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _SRC_DIR not in sys.path: + sys.path.insert(0, _SRC_DIR) + try: from dotenv import load_dotenv From e93a6a424edd2abfa20eb51f633862beefbbda11 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 20:05:38 +0000 Subject: [PATCH 18/20] fix(qdrant): map Mongo ids via uuid5 in all 5 production converters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Phase 2 converters used ``id=str(source_doc.id)`` directly when building ``PointStruct`` payloads. Qdrant only accepts unsigned integers or RFC-4122 UUIDs as point ids — a 24-hex-char Mongo ``ObjectId`` is neither, so the Live-indexing path produced a 400 Bad Request on upsert. The standalone re-embed CLI (commit c17ba60) already routed Mongo ids through ``mongo_id_to_qdrant_id`` (uuid5 over a stable namespace), so the two paths now agree: - standalone migrate: Mongo doc -> uuid5 point id - live converter: Mongo doc -> uuid5 point id (this commit) Both also persist the raw Mongo id in the payload as ``mongo_id`` for round-trip lookup, idempotent re-embed, and debugging. ``user_profile_qdrant_converter`` is intentionally left untouched: it emits multiple points per source doc (one per explicit_info/implicit_trait entry), assigns fresh ObjectIds, and has its own Phase 3.1 path that needs a separate point-id scheme. 
--- .../search/qdrant/converter/agent_case_qdrant_converter.py | 5 ++++- .../qdrant/converter/agent_skill_qdrant_converter.py | 5 ++++- .../qdrant/converter/atomic_fact_qdrant_converter.py | 5 ++++- .../qdrant/converter/episodic_memory_qdrant_converter.py | 7 ++++++- .../search/qdrant/converter/foresight_qdrant_converter.py | 5 ++++- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py index 2c4c1d35..d3dbd1b3 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py @@ -10,6 +10,7 @@ from core.observation.logger import get_logger from core.oxm.qdrant.base_converter import BaseQdrantConverter +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id from infra_layer.adapters.out.persistence.document.memory.agent_case import ( AgentCaseRecord, ) @@ -59,10 +60,12 @@ def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct: "task_intent": task_intent[:5000], "parent_type": source_doc.parent_type or "", "parent_id": source_doc.parent_id or "", + # Mongo back-reference (see episodic_memory converter). 
+ "mongo_id": str(source_doc.id), } return qmodels.PointStruct( - id=str(source_doc.id), + id=mongo_id_to_qdrant_id(source_doc.id), vector=vector, payload=payload, ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py index 200230cc..020ecb15 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py @@ -10,6 +10,7 @@ from core.observation.logger import get_logger from core.oxm.qdrant.base_converter import BaseQdrantConverter +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id from infra_layer.adapters.out.persistence.document.memory.agent_skill import ( AgentSkillRecord, ) @@ -69,10 +70,12 @@ def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct: if source_doc.confidence is not None else 0.0 ), + # Mongo back-reference (see episodic_memory converter). 
+ "mongo_id": str(source_doc.id), } return qmodels.PointStruct( - id=str(source_doc.id), + id=mongo_id_to_qdrant_id(source_doc.id), vector=vector, payload=payload, ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py index 9ac67529..b334a3bf 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py @@ -13,6 +13,7 @@ from api_specs.memory_types import RawDataType from core.observation.logger import get_logger from core.oxm.qdrant.base_converter import BaseQdrantConverter +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id from infra_layer.adapters.out.persistence.document.memory.atomic_fact_record import ( AtomicFactRecord as MongoAtomicFactRecord, ) @@ -70,6 +71,8 @@ def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: # Persist the canonical text so search results can return the # underlying atomic_fact without a Mongo round-trip. "search_content": cls._build_search_content(source_doc), + # Mongo back-reference (see episodic_memory converter). 
+ "mongo_id": str(source_doc.id), } vector = getattr(source_doc, "vector", None) or None @@ -80,7 +83,7 @@ def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: ) return qmodels.PointStruct( - id=str(source_doc.id), + id=mongo_id_to_qdrant_id(source_doc.id), vector=vector, payload=payload, ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py index 4c4f1813..74c30745 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py @@ -17,6 +17,7 @@ from core.observation.logger import get_logger from core.oxm.qdrant.base_converter import BaseQdrantConverter +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id from infra_layer.adapters.out.persistence.document.memory.episodic_memory import ( EpisodicMemory as MongoEpisodicMemory, ) @@ -81,6 +82,10 @@ def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: "parent_id": ( str(source_doc.parent_id) if source_doc.parent_id else "" ), + # Mongo back-reference: Qdrant ids are derived via uuid5, so + # we keep the raw Mongo id in the payload for round-trip + # lookup, idempotent re-embed, and debugging. 
+ "mongo_id": str(source_doc.id), } vector = ( @@ -95,7 +100,7 @@ def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: ) return qmodels.PointStruct( - id=str(source_doc.id), + id=mongo_id_to_qdrant_id(source_doc.id), vector=vector, payload=payload, ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py index 10196b08..40baf973 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py @@ -17,6 +17,7 @@ from core.observation.logger import get_logger from core.oxm.qdrant.base_converter import BaseQdrantConverter +from core.oxm.qdrant.base_repository import mongo_id_to_qdrant_id from infra_layer.adapters.out.persistence.document.memory.foresight_record import ( ForesightRecord as MongoForesightRecord, ) @@ -118,6 +119,8 @@ def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: "parent_id": ( str(source_doc.parent_id) if source_doc.parent_id else "" ), + # Mongo back-reference (see episodic_memory converter). 
+ "mongo_id": str(source_doc.id), } vector = source_doc.vector if source_doc.vector else None @@ -128,7 +131,7 @@ def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: ) return qmodels.PointStruct( - id=str(source_doc.id), + id=mongo_id_to_qdrant_id(source_doc.id), vector=vector, payload=payload, ) From 4fe11a26efd8922ec5a431b7b84fa660e10576b7 Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 20:05:49 +0000 Subject: [PATCH 19/20] feat(qdrant): sweep wrapper for batch re-embed across tenants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds ``re_embed_sweep.py`` next to the standalone ``migrate_milvus_to_qdrant`` workhorse. The wrapper iterates every active (non-hyphen) Mongo database times every supported collection type and invokes ``migrate(...)`` for each non-empty pair. Five collection types are covered: - episodic_memory (v1_episodic_memories -> _v1_episodic_memory) - atomic_fact (v1_atomic_fact_records -> _v1_atomic_fact_record) - foresight (v1_foresight_records -> _v1_foresight_record) - agent_case (v1_agent_cases -> _v1_agent_case) - agent_skill (v1_agent_skills -> _v1_agent_skill) ``v1_user_profiles`` is deliberately excluded — it needs per-doc splitting (one source doc -> many Qdrant points), handled by a separate Phase 3.1 script. CLI shape mirrors the workhorse: ``--tenant``, ``--collection``, ``--batch-size``, ``--limit-per-pair``, ``--force``, ``--dry-run``, ``--log-level``. The wrapper imports ``migrate`` directly (no subprocess fan-out) so config is read once and progress logs interleave naturally. 
--- .../src/devops_scripts/re_embed_sweep.py | 326 ++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 methods/evermemos/src/devops_scripts/re_embed_sweep.py diff --git a/methods/evermemos/src/devops_scripts/re_embed_sweep.py b/methods/evermemos/src/devops_scripts/re_embed_sweep.py new file mode 100644 index 00000000..d0462e36 --- /dev/null +++ b/methods/evermemos/src/devops_scripts/re_embed_sweep.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +""" +Sweep wrapper for re-embedding all active Mongo databases into Qdrant. + +Iterates the underscore-named Mongo DBs (hyphen DBs are the abandoned +2026-04-25 generation — skipped) × 5 collection types and calls the +workhorse ``migrate`` for each non-empty pair. + +``v1_user_profiles`` is intentionally excluded: it needs per-doc splitting +(one Mongo doc → many Qdrant points), which the workhorse does not do. +That migration runs separately (Phase 3.1). + +Usage:: + + # Full sweep of every active DB × every supported collection + python re_embed_sweep.py --batch-size 64 + + # Smoke: one tenant, one collection, dry-run + python re_embed_sweep.py --tenant acme --collection episodic_memory --dry-run + + # Per-pair cap (smoke before full run) + python re_embed_sweep.py --limit-per-pair 5 --dry-run +""" + +from __future__ import annotations + +import argparse +import logging +import os +import sys +import time +from dataclasses import dataclass, field +from typing import List, Optional, Tuple + +# Add src/ to sys.path so devops_scripts.migrate_milvus_to_qdrant resolves +# when invoked directly.
+_SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _SRC_DIR not in sys.path: + sys.path.insert(0, _SRC_DIR) + +try: + from dotenv import load_dotenv + + _p = os.environ.get("EVEROS_ENV_FILE") + if _p: + load_dotenv(_p) + else: + load_dotenv() +except ImportError: + pass + +from pymongo import MongoClient + +from devops_scripts.migrate_milvus_to_qdrant import Config, migrate + +logger = logging.getLogger("sweep") + + +# =============================================================== Spec map + + +@dataclass(frozen=True) +class CollectionSpec: + """Per-collection-type config for the sweep.""" + + mongo_collection: str + qdrant_base: str + text_field: str + extra_text_fields: Tuple[str, ...] = () + timestamp_field: Optional[str] = "timestamp" + timestamp_unit: str = "ms" + payload_fields: Tuple[str, ...] = field(default_factory=tuple) + + +SPECS = { + "episodic_memory": CollectionSpec( + mongo_collection="v1_episodic_memories", + qdrant_base="v1_episodic_memory", + text_field="episode", + extra_text_fields=("subject", "summary"), + timestamp_field="timestamp", + timestamp_unit="ms", + payload_fields=( + "user_id", "group_id", "session_id", + "participants", "sender_ids", "type", + "parent_type", "parent_id", + ), + ), + "atomic_fact": CollectionSpec( + mongo_collection="v1_atomic_fact_records", + qdrant_base="v1_atomic_fact_record", + text_field="atomic_fact", + timestamp_field="timestamp", + timestamp_unit="ms", + payload_fields=( + "user_id", "group_id", "session_id", + "participants", "sender_ids", "type", + "parent_type", "parent_id", + ), + ), + "foresight": CollectionSpec( + mongo_collection="v1_foresight_records", + qdrant_base="v1_foresight_record", + text_field="content", + extra_text_fields=("evidence",), + # Foresight stores start_time / end_time (epoch ms). For the sweep + # we use start_time as the time-axis filter (most common range + # query semantics). 
end_time is preserved in the payload but not + # indexed by the migrate workhorse (Qdrant collection is built + # without payload indexes here — the EverOS service creates them + # via ensure_payload_indexes when it first attaches). + timestamp_field="start_time", + timestamp_unit="ms", + payload_fields=( + "user_id", "group_id", "session_id", + "participants", "sender_ids", "type", + "start_time", "end_time", "duration_days", + "parent_type", "parent_id", + ), + ), + "agent_case": CollectionSpec( + mongo_collection="v1_agent_cases", + qdrant_base="v1_agent_case", + text_field="task_intent", + timestamp_field="timestamp", + timestamp_unit="s", # epoch SECONDS (not ms!) — Milvus parity + payload_fields=( + "user_id", "group_id", "session_id", + "parent_type", "parent_id", + ), + ), + "agent_skill": CollectionSpec( + mongo_collection="v1_agent_skills", + qdrant_base="v1_agent_skill", + text_field="name", + extra_text_fields=("description",), + timestamp_field=None, # no time-axis for skills + timestamp_unit="ms", + payload_fields=( + "user_id", "group_id", "cluster_id", + "name", "description", + "maturity_score", "confidence", + ), + ), + # user_profile intentionally not included — needs doc splitting (Phase 3.1) +} + + +# ============================================================ Mongo helpers + + +def derive_tenant_prefix(mongo_db: str) -> str: + """ + Strip the trailing ``_memsys`` (or ``memsys``) suffix from a Mongo DB + name to get the Qdrant collection prefix. 
+ + Examples:: + + acme_memsys -> acme + acme_prod_memsys -> acme_prod + """ + stripped = mongo_db + for suffix in ("_memsys", "memsys"): + if stripped.endswith(suffix): + stripped = stripped[: -len(suffix)].rstrip("_") + break + return stripped + + +def list_active_dbs(mongo_uri: str) -> List[str]: + """All non-system DBs whose name has no hyphen (hyphen = abandoned generation).""" + client = MongoClient(mongo_uri) + try: + result = client.admin.command({"listDatabases": 1}) + return sorted( + d["name"] + for d in result["databases"] + if d["name"] not in ("admin", "config", "local") + and "-" not in d["name"] + ) + finally: + client.close() + + +def estimated_count(mongo_uri: str, db_name: str, coll_name: str) -> int: + """Cheap ``estimatedDocumentCount``; returns 0 if collection is absent.""" + client = MongoClient(mongo_uri) + try: + return client[db_name][coll_name].estimated_document_count() + finally: + client.close() + + +# =============================================================== Sweep loop + + +def sweep( + config: Config, + spec_keys: List[str], + tenant_filter: Optional[str], + batch_size: int, + limit_per_pair: Optional[int], + force: bool, + dry_run: bool, +) -> None: + """Iterate active DBs × selected specs and run ``migrate`` per non-empty pair.""" + active_dbs = list_active_dbs(config.mongo_uri) + if tenant_filter: + active_dbs = [d for d in active_dbs if d.startswith(tenant_filter)] + + target_specs = {k: SPECS[k] for k in spec_keys} + + logger.info( + "Sweep plan: %d active DBs × %d collection types -> up to %d pairs" + " (dry_run=%s, batch=%d, limit_per_pair=%s, force=%s)", + len(active_dbs), len(target_specs), + len(active_dbs) * len(target_specs), + dry_run, batch_size, limit_per_pair, force, + ) + + overall_start = time.time() + pairs_run = 0 + pairs_skipped_empty = 0 + pairs_failed = 0 + + for db in active_dbs: + prefix = derive_tenant_prefix(db) + for spec_name, spec in target_specs.items(): + count = estimated_count( + config.mongo_uri, db,
spec.mongo_collection + ) + if count == 0: + pairs_skipped_empty += 1 + continue + + qdrant_coll = f"{prefix}_{spec.qdrant_base}" + logger.info( + "==> [%s] %s.%s -> %s (count=%d)", + spec_name, db, spec.mongo_collection, qdrant_coll, count, + ) + try: + migrate( + config=config, + mongo_db=db, + mongo_coll=spec.mongo_collection, + qdrant_coll=qdrant_coll, + text_field=spec.text_field, + extra_text_fields=spec.extra_text_fields, + timestamp_field=spec.timestamp_field, + timestamp_unit=spec.timestamp_unit, + payload_fields=spec.payload_fields, + batch_size=batch_size, + limit=limit_per_pair, + force=force, + dry_run=dry_run, + ) + pairs_run += 1 + except Exception as e: + logger.error( + "Pair %s.%s -> %s FAILED: %s", + db, spec.mongo_collection, qdrant_coll, e, + ) + pairs_failed += 1 + + logger.info( + "SWEEP DONE: pairs_run=%d pairs_skipped_empty=%d pairs_failed=%d elapsed=%.1fs", + pairs_run, pairs_skipped_empty, pairs_failed, + time.time() - overall_start, + ) + + +# =================================================================== CLI + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Sweep wrapper for re-embed migration") + p.add_argument( + "--tenant", + default=None, + help="Only DBs whose name starts with this prefix", + ) + p.add_argument( + "--collection", + default=None, + choices=sorted(SPECS), + help="Only this collection type (default: all 5)", + ) + p.add_argument("--batch-size", type=int, default=64) + p.add_argument( + "--limit-per-pair", + type=int, + default=None, + help="Cap docs processed per (db, coll) pair (smoke testing)", + ) + p.add_argument("--force", action="store_true") + p.add_argument("--dry-run", action="store_true") + p.add_argument( + "--log-level", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + ) + return p.parse_args() + + +def main() -> int: + args = parse_args() + logging.basicConfig( + level=args.log_level, + format="%(asctime)s %(levelname)-7s %(name)s | 
%(message)s", + ) + config = Config.from_env() + spec_keys = [args.collection] if args.collection else list(SPECS) + sweep( + config=config, + spec_keys=spec_keys, + tenant_filter=args.tenant, + batch_size=args.batch_size, + limit_per_pair=args.limit_per_pair, + force=args.force, + dry_run=args.dry_run, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From ed457a6e17e8143a17a1fd70f1d67528a731bd4a Mon Sep 17 00:00:00 2001 From: Ptah-CT <221234802+Ptah-CT@users.noreply.github.com> Date: Wed, 13 May 2026 20:25:22 +0000 Subject: [PATCH 20/20] fix(qdrant): address all 17 CodeRabbit PR-1 findings Critical - ``migrate_milvus_to_qdrant`` now wraps the migration body in try/finally and closes ``mongo``, ``qdrant``, and ``openai`` clients explicitly. Long sweeps previously leaked connections on every per-pair invocation. Major - ``QdrantCollectionBase.exists`` only catches the qdrant-client transport exceptions (``ResponseHandlingException``, ``UnexpectedResponse``); other failures propagate so infrastructure issues stay visible instead of being silently treated as "collection missing". - ``EpisodicMemoryQdrantRepository.create_and_save_episodic_memory`` rejects missing/empty vectors up front with a ``ValueError`` (mirrors the converter contract) instead of bubbling up a confusing 400 from Qdrant. - ``EpisodicMemoryQdrantRepository.vector_search`` no longer treats the default ``user_id=None`` as a filter on the empty string. The condition now requires the caller to have provided an explicit (non-None, non-MAGIC_ALL) value, restoring full-recall behaviour for the unscoped search path. - ``AtomicFactQdrantRepository.vector_search`` now returns ``atomic_fact`` in the result dict (parity with the batch path and with the persisted payload from ``create_and_save_atomic_fact``). 
Minor - ``AgentSkillQdrantRepository.vector_search`` two-stage threshold now uses ``min(radius, score_threshold)`` server-side, matching the agent_case repository's "more permissive lower bound" semantics. Nitpick - ``logger.error`` in every except block touched by Phase 2/3 swapped for ``logger.exception`` (10 sites across converters, repositories, and the sweep wrapper) so the stack trace is always logged. - ``build_payload`` now logs a warning when a timestamp field has an unexpected type instead of silently dropping the value. --- .../core/oxm/qdrant/qdrant_collection_base.py | 17 +- .../migrate_milvus_to_qdrant.py | 235 ++++++++++-------- .../src/devops_scripts/re_embed_sweep.py | 2 +- .../converter/agent_case_qdrant_converter.py | 2 +- .../converter/agent_skill_qdrant_converter.py | 2 +- .../converter/atomic_fact_qdrant_converter.py | 6 +- .../episodic_memory_qdrant_converter.py | 4 +- .../converter/foresight_qdrant_converter.py | 5 +- .../user_profile_qdrant_converter.py | 6 +- .../agent_case_qdrant_repository.py | 2 +- .../agent_skill_qdrant_repository.py | 18 +- .../atomic_fact_qdrant_repository.py | 4 + .../episodic_memory_qdrant_repository.py | 19 +- 13 files changed, 191 insertions(+), 131 deletions(-) diff --git a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py index bd5039f6..c0caafe3 100644 --- a/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py +++ b/methods/evermemos/src/core/oxm/qdrant/qdrant_collection_base.py @@ -31,6 +31,10 @@ from qdrant_client import QdrantClient from qdrant_client.http import models as qmodels +from qdrant_client.http.exceptions import ( + ResponseHandlingException, + UnexpectedResponse, +) logger = logging.getLogger(__name__) @@ -179,10 +183,19 @@ def client(self) -> QdrantClient: # ------------------------------------------------------------------ schema def exists(self) -> bool: - """Return True if the underlying Qdrant 
collection already exists.""" + """ + Return True if the underlying Qdrant collection already exists. + + Only the known qdrant-client transport-level exceptions are caught + (``ResponseHandlingException`` for connection/timeout errors, + ``UnexpectedResponse`` for 4xx/5xx HTTP responses). Anything else — + including configuration errors, auth failures surfaced as different + exception types, or programming bugs — is allowed to propagate so + infrastructure problems stay visible. + """ try: return self.client().collection_exists(self.name) - except Exception as e: # noqa: BLE001 + except (ResponseHandlingException, UnexpectedResponse) as e: logger.warning( "collection_exists('%s') failed: %s — treating as non-existent", self.name, diff --git a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py index 04e6a362..27c11f38 100644 --- a/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py +++ b/methods/evermemos/src/devops_scripts/migrate_milvus_to_qdrant.py @@ -183,6 +183,13 @@ def build_payload( payload[timestamp_field] = int(secs) elif isinstance(ts_value, (int, float)): payload[timestamp_field] = int(ts_value) + else: + # Silent drop would corrupt time-range filters downstream. Surface + # the bad doc so callers can decide whether to clean source data. + logger.warning( + "Skipping timestamp field '%s' with unexpected type %s for doc %s", + timestamp_field, type(ts_value).__name__, doc.get("_id"), + ) # Persist the text used for the embedding for downstream search-result # surfaces (matches the Milvus converter's ``search_content`` payload). 
@@ -264,128 +271,150 @@ def migrate( ) mongo = MongoClient(config.mongo_uri) - qdrant = QdrantClient(host=config.qdrant_host, port=config.qdrant_port) - openai = OpenAI( - api_key=config.openrouter_api_key, - base_url=config.openrouter_base_url, - ) + qdrant: Optional[QdrantClient] = None + try: + qdrant = QdrantClient(host=config.qdrant_host, port=config.qdrant_port) + # OpenAI client owns an httpx pool that's closed on GC; explicit close + # is best-effort below via the openai.close() call in finally. + openai = OpenAI( + api_key=config.openrouter_api_key, + base_url=config.openrouter_base_url, + ) - coll = mongo[mongo_db][mongo_coll] - total_docs = coll.estimated_document_count() - logger.info("Source has ~%d documents", total_docs) + coll = mongo[mongo_db][mongo_coll] + total_docs = coll.estimated_document_count() + logger.info("Source has ~%d documents", total_docs) - if not dry_run: - ensure_qdrant_collection(qdrant, qdrant_coll, config.vectorize_dimensions) + if not dry_run: + ensure_qdrant_collection(qdrant, qdrant_coll, config.vectorize_dimensions) - cursor = coll.find() - if limit: - cursor = cursor.limit(limit) + cursor = coll.find() + if limit: + cursor = cursor.limit(limit) - processed = 0 - skipped_existing = 0 - skipped_no_text = 0 - upserted = 0 - started = time.time() + processed = 0 + skipped_existing = 0 + skipped_no_text = 0 + upserted = 0 + started = time.time() - batch_docs: List[Dict[str, Any]] = [] + batch_docs: List[Dict[str, Any]] = [] - def flush(batch: List[Dict[str, Any]]) -> Tuple[int, int, int]: - """Embed + upsert one batch. Returns (upserted, skipped_existing, skipped_no_text).""" - # Mongo ids are mapped to Qdrant point ids via uuid5; idempotent so - # the existence-check below works across reruns.
- qdrant_ids = [mongo_id_to_qdrant_id(d["_id"]) for d in batch] - if force: - new_ids = qdrant_ids - else: - new_ids = ( - filter_existing_ids(qdrant, qdrant_coll, qdrant_ids) - if not dry_run - else qdrant_ids - ) - new_set = set(new_ids) - new_docs = [ - d for d, qid in zip(batch, qdrant_ids) if qid in new_set - ] - # Carry the resolved qdrant id alongside the doc so we don't recompute - # the uuid5 twice; attach as a temporary key on a shallow copy. - new_pairs: List[Tuple[Dict[str, Any], str]] = [ - (d, qid) for d, qid in zip(batch, qdrant_ids) if qid in new_set - ] - - texts: List[str] = [] - kept_pairs: List[Tuple[Dict[str, Any], str]] = [] - for d, qid in new_pairs: - text = extract_text(d, text_field, extra_text_fields) - if not text: - continue - texts.append(text) - kept_pairs.append((d, qid)) - - if dry_run: - return ( - len(kept_pairs), - len(batch) - len(new_docs), - len(new_docs) - len(kept_pairs), + def flush(batch: List[Dict[str, Any]]) -> Tuple[int, int, int]: + """Embed + upsert one batch. Returns (upserted, skipped_existing, skipped_no_text).""" + # Mongo ids are mapped to Qdrant point ids via uuid5; idempotent so + # the existence-check below works across reruns. + qdrant_ids = [mongo_id_to_qdrant_id(d["_id"]) for d in batch] + if force: + new_ids = qdrant_ids + else: + new_ids = ( + filter_existing_ids(qdrant, qdrant_coll, qdrant_ids) + if not dry_run + else qdrant_ids + ) + new_set = set(new_ids) + new_docs = [ + d for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] + # Carry the resolved qdrant id alongside the doc so we don't recompute + # the uuid5 twice; attach as a temporary key on a shallow copy. 
+ new_pairs: List[Tuple[Dict[str, Any], str]] = [ + (d, qid) for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] + + texts: List[str] = [] + kept_pairs: List[Tuple[Dict[str, Any], str]] = [] + for d, qid in new_pairs: + text = extract_text(d, text_field, extra_text_fields) + if not text: + continue + texts.append(text) + kept_pairs.append((d, qid)) + + if dry_run: + return ( + len(kept_pairs), + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) + + if not texts: + return ( + 0, + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) + + vectors = embed_batch( + openai, config.vectorize_model, config.vectorize_dimensions, texts ) - if not texts: + points: List[qmodels.PointStruct] = [] + for (d, qid), vec in zip(kept_pairs, vectors): + payload = build_payload( + d, payload_fields, timestamp_field, timestamp_unit, + text_field, extra_text_fields, + ) + # Keep the original Mongo id in the payload so reverse-lookup + # from Qdrant -> Mongo is trivial. + payload["mongo_id"] = str(d["_id"]) + points.append( + qmodels.PointStruct(id=qid, vector=vec, payload=payload) + ) + + qdrant.upsert(collection_name=qdrant_coll, points=points, wait=True) return ( - 0, + len(points), len(batch) - len(new_docs), len(new_docs) - len(kept_pairs), ) - vectors = embed_batch( - openai, config.vectorize_model, config.vectorize_dimensions, texts - ) - - points: List[qmodels.PointStruct] = [] - for (d, qid), vec in zip(kept_pairs, vectors): - payload = build_payload( - d, payload_fields, timestamp_field, timestamp_unit, - text_field, extra_text_fields, - ) - # Keep the original Mongo id in the payload so reverse-lookup - # from Qdrant -> Mongo is trivial. 
- payload["mongo_id"] = str(d["_id"]) - points.append( - qmodels.PointStruct(id=qid, vector=vec, payload=payload) - ) - - qdrant.upsert(collection_name=qdrant_coll, points=points, wait=True) - return ( - len(points), - len(batch) - len(new_docs), - len(new_docs) - len(kept_pairs), - ) - - for doc in cursor: - batch_docs.append(doc) - if len(batch_docs) >= batch_size: + for doc in cursor: + batch_docs.append(doc) + if len(batch_docs) >= batch_size: + u, s_e, s_n = flush(batch_docs) + upserted += u + skipped_existing += s_e + skipped_no_text += s_n + processed += len(batch_docs) + logger.info( + "Progress: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + time.time() - started, + ) + batch_docs = [] + + if batch_docs: u, s_e, s_n = flush(batch_docs) upserted += u skipped_existing += s_e skipped_no_text += s_n processed += len(batch_docs) - logger.info( - "Progress: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", - processed, upserted, skipped_existing, skipped_no_text, - time.time() - started, - ) - batch_docs = [] - - if batch_docs: - u, s_e, s_n = flush(batch_docs) - upserted += u - skipped_existing += s_e - skipped_no_text += s_n - processed += len(batch_docs) - logger.info( - "DONE: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", - processed, upserted, skipped_existing, skipped_no_text, - time.time() - started, - ) + logger.info( + "DONE: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + time.time() - started, + ) + finally: + # Close connections in reverse construction order. Best-effort: a + # failing close should not mask a real exception from the body. 
+ try: + close_fn = getattr(openai, "close", None) + if callable(close_fn): + close_fn() + except Exception: # noqa: BLE001 + logger.debug("openai.close() raised; ignoring during cleanup", exc_info=True) + try: + if qdrant is not None: + qdrant.close() + except Exception: # noqa: BLE001 + logger.debug("qdrant.close() raised; ignoring during cleanup", exc_info=True) + try: + mongo.close() + except Exception: # noqa: BLE001 + logger.debug("mongo.close() raised; ignoring during cleanup", exc_info=True) # =================================================================== CLI diff --git a/methods/evermemos/src/devops_scripts/re_embed_sweep.py b/methods/evermemos/src/devops_scripts/re_embed_sweep.py index d0462e36..96748383 100644 --- a/methods/evermemos/src/devops_scripts/re_embed_sweep.py +++ b/methods/evermemos/src/devops_scripts/re_embed_sweep.py @@ -256,7 +256,7 @@ def sweep( ) pairs_run += 1 except Exception as e: - logger.error( + logger.exception( "Pair %s.%s -> %s FAILED: %s", db, spec.mongo_collection, qdrant_coll, e, ) diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py index d3dbd1b3..a5877455 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_case_qdrant_converter.py @@ -71,7 +71,7 @@ def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct: ) except Exception as e: - logger.error( + logger.exception( "Failed to convert AgentCaseRecord to Qdrant point: %s", e ) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py index 020ecb15..8967fae2 100644 --- 
a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/agent_skill_qdrant_converter.py @@ -81,7 +81,7 @@ def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct: ) except Exception as e: - logger.error( + logger.exception( "Failed to convert AgentSkillRecord to Qdrant point: %s", e ) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py index b334a3bf..75027fa1 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/atomic_fact_qdrant_converter.py @@ -89,10 +89,8 @@ def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct: ) except Exception as e: - logger.error( - "Failed to convert MongoDB AtomicFact to Qdrant point: %s", - e, - exc_info=True, + logger.exception( + "Failed to convert MongoDB AtomicFact to Qdrant point: %s", e ) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py index 74c30745..0585b89b 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/episodic_memory_qdrant_converter.py @@ -106,7 +106,9 @@ def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct: ) except Exception as e: - logger.error("Failed to convert MongoDB document to Qdrant point: %s", e) + logger.exception( + "Failed to convert MongoDB document to Qdrant point: %s", e + ) raise 
@staticmethod diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py index 40baf973..4654bebb 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/foresight_qdrant_converter.py @@ -137,9 +137,8 @@ def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct: ) except Exception as e: - logger.error( - "Failed to convert MongoDB foresight document to Qdrant point: %s", - e, + logger.exception( + "Failed to convert MongoDB foresight document to Qdrant point: %s", e ) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py index 59bd5fd1..11627ab3 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/qdrant/converter/user_profile_qdrant_converter.py @@ -162,9 +162,7 @@ def _make_item(embed_text: str, item_type: str) -> Dict[str, Any]: return items except Exception as e: - logger.error( - "Failed to convert MongoDB UserProfile to Qdrant items: %s", - e, - exc_info=True, + logger.exception( + "Failed to convert MongoDB UserProfile to Qdrant items: %s", e ) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py index 58075277..27f83a6d 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py +++ 
b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_case_qdrant_repository.py @@ -150,5 +150,5 @@ async def vector_search( return search_results except Exception as e: - logger.error("AgentCase Qdrant search failed: %s", e) + logger.exception("AgentCase Qdrant search failed: %s", e) raise diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py index e73ef349..4700652b 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/agent_skill_qdrant_repository.py @@ -122,17 +122,19 @@ async def vector_search( query_filter = qmodels.Filter(must=conditions) if conditions else None ef_value = max(128, limit * 2) - # Two-stage score gating (parity with Milvus repository): + # Two-stage score gating (parity with the agent_case repository): # - ``effective_threshold`` is the wider net we pass to Qdrant - # server-side via ``score_threshold`` (uses ``radius`` if it - # was explicitly set, otherwise ``score_threshold``). + # server-side via ``score_threshold``. With both ``radius`` and + # ``score_threshold`` set, we use the *more permissive* (lower) + # of the two so recall is not silently narrowed by either. # - The client-side ``point.score < score_threshold`` post-filter # enforces the hard caller-facing minimum. This lets a caller # widen the recall via ``radius`` while still requiring a # stricter cut-off in the returned list. 
- effective_threshold = ( - radius if (radius is not None and radius > -1.0) else score_threshold - ) + if radius is not None and radius > -1.0: + effective_threshold = min(radius, score_threshold) + else: + effective_threshold = score_threshold scored_points = await self.search( query_vector=query_vector, @@ -168,7 +170,7 @@ async def vector_search( return search_results except Exception as e: - logger.error("AgentSkill Qdrant search failed: %s", e) + logger.exception("AgentSkill Qdrant search failed: %s", e) raise # -------------------------------------------------------- domain deletes @@ -229,7 +231,7 @@ async def delete_by_cluster_id(self, cluster_id: str) -> int: return count except Exception as e: - logger.error( + logger.exception( "Failed to delete Qdrant points for cluster=%s: %s", cluster_id, e ) # Re-raise so callers can distinguish a genuine zero from an diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py index c6bb0aa6..5fb8cce0 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py @@ -219,6 +219,10 @@ async def vector_search( "group_id": payload.get("group_id"), "session_id": payload.get("session_id"), "participants": payload.get("participants"), + # Returned alongside ``search_content`` (and matching + # the batch path) so callers don't need a Mongo + # round-trip to recover the canonical atomic fact text. 
+ "atomic_fact": payload.get("atomic_fact"), # Convert epoch milliseconds back to UTC datetime so # callers get a consistent type across all repository # entry points (parity with create_and_save_atomic_fact diff --git a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py index a97d8575..a93005ba 100644 --- a/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py +++ b/methods/evermemos/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py @@ -63,7 +63,17 @@ async def create_and_save_episodic_memory( A small summary dict (id, user_id, timestamp, episode, search_content) — same shape as the Milvus repository to keep callers untouched at cutover. + + Raises: + ValueError: when ``vector`` is None or empty. A missing embedding + would only surface as a confusing 400 from Qdrant at upsert + time, far from the bad caller. Fail fast instead. """ + if vector is None or len(vector) == 0: + raise ValueError( + f"Vector is required for EpisodicMemory {id} but was not populated" + ) + try: payload = { "user_id": user_id or "", @@ -121,11 +131,16 @@ async def vector_search( try: conditions: List[qmodels.FieldCondition] = [] - if user_id != MAGIC_ALL: + # Guard both ``None`` (no scope passed) and the explicit "all" + # sentinel. Without the ``is not None`` guard a default ``user_id=None`` + # slipped past the sentinel check and the ``user_id or ""`` fallback + # filtered the search to documents with an empty ``user_id`` payload, + # i.e. zero hits in practice. + if user_id is not None and user_id != MAGIC_ALL: conditions.append( qmodels.FieldCondition( key="user_id", - match=qmodels.MatchValue(value=user_id or ""), + match=qmodels.MatchValue(value=user_id), ) )