diff --git a/README.md b/README.md index f3aa81ba..41d9a9fe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,51 @@ +> [!NOTE] +> ## Fork — Qdrant Migration +> +> This fork of [`EverMind-AI/EverOS`](https://github.com/EverMind-AI/EverOS) focuses on +> **migrating the vector backend from Milvus to Qdrant**. +> +> ### Why +> +> Milvus standalone with embedded etcd showed repeated startup races +> (`panic: etcdserver: leader changed`) leading to crash-loops and RAM +> exhaustion in our deployment. Rather than stacking more etcd workarounds, we +> migrate to Qdrant — whose architecture has no separate coordinator service. +> +> ### Status +> +> - `main` — tracks upstream `EverMind-AI/EverOS`. +> - `qdrant/rebase-evercore` — full Milvus parity. Phase 1: ``QdrantCollectionBase`` + tenant-aware naming. Phase 2: 6 collections + 6 converters (EpisodicMemory, AtomicFact, Foresight, AgentCase, AgentSkill, UserProfile). Phase 2.5: 6 ``@repository``-decorated adapters with two-stage score gating and tz-aware epoch helpers. Phase 3: standalone re-embed CLI (Mongo → OpenRouter → Qdrant) + sweep wrapper across tenants. +> +> ### Approach +> +> EverOS' `src/infra_layer/adapters/out/search/` already supports multiple +> backends (Milvus + Elasticsearch). We add a Qdrant adapter split across: +> +> - `src/core/oxm/qdrant/` — base classes (`QdrantCollectionBase`, +> `BaseQdrantConverter`, `BaseQdrantRepository`) and tenant-aware naming. +> - `src/infra_layer/adapters/out/search/qdrant/` — the concrete collections +> and converters per memory type (episodic, atomic_fact, foresight, +> agent_case, agent_skill, user_profile). +> - `src/infra_layer/adapters/out/search/repository/` — the +> `@repository`-decorated adapters that EverOS routes to. +> +> Routing is gated by `VECTOR_STORE_BACKEND=qdrant`. The Milvus adapter +> stays untouched until cutover. 
+> +> ### Concept Mapping +> +> | Milvus | Qdrant | +> | -------------------- | --------------------------------- | +> | Collection | Collection (1:1) | +> | FieldSchema (vector) | `VectorParams(size, distance)` | +> | FieldSchema (scalar) | Payload field (schema-flexible) | +> | HNSW + COSINE | `HnswConfig` + `Distance.Cosine` | +> | Partition | Payload field OR separate coll. | +> +> Reference: [Qdrant Migration Guide — From Milvus](https://qdrant.tech/documentation/migrate-to-qdrant/from-milvus/). + +--- +
![banner-gif](https://github.com/user-attachments/assets/0bf97efd-580f-4a53-a2a2-58d6daea7290) @@ -59,24 +107,24 @@ Use cases show what persistent memory makes possible in real products and workfl -![banner-gif](https://github.com/user-attachments/assets/650b901b-c9ba-4001-bac7-626b009df830) +[![banner-gif](https://github.com/user-attachments/assets/650b901b-c9ba-4001-bac7-626b009df830)](#rokid-ai-assistant-with-everos) #### Rokid AI Assistant with EverOS Connect to EverOS within Rokid Glasses enabling long-term memory for all of your smart activities. -Coming soon +[Live Demo](#rokid-ai-assistant-with-everos) -![banner-gif](https://github.com/user-attachments/assets/85b338b2-e48e-4a65-9f30-0bc6998df872) +[![banner-gif](https://github.com/user-attachments/assets/85b338b2-e48e-4a65-9f30-0bc6998df872)](#creative-assistant-with-memory) #### Creative Assistant with Memory Creative assistant with long-term memory, never forget your crativites anymore. -Coming soon +[Live Demo](#creative-assistant-with-memory) diff --git a/methods/EverCore/pyproject.toml b/methods/EverCore/pyproject.toml index 02a2912b..f99f53de 100644 --- a/methods/EverCore/pyproject.toml +++ b/methods/EverCore/pyproject.toml @@ -55,6 +55,7 @@ dependencies = [ "elastic-transport>=8.17.0,<9", # ES transport layer (pin major, transport API stability) # Milvus "pymilvus>=2.5.0,<2.6", # Pin minor: 2.6.x may change Collection/search API + "qdrant-client>=1.12,<2", # Qdrant Python client (Milvus->Qdrant migration, feature/qdrant-adapter) # Tokenization "jieba==0.42.1", # Graph Processing diff --git a/methods/EverCore/src/core/component/qdrant_client_factory.py b/methods/EverCore/src/core/component/qdrant_client_factory.py new file mode 100644 index 00000000..65e5a51a --- /dev/null +++ b/methods/EverCore/src/core/component/qdrant_client_factory.py @@ -0,0 +1,289 @@ +""" +Qdrant Client Factory + +Analoge Implementierung zu ``core.component.milvus_client_factory.MilvusClientFactory`` +fuer die 
def _truthy(value: Optional[str]) -> bool:
    """Return ``True`` when ``value`` spells an affirmative env flag.

    Accepted spellings (case-insensitive, surrounding whitespace ignored) are
    ``1``, ``true``, ``yes`` and ``on``. ``None`` and every other string are
    treated as ``False``.
    """
    if value is None:
        return False
    return value.strip().lower() in {"1", "true", "yes", "on"}


def _assemble_qdrant_url(host: str, port: int, https: Optional[bool]) -> str:
    """Build the Qdrant base URL from ``host``, ``port`` and the TLS flag.

    Args:
        host: Bare hostname, or a full ``http://`` / ``https://`` URL.
        port: Port to use when ``host`` does not already carry one.
        https: TLS preference for bare hostnames; ``None``/``False`` -> http.

    Returns:
        The URL string. A scheme-prefixed ``host`` that already names a port
        is returned verbatim; otherwise ``port`` is inserted into the
        authority part (not appended to the end of the string, which would
        corrupt URLs with a path such as ``http://qdrant/``).
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import urlsplit, urlunsplit

    if not host.startswith(("http://", "https://")):
        scheme = "https" if https else "http"
        return f"{scheme}://{host}:{port}"

    try:
        parts = urlsplit(host)
        explicit_port = parts.port
    except ValueError:
        # Malformed authority (bad IPv6 literal, non-numeric port, ...):
        # hand the value through unchanged and let the SDK reject it.
        return host
    if explicit_port is not None:
        return host
    # ``rstrip(":")`` covers the degenerate "http://host:" spelling.
    netloc = f"{parts.netloc.rstrip(':')}:{port}"
    return urlunsplit(parts._replace(netloc=netloc))


def get_qdrant_config(prefix: str = "") -> dict:
    """
    Get Qdrant configuration from environment variables.

    Args:
        prefix: Environment variable prefix, e.g. ``prefix="A"`` reads
            ``A_QDRANT_HOST``; if empty reads ``QDRANT_HOST`` etc. The prefix
            is upper-cased before use.

    Environment variables:
        - ``{PREFIX_}QDRANT_HOST`` (default ``localhost``; empty -> default)
        - ``{PREFIX_}QDRANT_PORT`` (default ``6333``, HTTP)
        - ``{PREFIX_}QDRANT_GRPC_PORT`` (default ``6334``)
        - ``{PREFIX_}QDRANT_API_KEY`` (optional)
        - ``{PREFIX_}QDRANT_HTTPS`` (default unset -> ``None``)
        - ``{PREFIX_}QDRANT_PREFER_GRPC`` (default ``false``)
        - ``{PREFIX_}QDRANT_TIMEOUT`` (default ``30`` seconds, clamped to
          ``[1, 300]``)

    Returns:
        dict with keys ``host``, ``port``, ``grpc_port``, ``api_key``,
        ``https``, ``prefer_grpc``, ``timeout``, ``url`` (assembled).
    """
    env_prefix = f"{prefix.upper()}_" if prefix else ""

    def _env(name: str, default: Optional[str] = None) -> str:
        # ``default=None`` means "optional": report an unset variable as "".
        return os.getenv(f"{env_prefix}{name}", "" if default is None else default)

    def _parse_port(name: str, default: int) -> int:
        """Parse a numeric port env var, falling back to ``default`` on invalid input."""
        raw = _env(name, str(default))
        try:
            value = int(raw)
        except (TypeError, ValueError):
            logger.warning(
                "Invalid %s value %r — falling back to default %d",
                env_prefix + name, raw, default,
            )
            return default
        if not 1 <= value <= 65535:
            logger.warning(
                "%s value %d out of TCP range 1-65535 — falling back to default %d",
                env_prefix + name, value, default,
            )
            return default
        return value

    # A variable that is set but blank would otherwise yield "http://:6333".
    host = _env("QDRANT_HOST", "localhost").strip() or "localhost"
    port = _parse_port("QDRANT_PORT", 6333)
    grpc_port = _parse_port("QDRANT_GRPC_PORT", 6334)

    # api_key / https are explicitly None when the env var is not set, so the
    # value can be omitted from the client kwargs downstream.
    api_key: Optional[str] = _env("QDRANT_API_KEY") or None
    https_raw = _env("QDRANT_HTTPS")
    https: Optional[bool] = _truthy(https_raw) if https_raw else None
    prefer_grpc = _truthy(_env("QDRANT_PREFER_GRPC", "false"))

    timeout_raw = _env("QDRANT_TIMEOUT", "30")
    try:
        timeout = int(timeout_raw)
    except (TypeError, ValueError):
        logger.warning(
            "Invalid %sQDRANT_TIMEOUT value %r — falling back to 30",
            env_prefix, timeout_raw,
        )
        timeout = 30
    # Range-guard: clamp to a sane window so a typo in the env cannot produce
    # a zero/negative timeout or a multi-hour hang.
    _TIMEOUT_MIN, _TIMEOUT_MAX = 1, 300
    if not _TIMEOUT_MIN <= timeout <= _TIMEOUT_MAX:
        logger.warning(
            "QDRANT_TIMEOUT=%d outside [%d, %d] — clamping",
            timeout, _TIMEOUT_MIN, _TIMEOUT_MAX,
        )
        timeout = max(_TIMEOUT_MIN, min(timeout, _TIMEOUT_MAX))

    url = _assemble_qdrant_url(host, port, https)

    config = {
        "host": host,
        "port": port,
        "grpc_port": grpc_port,
        "api_key": api_key,
        "https": https,
        "prefer_grpc": prefer_grpc,
        "timeout": timeout,
        "url": url,
    }

    logger.info("Getting Qdrant config [prefix=%s]:", prefix or "default")
    logger.info(
        "  URL: %s (prefer_grpc=%s, grpc_port=%s, https=%s)",
        url, prefer_grpc, grpc_port, https,
    )
    # Never log the key itself, only whether one is configured.
    logger.info("  Auth: %s", "ApiKey" if api_key else "None")
    logger.info("  Timeout: %ss", timeout)

    return config
@component(name="qdrant_client_factory", primary=False)
class QdrantClientFactory:
    """
    Qdrant client factory.

    Caches and manages ``QdrantClient`` instances, one client per alias
    (mirrors ``MilvusClientFactory``).

    ``primary=False``: the component is registered under the bean name
    ``qdrant_client_factory`` and is looked up explicitly by that name, so
    registering it does not displace another factory as the default.
    """

    def __init__(self) -> None:
        # alias (lower-cased) -> client
        self._clients: Dict[str, QdrantClient] = {}
        # Parsed env config for the default client; filled on first use.
        self._default_config: Optional[dict] = None
        # Guards the check-then-create path in ``get_client`` and the
        # snapshot-and-clear in ``close_all_clients`` so two threads never
        # both build a client for the same alias.
        self._lock = threading.Lock()
        logger.info("QdrantClientFactory initialized")

    def get_client(
        self,
        url: str = "",
        host: str = "",
        port: int = 6333,
        grpc_port: int = 6334,
        api_key: Optional[str] = None,
        https: Optional[bool] = None,
        prefer_grpc: bool = False,
        timeout: int = 30,
        alias: Optional[str] = None,
        **kwargs,
    ) -> QdrantClient:
        """
        Get or create a cached Qdrant client.

        The cache is keyed by ``alias`` only: if a client already exists for
        the alias it is returned as-is and the connection arguments of this
        call are ignored.

        Args:
            url: Fully-qualified URL (e.g. ``http://localhost:6333``). When
                given, ``host``/``port`` are not passed to the client.
            host: Hostname (``localhost`` when neither ``url`` nor ``host``
                is set).
            port: HTTP/REST port (default ``6333``).
            grpc_port: gRPC port (default ``6334``).
            api_key: Optional API key; omitted from the client kwargs when
                falsy.
            https: TLS preference; only forwarded when not ``None``.
            prefer_grpc: Forwarded to ``QdrantClient``.
            timeout: Request timeout in seconds.
            alias: Cache key (default ``default``, case-insensitive).
            **kwargs: Extra ``QdrantClient`` keyword arguments; these override
                the values derived from the named parameters.

        Returns:
            ``QdrantClient`` cached under ``alias``.
        """
        # Normalize so ``default``, ``Default`` and ``DEFAULT`` share a client.
        cache_key = (alias or "default").lower()

        # Fast path without lock acquisition.
        cached = self._clients.get(cache_key)
        if cached is not None:
            return cached

        with self._lock:
            # Re-check under the lock: another thread may have created the
            # client while this one was waiting.
            cached = self._clients.get(cache_key)
            if cached is not None:
                return cached

            client_kwargs: dict = {
                "prefer_grpc": prefer_grpc,
                "grpc_port": grpc_port,
                "timeout": timeout,
            }
            if api_key:
                client_kwargs["api_key"] = api_key
            if https is not None:
                client_kwargs["https"] = https
            if url:
                client_kwargs["url"] = url
            else:
                client_kwargs["host"] = host or "localhost"
                client_kwargs["port"] = port

            client_kwargs.update(kwargs)

            client = QdrantClient(**client_kwargs)
            self._clients[cache_key] = client
            # Log what was actually passed to the SDK (kwargs may have
            # overridden url/host/port).
            target = client_kwargs.get("url") or (
                f"{client_kwargs.get('host')}:{client_kwargs.get('port')}"
            )
            logger.info(
                "Qdrant client created and cached: %s (alias=%s, prefer_grpc=%s, https=%s)",
                target,
                cache_key,
                prefer_grpc,
                https,
            )
            return client

    def get_default_client(self) -> QdrantClient:
        """Get the default Qdrant client built from the unprefixed env config."""
        if self._default_config is None:
            self._default_config = get_qdrant_config()

        cfg = self._default_config
        return self.get_client(
            url=cfg["url"],
            api_key=cfg["api_key"],
            https=cfg["https"],
            prefer_grpc=cfg["prefer_grpc"],
            grpc_port=cfg["grpc_port"],
            timeout=cfg["timeout"],
            alias="default",
        )

    def get_named_client(self, name: str) -> QdrantClient:
        """
        Get a Qdrant client by name. ``name`` is used as the env-var prefix,
        e.g. ``name="A"`` reads ``A_QDRANT_HOST``, ``A_QDRANT_PORT``, ...

        Args:
            name: Prefix name. ``default`` (any case) -> default client.

        Returns:
            ``QdrantClient`` cached under the lower-cased ``name``.
        """
        normalized = name.lower()
        if normalized == "default":
            return self.get_default_client()

        # Cache hit: skip re-reading the environment and the INFO logging
        # that ``get_qdrant_config`` performs on every call.
        cached = self._clients.get(normalized)
        if cached is not None:
            return cached

        cfg = get_qdrant_config(prefix=name)
        logger.info("Loading named Qdrant config [name=%s]: %s", normalized, cfg["url"])

        return self.get_client(
            url=cfg["url"],
            api_key=cfg["api_key"],
            https=cfg["https"],
            prefer_grpc=cfg["prefer_grpc"],
            grpc_port=cfg["grpc_port"],
            timeout=cfg["timeout"],
            alias=normalized,
        )

    def close_all_clients(self) -> None:
        """Close every cached Qdrant client and empty the cache.

        Errors from individual ``close()`` calls are logged and do not stop
        the remaining clients from being closed.
        """
        # Take a snapshot and clear under the lock so a concurrent
        # ``get_client`` can neither mutate the dict mid-iteration nor have a
        # freshly created client dropped from the cache without being closed.
        with self._lock:
            to_close = dict(self._clients)
            self._clients.clear()

        for alias, client in to_close.items():
            try:
                client.close()
            except Exception as e:  # noqa: BLE001
                logger.error("Error closing Qdrant client [alias=%s]: %s", alias, e)
        logger.info("All Qdrant clients closed")
# Order 19: between milvus_lifespan (18) and business_lifespan (20), so the
# vector backends are initialized before business logic starts.
_QDRANT_LIFESPAN_ORDER = 19

# Env flag selecting the active vector store. Defaults to ``milvus`` so the
# Qdrant lifespan is a no-op unless explicitly enabled.
_ENV_BACKEND_FLAG = "VECTOR_STORE_BACKEND"
_BACKEND_QDRANT = "qdrant"


def _backend_is_qdrant() -> bool:
    """Return ``True`` when ``VECTOR_STORE_BACKEND`` selects Qdrant.

    The comparison ignores case and surrounding whitespace; an unset variable
    counts as ``milvus``.
    """
    return os.getenv(_ENV_BACKEND_FLAG, "milvus").strip().lower() == _BACKEND_QDRANT


@component(name="qdrant_lifespan_provider")
class QdrantLifespanProvider(LifespanProvider):
    """Qdrant lifespan provider (feature-gated by ``VECTOR_STORE_BACKEND``)."""

    def __init__(self, name: str = "qdrant", order: int = _QDRANT_LIFESPAN_ORDER):
        super().__init__(name, order)
        # Set in ``startup`` only when the Qdrant backend is active.
        self._qdrant_factory = None
        # ``_DB_USING`` value -> client obtained for it.
        self._qdrant_clients: dict = {}

    async def startup(self, app: FastAPI) -> Any:
        """
        Open Qdrant connections and initialize all discovered collections.

        Skipped (returns immediately) when ``VECTOR_STORE_BACKEND != qdrant``.

        Raises:
            Exception: Any error from client creation or collection
                initialization is re-raised after cached clients were closed.
        """
        if not _backend_is_qdrant():
            logger.info(
                "Qdrant lifespan skipped (%s='%s', Qdrant inactive)",
                _ENV_BACKEND_FLAG,
                os.getenv(_ENV_BACKEND_FLAG, "milvus"),
            )
            return

        logger.info("Initializing Qdrant connection...")

        try:
            self._qdrant_factory = get_bean("qdrant_client_factory")

            # Collect all QdrantCollectionBase subclasses that declare a
            # collection name.
            all_collection_classes = [
                cls
                for cls in get_all_subclasses(QdrantCollectionBase)
                if cls._COLLECTION_NAME is not None
            ]

            # Group by ``_DB_USING`` so each named client is fetched once.
            using_collections: Dict[str, List[Type[QdrantCollectionBase]]] = defaultdict(list)
            for collection_class in all_collection_classes:
                using = collection_class._DB_USING
                using_collections[using].append(collection_class)
                logger.info(
                    "Discovered Qdrant Collection class: %s [using=%s]",
                    collection_class.__name__,
                    using,
                )

            # Per ``using``: fetch the client, then initialize its
            # collections. The synchronous factory call is offloaded to a
            # worker thread; ``ensure_all`` is awaited directly.
            for using, collection_classes in using_collections.items():
                client = await asyncio.to_thread(
                    self._qdrant_factory.get_named_client, using
                )
                self._qdrant_clients[using] = client

                for collection_class in collection_classes:
                    try:
                        collection = collection_class()
                        await collection.ensure_all()
                        logger.info(
                            "Qdrant Collection '%s' initialized [using=%s]",
                            collection.name,
                            using,
                        )
                    except Exception as e:
                        logger.exception(
                            "Failed to initialize Qdrant Collection '%s' [using=%s]: %s",
                            collection_class._COLLECTION_NAME,
                            using,
                            e,
                        )
                        raise

            logger.info("Qdrant connection initialization completed")

        except Exception as e:
            # Rollback: close whatever clients were constructed before the
            # failure so a later startup attempt starts from a clean state.
            logger.error("Error during Qdrant initialization: %s", str(e))
            try:
                if self._qdrant_factory is not None:
                    self._qdrant_factory.close_all_clients()
            except Exception as rollback_err:  # noqa: BLE001
                logger.warning(
                    "Rollback close_all_clients() also failed: %s",
                    rollback_err,
                )
            # Back to the "not initialized" state, so ``shutdown`` is a no-op
            # after a failed startup.
            self._qdrant_clients = {}
            self._qdrant_factory = None
            raise

    async def shutdown(self, app: FastAPI) -> None:
        """
        Close Qdrant connections (no-op when nothing was initialized).

        The gate is ``self._qdrant_factory is None``, NOT the env flag: if
        the backend env was switched between startup and shutdown (e.g. in a
        test), clients opened during startup still get closed.
        """
        if self._qdrant_factory is None:
            return

        logger.info("Closing Qdrant connections...")
        try:
            self._qdrant_factory.close_all_clients()
            logger.info("Qdrant connections closed")
        except Exception as e:
            logger.error("Error while closing Qdrant connections: %s", str(e))
        finally:
            # Drop references to the (now closed) clients and make a repeated
            # shutdown call a no-op.
            self._qdrant_clients = {}
            self._qdrant_factory = None

        # Remove state attributes if something attached them to the app.
        for attr in ("qdrant_clients", "qdrant_factory"):
            if hasattr(app.state, attr):
                delattr(app.state, attr)
# Generic parameter naming the collection class a converter targets.
QdrantCollectionType = TypeVar("QdrantCollectionType", bound=QdrantCollectionBase)


class BaseQdrantConverter(ABC, Generic[QdrantCollectionType]):
    """
    Common base for converters that turn source documents into Qdrant
    entities.

    A concrete converter binds its target collection through the generic
    parameter (``class Foo(BaseQdrantConverter[FooCollection])``) and
    implements :meth:`from_mongo`. The bound collection class can be read
    back with :meth:`get_qdrant_model`.
    """

    @classmethod
    def get_qdrant_model(cls) -> Type[QdrantCollectionType]:
        """
        Return the collection class bound via the generic parameter.

        Returns:
            The first type argument of the ``BaseQdrantConverter[...]`` entry
            found in ``cls.__orig_bases__``.

        Raises:
            ValueError: When no parameterized ``BaseQdrantConverter`` base
                with a type argument is found.
        """
        for generic_base in getattr(cls, "__orig_bases__", ()):
            if get_origin(generic_base) is not BaseQdrantConverter:
                continue
            bound_types = get_args(generic_base)
            if bound_types:
                return bound_types[0]

        raise ValueError(
            "Unable to retrieve Qdrant collection type from generic information "
            f"of {cls.__name__}"
        )

    @classmethod
    @abstractmethod
    def from_mongo(cls, source_doc: Any) -> Any:
        """
        Convert a source document (typically a Mongo doc) into a Qdrant
        entity.

        The return annotation is ``Any`` because the generic parameter names
        the *target collection class*, not the emitted wire format.
        Implementations usually return a ``PointStruct`` or, when one source
        document fans out into several points, a ``List[Dict[str, Any]]`` of
        per-item payload dicts, and should tighten the annotation accordingly.

        Args:
            source_doc: Source data (Mongo doc, dict, ...).

        Returns:
            The converted entity or list of payload dicts.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Subclasses must implement the from_mongo method")
# Stable namespace for the Mongo id -> Qdrant UUID translation. The mapping
# uses ``uuid5`` (deterministic), so the same Mongo id always yields the same
# Qdrant point id. NEVER change this value without re-migrating the data:
# every derived point id would change.
_MONGO_TO_QDRANT_NS = uuid.UUID("ec57c0e3-5e90-4d4a-9c1c-a8b9c7d8e7d6")


def mongo_id_to_qdrant_id(mongo_id: Any) -> str:
    """
    Map a Mongo document id to a deterministic UUID5 point id string.

    The id is stringified with ``str()`` before hashing, so an ObjectId and
    its hex string map to the same point id. The mapping cannot be reversed;
    callers that need the original id must store it in the payload.

    Args:
        mongo_id: Source id (ObjectId, str, int, ...).

    Returns:
        The UUID5 of ``str(mongo_id)`` in the fixed namespace, as a string.

    Raises:
        ValueError: When ``mongo_id`` is ``None`` or an empty string. Either
            would hash to one fixed point id and silently collide.
    """
    if mongo_id is None or (isinstance(mongo_id, str) and not mongo_id):
        raise ValueError(
            "mongo_id_to_qdrant_id requires a non-empty source id; got "
            f"{mongo_id!r}"
        )
    return str(uuid.uuid5(_MONGO_TO_QDRANT_NS, str(mongo_id)))


def compute_effective_threshold(
    radius: Optional[float], score_threshold: Optional[float]
) -> Optional[float]:
    """
    Pick the server-side score bound for two-stage gating.

    Returns the *smaller* of the positive bounds, i.e. the more permissive
    one, or ``None`` when neither bound is positive.

    Semantics:
        - ``score_threshold`` of ``None``, ``0.0`` or any non-positive value
          means "no minimum" and is treated as unset.
        - ``radius`` of ``None`` or any non-positive value is treated as
          unset.
        - With both bounds positive the smaller value is returned.

    A plain ``min(radius, score_threshold)`` would evaluate to ``0.0`` for
    the default ``score_threshold=0.0``; this helper avoids that by ignoring
    unset bounds instead of comparing against them.

    Args:
        radius: Optional radius bound.
        score_threshold: Optional minimum score. ``None`` is accepted so a
            caller holding an ``Optional[float]`` can pass it through.

    Returns:
        The effective bound, or ``None`` when no positive bound is set.
    """
    # NaN compares False against 0 and is therefore dropped as well.
    positive_bounds: List[float] = [
        bound
        for bound in (radius, score_threshold)
        if bound is not None and bound > 0
    ]
    return min(positive_bounds) if positive_bounds else None
# Reference instant for the integer epoch conversions below.
_UNIX_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)


def _since_unix_epoch(dt: datetime):
    """Return ``dt - 1970-01-01T00:00:00Z`` as a ``timedelta``; naive input is UTC."""
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt - _UNIX_EPOCH


def to_epoch_ms(dt: datetime) -> int:
    """
    Convert a ``datetime`` to epoch milliseconds.

    Naive datetimes (``tzinfo is None``) are interpreted as UTC. Callers that
    operate in a local timezone should attach an explicit tzinfo first.

    The value is computed with integer arithmetic on the ``timedelta``
    components. Going through ``int(dt.timestamp() * 1000)`` truncates after
    a float multiplication and can land one millisecond low (for example
    ``1.001 * 1000 == 1000.9999999999999``). Sub-millisecond remainders are
    floored, also for instants before 1970.

    Args:
        dt: Naive (UTC) or timezone-aware datetime.

    Returns:
        Whole milliseconds since 1970-01-01T00:00:00Z.
    """
    delta = _since_unix_epoch(dt)
    # ``seconds`` and ``microseconds`` are always non-negative; the sign
    # lives in ``days``, which is what makes this a floor.
    return delta.days * 86_400_000 + delta.seconds * 1000 + delta.microseconds // 1000


def to_epoch_s(dt: datetime) -> int:
    """
    Convert a ``datetime`` to whole epoch seconds.

    Same conventions as :func:`to_epoch_ms`: naive input is taken as UTC and
    the fractional part is floored.

    Args:
        dt: Naive (UTC) or timezone-aware datetime.

    Returns:
        Whole seconds since 1970-01-01T00:00:00Z.
    """
    delta = _since_unix_epoch(dt)
    return delta.days * 86_400 + delta.seconds
+ + Subclasses set the bound collection model via the generic parameter and + pass the model class to ``__init__``:: + + class EpisodicMemoryRepository( + BaseQdrantRepository[EpisodicMemoryCollection] + ): + def __init__(self): + super().__init__(EpisodicMemoryCollection) + + Subclasses may add domain-specific finders on top of the CRUD primitives. + """ + + def __init__(self, model: Type[T]): + self.model = model + self.model_name = model.__name__ + + # ------------------------------------------------------------------ shape + + @property + def collection(self) -> T: + """ + Instantiate the bound ``QdrantCollectionBase`` subclass for the + **current tenant context**. + + Not cached on the repository instance: the model's ``__init__`` + resolves the tenant-prefixed collection name at construction time + (see ``TenantAwareQdrantCollectionWithSuffix``). With a typical + DI singleton repository scope, caching the result would lock the + repository to whichever tenant happened to make the first call, + which would silently route subsequent tenants' reads and writes to + the wrong collection. + + The model construction itself is cheap (a tenant-name lookup plus + the base validation in ``QdrantCollectionBase.__init__``), so the + per-call cost is negligible compared to the round-trip to Qdrant. + """ + return self.model() + + def get_model_name(self) -> str: + return self.model_name + + # =================================================== Basic CRUD (async) + + async def upsert( + self, + point: qmodels.PointStruct, + wait: bool = True, + ) -> str: + """ + Insert-or-update a single point. + + Qdrant has no separate ``insert`` semantics — upsert is the + idempotent primitive. The returned id is taken from the passed + PointStruct (caller-supplied). + + Note: this returns a ``str`` (the point id) for parity with the + Milvus repository's ``insert`` method. The underlying Qdrant + ``UpdateResult`` is intentionally discarded here. 
Callers that + need the wire-level ``UpdateResult`` (e.g., to assert + ``status == completed``) should use ``upsert_batch([point])``. + """ + try: + await self.collection.upsert([point], wait) + logger.debug( + "Qdrant upsert successful [%s]: %s", self.model_name, point.id + ) + return str(point.id) + except Exception as e: + logger.exception("Qdrant upsert failed [%s]: %s", self.model_name, e) + raise + + async def upsert_batch( + self, + points: List[qmodels.PointStruct], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Batch upsert. ``wait=True`` blocks until the operation is durable.""" + try: + result = await self.collection.upsert(points, wait) + logger.debug( + "Qdrant batch upsert successful [%s]: %d points", + self.model_name, + len(points), + ) + return result + except Exception as e: + logger.exception( + "Qdrant batch upsert failed [%s]: %s", self.model_name, e + ) + raise + + async def find_by_id( + self, + point_id: Any, + with_payload: bool = True, + with_vectors: bool = False, + ) -> Optional[qmodels.Record]: + """ + Retrieve a single point by id. Returns ``None`` if not found. + + Qdrant accepts both ``int`` and ``str`` (UUID) point ids — pass + whichever id type was used at upsert time. Operational errors + (network, auth, malformed id type) are logged and re-raised; only + the legitimate "not found" case yields ``None``. + """ + try: + records = await asyncio.to_thread( + self.collection.client().retrieve, + self.collection.name, + [point_id], + with_payload, + with_vectors, + ) + except Exception as e: + logger.error( + "Qdrant find_by_id failed [%s, id=%s]: %s", + self.model_name, + point_id, + e, + ) + raise + return records[0] if records else None + + async def find_by_ids( + self, + point_ids: List[Any], + with_payload: bool = True, + with_vectors: bool = False, + ) -> List[qmodels.Record]: + """ + Batch retrieval by ids. Order of result is not guaranteed. 
+ + Returns an empty list when none of the ids exist; raises on any + operational error so callers can distinguish "all-missing" from a + retrieval failure. + """ + try: + return await asyncio.to_thread( + self.collection.client().retrieve, + self.collection.name, + point_ids, + with_payload, + with_vectors, + ) + except Exception as e: + logger.error( + "Qdrant find_by_ids failed [%s, %d ids]: %s", + self.model_name, + len(point_ids), + e, + ) + raise + + async def delete_by_id( + self, + point_id: Any, + wait: bool = True, + ) -> bool: + """ + Delete a single point. Returns ``True`` on a successful round-trip. + + Operational errors are logged and re-raised (consistent with + ``upsert`` / ``delete_batch``); the ``bool`` return type is kept + for caller-parity with the Milvus repository. + """ + try: + await self.collection.delete([point_id], wait) + except Exception as e: + logger.exception( + "Qdrant delete failed [%s, id=%s]: %s", + self.model_name, + point_id, + e, + ) + raise + logger.debug( + "Qdrant delete successful [%s]: %s", self.model_name, point_id + ) + return True + + async def delete_batch( + self, + point_ids: List[Any], + wait: bool = True, + ) -> qmodels.UpdateResult: + """Batch delete by ids.""" + try: + result = await self.collection.delete(point_ids, wait) + logger.debug( + "Qdrant batch delete successful [%s]: %d ids", + self.model_name, + len(point_ids), + ) + return result + except Exception as e: + logger.exception( + "Qdrant batch delete failed [%s, %d ids]: %s", + self.model_name, + len(point_ids), + e, + ) + raise + + # ============================================================ Search/Count + + async def search( + self, + query_vector: List[float], + limit: int = 10, + query_filter: Optional[qmodels.Filter] = None, + with_payload: bool = True, + with_vectors: bool = False, + score_threshold: Optional[float] = None, + **kwargs: Any, + ) -> List[qmodels.ScoredPoint]: + """ANN search with optional payload-filter.""" + try: + return await 
async def count(self, exact: bool = True) -> int:
    """
    Return how many points the underlying collection holds.

    Args:
        exact: Forwarded unchanged to ``self.collection.count``.

    Raises:
        Exception: Any error from the collection call is logged with its
            traceback and re-raised.
    """
    try:
        total = await self.collection.count(exact)
    except Exception as exc:
        logger.exception("Qdrant count failed [%s]: %s", self.model_name, exc)
        raise
    else:
        logger.debug(
            "Qdrant count successful [%s]: %d points", self.model_name, total
        )
        return total
@dataclass
class IndexConfig:
    """
    Configuration of the (vector) index of a Qdrant collection.

    Args:
        size: Vector dimension (1024 is the qwen3-embedding default).
        distance: Distance metric name; must be a key of ``_DISTANCE_MAP``
            (``cosine``, ``euclid``, ``dot``, ``manhattan``). Matching is
            case-insensitive and ignores surrounding whitespace.
        on_disk: Passed through as ``VectorParams.on_disk`` (keep vector
            data on disk instead of fully in RAM).
        hnsw_m: HNSW maximum edges per node. Higher = better recall, more
            RAM.
        hnsw_ef_construct: HNSW search width at build time. Higher = better
            recall, slower build.
        payload_indexes: Map ``field_name -> schema_type`` where
            ``schema_type`` is a key of ``_PAYLOAD_SCHEMA_TYPE_MAP`` (e.g.
            ``"keyword"`` for string-equality filters).
    """

    size: int = 1024
    distance: str = "cosine"
    on_disk: bool = False
    hnsw_m: int = 16
    hnsw_ef_construct: int = 100
    payload_indexes: Dict[str, str] = field(default_factory=dict)

    def to_vectors_config(self) -> qmodels.VectorParams:
        """
        Build the ``qdrant_client.http.models.VectorParams`` for this config.

        Raises:
            ValueError: If ``distance`` is not a key of ``_DISTANCE_MAP``.
        """
        metric = _DISTANCE_MAP.get(self.distance.strip().lower())
        if metric is None:
            supported = sorted(_DISTANCE_MAP)
            raise ValueError(
                f"Unknown distance '{self.distance}'. Supported: {supported}"
            )

        hnsw = qmodels.HnswConfigDiff(
            m=self.hnsw_m,
            ef_construct=self.hnsw_ef_construct,
        )
        return qmodels.VectorParams(
            size=self.size,
            distance=metric,
            on_disk=self.on_disk,
            hnsw_config=hnsw,
        )
def client(self) -> QdrantClient:
    """
    Return the Qdrant client registered for ``self.using``.

    The client is obtained from the ``qdrant_client_factory`` bean via
    ``get_named_client``, so client caching stays in the factory rather
    than in each collection object.
    """
    # Imported here, not at module top: this module is loaded at
    # adapter-discovery time, before the DI container is fully wired, and a
    # top-level import would create a circular dependency.
    from core.di.utils import get_bean

    return get_bean("qdrant_client_factory").get_named_client(self.using)
async def ensure_collection(self) -> None:
    """
    Create the Qdrant collection if it does not exist.

    Creation is idempotent: a pre-existing collection is never modified.
    Its vector size is compared against ``_VECTOR_PARAMS.size`` and a
    mismatch fails loudly. Other schema differences (distance, HNSW
    settings, ``on_disk``) are not inspected here; schema migration is an
    explicit external concern.

    The same size check also runs when ``create_collection`` loses a race
    against a concurrent creator (HTTP 409), so a collection created in
    parallel with a different dimension is detected here rather than on
    the first upsert/search.

    Raises:
        RuntimeError: If ``_VECTOR_PARAMS`` is ``None`` or an existing
            collection reports a different vector size.
        UnexpectedResponse: For any non-409 HTTP error returned by
            ``create_collection``.
    """
    cfg = self._VECTOR_PARAMS
    # ``__init__`` already enforces this. The explicit check guards against
    # subclasses that override ``__init__`` without invoking ``super`` and,
    # unlike ``assert``, survives ``python -O``.
    if cfg is None:
        raise RuntimeError(
            f"{self.__class__.__name__}._VECTOR_PARAMS is None"
        )

    client = self.client()
    if await self.exists():
        await self._validate_existing_vector_size(client, cfg.size)
        logger.debug(
            "Qdrant collection '%s' already exists, skipping create",
            self.name,
        )
        return

    logger.info(
        "Creating Qdrant collection '%s' (size=%d, distance=%s, on_disk=%s)",
        self.name,
        cfg.size,
        cfg.distance,
        cfg.on_disk,
    )
    try:
        await asyncio.to_thread(
            client.create_collection,
            collection_name=self.name,
            vectors_config=cfg.to_vectors_config(),
        )
    except UnexpectedResponse as e:
        # TOCTOU between ``self.exists()`` and ``create_collection``: a
        # parallel process may have created the collection in between.
        # Only a 409 Conflict is treated as "already there"; every other
        # HTTP error propagates.
        if getattr(e, "status_code", None) != 409:
            raise
        logger.info(
            "Qdrant collection '%s' was created concurrently — "
            "treating create as idempotent",
            self.name,
        )
        # The concurrent creator may have used a different dimension, so
        # apply the same parity check as for a pre-existing collection.
        await self._validate_existing_vector_size(client, cfg.size)

async def _validate_existing_vector_size(
    self,
    client: QdrantClient,
    expected_size: int,
) -> None:
    """
    Raise ``RuntimeError`` if the existing collection's vector size differs.

    The lookup is best-effort: if ``get_collection`` fails, or the returned
    config has no single ``vectors.size`` attribute (presumably the case
    for named-vector collections; confirm against the qdrant-client
    models), the check is skipped and only a debug message is logged.
    """
    try:
        existing = await asyncio.to_thread(client.get_collection, self.name)
        existing_size = (
            existing.config.params.vectors.size  # type: ignore[union-attr]
        )
    except Exception as e:  # noqa: BLE001
        logger.debug(
            "get_collection('%s') failed during dim-validation: %s",
            self.name, e,
        )
        return
    if existing_size is not None and existing_size != expected_size:
        raise RuntimeError(
            f"Qdrant collection '{self.name}' exists with vector "
            f"size {existing_size}, but {self.__class__.__name__} "
            f"expects {expected_size}. Migrate or rename the collection."
        )
async def ensure_all(self) -> None:
    """
    Run the full initialization sequence for this collection.

    Calls ``ensure_collection`` first, then ``ensure_payload_indexes``;
    a failure in either step propagates to the caller.
    """
    collection_name = self.name
    logger.info("Initializing Qdrant collection '%s' [using=%s]", collection_name, self.using)
    # The collection must exist before payload indexes are created on it.
    await self.ensure_collection()
    await self.ensure_payload_indexes()
    logger.info("Qdrant collection '%s' is ready", self.name)
async def delete(
    self,
    point_ids: List[Any],
    wait: bool = True,
) -> qmodels.UpdateResult:
    """
    Delete the points with the given ids from this collection.

    The blocking ``client.delete`` call runs in a worker thread via
    ``asyncio.to_thread``; its result is returned unchanged.
    """
    qdrant = self.client()
    request = {
        "collection_name": self.name,
        "points_selector": qmodels.PointIdsList(points=point_ids),
        "wait": wait,
    }
    return await asyncio.to_thread(qdrant.delete, **request)
+ """ + client = self.client() + try: + await asyncio.to_thread(client.delete_collection, collection_name=self.name) + logger.info("Dropped Qdrant collection '%s'", self.name) + except Exception as e: # noqa: BLE001 + logger.warning( + "Failed to drop Qdrant collection '%s': %s", + self.name, + e, + ) + raise diff --git a/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/__init__.py b/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py b/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py new file mode 100644 index 00000000..4e857b57 --- /dev/null +++ b/methods/EverCore/src/core/tenants/tenantize/oxm/qdrant/config_utils.py @@ -0,0 +1,208 @@ +""" +Tenant-Aware Qdrant Configuration Utilities. + +Analog zu ``core.tenants.tenantize.oxm.milvus.config_utils``, aber deutlich +schlanker — Qdrant braucht keine pymilvus-Connection-Cache-Keys, weil ein +einzelner ``QdrantClient`` alle Collections eines Endpoints bedient. + +Hauptaufgabe: Aufloesen des **tenant-aware Collection-Namens** anhand des +Tenant-Context. + +Resolution-Reihenfolge (analog Milvus): + 1. Tenant-Context vorhanden + ``storage_info["qdrant"]["collection_prefix"]`` gesetzt + -> ``f"{collection_prefix}_{original_name}"`` + 2. Tenant-Context vorhanden + nur ``storage_info["milvus"]["collection_prefix"]`` gesetzt + -> ``f"{milvus_prefix}_{original_name}"`` (Migrations-Bruecke: gleiche Tenant-Namen + fuer Qdrant wie fuer Milvus, bis pro-Tenant Qdrant-Config explizit gesetzt wird) + 3. Kein Tenant-Context -> Base-Resource-Prefix + ``original_name`` (z.B. 
def get_tenant_qdrant_config() -> Optional[Dict[str, Any]]:
    """
    Return the Qdrant storage dict of the currently active tenant.

    Lookup order on the tenant's storage info: ``"qdrant"``, then
    ``"milvus"``, then ``"milvus_config"``. The Milvus entries act as a
    migration bridge so a tenant keeps the same ``collection_prefix`` until
    a dedicated Qdrant entry is configured.

    Returns:
        The first truthy storage dict found, the raw result of the
        ``"milvus_config"`` lookup if the earlier ones were empty, or
        ``None`` when no tenant is active or a storage lookup raises
        ``LookupError``.

    Raises:
        Exception: Any non-``LookupError`` failure during the storage
            lookups is logged and re-raised (fail-closed), so callers do
            not silently fall back to the shared base-prefix path. Errors
            raised by ``get_current_tenant()`` itself propagate unlogged.
    """
    # Function-scope import avoids a circular dependency at
    # adapter-discovery time.
    from core.tenants.tenantize.tenant_context import get_current_tenant

    tenant = get_current_tenant()
    if not tenant:
        return None

    try:
        for storage_key in ("qdrant", "milvus"):
            found = tenant.get_storage_info(storage_key)
            if found:
                return found
        # Last candidate is returned as-is, even when empty.
        return tenant.get_storage_info("milvus_config")
    except LookupError:
        # "No such storage entry" is a normal "no tenant config" signal.
        return None
    except Exception:
        logger.exception(
            "Tenant qdrant config resolution failed unexpectedly"
        )
        raise
def get_tenant_aware_collection_name(original_name: str) -> str:
    """
    Resolve the tenant-aware Qdrant collection name for ``original_name``.

    Args:
        original_name: Bare collection name (e.g. ``"v1_episodic_memory"``).

    Returns:
        ``"{collection_prefix}_{original_name}"`` when the tenant config
        carries a non-empty ``collection_prefix``; otherwise the
        base-resource-prefixed name from ``_base_prefixed_collection_name``.

    Raises:
        Exception: Resolution failures raised by
            ``get_tenant_qdrant_config`` propagate unchanged; they are not
            converted into the base-prefix fallback.
    """
    tenant_cfg = get_tenant_qdrant_config()
    prefix = tenant_cfg.get("collection_prefix") if tenant_cfg else None
    if not prefix:
        # No tenant config, or a config without an explicit prefix.
        return _base_prefixed_collection_name(original_name)
    return f"{prefix}_{original_name}"
def _load_qdrant_env(prefix: str = "") -> Dict[str, Any]:
    """
    Read Qdrant connection settings from environment variables.

    Intended as the fallback when no tenant storage info is present. The
    helper is staged for the tenant-aware connection routing and is kept
    module-private until that layer consumes it, so it should not be
    treated as dead code.

    Args:
        prefix: Optional env prefix; it is upper-cased and joined with an
            underscore (``"A"`` reads ``A_QDRANT_HOST``).

    Returns:
        Dict with ``host`` (default ``"localhost"``), ``port`` (default
        6333, also used when the value is not an integer in 1..65535),
        ``api_key`` (``None`` when unset or empty), ``https`` and
        ``prefer_grpc`` (``True`` only for ``1``/``true``/``yes``/``on``,
        case-insensitive).
    """
    key_prefix = f"{prefix.upper()}_" if prefix else ""
    truthy_markers = {"1", "true", "yes", "on"}
    default_port = 6333

    def _read(name: str, fallback: str = "") -> str:
        return os.getenv(f"{key_prefix}{name}", fallback)

    def _flag(name: str) -> bool:
        return _read(name).strip().lower() in truthy_markers

    raw_port = _read("QDRANT_PORT", "6333")
    try:
        port = int(raw_port)
    except (TypeError, ValueError):
        logger.warning(
            "Invalid QDRANT_PORT value %r — falling back to %d", raw_port, default_port
        )
        port = default_port
    else:
        if port < 1 or port > 65535:
            logger.warning(
                "QDRANT_PORT %d out of TCP range — falling back to %d", port, default_port
            )
            port = default_port

    return {
        "host": _read("QDRANT_HOST", "localhost"),
        "port": port,
        "api_key": _read("QDRANT_API_KEY") or None,
        "https": _flag("QDRANT_HTTPS"),
        "prefer_grpc": _flag("QDRANT_PREFER_GRPC"),
    }
class TenantAwareQdrantCollectionWithSuffix(QdrantCollectionBase):
    """
    Qdrant collection whose physical name is tenant-prefixed.

    Compared with ``QdrantCollectionBase``:

    - ``name`` yields the **tenant-prefixed** collection name, with the
      optional suffix appended. ``_COLLECTION_NAME`` remains the
      logical/business identifier and is exposed as ``base_name``.
    - ``__init__`` takes an optional ``suffix``; when omitted it is read
      from the ``SELF_QDRANT_COLLECTION_NS`` environment variable.
    - Everything else (collection creation, payload indexes, upsert/
      search/delete) is inherited unchanged.
    """

    # Subclasses MAY pin a partitioning strategy here. Informational only
    # for now; future versions may use it to opt into Qdrant's native
    # multi-tenancy via payload partitioning + ``group_id``-based sharding.
    _MULTI_TENANT_STRATEGY: ClassVar[str] = "collection_per_tenant"

    def __init__(self, suffix: Optional[str] = None):
        """
        Args:
            suffix: Optional explicit suffix (e.g. ``"staging"``). When
                ``None``, ``SELF_QDRANT_COLLECTION_NS`` is consulted, and
                an empty string is used if that is unset as well.
        """
        # Base validation: requires _COLLECTION_NAME and _VECTOR_PARAMS.
        super().__init__()

        self._suffix = _resolve_suffix(suffix)
        # The name is resolved once, here, against whichever tenant context
        # is active at construction time. To materialize a collection for a
        # different tenant, instantiate inside that tenant's context.
        tenant_prefixed = get_tenant_aware_collection_name(self._COLLECTION_NAME)
        name_parts = [tenant_prefixed]
        if self._suffix:
            name_parts.append(self._suffix)
        self._resolved_name = "_".join(name_parts)

        logger.debug(
            "TenantAwareQdrantCollectionWithSuffix resolved name: %s "
            "(base=%s, tenant_prefixed=%s, suffix=%s)",
            self._resolved_name,
            self._COLLECTION_NAME,
            tenant_prefixed,
            self._suffix or "",
        )

    @property
    def name(self) -> str:
        """Physical Qdrant collection name: tenant prefix plus optional suffix."""
        return self._resolved_name

    @property
    def base_name(self) -> str:
        """Logical ``_COLLECTION_NAME`` without tenant prefix or suffix."""
        return self._COLLECTION_NAME  # type: ignore[return-value]

    @property
    def suffix(self) -> str:
        """Resolved suffix, or an empty string when none applies."""
        return self._suffix
+1,620 @@ +#!/usr/bin/env python3 +""" +Re-embed MongoDB source-of-truth into Qdrant via OpenRouter qwen3-embedding-8b. + +Standalone CLI — does not rely on EverOS' DI container. Reads connection +config from environment / ``.env`` (loaded via python-dotenv if present): + + OPENROUTER_API_KEY # required + OPENROUTER_BASE_URL # default: https://openrouter.ai/api/v1 + VECTORIZE_MODEL # default: qwen/qwen3-embedding-8b + VECTORIZE_DIMENSIONS # default: 1024 (matches memory_layer/constants.py) + MONGO_URI # default: mongodb://localhost:27017 + QDRANT_HOST # default: localhost + QDRANT_PORT # default: 6333 + +Workhorse migrates a single (mongo-db, mongo-collection) -> qdrant-collection +pair. Use a shell loop over the 6 EverOS collection-types × N tenants to do +the full sweep (see ``re_embed_all.sh`` next to this file). + +Idempotent: existing point ids in the target Qdrant collection are skipped +unless ``--force`` is passed. + +Security note: at ``--log-level DEBUG`` PyMongo emits connection events +that include the raw Mongo URI. If your ``MONGO_URI`` carries credentials +(``mongodb://user:pass@host``) avoid DEBUG in shared terminals or pipe the +output through a redactor. 
+ +Usage:: + + python migrate_milvus_to_qdrant.py \\ + --mongo-db _episodic_memsys \\ + --mongo-coll v1_episodic_memories \\ + --qdrant-coll _v1_episodic_memory \\ + --text-field episode \\ + --extra-text-fields subject,summary \\ + --timestamp-field timestamp --timestamp-unit ms \\ + --payload-fields user_id,group_id,session_id,participants,sender_ids,type,parent_type,parent_id \\ + --batch-size 32 +""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import sys +import time +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional, Tuple + +# Standalone CLI: add ``src/`` to sys.path so EverOS-internal modules +# (``core.oxm.qdrant.base_repository``) resolve when this script is +# invoked directly with ``python src/devops_scripts/migrate_milvus_to_qdrant.py`` +# (no install / no PYTHONPATH). +_SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if _SRC_DIR not in sys.path: + sys.path.insert(0, _SRC_DIR) + +try: + from dotenv import load_dotenv + + _dotenv_path = os.environ.get("EVEROS_ENV_FILE") + if _dotenv_path: + load_dotenv(_dotenv_path) + else: + load_dotenv() # picks up ./.env if present +except ImportError: + pass + +from openai import OpenAI +from pymongo import MongoClient +from qdrant_client import QdrantClient +from qdrant_client.http import models as qmodels + +# Stable, namespace-shared with the repository layer so script-side and +# service-side ids agree on the same Mongo->Qdrant translation. 
@dataclass(frozen=True)
class Config:
    """
    Immutable connection/embedding settings for the re-embed CLI.

    Instances are normally built with ``Config.from_env()``, which reads
    the process environment (including anything loaded from ``.env``).
    """

    openrouter_api_key: str
    openrouter_base_url: str
    vectorize_model: str
    vectorize_dimensions: int
    mongo_uri: str
    qdrant_host: str
    qdrant_port: int

    @staticmethod
    def _int_from_env(
        name: str,
        default: int,
        minimum: int,
        maximum: Optional[int] = None,
    ) -> int:
        """
        Read an integer setting, treating unset or blank values as ``default``.

        Raises:
            SystemExit: If the value is not an integer or lies outside
                ``[minimum, maximum]``. This matches how a missing API key
                is reported, instead of surfacing a raw ``ValueError``
                traceback.
        """
        raw = os.environ.get(name, "").strip()
        if not raw:
            # ``NAME=`` lines in .env files yield an empty string, not "unset".
            return default
        try:
            value = int(raw)
        except ValueError:
            raise SystemExit(f"{name} must be an integer, got {raw!r}") from None
        if value < minimum or (maximum is not None and value > maximum):
            upper = "" if maximum is None else f" and <= {maximum}"
            raise SystemExit(f"{name} must be >= {minimum}{upper}, got {value}")
        return value

    @classmethod
    def from_env(cls) -> "Config":
        """
        Build a ``Config`` from environment variables.

        Raises:
            SystemExit: If ``OPENROUTER_API_KEY`` is missing/blank, or if
                ``VECTORIZE_DIMENSIONS`` / ``QDRANT_PORT`` are not valid
                integers in range.
        """
        api_key = os.environ.get("OPENROUTER_API_KEY", "").strip()
        if not api_key:
            raise SystemExit("OPENROUTER_API_KEY is required (env or .env)")

        return cls(
            openrouter_api_key=api_key,
            openrouter_base_url=os.environ.get(
                "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"
            ).rstrip("/"),
            vectorize_model=os.environ.get(
                "VECTORIZE_MODEL", "qwen/qwen3-embedding-8b"
            ),
            # Default mirrors ``memory_layer/constants.py`` (1024) so a
            # migration run without ``VECTORIZE_DIMENSIONS`` produces
            # collections the runtime service can use directly. Sites
            # running a different dimension MUST set the env var for both
            # migration and runtime; a mismatch would yield
            # dim-incompatible collections at cutover.
            vectorize_dimensions=cls._int_from_env(
                "VECTORIZE_DIMENSIONS", 1024, minimum=1
            ),
            mongo_uri=os.environ.get("MONGO_URI", "mongodb://localhost:27017"),
            qdrant_host=os.environ.get("QDRANT_HOST", "localhost"),
            qdrant_port=cls._int_from_env(
                "QDRANT_PORT", 6333, minimum=1, maximum=65535
            ),
        )
# Magnitude threshold used to guess the unit of a bare numeric timestamp.
# Read as epoch seconds, 3e9 is ~2065-01; read as epoch milliseconds it is
# ~1970-02-04. Values below the threshold are therefore taken as seconds
# (correct until ~2065) and values at or above it as milliseconds (correct
# for anything after early February 1970).
_NUMERIC_TS_HEURISTIC_MS = 3_000_000_000


def _normalize_timestamp_to_epoch(
    value: Any,
    target_unit: str,
    doc_id: Any,
    field_name: str,
) -> Optional[int]:
    """
    Coerce a timestamp value to integer epoch in ``target_unit``.

    Args:
        value: The raw field value. Objects exposing ``.timestamp()``
            (``datetime``, pandas ``Timestamp``) are converted exactly;
            ``int`` / ``float`` values are classified as seconds or
            milliseconds by comparing against ``_NUMERIC_TS_HEURISTIC_MS``.
        target_unit: ``"ms"`` for epoch milliseconds; any other value yields
            epoch seconds.
        doc_id: Identifier of the owning document, used only in the warning.
        field_name: Name of the field, used only in the warning.

    Returns:
        The epoch value truncated to ``int``, or ``None`` when ``value`` is
        of an unsupported type (including ``bool``) or is a non-finite
        float. A warning is logged in the ``None`` case so the bad document
        surfaces.
    """
    # Function-scope imports keep this block self-contained.
    import math
    from datetime import datetime, timezone

    if hasattr(value, "timestamp"):  # datetime / pandas Timestamp
        # ``migrate`` opens MongoClient without ``tz_aware``, so BSON dates
        # arrive as *naive* datetimes that represent UTC. ``datetime.timestamp``
        # would interpret a naive value in the host's local zone and shift
        # every epoch by the UTC offset, so pin naive values to UTC first.
        if isinstance(value, datetime) and value.utcoffset() is None:
            value = value.replace(tzinfo=timezone.utc)
        secs = value.timestamp()
        return int(secs * 1000) if target_unit == "ms" else int(secs)

    # ``bool`` is an ``int`` subclass; True/False are never timestamps.
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        n = float(value)
        if math.isfinite(n):
            # Decide the source unit from magnitude.
            source_is_ms = n >= _NUMERIC_TS_HEURISTIC_MS
            if target_unit == "ms":
                return int(n) if source_is_ms else int(n * 1000)
            # Target is seconds.
            return int(n // 1000) if source_is_ms else int(n)
        # NaN / inf would raise inside ``int()`` and abort the whole batch;
        # treat them like any other unusable value instead.
        logger.warning(
            "Skipping timestamp field '%s' with non-finite value %r for doc %s",
            field_name, value, doc_id,
        )
        return None

    logger.warning(
        "Skipping timestamp field '%s' with unexpected type %s for doc %s",
        field_name, type(value).__name__, doc_id,
    )
    return None
def ensure_qdrant_collection(
    client: "QdrantClient", name: str, vector_size: int
) -> None:
    """
    Create the target Qdrant collection if it does not exist yet.

    A new collection is created with a single unnamed dense vector of
    ``vector_size`` dimensions, cosine distance and HNSW ``m=16`` /
    ``ef_construct=200``. An existing collection is left untouched after
    its vector size has been verified.

    Args:
        client: Connected Qdrant client.
        name: Target collection name.
        vector_size: Embedding dimension this migration will upsert.

    Raises:
        RuntimeError: when a pre-existing collection has a different vector
            size, or does not expose a single unnamed dense vector at all
            (named-vector or sparse-only configuration). Migrating into
            such a collection would only surface as opaque per-batch errors
            at upsert time, with no hint at the schema drift cause.
    """
    if client.collection_exists(name):
        vectors = client.get_collection(name).config.params.vectors
        # ``vectors`` is a single params object only for the unnamed-vector
        # layout; a dict (named vectors) or ``None`` has no ``size``.
        existing_size = getattr(vectors, "size", None)
        if existing_size is None:
            raise RuntimeError(
                f"Qdrant collection '{name}' exists but is not configured "
                "with a single unnamed dense vector (named or sparse-only "
                "vector config), so its size cannot be checked against the "
                f"expected {vector_size}. Rename/delete the collection or "
                "migrate into a different one."
            )
        if existing_size != vector_size:
            raise RuntimeError(
                f"Qdrant collection '{name}' exists with vector size "
                f"{existing_size}, but this migration expects {vector_size}. "
                "Aborting before the per-batch dim-mismatch errors. Either "
                "set VECTORIZE_DIMENSIONS to match, or rename/delete the "
                "stale collection."
            )
        logger.info(
            "Qdrant collection '%s' already exists (size=%d) — keeping schema",
            name, existing_size,
        )
        return

    logger.info(
        "Creating Qdrant collection '%s' (size=%d, distance=Cosine, HNSW m=16 ef=200)",
        name, vector_size,
    )
    client.create_collection(
        collection_name=name,
        vectors_config=qmodels.VectorParams(
            size=vector_size,
            distance=qmodels.Distance.COSINE,
            hnsw_config=qmodels.HnswConfigDiff(m=16, ef_construct=200),
        ),
    )
Previously a failing ``QdrantClient(...)`` left + # ``openai`` unbound and the finally-cleanup raised ``NameError``, + # masking the original connection error. + qdrant: Optional[QdrantClient] = None + openai: Optional[OpenAI] = None + cursor = None + try: + qdrant = QdrantClient(host=config.qdrant_host, port=config.qdrant_port) + openai = OpenAI( + api_key=config.openrouter_api_key, + base_url=config.openrouter_base_url, + ) + + coll = mongo[mongo_db][mongo_coll] + total_docs = coll.estimated_document_count() + logger.info("Source has ~%d documents", total_docs) + + if not dry_run: + ensure_qdrant_collection(qdrant, qdrant_coll, config.vectorize_dimensions) + + # ``no_cursor_timeout=True``: a slow embedding batch (OpenRouter + # rate-limit, retry) can easily exceed the server-side default cursor + # idle timeout (10 min), which would surface as ``CursorNotFound`` + # mid-sweep with no progress signal. The cursor is closed in finally. + cursor = coll.find(no_cursor_timeout=True) + if limit: + cursor = cursor.limit(limit) + + processed = 0 + skipped_existing = 0 + skipped_no_text = 0 + upserted = 0 + started = time.time() + + batch_docs: List[Dict[str, Any]] = [] + + def flush(batch: List[Dict[str, Any]]) -> Tuple[int, int, int]: + """Embed + upsert one batch. Returns (upserted, skipped_existing, skipped_no_text).""" + # Mongo ids are mapped to Qdrant point ids via uuid5; idempotent so + # the existence-check below works across reruns. + qdrant_ids = [mongo_id_to_qdrant_id(d["_id"]) for d in batch] + if force: + new_ids = qdrant_ids + else: + new_ids = ( + filter_existing_ids(qdrant, qdrant_coll, qdrant_ids) + if not dry_run + else qdrant_ids + ) + new_set = set(new_ids) + new_docs = [ + d for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] + # Carry the resolved qdrant id alongside the doc so we don't recompute + # the uuid5 twice; attach as a temporary key on a shallow copy. 
+ new_pairs: List[Tuple[Dict[str, Any], str]] = [ + (d, qid) for d, qid in zip(batch, qdrant_ids) if qid in new_set + ] + + texts: List[str] = [] + kept_pairs: List[Tuple[Dict[str, Any], str]] = [] + for d, qid in new_pairs: + text = extract_text(d, text_field, extra_text_fields) + if not text: + continue + texts.append(text) + kept_pairs.append((d, qid)) + + if dry_run: + return ( + len(kept_pairs), + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) + + if not texts: + return ( + 0, + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) + + vectors = embed_batch( + openai, config.vectorize_model, config.vectorize_dimensions, texts + ) + + points: List[qmodels.PointStruct] = [] + for (d, qid), vec in zip(kept_pairs, vectors): + payload = build_payload( + d, payload_fields, timestamp_field, timestamp_unit, + text_field, extra_text_fields, + extra_timestamp_fields=extra_timestamp_fields, + ) + # Keep the original Mongo id in the payload so reverse-lookup + # from Qdrant -> Mongo is trivial. 
+ payload["mongo_id"] = str(d["_id"]) + points.append( + qmodels.PointStruct(id=qid, vector=vec, payload=payload) + ) + + qdrant.upsert(collection_name=qdrant_coll, points=points, wait=True) + return ( + len(points), + len(batch) - len(new_docs), + len(new_docs) - len(kept_pairs), + ) + + for doc in cursor: + batch_docs.append(doc) + if len(batch_docs) >= batch_size: + u, s_e, s_n = flush(batch_docs) + upserted += u + skipped_existing += s_e + skipped_no_text += s_n + processed += len(batch_docs) + logger.info( + "Progress: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + time.time() - started, + ) + batch_docs = [] + + if batch_docs: + u, s_e, s_n = flush(batch_docs) + upserted += u + skipped_existing += s_e + skipped_no_text += s_n + processed += len(batch_docs) + + logger.info( + "DONE: processed=%d upserted=%d skipped_existing=%d skipped_no_text=%d elapsed=%.1fs", + processed, upserted, skipped_existing, skipped_no_text, + time.time() - started, + ) + finally: + # Close in reverse construction order. Best-effort cleanup: a failing + # close should not mask a real exception from the body. Each handle is + # tested for ``None`` because construction may have raised mid-setup. 
def parse_args() -> argparse.Namespace:
    """
    Parse the command line of the single-pair re-embed CLI.

    Options are declared in a table and registered in order, so the
    ``--help`` listing follows the table top to bottom.

    Returns:
        The populated ``argparse.Namespace``. Comma-separated options are
        returned as raw strings; splitting is left to the caller.
    """
    parser = argparse.ArgumentParser(
        description="Re-embed MongoDB docs into Qdrant via OpenRouter."
    )

    option_table = (
        ("--mongo-db", {"required": True, "help": "Source Mongo database name"}),
        ("--mongo-coll", {"required": True, "help": "Source Mongo collection name"}),
        ("--qdrant-coll", {"required": True, "help": "Target Qdrant collection name"}),
        (
            "--text-field",
            {
                "required": True,
                "help": "Primary text field used for embedding (e.g., episode, task_intent)",
            },
        ),
        (
            "--extra-text-fields",
            {
                "default": "",
                "help": "Comma-separated secondary text fields appended after the primary",
            },
        ),
        (
            "--timestamp-field",
            {
                "default": "",
                "help": "Mongo field carrying the timestamp (omit to skip)",
            },
        ),
        (
            "--timestamp-unit",
            {
                "choices": ["ms", "s"],
                "default": "ms",
                "help": "Target unit for the timestamp payload value",
            },
        ),
        (
            "--payload-fields",
            {
                "required": True,
                "help": "Comma-separated list of fields to project from Mongo into the Qdrant payload",
            },
        ),
        (
            "--extra-timestamp-fields",
            {
                "default": "",
                "help": (
                    "Comma-separated payload field names that should ALSO be normalized "
                    "to epoch (in ``--timestamp-unit``). Use for collections that store "
                    "additional time fields beyond ``--timestamp-field`` "
                    "(e.g. foresight's ``end_time``)."
                ),
            },
        ),
        ("--batch-size", {"type": int, "default": 32}),
        (
            "--limit",
            {
                "type": int,
                "default": None,
                "help": "Max number of docs to process (for smoke tests)",
            },
        ),
        (
            "--force",
            {
                "action": "store_true",
                "help": "Re-embed and overwrite even if the Qdrant point already exists",
            },
        ),
        (
            "--dry-run",
            {
                "action": "store_true",
                "help": "Count what would happen without calling OpenRouter or Qdrant.upsert",
            },
        ),
        (
            "--log-level",
            {
                "default": "INFO",
                "choices": ["DEBUG", "INFO", "WARNING", "ERROR"],
            },
        ),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser.parse_args()
@dataclass(frozen=True)
class CollectionSpec:
    """
    Immutable description of one collection type handled by the sweep.

    Each attribute is forwarded to the ``migrate`` workhorse for every
    (database, collection) pair the sweep processes.
    """

    # Source Mongo collection name, identical in every tenant database.
    mongo_collection: str
    # Qdrant collection name without the tenant prefix.
    qdrant_base: str
    # Primary text field fed to the embedding model.
    text_field: str
    # Secondary text fields appended after the primary one.
    extra_text_fields: Tuple[str, ...] = ()
    # Field carrying the main timestamp; ``None`` disables normalization.
    timestamp_field: Optional[str] = "timestamp"
    # Target epoch unit for normalized timestamps (``"ms"`` or ``"s"``).
    timestamp_unit: str = "ms"
    # Mongo fields projected into the Qdrant payload.
    payload_fields: Tuple[str, ...] = ()
    # Additional payload field names that should be epoch-normalized
    # alongside ``timestamp_field`` (e.g. foresight's ``end_time``).
    extra_timestamp_fields: Tuple[str, ...] = ()
def derive_tenant_prefix(mongo_db: str) -> str:
    """
    Derive the Qdrant collection prefix from a Mongo database name.

    A trailing ``memsys`` is removed together with every underscore that
    immediately precedes it. Names that do not end in ``memsys`` are
    returned unchanged.

    Examples::

        acme_memsys      -> acme
        acme_prod_memsys -> acme_prod
        acmememsys       -> acme
        analytics        -> analytics
    """
    marker = "memsys"
    if not mongo_db.endswith(marker):
        return mongo_db
    # Stripping the bare marker and then the separator underscores covers
    # both the ``_memsys`` and the ``memsys`` spelling in one step.
    return mongo_db[: -len(marker)].rstrip("_")
+ mongo = MongoClient(config.mongo_uri) + try: + active_dbs = list_active_dbs(mongo) + if tenant_filter: + active_dbs = [d for d in active_dbs if d.startswith(tenant_filter)] + + target_specs = {k: SPECS[k] for k in spec_keys} + + logger.info( + "Sweep plan: %d active DBs × %d collection types -> up to %d pairs" + " (dry_run=%s, batch=%d, limit_per_pair=%s, force=%s)", + len(active_dbs), len(target_specs), + len(active_dbs) * len(target_specs), + dry_run, batch_size, limit_per_pair, force, + ) + + overall_start = time.time() + pairs_run = 0 + pairs_skipped_empty = 0 + pairs_failed = 0 + + for db in active_dbs: + prefix = derive_tenant_prefix(db) + for spec_name, spec in target_specs.items(): + count = estimated_count(mongo, db, spec.mongo_collection) + if count == 0: + pairs_skipped_empty += 1 + continue + + qdrant_coll = f"{prefix}_{spec.qdrant_base}" + logger.info( + "==> [%s] %s.%s -> %s (count=%d)", + spec_name, db, spec.mongo_collection, qdrant_coll, count, + ) + try: + migrate( + config=config, + mongo_db=db, + mongo_coll=spec.mongo_collection, + qdrant_coll=qdrant_coll, + text_field=spec.text_field, + extra_text_fields=spec.extra_text_fields, + timestamp_field=spec.timestamp_field, + timestamp_unit=spec.timestamp_unit, + payload_fields=spec.payload_fields, + extra_timestamp_fields=spec.extra_timestamp_fields, + batch_size=batch_size, + limit=limit_per_pair, + force=force, + dry_run=dry_run, + ) + pairs_run += 1 + except Exception as e: + logger.exception( + "Pair %s.%s -> %s FAILED: %s", + db, spec.mongo_collection, qdrant_coll, e, + ) + pairs_failed += 1 + + logger.info( + "SWEEP DONE: pairs_run=%d pairs_skipped_empty=%d pairs_failed=%d elapsed=%.1fs", + pairs_run, pairs_skipped_empty, pairs_failed, + time.time() - overall_start, + ) + return pairs_failed + finally: + try: + mongo.close() + except Exception: # noqa: BLE001 + logger.debug("mongo.close() raised during sweep cleanup", exc_info=True) + + +# 
def parse_args() -> argparse.Namespace:
    """
    Parse the command line of the sweep wrapper.

    ``--collection`` is restricted to the keys of ``SPECS``; leaving it out
    selects every collection type. ``--tenant`` narrows the sweep to
    databases whose name starts with the given prefix.
    """
    parser = argparse.ArgumentParser(
        description="Sweep wrapper for re-embed migration"
    )
    add = parser.add_argument

    # Scope selection.
    add("--tenant", default=None, help="Only DBs whose name starts with this prefix")
    add(
        "--collection",
        default=None,
        choices=sorted(SPECS),
        help="Only this collection type (default: all 5)",
    )

    # Throughput and smoke-test limits.
    add("--batch-size", type=int, default=64)
    add(
        "--limit-per-pair",
        type=int,
        default=None,
        help="Cap docs processed per (db, coll) pair (smoke testing)",
    )

    # Behaviour switches, forwarded to ``migrate`` for every pair.
    add("--force", action="store_true")
    add("--dry-run", action="store_true")

    add(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
    )
    return parser.parse_args()
class AgentCaseQdrantConverter(BaseQdrantConverter[AgentCaseCollection]):
    """Maps MongoDB ``AgentCaseRecord`` documents onto Qdrant points."""

    @classmethod
    def from_mongo(cls, source_doc: AgentCaseRecord) -> qmodels.PointStruct:
        """
        Convert one ``AgentCaseRecord`` into a ``PointStruct``.

        The point id is derived from the Mongo id via
        ``mongo_id_to_qdrant_id``; the vector is taken verbatim from
        ``source_doc.vector``, which the caller must have populated.

        Raises:
            ValueError: when ``source_doc`` or its ``id`` is ``None``, or
                when no vector is present.
            Exception: any other failure while building the point; it is
                logged with traceback and re-raised unchanged.
        """
        if source_doc is None:
            raise ValueError("MongoDB document cannot be None")
        if source_doc.id is None:
            raise ValueError("AgentCaseRecord.id must not be None")

        try:
            embedding = source_doc.vector
            if not embedding:
                raise ValueError(
                    f"Vector is required for AgentCaseRecord {source_doc.id} "
                    "but was not populated"
                )

            intent_text = source_doc.task_intent or ""

            # Parity with Milvus converter: epoch seconds (not ms) for this
            # collection. A missing timestamp is stored as 0.
            # NOTE(review): ``.timestamp()`` reads a naive datetime as local
            # time — confirm the record's timestamps are tz-aware.
            if source_doc.timestamp:
                epoch_seconds = int(source_doc.timestamp.timestamp())
            else:
                epoch_seconds = 0

            # Absent string fields are stored as "" rather than null.
            point_payload = {
                "user_id": source_doc.user_id or "",
                "group_id": source_doc.group_id or "",
                "session_id": source_doc.session_id or "",
                "timestamp": epoch_seconds,
                # Stored text is capped at 5000 characters.
                "task_intent": intent_text[:5000],
                "parent_type": source_doc.parent_type or "",
                "parent_id": source_doc.parent_id or "",
                # Back-reference to the originating Mongo document.
                "mongo_id": str(source_doc.id),
            }

            point_id = mongo_id_to_qdrant_id(source_doc.id)
            return qmodels.PointStruct(
                id=point_id,
                vector=embedding,
                payload=point_payload,
            )

        except Exception as e:
            logger.exception(
                "Failed to convert AgentCaseRecord to Qdrant point: %s", e
            )
            raise
class AgentSkillQdrantConverter(BaseQdrantConverter[AgentSkillCollection]):
    """Maps MongoDB ``AgentSkillRecord`` documents onto Qdrant points."""

    @classmethod
    def from_mongo(cls, source_doc: AgentSkillRecord) -> qmodels.PointStruct:
        """
        Convert one ``AgentSkillRecord`` into a ``PointStruct``.

        The payload's ``content`` is the skill name and description joined
        by a newline (empty parts dropped) and capped at 5000 characters.
        The vector is taken verbatim from ``source_doc.vector``.

        Raises:
            ValueError: when ``source_doc`` or its ``id`` is ``None``, or
                when no vector is present.
            Exception: any other failure while building the point; it is
                logged with traceback and re-raised unchanged.
        """
        if source_doc is None:
            raise ValueError("MongoDB document cannot be None")
        if source_doc.id is None:
            raise ValueError("AgentSkillRecord.id must not be None")

        def score_or_zero(score):
            # Coerce optional scores to 0.0 — Qdrant range-filters silently
            # exclude ``null``-valued payloads, which would hide scored
            # points from threshold queries. Treat "absent" as "lowest".
            return 0.0 if score is None else score

        try:
            embedding = source_doc.vector
            if not embedding:
                raise ValueError(
                    f"Vector is required for AgentSkillRecord {source_doc.id} "
                    "but was not populated"
                )

            skill_name = source_doc.name or ""
            skill_description = source_doc.description or ""

            # Primary text field: name + newline + description (Milvus parity).
            text_parts = [part for part in (skill_name, skill_description) if part]
            combined_text = "\n".join(text_parts)

            point_payload = {
                "user_id": source_doc.user_id or "",
                "group_id": source_doc.group_id or "",
                "cluster_id": source_doc.cluster_id or "",
                "content": combined_text[:5000],
                "maturity_score": score_or_zero(source_doc.maturity_score),
                "confidence": score_or_zero(source_doc.confidence),
                # Back-reference to the originating Mongo document.
                "mongo_id": str(source_doc.id),
            }

            point_id = mongo_id_to_qdrant_id(source_doc.id)
            return qmodels.PointStruct(
                id=point_id,
                vector=embedding,
                payload=point_payload,
            )

        except Exception as e:
            logger.exception(
                "Failed to convert AgentSkillRecord to Qdrant point: %s", e
            )
            raise
class AtomicFactQdrantConverter(BaseQdrantConverter[AtomicFactCollection]):
    """Converts MongoDB ``v1_atomic_fact_records`` to Qdrant point payloads."""

    @classmethod
    def from_mongo(cls, source_doc: MongoAtomicFactRecord) -> qmodels.PointStruct:
        """
        Build a ``PointStruct`` from a MongoDB atomic-fact document.

        The ``timestamp`` payload field is epoch milliseconds. A tz-naive
        ``timestamp`` is interpreted as UTC; ``datetime.timestamp()`` on a
        naive value would otherwise use the host's local zone and shift the
        stored epoch on any non-UTC machine.

        Args:
            source_doc: the atomic-fact document; its ``vector`` must
                already be populated.

        Returns:
            A ``PointStruct`` whose id is derived from the Mongo id via
            ``mongo_id_to_qdrant_id``.

        Raises:
            ValueError: when ``source_doc`` or its ``id`` is ``None``, or
                when no vector is populated.
            Exception: on any other conversion failure (logged + re-raised).
        """
        # Function-scope import: the module's import block does not bring
        # ``timezone`` into scope.
        from datetime import timezone

        if source_doc is None:
            raise ValueError("MongoDB document cannot be None")
        if source_doc.id is None:
            raise ValueError("AtomicFactRecord.id must not be None")

        try:
            timestamp = source_doc.timestamp
            if timestamp:
                if timestamp.utcoffset() is None:
                    # Naive datetime: pin it to UTC instead of local time.
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
                timestamp_ms = int(timestamp.timestamp() * 1000)
            else:
                timestamp_ms = 0

            # ``getattr(... , None)`` then explicit ``is None`` check so a
            # legitimately falsy value (e.g. empty string from a future
            # type enum entry) is preserved.
            raw_type = getattr(source_doc, "type", None)
            event_type = (
                raw_type if raw_type is not None else RawDataType.CONVERSATION.value
            )

            payload = {
                "user_id": source_doc.user_id or "",
                "group_id": source_doc.group_id or "",
                "session_id": source_doc.session_id or "",
                "participants": source_doc.participants or [],
                "sender_ids": getattr(source_doc, "sender_ids", []) or [],
                "type": event_type,
                "timestamp": timestamp_ms,
                "parent_type": source_doc.parent_type or "",
                "parent_id": (
                    str(source_doc.parent_id) if source_doc.parent_id else ""
                ),
                # Persist the canonical text so search results can return the
                # underlying atomic_fact without a Mongo round-trip.
                "search_content": cls._build_search_content(source_doc),
                # Raw Mongo id kept alongside the derived point id.
                "mongo_id": str(source_doc.id),
            }

            vector = getattr(source_doc, "vector", None)
            if not vector:
                raise ValueError(
                    f"Vector is required for AtomicFactRecord {source_doc.id} "
                    "but was not populated"
                )

            return qmodels.PointStruct(
                id=mongo_id_to_qdrant_id(source_doc.id),
                vector=vector,
                payload=payload,
            )

        except Exception as e:
            logger.exception(
                "Failed to convert MongoDB AtomicFact to Qdrant point: %s", e
            )
            raise

    @staticmethod
    def _build_search_content(source_doc: MongoAtomicFactRecord) -> str:
        """Return a JSON-encoded list holding the ``atomic_fact`` text, if any."""
        text_content: List[str] = []
        if source_doc.atomic_fact:
            text_content.append(source_doc.atomic_fact)
        return json.dumps(text_content, ensure_ascii=False)
class EpisodicMemoryQdrantConverter(BaseQdrantConverter[EpisodicMemoryCollection]):
    """
    Converts MongoDB ``v1_episodic_memories`` documents to Qdrant point payloads.

    Output shape: ``qdrant_client.http.models.PointStruct`` with a point id
    derived from the document id, the pre-computed embedding as the vector,
    and all search-relevant scalar fields plus the MongoDB back-reference in
    the payload.
    """

    @classmethod
    def from_mongo(cls, source_doc: MongoEpisodicMemory) -> qmodels.PointStruct:
        """
        Build a ``PointStruct`` from a MongoDB episodic-memory document.

        The ``timestamp`` payload field is epoch milliseconds. A tz-naive
        ``timestamp`` is interpreted as UTC; ``datetime.timestamp()`` on a
        naive value would otherwise use the host's local zone and shift the
        stored epoch on any non-UTC machine.

        Args:
            source_doc: MongoDB ``v1_episodic_memories`` document instance
                with its ``vector`` already populated.

        Returns:
            ``PointStruct`` ready for ``client.upsert([point])``.

        Raises:
            ValueError: when ``source_doc`` or its ``id`` is ``None``, or
                when no vector is populated.
            Exception: on any other conversion failure (logged + re-raised).
        """
        # Function-scope import: the module's import block does not bring
        # ``timezone`` into scope.
        from datetime import timezone

        if source_doc is None:
            raise ValueError("MongoDB document cannot be None")
        if source_doc.id is None:
            raise ValueError("EpisodicMemory.id must not be None")

        try:
            timestamp = source_doc.timestamp
            if timestamp:
                if timestamp.utcoffset() is None:
                    # Naive datetime: pin it to UTC instead of local time.
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
                timestamp_ms = int(timestamp.timestamp() * 1000)
            else:
                timestamp_ms = 0

            search_content = cls._build_search_content(source_doc)

            payload = {
                "user_id": source_doc.user_id or "",
                "group_id": source_doc.group_id or "",
                "session_id": source_doc.session_id or "",
                "participants": source_doc.participants or [],
                "sender_ids": getattr(source_doc, "sender_ids", []) or [],
                "type": getattr(source_doc, "type", None) or "",
                "timestamp": timestamp_ms,
                "episode": source_doc.episode or "",
                "search_content": search_content,
                "parent_type": source_doc.parent_type or "",
                "parent_id": (
                    str(source_doc.parent_id) if source_doc.parent_id else ""
                ),
                # Mongo back-reference: Qdrant ids are derived from the Mongo
                # id, so the raw id is kept in the payload for round-trip
                # lookup, idempotent re-embed, and debugging.
                "mongo_id": str(source_doc.id),
            }

            vector = getattr(source_doc, "vector", None)
            if not vector:
                raise ValueError(
                    f"Vector is required for EpisodicMemory {source_doc.id} "
                    "but was not populated"
                )

            return qmodels.PointStruct(
                id=mongo_id_to_qdrant_id(source_doc.id),
                vector=vector,
                payload=payload,
            )

        except Exception as e:
            logger.exception(
                "Failed to convert MongoDB document to Qdrant point: %s", e
            )
            raise

    @staticmethod
    def _build_search_content(source_doc: MongoEpisodicMemory) -> str:
        """
        Build the search-content string from the document's text fields.

        Collects the non-empty ``subject``, ``summary`` and ``episode``
        values (in that order; attributes that are missing are skipped) and
        returns them as a JSON-encoded list.
        """
        text_content: List[str] = []
        for attr in ("subject", "summary", "episode"):
            text = getattr(source_doc, attr, None)
            if text:
                text_content.append(text)
        return json.dumps(text_content, ensure_ascii=False)
class ForesightQdrantConverter(BaseQdrantConverter[ForesightCollection]):
    """Converts MongoDB ``v1_foresight_records`` documents to Qdrant point payloads."""

    @classmethod
    def _parse_time_field(
        cls,
        time_value: Optional[Union[datetime, str, int, float]],
        field_name: str,
        doc_id: Optional[object],
    ) -> int:
        """
        Parse a time field to epoch milliseconds.

        Accepts ``datetime``, ISO-8601 strings, numeric epoch seconds, or
        numeric epoch milliseconds. Numeric values above 1e10 are treated as
        already-ms, otherwise they are multiplied by 1000. ``datetime``
        values and ISO strings that carry no UTC offset are interpreted as
        UTC; ``datetime.timestamp()`` on a naive value would otherwise use
        the host's local zone.

        Args:
            time_value: the raw field value; ``None`` yields 0.
            field_name: field name, used only in the warning log.
            doc_id: document id, used only in the warning log.

        Returns:
            Epoch milliseconds, or 0 when the value is ``None``, cannot be
            parsed, or has an unsupported type (the last two cases are
            logged as warnings).
        """
        # Function-scope import: the module imports only ``datetime`` from
        # the ``datetime`` package.
        from datetime import timezone

        # Explicit ``is None`` so a legitimate epoch 0 / datetime(1970-01-01)
        # is not silently dropped as "missing".
        if time_value is None:
            return 0

        try:
            parsed = time_value
            if isinstance(parsed, str):
                parsed = datetime.fromisoformat(parsed.replace("Z", "+00:00"))
            if isinstance(parsed, datetime):
                if parsed.utcoffset() is None:
                    # No offset information: pin to UTC, not local time.
                    parsed = parsed.replace(tzinfo=timezone.utc)
                return int(parsed.timestamp() * 1000)
            if isinstance(parsed, (int, float)):
                # Magnitude guard: 1e10 epoch-seconds ~= year 2286, so any
                # numeric > 1e10 is already in milliseconds.
                value_ms = parsed if parsed > 1e10 else parsed * 1000
                return int(value_ms)
            logger.warning(
                "Unsupported type for %s (doc_id=%s): %r",
                field_name, doc_id, time_value,
            )
        except Exception as e:
            logger.warning(
                "Failed to parse %s (doc_id=%s): %s, error: %s",
                field_name, doc_id, time_value, e,
            )

        return 0

    @classmethod
    def from_mongo(cls, source_doc: MongoForesightRecord) -> qmodels.PointStruct:
        """
        Build a ``PointStruct`` from a MongoDB foresight-record document.

        ``start_time`` / ``end_time`` are normalised to epoch milliseconds
        through ``_parse_time_field``.

        Raises:
            ValueError: when ``source_doc`` or its ``id`` is ``None``, or
                when no vector is populated.
            Exception: on any other conversion failure (logged + re-raised).
        """
        if source_doc is None:
            raise ValueError("MongoDB document cannot be None")
        if source_doc.id is None:
            raise ValueError("ForesightRecord.id must not be None")

        try:
            start_time = cls._parse_time_field(
                source_doc.start_time, "start_time", source_doc.id
            )
            end_time = cls._parse_time_field(
                source_doc.end_time, "end_time", source_doc.id
            )

            search_content = cls._build_search_content(source_doc)

            payload = {
                "user_id": source_doc.user_id or "",
                "group_id": source_doc.group_id or "",
                "session_id": source_doc.session_id or "",
                "participants": source_doc.participants or [],
                "sender_ids": getattr(source_doc, "sender_ids", []) or [],
                "type": getattr(source_doc, "type", None) or "",
                "start_time": start_time,
                "end_time": end_time,
                "duration_days": (
                    source_doc.duration_days if source_doc.duration_days else 0
                ),
                # ``content`` is intentionally passed through verbatim (incl.
                # ``None``), unlike the other text fields which fall back
                # to "".
                "content": source_doc.content,
                "evidence": source_doc.evidence or "",
                "search_content": search_content,
                "parent_type": source_doc.parent_type or "",
                "parent_id": (
                    str(source_doc.parent_id) if source_doc.parent_id else ""
                ),
                # Raw Mongo id kept alongside the derived point id.
                "mongo_id": str(source_doc.id),
            }

            vector = source_doc.vector
            if not vector:
                raise ValueError(
                    f"Vector is required for ForesightRecord {source_doc.id} "
                    "but was not populated"
                )

            return qmodels.PointStruct(
                id=mongo_id_to_qdrant_id(source_doc.id),
                vector=vector,
                payload=payload,
            )

        except Exception as e:
            logger.exception(
                "Failed to convert MongoDB foresight document to Qdrant point: %s", e
            )
            raise

    @staticmethod
    def _build_search_content(source_doc: MongoForesightRecord) -> str:
        """Return a JSON-encoded list of the non-empty ``content`` and ``evidence``."""
        text_content: List[str] = []
        if source_doc.content:
            text_content.append(source_doc.content)
        if source_doc.evidence:
            text_content.append(source_doc.evidence)
        return json.dumps(text_content, ensure_ascii=False)
class UserProfileQdrantConverter(BaseQdrantConverter[UserProfileCollection]):
    """Splits a MongoDB ``UserProfile`` doc into per-item Qdrant payload dicts."""

    @staticmethod
    def _value_and_level(entry: Any) -> tuple:
        """
        Return ``(value, level)`` for one per-field profile entry.

        Dict entries contribute their ``value`` / ``level`` keys; any other
        non-``None`` entry is stringified and has no level. ``None`` yields
        an empty value so the caller skips it instead of embedding the
        literal text ``"None"``.
        """
        if entry is None:
            return "", ""
        if isinstance(entry, dict):
            return entry.get("value", ""), entry.get("level", "")
        return str(entry), ""

    @classmethod
    def from_mongo(  # type: ignore[override]
        cls, source_doc: MongoUserProfile
    ) -> List[Dict[str, Any]]:
        """
        Convert a single ``UserProfile`` doc into a list of per-item payloads.

        One dict is emitted per non-empty entry of the per-field lists named
        in ``_EXPLICIT_FIELDS`` / ``_IMPLICIT_FIELDS``, per described entry
        of the legacy ``explicit_info`` / ``implicit_traits`` arrays, and
        for a non-blank ``user_goal`` string. Each dict has:

        - ``id``: a freshly generated ObjectId string (the Mongo doc ``id``
          would collide across items because many points are emitted from
          one source doc).
        - The filterable payload fields ``user_id``, ``group_id``,
          ``scenario``, ``memcell_count`` and ``item_type``.
        - ``embed_text``: the text from which the embedding is generated
          downstream. The vector itself is **not** included.

        Raises:
            ValueError: when ``source_doc`` is ``None``.
            Exception: on any conversion failure (logged + re-raised).
        """
        if source_doc is None:
            raise ValueError("MongoDB document cannot be empty")

        try:
            profile_data: Dict[str, Any] = source_doc.profile_data or {}
            user_id = source_doc.user_id or ""
            group_id = source_doc.group_id or ""
            scenario = source_doc.scenario or ScenarioType.SOLO.value
            memcell_count = source_doc.memcell_count or 0

            items: List[Dict[str, Any]] = []

            def _make_item(embed_text: str, item_type: str) -> Dict[str, Any]:
                return {
                    "id": generate_object_id_str(),
                    "user_id": user_id,
                    "group_id": group_id,
                    "scenario": scenario,
                    "memcell_count": memcell_count,
                    "item_type": item_type,
                    "embed_text": embed_text,
                }

            # ProfileMemory format: per-field lists of {value, level?, ...}.
            for field_name, label in _EXPLICIT_FIELDS:
                for entry in profile_data.get(field_name, []) or []:
                    value, level = cls._value_and_level(entry)
                    if not value:
                        continue
                    embed_text = (
                        f"{label}: {value}" + (f" ({level})" if level else "")
                    )
                    items.append(_make_item(embed_text, "explicit_info"))

            for field_name, label in _IMPLICIT_FIELDS:
                for entry in profile_data.get(field_name, []) or []:
                    value, _ = cls._value_and_level(entry)
                    if not value:
                        continue
                    items.append(_make_item(f"{label}: {value}", "implicit_trait"))

            # Legacy format: flat explicit_info[] / implicit_traits[] arrays
            # with {category, description} / {trait, description, basis} shape.
            for entry in profile_data.get("explicit_info", []) or []:
                if not isinstance(entry, dict):
                    continue
                desc = entry.get("description", "")
                if not desc:
                    continue
                category = entry.get("category", "")
                embed_text = f"{category}: {desc}" if category else desc
                items.append(_make_item(embed_text, "explicit_info"))

            for entry in profile_data.get("implicit_traits", []) or []:
                if not isinstance(entry, dict):
                    continue
                desc = entry.get("description", "")
                if not desc:
                    continue
                trait_name = entry.get("trait") or entry.get("trait_name", "")
                embed_text = f"{trait_name}: {desc}" if trait_name else desc
                if entry.get("basis"):
                    embed_text += f". {entry['basis']}"
                items.append(_make_item(embed_text, "implicit_trait"))

            # Single user-goal string.
            user_goal = profile_data.get("user_goal")
            if user_goal and isinstance(user_goal, str) and user_goal.strip():
                items.append(
                    _make_item(f"Goal: {user_goal.strip()}", "explicit_info")
                )

            return items

        except Exception as e:
            logger.exception(
                "Failed to convert MongoDB UserProfile to Qdrant items: %s", e
            )
            raise
class AgentCaseCollection(TenantAwareQdrantCollectionWithSuffix):
    """V1 Agent Case Qdrant Collection. Tenant-prefixed at construction time."""

    _COLLECTION_NAME = "v1_agent_case"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        payload_indexes={
            # Keyword indexes: scope filters (user / group / session) and
            # back-reference filters (parent id / type).
            **dict.fromkeys(
                ("user_id", "group_id", "session_id", "parent_id", "parent_type"),
                "keyword",
            ),
            # Integer index for the time-range filter.
            "timestamp": "integer",
        },
    )


class AgentSkillCollection(TenantAwareQdrantCollectionWithSuffix):
    """V1 Agent Skill Qdrant Collection."""

    _COLLECTION_NAME = "v1_agent_skill"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        payload_indexes={
            # Keyword indexes for the scope filters.
            **dict.fromkeys(("user_id", "group_id", "cluster_id"), "keyword"),
            # Float indexes for quality-score range queries (thresholding).
            **dict.fromkeys(("maturity_score", "confidence"), "float"),
        },
    )


class AtomicFactCollection(TenantAwareQdrantCollectionWithSuffix):
    """V1 Atomic Fact Record Qdrant Collection."""

    _COLLECTION_NAME = "v1_atomic_fact_record"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        payload_indexes={
            # Keyword indexes: scope, back-reference and type filters.
            **dict.fromkeys(
                (
                    "user_id",
                    "group_id",
                    "session_id",
                    "parent_id",
                    "parent_type",
                    "type",
                ),
                "keyword",
            ),
            # Integer index for the time-range filter.
            "timestamp": "integer",
        },
    )


class EpisodicMemoryCollection(TenantAwareQdrantCollectionWithSuffix):
    """
    V1 Episodic Memory Qdrant Collection.

    The physical collection name is resolved by
    ``TenantAwareQdrantCollectionWithSuffix`` (tenant prefix + optional
    suffix, e.g. ``acme_v1_episodic_memory``). HNSW parameters are tuned
    conservatively for ~10k-scale collections; revisit for larger workloads.
    """

    # Logical base name only; see the class docstring for name resolution.
    _COLLECTION_NAME = "v1_episodic_memory"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        payload_indexes={
            # Keyword (exact-equality) indexes: tenant-isolation / scope
            # filters, lookup-by-parent back-references for resync flows,
            # and the type filter used by the search service.
            **dict.fromkeys(
                (
                    "user_id",
                    "group_id",
                    "session_id",
                    "parent_id",
                    "parent_type",
                    "type",
                ),
                "keyword",
            ),
            # Integer index for the time-range filter.
            "timestamp": "integer",
        },
    )
class ForesightCollection(TenantAwareQdrantCollectionWithSuffix):
    """V1 Foresight Record Qdrant Collection."""

    _COLLECTION_NAME = "v1_foresight_record"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        payload_indexes={
            # Keyword indexes: scope, back-reference and type filters.
            **dict.fromkeys(
                (
                    "user_id",
                    "group_id",
                    "session_id",
                    "parent_id",
                    "parent_type",
                    "type",
                ),
                "keyword",
            ),
            # Integer indexes for the time-range filters.
            **dict.fromkeys(("start_time", "end_time"), "integer"),
        },
    )
class UserProfileCollection(TenantAwareQdrantCollectionWithSuffix):
    """V1 User Profile Qdrant Collection."""

    _COLLECTION_NAME = "v1_user_profile"

    _VECTOR_PARAMS = IndexConfig(
        size=VECTORIZE_DIMENSIONS,
        distance="cosine",
        hnsw_m=16,
        hnsw_ef_construct=200,
        # Every indexed field is a keyword: the scope filters
        # (user_id, group_id) and the cohort filters (scenario, item_type).
        payload_indexes=dict.fromkeys(
            ("user_id", "group_id", "scenario", "item_type"),
            "keyword",
        ),
    )
@repository("agent_case_qdrant_repository", primary=False)
class AgentCaseQdrantRepository(BaseQdrantRepository[AgentCaseCollection]):
    """Qdrant-backed vector search over V1 AgentCase points."""

    def __init__(self) -> None:
        super().__init__(AgentCaseCollection)

    async def vector_search(
        self,
        query_vector: List[float],
        user_id: Optional[str] = None,
        session_id: Optional[str] = None,
        group_ids: Optional[List[str]] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        parent_id: Optional[str] = None,
        limit: int = 10,
        score_threshold: float = 0.0,
        radius: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """
        Run a similarity search restricted by scope and time-range filters.

        Args:
            query_vector: embedding to search with.
            user_id: exact-match filter; skipped when empty or ``MAGIC_ALL``.
            session_id: exact-match filter; skipped when empty.
            group_ids: match-any filter on ``group_id``; skipped when empty.
            start_time: inclusive lower bound on ``timestamp``.
            end_time: inclusive upper bound on ``timestamp``.
            parent_id: exact-match filter; skipped when empty.
            limit: maximum number of points requested from Qdrant.
            score_threshold: hits scoring below this are dropped client-side.
            radius: combined with ``score_threshold`` by
                ``compute_effective_threshold`` for the server-side cut.

        Returns:
            One dict per surviving hit with ``id``, ``score``, the scope
            ids, ``timestamp`` (UTC ``datetime`` rebuilt from the stored
            epoch seconds), ``task_intent``, ``parent_type`` and
            ``parent_id``.

        Raises:
            Exception: any failure is logged and re-raised.
        """
        try:

            def _equals(key: str, value: str) -> qmodels.FieldCondition:
                # Exact-equality condition on a single payload field.
                return qmodels.FieldCondition(
                    key=key, match=qmodels.MatchValue(value=value)
                )

            must: List[qmodels.FieldCondition] = []

            # ``None``/empty user_id (or MAGIC_ALL) means "do not filter".
            if user_id and user_id != MAGIC_ALL:
                must.append(_equals("user_id", user_id))
            if session_id:
                must.append(_equals("session_id", session_id))
            if group_ids:
                must.append(
                    qmodels.FieldCondition(
                        key="group_id",
                        match=qmodels.MatchAny(any=list(group_ids)),
                    )
                )
            if parent_id:
                must.append(_equals("parent_id", parent_id))

            # AgentCase timestamps are epoch SECONDS; the bounds go through
            # ``to_epoch_s`` before being handed to the range filter.
            bounds: Dict[str, int] = {}
            if start_time:
                bounds["gte"] = to_epoch_s(start_time)
            if end_time:
                bounds["lte"] = to_epoch_s(end_time)
            if bounds:
                must.append(
                    qmodels.FieldCondition(
                        key="timestamp", range=qmodels.Range(**bounds)
                    )
                )

            # Two-stage gating: the server-side threshold comes from
            # ``compute_effective_threshold``; the caller's hard
            # ``score_threshold`` is enforced client-side below.
            hits = await self.search(
                query_vector=query_vector,
                limit=limit,
                query_filter=qmodels.Filter(must=must) if must else None,
                with_payload=True,
                with_vectors=False,
                score_threshold=compute_effective_threshold(
                    radius, score_threshold
                ),
                search_params=qmodels.SearchParams(hnsw_ef=max(128, limit * 2)),
            )

            def _as_result(hit: Any) -> Dict[str, Any]:
                # Flatten one scored point into the caller-facing dict.
                fields = hit.payload or {}
                return {
                    "id": str(hit.id),
                    "score": float(hit.score),
                    "user_id": fields.get("user_id"),
                    "group_id": fields.get("group_id"),
                    "session_id": fields.get("session_id", ""),
                    # Epoch seconds -> UTC datetime.
                    "timestamp": datetime.fromtimestamp(
                        fields.get("timestamp", 0) or 0, tz=timezone.utc
                    ),
                    "task_intent": fields.get("task_intent", ""),
                    "parent_type": fields.get("parent_type", ""),
                    "parent_id": fields.get("parent_id", ""),
                }

            search_results: List[Dict[str, Any]] = [
                _as_result(hit)
                for hit in hits
                if not hit.score < score_threshold
            ]

            logger.debug(
                "AgentCase Qdrant search: found %d results", len(search_results)
            )
            return search_results

        except Exception as e:
            logger.exception("AgentCase Qdrant search failed: %s", e)
            raise
+ +Filter expressions are built as ``qmodels.Filter(must=[FieldCondition...])`` +instead of the Milvus string-expression syntax — same semantic, native +typing. +""" + +import asyncio +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import ( + BaseQdrantRepository, + compute_effective_threshold, +) +from infra_layer.adapters.out.search.qdrant.memory.agent_skill_collection import ( + AgentSkillCollection, +) + +logger = get_logger(__name__) + + +@repository("agent_skill_qdrant_repository", primary=False) +class AgentSkillQdrantRepository(BaseQdrantRepository[AgentSkillCollection]): + """ + AgentSkill Qdrant Repository. + + Supports vector similarity search over reusable skill items, plus + cluster-level deletion for the replace pattern. + """ + + def __init__(self) -> None: + super().__init__(AgentSkillCollection) + + # ----------------------------------------------------------------- search + + async def vector_search( + self, + query_vector: List[float], + group_ids: Optional[List[str]] = None, + user_id: Optional[str] = None, + cluster_id: Optional[str] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + maturity_threshold: Optional[float] = 0.6, + confidence_threshold: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """ + Vector similarity search over agent skill items. + + Args: + query_vector: Query embedding vector. + group_ids: Group ID list filter (``None`` to skip). + user_id: User ID filter. ``MAGIC_ALL`` disables the filter. + cluster_id: Filter by MemScene cluster ID. + limit: Max results to return. 
+ score_threshold: Minimum Cosine similarity score (applied + post-search at the wrapper level; Qdrant also gets it via + ``score_threshold`` for early stopping). + radius: Explicit Cosine similarity threshold (>-1.0 enables it). + maturity_threshold: Minimum maturity score (0.0–1.0). ``None`` + skips the filter (include all maturities). + confidence_threshold: Minimum confidence score (0.0–1.0). ``None`` + skips the filter. + + Returns: + List of result dicts with the same shape as the Milvus + repository for caller parity. + """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if maturity_threshold is not None: + conditions.append( + qmodels.FieldCondition( + key="maturity_score", + range=qmodels.Range(gte=maturity_threshold), + ) + ) + + if confidence_threshold is not None: + conditions.append( + qmodels.FieldCondition( + key="confidence", + range=qmodels.Range(gte=confidence_threshold), + ) + ) + + if user_id and user_id != MAGIC_ALL: + # ``None``/empty user_id means "do not filter" (search across + # the whole tenant), not "match the empty-string user_id". + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if cluster_id: + conditions.append( + qmodels.FieldCondition( + key="cluster_id", + match=qmodels.MatchValue(value=cluster_id), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + + ef_value = max(128, limit * 2) + # Two-stage gating: ``compute_effective_threshold`` returns the + # more permissive (smaller) positive bound of ``radius`` and + # ``score_threshold``, or ``None`` if neither is positive. The + # client-side ``point.score < score_threshold`` post-filter below + # still enforces the caller's hard cut-off. 
+ effective_threshold = compute_effective_threshold( + radius, score_threshold + ) + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=effective_threshold, + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id", ""), + "group_id": payload.get("group_id"), + "cluster_id": payload.get("cluster_id"), + "content": payload.get("content", ""), + } + ) + + logger.debug( + "AgentSkill Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.exception("AgentSkill Qdrant search failed: %s", e) + raise + + # -------------------------------------------------------- domain deletes + + async def delete_by_cluster_id(self, cluster_id: str) -> int: + """ + Delete all Qdrant points whose ``cluster_id`` payload matches. + + Used by the AgentSkillExtractor's replace pattern: drop all skills + of a cluster, then re-upsert the freshly extracted skills. + + Args: + cluster_id: MemScene cluster ID. + + Returns: + Number of points deleted (best-effort; Qdrant doesn't return an + exact count, so we count via a prior scroll). + """ + try: + filter_ = qmodels.Filter( + must=[ + qmodels.FieldCondition( + key="cluster_id", + match=qmodels.MatchValue(value=cluster_id), + ) + ] + ) + + client = self.collection.client() + name = self.collection.name + + # Use Qdrant's ``count`` for an exact total instead of a single + # scroll page (which could undercount when the cluster has more + # than the page limit). After counting we issue a single + # filter-based delete that covers all matches. 
+ count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + count = count_result.count + + if count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + logger.debug( + "Deleted %d Qdrant points for cluster=%s", count, cluster_id + ) + return count + + except Exception as e: + logger.exception( + "Failed to delete Qdrant points for cluster=%s: %s", cluster_id, e + ) + # Re-raise so callers can distinguish a genuine zero from an + # operational failure (consistent with upsert/search/delete_batch + # in the base repository). + raise diff --git a/methods/EverCore/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py b/methods/EverCore/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py new file mode 100644 index 00000000..c86186aa --- /dev/null +++ b/methods/EverCore/src/infra_layer/adapters/out/search/repository/atomic_fact_qdrant_repository.py @@ -0,0 +1,419 @@ +""" +Atomic Fact Qdrant Repository. + +Provides vector search + batch lookups for atomic-fact records via Qdrant. +Mirrors the Milvus counterpart's surface for caller parity: + +- ``create_and_save_atomic_fact``: convenience constructor + upsert +- ``vector_search``: scope + time-range filtered search +- ``batch_vector_search_by_parent_ids``: MRAG-Phase-3 expansion from + episodes to atomic facts +- ``delete_by_filters``: batch delete by user/group/time-range + +Timestamp filter uses **epoch milliseconds** (parity with the Milvus +repository and the AtomicFact converter). 
+""" + +import asyncio +import json +from datetime import datetime, timezone +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import ( + BaseQdrantRepository, + compute_effective_threshold, + to_epoch_ms, +) +from infra_layer.adapters.out.search.qdrant.memory.atomic_fact_collection import ( + AtomicFactCollection, +) + +logger = get_logger(__name__) + + +@repository("atomic_fact_qdrant_repository", primary=False) +class AtomicFactQdrantRepository(BaseQdrantRepository[AtomicFactCollection]): + """V1 Atomic Fact Qdrant Repository.""" + + def __init__(self) -> None: + super().__init__(AtomicFactCollection) + + # ===================================== Document creation / management + + async def create_and_save_atomic_fact( + self, + point_id: str, + user_id: Optional[str], + atomic_fact: str, + parent_id: str, + parent_type: str, + timestamp: datetime, + vector: List[float], + group_id: Optional[str] = None, + participants: Optional[List[str]] = None, + sender_ids: Optional[List[str]] = None, + event_type: Optional[str] = None, + search_content: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Build a ``PointStruct`` for an atomic fact and upsert it. + + Returns: + Summary dict (id / user_id / atomic_fact / parent_* / timestamp / + search_content) — same shape as the Milvus repository. + """ + # Explicit None / empty check so a legitimate all-zero embedding + # is not falsy-rejected (any() on a list of 0.0s is False). 
+ if vector is None or len(vector) == 0: + raise ValueError( + f"Vector is required for AtomicFact {point_id} but was not populated" + ) + + try: + if search_content is None: + search_content = [atomic_fact] + + payload = { + "user_id": user_id or "", + "group_id": group_id or "", + "session_id": "", # not provided by this entry point + "participants": participants or [], + "sender_ids": sender_ids or [], + "type": event_type, + "timestamp": to_epoch_ms(timestamp), + "atomic_fact": atomic_fact, + "search_content": json.dumps(search_content, ensure_ascii=False), + "parent_type": parent_type, + "parent_id": parent_id, + } + + await self.upsert( + qmodels.PointStruct(id=point_id, vector=vector, payload=payload) + ) + + logger.debug( + "Atomic fact point upserted: id=%s, user_id=%s", point_id, user_id + ) + + # Result dict keeps the ``id`` key for caller parity with the + # Milvus counterpart; only the parameter name changed. + return { + "id": point_id, + "user_id": user_id, + "atomic_fact": atomic_fact, + "parent_type": parent_type, + "parent_id": parent_id, + "timestamp": timestamp, + "search_content": search_content, + } + + except Exception as e: + logger.exception( + "Failed to create atomic fact point: id=%s, error=%s", point_id, e + ) + raise + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + session_id: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with full scope + time-range filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + 
key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if parent_type: + conditions.append( + qmodels.FieldCondition( + key="parent_type", + match=qmodels.MatchValue(value=parent_type), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = to_epoch_ms(start_time) + if end_time: + time_range["lte"] = to_epoch_ms(end_time) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + # Two-stage gating — see ``compute_effective_threshold`` for the + # full precedence rules; ``min(radius, score_threshold)`` is wrong + # when ``score_threshold`` is at its default ``0.0`` (yields 0, + # disabling both server- and client-side filtering). 
+ effective_threshold = compute_effective_threshold( + radius, score_threshold + ) + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=effective_threshold, + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_ms = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "session_id": payload.get("session_id"), + "participants": payload.get("participants"), + # Returned alongside ``search_content`` (and matching + # the batch path) so callers don't need a Mongo + # round-trip to recover the canonical atomic fact text. + "atomic_fact": payload.get("atomic_fact"), + # Convert epoch milliseconds back to UTC datetime so + # callers get a consistent type across all repository + # entry points (parity with create_and_save_atomic_fact + # and with the agent_case repository's seconds-path). + "timestamp": datetime.fromtimestamp( + ts_ms / 1000, tz=timezone.utc + ), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + } + ) + + logger.debug( + "AtomicFact Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.error("AtomicFact Qdrant search failed: %s", e) + raise + + async def batch_vector_search_by_parent_ids( + self, + query_vector: List[float], + parent_ids: List[str], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + limit: int = 5, + score_threshold: float = 0.0, + ) -> List[Dict[str, Any]]: + """ + Vector search restricted to a list of ``parent_id`` values. + + Used by MRAG Phase 3 to expand episodes into their atomic facts. 
+ Total effective limit is ``limit * len(parent_ids)``. + """ + if not parent_ids: + return [] + + try: + conditions: List[qmodels.FieldCondition] = [ + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchAny(any=list(parent_ids)), + ) + ] + + if user_id and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + total_limit = limit * len(parent_ids) + ef_value = max(128, total_limit * 2) + + scored_points = await self.search( + query_vector=query_vector, + limit=total_limit, + query_filter=qmodels.Filter(must=conditions), + with_payload=True, + with_vectors=False, + score_threshold=score_threshold if score_threshold > 0 else None, + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_ms = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + "atomic_fact": payload.get("atomic_fact"), + "timestamp": datetime.fromtimestamp( + ts_ms / 1000, tz=timezone.utc + ), + "participants": payload.get("participants"), + } + ) + + logger.debug( + "AtomicFact batch search by parent_ids: parent_ids=%d, results=%d", + len(parent_ids), + len(search_results), + ) + return search_results + + except Exception as e: + logger.error("AtomicFact batch search by parent_ids failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_filters( + self, + user_id: Optional[str] = MAGIC_ALL, + group_id: Optional[str] = 
MAGIC_ALL, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + ) -> int: + """Batch delete by filter combination; at least one filter required.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + if group_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id or ""), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = to_epoch_ms(start_time) + if end_time: + time_range["lte"] = to_epoch_ms(end_time) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + if not conditions: + raise ValueError("At least one filter condition must be provided") + + filter_ = qmodels.Filter(must=conditions) + client = self.collection.client() + name = self.collection.name + + # Exact count for the deleted-points return value (avoids + # the bounded scroll-page undercount). 
+ count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + delete_count = count_result.count + + if delete_count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.debug( + "Batch deleted atomic facts: deleted %d points", delete_count + ) + return delete_count + + except Exception as e: + logger.error("Failed to batch delete atomic facts: %s", e) + raise diff --git a/methods/EverCore/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py b/methods/EverCore/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py new file mode 100644 index 00000000..45a769c5 --- /dev/null +++ b/methods/EverCore/src/infra_layer/adapters/out/search/repository/episodic_memory_qdrant_repository.py @@ -0,0 +1,349 @@ +""" +Episodic Memory Qdrant Repository. + +V1 simplified repository for vector semantic retrieval. Only stores +search-essential fields in Qdrant; full data is fetched from MongoDB via +``parent_id`` back-reference. + +Mirrors the surface of the Milvus counterpart for caller parity, but uses +native Qdrant filtering (``qmodels.Filter(must=[FieldCondition...])``) +instead of Milvus' string expression syntax. 
+""" + +import asyncio +import json +from datetime import datetime, timezone +from functools import partial +from typing import Any, Dict, List, Optional + +from qdrant_client.http import models as qmodels + +from core.di.decorators import repository +from core.observation.logger import get_logger +from core.oxm.constants import MAGIC_ALL +from core.oxm.qdrant.base_repository import ( + BaseQdrantRepository, + compute_effective_threshold, + to_epoch_ms, +) +from infra_layer.adapters.out.search.qdrant.memory.episodic_memory_collection import ( + EpisodicMemoryCollection, +) + +logger = get_logger(__name__) + + +@repository("episodic_memory_qdrant_repository", primary=False) +class EpisodicMemoryQdrantRepository(BaseQdrantRepository[EpisodicMemoryCollection]): + """V1 simplified Qdrant repository for episodic memory.""" + + def __init__(self) -> None: + super().__init__(EpisodicMemoryCollection) + + # ===================================== Document creation / management + + async def create_and_save_episodic_memory( + self, + id: str, + user_id: str, + timestamp: datetime, + episode: str, + search_content: List[str], + vector: List[float], + title: Optional[str] = None, + summary: Optional[str] = None, + group_id: Optional[str] = None, + participants: Optional[List[str]] = None, + sender_ids: Optional[List[str]] = None, + event_type: Optional[str] = None, + subject: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + ) -> Dict[str, Any]: + """ + High-level convenience constructor: build a ``PointStruct`` and upsert. + + Returns: + A small summary dict (id, user_id, timestamp, episode, + search_content) — same shape as the Milvus repository to keep + callers untouched at cutover. + + Raises: + ValueError: when ``vector`` is None or empty. A missing embedding + would only surface as a confusing 400 from Qdrant at upsert + time, far from the bad caller. Fail fast instead. 
+ """ + if vector is None or len(vector) == 0: + raise ValueError( + f"Vector is required for EpisodicMemory {id} but was not populated" + ) + + try: + payload = { + "user_id": user_id or "", + "group_id": group_id or "", + "session_id": "", # not provided by this entry point + "participants": participants or [], + "sender_ids": sender_ids or [], + "type": event_type or "", + "timestamp": to_epoch_ms(timestamp), + "episode": episode, + "search_content": json.dumps(search_content, ensure_ascii=False), + "parent_type": parent_type or "", + "parent_id": parent_id or "", + } + + await self.upsert( + qmodels.PointStruct(id=id, vector=vector, payload=payload) + ) + + logger.debug( + "Episodic memory point upserted: id=%s, user_id=%s", id, user_id + ) + + return { + "id": id, + "user_id": user_id, + "timestamp": timestamp, + "episode": episode, + "search_content": search_content, + } + + except Exception as e: + logger.error( + "Failed to create episodic memory point: id=%s, error=%s", id, e + ) + raise + + # ============================================================ search + + async def vector_search( + self, + query_vector: List[float], + user_id: Optional[str] = None, + group_ids: Optional[List[str]] = None, + session_id: Optional[str] = None, + parent_type: Optional[str] = None, + parent_id: Optional[str] = None, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + limit: int = 10, + score_threshold: float = 0.0, + radius: Optional[float] = None, + ) -> List[Dict[str, Any]]: + """Vector similarity search with optional scope + time-range filters.""" + try: + conditions: List[qmodels.FieldCondition] = [] + + # Guard both ``None`` (no scope passed) and the explicit "all" + # sentinel. Without the ``is not None`` guard a default ``user_id=None`` + # slipped past the sentinel check and the ``user_id or ""`` fallback + # filtered the search to documents with an empty ``user_id`` payload, + # i.e. zero hits in practice. 
+ if user_id is not None and user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id), + ) + ) + + if group_ids: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchAny(any=list(group_ids)), + ) + ) + + if session_id: + conditions.append( + qmodels.FieldCondition( + key="session_id", + match=qmodels.MatchValue(value=session_id), + ) + ) + + if parent_type: + conditions.append( + qmodels.FieldCondition( + key="parent_type", + match=qmodels.MatchValue(value=parent_type), + ) + ) + + if parent_id: + conditions.append( + qmodels.FieldCondition( + key="parent_id", + match=qmodels.MatchValue(value=parent_id), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = to_epoch_ms(start_time) + if end_time: + time_range["lte"] = to_epoch_ms(end_time) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + query_filter = qmodels.Filter(must=conditions) if conditions else None + ef_value = max(128, limit * 2) + # Two-stage score gating (parity with Milvus repository): + # - Server-side: pass the *more permissive* (lower) of + # ``radius`` and ``score_threshold`` so Qdrant returns the + # wider net. + # - Client-side: the ``point.score < score_threshold`` post- + # filter enforces the hard caller-facing minimum. + # This way callers can use ``radius`` to widen recall without + # accidentally making the server-side cut stricter than the + # caller's own cut-off. + # Two-stage gating — see ``compute_effective_threshold`` for the + # precedence rules. The plain ``min(radius, score_threshold)`` + # collapsed to ``0`` whenever ``score_threshold`` was at its + # default and silently disabled both server- and client-side + # filtering. 
+ effective_threshold = compute_effective_threshold( + radius, score_threshold + ) + + scored_points = await self.search( + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + with_vectors=False, + score_threshold=effective_threshold, + search_params=qmodels.SearchParams(hnsw_ef=ef_value), + ) + + search_results: List[Dict[str, Any]] = [] + for point in scored_points: + if point.score < score_threshold: + continue + payload = point.payload or {} + ts_ms = payload.get("timestamp", 0) or 0 + search_results.append( + { + "id": str(point.id), + "score": float(point.score), + "user_id": payload.get("user_id"), + "group_id": payload.get("group_id"), + "session_id": payload.get("session_id"), + "participants": payload.get("participants"), + # Normalise epoch ms back to a UTC ``datetime`` for + # caller parity with the other Qdrant repositories + # (atomic_fact, agent_case, foresight) — they all + # surface time as ``datetime``, returning the raw + # epoch here used to break callers that wanted a + # single time type across collections. + "timestamp": datetime.fromtimestamp( + ts_ms / 1000, tz=timezone.utc + ), + "parent_type": payload.get("parent_type"), + "parent_id": payload.get("parent_id"), + "type": payload.get("type"), + "episode": payload.get("episode"), + } + ) + + logger.debug( + "EpisodicMemory Qdrant search: found %d results", len(search_results) + ) + return search_results + + except Exception as e: + logger.exception("EpisodicMemory Qdrant search failed: %s", e) + raise + + # ========================================================== deletion + + async def delete_by_filters( + self, + user_id: Optional[str] = MAGIC_ALL, + group_id: Optional[str] = MAGIC_ALL, + start_time: Optional[datetime] = None, + end_time: Optional[datetime] = None, + ) -> int: + """ + Batch delete by filter combination. + + At least one filter (other than ``MAGIC_ALL`` sentinels) must be + provided, matching the Milvus repository's guard. 
+ """ + try: + conditions: List[qmodels.FieldCondition] = [] + + if user_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="user_id", + match=qmodels.MatchValue(value=user_id or ""), + ) + ) + if group_id != MAGIC_ALL: + conditions.append( + qmodels.FieldCondition( + key="group_id", + match=qmodels.MatchValue(value=group_id or ""), + ) + ) + + time_range: Dict[str, int] = {} + if start_time: + time_range["gte"] = to_epoch_ms(start_time) + if end_time: + time_range["lte"] = to_epoch_ms(end_time) + if time_range: + conditions.append( + qmodels.FieldCondition( + key="timestamp", + range=qmodels.Range(**time_range), + ) + ) + + if not conditions: + raise ValueError("At least one filter condition must be provided") + + filter_ = qmodels.Filter(must=conditions) + client = self.collection.client() + name = self.collection.name + + # Use Qdrant's exact ``count`` instead of a bounded scroll page, + # so the returned count reflects the *full* number of points + # the filter matches (a 10k scroll cap would undercount large + # tenants and produce a misleading return value). 
+ count_result = await asyncio.to_thread( + partial( + client.count, + collection_name=name, + count_filter=filter_, + exact=True, + ) + ) + delete_count = count_result.count + + if delete_count > 0: + await asyncio.to_thread( + partial( + client.delete, + collection_name=name, + points_selector=qmodels.FilterSelector(filter=filter_), + wait=True, + ) + ) + + logger.debug( + "Batch deleted episodic memories: deleted %d points", delete_count + ) + return delete_count + + except Exception as e: + logger.error("Failed to batch delete episodic memories: %s", e) + raise diff --git a/methods/EverCore/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py b/methods/EverCore/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py new file mode 100644 index 00000000..dc3aded3 --- /dev/null +++ b/methods/EverCore/src/infra_layer/adapters/out/search/repository/foresight_qdrant_repository.py @@ -0,0 +1,379 @@ +""" +Foresight Qdrant Repository. + +V1 repository for vector semantic retrieval over foresight records. +Mirrors the Milvus counterpart's surface (``create_and_save_foresight_mem``, +``vector_search``, ``delete_by_filters``) for caller parity. + +**Note on time filters:** the Foresight schema stores ``start_time`` and +``end_time`` (both epoch milliseconds). Both ``vector_search`` and +``delete_by_filters`` use **window-overlap** semantics — a record matches +when its window ``[start_time, end_time]`` overlaps the query window +``[start_time arg, end_time arg]``: + +- ``end_time`` arg -> ``payload.start_time <= end_time_ms`` +- ``start_time`` arg -> ``payload.end_time >= start_time_ms`` + +(Older revisions of this file used the inverted containment predicates, +which silently dropped partially-overlapping records on read and left +them undeleted on cleanup.) 
import asyncio
import json
from datetime import datetime
from functools import partial
from typing import Any, Dict, List, Optional

from qdrant_client.http import models as qmodels

from core.di.decorators import repository
from core.observation.logger import get_logger
from core.oxm.constants import MAGIC_ALL
from core.oxm.qdrant.base_repository import (
    BaseQdrantRepository,
    compute_effective_threshold,
    to_epoch_ms,
)
from infra_layer.adapters.out.search.qdrant.memory.foresight_collection import (
    ForesightCollection,
)

logger = get_logger(__name__)


def _match_value(key: str, value: Any) -> qmodels.FieldCondition:
    """Build an exact-match condition on a single payload field."""
    return qmodels.FieldCondition(key=key, match=qmodels.MatchValue(value=value))


def _window_overlap_conditions(
    start_time: Optional[datetime], end_time: Optional[datetime]
) -> List[qmodels.FieldCondition]:
    """
    Build the window-overlap time conditions shared by search and delete.

    A record window ``[payload.start_time, payload.end_time]`` overlaps the
    query window ``[start_time, end_time]`` iff
    ``payload.end_time >= start_time`` AND ``payload.start_time <= end_time``.
    Each bound is optional; a missing bound contributes no condition.

    Keeping this in one place guarantees ``vector_search`` and
    ``delete_by_filters`` can never drift apart again.
    """
    conditions: List[qmodels.FieldCondition] = []
    if start_time:
        # Record must end on or after the query window starts.
        conditions.append(
            qmodels.FieldCondition(
                key="end_time",
                range=qmodels.Range(gte=to_epoch_ms(start_time)),
            )
        )
    if end_time:
        # Record must start on or before the query window ends.
        conditions.append(
            qmodels.FieldCondition(
                key="start_time",
                range=qmodels.Range(lte=to_epoch_ms(end_time)),
            )
        )
    return conditions


@repository("foresight_qdrant_repository", primary=False)
class ForesightQdrantRepository(BaseQdrantRepository[ForesightCollection]):
    """V1 Foresight Qdrant Repository."""

    def __init__(self) -> None:
        super().__init__(ForesightCollection)

    # ===================================== Document creation / management

    async def create_and_save_foresight_mem(
        self,
        id: str,
        user_id: Optional[str],
        content: str,
        parent_id: str,
        parent_type: str,
        vector: List[float],
        group_id: Optional[str] = None,
        session_id: Optional[str] = None,
        event_type: Optional[str] = None,
        participants: Optional[List[str]] = None,
        sender_ids: Optional[List[str]] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        duration_days: Optional[int] = None,
        evidence: Optional[str] = None,
        search_content: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Build a ``PointStruct`` for a foresight record and upsert it.

        ``session_id`` is written into the payload so the matching
        ``vector_search(session_id=...)`` filter can hit.

        When ``search_content`` is omitted it defaults to ``[content]``,
        with ``evidence`` appended if it is non-empty. It is stored in the
        payload as a JSON string.

        Returns:
            A summary dict with the keys ``id``, ``user_id``, ``content``,
            ``parent_type``, ``parent_id`` and ``search_content`` (kept
            caller-compatible with the Milvus repository for cutover).

        Raises:
            ValueError: if ``vector`` is empty or ``None``.
            Exception: any failure while building or upserting the point is
                logged and re-raised unchanged.
        """
        if not vector:
            raise ValueError(
                f"Vector is required for Foresight {id} but was not populated"
            )

        try:
            if search_content is None:
                # Fresh list, so appending never mutates a caller-owned list.
                search_content = [content]
                if evidence:
                    search_content.append(evidence)

            payload = {
                # Scope fields are normalised to "" so exact-match filters
                # (see ``delete_by_filters``) can target "unset" values.
                "user_id": user_id or "",
                "group_id": group_id or "",
                "session_id": session_id or "",
                "participants": participants or [],
                "sender_ids": sender_ids or [],
                "type": event_type,
                # ``None`` (not 0) for missing bounds so "no start/end" stays
                # distinct from "epoch 1970".
                # NOTE(review): Qdrant range conditions are expected not to
                # match null values, so a record with a missing bound would
                # be excluded by the corresponding time filter in
                # ``vector_search`` / ``delete_by_filters`` -- confirm this
                # is the intended handling of open-ended foresights.
                "start_time": to_epoch_ms(start_time) if start_time else None,
                "end_time": to_epoch_ms(end_time) if end_time else None,
                "duration_days": duration_days or 0,
                "content": content,
                "evidence": evidence or "",
                "search_content": json.dumps(search_content, ensure_ascii=False),
                "parent_type": parent_type,
                "parent_id": parent_id,
            }

            await self.upsert(
                qmodels.PointStruct(id=id, vector=vector, payload=payload)
            )

            logger.debug(
                "Foresight point upserted: id=%s, user_id=%s", id, user_id
            )

            return {
                "id": id,
                "user_id": user_id,
                "content": content,
                "parent_type": parent_type,
                "parent_id": parent_id,
                "search_content": search_content,
            }

        except Exception as e:
            logger.error(
                "Failed to create foresight point: id=%s, error=%s", id, e
            )
            raise

    # ============================================================ search

    async def vector_search(
        self,
        query_vector: List[float],
        user_id: Optional[str] = None,
        group_ids: Optional[List[str]] = None,
        sender_id: Optional[str] = None,
        session_id: Optional[str] = None,
        parent_type: Optional[str] = None,
        parent_id: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        limit: int = 10,
        score_threshold: float = 0.0,
        radius: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """
        Vector similarity search with scope, sender, and time-range filters.

        Time filters use **window-overlap** semantics, not containment. A
        record matches when its window ``[start_time, end_time]`` overlaps
        the query window ``[start_time arg, end_time arg]``:

        - ``end_time`` arg -> ``payload.start_time <= end_time_ms`` (record
          starts on or before the query window ends).
        - ``start_time`` arg -> ``payload.end_time >= start_time_ms`` (record
          ends on or after the query window starts).

        ``sender_id`` is matched against the ``sender_ids`` array payload
        field. Every filter argument is optional; falsy values (and
        ``MAGIC_ALL`` for ``user_id``) add no condition.

        Scores are gated twice: the value returned by
        ``compute_effective_threshold(radius, score_threshold)`` is sent to
        Qdrant, and hits scoring below ``score_threshold`` are then dropped
        client-side.

        Returns:
            One dict per hit with ``id``, ``score`` and the scope / time /
            parent payload fields.

        Raises:
            Exception: any search failure is logged and re-raised unchanged.
        """
        try:
            conditions: List[qmodels.FieldCondition] = []

            if user_id and user_id != MAGIC_ALL:
                conditions.append(_match_value("user_id", user_id))

            if group_ids:
                conditions.append(
                    qmodels.FieldCondition(
                        key="group_id",
                        match=qmodels.MatchAny(any=list(group_ids)),
                    )
                )

            if sender_id:
                # Qdrant matches arrays element-wise on MatchValue, so this
                # is the equivalent of Milvus' ``array_contains(sender_ids, x)``.
                conditions.append(_match_value("sender_ids", sender_id))

            if session_id:
                conditions.append(_match_value("session_id", session_id))

            if parent_type:
                conditions.append(_match_value("parent_type", parent_type))

            if parent_id:
                conditions.append(_match_value("parent_id", parent_id))

            # Shared with ``delete_by_filters`` so both agree on what
            # "inside the window" means.
            conditions.extend(_window_overlap_conditions(start_time, end_time))

            query_filter = qmodels.Filter(must=conditions) if conditions else None
            ef_value = max(128, limit * 2)
            # Two-stage gating -- see ``compute_effective_threshold``.
            effective_threshold = compute_effective_threshold(
                radius, score_threshold
            )

            scored_points = await self.search(
                query_vector=query_vector,
                limit=limit,
                query_filter=query_filter,
                with_payload=True,
                with_vectors=False,
                score_threshold=effective_threshold,
                search_params=qmodels.SearchParams(hnsw_ef=ef_value),
            )

            search_results: List[Dict[str, Any]] = []
            for point in scored_points:
                # Second gate: enforce the caller's threshold client-side.
                if point.score < score_threshold:
                    continue
                payload = point.payload or {}
                search_results.append(
                    {
                        "id": str(point.id),
                        "score": float(point.score),
                        "user_id": payload.get("user_id"),
                        "group_id": payload.get("group_id"),
                        "sender_ids": payload.get("sender_ids"),
                        "session_id": payload.get("session_id"),
                        "participants": payload.get("participants"),
                        "start_time": payload.get("start_time"),
                        "end_time": payload.get("end_time"),
                        "parent_type": payload.get("parent_type"),
                        "parent_id": payload.get("parent_id"),
                    }
                )

            logger.debug(
                "Foresight Qdrant search: found %d results", len(search_results)
            )
            return search_results

        except Exception as e:
            logger.error("Foresight Qdrant search failed: %s", e)
            raise

    # ========================================================== deletion

    async def delete_by_filters(
        self,
        user_id: Optional[str] = MAGIC_ALL,
        group_id: Optional[str] = MAGIC_ALL,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
    ) -> int:
        """
        Batch delete by filter combination; at least one filter required.

        Scope filters: ``MAGIC_ALL`` (the default) means "do not filter on
        this field"; ``None`` is matched as the empty string.

        Time-range semantics are the same **window-overlap** rule used by
        ``vector_search``:

        - ``start_time`` arg -> ``payload.end_time >= start_time_ms``
        - ``end_time`` arg -> ``payload.start_time <= end_time_ms``

        Returns:
            The number of points matching the filter, taken from an exact
            ``count`` request issued *before* the delete. The two requests
            are separate, so points written or removed in between are not
            reflected in the returned value.

        Raises:
            ValueError: if no filter condition was supplied.
            Exception: any count/delete failure is logged and re-raised
                unchanged.
        """
        try:
            conditions: List[qmodels.FieldCondition] = []

            if user_id != MAGIC_ALL:
                conditions.append(_match_value("user_id", user_id or ""))
            if group_id != MAGIC_ALL:
                conditions.append(_match_value("group_id", group_id or ""))

            # Same predicate as ``vector_search``: diverging here would
            # silently keep foresights that a search over the same window
            # still returns after a "delete this window" call.
            conditions.extend(_window_overlap_conditions(start_time, end_time))

            if not conditions:
                raise ValueError("At least one filter condition must be provided")

            filter_ = qmodels.Filter(must=conditions)
            client = self.collection.client()
            name = self.collection.name

            # Filter-based delete reports no count of its own, so count first.
            count_result = await asyncio.to_thread(
                partial(
                    client.count,
                    collection_name=name,
                    count_filter=filter_,
                    exact=True,
                )
            )
            delete_count = count_result.count

            if delete_count > 0:
                await asyncio.to_thread(
                    partial(
                        client.delete,
                        collection_name=name,
                        points_selector=qmodels.FilterSelector(filter=filter_),
                        wait=True,
                    )
                )

            logger.debug(
                "Batch deleted foresights: deleted %d points", delete_count
            )
            return delete_count

        except Exception as e:
            logger.error("Failed to batch delete foresights: %s", e)
            raise
"""
User Profile Qdrant Repository.

V1 repository providing vector semantic retrieval over user-profile items.
Its surface mirrors the Milvus counterpart so callers stay unchanged:
- ``vector_search``: filters on scope (user_id/group_id) and scenario
- ``delete_by_user_group``: removes every profile item of a (user_id, group_id) pair

Note: UserProfile carries no ``session_id`` (aggregation is user-level).
"""

import asyncio
from functools import partial
from hashlib import sha256
from typing import Any, Dict, List, Optional

from qdrant_client.http import models as qmodels

from core.di.decorators import repository
from core.observation.logger import get_logger
from core.oxm.constants import MAGIC_ALL
from core.oxm.qdrant.base_repository import (
    BaseQdrantRepository,
    compute_effective_threshold,
)
from infra_layer.adapters.out.search.qdrant.memory.user_profile_collection import (
    UserProfileCollection,
)

logger = get_logger(__name__)


def _fp(value: Optional[str]) -> str:
    """
    Return a short, non-reversible fingerprint of ``value`` for log lines.

    Profile identifiers may be PII when callers use human-readable
    user/group ids, so the raw value is never logged. The first 12 hex
    characters of the SHA-256 digest are enough to correlate events.
    ``None`` and the empty string both yield ``"-"``.
    """
    return sha256(value.encode("utf-8")).hexdigest()[:12] if value else "-"


@repository("user_profile_qdrant_repository", primary=False)
class UserProfileQdrantRepository(BaseQdrantRepository[UserProfileCollection]):
    """V1 User Profile Qdrant Repository."""

    def __init__(self) -> None:
        super().__init__(UserProfileCollection)

    # ============================================================ search

    async def vector_search(
        self,
        query_vector: List[float],
        user_id: Optional[str] = None,
        group_id: Optional[str] = None,
        scenario: Optional[str] = None,
        limit: int = 10,
        score_threshold: float = 0.0,
        radius: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """
        Run a similarity search restricted by scope and scenario.

        ``user_id``, ``group_id`` and ``scenario`` each add an exact-match
        condition unless the value is falsy or equals ``MAGIC_ALL``. Hits
        scoring below ``score_threshold`` are dropped after the query.
        Failures are logged and re-raised.
        """

        def to_row(hit: Any) -> Dict[str, Any]:
            # Flatten one scored point into the caller-facing dict.
            fields = hit.payload or {}
            return {
                "id": str(hit.id),
                "score": float(hit.score),
                "user_id": fields.get("user_id"),
                "group_id": fields.get("group_id"),
                "scenario": fields.get("scenario"),
                "memcell_count": fields.get("memcell_count"),
                "item_type": fields.get("item_type", ""),
                "embed_text": fields.get("embed_text", ""),
            }

        try:
            scoped_fields = (
                ("user_id", user_id),
                ("group_id", group_id),
                ("scenario", scenario),
            )
            conditions: List[qmodels.FieldCondition] = [
                qmodels.FieldCondition(
                    key=field,
                    match=qmodels.MatchValue(value=wanted),
                )
                for field, wanted in scoped_fields
                if wanted and wanted != MAGIC_ALL
            ]

            query_filter = None
            if conditions:
                query_filter = qmodels.Filter(must=conditions)

            search_params = qmodels.SearchParams(hnsw_ef=max(128, limit * 2))
            # First gate of the two-stage scoring -- see
            # ``compute_effective_threshold``.
            server_threshold = compute_effective_threshold(radius, score_threshold)

            hits = await self.search(
                query_vector=query_vector,
                limit=limit,
                query_filter=query_filter,
                with_payload=True,
                with_vectors=False,
                score_threshold=server_threshold,
                search_params=search_params,
            )

            # Second gate: keep only hits that are not below the threshold.
            search_results: List[Dict[str, Any]] = [
                to_row(hit) for hit in hits if not hit.score < score_threshold
            ]

            logger.debug(
                "UserProfile Qdrant search: found %d results", len(search_results)
            )
            return search_results

        except Exception as e:
            logger.error("UserProfile Qdrant search failed: %s", e)
            raise

    # ========================================================== deletion

    async def delete_by_user_group(self, user_id: str, group_id: str) -> int:
        """
        Remove every profile item belonging to a (user_id, group_id) pair.

        The returned number comes from ``client.count(exact=True)`` issued
        before the delete, because Qdrant's filter-based delete reports no
        count itself. Failures are logged with fingerprinted ids and
        re-raised.
        """
        try:
            pair_filter = qmodels.Filter(
                must=[
                    qmodels.FieldCondition(
                        key=field,
                        match=qmodels.MatchValue(value=wanted),
                    )
                    for field, wanted in (("user_id", user_id), ("group_id", group_id))
                ]
            )
            client = self.collection.client()
            name = self.collection.name

            run_count = partial(
                client.count,
                collection_name=name,
                count_filter=pair_filter,
                exact=True,
            )
            count = (await asyncio.to_thread(run_count)).count

            if count > 0:
                run_delete = partial(
                    client.delete,
                    collection_name=name,
                    points_selector=qmodels.FilterSelector(filter=pair_filter),
                    wait=True,
                )
                await asyncio.to_thread(run_delete)

            logger.info(
                "Deleted profile items: user_fp=%s group_fp=%s count=%d",
                _fp(user_id),
                _fp(group_id),
                count,
            )
            return count

        except Exception as e:
            logger.exception(
                "Failed to delete profile items: user_fp=%s group_fp=%s error=%s",
                _fp(user_id),
                _fp(group_id),
                e,
            )
            # Propagate so callers can tell "nothing to delete" apart from
            # an operational failure.
            raise