From 9fac9a94162871bcb39460228c122cbe7e7e45aa Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Thu, 11 Jun 2026 01:22:54 -0500 Subject: [PATCH 1/3] feat(core): add config-gated entity-aware ranking boost for hybrid search Proper nouns in a query carry no extra weight against generic semantic similarity, so documents about a different entity on the same topic can outrank the document that actually names the queried entity (#951 cross-conversation confusion in the LoCoMo benchmark). Add an optional, lexical-only re-scoring pass to hybrid fusion: - Extract candidate entity terms from the query (capitalized / proper-noun tokens that are not common stopwords; trailing possessives stripped). - Count how many distinct query entity terms appear in each fused candidate's entity name (title) or a relation row's linked entity names. - Multiply matching candidates' fused scores by 1 + weight * min(matches, max_terms), promoting entity-matching docs. The boost runs over the full fused candidate set before the limit/offset cut, so a matching doc below the cutoff can be promoted into the returned window. It adds no model inference (index/lexical lookups only), so per-query latency overhead is trivial, and only affects hybrid retrieval. Behind three config flags, DEFAULT OFF pending LoCoMo benchmark validation: search_entity_boost_enabled, search_entity_boost_weight, search_entity_boost_max_terms. Documented in docs/semantic-search.md. Tests: unit coverage for entity-term extraction and the boost math; a hybrid-pipeline test showing reordering when enabled and unchanged ordering when disabled; and a service-level integration test over a real DB with a deterministic stub embedding provider proving an entity-matching doc outranks a higher-similarity non-matching doc only when enabled. 
Co-Authored-By: Claude Signed-off-by: Drew Cain --- docs/semantic-search.md | 38 +++ src/basic_memory/config.py | 32 +++ .../repository/postgres_search_repository.py | 3 + .../repository/search_repository_base.py | 173 +++++++++++++ .../repository/sqlite_search_repository.py | 3 + .../test_entity_boost_search_service.py | 227 ++++++++++++++++++ tests/repository/test_hybrid_fusion.py | 109 +++++++++ tests/repository/test_semantic_search_base.py | 179 ++++++++++++++ 8 files changed, 764 insertions(+) create mode 100644 tests/repository/test_entity_boost_search_service.py diff --git a/docs/semantic-search.md b/docs/semantic-search.md index e5665c226..25cc8794e 100644 --- a/docs/semantic-search.md +++ b/docs/semantic-search.md @@ -107,6 +107,44 @@ All settings are fields on `BasicMemoryConfig` and can be set via environment va | `semantic_embedding_document_input_type` | `BASIC_MEMORY_SEMANTIC_EMBEDDING_DOCUMENT_INPUT_TYPE` | Auto for known LiteLLM models | Optional LiteLLM `input_type` for indexed document/passages. | | `semantic_embedding_query_input_type` | `BASIC_MEMORY_SEMANTIC_EMBEDDING_QUERY_INPUT_TYPE` | Auto for known LiteLLM models | Optional LiteLLM `input_type` for search queries. | | `semantic_vector_k` | `BASIC_MEMORY_SEMANTIC_VECTOR_K` | `100` | Candidate count for vector nearest-neighbour retrieval. Higher values improve recall at the cost of latency. | +| `search_entity_boost_enabled` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_ENABLED` | `false` | Enable the entity-aware ranking boost in hybrid search (see below). Default off pending benchmark validation. | +| `search_entity_boost_weight` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_WEIGHT` | `0.15` | Per-matched-term multiplier strength for the entity boost. A candidate matching N query entity terms is scaled by `1 + weight * min(N, max_terms)`. 
| +| `search_entity_boost_max_terms` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_MAX_TERMS` | `3` | Maximum number of distinct matched entity terms that contribute to the boost, bounding the multiplier. | + +## Entity-Aware Ranking Boost + +Hybrid search fuses keyword (FTS) and vector similarity, but proper nouns in a query +carry no special weight against generic semantic similarity. As a result, a document +about a *different* entity on the same topic can outrank the document that actually +names the queried entity — e.g. "What are Joanna's hobbies?" surfacing a generic +hobbies note ahead of Joanna's note (see +[#951](https://github.com/basicmachines-co/basic-memory/issues/951)). + +When `search_entity_boost_enabled=true`, hybrid retrieval performs a final, +lexical-only re-scoring pass: + +1. It extracts candidate entity terms from the query — capitalized / proper-noun + tokens that are not common stopwords (e.g. `Joanna`, `Anthony`, `NASA`). +2. For each fused candidate, it counts how many distinct query entity terms appear in + the candidate's entity name (its title) or in a relation row's linked entity names. +3. Matching candidates have their fused score multiplied by + `1 + weight * min(matches, max_terms)`, so an entity-matching document can be + promoted above a higher-similarity non-matching one. + +The boost adds **no model inference** — it is pure index/lexical lookup, so per-query +latency overhead is trivial. It only affects `hybrid` retrieval; `text` and `vector` +modes are unchanged. Non-matching candidates keep their original scores, so ordering +among them is preserved. + +```bash +export BASIC_MEMORY_SEARCH_ENTITY_BOOST_ENABLED=true +# Optional tuning: +export BASIC_MEMORY_SEARCH_ENTITY_BOOST_WEIGHT=0.15 +export BASIC_MEMORY_SEARCH_ENTITY_BOOST_MAX_TERMS=3 +``` + +> **Default off.** This setting is disabled by default pending LoCoMo benchmark +> validation. Enable it to experiment with entity-heavy corpora. 
## Embedding Providers diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 92ff504ab..0f51ecac5 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -346,6 +346,38 @@ def __init__(self, **data: Any) -> None: ... "Valid values: text, vector, hybrid. " "When unset, defaults to 'hybrid' if semantic search is enabled, otherwise 'text'.", ) + # Entity-aware ranking boost (hybrid retrieval). + # Trigger: proper nouns in a query (e.g. "Joanna") carry no extra weight against + # generic semantic similarity, so documents from the wrong conversation can outrank + # the gold document during hybrid fusion (#951). + # Why: entities are first-class in Basic Memory, so a candidate whose title or linked + # relation names contain a query proper noun is a stronger answer than a same-topic + # document about a different entity. + # Outcome: when enabled, hybrid fusion multiplies a candidate's fused score by a small + # bonus for each distinct query entity term it matches lexically (no model inference). + # Default OFF pending LoCoMo benchmark validation by the maintainer. + search_entity_boost_enabled: bool = Field( + default=False, + description="Enable entity-aware ranking boost in hybrid search. When enabled, " + "hybrid candidates whose title or linked relation names contain a proper-noun " + "term from the query are boosted in the final ranking. Lexical-only; adds no " + "model inference. Default off pending benchmark validation.", + ) + search_entity_boost_weight: float = Field( + default=0.15, + description="Per-matched-term multiplier strength for the entity-aware ranking " + "boost. A candidate matching N distinct query entity terms has its fused score " + "multiplied by (1 + weight * N), capped at search_entity_boost_max_terms terms. 
" + "Only applies when search_entity_boost_enabled is true.", + ge=0.0, + ) + search_entity_boost_max_terms: int = Field( + default=3, + description="Maximum number of distinct matched entity terms that contribute to " + "the entity-aware ranking boost, bounding the multiplier so a single candidate " + "cannot run away with the ranking.", + gt=0, + ) # Database connection pool configuration (Postgres only) db_pool_size: int = Field( diff --git a/src/basic_memory/repository/postgres_search_repository.py b/src/basic_memory/repository/postgres_search_repository.py index 85e01a103..10b0a6dd5 100644 --- a/src/basic_memory/repository/postgres_search_repository.py +++ b/src/basic_memory/repository/postgres_search_repository.py @@ -67,6 +67,9 @@ def __init__( self._semantic_postgres_prepare_concurrency = ( self._app_config.semantic_postgres_prepare_concurrency ) + self._entity_boost_enabled = self._app_config.search_entity_boost_enabled + self._entity_boost_weight = self._app_config.search_entity_boost_weight + self._entity_boost_max_terms = self._app_config.search_entity_boost_max_terms self._embedding_provider = embedding_provider self._vector_dimensions = 384 self._vector_tables_initialized = False diff --git a/src/basic_memory/repository/search_repository_base.py b/src/basic_memory/repository/search_repository_base.py index 2e4e687a3..23ca5929b 100644 --- a/src/basic_memory/repository/search_repository_base.py +++ b/src/basic_memory/repository/search_repository_base.py @@ -45,6 +45,64 @@ # the vector/hybrid retrieval path must key rows by (type, id) to avoid collisions. type SearchIndexKey = tuple[str, int] +# --- Entity-aware ranking boost (#951) --- + +# Match word tokens (allowing internal apostrophes/hyphens) so we can inspect +# their capitalization to detect proper-noun-like query terms. 
+_ENTITY_TERM_TOKEN_PATTERN = re.compile(r"[A-Za-z][A-Za-z'\-]*") + +# Common capitalized sentence-starters and interrogatives that look like proper +# nouns but are not entity references. Kept lowercase for case-insensitive checks. +# Intentionally small: a candidate term only boosts a row when it actually matches +# that row's title/relation names, so a stray non-entity term simply does nothing. +_ENTITY_TERM_STOPWORDS = frozenset( + { + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "but", + "by", + "do", + "does", + "for", + "from", + "has", + "have", + "how", + "i", + "in", + "is", + "it", + "of", + "on", + "or", + "the", + "their", + "they", + "this", + "to", + "was", + "we", + "were", + "what", + "when", + "where", + "which", + "who", + "whom", + "whose", + "why", + "will", + "with", + "you", + "your", + } +) + @dataclass class VectorSyncBatchResult: @@ -166,6 +224,13 @@ class SearchRepositoryBase(ABC): _vector_dimensions: int _vector_tables_initialized: bool + # Entity-aware ranking boost (#951). Defaults keep the feature off for any + # subclass or test double that does not explicitly configure it. Concrete + # backends overwrite these from BasicMemoryConfig in their __init__. + _entity_boost_enabled: bool = False + _entity_boost_weight: float = 0.0 + _entity_boost_max_terms: int = 1 + def __init__(self, session_maker: async_sessionmaker[AsyncSession], project_id: int): """Initialize with session maker and project_id filter. @@ -2147,6 +2212,105 @@ async def _fetch_search_index_rows_by_ids( # Shared semantic search: hybrid score-based fusion # ------------------------------------------------------------------ + # --- Entity-aware ranking boost (#951) --- + + @staticmethod + def _extract_query_entity_terms(search_text: Optional[str]) -> set[str]: + """Extract candidate entity (proper-noun) terms from a query string. 
+ + Heuristic, lexical only (no model inference): a token is a candidate entity + term when it is title-cased or all-caps and is not a common stopword. The + result is lowercased so downstream matching is case-insensitive. + + Examples: + "What are Joanna's hobbies?" -> {"joanna"} + "Who is Anthony?" -> {"anthony"} + "Deborah and Jolene" -> {"deborah", "jolene"} + "what is the weather" -> set() (no proper nouns) + """ + if not search_text: + return set() + + terms: set[str] = set() + for match in _ENTITY_TERM_TOKEN_PATTERN.finditer(search_text): + token = match.group(0) + # Trigger: token begins with an uppercase letter (Title-Case or ALL-CAPS). + # Why: proper nouns and named entities are conventionally capitalized; this + # is the cheapest reliable signal without a NER model. + # Outcome: lowercase, non-capitalized words are ignored as generic terms. + if not token[0].isupper(): + continue + normalized = token.lower() + # Strip a trailing possessive so "Joanna's" matches the entity "Joanna". + if normalized.endswith("'s"): + normalized = normalized[:-2] + if normalized in _ENTITY_TERM_STOPWORDS: + continue + # Single characters (e.g. a stray "I") carry no entity signal. + if len(normalized) < 2: + continue + terms.add(normalized) + return terms + + @staticmethod + def _row_entity_match_count(row: SearchIndexRow, entity_terms: set[str]) -> int: + """Count distinct query entity terms that a candidate row references. + + Matches against the row's own entity name (title) and the names embedded in + a relation row's title (``"From -> To"``). These are the fields where Basic + Memory's first-class entity names surface, so a match here is strong evidence + the candidate is about the queried entity rather than a same-topic document. + """ + if not entity_terms: + return 0 + + haystack_parts = [row.title or ""] + # Relation rows encode linked entity names in their title ("From -> To"); + # the relation_type itself is not an entity name, so it is excluded. 
+ haystack = " ".join(part for part in haystack_parts if part) + if not haystack: + return 0 + + haystack_tokens: set[str] = set() + for match in _ENTITY_TERM_TOKEN_PATTERN.finditer(haystack): + token = match.group(0).lower() + # Mirror the query-side possessive stripping so a doc titled + # "Joanna's Hobbies" matches the query entity term "joanna". + if token.endswith("'s"): + token = token[:-2] + haystack_tokens.add(token) + return len(entity_terms & haystack_tokens) + + def _apply_entity_boost( + self, + fused_scores: dict[SearchIndexKey, float], + rows_by_key: dict[SearchIndexKey, SearchIndexRow], + entity_terms: set[str], + ) -> dict[SearchIndexKey, float]: + """Multiply fused scores by a per-matched-term bonus for entity-matching rows. + + Trigger: entity boosting is enabled and the query contains proper-noun terms. + Why: a candidate whose entity/relation names contain a queried proper noun is a + stronger answer than a generic same-topic document (#951 cross-conversation + confusion). + Outcome: ``score * (1 + weight * min(matches, max_terms))``. Rows that match no + query entity term are returned unchanged, so relative order among non-matching + rows is preserved. 
+ """ + if not self._entity_boost_enabled or not entity_terms or self._entity_boost_weight <= 0: + return fused_scores + + boosted: dict[SearchIndexKey, float] = {} + for row_key, score in fused_scores.items(): + row = rows_by_key.get(row_key) + matches = self._row_entity_match_count(row, entity_terms) if row is not None else 0 + if matches <= 0: + boosted[row_key] = score + continue + capped_matches = min(matches, self._entity_boost_max_terms) + boosted[row_key] = score * (1.0 + self._entity_boost_weight * capped_matches) + return boosted + async def _search_hybrid( self, *, @@ -2250,6 +2414,15 @@ async def _search_hybrid( f = fts_scores.get(row_key, 0.0) fused_scores[row_key] = max(v, f) + FUSION_BONUS * min(v, f) + # Entity-aware ranking boost (#951): runs over the full fused candidate set + # before the limit/offset cut, so a boosted entity-matching candidate can be + # promoted into the returned window. No-op when the feature is disabled or the + # query contains no proper-noun terms, preserving the existing ordering. 
+ entity_terms = ( + self._extract_query_entity_terms(query_text) if self._entity_boost_enabled else set() + ) + fused_scores = self._apply_entity_boost(fused_scores, rows_by_key, entity_terms) + ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True) output: list[SearchIndexRow] = [] for row_key, fused_score in ranked[offset : offset + limit]: diff --git a/src/basic_memory/repository/sqlite_search_repository.py b/src/basic_memory/repository/sqlite_search_repository.py index c467a9a24..a28a362d9 100644 --- a/src/basic_memory/repository/sqlite_search_repository.py +++ b/src/basic_memory/repository/sqlite_search_repository.py @@ -55,6 +55,9 @@ def __init__( self._semantic_embedding_sync_batch_size = ( self._app_config.semantic_embedding_sync_batch_size ) + self._entity_boost_enabled = self._app_config.search_entity_boost_enabled + self._entity_boost_weight = self._app_config.search_entity_boost_weight + self._entity_boost_max_terms = self._app_config.search_entity_boost_max_terms self._embedding_provider = embedding_provider self._sqlite_vec_load_lock = asyncio.Lock() self._sqlite_prepare_write_lock = asyncio.Lock() diff --git a/tests/repository/test_entity_boost_search_service.py b/tests/repository/test_entity_boost_search_service.py new file mode 100644 index 000000000..2426a44b7 --- /dev/null +++ b/tests/repository/test_entity_boost_search_service.py @@ -0,0 +1,227 @@ +"""Service-level integration test for the entity-aware ranking boost (#951). + +Drives a fully wired SearchService over a real database with a deterministic stub +embedding provider so vector similarity is controlled. Verifies that when the boost +is enabled, an entity-matching document outranks a higher-similarity non-matching +document, and that ordering is unchanged when the boost is disabled. + +No model inference is involved: the stub provider returns fixed unit vectors, so the +test is fast and deterministic on both SQLite and Postgres. 
+""" + +from __future__ import annotations + +import math +from typing import Any + +import pytest +import pytest_asyncio + +from basic_memory.config import BasicMemoryConfig +from basic_memory.repository.entity_repository import EntityRepository +from basic_memory.repository.search_repository import SearchRepository +from basic_memory.schemas.base import Entity as EntitySchema +from basic_memory.schemas.search import SearchQuery, SearchRetrievalMode +from basic_memory.services.entity_service import EntityService +from basic_memory.services.file_service import FileService +from basic_memory.services.search_service import SearchService + + +# --- Deterministic stub embedding provider --- + +_STUB_DIMENSIONS = 4 + + +def _unit(vector: list[float]) -> list[float]: + norm = math.sqrt(sum(component * component for component in vector)) or 1.0 + return [component / norm for component in vector] + + +class _StubEmbeddingProvider: + """Maps known text fragments to fixed unit vectors for controlled similarity. + + The query is engineered to sit closer (cosine) to the non-matching "hobbies" + document than to the gold "Joanna" document, reproducing the #951 failure where + generic semantic similarity outranks the entity-matching gold doc. + """ + + model_name = "stub-entity-boost" + dimensions = _STUB_DIMENSIONS + + def _vector_for(self, text: str) -> list[float]: + lowered = text.lower() + if "joanna" in lowered: + # Gold doc: shares some direction with the query but less than the decoy. + return _unit([0.6, 0.8, 0.0, 0.0]) + if "hobbies" in lowered or "pastime" in lowered: + # Decoy doc: closest to the query direction. + return _unit([0.95, 0.31, 0.0, 0.0]) + return _unit([0.0, 0.0, 1.0, 0.0]) + + async def embed_query(self, text: str) -> list[float]: + # Query direction is closest to the decoy vector above. 
+ return _unit([0.97, 0.24, 0.0, 0.0]) + + async def embed_documents(self, texts: list[str]) -> list[list[float]]: + return [self._vector_for(text) for text in texts] + + def runtime_log_attrs(self) -> dict[str, Any]: + return {} + + +# --- Fixtures --- + + +async def _build_search_service( + *, + session_maker, + test_project, + base_app_config: BasicMemoryConfig, + file_service: FileService, + entity_repository: EntityRepository, + boost_enabled: bool, +) -> SearchService: + """Build a SearchService with semantic search + a deterministic stub provider.""" + from basic_memory.repository.sqlite_search_repository import SQLiteSearchRepository + from basic_memory.repository.postgres_search_repository import PostgresSearchRepository + from basic_memory.config import DatabaseBackend + + app_config = base_app_config.model_copy( + update={ + "semantic_search_enabled": True, + "semantic_min_similarity": 0.0, + "search_entity_boost_enabled": boost_enabled, + "search_entity_boost_weight": 0.3, + "search_entity_boost_max_terms": 3, + } + ) + + provider = _StubEmbeddingProvider() + if app_config.database_backend == DatabaseBackend.POSTGRES: # pragma: no cover + search_repo: SearchRepository = PostgresSearchRepository( + session_maker, + project_id=test_project.id, + app_config=app_config, + embedding_provider=provider, + ) + else: + repo = SQLiteSearchRepository( + session_maker, + project_id=test_project.id, + app_config=app_config, + ) + repo._semantic_enabled = True + repo._embedding_provider = provider + repo._vector_dimensions = provider.dimensions + repo._vector_tables_initialized = False + search_repo = repo + + service = SearchService(search_repo, entity_repository, file_service) + await service.init_search_index() + return service + + +@pytest_asyncio.fixture +async def boost_entities( + entity_service: EntityService, +): + """Index two entities: a decoy 'hobbies' doc and the gold 'Joanna' doc.""" + decoy, _ = await entity_service.create_or_update_entity( + 
EntitySchema( + title="Common Hobbies and Pastimes", + note_type="note", + directory="people", + content="A general overview of hobbies and pastimes people enjoy.", + ) + ) + gold, _ = await entity_service.create_or_update_entity( + EntitySchema( + title="Joanna", + note_type="note", + directory="people", + content="Notes about Joanna and what she likes to do.", + ) + ) + return decoy, gold + + +# --- Tests --- + + +async def _sync_vectors(service: SearchService, entity_ids: list[int]) -> None: + """Embed the indexed entities via the stub provider.""" + await service.sync_entity_vectors_batch(entity_ids) + + +@pytest.mark.asyncio +async def test_entity_boost_enabled_promotes_gold_doc( + session_maker, + test_project, + app_config, + file_service, + entity_repository, + boost_entities, +): + decoy, gold = boost_entities + service = await _build_search_service( + session_maker=session_maker, + test_project=test_project, + base_app_config=app_config, + file_service=file_service, + entity_repository=entity_repository, + boost_enabled=True, + ) + # Re-index the entities through this service so vector tables exist for it. + for entity in (decoy, gold): + await service.index_entity(entity) + await _sync_vectors(service, [decoy.id, gold.id]) + + results = await service.search( + SearchQuery( + text="What are Joanna's hobbies?", + retrieval_mode=SearchRetrievalMode.HYBRID, + ), + limit=10, + ) + + entity_ids = [r.entity_id for r in results] + assert gold.id in entity_ids and decoy.id in entity_ids + # With the boost on, the entity-matching gold doc ranks ahead of the + # higher-similarity decoy. 
+ assert entity_ids.index(gold.id) < entity_ids.index(decoy.id) + + +@pytest.mark.asyncio +async def test_entity_boost_disabled_keeps_similarity_order( + session_maker, + test_project, + app_config, + file_service, + entity_repository, + boost_entities, +): + decoy, gold = boost_entities + service = await _build_search_service( + session_maker=session_maker, + test_project=test_project, + base_app_config=app_config, + file_service=file_service, + entity_repository=entity_repository, + boost_enabled=False, + ) + for entity in (decoy, gold): + await service.index_entity(entity) + await _sync_vectors(service, [decoy.id, gold.id]) + + results = await service.search( + SearchQuery( + text="What are Joanna's hobbies?", + retrieval_mode=SearchRetrievalMode.HYBRID, + ), + limit=10, + ) + + entity_ids = [r.entity_id for r in results] + assert gold.id in entity_ids and decoy.id in entity_ids + # With the boost off, the higher-similarity decoy ranks ahead of the gold doc. + assert entity_ids.index(decoy.id) < entity_ids.index(gold.id) diff --git a/tests/repository/test_hybrid_fusion.py b/tests/repository/test_hybrid_fusion.py index 0a4a2c290..fee8b69d0 100644 --- a/tests/repository/test_hybrid_fusion.py +++ b/tests/repository/test_hybrid_fusion.py @@ -134,6 +134,115 @@ def _fake_embedding_provider() -> EmbeddingProvider: ) +def _hybrid_kwargs(**overrides: Any) -> dict[str, Any]: + """Return HYBRID_KWARGS with overrides applied, typed as dict[str, Any]. + + Keeps the splat into the keyword-only _search_hybrid signature type-clean. + """ + merged: dict[str, Any] = {**HYBRID_KWARGS, **overrides} + return merged + + +@pytest.mark.asyncio +async def test_entity_boost_promotes_matching_doc_when_enabled(): + """With entity boost enabled, an entity-matching doc outranks a higher-similarity + non-matching doc. 
+ + Reproduces the #951 cross-conversation confusion: a generic same-topic document + (higher raw similarity) initially outranks the gold doc whose title names the + queried entity. Enabling the boost flips the order. + """ + repo = ConcreteSearchRepo() + repo._entity_boost_enabled = True + repo._entity_boost_weight = 0.15 + repo._entity_boost_max_terms = 3 + + # Row 1: generic hobbies doc from the wrong conversation, higher vector similarity. + # Row 2: the gold doc whose title names the queried entity "Joanna". + fts_results = [] + vector_results = [ + FakeRow(id=1, score=0.80, title="Hobbies and pastimes"), + FakeRow(id=2, score=0.72, title="Joanna profile"), + ] + + with ( + patch.object(repo, "search", new_callable=AsyncMock, return_value=fts_results), + patch.object( + repo, "_search_vector_only", new_callable=AsyncMock, return_value=vector_results + ), + ): + results = await repo._search_hybrid( + **_hybrid_kwargs(search_text="What are Joanna's hobbies?") + ) + + # Boost: row 2 -> 0.72 * 1.15 = 0.828 > row 1's 0.80 + assert [r.id for r in results] == [2, 1] + assert results[0].score == pytest.approx(0.72 * 1.15, rel=1e-6) + assert results[1].score == pytest.approx(0.80, rel=1e-6) + + +@pytest.mark.asyncio +async def test_entity_boost_disabled_preserves_ordering(): + """With entity boost disabled (default), ordering matches pure similarity.""" + repo = ConcreteSearchRepo() + # Defaults from the base class keep boosting off; assert explicitly. 
+ assert repo._entity_boost_enabled is False + + fts_results = [] + vector_results = [ + FakeRow(id=1, score=0.80, title="Hobbies and pastimes"), + FakeRow(id=2, score=0.72, title="Joanna profile"), + ] + + with ( + patch.object(repo, "search", new_callable=AsyncMock, return_value=fts_results), + patch.object( + repo, "_search_vector_only", new_callable=AsyncMock, return_value=vector_results + ), + ): + results = await repo._search_hybrid( + **_hybrid_kwargs(search_text="What are Joanna's hobbies?") + ) + + # No boost: original similarity order is preserved, scores unchanged. + assert [r.id for r in results] == [1, 2] + assert results[0].score == pytest.approx(0.80, rel=1e-6) + assert results[1].score == pytest.approx(0.72, rel=1e-6) + + +@pytest.mark.asyncio +async def test_entity_boost_promotes_doc_into_limited_window(): + """Boosting runs before the limit cut, so a matching doc ranked below the cutoff + can be promoted into the returned window.""" + repo = ConcreteSearchRepo() + repo._entity_boost_enabled = True + repo._entity_boost_weight = 0.6 + repo._entity_boost_max_terms = 3 + + fts_results = [] + # Two non-matching docs above the gold doc, which matches "Anthony". + vector_results = [ + FakeRow(id=1, score=0.90, title="conversation six"), + FakeRow(id=2, score=0.85, title="conversation one"), + FakeRow(id=3, score=0.60, title="Anthony introduces himself"), + ] + + with ( + patch.object(repo, "search", new_callable=AsyncMock, return_value=fts_results), + patch.object( + repo, "_search_vector_only", new_callable=AsyncMock, return_value=vector_results + ), + ): + results = await repo._search_hybrid( + **_hybrid_kwargs(search_text="Who is Anthony?", limit=1) + ) + + # Gold doc boost: 0.60 * 1.6 = 0.96 > row 1's 0.90, so it is promoted into the + # top-1 window even though it was ranked third before boosting. 
+ assert len(results) == 1 + assert results[0].id == 3 + + @pytest.mark.asyncio async def test_high_fts_score_boosts_ranking(): """FTS-only: a high normalized score should outscore a low normalized score.""" diff --git a/tests/repository/test_semantic_search_base.py b/tests/repository/test_semantic_search_base.py index b75f88716..97ce650aa 100644 --- a/tests/repository/test_semantic_search_base.py +++ b/tests/repository/test_semantic_search_base.py @@ -809,3 +809,182 @@ def _capture_info(message: str, **kwargs): assert runtime_logs[0]["threads"] == 4 assert runtime_logs[0]["configured_parallel"] == 2 assert runtime_logs[0]["effective_parallel"] == 2 + + +# --- Entity-aware ranking boost (#951) --- + + +def _make_index_row( + *, + row_id: int, + title: str, + row_type: str = SearchItemType.ENTITY.value, +) -> SearchIndexRow: + """Build a real SearchIndexRow for entity-boost matching tests.""" + now = datetime(2026, 1, 1) + return SearchIndexRow( + project_id=1, + id=row_id, + type=row_type, + file_path=f"notes/{row_id}.md", + created_at=now, + updated_at=now, + permalink=f"notes/{row_id}", + title=title, + ) + + +class TestExtractQueryEntityTerms: + """Verify proper-noun extraction from query strings.""" + + def test_extracts_single_proper_noun(self): + terms = SearchRepositoryBase._extract_query_entity_terms("What are Joanna's hobbies?") + assert terms == {"joanna"} + + def test_extracts_multiple_proper_nouns(self): + terms = SearchRepositoryBase._extract_query_entity_terms( + "What symbolic gifts do Deborah and Jolene have from their mothers?" 
+ ) + assert terms == {"deborah", "jolene"} + + def test_who_is_anthony(self): + terms = SearchRepositoryBase._extract_query_entity_terms("Who is Anthony?") + assert terms == {"anthony"} + + def test_all_lowercase_query_has_no_entity_terms(self): + terms = SearchRepositoryBase._extract_query_entity_terms("what is the weather today") + assert terms == set() + + def test_capitalized_stopword_at_sentence_start_is_ignored(self): + # "What" and "Who" are capitalized interrogatives, not entity names. + terms = SearchRepositoryBase._extract_query_entity_terms("What does Sarah like?") + assert terms == {"sarah"} + + def test_all_caps_token_is_extracted(self): + terms = SearchRepositoryBase._extract_query_entity_terms("who works at NASA") + assert terms == {"nasa"} + + def test_possessive_is_stripped(self): + terms = SearchRepositoryBase._extract_query_entity_terms("Joanna's mother") + assert terms == {"joanna"} + + def test_single_capital_letter_is_ignored(self): + # "A" lowercases to a stopword; "X" is a non-stopword single letter that + # still carries no entity signal and must be dropped by the length guard. 
+ terms = SearchRepositoryBase._extract_query_entity_terms("A is for X and Apple") + assert terms == {"apple"} + + def test_empty_and_none_inputs(self): + assert SearchRepositoryBase._extract_query_entity_terms("") == set() + assert SearchRepositoryBase._extract_query_entity_terms(None) == set() + + +class TestRowEntityMatchCount: + """Verify lexical match counting between query terms and row entity names.""" + + def test_title_match_counts_one(self): + row = _make_index_row(row_id=1, title="Joanna's Hobbies") + assert SearchRepositoryBase._row_entity_match_count(row, {"joanna"}) == 1 + + def test_relation_title_matches_both_endpoints(self): + row = _make_index_row( + row_id=2, + title="Deborah -> Jolene", + row_type=SearchItemType.RELATION.value, + ) + count = SearchRepositoryBase._row_entity_match_count(row, {"deborah", "jolene"}) + assert count == 2 + + def test_no_match_returns_zero(self): + row = _make_index_row(row_id=3, title="Anthony Profile") + assert SearchRepositoryBase._row_entity_match_count(row, {"joanna"}) == 0 + + def test_match_is_case_insensitive(self): + row = _make_index_row(row_id=4, title="JOANNA notes") + assert SearchRepositoryBase._row_entity_match_count(row, {"joanna"}) == 1 + + def test_empty_terms_returns_zero(self): + row = _make_index_row(row_id=5, title="Joanna") + assert SearchRepositoryBase._row_entity_match_count(row, set()) == 0 + + def test_missing_title_returns_zero(self): + row = _make_index_row(row_id=6, title="") + row.title = None + assert SearchRepositoryBase._row_entity_match_count(row, {"joanna"}) == 0 + + def test_distinct_terms_not_double_counted(self): + # A title containing the same term twice still counts as one distinct match. 
+ row = _make_index_row(row_id=7, title="Joanna and Joanna") + assert SearchRepositoryBase._row_entity_match_count(row, {"joanna"}) == 1 + + +class TestApplyEntityBoost: + """Verify the entity-boost score math and gating.""" + + def _repo(self, *, enabled: bool, weight: float = 0.15, max_terms: int = 3) -> _ConcreteRepo: + repo = _ConcreteRepo() + repo._entity_boost_enabled = enabled + repo._entity_boost_weight = weight + repo._entity_boost_max_terms = max_terms + return repo + + def test_disabled_returns_scores_unchanged(self): + repo = self._repo(enabled=False) + key = ("entity", 1) + rows = {key: _make_index_row(row_id=1, title="Joanna")} + scores = {key: 0.5} + assert repo._apply_entity_boost(scores, rows, {"joanna"}) == {key: 0.5} + + def test_matching_row_is_boosted(self): + repo = self._repo(enabled=True, weight=0.2) + key = ("entity", 1) + rows = {key: _make_index_row(row_id=1, title="Joanna")} + boosted = repo._apply_entity_boost({key: 0.5}, rows, {"joanna"}) + assert boosted[key] == pytest.approx(0.5 * 1.2) + + def test_non_matching_row_unchanged(self): + repo = self._repo(enabled=True, weight=0.2) + key = ("entity", 1) + rows = {key: _make_index_row(row_id=1, title="Anthony")} + boosted = repo._apply_entity_boost({key: 0.5}, rows, {"joanna"}) + assert boosted[key] == pytest.approx(0.5) + + def test_boost_can_reorder_lower_scored_match_above_higher_non_match(self): + repo = self._repo(enabled=True, weight=0.5) + generic_key = ("entity", 1) + joanna_key = ("entity", 2) + rows = { + generic_key: _make_index_row(row_id=1, title="Generic topic about hobbies"), + joanna_key: _make_index_row(row_id=2, title="Joanna"), + } + # The generic row starts higher (0.6) but does not match; "Joanna" (0.5) matches. 
+ boosted = repo._apply_entity_boost({generic_key: 0.6, joanna_key: 0.5}, rows, {"joanna"}) + assert boosted[joanna_key] > boosted[generic_key] + + def test_multiple_matches_scale_boost(self): + repo = self._repo(enabled=True, weight=0.1, max_terms=3) + key = ("relation", 1) + rows = {key: _make_index_row(row_id=1, title="Deborah -> Jolene")} + boosted = repo._apply_entity_boost({key: 1.0}, rows, {"deborah", "jolene"}) + # Two matched terms -> 1 + 0.1 * 2 = 1.2 + assert boosted[key] == pytest.approx(1.2) + + def test_max_terms_caps_the_boost(self): + repo = self._repo(enabled=True, weight=0.1, max_terms=1) + key = ("relation", 1) + rows = {key: _make_index_row(row_id=1, title="Deborah -> Jolene")} + boosted = repo._apply_entity_boost({key: 1.0}, rows, {"deborah", "jolene"}) + # Capped at 1 term -> 1 + 0.1 * 1 = 1.1 + assert boosted[key] == pytest.approx(1.1) + + def test_zero_weight_is_noop(self): + repo = self._repo(enabled=True, weight=0.0) + key = ("entity", 1) + rows = {key: _make_index_row(row_id=1, title="Joanna")} + assert repo._apply_entity_boost({key: 0.5}, rows, {"joanna"}) == {key: 0.5} + + def test_empty_entity_terms_is_noop(self): + repo = self._repo(enabled=True, weight=0.2) + key = ("entity", 1) + rows = {key: _make_index_row(row_id=1, title="Joanna")} + assert repo._apply_entity_boost({key: 0.5}, rows, set()) == {key: 0.5} From de8eac6638997ceb42a680a8cd4706d9b65deca4 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Thu, 11 Jun 2026 09:36:26 -0500 Subject: [PATCH 2/3] test(core): pass stub embedding provider to SQLiteSearchRepository at construction time On the SQLite path, SQLiteSearchRepository.__init__ calls create_embedding_provider() when semantic_search_enabled=True and no provider is supplied, which could pull FastEmbed/cache artifacts or fail where semantic deps are unusable. 
The Postgres branch already passed the stub provider at construction; bring the SQLite branch in line by passing embedding_provider=provider to the constructor and removing the redundant after-the-fact attribute swaps. Co-Authored-By: Claude Signed-off-by: Drew Cain --- tests/repository/test_entity_boost_search_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/repository/test_entity_boost_search_service.py b/tests/repository/test_entity_boost_search_service.py index 2426a44b7..3e5d22e1c 100644 --- a/tests/repository/test_entity_boost_search_service.py +++ b/tests/repository/test_entity_boost_search_service.py @@ -105,15 +105,14 @@ async def _build_search_service( embedding_provider=provider, ) else: + # Pass the stub provider at construction time so __init__ does not + # instantiate the real configured provider when semantic_search_enabled=True. repo = SQLiteSearchRepository( session_maker, project_id=test_project.id, app_config=app_config, + embedding_provider=provider, ) - repo._semantic_enabled = True - repo._embedding_provider = provider - repo._vector_dimensions = provider.dimensions - repo._vector_tables_initialized = False search_repo = repo service = SearchService(search_repo, entity_repository, file_service) From d175879940668c7202a75550fb4db3c114249841 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Thu, 11 Jun 2026 15:39:24 -0500 Subject: [PATCH 3/3] docs(core): record entity-boost benchmark findings; keep default off Benchmarked the #951 entity-aware ranking boost against the LoCoMo retrieval suite (hybrid mode) and a hand-built adversarial corpus. LoCoMo is insensitive to the boost: sweeping the weight across 0.15/0.3/0.5/1.0/2.0 produced identical recall@5, recall@10, MRR, and content-hit at every point (no query reordered, no score changed). 
LoCoMo docs are keyed by session id and expose speaker names only in body text, never as entity titles or relation names, so the title/relation-matching boost never fires there. An adversarial check found a real regression mode: Title-Case queries inject spurious entity terms. 'What Is The Plan For Q3' extracts 'Q3' and, even at weight 0.15, promotes a literal-'Q3' document over the more relevant 'third quarter' document. Clean proper nouns (Katze) work; lowercase-leading identifiers (getUserById) are correctly ignored. Decision: keep search_entity_boost_enabled default off and the weight at 0.15. LoCoMo provides no signal to raise the weight, and the adversarial check is not clean. Document the findings and guidance; no behavior or default changes (config.py is touched only in a comment and the field description). Co-Authored-By: Claude Signed-off-by: Drew Cain --- docs/semantic-search.md | 38 +++++++++++++++++++++++++++++++++++--- src/basic_memory/config.py | 8 ++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/docs/semantic-search.md b/docs/semantic-search.md index 25cc8794e..205031b41 100644 --- a/docs/semantic-search.md +++ b/docs/semantic-search.md @@ -107,7 +107,7 @@ All settings are fields on `BasicMemoryConfig` and can be set via environment va | `semantic_embedding_document_input_type` | `BASIC_MEMORY_SEMANTIC_EMBEDDING_DOCUMENT_INPUT_TYPE` | Auto for known LiteLLM models | Optional LiteLLM `input_type` for indexed document/passages. | | `semantic_embedding_query_input_type` | `BASIC_MEMORY_SEMANTIC_EMBEDDING_QUERY_INPUT_TYPE` | Auto for known LiteLLM models | Optional LiteLLM `input_type` for search queries. | | `semantic_vector_k` | `BASIC_MEMORY_SEMANTIC_VECTOR_K` | `100` | Candidate count for vector nearest-neighbour retrieval. Higher values improve recall at the cost of latency. | -| `search_entity_boost_enabled` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_ENABLED` | `false` | Enable the entity-aware ranking boost in hybrid search (see below). Default off pending benchmark validation.
| +| `search_entity_boost_enabled` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_ENABLED` | `false` | Enable the entity-aware ranking boost in hybrid search (see below). Default off: benchmarking showed it is inert on LoCoMo and prone to Title-Case false positives. | | `search_entity_boost_weight` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_WEIGHT` | `0.15` | Per-matched-term multiplier strength for the entity boost. A candidate matching N query entity terms is scaled by `1 + weight * min(N, max_terms)`. | | `search_entity_boost_max_terms` | `BASIC_MEMORY_SEARCH_ENTITY_BOOST_MAX_TERMS` | `3` | Maximum number of distinct matched entity terms that contribute to the boost, bounding the multiplier. | @@ -143,8 +143,40 @@ export BASIC_MEMORY_SEARCH_ENTITY_BOOST_WEIGHT=0.15 export BASIC_MEMORY_SEARCH_ENTITY_BOOST_MAX_TERMS=3 ``` -> **Default off.** This setting is disabled by default pending LoCoMo benchmark -> validation. Enable it to experiment with entity-heavy corpora. +> **Default off.** This setting is disabled by default. See the benchmark +> findings below for why the default stays off and where the boost helps. + +### Benchmark findings + +The boost was benchmarked against LoCoMo (the +[basic-memory-benchmarks](https://github.com/basicmachines-co/basic-memory-benchmarks) +retrieval suite, hybrid mode) and a hand-built adversarial corpus. Two results +drove the decision to keep the default **off** and leave the weight at `0.15`: + +1. **LoCoMo is insensitive to the boost.** Sweeping the weight across + `0.15, 0.3, 0.5, 1.0, 2.0` produced *identical* recall@5, recall@10, MRR, and + content-hit at every point — no query reordered, no score changed. LoCoMo's + documents are titled by conversation/session id and expose speaker names only + in body text, never as entity titles or relation names. Because the boost + matches query proper nouns against a candidate's **title or linked relation + names**, it never fires on this corpus.
LoCoMo therefore provides no signal to + raise the weight, and the boost neither helps nor harms it. + +2. **A capitalization-only heuristic has false positives.** On a corpus where + entity terms appear in titles, the boost correctly promotes the right document + for clean proper nouns (e.g. `Katze`) and is correctly inert on + lowercase-leading identifiers (e.g. `getUserById`, ignored). But **Title-Case + queries can regress**: a query like `What Is The Plan For Q3` extracts `Q3` as + an entity term, and even at weight `0.15` it promotes a document that + *literally* contains "Q3" above the more relevant document that says "third + quarter". Since entity detection is lexical (capitalization, no NER), any + capitalized non-entity token in a query is a potential false positive. + +**Guidance.** Enable the boost only on entity-heavy corpora where your queries +name entities that are themselves note titles or linked relations (the #951 +"Joanna" case). Prefer natural-case queries (`What are Joanna's hobbies?`) over +Title-Cased phrasing, which can inject spurious entity terms. Leave it off for +conversational / body-text-keyed corpora like LoCoMo, where it cannot help. ## Embedding Providers diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 0f51ecac5..df3e0918e 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -355,13 +355,17 @@ def __init__(self, **data: Any) -> None: ... # document about a different entity. # Outcome: when enabled, hybrid fusion multiplies a candidate's fused score by a small # bonus for each distinct query entity term it matches lexically (no model inference). - # Default OFF pending LoCoMo benchmark validation by the maintainer. + # Default OFF: LoCoMo benchmarking showed the boost is inert there (its docs are keyed + # by session id, not entity titles) and an adversarial check found Title-Case queries + # can inject spurious entity terms (e.g. "Q3") that regress ranking. 
See + # docs/semantic-search.md "Benchmark findings". search_entity_boost_enabled: bool = Field( default=False, description="Enable entity-aware ranking boost in hybrid search. When enabled, " "hybrid candidates whose title or linked relation names contain a proper-noun " "term from the query are boosted in the final ranking. Lexical-only; adds no " - "model inference. Default off pending benchmark validation.", + "model inference. Default off: benchmark-validated as inert on LoCoMo and prone " + "to Title-Case false positives (see docs/semantic-search.md).", ) search_entity_boost_weight: float = Field( default=0.15,