basicmachines-co · groksrc · Jun 13, 2026 · Jun 13, 2026 · chatgpt-codex-connector · Jun 13, 2026
diff --git a/src/basic_memory/repository/postgres_search_repository.py b/src/basic_memory/repository/postgres_search_repository.py
@@ -18,6 +18,7 @@
 from basic_memory.repository.search_repository_base import (
     SearchRepositoryBase,
     VectorChunkState,
+    relaxed_query_words,
 )
 from basic_memory.repository.metadata_filters import parse_metadata_filters
 from basic_memory.repository.semantic_errors import SemanticDependenciesMissingError
@@ -176,6 +177,14 @@ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
         # For non-Boolean queries, prepare single term
         return self._prepare_single_term(term, is_prefix)
 
+    @staticmethod
+    def _relaxed_tsquery_text(search_text: Optional[str]) -> Optional[str]:
+        """Return an OR-joined prefix tsquery for retrying a strict miss, or None."""
+        # relaxed_query_words() yields None for empty, quoted, or boolean input;
+        # an empty join then collapses to None as well.
+        content_words = relaxed_query_words(search_text) or []
+        return " | ".join(word + ":*" for word in content_words) or None
+
     def _prepare_boolean_query(self, query: str) -> str:
         """Convert Boolean query to tsquery format.
 
@@ -236,7 +245,12 @@ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
 
         # Handle multi-word queries
         if " " in cleaned_term:
-            words = [w for w in cleaned_term.split() if w.strip()]
+            # Strip sentence punctuation from word edges so question-form
+            # queries produce clean lexemes (parity with SQLite FTS5 prep).
+            # The tsquery tokenizer ignores this punctuation anyway; leaving it
+            # in only risks tsquery syntax errors. Interior characters are kept.
+            words = [w.strip("?!.,;") for w in cleaned_term.split()]
+            words = [w for w in words if w]
             if not words:
                 # All characters were special chars, search won't match anything
                 # Return a safe search term that won't cause syntax errors
@@ -249,8 +263,11 @@ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
             # Join with AND operator
             return " & ".join(prepared_words)
 
-        # Single word
-        cleaned_term = cleaned_term.strip()
+        # Single word: strip edge punctuation; guard the now-empty case so a
+        # bare ":*"/"" never reaches tsquery.
+        cleaned_term = cleaned_term.strip().strip("?!.,;")
+        if not cleaned_term:
+            return "NOSPECIALCHARS:*"
         if is_prefix:
             return f"{cleaned_term}:*"
         else:
@@ -908,6 +925,7 @@ async def search(
         min_similarity: Optional[float] = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> List[SearchIndexRow]:
         """Search across all indexed content using PostgreSQL tsvector."""
         # --- Dispatch vector / hybrid modes (shared logic) ---
@@ -982,6 +1000,20 @@ async def search(
             async with db.scoped_session(self.session_maker) as session:
                 result = await session.execute(text(sql), params)
                 rows = result.fetchall()
+                # Trigger: multi-word natural-language query matched nothing
+                # under the default all-terms-AND tsquery semantics.
+                # Why: questions rarely have every word in one document;
+                # without relaxation the FTS half of hybrid search contributes
+                # zero candidates (parity with the SQLite path).
+                # Outcome: one retry with OR-joined prefix lexemes; ts_rank
+                # still ranks multi-term matches first.
+                relaxed = (
+                    self._relaxed_tsquery_text(search_text) if allow_relaxed and not rows else None
+                )
+                if relaxed and params.get("text"):
+                    params["text"] = relaxed
+                    result = await session.execute(text(sql), params)
+                    rows = result.fetchall()
         except Exception as e:
             if self._is_tsquery_syntax_error(e):
                 logger.warning(f"tsquery syntax error for search term: {search_text}, error: {e}")

diff --git a/src/basic_memory/repository/search_repository_base.py b/src/basic_memory/repository/search_repository_base.py
@@ -40,6 +40,36 @@
 OVERSIZED_ENTITY_VECTOR_SHARD_SIZE = 256
 _SQLITE_MAX_PREPARE_WINDOW = 8
 
+# Interrogative/function words contribute lexical noise when a strict
+# full-text query is relaxed: "when OR did OR a" matches loud wrong documents
+# that displace genuine results from the ranking window.
+RELAXATION_STOPWORDS = frozenset(
+    "a an and are as at be but by did do does for from had has have how i in is it of on "
+    "or that the their they this to was we were what when where which who whom whose why "
+    "will with you your".split()
+)
+
+
+def relaxed_query_words(search_text: Optional[str]) -> Optional[list[str]]:
+    """Return the content-bearing words for OR-relaxing a strict full-text query.
+
+    Returns None when relaxation must not apply: empty input, quoted phrases,
+    explicit boolean queries (user intent is not second-guessed), or no words left.
+    """
+    if not search_text:
+        return None
+    operators = ("AND", "OR", "NOT")
+    tokens = search_text.split()
+    # Judge boolean intent per token so tab/newline-separated operators count too.
+    if '"' in search_text or any(token in operators for token in tokens):
+        return None
+    # A keyword exposed only by stripping ("NOT,") must not be emitted as a term.
+    candidates = (token.strip("?!.,;:") for token in tokens)
+    words = [w for w in candidates if w.isalnum() and w not in operators]
+    words = [w for w in words if w.lower() not in RELAXATION_STOPWORDS]
+    return words or None
+
+
 # Entity, observation, and relation rows in search_index carry ids from independent
 # auto-increment sequences, so a bare id is ambiguous across row types. Every map in
 # the vector/hybrid retrieval path must key rows by (type, id) to avoid collisions.
@@ -229,6 +259,7 @@ async def search(
         min_similarity: Optional[float] = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> List[SearchIndexRow]:
         """Search across all indexed content.
 
@@ -2174,6 +2205,9 @@ async def _search_hybrid(
         query_start = time.perf_counter()
         candidate_limit = max(self._semantic_vector_k, (limit + offset) * 10)
         fts_start = time.perf_counter()
+        # allow_relaxed: question-form queries rarely AND-match, and a dead FTS
+        # branch silently degrades hybrid to vector-only ranking. Fusion plus
+        # bm25 keep relaxed lexical candidates from dominating precision.
         fts_results = await self.search(
             search_text=search_text,
             permalink=permalink,
@@ -2187,6 +2221,7 @@ async def _search_hybrid(
             retrieval_mode=SearchRetrievalMode.FTS,
             limit=candidate_limit,
             offset=0,
+            allow_relaxed=True,
         )
         fts_ms = (time.perf_counter() - fts_start) * 1000
         vector_start = time.perf_counter()

diff --git a/src/basic_memory/repository/sqlite_search_repository.py b/src/basic_memory/repository/sqlite_search_repository.py
@@ -23,7 +23,10 @@
 from basic_memory.repository.embedding_provider import EmbeddingProvider
 from basic_memory.repository.embedding_provider_factory import create_embedding_provider
 from basic_memory.repository.search_index_row import SearchIndexRow
-from basic_memory.repository.search_repository_base import SearchRepositoryBase
+from basic_memory.repository.search_repository_base import (
+    SearchRepositoryBase,
+    relaxed_query_words,
+)
 from basic_memory.repository.metadata_filters import parse_metadata_filters, build_sqlite_json_path
 from basic_memory.repository.semantic_errors import SemanticDependenciesMissingError
 from basic_memory.schemas.search import SearchItemType, SearchRetrievalMode
@@ -255,6 +258,19 @@ def _prepare_single_term(self, term: str, is_prefix: bool = True) -> str:
         if "*" in term and all(c.isalnum() or c in "*_-" for c in term):
             return term
 
+        # Natural-language queries arrive with sentence punctuation that FTS5
+        # treats as syntax ("When did Melanie paint a sunrise?"). The tokenizer
+        # ignores this punctuation in the INDEX, so stripping it from word
+        # edges loses nothing — but leaving it forces the whole question into
+        # an exact-phrase match that returns zero rows, silently disabling the
+        # FTS half of hybrid search. Interior characters (hyphens, slashes —
+        # permalinks and paths) are untouched.
+        if " " in term:
+            words = [word.strip("?!.,;:") for word in term.split()]
+            term = " ".join(word for word in words if word)
+            if not term:
+                return ""
+
         # Characters that can cause FTS5 syntax errors when used as operators
         # We're more conservative here - only quote when we detect problematic patterns
         problematic_chars = [
@@ -351,6 +367,14 @@ def _prepare_search_term(self, term: str, is_prefix: bool = True) -> str:
         # For non-Boolean queries, use the single term preparation logic
         return self._prepare_single_term(term, is_prefix)
 
+    @staticmethod
+    def _relaxed_fts_text(search_text: Optional[str]) -> Optional[str]:
+        """Return an OR-joined FTS5 prefix expression for retrying a strict miss, or None."""
+        # relaxed_query_words() yields None for empty, quoted, or boolean input;
+        # an empty join then collapses to None as well.
+        content_words = relaxed_query_words(search_text) or []
+        return " OR ".join(word + "*" for word in content_words) or None
+
     # ------------------------------------------------------------------
     # sqlite-vec extension loading (SQLite-specific)
     # ------------------------------------------------------------------
@@ -953,8 +977,15 @@ async def search(
         min_similarity: Optional[float] = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> List[SearchIndexRow]:
-        """Search across all indexed content using SQLite FTS5."""
+        """Search across all indexed content using SQLite FTS5.
+
+        ``allow_relaxed=True`` retries a zero-result strict multi-word query
+        with OR-joined content terms. Only the hybrid path opts in: its FTS
+        branch otherwise contributes nothing for question-form queries.
+        Service-level FTS searches keep their own conservative fallback.
+        """
         # --- Dispatch vector / hybrid modes (shared logic) ---
         dispatched = await self._dispatch_retrieval_mode(
             search_text=search_text,
@@ -1021,6 +1052,21 @@ async def search(
             async with db.scoped_session(self.session_maker) as session:
                 result = await session.execute(text(sql), params)
                 rows = result.fetchall()
+                # Trigger: multi-word natural-language query matched nothing
+                # under the default all-terms-AND semantics.
+                # Why: questions ("when did X do Y") rarely have every word in
+                # one document; without relaxation the FTS half of hybrid
+                # search contributes zero candidates and ranking degrades to
+                # vector-only.
+                # Outcome: one retry with OR-joined prefix terms; bm25 still
+                # ranks multi-term matches first.
+                relaxed = (
+                    self._relaxed_fts_text(search_text) if allow_relaxed and not rows else None
+                )
+                if relaxed and params.get("text"):
+                    params["text"] = relaxed
+                    result = await session.execute(text(sql), params)
+                    rows = result.fetchall()
         except Exception as e:
             # Handle FTS5 syntax errors and provide user-friendly feedback
             if self._is_fts5_syntax_error(e):  # pragma: no cover

diff --git a/tests/repository/test_hybrid_fusion.py b/tests/repository/test_hybrid_fusion.py
@@ -77,6 +77,7 @@ async def search(
         min_similarity: Optional[float] = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> list[SearchIndexRow]:
         return []  # pragma: no cover
 

diff --git a/tests/repository/test_postgres_search_repository.py b/tests/repository/test_postgres_search_repository.py
@@ -1001,3 +1001,59 @@ async def test_postgres_search_categories_exact_match(session_maker, test_projec
     # Multiple categories union.
     multi = await repo.search(categories=["requirement", "decision"])
     assert {r.id for r in multi} == {70101, 70102}
+
+
+@pytest.mark.asyncio
+async def test_postgres_question_punctuation_and_relaxation(session_maker, test_project):
+    """Postgres term prep and OR-relaxation for a natural-language question.
+
+    Checks that edge punctuation is stripped before lexeme formatting, that
+    the relaxed tsquery OR-joins only the content words, and that boolean,
+    quoted, and missing input are left unrelaxed.
+    """
+    question = "When did Melanie paint a sunrise?"
+    repository = PostgresSearchRepository(session_maker, project_id=test_project.id)
+
+    # Strict preparation: no "?" survives and the last word becomes a prefix lexeme.
+    strict_tsquery = repository._prepare_search_term(question)
+    assert "?" not in strict_tsquery
+    assert "sunrise:*" in strict_tsquery
+
+    # Relaxed form: stopwords ("When", "did", "a") are gone, the rest is OR-joined.
+    relax = repository._relaxed_tsquery_text
+    assert relax(question) == "Melanie:* | paint:* | sunrise:*"
+
+    # Inputs that must not be relaxed yield None.
+    for untouched in ("alpha AND beta", '"exact phrase"', None):
+        assert relax(untouched) is None
+
+
+@pytest.mark.asyncio
+async def test_postgres_multiword_query_relaxes_on_strict_miss(session_maker, test_project):
+    """A strict all-terms miss returns nothing unless ``allow_relaxed`` is set."""
+    repository = PostgresSearchRepository(session_maker, project_id=test_project.id)
+    timestamp = datetime.now(timezone.utc)
+    trip_row = SearchIndexRow(
+        project_id=test_project.id,
+        id=77,
+        title="Trip plans",
+        content_stems="melanie painted a sunrise over the lake last year",
+        content_snippet="Melanie painted a sunrise over the lake last year.",
+        permalink="docs/trip-plans",
+        file_path="docs/trip-plans.md",
+        type="entity",
+        metadata={"note_type": "note"},
+        created_at=timestamp,
+        updated_at=timestamp,
+    )
+    await repository.index_item(trip_row)
+    question = "Did Melanie go hiking at sunrise?"
+
+    # "hiking" never occurs in the indexed row, so the default all-terms-AND
+    # query cannot match even after Postgres discards its own stopwords.
+    strict_hits = await repository.search(search_text=question)
+    assert strict_hits == []
+
+    # Opting in (as the hybrid FTS branch does) surfaces the partial match.
+    relaxed_hits = await repository.search(search_text=question, allow_relaxed=True)
+    assert any(hit.id == 77 for hit in relaxed_hits)
diff --git a/tests/repository/test_search_repository.py b/tests/repository/test_search_repository.py
@@ -1124,3 +1124,80 @@ async def test_search_categories_exact_match(search_repository, search_entity):
     # Multiple categories union: both observations come back.
     multi = await search_repository.search(categories=["requirement", "decision"])
     assert {r.id for r in multi} == {70001, 70002}
+
+
+@pytest.mark.asyncio
+async def test_question_punctuation_does_not_phrase_quote(search_repository):
+    """A question's sentence punctuation must not trigger phrase quoting (#hybrid-fts).
+
+    Before the fix, 'When did Melanie paint a sunrise?' was prepared as the
+    FTS5 phrase '"When did Melanie paint a sunrise?"*', which matched no rows
+    and silently switched off the FTS half of hybrid search for questions.
+    """
+    question = "When did Melanie paint a sunrise?"
+    prepared_term = search_repository._prepare_single_term(question)
+    assert '"' not in prepared_term
+
+    # Each backend spells a prefix match differently: tsquery ':*', FTS5 '*'.
+    prefix_suffix = ":*" if is_postgres_backend(search_repository) else "*"
+    assert f"sunrise{prefix_suffix}" in prepared_term
+
+
+@pytest.mark.asyncio
+async def test_relaxed_query_drops_stopwords(search_repository):
+    """The relaxed expression keeps only content words, in the backend's own syntax."""
+    repo, question = search_repository, "When did Melanie paint a sunrise?"
+    # "When", "did" and "a" are stopwords; only the three content words remain.
+    if is_postgres_backend(repo):
+        assert repo._relaxed_tsquery_text(question) == "Melanie:* | paint:* | sunrise:*"
+    else:
+        assert repo._relaxed_fts_text(question) == "Melanie* OR paint* OR sunrise*"
+
+
+@pytest.mark.asyncio
+async def test_relaxed_query_respects_user_intent(search_repository):
+    """Boolean and quoted queries are never relaxed; a lone word yields a prefix term."""
+    on_postgres = is_postgres_backend(search_repository)
+    if on_postgres:
+        relax = search_repository._relaxed_tsquery_text
+    else:
+        relax = search_repository._relaxed_fts_text
+    prefix_suffix = ":*" if on_postgres else "*"
+    assert relax("alpha AND beta") is None
+    assert relax('"exact phrase"') is None
+    assert relax("single") == f"single{prefix_suffix}"
+    assert relax(None) is None
+
+
+@pytest.mark.asyncio
+async def test_multiword_query_relaxes_to_or_when_strict_misses(search_repository, search_entity):
+    """A question that shares only some words with a document still finds it."""
+    from basic_memory.repository.search_index_row import SearchIndexRow
+    from basic_memory.schemas.search import SearchItemType
+
+    question = "Did Melanie go hiking at sunrise?"
+    await search_repository.index_item(
+        SearchIndexRow(
+            project_id=search_repository.project_id,
+            id=search_entity.id,
+            type=SearchItemType.ENTITY.value,
+            title="Trip plans",
+            content_snippet="Melanie painted a sunrise over the lake last year.",
+            content_stems="melanie painted a sunrise over the lake last year",
+            permalink=search_entity.permalink,
+            file_path=search_entity.file_path,
+            entity_id=search_entity.id,
+            metadata={"note_type": search_entity.note_type},
+            created_at=search_entity.created_at,
+            updated_at=search_entity.updated_at,
+        )
+    )
+
+    # "hiking" is not in the document, so requiring every term misses on both
+    # backends (Postgres's stopword stripping cannot rescue it either).
+    strict_hits = await search_repository.search(search_text=question)
+    assert strict_hits == []
+
+    # Opting in (as the hybrid FTS branch does) surfaces the partial match.
+    relaxed_hits = await search_repository.search(search_text=question, allow_relaxed=True)
+    assert any(hit.entity_id == search_entity.id for hit in relaxed_hits)
diff --git a/tests/repository/test_semantic_search_base.py b/tests/repository/test_semantic_search_base.py
@@ -89,6 +89,7 @@ async def search(
         min_similarity: float | None = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> list[SearchIndexRow]:
         return []
 

diff --git a/tests/repository/test_vector_pagination.py b/tests/repository/test_vector_pagination.py
@@ -62,6 +62,7 @@ async def search(
         min_similarity: float | None = None,
         limit: int = 10,
         offset: int = 0,
+        allow_relaxed: bool = False,
     ) -> list[SearchIndexRow]:
         return []  # pragma: no cover