From 5cdc109ef0ddc64da9f013f1f3382cf5e7805ae9 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Wed, 10 Jun 2026 10:24:20 -0500 Subject: [PATCH] test(mcp): integration coverage for colliding observation permalinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distinct observations sharing a category and a 200-char content prefix used to collide on their truncated synthetic permalink and the second was silently dropped from the search index (#909, fixed by #931 via a content digest suffix). These end-to-end MCP tests pin the behavior independent of the disambiguation mechanism: both observations stay searchable, and delete_note removes every index row — including disambiguated ones, which would otherwise survive as ghost rows since search_index has no FK cascade from entity. Post-delete assertions inspect the index via search_service directly because the MCP search pipeline hides rows whose entity is gone, which would mask an orphan. Refs #909 Co-Authored-By: Claude Fable 5 Signed-off-by: Drew Cain --- ...rvation_permalink_collision_integration.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 test-int/mcp/test_observation_permalink_collision_integration.py diff --git a/test-int/mcp/test_observation_permalink_collision_integration.py b/test-int/mcp/test_observation_permalink_collision_integration.py new file mode 100644 index 00000000..41f11ae3 --- /dev/null +++ b/test-int/mcp/test_observation_permalink_collision_integration.py @@ -0,0 +1,141 @@ +"""Regression tests for https://github.com/basicmachines-co/basic-memory/issues/909. + +Observation content is truncated to 200 chars when building the synthetic +permalink (Postgres btree index limit), so distinct observations sharing a +category and a 200-char content prefix used to collide and the second one was +silently dropped from the search index, making it unfindable. #931 fixed this +by appending a content digest to the truncated permalink. + +These tests pin the end-to-end behavior independent of the disambiguation +mechanism: every observation stays searchable, and deleting the note removes +every index row — including rows whose permalinks needed disambiguation. +""" + +import json +from typing import Any + +import pytest +from fastmcp import Client + + +def _json_content(tool_result) -> Any: + """Parse a FastMCP tool result content block into JSON.""" + assert len(tool_result.content) == 1 + assert tool_result.content[0].type == "text" + return json.loads(tool_result.content[0].text) # pyright: ignore [reportAttributeAccessIssue] + + +@pytest.mark.asyncio +async def test_duplicate_category_content_observations_both_searchable( + mcp_server, app, test_project +): + """Both observations must be indexed even when their synthetic permalinks collide.""" + async with Client(mcp_server) as client: + prefix = "x" * 210 # > 200 so the truncated permalink prefixes are identical + content = ( + "# Dup Obs Note\n\n" + "## Observations\n" + f"- [note] {prefix} ALPHA_UNIQUE_MARKER\n" + f"- [note] {prefix} BETA_UNIQUE_MARKER\n" + ) + await client.call_tool( + "write_note", + { + "project": test_project.name, + "title": "Dup Obs Note", + "directory": "dup", + "content": content, + }, + ) + + # Both observations should be independently findable by their unique suffix + for marker in ("ALPHA_UNIQUE_MARKER", "BETA_UNIQUE_MARKER"): + result = await client.call_tool( + "search_notes", + { + "project": test_project.name, + "query": marker, + "search_type": "text", + "entity_types": ["observation"], + "output_format": "json", + }, + ) + data = _json_content(result) + snippets = [r.get("content") or "" for r in data["results"]] + assert any(marker in s for s in snippets), ( + f"observation containing {marker} was dropped from the search index " + "due to a synthetic-permalink collision (content truncated to 200 chars). " + "results=" + json.dumps(data, default=str)[:800] + ) + + +@pytest.mark.asyncio +async def test_delete_note_with_colliding_observations_leaves_no_ghost_rows( + mcp_server, app, test_project, search_service +): + """Deleting a note must clean up disambiguated observation index rows. + + search_index has no FK cascade from entity, so any index-time permalink + disambiguation must be matched by delete-time cleanup or the extra + observation survives in the search index as a ghost row pointing at the + deleted file. + + The post-delete assertions inspect the index through ``search_service`` + rather than the ``search_notes`` tool: the MCP search pipeline happens to + hide rows whose entity is gone, which would mask the orphan. + """ + from basic_memory.schemas.search import SearchQuery + + async with Client(mcp_server) as client: + prefix = "y" * 210 # > 200 so the truncated permalink prefixes are identical + content = ( + "# Ghost Obs Note\n\n" + "## Observations\n" + f"- [note] {prefix} GHOST_ALPHA_MARKER\n" + f"- [note] {prefix} GHOST_BETA_MARKER\n" + ) + await client.call_tool( + "write_note", + { + "project": test_project.name, + "title": "Ghost Obs Note", + "directory": "ghost", + "content": content, + }, + ) + + # Both observations are searchable before deletion + for marker in ("GHOST_ALPHA_MARKER", "GHOST_BETA_MARKER"): + result = await client.call_tool( + "search_notes", + { + "project": test_project.name, + "query": marker, + "search_type": "text", + "entity_types": ["observation"], + "output_format": "json", + }, + ) + data = _json_content(result) + assert any(marker in (r.get("content") or "") for r in data["results"]) + + # Both rows exist in the search index itself under distinct permalinks + index_rows = await search_service.search(SearchQuery(text="GHOST_BETA_MARKER")) + assert any(r.type == "observation" for r in index_rows) + + delete_result = await client.call_tool( + "delete_note", + { + "project": test_project.name, + "identifier": "Ghost Obs Note", + }, + ) + assert "true" in delete_result.content[0].text.lower() # pyright: ignore [reportAttributeAccessIssue] + + # No ghost rows remain in the index - including any disambiguated row + for marker in ("GHOST_ALPHA_MARKER", "GHOST_BETA_MARKER"): + index_rows = await search_service.search(SearchQuery(text=marker)) + assert index_rows == [], ( + f"search index row containing {marker} survived note deletion as a ghost: " + f"{[(r.type, r.permalink) for r in index_rows]}" + )