From 736aa840aaa9c9383bab109c1f78ef09b2e8e9a3 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Thu, 11 Jun 2026 01:10:06 -0500 Subject: [PATCH 1/2] feat(mcp): comma-split note_types/entity_types/categories in search_notes (#930) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `parse_str_list` to utils.py — like `parse_tags` but without stripping '#' — and wire it as the BeforeValidator for note_types, entity_types, and categories in search_notes. This makes passing "note,task" or '["note","task"]' work correctly instead of being wrapped as a single literal value by coerce_list. coerce_list is left unchanged; canvas and other callers that depend on its wrap-single-string behaviour are unaffected. Co-Authored-By: Claude Signed-off-by: Drew Cain --- src/basic_memory/mcp/tools/search.py | 16 ++++--- src/basic_memory/utils.py | 49 ++++++++++++++++++++ tests/test_coerce.py | 67 +++++++++++++++++++++++++++- 3 files changed, 125 insertions(+), 7 deletions(-) diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py index 08f87bc1..c6e8778d 100644 --- a/src/basic_memory/mcp/tools/search.py +++ b/src/basic_memory/mcp/tools/search.py @@ -14,7 +14,7 @@ from basic_memory.utils import ( build_canonical_permalink, coerce_dict, - coerce_list, + parse_str_list, parse_tags, strict_search_tags, ) @@ -649,24 +649,30 @@ async def search_notes( # Plural-vs-singular trips models constantly. Accept the singular too. note_types: Annotated[ List[str] | None, - BeforeValidator(coerce_list), + # parse_str_list, not coerce_list: "note,task" must split into ["note", "task"] + # consistent with how tags are handled (#910/#930). coerce_list wraps the whole + # comma string as the single literal type ["note,task"], which matches nothing. + BeforeValidator(parse_str_list), Field(default=None, validation_alias=AliasChoices("note_types", "note_type", "types")), "Filter by the 'type' field in note frontmatter (e.g. 'note', 'chapter', 'person'). " + "Accepts a list, a comma-separated string (e.g. 'note,task'), or a JSON-array string. " "Case-insensitive.", ] = None, entity_types: Annotated[ List[str] | None, - BeforeValidator(coerce_list), + BeforeValidator(parse_str_list), Field(default=None, validation_alias=AliasChoices("entity_types", "entity_type")), "Filter by knowledge graph item type: 'entity' (whole notes), 'observation', or " "'relation'. Defaults to 'entity'. Do NOT pass schema/frontmatter types like " - "'Chapter' here — use note_types instead.", + "'Chapter' here — use note_types instead. " + "Accepts a list, a comma-separated string (e.g. 'entity,observation'), or a JSON-array string.", ] = None, categories: Annotated[ List[str] | None, - BeforeValidator(coerce_list), + BeforeValidator(parse_str_list), Field(default=None, validation_alias=AliasChoices("categories", "category")), "Filter observation results to these exact categories (e.g. ['requirement']). " + "Accepts a list, a comma-separated string (e.g. 'requirement,decision'), or a JSON-array string. " "Pair with entity_types=['observation'] to return only observations whose " "category matches exactly — not every row mentioning the word.", ] = None, diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 05249498..43ed9490 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -600,6 +600,55 @@ def strict_search_tags(v: Any) -> Any: return v +def parse_str_list(v: Any) -> List[str]: + """Parse a list of plain strings from various input formats. + + Like parse_tags but without stripping '#' — correct for type/category params + where the value is a literal identifier, not a hashtag. + + Handles the four input shapes that MCP clients commonly produce: + - None → [] + - "note,task" → ["note", "task"] (comma-split string) + - '["note","task"]' → ["note", "task"] (JSON array string) + - ["note,task"] → ["note", "task"] (list with comma-string element) + + Non-str/list/None values are returned unchanged so Pydantic can reject them + with a clear validation error instead of silently coercing. + """ + if v is None: + return [] + + if isinstance(v, list): + # Trigger: a list element may itself be a comma-separated string (e.g. some MCP clients + # serialise `["note,task"]` when the caller passed `note_types="note,task"`). + # Outcome: flatten each element by splitting on commas and stripping whitespace. + return [ + item.strip() + for raw in v + if raw is not None + for item in str(raw).split(",") + if item and item.strip() + ] + + if isinstance(v, str): + # Trigger: MCP clients sometimes send a JSON array string like '["note","task"]'. + # Outcome: parse it as JSON first, then recurse to handle the resulting list. + stripped = v.strip() + if stripped.startswith("[") and stripped.endswith("]"): + try: + parsed_json = json.loads(stripped) + if isinstance(parsed_json, list): + return parse_str_list(parsed_json) + except json.JSONDecodeError: + pass + + # Plain comma-separated string: "note,task" → ["note", "task"] + return [item.strip() for item in v.split(",") if item and item.strip()] + + # Non-str/list/None — return unchanged so Pydantic rejects with a clear error. + return v # type: ignore[return-value] + + def coerce_list(v: Any) -> Any: """Coerce string input to list for MCP clients that serialize lists as strings.""" if v is None: diff --git a/tests/test_coerce.py b/tests/test_coerce.py index a3aebc82..1c826058 100644 --- a/tests/test_coerce.py +++ b/tests/test_coerce.py @@ -1,9 +1,9 @@ -"""Tests for coerce_list, coerce_dict, and strict_search_tags utility functions. +"""Tests for coerce_list, coerce_dict, strict_search_tags, and parse_str_list utility functions. These must fail until the helpers are implemented in utils.py. """ -from basic_memory.utils import coerce_list, coerce_dict, strict_search_tags +from basic_memory.utils import coerce_dict, coerce_list, parse_str_list, strict_search_tags class TestCoerceList: @@ -113,3 +113,66 @@ def test_json_array_string_passthrough(self): def test_int_passthrough(self): assert coerce_dict(42) == 42 + + +class TestParseStrList: + """Tests for parse_str_list — the comma-split coercer for note_types/entity_types/categories.""" + + # --- None input --- + + def test_none_returns_empty_list(self): + assert parse_str_list(None) == [] + + # --- Single string inputs --- + + def test_single_string_wraps_as_one_element(self): + assert parse_str_list("note") == ["note"] + + def test_comma_string_splits(self): + """The primary motivation for this function: "note,task" → ["note", "task"].""" + assert parse_str_list("note,task") == ["note", "task"] + + def test_comma_string_with_spaces_strips(self): + assert parse_str_list("note, task, person") == ["note", "task", "person"] + + # --- JSON array string inputs (MCP clients sometimes serialize lists as strings) --- + + def test_json_array_string(self): + assert parse_str_list('["note", "task"]') == ["note", "task"] + + def test_json_array_string_single_element(self): + assert parse_str_list('["note"]') == ["note"] + + def test_json_array_string_with_comma_elements(self): + """JSON array where an element is itself a comma-string — flatten it.""" + assert parse_str_list('["note,task"]') == ["note", "task"] + + # --- List inputs --- + + def test_plain_list_passthrough(self): + assert parse_str_list(["note", "task"]) == ["note", "task"] + + def test_list_with_comma_element_splits(self): + """A list containing a comma-string is flattened.""" + assert parse_str_list(["note,task"]) == ["note", "task"] + + def test_list_with_multiple_comma_elements(self): + assert parse_str_list(["note,task", "person"]) == ["note", "task", "person"] + + # --- No '#' stripping (unlike parse_tags) --- + + def test_hash_prefix_preserved(self): + """parse_str_list must NOT strip '#' — these are type identifiers, not hashtags.""" + assert parse_str_list("#type") == ["#type"] + + def test_hash_prefix_in_comma_string_preserved(self): + assert parse_str_list("#type,#other") == ["#type", "#other"] + + # --- Non-str/list/None pass through for Pydantic rejection --- + + def test_int_passthrough_for_pydantic_rejection(self): + assert parse_str_list(42) == 42 # type: ignore[arg-type] + + def test_dict_passthrough_for_pydantic_rejection(self): + value = {"a": 1} + assert parse_str_list(value) is value # type: ignore[arg-type] From 0f1d72893ef1d9569128d7a4ff016ba794eebf33 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Thu, 11 Jun 2026 09:38:36 -0500 Subject: [PATCH 2/2] fix(mcp): normalize note_types/entity_types/categories on direct call path and reject non-string list elements Codex review of PR #962 identified two real issues: 1. CLI bypass: the BeforeValidator(parse_str_list) on note_types, entity_types, and categories only fires through MCP/Pydantic validation. The CLI path in cli/commands/tool.py calls search_notes() directly, so `bm tool search-notes --type note,task` arrived as note_types=["note,task"] and matched nothing. Fix: add in-body parse_str_list() normalization for all three params (mirroring the existing parse_tags() call for tags on the same code path). 2. Silent stringify: parse_str_list used str(raw) in the list branch, so [42] became ["42"] before Pydantic saw it, accepting invalid input as a no-result search instead of rejecting it. Fix: guard against non-string list elements and return the original value unchanged so Pydantic rejects it with a clear error. Tests added: annotation-level split tests for note_types/entity_types/categories, non-string-element rejection tests, async direct-call regression for note_types, and unit-level parse_str_list non-string list tests in test_coerce.py. Co-Authored-By: Claude Signed-off-by: Drew Cain --- src/basic_memory/mcp/tools/search.py | 11 ++ src/basic_memory/utils.py | 9 +- tests/mcp/test_tool_search.py | 158 +++++++++++++++++++++++++++ tests/test_coerce.py | 16 +++ 4 files changed, 193 insertions(+), 1 deletion(-) diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py index c6e8778d..b8072f96 100644 --- a/src/basic_memory/mcp/tools/search.py +++ b/src/basic_memory/mcp/tools/search.py @@ -899,6 +899,17 @@ async def search_notes( if page_size < 1: raise ValueError(f"page_size must be >= 1, got {page_size}") + # Trigger: list params arrived via a direct function call instead of the MCP layer. + # Why: the BeforeValidator annotations only run through MCP/Pydantic validation; direct + # callers (e.g. `bm tool search-notes --type note,task` in cli/commands/tool.py, + # which Typer collects as the one-element list ["note,task"]) would otherwise + # forward the comma string as one literal type that matches nothing (#930). + # Outcome: comma-split/list normalization applies on every path; parse_str_list is + # idempotent, so MCP-validated input passes through unchanged. + note_types = parse_str_list(note_types) if note_types is not None else [] + entity_types = parse_str_list(entity_types) if entity_types is not None else [] + categories = parse_str_list(categories) if categories is not None else [] + # Avoid mutable-default-argument footguns. Treat None as "no filter". # Lowercase note_types so "Chapter" matches the stored "chapter". note_types = [t.lower() for t in note_types] if note_types else [] diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 43ed9490..5df2b1ee 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -619,6 +619,13 @@ def parse_str_list(v: Any) -> List[str]: return [] if isinstance(v, list): + # Trigger: a list element is not a string (e.g. [42] or ["note", 42]). + # Why: str(raw) would silently convert 42 → "42" and let invalid caller data pass + # Pydantic validation as a junk filter, producing a silent no-result search. + # Outcome: return the list unchanged so Pydantic rejects it with a clear error. + if not all(isinstance(raw, str) for raw in v if raw is not None): + return v # type: ignore[return-value] + # Trigger: a list element may itself be a comma-separated string (e.g. some MCP clients # serialise `["note,task"]` when the caller passed `note_types="note,task"`). # Outcome: flatten each element by splitting on commas and stripping whitespace. @@ -626,7 +633,7 @@ def parse_str_list(v: Any) -> List[str]: item.strip() for raw in v if raw is not None - for item in str(raw).split(",") + for item in raw.split(",") if item and item.strip() ] diff --git a/tests/mcp/test_tool_search.py b/tests/mcp/test_tool_search.py index d178a5f0..36a96716 100644 --- a/tests/mcp/test_tool_search.py +++ b/tests/mcp/test_tool_search.py @@ -2115,3 +2115,161 @@ def test_default_search_type_falls_back_to_text_when_semantic_disabled(): with patch.object(search_module, "get_container", return_value=mock_container): assert search_module._default_search_type() == "text" + + +# --- Tests for note_types/entity_types/categories comma-split fix (#930, Codex review) --- + + +def test_search_notes_note_types_annotation_splits_comma_strings(): + """The note_types parameter annotation must parse every documented input form (#930). + + Direct function calls bypass the BeforeValidator; validate through the same + Annotated metadata pydantic applies on the MCP path. The old coerce_list wrapped a + bare comma string as the single literal type ["note,task"]; parse_str_list splits it. + """ + annotation = inspect.signature(search_notes).parameters["note_types"].annotation + adapter = TypeAdapter(annotation) + + real_list = adapter.validate_python(["note", "task"]) + comma_string = adapter.validate_python("note,task") + json_string = adapter.validate_python('["note", "task"]') + single_string = adapter.validate_python("note") + comma_in_list = adapter.validate_python(["note,task"]) + + assert real_list == ["note", "task"] + assert comma_string == real_list, "comma string must behave like the real list" + assert json_string == real_list + assert single_string == ["note"] + assert comma_in_list == real_list, "list with comma element must be flattened" + + +def test_search_notes_entity_types_annotation_splits_comma_strings(): + """The entity_types parameter annotation must parse every documented input form (#930).""" + annotation = inspect.signature(search_notes).parameters["entity_types"].annotation + adapter = TypeAdapter(annotation) + + real_list = adapter.validate_python(["entity", "observation"]) + comma_string = adapter.validate_python("entity,observation") + comma_in_list = adapter.validate_python(["entity,observation"]) + + assert real_list == ["entity", "observation"] + assert comma_string == real_list + assert comma_in_list == real_list + + +def test_search_notes_categories_annotation_splits_comma_strings(): + """The categories parameter annotation must parse every documented input form (#930).""" + annotation = inspect.signature(search_notes).parameters["categories"].annotation + adapter = TypeAdapter(annotation) + + real_list = adapter.validate_python(["requirement", "decision"]) + comma_string = adapter.validate_python("requirement,decision") + comma_in_list = adapter.validate_python(["requirement,decision"]) + + assert real_list == ["requirement", "decision"] + assert comma_string == real_list + assert comma_in_list == real_list + + +def test_search_notes_note_types_annotation_rejects_non_string_list_elements(): + """note_types=[42] must fail Pydantic validation, not be stringified to ['42']. + + parse_str_list used str(raw) to coerce list elements, silently accepting [42] as + ["42"]. The fix guards against non-string list elements and returns the original + value so Pydantic rejects it with a clear error. + """ + from pydantic import ValidationError + + annotation = inspect.signature(search_notes).parameters["note_types"].annotation + adapter = TypeAdapter(annotation) + + with pytest.raises(ValidationError): + adapter.validate_python([42]) + with pytest.raises(ValidationError): + adapter.validate_python(["note", 42]) + + # All-string lists remain valid. + assert adapter.validate_python(["note", "task"]) == ["note", "task"] + + +def test_search_notes_entity_types_annotation_rejects_non_string_list_elements(): + """entity_types=[42] must fail Pydantic validation, not be stringified.""" + from pydantic import ValidationError + + annotation = inspect.signature(search_notes).parameters["entity_types"].annotation + adapter = TypeAdapter(annotation) + + with pytest.raises(ValidationError): + adapter.validate_python([42]) + with pytest.raises(ValidationError): + adapter.validate_python(["entity", 42]) + + assert adapter.validate_python(["entity", "observation"]) == ["entity", "observation"] + + +def test_search_notes_categories_annotation_rejects_non_string_list_elements(): + """categories=[42] must fail Pydantic validation, not be stringified.""" + from pydantic import ValidationError + + annotation = inspect.signature(search_notes).parameters["categories"].annotation + adapter = TypeAdapter(annotation) + + with pytest.raises(ValidationError): + adapter.validate_python([42]) + with pytest.raises(ValidationError): + adapter.validate_python(["requirement", 42]) + + assert adapter.validate_python(["requirement", "decision"]) == ["requirement", "decision"] + + +@pytest.mark.asyncio +async def test_search_notes_direct_call_splits_comma_note_types(client, test_project): + """Direct callers bypass the BeforeValidator, so the body must normalize note_types. + + Regression for the CLI path: `bm tool search-notes --type note,task` calls this + function directly with Typer's collected list ["note,task"], which must split into + ["note", "task"] and match the note correctly (#930, Codex review follow-up). + """ + await write_note( + project=test_project.name, + title="Direct NoteType Split Note", + directory="test", + content="# Direct NoteType Split Note\nNoteTypeSplitToken body", + ) + + async def found(note_types_value: list[str] | None) -> bool: + result = await search_notes( + project=test_project.name, + query="NoteTypeSplitToken", + search_type="text", + output_format="json", + note_types=note_types_value, + ) + assert isinstance(result, dict), f"search failed: {result}" + return any(r["title"] == "Direct NoteType Split Note" for r in result["results"]) + + assert await found(None), "no filter must match (sanity)" + assert await found(["note"]), "plain single-type list must match (sanity)" + # The CLI regression: Typer collects --type note,task as the single element "note,task". + assert await found(["note,task"]), "comma list element must be flattened and match 'note'" + # Negative control: a specific nonexistent type must not match. + assert not await found(["nonexistent_type"]) + + +def test_search_notes_parse_str_list_rejects_non_string_list_elements_in_place(): + """parse_str_list must return non-str list elements unchanged for Pydantic rejection. + + The old implementation used str(raw) which silently coerced [42] -> ['42'], + causing bad caller data to become silent no-result searches instead of a + clear Pydantic validation error. + """ + from basic_memory.utils import parse_str_list + + # Non-string list elements pass through unchanged. + assert parse_str_list([42]) == [42] # type: ignore[arg-type] + assert parse_str_list(["ok", 42]) == ["ok", 42] # type: ignore[arg-type] + assert parse_str_list([{"a": 1}]) == [{"a": 1}] # type: ignore[arg-type] + + # All-string lists still work correctly. + assert parse_str_list(["note", "task"]) == ["note", "task"] + assert parse_str_list(["note,task"]) == ["note", "task"] diff --git a/tests/test_coerce.py b/tests/test_coerce.py index 1c826058..91cd42cc 100644 --- a/tests/test_coerce.py +++ b/tests/test_coerce.py @@ -176,3 +176,19 @@ def test_int_passthrough_for_pydantic_rejection(self): def test_dict_passthrough_for_pydantic_rejection(self): value = {"a": 1} assert parse_str_list(value) is value # type: ignore[arg-type] + + # --- Non-string list elements pass through unchanged (Codex review fix) --- + + def test_int_list_passthrough_for_pydantic_rejection(self): + """Lists with non-string elements must not be stringified ([42] → ['42']).""" + value = [42] + assert parse_str_list(value) is value # type: ignore[arg-type] + + def test_mixed_list_passthrough_for_pydantic_rejection(self): + """One non-string element poisons the whole list — no partial coercion.""" + value = ["note", 42] + assert parse_str_list(value) is value # type: ignore[arg-type] + + def test_dict_list_passthrough_for_pydantic_rejection(self): + value = [{"a": 1}] + assert parse_str_list(value) is value # type: ignore[arg-type]