From 736aa840aaa9c9383bab109c1f78ef09b2e8e9a3 Mon Sep 17 00:00:00 2001
From: Drew Cain <groksrc@gmail.com>
Date: Thu, 11 Jun 2026 01:10:06 -0500
Subject: [PATCH 1/2] feat(mcp): comma-split note_types/entity_types/categories
 in search_notes (#930)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `parse_str_list` to utils.py — like `parse_tags` but without stripping
'#' — and wire it as the BeforeValidator for note_types, entity_types, and
categories in search_notes. This makes passing "note,task" or
'["note","task"]' work correctly instead of being wrapped as a single literal
value by coerce_list.

coerce_list is left unchanged; canvas and other callers that depend on its
wrap-single-string behaviour are unaffected.

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Drew Cain <groksrc@gmail.com>
---
 src/basic_memory/mcp/tools/search.py | 16 ++++---
 src/basic_memory/utils.py            | 49 ++++++++++++++++++++
 tests/test_coerce.py                 | 67 +++++++++++++++++++++++++++-
 3 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py
index 08f87bc1..c6e8778d 100644
--- a/src/basic_memory/mcp/tools/search.py
+++ b/src/basic_memory/mcp/tools/search.py
@@ -14,7 +14,7 @@
 from basic_memory.utils import (
     build_canonical_permalink,
     coerce_dict,
-    coerce_list,
+    parse_str_list,
     parse_tags,
     strict_search_tags,
 )
@@ -649,24 +649,30 @@ async def search_notes(
     # Plural-vs-singular trips models constantly. Accept the singular too.
     note_types: Annotated[
         List[str] | None,
-        BeforeValidator(coerce_list),
+        # parse_str_list, not coerce_list: "note,task" must split into ["note", "task"]
+        # consistent with how tags are handled (#910/#930). coerce_list wraps the whole
+        # comma string as the single literal type ["note,task"], which matches nothing.
+        BeforeValidator(parse_str_list),
         Field(default=None, validation_alias=AliasChoices("note_types", "note_type", "types")),
         "Filter by the 'type' field in note frontmatter (e.g. 'note', 'chapter', 'person'). "
+        "Accepts a list, a comma-separated string (e.g. 'note,task'), or a JSON-array string. "
         "Case-insensitive.",
     ] = None,
     entity_types: Annotated[
         List[str] | None,
-        BeforeValidator(coerce_list),
+        BeforeValidator(parse_str_list),
         Field(default=None, validation_alias=AliasChoices("entity_types", "entity_type")),
         "Filter by knowledge graph item type: 'entity' (whole notes), 'observation', or "
         "'relation'. Defaults to 'entity'. Do NOT pass schema/frontmatter types like "
-        "'Chapter' here — use note_types instead.",
+        "'Chapter' here — use note_types instead. "
+        "Accepts a list, a comma-separated string (e.g. 'entity,observation'), or a JSON-array string.",
     ] = None,
     categories: Annotated[
         List[str] | None,
-        BeforeValidator(coerce_list),
+        BeforeValidator(parse_str_list),
         Field(default=None, validation_alias=AliasChoices("categories", "category")),
         "Filter observation results to these exact categories (e.g. ['requirement']). "
+        "Accepts a list, a comma-separated string (e.g. 'requirement,decision'), or a JSON-array string. "
         "Pair with entity_types=['observation'] to return only observations whose "
         "category matches exactly — not every row mentioning the word.",
     ] = None,
diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py
index 05249498..43ed9490 100644
--- a/src/basic_memory/utils.py
+++ b/src/basic_memory/utils.py
@@ -600,6 +600,55 @@ def strict_search_tags(v: Any) -> Any:
     return v
 
 
+def parse_str_list(v: Any) -> List[str]:
+    """Parse a list of plain strings from various input formats.
+
+    Like parse_tags but without stripping '#' — correct for type/category params
+    where the value is a literal identifier, not a hashtag.
+
+    Handles the four input shapes that MCP clients commonly produce:
+    - None → []
+    - "note,task" → ["note", "task"] (comma-split string)
+    - '["note","task"]' → ["note", "task"] (JSON array string)
+    - ["note,task"] → ["note", "task"] (list with comma-string element)
+
+    Non-str/list/None values are returned unchanged so Pydantic can reject them
+    with a clear validation error instead of silently coercing.
+    """
+    if v is None:
+        return []
+
+    if isinstance(v, list):
+        # Trigger: a list element may itself be a comma-separated string (e.g. some MCP clients
+        #   serialise `["note,task"]` when the caller passed `note_types="note,task"`).
+        # Outcome: flatten each element by splitting on commas and stripping whitespace.
+        return [
+            item.strip()
+            for raw in v
+            if raw is not None
+            for item in str(raw).split(",")
+            if item and item.strip()
+        ]
+
+    if isinstance(v, str):
+        # Trigger: MCP clients sometimes send a JSON array string like '["note","task"]'.
+        # Outcome: parse it as JSON first, then recurse to handle the resulting list.
+        stripped = v.strip()
+        if stripped.startswith("[") and stripped.endswith("]"):
+            try:
+                parsed_json = json.loads(stripped)
+                if isinstance(parsed_json, list):
+                    return parse_str_list(parsed_json)
+            except json.JSONDecodeError:
+                pass
+
+        # Plain comma-separated string: "note,task" → ["note", "task"]
+        return [item.strip() for item in v.split(",") if item and item.strip()]
+
+    # Non-str/list/None — return unchanged so Pydantic rejects with a clear error.
+    return v  # type: ignore[return-value]
+
+
 def coerce_list(v: Any) -> Any:
     """Coerce string input to list for MCP clients that serialize lists as strings."""
     if v is None:
diff --git a/tests/test_coerce.py b/tests/test_coerce.py
index a3aebc82..1c826058 100644
--- a/tests/test_coerce.py
+++ b/tests/test_coerce.py
@@ -1,9 +1,9 @@
-"""Tests for coerce_list, coerce_dict, and strict_search_tags utility functions.
+"""Tests for coerce_list, coerce_dict, strict_search_tags, and parse_str_list utility functions.
 
 These must fail until the helpers are implemented in utils.py.
 """
 
-from basic_memory.utils import coerce_list, coerce_dict, strict_search_tags
+from basic_memory.utils import coerce_dict, coerce_list, parse_str_list, strict_search_tags
 
 
 class TestCoerceList:
@@ -113,3 +113,66 @@ def test_json_array_string_passthrough(self):
 
     def test_int_passthrough(self):
         assert coerce_dict(42) == 42
+
+
+class TestParseStrList:
+    """Tests for parse_str_list — the comma-split coercer for note_types/entity_types/categories."""
+
+    # --- None input ---
+
+    def test_none_returns_empty_list(self):
+        assert parse_str_list(None) == []
+
+    # --- Single string inputs ---
+
+    def test_single_string_wraps_as_one_element(self):
+        assert parse_str_list("note") == ["note"]
+
+    def test_comma_string_splits(self):
+        """The primary motivation for this function: "note,task" → ["note", "task"]."""
+        assert parse_str_list("note,task") == ["note", "task"]
+
+    def test_comma_string_with_spaces_strips(self):
+        assert parse_str_list("note, task, person") == ["note", "task", "person"]
+
+    # --- JSON array string inputs (MCP clients sometimes serialize lists as strings) ---
+
+    def test_json_array_string(self):
+        assert parse_str_list('["note", "task"]') == ["note", "task"]
+
+    def test_json_array_string_single_element(self):
+        assert parse_str_list('["note"]') == ["note"]
+
+    def test_json_array_string_with_comma_elements(self):
+        """JSON array where an element is itself a comma-string — flatten it."""
+        assert parse_str_list('["note,task"]') == ["note", "task"]
+
+    # --- List inputs ---
+
+    def test_plain_list_passthrough(self):
+        assert parse_str_list(["note", "task"]) == ["note", "task"]
+
+    def test_list_with_comma_element_splits(self):
+        """A list containing a comma-string is flattened."""
+        assert parse_str_list(["note,task"]) == ["note", "task"]
+
+    def test_list_with_multiple_comma_elements(self):
+        assert parse_str_list(["note,task", "person"]) == ["note", "task", "person"]
+
+    # --- No '#' stripping (unlike parse_tags) ---
+
+    def test_hash_prefix_preserved(self):
+        """parse_str_list must NOT strip '#' — these are type identifiers, not hashtags."""
+        assert parse_str_list("#type") == ["#type"]
+
+    def test_hash_prefix_in_comma_string_preserved(self):
+        assert parse_str_list("#type,#other") == ["#type", "#other"]
+
+    # --- Non-str/list/None pass through for Pydantic rejection ---
+
+    def test_int_passthrough_for_pydantic_rejection(self):
+        assert parse_str_list(42) == 42  # type: ignore[arg-type]
+
+    def test_dict_passthrough_for_pydantic_rejection(self):
+        value = {"a": 1}
+        assert parse_str_list(value) is value  # type: ignore[arg-type]

From 0f1d72893ef1d9569128d7a4ff016ba794eebf33 Mon Sep 17 00:00:00 2001
From: Drew Cain <groksrc@gmail.com>
Date: Thu, 11 Jun 2026 09:38:36 -0500
Subject: [PATCH 2/2] fix(mcp): normalize note_types/entity_types/categories on
 direct call path and reject non-string list elements

Codex review of PR #962 identified two real issues:

1. CLI bypass: the BeforeValidator(parse_str_list) on note_types, entity_types, and
   categories only fires through MCP/Pydantic validation. The CLI path in
   cli/commands/tool.py calls search_notes() directly, so `bm tool search-notes
   --type note,task` arrived as note_types=["note,task"] and matched nothing.
   Fix: add in-body parse_str_list() normalization for all three params (mirroring
   the existing parse_tags() call for tags on the same code path).

2. Silent stringify: parse_str_list used str(raw) in the list branch, so [42] became
   ["42"] before Pydantic saw it, accepting invalid input as a no-result search
   instead of rejecting it. Fix: guard against non-string list elements and return
   the original value unchanged so Pydantic rejects it with a clear error.

Tests added: annotation-level split tests for note_types/entity_types/categories,
non-string-element rejection tests, async direct-call regression for note_types,
and unit-level parse_str_list non-string list tests in test_coerce.py.

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Drew Cain <groksrc@gmail.com>
---
 src/basic_memory/mcp/tools/search.py |  11 ++
 src/basic_memory/utils.py            |   9 +-
 tests/mcp/test_tool_search.py        | 158 +++++++++++++++++++++++++++
 tests/test_coerce.py                 |  16 +++
 4 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py
index c6e8778d..b8072f96 100644
--- a/src/basic_memory/mcp/tools/search.py
+++ b/src/basic_memory/mcp/tools/search.py
@@ -899,6 +899,17 @@ async def search_notes(
     if page_size < 1:
         raise ValueError(f"page_size must be >= 1, got {page_size}")
 
+    # Trigger: list params arrived via a direct function call instead of the MCP layer.
+    # Why: the BeforeValidator annotations only run through MCP/Pydantic validation; direct
+    #      callers (e.g. `bm tool search-notes --type note,task` in cli/commands/tool.py,
+    #      which Typer collects as the one-element list ["note,task"]) would otherwise
+    #      forward the comma string as one literal type that matches nothing (#930).
+    # Outcome: comma-split/list normalization applies on every path; parse_str_list is
+    #          idempotent, so MCP-validated input passes through unchanged.
+    note_types = parse_str_list(note_types) if note_types is not None else []
+    entity_types = parse_str_list(entity_types) if entity_types is not None else []
+    categories = parse_str_list(categories) if categories is not None else []
+
     # Avoid mutable-default-argument footguns. Treat None as "no filter".
     # Lowercase note_types so "Chapter" matches the stored "chapter".
     note_types = [t.lower() for t in note_types] if note_types else []
diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py
index 43ed9490..5df2b1ee 100644
--- a/src/basic_memory/utils.py
+++ b/src/basic_memory/utils.py
@@ -619,6 +619,13 @@ def parse_str_list(v: Any) -> List[str]:
         return []
 
     if isinstance(v, list):
+        # Trigger: a list element is not a string (e.g. [42] or ["note", 42]).
+        # Why: str(raw) would silently convert 42 → "42" and let invalid caller data pass
+        #   Pydantic validation as a junk filter, producing a silent no-result search.
+        # Outcome: return the list unchanged so Pydantic rejects it with a clear error.
+        if not all(isinstance(raw, str) for raw in v if raw is not None):
+            return v  # type: ignore[return-value]
+
         # Trigger: a list element may itself be a comma-separated string (e.g. some MCP clients
         #   serialise `["note,task"]` when the caller passed `note_types="note,task"`).
         # Outcome: flatten each element by splitting on commas and stripping whitespace.
@@ -626,7 +633,7 @@ def parse_str_list(v: Any) -> List[str]:
             item.strip()
             for raw in v
             if raw is not None
-            for item in str(raw).split(",")
+            for item in raw.split(",")
             if item and item.strip()
         ]
 
diff --git a/tests/mcp/test_tool_search.py b/tests/mcp/test_tool_search.py
index d178a5f0..36a96716 100644
--- a/tests/mcp/test_tool_search.py
+++ b/tests/mcp/test_tool_search.py
@@ -2115,3 +2115,161 @@ def test_default_search_type_falls_back_to_text_when_semantic_disabled():
 
     with patch.object(search_module, "get_container", return_value=mock_container):
         assert search_module._default_search_type() == "text"
+
+
+# --- Tests for note_types/entity_types/categories comma-split fix (#930, Codex review) ---
+
+
+def test_search_notes_note_types_annotation_splits_comma_strings():
+    """The note_types parameter annotation must parse every documented input form (#930).
+
+    Direct function calls bypass the BeforeValidator; validate through the same
+    Annotated metadata pydantic applies on the MCP path. The old coerce_list wrapped a
+    bare comma string as the single literal type ["note,task"]; parse_str_list splits it.
+    """
+    annotation = inspect.signature(search_notes).parameters["note_types"].annotation
+    adapter = TypeAdapter(annotation)
+
+    real_list = adapter.validate_python(["note", "task"])
+    comma_string = adapter.validate_python("note,task")
+    json_string = adapter.validate_python('["note", "task"]')
+    single_string = adapter.validate_python("note")
+    comma_in_list = adapter.validate_python(["note,task"])
+
+    assert real_list == ["note", "task"]
+    assert comma_string == real_list, "comma string must behave like the real list"
+    assert json_string == real_list
+    assert single_string == ["note"]
+    assert comma_in_list == real_list, "list with comma element must be flattened"
+
+
+def test_search_notes_entity_types_annotation_splits_comma_strings():
+    """The entity_types parameter annotation must parse every documented input form (#930)."""
+    annotation = inspect.signature(search_notes).parameters["entity_types"].annotation
+    adapter = TypeAdapter(annotation)
+
+    real_list = adapter.validate_python(["entity", "observation"])
+    comma_string = adapter.validate_python("entity,observation")
+    comma_in_list = adapter.validate_python(["entity,observation"])
+
+    assert real_list == ["entity", "observation"]
+    assert comma_string == real_list
+    assert comma_in_list == real_list
+
+
+def test_search_notes_categories_annotation_splits_comma_strings():
+    """The categories parameter annotation must parse every documented input form (#930)."""
+    annotation = inspect.signature(search_notes).parameters["categories"].annotation
+    adapter = TypeAdapter(annotation)
+
+    real_list = adapter.validate_python(["requirement", "decision"])
+    comma_string = adapter.validate_python("requirement,decision")
+    comma_in_list = adapter.validate_python(["requirement,decision"])
+
+    assert real_list == ["requirement", "decision"]
+    assert comma_string == real_list
+    assert comma_in_list == real_list
+
+
+def test_search_notes_note_types_annotation_rejects_non_string_list_elements():
+    """note_types=[42] must fail Pydantic validation, not be stringified to ['42'].
+
+    parse_str_list used str(raw) to coerce list elements, silently accepting [42] as
+    ["42"]. The fix guards against non-string list elements and returns the original
+    value so Pydantic rejects it with a clear error.
+    """
+    from pydantic import ValidationError
+
+    annotation = inspect.signature(search_notes).parameters["note_types"].annotation
+    adapter = TypeAdapter(annotation)
+
+    with pytest.raises(ValidationError):
+        adapter.validate_python([42])
+    with pytest.raises(ValidationError):
+        adapter.validate_python(["note", 42])
+
+    # All-string lists remain valid.
+    assert adapter.validate_python(["note", "task"]) == ["note", "task"]
+
+
+def test_search_notes_entity_types_annotation_rejects_non_string_list_elements():
+    """entity_types=[42] must fail Pydantic validation, not be stringified."""
+    from pydantic import ValidationError
+
+    annotation = inspect.signature(search_notes).parameters["entity_types"].annotation
+    adapter = TypeAdapter(annotation)
+
+    with pytest.raises(ValidationError):
+        adapter.validate_python([42])
+    with pytest.raises(ValidationError):
+        adapter.validate_python(["entity", 42])
+
+    assert adapter.validate_python(["entity", "observation"]) == ["entity", "observation"]
+
+
+def test_search_notes_categories_annotation_rejects_non_string_list_elements():
+    """categories=[42] must fail Pydantic validation, not be stringified."""
+    from pydantic import ValidationError
+
+    annotation = inspect.signature(search_notes).parameters["categories"].annotation
+    adapter = TypeAdapter(annotation)
+
+    with pytest.raises(ValidationError):
+        adapter.validate_python([42])
+    with pytest.raises(ValidationError):
+        adapter.validate_python(["requirement", 42])
+
+    assert adapter.validate_python(["requirement", "decision"]) == ["requirement", "decision"]
+
+
+@pytest.mark.asyncio
+async def test_search_notes_direct_call_splits_comma_note_types(client, test_project):
+    """Direct callers bypass the BeforeValidator, so the body must normalize note_types.
+
+    Regression for the CLI path: `bm tool search-notes --type note,task` calls this
+    function directly with Typer's collected list ["note,task"], which must split into
+    ["note", "task"] and match the note correctly (#930, Codex review follow-up).
+    """
+    await write_note(
+        project=test_project.name,
+        title="Direct NoteType Split Note",
+        directory="test",
+        content="# Direct NoteType Split Note\nNoteTypeSplitToken body",
+    )
+
+    async def found(note_types_value: list[str] | None) -> bool:
+        result = await search_notes(
+            project=test_project.name,
+            query="NoteTypeSplitToken",
+            search_type="text",
+            output_format="json",
+            note_types=note_types_value,
+        )
+        assert isinstance(result, dict), f"search failed: {result}"
+        return any(r["title"] == "Direct NoteType Split Note" for r in result["results"])
+
+    assert await found(None), "no filter must match (sanity)"
+    assert await found(["note"]), "plain single-type list must match (sanity)"
+    # The CLI regression: Typer collects --type note,task as the single element "note,task".
+    assert await found(["note,task"]), "comma list element must be flattened and match 'note'"
+    # Negative control: a specific nonexistent type must not match.
+    assert not await found(["nonexistent_type"])
+
+
+def test_search_notes_parse_str_list_rejects_non_string_list_elements_in_place():
+    """parse_str_list must return non-str list elements unchanged for Pydantic rejection.
+
+    The old implementation used str(raw) which silently coerced [42] -> ['42'],
+    causing bad caller data to become silent no-result searches instead of a
+    clear Pydantic validation error.
+    """
+    from basic_memory.utils import parse_str_list
+
+    # Non-string list elements pass through unchanged.
+    assert parse_str_list([42]) == [42]  # type: ignore[arg-type]
+    assert parse_str_list(["ok", 42]) == ["ok", 42]  # type: ignore[arg-type]
+    assert parse_str_list([{"a": 1}]) == [{"a": 1}]  # type: ignore[arg-type]
+
+    # All-string lists still work correctly.
+    assert parse_str_list(["note", "task"]) == ["note", "task"]
+    assert parse_str_list(["note,task"]) == ["note", "task"]
diff --git a/tests/test_coerce.py b/tests/test_coerce.py
index 1c826058..91cd42cc 100644
--- a/tests/test_coerce.py
+++ b/tests/test_coerce.py
@@ -176,3 +176,19 @@ def test_int_passthrough_for_pydantic_rejection(self):
     def test_dict_passthrough_for_pydantic_rejection(self):
         value = {"a": 1}
         assert parse_str_list(value) is value  # type: ignore[arg-type]
+
+    # --- Non-string list elements pass through unchanged (Codex review fix) ---
+
+    def test_int_list_passthrough_for_pydantic_rejection(self):
+        """Lists with non-string elements must not be stringified ([42] → ['42'])."""
+        value = [42]
+        assert parse_str_list(value) is value  # type: ignore[arg-type]
+
+    def test_mixed_list_passthrough_for_pydantic_rejection(self):
+        """One non-string element poisons the whole list — no partial coercion."""
+        value = ["note", 42]
+        assert parse_str_list(value) is value  # type: ignore[arg-type]
+
+    def test_dict_list_passthrough_for_pydantic_rejection(self):
+        value = [{"a": 1}]
+        assert parse_str_list(value) is value  # type: ignore[arg-type]