Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions src/basic_memory/mcp/tools/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from basic_memory.utils import (
build_canonical_permalink,
coerce_dict,
coerce_list,
parse_str_list,
parse_tags,
strict_search_tags,
)
Expand Down Expand Up @@ -649,24 +649,30 @@ async def search_notes(
# Plural-vs-singular trips models constantly. Accept the singular too.
note_types: Annotated[
List[str] | None,
BeforeValidator(coerce_list),
# parse_str_list, not coerce_list: "note,task" must split into ["note", "task"]
# consistent with how tags are handled (#910/#930). coerce_list wraps the whole
# comma string as the single literal type ["note,task"], which matches nothing.
BeforeValidator(parse_str_list),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Normalize comma filters outside MCP validation

This validator only runs when FastMCP/Pydantic validates a tool call, but the CLI path in src/basic_memory/cli/commands/tool.py invokes mcp_search(...) directly with repeatable options. In that path bm tool search-notes --type note,task reaches the function as note_types=["note,task"], skips this BeforeValidator, and line 904 still forwards the literal type "note,task", which matches nothing. Tags already compensate with an in-body parse_tags(tags) for the same direct-call reason; these new comma-split filters need equivalent normalization in the function body.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verified and fixed in commit 0f1d728.

Claim confirmed: BeforeValidator(parse_str_list) only fires through the MCP/Pydantic path. The CLI path (cli/commands/tool.py) calls search_notes() directly, bypassing it. bm tool search-notes --type note,task arrived as note_types=["note,task"] and matched nothing, while tags already had in-body parse_tags() normalization for the same reason.

Fix: Added in-body parse_str_list() calls for note_types, entity_types, and categories in the search_notes function body, mirroring the parse_tags(tags) pattern. The validators remain for the MCP path; the body normalization covers direct callers. parse_str_list is idempotent so already-split lists pass through unchanged.

Tests added: annotation-level comma-split tests for all three params, and an async direct-call regression test (test_search_notes_direct_call_splits_comma_note_types) that calls search_notes() directly with note_types=["note,task"] and confirms it matches a real note.

In reply to: #962 (comment)

Field(default=None, validation_alias=AliasChoices("note_types", "note_type", "types")),
"Filter by the 'type' field in note frontmatter (e.g. 'note', 'chapter', 'person'). "
"Accepts a list, a comma-separated string (e.g. 'note,task'), or a JSON-array string. "
"Case-insensitive.",
] = None,
entity_types: Annotated[
List[str] | None,
BeforeValidator(coerce_list),
BeforeValidator(parse_str_list),
Field(default=None, validation_alias=AliasChoices("entity_types", "entity_type")),
"Filter by knowledge graph item type: 'entity' (whole notes), 'observation', or "
"'relation'. Defaults to 'entity'. Do NOT pass schema/frontmatter types like "
"'Chapter' here — use note_types instead.",
"'Chapter' here — use note_types instead. "
"Accepts a list, a comma-separated string (e.g. 'entity,observation'), or a JSON-array string.",
] = None,
categories: Annotated[
List[str] | None,
BeforeValidator(coerce_list),
BeforeValidator(parse_str_list),
Field(default=None, validation_alias=AliasChoices("categories", "category")),
"Filter observation results to these exact categories (e.g. ['requirement']). "
"Accepts a list, a comma-separated string (e.g. 'requirement,decision'), or a JSON-array string. "
"Pair with entity_types=['observation'] to return only observations whose "
"category matches exactly — not every row mentioning the word.",
] = None,
Expand Down Expand Up @@ -893,6 +899,17 @@ async def search_notes(
if page_size < 1:
raise ValueError(f"page_size must be >= 1, got {page_size}")

# Trigger: list params arrived via a direct function call instead of the MCP layer.
# Why: the BeforeValidator annotations only run through MCP/Pydantic validation; direct
# callers (e.g. `bm tool search-notes --type note,task` in cli/commands/tool.py,
# which Typer collects as the one-element list ["note,task"]) would otherwise
# forward the comma string as one literal type that matches nothing (#930).
# Outcome: comma-split/list normalization applies on every path; parse_str_list is
# idempotent, so MCP-validated input passes through unchanged.
note_types = parse_str_list(note_types) if note_types is not None else []
entity_types = parse_str_list(entity_types) if entity_types is not None else []
categories = parse_str_list(categories) if categories is not None else []

# Avoid mutable-default-argument footguns. Treat None as "no filter".
# Lowercase note_types so "Chapter" matches the stored "chapter".
note_types = [t.lower() for t in note_types] if note_types else []
Expand Down
56 changes: 56 additions & 0 deletions src/basic_memory/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,62 @@ def strict_search_tags(v: Any) -> Any:
return v


def parse_str_list(v: Any) -> List[str]:
"""Parse a list of plain strings from various input formats.

Like parse_tags but without stripping '#' — correct for type/category params
where the value is a literal identifier, not a hashtag.

Handles the four input shapes that MCP clients commonly produce:
- None → []
- "note,task" → ["note", "task"] (comma-split string)
- '["note","task"]' → ["note", "task"] (JSON array string)
- ["note,task"] → ["note", "task"] (list with comma-string element)

Non-str/list/None values are returned unchanged so Pydantic can reject them
with a clear validation error instead of silently coercing.
"""
if v is None:
return []

if isinstance(v, list):
# Trigger: a list element is not a string (e.g. [42] or ["note", 42]).
# Why: str(raw) would silently convert 42 → "42" and let invalid caller data pass
# Pydantic validation as a junk filter, producing a silent no-result search.
# Outcome: return the list unchanged so Pydantic rejects it with a clear error.
if not all(isinstance(raw, str) for raw in v if raw is not None):
return v # type: ignore[return-value]

# Trigger: a list element may itself be a comma-separated string (e.g. some MCP clients
# serialise `["note,task"]` when the caller passed `note_types="note,task"`).
# Outcome: flatten each element by splitting on commas and stripping whitespace.
return [
item.strip()
for raw in v
if raw is not None
for item in raw.split(",")
if item and item.strip()
]

if isinstance(v, str):
# Trigger: MCP clients sometimes send a JSON array string like '["note","task"]'.
# Outcome: parse it as JSON first, then recurse to handle the resulting list.
stripped = v.strip()
if stripped.startswith("[") and stripped.endswith("]"):
try:
parsed_json = json.loads(stripped)
if isinstance(parsed_json, list):
return parse_str_list(parsed_json)
except json.JSONDecodeError:
pass

# Plain comma-separated string: "note,task" → ["note", "task"]
return [item.strip() for item in v.split(",") if item and item.strip()]

# Non-str/list/None — return unchanged so Pydantic rejects with a clear error.
return v # type: ignore[return-value]


def coerce_list(v: Any) -> Any:
"""Coerce string input to list for MCP clients that serialize lists as strings."""
if v is None:
Expand Down
158 changes: 158 additions & 0 deletions tests/mcp/test_tool_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2115,3 +2115,161 @@ def test_default_search_type_falls_back_to_text_when_semantic_disabled():

with patch.object(search_module, "get_container", return_value=mock_container):
assert search_module._default_search_type() == "text"


# --- Tests for note_types/entity_types/categories comma-split fix (#930, Codex review) ---


def test_search_notes_note_types_annotation_splits_comma_strings():
"""The note_types parameter annotation must parse every documented input form (#930).

Direct function calls bypass the BeforeValidator; validate through the same
Annotated metadata pydantic applies on the MCP path. The old coerce_list wrapped a
bare comma string as the single literal type ["note,task"]; parse_str_list splits it.
"""
annotation = inspect.signature(search_notes).parameters["note_types"].annotation
adapter = TypeAdapter(annotation)

real_list = adapter.validate_python(["note", "task"])
comma_string = adapter.validate_python("note,task")
json_string = adapter.validate_python('["note", "task"]')
single_string = adapter.validate_python("note")
comma_in_list = adapter.validate_python(["note,task"])

assert real_list == ["note", "task"]
assert comma_string == real_list, "comma string must behave like the real list"
assert json_string == real_list
assert single_string == ["note"]
assert comma_in_list == real_list, "list with comma element must be flattened"


def test_search_notes_entity_types_annotation_splits_comma_strings():
"""The entity_types parameter annotation must parse every documented input form (#930)."""
annotation = inspect.signature(search_notes).parameters["entity_types"].annotation
adapter = TypeAdapter(annotation)

real_list = adapter.validate_python(["entity", "observation"])
comma_string = adapter.validate_python("entity,observation")
comma_in_list = adapter.validate_python(["entity,observation"])

assert real_list == ["entity", "observation"]
assert comma_string == real_list
assert comma_in_list == real_list


def test_search_notes_categories_annotation_splits_comma_strings():
"""The categories parameter annotation must parse every documented input form (#930)."""
annotation = inspect.signature(search_notes).parameters["categories"].annotation
adapter = TypeAdapter(annotation)

real_list = adapter.validate_python(["requirement", "decision"])
comma_string = adapter.validate_python("requirement,decision")
comma_in_list = adapter.validate_python(["requirement,decision"])

assert real_list == ["requirement", "decision"]
assert comma_string == real_list
assert comma_in_list == real_list


def test_search_notes_note_types_annotation_rejects_non_string_list_elements():
"""note_types=[42] must fail Pydantic validation, not be stringified to ['42'].

parse_str_list used str(raw) to coerce list elements, silently accepting [42] as
["42"]. The fix guards against non-string list elements and returns the original
value so Pydantic rejects it with a clear error.
"""
from pydantic import ValidationError

annotation = inspect.signature(search_notes).parameters["note_types"].annotation
adapter = TypeAdapter(annotation)

with pytest.raises(ValidationError):
adapter.validate_python([42])
with pytest.raises(ValidationError):
adapter.validate_python(["note", 42])

# All-string lists remain valid.
assert adapter.validate_python(["note", "task"]) == ["note", "task"]


def test_search_notes_entity_types_annotation_rejects_non_string_list_elements():
"""entity_types=[42] must fail Pydantic validation, not be stringified."""
from pydantic import ValidationError

annotation = inspect.signature(search_notes).parameters["entity_types"].annotation
adapter = TypeAdapter(annotation)

with pytest.raises(ValidationError):
adapter.validate_python([42])
with pytest.raises(ValidationError):
adapter.validate_python(["entity", 42])

assert adapter.validate_python(["entity", "observation"]) == ["entity", "observation"]


def test_search_notes_categories_annotation_rejects_non_string_list_elements():
"""categories=[42] must fail Pydantic validation, not be stringified."""
from pydantic import ValidationError

annotation = inspect.signature(search_notes).parameters["categories"].annotation
adapter = TypeAdapter(annotation)

with pytest.raises(ValidationError):
adapter.validate_python([42])
with pytest.raises(ValidationError):
adapter.validate_python(["requirement", 42])

assert adapter.validate_python(["requirement", "decision"]) == ["requirement", "decision"]


@pytest.mark.asyncio
async def test_search_notes_direct_call_splits_comma_note_types(client, test_project):
"""Direct callers bypass the BeforeValidator, so the body must normalize note_types.

Regression for the CLI path: `bm tool search-notes --type note,task` calls this
function directly with Typer's collected list ["note,task"], which must split into
["note", "task"] and match the note correctly (#930, Codex review follow-up).
"""
await write_note(
project=test_project.name,
title="Direct NoteType Split Note",
directory="test",
content="# Direct NoteType Split Note\nNoteTypeSplitToken body",
)

async def found(note_types_value: list[str] | None) -> bool:
result = await search_notes(
project=test_project.name,
query="NoteTypeSplitToken",
search_type="text",
output_format="json",
note_types=note_types_value,
)
assert isinstance(result, dict), f"search failed: {result}"
return any(r["title"] == "Direct NoteType Split Note" for r in result["results"])

assert await found(None), "no filter must match (sanity)"
assert await found(["note"]), "plain single-type list must match (sanity)"
# The CLI regression: Typer collects --type note,task as the single element "note,task".
assert await found(["note,task"]), "comma list element must be flattened and match 'note'"
# Negative control: a specific nonexistent type must not match.
assert not await found(["nonexistent_type"])


def test_search_notes_parse_str_list_rejects_non_string_list_elements_in_place():
"""parse_str_list must return non-str list elements unchanged for Pydantic rejection.

The old implementation used str(raw) which silently coerced [42] -> ['42'],
causing bad caller data to become silent no-result searches instead of a
clear Pydantic validation error.
"""
from basic_memory.utils import parse_str_list

# Non-string list elements pass through unchanged.
assert parse_str_list([42]) == [42] # type: ignore[arg-type]
assert parse_str_list(["ok", 42]) == ["ok", 42] # type: ignore[arg-type]
assert parse_str_list([{"a": 1}]) == [{"a": 1}] # type: ignore[arg-type]

# All-string lists still work correctly.
assert parse_str_list(["note", "task"]) == ["note", "task"]
assert parse_str_list(["note,task"]) == ["note", "task"]
83 changes: 81 additions & 2 deletions tests/test_coerce.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Tests for coerce_list, coerce_dict, and strict_search_tags utility functions.
"""Tests for coerce_list, coerce_dict, strict_search_tags, and parse_str_list utility functions.

These must fail until the helpers are implemented in utils.py.
"""

from basic_memory.utils import coerce_list, coerce_dict, strict_search_tags
from basic_memory.utils import coerce_dict, coerce_list, parse_str_list, strict_search_tags


class TestCoerceList:
Expand Down Expand Up @@ -113,3 +113,82 @@ def test_json_array_string_passthrough(self):

def test_int_passthrough(self):
assert coerce_dict(42) == 42


class TestParseStrList:
"""Tests for parse_str_list — the comma-split coercer for note_types/entity_types/categories."""

# --- None input ---

def test_none_returns_empty_list(self):
assert parse_str_list(None) == []

# --- Single string inputs ---

def test_single_string_wraps_as_one_element(self):
assert parse_str_list("note") == ["note"]

def test_comma_string_splits(self):
"""The primary motivation for this function: "note,task" → ["note", "task"]."""
assert parse_str_list("note,task") == ["note", "task"]

def test_comma_string_with_spaces_strips(self):
assert parse_str_list("note, task, person") == ["note", "task", "person"]

# --- JSON array string inputs (MCP clients sometimes serialize lists as strings) ---

def test_json_array_string(self):
assert parse_str_list('["note", "task"]') == ["note", "task"]

def test_json_array_string_single_element(self):
assert parse_str_list('["note"]') == ["note"]

def test_json_array_string_with_comma_elements(self):
"""JSON array where an element is itself a comma-string — flatten it."""
assert parse_str_list('["note,task"]') == ["note", "task"]

# --- List inputs ---

def test_plain_list_passthrough(self):
assert parse_str_list(["note", "task"]) == ["note", "task"]

def test_list_with_comma_element_splits(self):
"""A list containing a comma-string is flattened."""
assert parse_str_list(["note,task"]) == ["note", "task"]

def test_list_with_multiple_comma_elements(self):
assert parse_str_list(["note,task", "person"]) == ["note", "task", "person"]

# --- No '#' stripping (unlike parse_tags) ---

def test_hash_prefix_preserved(self):
"""parse_str_list must NOT strip '#' — these are type identifiers, not hashtags."""
assert parse_str_list("#type") == ["#type"]

def test_hash_prefix_in_comma_string_preserved(self):
assert parse_str_list("#type,#other") == ["#type", "#other"]

# --- Non-str/list/None pass through for Pydantic rejection ---

def test_int_passthrough_for_pydantic_rejection(self):
assert parse_str_list(42) == 42 # type: ignore[arg-type]

def test_dict_passthrough_for_pydantic_rejection(self):
value = {"a": 1}
assert parse_str_list(value) is value # type: ignore[arg-type]

# --- Non-string list elements pass through unchanged (Codex review fix) ---

def test_int_list_passthrough_for_pydantic_rejection(self):
"""Lists with non-string elements must not be stringified ([42] → ['42'])."""
value = [42]
assert parse_str_list(value) is value # type: ignore[arg-type]

def test_mixed_list_passthrough_for_pydantic_rejection(self):
"""One non-string element poisons the whole list — no partial coercion."""
value = ["note", 42]
assert parse_str_list(value) is value # type: ignore[arg-type]

def test_dict_list_passthrough_for_pydantic_rejection(self):
value = [{"a": 1}]
assert parse_str_list(value) is value # type: ignore[arg-type]
Loading