diff --git a/src/basic_memory/file_utils.py b/src/basic_memory/file_utils.py index d23626da..a55ca773 100644 --- a/src/basic_memory/file_utils.py +++ b/src/basic_memory/file_utils.py @@ -302,26 +302,76 @@ async def format_file( return None +# A frontmatter fence is a line containing exactly `---`, optionally followed by +# trailing horizontal whitespace. Anchoring to a full line (rather than a bare +# substring/`startswith`) prevents single-line content like +# `---\nstatus: active\n---\nBody` — where `\n` is a literal backslash-n, not a +# newline — from being misread as frontmatter. See issue #972. +_FENCE_RE = re.compile(r"^---[ \t]*$") + + +def _split_frontmatter(content: str) -> Optional[tuple[str, str]]: + """Split content into (yaml_block, body) when it opens with a line-anchored fence. + + The opening fence must be the very first line and the closing fence must be a + later line, each matching exactly `---` (with optional trailing whitespace). + + Returns: + A `(yaml_block, body)` tuple when a complete fenced block is present, or + ``None`` when the content does not open with a frontmatter fence. + + Raises: + ParseError: If the content opens with a fence but has no closing fence. + """ + lines = content.splitlines(keepends=True) + + # Skip leading blank lines: a document may begin with whitespace before the + # opening fence (e.g. a heredoc/dedented string starting with a newline). This + # does NOT relax line-anchoring — the opening fence must still be the first + # non-blank line, all on its own, so a single-line `---\\nstatus...` (literal + # backslash-n) is still rejected. See issue #972. + start = 0 + while start < len(lines) and lines[start].strip() == "": + start += 1 + + if start >= len(lines) or not _FENCE_RE.match(lines[start].rstrip("\r\n")): + return None + + # Find the closing fence on its own line somewhere after the opening fence. + for index in range(start + 1, len(lines)): + if _FENCE_RE.match(lines[index].rstrip("\r\n")): + yaml_block = "".join(lines[start + 1 : index]) + body = "".join(lines[index + 1 :]) + return yaml_block, body + + raise ParseError("Invalid frontmatter format") + + def has_frontmatter(content: str) -> bool: """ Check if content contains valid YAML frontmatter. + Frontmatter requires `---` fences on their own lines; an inline `---` (such as + a single-line string that merely starts with the characters `---`) is not + frontmatter. + Args: content: Content to check Returns: - True if content has valid frontmatter markers (---), False otherwise + True if content has line-anchored frontmatter fences, False otherwise """ if not content: return False # Strip BOM before checking for frontmatter markers - content = strip_bom(content).strip() - if not content.startswith("---"): + content = strip_bom(content) + try: + return _split_frontmatter(content) is not None + except ParseError: + # An opening fence with no closing fence is not usable frontmatter. return False - return "---" in content[3:] - def parse_frontmatter(content: str) -> Dict[str, Any]: """ @@ -339,17 +389,14 @@ def parse_frontmatter(content: str) -> Dict[str, Any]: try: # Strip BOM before parsing frontmatter content = strip_bom(content) - if not content.strip().startswith("---"): + split = _split_frontmatter(content) + if split is None: raise ParseError("Content has no frontmatter") - - # Split on first two occurrences of --- - parts = content.split("---", 2) - if len(parts) < 3: - raise ParseError("Invalid frontmatter format") + yaml_block, _ = split # Parse YAML try: - frontmatter = yaml.safe_load(parts[1]) + frontmatter = yaml.safe_load(yaml_block) # Handle empty frontmatter (None from yaml.safe_load) if frontmatter is None: return {} @@ -381,18 +428,17 @@ def remove_frontmatter(content: str) -> str: ParseError: If content starts with frontmatter marker but is malformed """ # Strip BOM before processing - content = strip_bom(content).strip() - - # Return as-is if no frontmatter marker - if not content.startswith("---"): - return content + content = strip_bom(content) - # Split on first two occurrences of --- - parts = content.split("---", 2) - if len(parts) < 3: - raise ParseError("Invalid frontmatter format") + split = _split_frontmatter(content) + # Trigger: content does not open with a line-anchored fence + # Why: inline `---` is ordinary content, not frontmatter (issue #972) + # Outcome: return the content untouched (stripped to preserve prior behavior) + if split is None: + return content.strip() - return parts[2].strip() + _, body = split + return body.strip() def dump_frontmatter(post: frontmatter.Post) -> str: diff --git a/tests/mcp/test_tool_write_note.py b/tests/mcp/test_tool_write_note.py index 1198608e..40db9387 100644 --- a/tests/mcp/test_tool_write_note.py +++ b/tests/mcp/test_tool_write_note.py @@ -476,6 +476,39 @@ async def test_write_note_preserves_content_frontmatter(app, test_project): ) +@pytest.mark.asyncio +async def test_write_note_single_line_inline_fence_is_body_issue_972(app, test_project): + """Single-line content starting with `---` must be stored as body, not frontmatter. + + Reproduces issue #972: a one-line string where `\\n` are literal backslash-n + characters (a common CLI/agent input shape) was misread as frontmatter, merging a + garbage `\\nstatus` YAML key into the note and silently dropping the inline + `---...---` segment from the body. + """ + one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony." + + await write_note( + project=test_project.name, + title="Meeting Notes", + directory="meetings", + content=one_line, + ) + + content = await read_note("meetings/meeting-notes", project=test_project.name) + assert isinstance(content, str) + + # The literal one-line string survives verbatim in the body... + assert one_line in content + + # ...and no garbage `\nstatus` key leaked into the generated YAML frontmatter. + # Inspect only the frontmatter block (between the first pair of fence lines). + lines = content.splitlines() + assert lines[0] == "---" + closing = lines.index("---", 1) + frontmatter_block = lines[1:closing] + assert not any("status" in line for line in frontmatter_block) + + @pytest.mark.asyncio async def test_write_note_permalink_collision_fix_issue_139(app, test_project): """Test fix for GitHub Issue #139: UNIQUE constraint failed: entity.permalink. diff --git a/tests/utils/test_file_utils.py b/tests/utils/test_file_utils.py index dc1cbf09..7af48fbd 100644 --- a/tests/utils/test_file_utils.py +++ b/tests/utils/test_file_utils.py @@ -111,6 +111,23 @@ def test_has_frontmatter(): assert not has_frontmatter("--title: test--") +def test_has_frontmatter_requires_line_anchored_fences(): + """Inline `---` must not be mistaken for frontmatter fences (issue #972).""" + # Exact one-line repro from the issue: `\n` are literal backslash-n, not newlines, + # so the whole string is a single line that merely starts with `---`. + one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony." + assert not has_frontmatter(one_line) + + # An inline `---` later in the first line is not an opening fence either. + assert not has_frontmatter("--- not really frontmatter --- still content") + + # Opening fence with no closing fence on its own line is not frontmatter. + assert not has_frontmatter("---\ntitle: Test\nbody without closing fence") + + # A fence with trailing whitespace is still a valid fence. + assert has_frontmatter("--- \ntitle: Test\n--- \ncontent") + + def test_parse_frontmatter(): """Test parsing frontmatter.""" # Valid frontmatter @@ -187,6 +204,33 @@ def test_remove_frontmatter(): assert "Invalid frontmatter format" in str(exc.value) +def test_frontmatter_helpers_ignore_inline_fences(): + """Single-line content starting with `---` is passed through unchanged (issue #972). + + Previously the loose substring/split logic merged a garbage `\\nstatus` YAML key + into the note and silently stripped the inline `---...---` segment from the body. + """ + one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony." + + # Not detected as frontmatter... + assert not has_frontmatter(one_line) + # ...so parsing raises rather than inventing a `\nstatus` key... + with pytest.raises(ParseError): + parse_frontmatter(one_line) + # ...and the body survives intact through the merge path's removal step. + assert remove_frontmatter(one_line) == one_line + + # An inline `---` later in the first line is also left untouched. + inline = "intro --- not frontmatter --- outro" + assert remove_frontmatter(inline) == inline + + # Valid line-anchored frontmatter with trailing whitespace on the fences parses + # and is removed identically to clean fences. + padded = "--- \ntitle: Test\n--- \ncontent" + assert parse_frontmatter(padded) == {"title": "Test"} + assert remove_frontmatter(padded) == "content" + + def test_sanitize_for_filename_removes_invalid_characters(): # Test all invalid characters listed in the regex invalid_chars = '<>:"|?*'