Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 68 additions & 22 deletions src/basic_memory/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,26 +302,76 @@ async def format_file(
return None


# A frontmatter fence is a line containing exactly `---`, optionally followed by
# trailing horizontal whitespace. Anchoring to a full line (rather than a bare
# substring/`startswith`) prevents single-line content like
# `---\nstatus: active\n---\nBody` — where `\n` is a literal backslash-n, not a
# newline — from being misread as frontmatter. See issue #972.
_FENCE_RE = re.compile(r"^---[ \t]*$")


def _split_frontmatter(content: str) -> Optional[tuple[str, str]]:
"""Split content into (yaml_block, body) when it opens with a line-anchored fence.

The opening fence must be the very first line and the closing fence must be a
later line, each matching exactly `---` (with optional trailing whitespace).

Returns:
A `(yaml_block, body)` tuple when a complete fenced block is present, or
``None`` when the content does not open with a frontmatter fence.

Raises:
ParseError: If the content opens with a fence but has no closing fence.
"""
lines = content.splitlines(keepends=True)

# Skip leading blank lines: a document may begin with whitespace before the
# opening fence (e.g. a heredoc/dedented string starting with a newline). This
# does NOT relax line-anchoring — the opening fence must still be the first
# non-blank line, all on its own, so a single-line `---\\nstatus...` (literal
# backslash-n) is still rejected. See issue #972.
start = 0
while start < len(lines) and lines[start].strip() == "":
start += 1

if start >= len(lines) or not _FENCE_RE.match(lines[start].rstrip("\r\n")):
return None

# Find the closing fence on its own line somewhere after the opening fence.
for index in range(start + 1, len(lines)):
if _FENCE_RE.match(lines[index].rstrip("\r\n")):
yaml_block = "".join(lines[start + 1 : index])
body = "".join(lines[index + 1 :])
return yaml_block, body

raise ParseError("Invalid frontmatter format")


def has_frontmatter(content: str) -> bool:
"""
Check if content contains valid YAML frontmatter.

Frontmatter requires `---` fences on their own lines; an inline `---` (such as
a single-line string that merely starts with the characters `---`) is not
frontmatter.

Args:
content: Content to check

Returns:
True if content has valid frontmatter markers (---), False otherwise
True if content has line-anchored frontmatter fences, False otherwise
"""
if not content:
return False

# Strip BOM before checking for frontmatter markers
content = strip_bom(content).strip()
if not content.startswith("---"):
content = strip_bom(content)
try:
return _split_frontmatter(content) is not None
except ParseError:
# An opening fence with no closing fence is not usable frontmatter.
return False

return "---" in content[3:]


def parse_frontmatter(content: str) -> Dict[str, Any]:
"""
Expand All @@ -339,17 +389,14 @@ def parse_frontmatter(content: str) -> Dict[str, Any]:
try:
# Strip BOM before parsing frontmatter
content = strip_bom(content)
if not content.strip().startswith("---"):
split = _split_frontmatter(content)
if split is None:
raise ParseError("Content has no frontmatter")

# Split on first two occurrences of ---
parts = content.split("---", 2)
if len(parts) < 3:
raise ParseError("Invalid frontmatter format")
yaml_block, _ = split

# Parse YAML
try:
frontmatter = yaml.safe_load(parts[1])
frontmatter = yaml.safe_load(yaml_block)
# Handle empty frontmatter (None from yaml.safe_load)
if frontmatter is None:
return {}
Expand Down Expand Up @@ -381,18 +428,17 @@ def remove_frontmatter(content: str) -> str:
ParseError: If content starts with frontmatter marker but is malformed
"""
# Strip BOM before processing
content = strip_bom(content).strip()

# Return as-is if no frontmatter marker
if not content.startswith("---"):
return content
content = strip_bom(content)

# Split on first two occurrences of ---
parts = content.split("---", 2)
if len(parts) < 3:
raise ParseError("Invalid frontmatter format")
split = _split_frontmatter(content)
# Trigger: content does not open with a line-anchored fence
# Why: inline `---` is ordinary content, not frontmatter (issue #972)
# Outcome: return the content untouched (stripped to preserve prior behavior)
if split is None:
return content.strip()

return parts[2].strip()
_, body = split
return body.strip()


def dump_frontmatter(post: frontmatter.Post) -> str:
Expand Down
33 changes: 33 additions & 0 deletions tests/mcp/test_tool_write_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,39 @@ async def test_write_note_preserves_content_frontmatter(app, test_project):
)


@pytest.mark.asyncio
async def test_write_note_single_line_inline_fence_is_body_issue_972(app, test_project):
"""Single-line content starting with `---` must be stored as body, not frontmatter.

Reproduces issue #972: a one-line string where `\\n` are literal backslash-n
characters (a common CLI/agent input shape) was misread as frontmatter, merging a
garbage `\\nstatus` YAML key into the note and silently dropping the inline
`---...---` segment from the body.
"""
one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony."

await write_note(
project=test_project.name,
title="Meeting Notes",
directory="meetings",
content=one_line,
)

content = await read_note("meetings/meeting-notes", project=test_project.name)
assert isinstance(content, str)

# The literal one-line string survives verbatim in the body...
assert one_line in content

# ...and no garbage `\nstatus` key leaked into the generated YAML frontmatter.
# Inspect only the frontmatter block (between the first pair of fence lines).
lines = content.splitlines()
assert lines[0] == "---"
closing = lines.index("---", 1)
frontmatter_block = lines[1:closing]
assert not any("status" in line for line in frontmatter_block)


@pytest.mark.asyncio
async def test_write_note_permalink_collision_fix_issue_139(app, test_project):
"""Test fix for GitHub Issue #139: UNIQUE constraint failed: entity.permalink.
Expand Down
44 changes: 44 additions & 0 deletions tests/utils/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,23 @@ def test_has_frontmatter():
assert not has_frontmatter("--title: test--")


def test_has_frontmatter_requires_line_anchored_fences():
"""Inline `---` must not be mistaken for frontmatter fences (issue #972)."""
# Exact one-line repro from the issue: `\n` are literal backslash-n, not newlines,
# so the whole string is a single line that merely starts with `---`.
one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony."
assert not has_frontmatter(one_line)

# An inline `---` later in the first line is not an opening fence either.
assert not has_frontmatter("--- not really frontmatter --- still content")

# Opening fence with no closing fence on its own line is not frontmatter.
assert not has_frontmatter("---\ntitle: Test\nbody without closing fence")

# A fence with trailing whitespace is still a valid fence.
assert has_frontmatter("--- \ntitle: Test\n--- \ncontent")


def test_parse_frontmatter():
"""Test parsing frontmatter."""
# Valid frontmatter
Expand Down Expand Up @@ -187,6 +204,33 @@ def test_remove_frontmatter():
assert "Invalid frontmatter format" in str(exc.value)


def test_frontmatter_helpers_ignore_inline_fences():
"""Single-line content starting with `---` is passed through unchanged (issue #972).

Previously the loose substring/split logic merged a garbage `\\nstatus` YAML key
into the note and silently stripped the inline `---...---` segment from the body.
"""
one_line = r"---\nstatus: active\n---\nDiscussed Q3 roadmap with Anthony."

# Not detected as frontmatter...
assert not has_frontmatter(one_line)
# ...so parsing raises rather than inventing a `\nstatus` key...
with pytest.raises(ParseError):
parse_frontmatter(one_line)
# ...and the body survives intact through the merge path's removal step.
assert remove_frontmatter(one_line) == one_line

# An inline `---` later in the first line is also left untouched.
inline = "intro --- not frontmatter --- outro"
assert remove_frontmatter(inline) == inline

# Valid line-anchored frontmatter with trailing whitespace on the fences parses
# and is removed identically to clean fences.
padded = "--- \ntitle: Test\n--- \ncontent"
assert parse_frontmatter(padded) == {"title": "Test"}
assert remove_frontmatter(padded) == "content"


def test_sanitize_for_filename_removes_invalid_characters():
# Test all invalid characters listed in the regex
invalid_chars = '<>:"|?*'
Expand Down
Loading