Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions okf/src/reference_agent/viewer/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import Any

from reference_agent.bundle.document import OKFDocument, OKFDocumentError
from reference_agent.bundle.document import OKFDocument

_INDEX_NAME = "index.md"
_LINK_RE = re.compile(r"\]\(([^)\s]+\.md)(?:#[A-Za-z0-9_\-]*)?\)")
Expand Down Expand Up @@ -75,7 +75,14 @@ def _walk_concepts(bundle_root: Path) -> list[Concept]:
concept_id = "/".join(rel.parts)
try:
doc = OKFDocument.parse(md_path.read_text(encoding="utf-8"))
except OKFDocumentError:
except (ValueError, OSError):
# Skip a single malformed/unreadable file rather than aborting the
# whole visualization. ValueError covers OKFDocument parse failures
# (OKFDocumentError) and invalid UTF-8 (UnicodeDecodeError) — both
# ValueError subclasses — plus PyYAML's *bare* ValueError for an
# out-of-range frontmatter value such as a bad timestamp; OSError
# covers read errors. bundle.index._load_doc tolerates the same
# files via a broader `except Exception`.
continue
fm = doc.frontmatter or {}
tags = fm.get("tags") or []
Expand Down
34 changes: 34 additions & 0 deletions okf/tests/test_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,40 @@ def test_node_colors_match_palette(tmp_path: Path):
assert by_id["references/metrics/dau"]["color"] == "#10b981"


def test_unreadable_concept_file_is_skipped(tmp_path: Path):
    """A concept file containing invalid UTF-8 is skipped, not fatal."""
    bundle_dir = tmp_path / "bundle"
    _make_bundle(bundle_dir)

    # Plant one extra file whose bytes cannot be decoded as UTF-8; it should
    # be treated the same way as a file with malformed frontmatter.
    tables_dir = bundle_dir / "tables"
    tables_dir.mkdir(parents=True, exist_ok=True)
    (tables_dir / "corrupt.md").write_bytes(
        b"---\ntype: BigQuery Table\n\xff\xfe not utf-8\n---\n"
    )

    html_path = tmp_path / "viz.html"
    result = generate_visualization(bundle_dir, html_path)

    # Output is still produced, and only the 4 well-formed concepts count.
    assert html_path.exists()
    assert result["concepts"] == 4


def test_malformed_timestamp_concept_file_is_skipped(tmp_path: Path):
    """A file with an out-of-range frontmatter timestamp is skipped."""
    bundle_dir = tmp_path / "bundle"
    _make_bundle(bundle_dir)

    # The text is valid UTF-8, but the impossible date makes PyYAML raise a
    # *bare* ValueError (not an OKFDocumentError). The viewer must skip the
    # file like any other malformed one instead of aborting the whole viz.
    tables_dir = bundle_dir / "tables"
    tables_dir.mkdir(parents=True, exist_ok=True)
    bad_doc = "---\ntype: BigQuery Table\ntimestamp: 2026-13-45\n---\nbody\n"
    (tables_dir / "badts.md").write_text(bad_doc, encoding="utf-8")

    html_path = tmp_path / "viz.html"
    result = generate_visualization(bundle_dir, html_path)

    # Output is still produced, and only the 4 well-formed concepts count.
    assert html_path.exists()
    assert result["concepts"] == 4


def test_raises_when_bundle_missing(tmp_path: Path):
    """Pointing the generator at a nonexistent bundle raises FileNotFoundError."""
    missing_bundle = tmp_path / "nope"
    html_path = tmp_path / "viz.html"
    with pytest.raises(FileNotFoundError):
        generate_visualization(missing_bundle, html_path)