diff --git a/okf/src/reference_agent/viewer/generator.py b/okf/src/reference_agent/viewer/generator.py index 86aae8b..62197f2 100644 --- a/okf/src/reference_agent/viewer/generator.py +++ b/okf/src/reference_agent/viewer/generator.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Any -from reference_agent.bundle.document import OKFDocument, OKFDocumentError +from reference_agent.bundle.document import OKFDocument _INDEX_NAME = "index.md" _LINK_RE = re.compile(r"\]\(([^)\s]+\.md)(?:#[A-Za-z0-9_\-]*)?\)") @@ -75,7 +75,14 @@ def _walk_concepts(bundle_root: Path) -> list[Concept]: concept_id = "/".join(rel.parts) try: doc = OKFDocument.parse(md_path.read_text(encoding="utf-8")) - except OKFDocumentError: + except (ValueError, OSError): + # Skip a single malformed/unreadable file rather than aborting the + # whole visualization. ValueError covers OKFDocument parse failures + # (OKFDocumentError) and invalid UTF-8 (UnicodeDecodeError) — both + # ValueError subclasses — plus PyYAML's *bare* ValueError for an + # out-of-range frontmatter value such as a bad timestamp; OSError + # covers read errors. bundle.index._load_doc tolerates the same + # files via a broader `except Exception`. continue fm = doc.frontmatter or {} tags = fm.get("tags") or [] diff --git a/okf/tests/test_viewer.py b/okf/tests/test_viewer.py index cdb0b18..9fa626e 100644 --- a/okf/tests/test_viewer.py +++ b/okf/tests/test_viewer.py @@ -160,6 +160,40 @@ def test_node_colors_match_palette(tmp_path: Path): assert by_id["references/metrics/dau"]["color"] == "#10b981" +def test_unreadable_concept_file_is_skipped(tmp_path: Path): + # A single file with invalid UTF-8 bytes must not abort the whole + # visualization — it should be skipped like a malformed-frontmatter file. + bundle = tmp_path / "bundle" + _make_bundle(bundle) + corrupt = bundle / "tables" / "corrupt.md" + corrupt.parent.mkdir(parents=True, exist_ok=True) + corrupt.write_bytes(b"---\ntype: BigQuery Table\n\xff\xfe not utf-8\n---\n") + out = tmp_path / "viz.html" + stats = generate_visualization(bundle, out) + assert out.exists() + # The 4 well-formed concepts still render; the corrupt file is skipped. + assert stats["concepts"] == 4 + + +def test_malformed_timestamp_concept_file_is_skipped(tmp_path: Path): + # A valid-UTF-8 file whose frontmatter has an out-of-range timestamp makes + # PyYAML raise a *bare* ValueError (not an OKFDocumentError); it must be + # skipped like any other malformed file, not abort the whole viz. + bundle = tmp_path / "bundle" + _make_bundle(bundle) + bad = bundle / "tables" / "badts.md" + bad.parent.mkdir(parents=True, exist_ok=True) + bad.write_text( + "---\ntype: BigQuery Table\ntimestamp: 2026-13-45\n---\nbody\n", + encoding="utf-8", + ) + out = tmp_path / "viz.html" + stats = generate_visualization(bundle, out) + assert out.exists() + # The 4 well-formed concepts still render; the bad-timestamp file is skipped. + assert stats["concepts"] == 4 + + def test_raises_when_bundle_missing(tmp_path: Path): with pytest.raises(FileNotFoundError): generate_visualization(tmp_path / "nope", tmp_path / "viz.html")