From 05e41a3d7d4b387f09acc77e68d1be8adf36ebdd Mon Sep 17 00:00:00 2001 From: Kevin Sallee Date: Tue, 12 May 2026 13:58:25 -0600 Subject: [PATCH] Add JSON Schema and CI validation for data.json Treats data.json as a spec: data/schema.json describes the shape convert_doc.py produces, data/validate.py runs the check locally, and a GitHub Actions workflow regenerates the data from doc_export.html and validates on every PR, so the parser and the committed artifact cannot silently drift. While wiring this up the schema surfaced 43 empty-titled subsections in data.json. They were coming from convert_doc.py appending a subsection for every

<h2>, including styling-only ones; this commit includes a one-line skip for empty <h2>s and regenerates data.json. The source Google Doc likely still contains those empty <h2>s. Schema is strict by default (additionalProperties: false at the top level and on data entries) so future field additions surface as CI failures and prompt a schema bump. Happy to relax if maintainers prefer the schema to document rather than constrain. --- .github/workflows/validate.yml | 34 ++++++ data/convert_doc.py | 4 +- data/data.json | 172 ----------------------------- data/schema.json | 192 +++++++++++++++++++++++++++++++++ data/validate.py | 53 +++++++++ requirements.txt | 1 + 6 files changed, 283 insertions(+), 173 deletions(-) create mode 100644 .github/workflows/validate.yml create mode 100644 data/schema.json create mode 100644 data/validate.py diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml new file mode 100644 index 0000000..9829b94 --- /dev/null +++ b/.github/workflows/validate.yml @@ -0,0 +1,34 @@ +name: Validate data.json + +on: + push: + branches: [main] + pull_request: + +jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Regenerate data artifacts from doc_export.html + run: python data/convert_doc.py + + - name: Check committed artifacts match regenerated output + run: | + git diff --exit-code \ + data/data.json \ + data/directory_data.json \ + data/directory_structure.yaml \ + dashboard/doc_styles.css + + - name: Validate data.json against schema + run: python data/validate.py diff --git a/data/convert_doc.py b/data/convert_doc.py index 2216cbc..420227c 100644 --- a/data/convert_doc.py +++ b/data/convert_doc.py @@ -197,6 +197,8 @@ def remove_comments(tag): current_h2_obj = None elif el.name == 'h2': + if not text: + continue if current_h1_obj and not current_h1_obj.get("special"): current_h2_obj = { "title": text, @@ -204,7 +206,7 @@ def remove_comments(tag): } current_h1_obj["subsections"].append(current_h2_obj) else: 
- current_h2_obj = None + current_h2_obj = None elif el.name in ['p', 'ul', 'ol']: if current_h1_obj and current_h1_obj.get("special"): diff --git a/data/data.json b/data/data.json index ea52efb..2597c60 100644 --- a/data/data.json +++ b/data/data.json @@ -102,10 +102,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "1.2 Camera Type & Model", "items": [ @@ -150,10 +146,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "1.3 Per take Sensor Metadata", "items": [ @@ -191,10 +183,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "1.4 Per Camera Lens Specifications", "items": [ @@ -233,10 +221,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "1.5 Lens Encoder Tracking Data", "items": [ @@ -342,10 +326,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "2.2 Overhead & Wide-Angle BTS Coverage", "items": [ @@ -377,10 +357,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "2.3 Real-World Depth Information", "items": [ @@ -657,10 +633,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "4.2 HDRIs", "items": [ @@ -723,10 +695,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "4.4 DMX/Lighting Control Metadata", "items": [ @@ -996,10 +964,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "5.6 Set Reference Photography", "items": [ @@ -1030,10 +994,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "5.7 Costume, Face Marker, and T-Pose Reference", "items": [ @@ -1063,10 +1023,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "5.8 Reference of Vehicles, Props, Production Locations", "items": [ @@ -1101,10 +1057,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1146,10 +1098,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "6.2 DIT Specs", "items": [ @@ -1184,10 +1132,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "6.3 Lens Specification", "items": [ @@ -1218,10 +1162,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ 
-1269,10 +1209,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "7.2 Concept Art, Lookdev, Character Designs, R&D, Storyboards", "items": [ @@ -1308,10 +1244,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1348,10 +1280,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "8.2 Performance Capture Facial Tracking", "items": [ @@ -1379,10 +1307,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "8.3 Hand/Finger Tracking", "items": [ @@ -1416,10 +1340,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "8.4 Raw Animation Data & Retargeting Settings", "items": [ @@ -1453,10 +1373,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "8.5 Motion Capture Volume Data", "items": [ @@ -1482,10 +1398,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1570,10 +1482,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "9.3 IMU Data", "items": [ @@ -1606,10 +1514,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1648,10 +1552,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "10.2 Display Frustum Calibration", "items": [ @@ -1680,10 +1580,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "10.3 Colour Profile & Brightness Settings", "items": [ @@ -1716,10 +1612,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "10.4 Latency Data", "items": [ @@ -1749,10 +1641,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "10.5 Plate vs. 
Real-Time Rendered Assets", "items": [ @@ -1784,10 +1672,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1822,10 +1706,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "11.2 Virtual Production Operator Metadata", "items": [ @@ -1853,10 +1733,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "11.3 On-Set Real-Time Adjustments", "items": [ @@ -1885,10 +1761,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -1927,10 +1799,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "12.2 Spatial Audio Tracking Metadata", "items": [ @@ -1958,10 +1826,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "12.3 Lip sync and character audio", "items": [ @@ -1992,10 +1856,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -2033,10 +1893,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "13.2 Mudmaps", "items": [ @@ -2070,10 +1926,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "13.3 Storyboards, Previz, Techviz, Stuntviz", "items": [ @@ -2118,10 +1970,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "13.4 Art Department Assets", "items": [ @@ -2170,10 +2018,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -2211,10 +2055,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "14.2 Lens Grid and Reference Specs", "items": [ @@ -2247,10 +2087,6 @@ ] } ] - }, - { - "title": "", - "items": [] } ] }, @@ -2287,10 +2123,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "15.2 Descriptive File Naming", "items": [ @@ -2336,10 +2168,6 @@ } ] }, - { - "title": "", - "items": [] - }, { "title": "15.3 Review Tools", "items": [ diff --git a/data/schema.json b/data/schema.json new file mode 100644 index 0000000..5235ace --- /dev/null +++ b/data/schema.json @@ -0,0 +1,192 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://ves-on-set-data.org/schema/data.schema.json", + "title": "VES On-Set VFX Data Collection Guide", + "description": 
"Schema for data.json produced by data/convert_doc.py from the canonical Google Doc.", + "type": "object", + "required": [ + "version", + "publishDate", + "Introduction", + "Scope Definitions", + "VFX Types", + "Data Sets", + "Directory Structure", + "Reference Docs", + "Feedback" + ], + "additionalProperties": false, + "properties": { + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version of the source document." + }, + "publishDate": { + "type": "string", + "format": "date", + "pattern": "^\\d{4}-\\d{2}-\\d{2}$", + "description": "ISO-8601 publication date of the source document." + }, + "Introduction": { + "type": "array", + "items": { "$ref": "#/$defs/htmlBlock" }, + "minItems": 1 + }, + "Scope Definitions": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/$defs/htmlBlock" }, + { "$ref": "#/$defs/definitionsTable" } + ] + } + }, + "VFX Types": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/$defs/htmlBlock" }, + { "$ref": "#/$defs/definitionsTable" } + ] + } + }, + "Data Sets": { + "type": "array", + "items": { "$ref": "#/$defs/section" } + }, + "Directory Structure": { + "type": "array", + "items": { + "oneOf": [ + { "$ref": "#/$defs/htmlBlock" }, + { + "type": "object", + "required": ["type"], + "additionalProperties": false, + "properties": { + "type": { "const": "tree_view" } + } + } + ] + } + }, + "Reference Docs": { + "type": "array", + "items": { "$ref": "#/$defs/htmlBlock" }, + "minItems": 1 + }, + "Feedback": { + "type": "array", + "items": { "$ref": "#/$defs/htmlBlock" }, + "minItems": 1 + } + }, + "$defs": { + "htmlBlock": { + "type": "object", + "required": ["html"], + "additionalProperties": false, + "properties": { + "html": { + "type": "string", + "minLength": 1 + } + } + }, + "definitionsTable": { + "description": "Key-value definitions table where each key is a term and the value is its definition (string) or a list of related terms.", + "type": "object", + 
"minProperties": 1, + "additionalProperties": { + "oneOf": [ + { "type": "string", "minLength": 1 }, + { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + } + ] + } + }, + "section": { + "type": "object", + "required": ["title", "subsections"], + "additionalProperties": false, + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "subsections": { + "type": "array", + "items": { "$ref": "#/$defs/subsection" } + } + } + }, + "subsection": { + "type": "object", + "required": ["title", "items"], + "additionalProperties": false, + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "items": { + "type": "array", + "items": { "$ref": "#/$defs/dataEntry" } + } + } + }, + "dataEntry": { + "description": "A single data-collection entry inside a subsection.", + "type": "object", + "required": [ + "Data Collected", + "Description", + "Usage", + "Scope", + "Creator", + "Consumer", + "VFXTypes" + ], + "additionalProperties": false, + "properties": { + "Data Collected": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + }, + "Description": { + "type": "string", + "minLength": 1 + }, + "Usage": { + "type": "string", + "minLength": 1 + }, + "Scope": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + }, + "Creator": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + }, + "Consumer": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + }, + "VFXTypes": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "minItems": 1 + } + } + } + } +} diff --git a/data/validate.py b/data/validate.py new file mode 100644 index 0000000..223c15d --- /dev/null +++ b/data/validate.py @@ -0,0 +1,53 @@ +"""Validate data/data.json against data/schema.json. 
+ +Run from the repository root: + python data/validate.py + +Exits with code 0 if data.json conforms to the schema, 1 otherwise. +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +from jsonschema import Draft202012Validator, FormatChecker + +REPO = Path(__file__).resolve().parent.parent +SCHEMA_PATH = REPO / "data" / "schema.json" +DATA_PATH = REPO / "data" / "data.json" + + +def main() -> int: + with SCHEMA_PATH.open() as f: + schema = json.load(f) + with DATA_PATH.open() as f: + data = json.load(f) + + validator = Draft202012Validator(schema, format_checker=FormatChecker()) + errors = sorted(validator.iter_errors(data), key=lambda e: list(e.absolute_path)) + + if errors: + for err in errors: + path = "/".join(str(p) for p in err.absolute_path) or "" + print(f"[{path}] {err.message}", file=sys.stderr) + print(f"\nFAIL: {len(errors)} schema violation(s)", file=sys.stderr) + return 1 + + sections = data.get("Data Sets", []) + subsections = sum(len(s.get("subsections", [])) for s in sections) + items = sum( + len(sub.get("items", [])) + for s in sections + for sub in s.get("subsections", []) + ) + print( + f"OK: data.json conforms to schema " + f"({len(sections)} sections, {subsections} subsections, {items} items)" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/requirements.txt b/requirements.txt index 0999a55..491a83c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ beautifulsoup4 PyYAML +jsonschema