"""Operator-facing Markdown view over a Depone verification report (V111).

Presentation only: the functions below read fields from a report that has
already been produced. They do not re-validate capture manifests and they
render absent fields as ``unknown`` instead of inventing a value.
"""

from dataclasses import asdict, is_dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any, Mapping

if TYPE_CHECKING:
    # Needed for annotations only; rendering never calls into the engine.
    from depone.verify.engine import VerificationReport

# Placeholder shown for any report or capture field that is absent or None.
_UNKNOWN = "unknown"


def _is_dataclass_instance(value: Any) -> bool:
    """Return True for dataclass instances, False for dataclass classes."""
    # is_dataclass() is also true for the class object itself, which
    # asdict() rejects with TypeError.
    return is_dataclass(value) and not isinstance(value, type)


def _report_mapping(
    report: "VerificationReport | Mapping[str, Any]",
) -> "Mapping[str, Any]":
    """Return *report* as a mapping.

    A dataclass instance is converted with ``asdict`` (which also converts
    nested dataclasses); anything else is returned unchanged.
    """
    if _is_dataclass_instance(report):
        return asdict(report)
    return report


def _capture_mapping(capture: Any) -> "Mapping[str, Any]":
    """Return one capture entry as a mapping, or ``{}`` if it is neither a
    dataclass instance nor a mapping."""
    if _is_dataclass_instance(capture):
        return asdict(capture)
    if isinstance(capture, Mapping):
        return capture
    return {}


def _field(data: "Mapping[str, Any]", key: str) -> str:
    """Return ``data[key]`` as text, or ``unknown`` when absent or None."""
    value = data.get(key)
    return _UNKNOWN if value is None else str(value)


def _valid_label(capture: "Mapping[str, Any]") -> str:
    """Map the capture's ``valid`` flag to ``yes`` / ``no`` / ``unknown``."""
    valid = capture.get("valid")
    # Identity checks on purpose: only a real boolean counts, so a truthy
    # non-boolean value can never be displayed as "yes".
    if valid is True:
        return "yes"
    if valid is False:
        return "no"
    return _UNKNOWN


def _error_list(capture: "Mapping[str, Any]") -> "list[Any]":
    """Return the capture's errors as a list (empty when absent or falsy)."""
    errors = capture.get("errors")
    if not errors:
        return []
    if isinstance(errors, (str, bytes)):
        # A bare string is one error, not a sequence of characters.
        return [errors]
    return list(errors)


def render_operator_view(
    report: "VerificationReport | Mapping[str, Any]",
) -> str:
    """Render the operator-facing V111 summary for a verification report.

    Args:
        report: A dataclass report instance or a mapping with the same keys.

    Returns:
        Markdown text ending in a newline. It lists the report-level
        ``verdict``, ``decision`` and ``assurance``, then one numbered entry
        per ``agent_fabric_captures`` item with its ``evidence_path``,
        ``decision``, ``assurance``, ``valid`` flag and ``errors``.
    """
    report_data = _report_mapping(report)
    # `or ()` covers both a missing key and an explicit None value.
    captures = [
        _capture_mapping(capture)
        for capture in report_data.get("agent_fabric_captures") or ()
    ]

    lines = [
        "# Verification Operator View",
        "",
        f"- Verdict: {_field(report_data, 'verdict')}",
        f"- Decision: {_field(report_data, 'decision')}",
        f"- Assurance: {_field(report_data, 'assurance')}",
        f"- Agent Fabric captures: {len(captures)}",
        "",
        "## Agent Fabric captures",
    ]

    if not captures:
        lines.append("- None")

    for index, capture in enumerate(captures, start=1):
        lines.extend(
            [
                f"{index}. `{_field(capture, 'evidence_path')}`",
                f"   - Decision: {_field(capture, 'decision')}",
                f"   - Assurance: {_field(capture, 'assurance')}",
                f"   - Valid: {_valid_label(capture)}",
            ]
        )
        errors = _error_list(capture)
        if errors:
            lines.append("   - Errors:")
            lines.extend(f"     - {error}" for error in errors)
        else:
            lines.append("   - Errors: none")

    return "\n".join(lines) + "\n"


def write_operator_view(
    report: "VerificationReport | Mapping[str, Any]",
    output_path: "str | Path",
) -> Path:
    """Write the rendered operator view to *output_path* as UTF-8 text.

    Missing parent directories are created first.

    Returns:
        The output location as a ``Path``.
    """
    path = Path(output_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(render_operator_view(report), encoding="utf-8")
    return path
timing, activation, and brand boundary gates implemented; V88 roadmap reconciliation audit implemented; V89 command safety gate implemented; V90 activation v2 implemented; V91 contract tiering implemented; V92 evidence oracle implemented; V93 workflow narrative implemented; V94 control deck score implemented; V95 score history implemented; V96 metric ladder implemented; V97 benchmark readiness implemented; V98 wave operator implemented; V99 wave receipt implemented; V100 promotion evidence implemented; V101 promotion route implemented; V102 deterministic live-proof recorder implemented; V103 live-proof comparison schema implemented; V104 product direction implemented; V105 verify wedge implemented; V106 multi-wave validation implemented; V107 Agent Fabric contracts and compiler implemented; V108 Agent Fabric reference adapter fixture implemented; V109 Agent Fabric capture bridge implemented; V110 Agent Fabric report assurance implemented; V94-V101 meta layer frozen; live proof n=1 completed +Status: V3 entry runtime implemented; V7.5 frontier result review implemented; V8 frontier review ingestion implemented; V9 human gate resolution implemented; V10 product CLI implemented; V11 operator guidance implemented; V12-V20 product roadmap implemented; V52-V87 product evidence, graph timing, activation, and brand boundary gates implemented; V88 roadmap reconciliation audit implemented; V89 command safety gate implemented; V90 activation v2 implemented; V91 contract tiering implemented; V92 evidence oracle implemented; V93 workflow narrative implemented; V94 control deck score implemented; V95 score history implemented; V96 metric ladder implemented; V97 benchmark readiness implemented; V98 wave operator implemented; V99 wave receipt implemented; V100 promotion evidence implemented; V101 promotion route implemented; V102 deterministic live-proof recorder implemented; V103 live-proof comparison schema implemented; V104 product direction implemented; V105 verify wedge 
implemented; V106 multi-wave validation implemented; V107 Agent Fabric contracts and compiler implemented; V108 Agent Fabric reference adapter fixture implemented; V109 Agent Fabric capture bridge implemented; V110 Agent Fabric report assurance implemented; V111 Agent Fabric operator view implemented; V94-V101 meta layer frozen; live proof n=1 completed Date: 2026-06-20 ## Purpose @@ -56,6 +56,7 @@ legacy/internal and intentionally deferred. | Agent Fabric reference adapter | fixture-only local shell adapter capture shape for self-report, diff/touched files, test output, and command receipts | planned V108, shell fixture implemented | | Agent Fabric capture bridge | Depone-facing manifest with A0/A1 assurance labels and hash-bound observer capture | planned V109, passive bridge implemented | | Agent Fabric report assurance | verification report decision/assurance fields sourced from capture manifests | planned V110, report surface implemented | +| Agent Fabric operator view | Markdown export for operator-readable report decision, assurance, and capture status | planned V111, operator view implemented | | Harness benchmark | corpus and scoring gate for direct harness comparisons | planned V23, first benchmark gate implemented | | README public page | source-bound benchmark graph on the GitHub landing page | planned V37, first publish slice implemented | | Benchmark history | hash-bound report history ledger and trend graph artifacts | planned V38, first ledger slice implemented | @@ -941,6 +942,10 @@ Implemented continuation: now separate existing `verdict`, operator-facing `decision`, and capture `assurance`, and invalid capture manifests refute the report without hiding validation errors. +- V111 added the Agent Fabric operator-view/exporter. The view is + presentation-only over V110 report fields, keeps invalid captures visible, + preserves Depone branding, and writes deterministic Markdown through + `depone verify --operator-view-out`. 
Next roadmap direction: @@ -952,9 +957,8 @@ Next roadmap direction: migration gate proves compatibility. 4. Expand read-only or pre-isolated live execution only where V84/V85 and queue preflight evidence permit it. -5. Add a small fixture/report exporter or operator view over the V110 report - fields without adding new trust levels or bypassing evidence-contract - failures. +5. Add the next Agent Fabric planning slice only after the V107-V111 + compile-to-report path stays green under changed-surface checks. ## Strategic Decisions diff --git a/docs/release-history.md b/docs/release-history.md index 6ecfcac..2ea9f36 100644 --- a/docs/release-history.md +++ b/docs/release-history.md @@ -207,6 +207,9 @@ promotion requires real release history. - V110: `docs/v110-agent-fabric-report-assurance-spec.md` surfaced capture manifest checks in verification reports with separate `verdict`, `decision`, and `assurance` fields. +- V111: `docs/v111-agent-fabric-operator-view-spec.md` documents the + presentation-only operator-view/exporter for V110 report fields, including + the `depone verify --operator-view-out` Markdown export path. ## Current Public Boundaries @@ -256,3 +259,5 @@ promotion requires real release history. - V110 verification reports may display Agent Fabric assurance, but invalid capture manifests fail closed and evidence-contract failures still dominate the final verdict. +- V111 operator views may summarize report fields, but they do not create new + evidence, upgrade assurance, or hide invalid captures and integration risks. diff --git a/docs/v111-agent-fabric-operator-view-spec.md b/docs/v111-agent-fabric-operator-view-spec.md new file mode 100644 index 0000000..b889276 --- /dev/null +++ b/docs/v111-agent-fabric-operator-view-spec.md @@ -0,0 +1,71 @@ +# V111 Agent Fabric Operator View Spec + +V111 adds a small operator-facing Markdown view/exporter on top of the V110 +verification report fields. 
+ +## Boundary + +The view consumes an existing Depone verification report. It does not execute +commands, create Agent Fabric captures, validate live model output, introduce a +new assurance level, or bypass evidence-contract failures. + +The source of truth remains the verification report JSON and the evidence files +referenced by that report. The view may summarize fields, but it must not turn a +summary into stronger proof than the report already carries. + +## Required report inputs + +A V111-compatible view must read these V110 fields when present: + +- `verdict`; +- `decision`; +- `assurance`; +- `agent_fabric_captures[]` entries with `evidence_path`, `assurance`, + `decision`, `valid`, and `errors`. + +Missing V110 fields must be rendered as an integration risk, not silently +upgraded to success. Invalid capture entries must remain visible to the +operator. + +## View/export behavior + +The operator view should make the following distinctions explicit: + +- report verdict versus operator-facing decision; +- report-level assurance versus capture-level assurance; +- valid captures versus invalid captures; +- self-report-only `A0-claims-only` material versus locally observed + `A1-local-observed` material; +- evidence-contract failures versus Agent Fabric capture failures. + +A Markdown export is available through: + +```bash +python3 -m depone verify --out report.json --operator-view-out operator-view.md +``` + +The export is deterministic, stdlib-only, and derived from report fields. It +preserves source paths so an operator can trace each displayed capture back to +the underlying evidence artifact. The view layer does not duplicate V110 +validation logic; it renders the report state it is given. 
+ +Compatibility behavior: + +- reports without `agent_fabric_captures` render an explicit no-captures + message and keep `A0-claims-only` as the default assurance; +- missing V110 fields render as `unknown` rather than a stronger pass state; +- invalid capture manifests stay fail-closed in the report and remain visible + in the view; +- Depone remains the public brand, with DWM Core used only for the internal + engine where needed. + +## Verification + +The V111 implementation is covered by: + +```bash +python3 -m py_compile depone/__main__.py depone/verify/__init__.py depone/verify/operator_view.py +python3 tests/test_agent_fabric_report_assurance.py +python3 -m depone verify --self-test +python3 -m depone validate-contracts --self-test +``` diff --git a/docs/v111-decision.md b/docs/v111-decision.md new file mode 100644 index 0000000..b7fc270 --- /dev/null +++ b/docs/v111-decision.md @@ -0,0 +1,40 @@ +# V111 Decision + +Decision: keep the V111 Agent Fabric operator-view/exporter as a +presentation-only layer over verification reports. + +V111 should make the V110 report fields easier for an operator to read without +changing the trust model. The view/exporter is a presentation layer over Depone +verification reports: it can summarize `verdict`, `decision`, `assurance`, and +Agent Fabric capture entries, but it cannot create new evidence or upgrade an +assurance label. + +Accepted implementation: + +- consume existing verification report JSON as the source of truth; +- preserve `A0-claims-only` and `A1-local-observed` exactly as V109/V110 define + them; +- keep invalid capture manifests visible and fail-closed through the underlying + report; +- expose evidence paths in any exported summary for traceability; +- keep the implementation stdlib-only and deterministic; +- expose the Markdown exporter through + `python3 -m depone verify --operator-view-out <path>`.
+ +Resolved integration choices: + +- the exporter command is `--operator-view-out` on `depone verify`; +- tests exercise the CLI write path, empty capture reports, invalid capture + rendering, and evidence-contract dominance over Agent Fabric capture state; +- the view layer renders report fields without revalidating capture manifests; +- missing V110 fields render as `unknown`, not as a stronger pass state; +- public docs stay on the Depone brand and do not reintroduce old product + naming. + +This decision intentionally does not claim: + +- live model or command execution; +- external attestation; +- new assurance levels beyond V109/V110; +- improved productivity, speed, cost, quality, or direct-agent superiority; +- release readiness beyond the focused V111 operator-view slice. diff --git a/tests/test_agent_fabric_capture_bridge.py b/tests/test_agent_fabric_capture_bridge.py index 4b0fb4f..2416583 100644 --- a/tests/test_agent_fabric_capture_bridge.py +++ b/tests/test_agent_fabric_capture_bridge.py @@ -87,6 +87,38 @@ def test_self_report_without_observer_remains_a0_claims_only(self) -> None: self.assertEqual(manifest["decision"], "claims-only") self.assertEqual(validate_capture_manifest(manifest), []) + + def test_rejects_new_assurance_level(self) -> None: + manifest = build_capture_manifest(_fixture()) + manifest["assurance"] = "A2-live-observed" + manifest["decision"] = "trusted-live-capture" + + errors = validate_capture_manifest(manifest) + + self.assertTrue( + any( + "assurance must be 'A0-claims-only' or 'A1-local-observed'" in e + for e in errors + ), + errors, + ) + + def test_rejects_live_source_fixture_even_with_observer_capture(self) -> None: + fixture = _fixture() + fixture["adapter"]["executes_commands"] = True + manifest = build_capture_manifest( + fixture, + observer_capture=_observer_capture(), + allowed_touched_files=["depone/example.py"], + ) + + errors = validate_capture_manifest(manifest) + + self.assertTrue( + any("adapter.executes_commands 
must be false" in e for e in errors), + errors, + ) + def test_tampered_observer_capture_fails_closed(self) -> None: manifest = build_capture_manifest( _fixture(), diff --git a/tests/test_agent_fabric_reference_adapter.py b/tests/test_agent_fabric_reference_adapter.py index 1edac0e..120eba1 100644 --- a/tests/test_agent_fabric_reference_adapter.py +++ b/tests/test_agent_fabric_reference_adapter.py @@ -60,6 +60,15 @@ def test_builds_non_authoritative_fixture_for_valid_invocation(self) -> None: self.assertFalse(fixture["adapter"]["executes_commands"]) self.assertEqual(validate_reference_adapter_fixture(fixture), []) + + def test_rejects_new_capture_trust_level(self) -> None: + fixture = build_reference_adapter_fixture(_invocation(), self_report=_result()) + fixture["capture"]["trust_level"] = "A1-local-observed" + + errors = validate_reference_adapter_fixture(fixture) + + self.assertIn("capture.trust_level must be 'A0-claims-only'", errors) + def test_rejects_live_execution_claim(self) -> None: fixture = build_reference_adapter_fixture(_invocation(), self_report=_result()) fixture["adapter"]["executes_commands"] = True diff --git a/tests/test_agent_fabric_report_assurance.py b/tests/test_agent_fabric_report_assurance.py index 4bae82b..ceeb1b5 100644 --- a/tests/test_agent_fabric_report_assurance.py +++ b/tests/test_agent_fabric_report_assurance.py @@ -2,14 +2,19 @@ from __future__ import annotations +import argparse import hashlib import json +import tempfile import unittest +from pathlib import Path from depone.agent_fabric.capture_bridge import build_capture_manifest from depone.agent_fabric.reference_adapter import build_reference_adapter_fixture from depone.verify.adapters.base import EvidenceContext, EvidenceFile +from depone.verify import run as run_verify from depone.verify.engine import run_verification +from depone.verify.operator_view import render_operator_view def _sha(text: str) -> str: @@ -127,6 +132,59 @@ def 
test_self_report_only_capture_stays_a0(self) -> None: self.assertEqual(report.decision, "pass") self.assertEqual(report.assurance, "A0-claims-only") + + def test_a1_capture_cannot_bypass_missing_evidence_contract(self) -> None: + files = [ + evidence_file + for evidence_file in _base_evidence_files(_a1_manifest()) + if evidence_file.path != "evidence-contract.json" + ] + evidence = EvidenceContext( + run_id="assurance-test-run", + files=files, + raw={"metadata": {"run_id": "assurance-test-run"}}, + ) + + report = run_verification(_plan(), evidence) + + self.assertEqual(report.verdict, "refuted") + self.assertEqual(report.decision, "fail") + self.assertEqual(report.assurance, "A1-local-observed") + self.assertTrue( + any( + entry.code == "ERR_EVIDENCE_CONTRACT_MISSING" + for entry in report.evidence_contract + ), + report.evidence_contract, + ) + + def test_a1_capture_cannot_bypass_invalid_evidence_contract(self) -> None: + files = _base_evidence_files(_a1_manifest()) + files = [ + _file("evidence-contract.json", "{}") + if evidence_file.path == "evidence-contract.json" + else evidence_file + for evidence_file in files + ] + evidence = EvidenceContext( + run_id="assurance-test-run", + files=files, + raw={"metadata": {"run_id": "assurance-test-run"}}, + ) + + report = run_verification(_plan(), evidence) + + self.assertEqual(report.verdict, "refuted") + self.assertEqual(report.decision, "fail") + self.assertEqual(report.assurance, "A1-local-observed") + self.assertTrue( + any( + entry.code == "ERR_EVIDENCE_CONTRACT_INVALID" + for entry in report.evidence_contract + ), + report.evidence_contract, + ) + def test_invalid_capture_manifest_refutes_report_without_hiding_errors(self) -> None: manifest = _a1_manifest() manifest["observer_capture"]["test_output"]["summary"] = "tampered" @@ -143,6 +201,82 @@ def test_invalid_capture_manifest_refutes_report_without_hiding_errors(self) -> self.assertEqual(report.agent_fabric_captures[0].valid, False) 
self.assertTrue(report.agent_fabric_captures[0].errors) + def test_operator_view_renders_v110_report_fields(self) -> None: + evidence = EvidenceContext( + run_id="assurance-test-run", + files=_base_evidence_files(_a1_manifest()), + raw={"metadata": {"run_id": "assurance-test-run"}}, + ) + + view = render_operator_view(run_verification(_plan(), evidence)) + + self.assertIn("- Decision: pass", view) + self.assertIn("- Assurance: A1-local-observed", view) + self.assertIn("- Agent Fabric captures: 1", view) + self.assertIn("`agent-fabric-capture-manifest.json`", view) + self.assertIn(" - Valid: yes", view) + + def test_operator_view_renders_empty_capture_list(self) -> None: + evidence = EvidenceContext( + run_id="assurance-test-run", + files=_base_evidence_files({}), + raw={"metadata": {"run_id": "assurance-test-run"}}, + ) + + view = render_operator_view(run_verification(_plan(), evidence)) + + self.assertIn("- Decision: pass", view) + self.assertIn("- Assurance: A0-claims-only", view) + self.assertIn("- Agent Fabric captures: 0", view) + self.assertIn("- None", view) + + def test_operator_view_renders_invalid_capture_errors(self) -> None: + manifest = _a1_manifest() + manifest["observer_capture"]["test_output"]["summary"] = "tampered" + evidence = EvidenceContext( + run_id="assurance-test-run", + files=_base_evidence_files(manifest), + raw={"metadata": {"run_id": "assurance-test-run"}}, + ) + + view = render_operator_view(run_verification(_plan(), evidence)) + + self.assertIn("- Decision: fail", view) + self.assertIn(" - Valid: no", view) + self.assertIn(" - Errors:", view) + self.assertIn("observer_capture_hash mismatch", view) + + def test_verify_cli_writes_operator_view_output(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + plan_path = root / "plan.json" + evidence_dir = root / "evidence" + report_path = root / "verification-report.json" + view_path = root / "operator-view.md" + evidence_dir.mkdir() + 
plan_path.write_text(json.dumps(_plan()), encoding="utf-8") + for evidence_file in _base_evidence_files(_a1_manifest()): + target = evidence_dir / evidence_file.path + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(evidence_file.content, encoding="utf-8") + + run_verify( + argparse.Namespace( + self_test=False, + plan=str(plan_path), + evidence=str(evidence_dir), + adapter="generic", + out=str(report_path), + operator_view_out=str(view_path), + ) + ) + + self.assertTrue(report_path.is_file()) + view = view_path.read_text(encoding="utf-8") + self.assertIn("# Verification Operator View", view) + self.assertIn("- Decision: pass", view) + self.assertIn("- Assurance: A1-local-observed", view) + if __name__ == "__main__": unittest.main()