diff --git a/infrastructure/step_function.json b/infrastructure/step_function.json index bd2d12d..c4b1441 100644 --- a/infrastructure/step_function.json +++ b/infrastructure/step_function.json @@ -2252,7 +2252,27 @@ } ], "ResultPath": "$.substrate_check_poll", - "Next": "ReportCard" + "Next": "CheckShellRunSkipDirector" + }, + "CheckShellRunSkipDirector": { + "Type": "Choice", + "Comment": "Shell-run / preflight guard for the advisory tail (ROADMAP L4504). The Friday-PM Preflight Pipeline (shell_run=true) dry-executes the Saturday SF to exercise bootstrap paths — but ReportCard + Director have NO dry path (their payload is only {date}; the Director Lambda gates solely on DIRECTOR_ENABLED). Left ungated, a Friday preflight would run ReportCard for real over backtest/{Fri-date}/* that the dry workload never wrote (→ a degenerate, mostly-N/A card) and, once DIRECTOR_ENABLED is flipped on, fire a REAL Opus Director call that merges that degenerate plan into the SHARED, non-date-scoped carry-over ledger (director/carryover_ledger.json) — polluting the state the real Saturday run reads. So on shell_run=true we hard-skip BOTH advisory states straight to the shell-run-aware notify; the preflight's purpose is bootstrap exercise and both Lambdas already have their own canaries. 
shell_run absent/false → Default → ReportCard, BYTE-IDENTICAL to the pre-guard real Saturday run.", + "Choices": [ + { + "And": [ + { + "Variable": "$.shell_run", + "IsPresent": true + }, + { + "Variable": "$.shell_run", + "BooleanEquals": true + } + ], + "Next": "CheckShellRunNotify" + } + ], + "Default": "ReportCard" }, "ReportCard": { "Type": "Task", diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py index 904f2e1..e0d4b4d 100644 --- a/tests/test_sf_friday_shell_run_wiring.py +++ b/tests/test_sf_friday_shell_run_wiring.py @@ -674,15 +674,19 @@ def test_dry_lambda_payload_references_control_var( class TestConsolidatedNotify: def test_substrate_check_routes_to_notify_gate(self, states): - # The substrate check now flows through two non-fatal advisory states - # (evaluator Report Card v2, then the Director) before the notify gate. - # ReportCard's SUCCESS Next feeds the Director; its Catch skips straight - # to CheckShellRunNotify. The Director's own Next AND Catch both land on - # CheckShellRunNotify, so the path to the notify gate is preserved - # whether grading/advisory succeed or fail. + # The substrate check now flows through the shell-run guard + # (CheckShellRunSkipDirector, ROADMAP L4504) and then two non-fatal + # advisory states (evaluator Report Card v2, then the Director) before + # the notify gate. On a real Saturday run the guard's Default routes to + # ReportCard; ReportCard's SUCCESS Next feeds the Director; its Catch + # skips straight to CheckShellRunNotify. The Director's own Next AND + # Catch both land on CheckShellRunNotify, so the path to the notify gate + # is preserved whether grading/advisory succeed or fail. 
assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] + == "CheckShellRunSkipDirector" ) + assert states["CheckShellRunSkipDirector"]["Default"] == "ReportCard" report_card = states["ReportCard"] assert report_card["Next"] == "Director" assert all(c["Next"] == "CheckShellRunNotify" for c in report_card["Catch"]) @@ -690,6 +694,31 @@ def test_substrate_check_routes_to_notify_gate(self, states): assert director["Next"] == "CheckShellRunNotify" assert all(c["Next"] == "CheckShellRunNotify" for c in director["Catch"]) + def test_shell_run_skips_advisory_tail(self, states): + """ROADMAP L4504: on a Friday-PM Preflight Pipeline (shell_run=true) the + advisory tail (ReportCard + Director) MUST be hard-skipped straight to + the notify gate. Left ungated, the Director would run a real Opus call + over a degenerate preflight card and pollute the shared, non-date-scoped + carry-over ledger (director/carryover_ledger.json) that the real + Saturday run reads. The guard sits between the substrate health check + and ReportCard so BOTH advisory states are bypassed on the preflight. + """ + gate = states["CheckShellRunSkipDirector"] + assert gate["Type"] == "Choice" + # shell_run absent/false → Default → ReportCard (byte-identical Saturday). + assert gate["Default"] == "ReportCard" + # shell_run present AND true → skip the whole advisory tail to notify. + choices = gate["Choices"] + assert len(choices) == 1 + assert choices[0]["Next"] == "CheckShellRunNotify" + conds = choices[0]["And"] + assert {c["Variable"] for c in conds} == {"$.shell_run"} + assert any(c.get("IsPresent") is True for c in conds) + assert any(c.get("BooleanEquals") is True for c in conds) + # The skip target is the notify gate, NOT ReportCard/Director. 
+ assert choices[0]["Next"] not in {"ReportCard", "Director"} + assert choices[0]["Next"] in states  # skip target must be a defined state + def test_shell_run_notify_reuses_sns_substrate(self, states): """NotifyShellRunComplete surfaces the user-facing 'Saturday Preflight Pipeline' label so a green Friday dry-pass is diff --git a/tests/test_sf_substrate_check_wiring.py b/tests/test_sf_substrate_check_wiring.py index 22aede8..d8df8ba 100644 --- a/tests/test_sf_substrate_check_wiring.py +++ b/tests/test_sf_substrate_check_wiring.py @@ -77,14 +77,25 @@ def test_wait_for_substrate_routes_to_notify_complete(self, states): # unchanged NotifyComplete, so the REAL Saturday run (no shell_run # input) still ends at NotifyComplete — strict superset preserved. # + # The shell-run guard (CheckShellRunSkipDirector, ROADMAP L4504) now + # sits first: on a Friday-PM preflight (shell_run=true) it skips the + # whole advisory tail straight to CheckShellRunNotify; its Default routes + # the REAL Saturday run (no shell_run) to ReportCard, unchanged. + # + # Two non-fatal advisory states (evaluator Report Card v2, then the - # Director) now sit between the substrate poll and the notify gate. - # ReportCard's SUCCESS edge feeds the Director (which weighs the fresh - # card); ReportCard's Catch skips the Director straight to notify (no - # card to weigh). The Director's own Next AND Catch both land on + # Director) sit between the guard and the notify gate. ReportCard's + # SUCCESS edge feeds the Director (which weighs the fresh card); + # ReportCard's Catch skips the Director straight to notify (no card to + # weigh). The Director's own Next AND Catch both land on # CheckShellRunNotify, so every path still preserves the success edge. 
assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] + == "CheckShellRunSkipDirector" + ) + assert states["CheckShellRunSkipDirector"]["Default"] == "ReportCard" + assert ( + states["CheckShellRunSkipDirector"]["Choices"][0]["Next"] + == "CheckShellRunNotify" ) assert states["ReportCard"]["Next"] == "Director" assert all(c["Next"] == "CheckShellRunNotify" for c in states["ReportCard"]["Catch"])