Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion infrastructure/step_function.json
Original file line number Diff line number Diff line change
Expand Up @@ -2252,7 +2252,27 @@
}
],
"ResultPath": "$.substrate_check_poll",
"Next": "ReportCard"
"Next": "CheckShellRunSkipDirector"
},
"CheckShellRunSkipDirector": {
"Type": "Choice",
"Comment": "Shell-run / preflight guard for the advisory tail (ROADMAP L4504). The Friday-PM Preflight Pipeline (shell_run=true) dry-executes the Saturday SF to exercise bootstrap paths — but ReportCard + Director have NO dry path (their payload is only {date}; the Director Lambda gates solely on DIRECTOR_ENABLED). Left ungated, a Friday preflight would run ReportCard for real over backtest/{Fri-date}/* that the dry workload never wrote (→ a degenerate, mostly-N/A card) and, once DIRECTOR_ENABLED is flipped on, fire a REAL Opus Director call that merges that degenerate plan into the SHARED, non-date-scoped carry-over ledger (director/carryover_ledger.json) — polluting the state the real Saturday run reads. So on shell_run=true we hard-skip BOTH advisory states straight to the shell-run-aware notify; the preflight's purpose is bootstrap exercise and both Lambdas already have their own canaries. shell_run absent/false → Default → ReportCard, BYTE-IDENTICAL to the pre-guard real Saturday run.",
"Choices": [
{
"And": [
{
"Variable": "$.shell_run",
"IsPresent": true
},
{
"Variable": "$.shell_run",
"BooleanEquals": true
}
],
"Next": "CheckShellRunNotify"
}
],
"Default": "ReportCard"
},
"ReportCard": {
"Type": "Task",
Expand Down
43 changes: 36 additions & 7 deletions tests/test_sf_friday_shell_run_wiring.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,22 +674,51 @@ def test_dry_lambda_payload_references_control_var(

class TestConsolidatedNotify:
def test_substrate_check_routes_to_notify_gate(self, states):
# The substrate check now flows through two non-fatal advisory states
# (evaluator Report Card v2, then the Director) before the notify gate.
# ReportCard's SUCCESS Next feeds the Director; its Catch skips straight
# to CheckShellRunNotify. The Director's own Next AND Catch both land on
# CheckShellRunNotify, so the path to the notify gate is preserved
# whether grading/advisory succeed or fail.
# The substrate check now flows through the shell-run guard
# (CheckShellRunSkipDirector, ROADMAP L4504) and then two non-fatal
# advisory states (evaluator Report Card v2, then the Director) before
# the notify gate. On a real Saturday run the guard's Default routes to
# ReportCard; ReportCard's SUCCESS Next feeds the Director; its Catch
# skips straight to CheckShellRunNotify. The Director's own Next AND
# Catch both land on CheckShellRunNotify, so the path to the notify gate
# is preserved whether grading/advisory succeed or fail.
assert (
states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard"
states["WaitForWeeklySubstrateHealthCheck"]["Next"]
== "CheckShellRunSkipDirector"
)
assert states["CheckShellRunSkipDirector"]["Default"] == "ReportCard"
report_card = states["ReportCard"]
assert report_card["Next"] == "Director"
assert all(c["Next"] == "CheckShellRunNotify" for c in report_card["Catch"])
director = states["Director"]
assert director["Next"] == "CheckShellRunNotify"
assert all(c["Next"] == "CheckShellRunNotify" for c in director["Catch"])

def test_shell_run_skips_advisory_tail(self, states):
"""ROADMAP L4504: on a Friday-PM Preflight Pipeline (shell_run=true) the
advisory tail (ReportCard + Director) MUST be hard-skipped straight to
the notify gate. Left ungated, the Director would run a real Opus call
over a degenerate preflight card and pollute the shared, non-date-scoped
carry-over ledger (director/carryover_ledger.json) that the real
Saturday run reads. The guard sits between the substrate health check
and ReportCard so BOTH advisory states are bypassed on the preflight.
"""
gate = states["CheckShellRunSkipDirector"]
assert gate["Type"] == "Choice"
# shell_run absent/false → Default → ReportCard (byte-identical Saturday).
assert gate["Default"] == "ReportCard"
# shell_run present AND true → skip the whole advisory tail to notify.
choices = gate["Choices"]
assert len(choices) == 1
assert choices[0]["Next"] == "CheckShellRunNotify"
conds = choices[0]["And"]
assert {c["Variable"] for c in conds} == {"$.shell_run"}
assert any(c.get("IsPresent") is True for c in conds)
assert any(c.get("BooleanEquals") is True for c in conds)
# The skip target is the notify gate, NOT ReportCard/Director.
assert "ReportCard" not in {choices[0]["Next"]}
assert "Director" not in {choices[0]["Next"]}

def test_shell_run_notify_reuses_sns_substrate(self, states):
"""NotifyShellRunComplete surfaces the user-facing 'Saturday
Preflight Pipeline' label so a green Friday dry-pass is
Expand Down
21 changes: 16 additions & 5 deletions tests/test_sf_substrate_check_wiring.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,25 @@ def test_wait_for_substrate_routes_to_notify_complete(self, states):
# unchanged NotifyComplete, so the REAL Saturday run (no shell_run
# input) still ends at NotifyComplete — strict superset preserved.
#
# The shell-run guard (CheckShellRunSkipDirector, ROADMAP L4504) now
# sits first: on a Friday-PM preflight (shell_run=true) it skips the
# whole advisory tail straight to CheckShellRunNotify; its Default routes
# the REAL Saturday run (no shell_run) to ReportCard, unchanged.
#
# Two non-fatal advisory states (evaluator Report Card v2, then the
# Director) now sit between the substrate poll and the notify gate.
# ReportCard's SUCCESS edge feeds the Director (which weighs the fresh
# card); ReportCard's Catch skips the Director straight to notify (no
# card to weigh). The Director's own Next AND Catch both land on
# Director) sit between the guard and the notify gate. ReportCard's
# SUCCESS edge feeds the Director (which weighs the fresh card);
# ReportCard's Catch skips the Director straight to notify (no card to
# weigh). The Director's own Next AND Catch both land on
# CheckShellRunNotify, so every path still preserves the success edge.
assert (
states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard"
states["WaitForWeeklySubstrateHealthCheck"]["Next"]
== "CheckShellRunSkipDirector"
)
assert states["CheckShellRunSkipDirector"]["Default"] == "ReportCard"
assert (
states["CheckShellRunSkipDirector"]["Choices"][0]["Next"]
== "CheckShellRunNotify"
)
assert states["ReportCard"]["Next"] == "Director"
assert all(c["Next"] == "CheckShellRunNotify" for c in states["ReportCard"]["Catch"])
Expand Down
Loading