From bebe4c6389ae4ddc29b07f1cf376519288475577 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Thu, 4 Jun 2026 13:39:02 -0700 Subject: [PATCH] feat(sf): wire the non-fatal Director ReportCard state into the Saturday pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inserts a ReportCard Lambda state into the Saturday SF after the substrate health check (so it reads fresh grades), invoking alpha-engine-evaluator:live to build s3://alpha-engine-research/evaluator/{date}/report_card.json — the 7-tile Report Card v2 substrate the Director will consume. NON-FATAL: its own Catch routes to CheckShellRunNotify, so an advisory grading failure never breaks the run that produced the real trading artifacts (director-plan §5). - step_function.json: WaitForWeeklySubstrateHealthCheck.Next CheckShellRunNotify → ReportCard; new ReportCard Task (FunctionName alpha-engine-evaluator:live, Payload {date.$: $.run_date}, 300s, retry, Catch→CheckShellRunNotify). - iam/alpha-engine-step-functions-role.json: add lambda:InvokeFunction on arn:...:function:alpha-engine-evaluator* (applied live pre-merge per the codified-IAM convention; drift-check passes). - Update the 3 SF-wiring guardrail tests + the payload registry for the new state (these are the chokepoints that fail-loud on a wiring change). Applied live this session: IAM grant applied + update-state-machine on alpha-engine-saturday-pipeline (verified ReportCard present + wired). Full suite 1804→1807 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../iam/alpha-engine-step-functions-role.json | 3 +- infrastructure/step_function.json | 35 +++++++++++++++++++ tests/test_sf_friday_shell_run_wiring.py | 10 ++++-- tests/test_sf_payload_uniqueness.py | 3 ++ tests/test_sf_substrate_check_wiring.py | 9 +++-- 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/infrastructure/iam/alpha-engine-step-functions-role.json b/infrastructure/iam/alpha-engine-step-functions-role.json index 732a355..c290b78 100644 --- a/infrastructure/iam/alpha-engine-step-functions-role.json +++ b/infrastructure/iam/alpha-engine-step-functions-role.json @@ -17,7 +17,8 @@ "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-inference*", "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-health-check*", "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-substrate*", - "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*" + "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*", + "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-evaluator*" ] }, { diff --git a/infrastructure/step_function.json b/infrastructure/step_function.json index fcfec2b..462ac74 100644 --- a/infrastructure/step_function.json +++ b/infrastructure/step_function.json @@ -2252,6 +2252,41 @@ } ], "ResultPath": "$.substrate_check_poll", + "Next": "ReportCard" + }, + "ReportCard": { + "Type": "Task", + "Comment": "Evaluator Report Card v2 (Layer B, Option B) — builds s3://alpha-engine-research/evaluator/{date}/report_card.json from the persisted per-module artifacts (the 7-tile MetricRecord substrate the Director will consume). Runs here, after the Evaluator + substrate health checks, so it reads fresh grades. NON-FATAL: its own Catch routes to CheckShellRunNotify so an advisory grading failure never breaks the run that produced the real trading artifacts.", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "alpha-engine-evaluator:live", + "Payload": { + "date.$": "$.run_date" + } + }, + "TimeoutSeconds": 300, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.TooManyRequestsException" + ], + "MaxAttempts": 1, + "IntervalSeconds": 30, + "BackoffRate": 1.0 + } + ], + "Catch": [ + { + "ErrorEquals": [ + "States.ALL" + ], + "Comment": "Advisory grading is non-fatal — continue to the terminal notify regardless.", + "Next": "CheckShellRunNotify", + "ResultPath": "$.report_card_error" + } + ], + "ResultPath": "$.report_card_result", "Next": "CheckShellRunNotify" }, "CheckShellRunNotify": { diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py index 9599204..5d2ff47 100644 --- a/tests/test_sf_friday_shell_run_wiring.py +++ b/tests/test_sf_friday_shell_run_wiring.py @@ -674,10 +674,16 @@ def test_dry_lambda_payload_references_control_var( class TestConsolidatedNotify: def test_substrate_check_routes_to_notify_gate(self, states): + # The substrate check now flows through the non-fatal ReportCard state + # (evaluator Report Card v2) before the notify gate; ReportCard's success + # Next AND its Catch both land on CheckShellRunNotify, so the path to the + # notify gate is preserved whether grading succeeds or fails. assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] - == "CheckShellRunNotify" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" ) + report_card = states["ReportCard"] + assert report_card["Next"] == "CheckShellRunNotify" + assert all(c["Next"] == "CheckShellRunNotify" for c in report_card["Catch"]) def test_shell_run_notify_reuses_sns_substrate(self, states): """NotifyShellRunComplete surfaces the user-facing 'Saturday diff --git a/tests/test_sf_payload_uniqueness.py b/tests/test_sf_payload_uniqueness.py index 1d53c9d..c6d2e4b 100644 --- a/tests/test_sf_payload_uniqueness.py +++ b/tests/test_sf_payload_uniqueness.py @@ -103,6 +103,9 @@ def _flatten_states(sf_doc: dict) -> dict: {"dry_run_llm.$", "end_time_iso.$", "max_depth", "window_days"} ), "AggregateCosts": frozenset({"date.$", "dry_run_llm.$"}), + # Evaluator Report Card v2 (Layer B) — alpha-engine-evaluator:live. Builds + # evaluator/{date}/report_card.json; non-fatal (own Catch → notify gate). + "ReportCard": frozenset({"date.$"}), } # Weekday SF — alpha-engine-predictor Lambdas diff --git a/tests/test_sf_substrate_check_wiring.py b/tests/test_sf_substrate_check_wiring.py index 783a220..be26020 100644 --- a/tests/test_sf_substrate_check_wiring.py +++ b/tests/test_sf_substrate_check_wiring.py @@ -76,10 +76,15 @@ def test_wait_for_substrate_routes_to_notify_complete(self, states): # dry-pass gets a shell-run-tagged email. The gate's Default is the # unchanged NotifyComplete, so the REAL Saturday run (no shell_run # input) still ends at NotifyComplete — strict superset preserved. + # + # The non-fatal ReportCard state (evaluator Report Card v2) now sits + # between the substrate poll and the notify gate; both its Next and its + # Catch land on CheckShellRunNotify, preserving the success edge. assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] - == "CheckShellRunNotify" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" ) + assert states["ReportCard"]["Next"] == "CheckShellRunNotify" + assert all(c["Next"] == "CheckShellRunNotify" for c in states["ReportCard"]["Catch"]) assert states["CheckShellRunNotify"]["Default"] == "NotifyComplete"