diff --git a/infrastructure/iam/alpha-engine-step-functions-role.json b/infrastructure/iam/alpha-engine-step-functions-role.json index 732a355..c290b78 100644 --- a/infrastructure/iam/alpha-engine-step-functions-role.json +++ b/infrastructure/iam/alpha-engine-step-functions-role.json @@ -17,7 +17,8 @@ "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-inference*", "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-health-check*", "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-substrate*", - "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*" + "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*", + "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-evaluator*" ] }, { diff --git a/infrastructure/step_function.json b/infrastructure/step_function.json index fcfec2b..462ac74 100644 --- a/infrastructure/step_function.json +++ b/infrastructure/step_function.json @@ -2252,6 +2252,41 @@ } ], "ResultPath": "$.substrate_check_poll", + "Next": "ReportCard" + }, + "ReportCard": { + "Type": "Task", + "Comment": "Evaluator Report Card v2 (Layer B, Option B) — builds s3://alpha-engine-research/evaluator/{date}/report_card.json from the persisted per-module artifacts (the 7-tile MetricRecord substrate the Director will consume). Runs here, after the Evaluator + substrate health checks, so it reads fresh grades. NON-FATAL: its own Catch routes to CheckShellRunNotify so an advisory grading failure never breaks the run that produced the real trading artifacts.", + "Resource": "arn:aws:states:::lambda:invoke", + "Parameters": { + "FunctionName": "alpha-engine-evaluator:live", + "Payload": { + "date.$": "$.run_date" + } + }, + "TimeoutSeconds": 300, + "Retry": [ + { + "ErrorEquals": [ + "Lambda.ServiceException", + "Lambda.TooManyRequestsException" + ], + "MaxAttempts": 1, + "IntervalSeconds": 30, + "BackoffRate": 1.0 + } + ], + "Catch": [ + { + "ErrorEquals": [ + "States.ALL" + ], + "Comment": "Advisory grading is non-fatal — continue to the terminal notify regardless.", + "Next": "CheckShellRunNotify", + "ResultPath": "$.report_card_error" + } + ], + "ResultPath": "$.report_card_result", "Next": "CheckShellRunNotify" }, "CheckShellRunNotify": { diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py index 9599204..5d2ff47 100644 --- a/tests/test_sf_friday_shell_run_wiring.py +++ b/tests/test_sf_friday_shell_run_wiring.py @@ -674,10 +674,16 @@ def test_dry_lambda_payload_references_control_var( class TestConsolidatedNotify: def test_substrate_check_routes_to_notify_gate(self, states): + # The substrate check now flows through the non-fatal ReportCard state + # (evaluator Report Card v2) before the notify gate; ReportCard's success + # Next AND its Catch both land on CheckShellRunNotify, so the path to the + # notify gate is preserved whether grading succeeds or fails. assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] - == "CheckShellRunNotify" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" ) + report_card = states["ReportCard"] + assert report_card["Next"] == "CheckShellRunNotify" + assert all(c["Next"] == "CheckShellRunNotify" for c in report_card["Catch"]) def test_shell_run_notify_reuses_sns_substrate(self, states): """NotifyShellRunComplete surfaces the user-facing 'Saturday diff --git a/tests/test_sf_payload_uniqueness.py b/tests/test_sf_payload_uniqueness.py index 1d53c9d..c6d2e4b 100644 --- a/tests/test_sf_payload_uniqueness.py +++ b/tests/test_sf_payload_uniqueness.py @@ -103,6 +103,9 @@ def _flatten_states(sf_doc: dict) -> dict: {"dry_run_llm.$", "end_time_iso.$", "max_depth", "window_days"} ), "AggregateCosts": frozenset({"date.$", "dry_run_llm.$"}), + # Evaluator Report Card v2 (Layer B) — alpha-engine-evaluator:live. Builds + # evaluator/{date}/report_card.json; non-fatal (own Catch → notify gate). + "ReportCard": frozenset({"date.$"}), } # Weekday SF — alpha-engine-predictor Lambdas diff --git a/tests/test_sf_substrate_check_wiring.py b/tests/test_sf_substrate_check_wiring.py index 783a220..be26020 100644 --- a/tests/test_sf_substrate_check_wiring.py +++ b/tests/test_sf_substrate_check_wiring.py @@ -76,10 +76,15 @@ def test_wait_for_substrate_routes_to_notify_complete(self, states): # dry-pass gets a shell-run-tagged email. The gate's Default is the # unchanged NotifyComplete, so the REAL Saturday run (no shell_run # input) still ends at NotifyComplete — strict superset preserved. + # + # The non-fatal ReportCard state (evaluator Report Card v2) now sits + # between the substrate poll and the notify gate; both its Next and its + # Catch land on CheckShellRunNotify, preserving the success edge. assert ( - states["WaitForWeeklySubstrateHealthCheck"]["Next"] - == "CheckShellRunNotify" + states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard" ) + assert states["ReportCard"]["Next"] == "CheckShellRunNotify" + assert all(c["Next"] == "CheckShellRunNotify" for c in states["ReportCard"]["Catch"]) assert states["CheckShellRunNotify"]["Default"] == "NotifyComplete"