From bebe4c6389ae4ddc29b07f1cf376519288475577 Mon Sep 17 00:00:00 2001
From: Brian McMahon <brian@nousergon.ai>
Date: Thu, 4 Jun 2026 13:39:02 -0700
Subject: [PATCH] feat(sf): wire the non-fatal Director ReportCard state into
 the Saturday pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inserts a ReportCard Lambda Task state into the Saturday Step Functions (SF)
state machine, after the substrate health check (so it reads fresh grades). It
invokes alpha-engine-evaluator:live to build
s3://alpha-engine-research/evaluator/{date}/report_card.json — the 7-tile
Report Card v2 substrate the Director will consume. NON-FATAL: the state's own
Catch routes to CheckShellRunNotify, so an advisory grading failure never breaks
the run that produced the real trading artifacts (director-plan §5).

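- step_function.json: WaitForWeeklySubstrateHealthCheck.Next changes from
  CheckShellRunNotify to ReportCard; new ReportCard Task (FunctionName
  alpha-engine-evaluator:live, Payload {date.$: $.run_date}, TimeoutSeconds
  300, one retry after 30s on Lambda.ServiceException /
  Lambda.TooManyRequestsException, Catch States.ALL → CheckShellRunNotify with
  the error kept at $.report_card_error; result at $.report_card_result).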
- iam/alpha-engine-step-functions-role.json: add lambda:InvokeFunction on
  arn:...:function:alpha-engine-evaluator* (applied live pre-merge per the
  codified-IAM convention; drift-check passes).
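- Update the SF-wiring guardrail tests (test_sf_friday_shell_run_wiring.py,
  test_sf_substrate_check_wiring.py) and the payload-key registry
  (test_sf_payload_uniqueness.py) for the new state — 3 test files in all;
  these are the chokepoints that fail loudly on a wiring change.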

Already applied live this session: the IAM grant and an update-state-machine on
alpha-engine-saturday-pipeline (verified the ReportCard state is present and
wired). Full test suite passes (1804→1807 tests).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../iam/alpha-engine-step-functions-role.json |  3 +-
 infrastructure/step_function.json             | 35 +++++++++++++++++++
 tests/test_sf_friday_shell_run_wiring.py      | 10 ++++--
 tests/test_sf_payload_uniqueness.py           |  3 ++
 tests/test_sf_substrate_check_wiring.py       |  9 +++--
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/infrastructure/iam/alpha-engine-step-functions-role.json b/infrastructure/iam/alpha-engine-step-functions-role.json
index 732a355..c290b78 100644
--- a/infrastructure/iam/alpha-engine-step-functions-role.json
+++ b/infrastructure/iam/alpha-engine-step-functions-role.json
@@ -17,7 +17,8 @@
                 "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-inference*",
                 "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-health-check*",
                 "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-substrate*",
-                "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*"
+                "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-predictor-regime-retrospective-eval*",
+                "arn:aws:lambda:us-east-1:711398986525:function:alpha-engine-evaluator*"
             ]
         },
         {
diff --git a/infrastructure/step_function.json b/infrastructure/step_function.json
index fcfec2b..462ac74 100644
--- a/infrastructure/step_function.json
+++ b/infrastructure/step_function.json
@@ -2252,6 +2252,41 @@
         }
       ],
       "ResultPath": "$.substrate_check_poll",
+      "Next": "ReportCard"
+    },
+    "ReportCard": {
+      "Type": "Task",
+      "Comment": "Evaluator Report Card v2 (Layer B, Option B) — builds s3://alpha-engine-research/evaluator/{date}/report_card.json from the persisted per-module artifacts (the 7-tile MetricRecord substrate the Director will consume). Runs here, after the Evaluator + substrate health checks, so it reads fresh grades. NON-FATAL: its own Catch routes to CheckShellRunNotify so an advisory grading failure never breaks the run that produced the real trading artifacts.",
+      "Resource": "arn:aws:states:::lambda:invoke",
+      "Parameters": {
+        "FunctionName": "alpha-engine-evaluator:live",
+        "Payload": {
+          "date.$": "$.run_date"
+        }
+      },
+      "TimeoutSeconds": 300,
+      "Retry": [
+        {
+          "ErrorEquals": [
+            "Lambda.ServiceException",
+            "Lambda.TooManyRequestsException"
+          ],
+          "MaxAttempts": 1,
+          "IntervalSeconds": 30,
+          "BackoffRate": 1.0
+        }
+      ],
+      "Catch": [
+        {
+          "ErrorEquals": [
+            "States.ALL"
+          ],
+          "Comment": "Advisory grading is non-fatal — continue to the terminal notify regardless.",
+          "Next": "CheckShellRunNotify",
+          "ResultPath": "$.report_card_error"
+        }
+      ],
+      "ResultPath": "$.report_card_result",
       "Next": "CheckShellRunNotify"
     },
     "CheckShellRunNotify": {
diff --git a/tests/test_sf_friday_shell_run_wiring.py b/tests/test_sf_friday_shell_run_wiring.py
index 9599204..5d2ff47 100644
--- a/tests/test_sf_friday_shell_run_wiring.py
+++ b/tests/test_sf_friday_shell_run_wiring.py
@@ -674,10 +674,16 @@ def test_dry_lambda_payload_references_control_var(
 
 class TestConsolidatedNotify:
     def test_substrate_check_routes_to_notify_gate(self, states):
+        # The substrate check now flows through the non-fatal ReportCard state
+        # (evaluator Report Card v2) before the notify gate; ReportCard's success
+        # Next AND its Catch both land on CheckShellRunNotify, so the path to the
+        # notify gate is preserved whether grading succeeds or fails.
         assert (
-            states["WaitForWeeklySubstrateHealthCheck"]["Next"]
-            == "CheckShellRunNotify"
+            states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard"
         )
+        report_card = states["ReportCard"]
+        assert report_card["Next"] == "CheckShellRunNotify"
+        assert all(c["Next"] == "CheckShellRunNotify" for c in report_card["Catch"])
 
     def test_shell_run_notify_reuses_sns_substrate(self, states):
         """NotifyShellRunComplete surfaces the user-facing 'Saturday
diff --git a/tests/test_sf_payload_uniqueness.py b/tests/test_sf_payload_uniqueness.py
index 1d53c9d..c6d2e4b 100644
--- a/tests/test_sf_payload_uniqueness.py
+++ b/tests/test_sf_payload_uniqueness.py
@@ -103,6 +103,9 @@ def _flatten_states(sf_doc: dict) -> dict:
         {"dry_run_llm.$", "end_time_iso.$", "max_depth", "window_days"}
     ),
     "AggregateCosts": frozenset({"date.$", "dry_run_llm.$"}),
+    # Evaluator Report Card v2 (Layer B) — alpha-engine-evaluator:live. Builds
+    # evaluator/{date}/report_card.json; non-fatal (own Catch → notify gate).
+    "ReportCard": frozenset({"date.$"}),
 }
 
 # Weekday SF — alpha-engine-predictor Lambdas
diff --git a/tests/test_sf_substrate_check_wiring.py b/tests/test_sf_substrate_check_wiring.py
index 783a220..be26020 100644
--- a/tests/test_sf_substrate_check_wiring.py
+++ b/tests/test_sf_substrate_check_wiring.py
@@ -76,10 +76,15 @@ def test_wait_for_substrate_routes_to_notify_complete(self, states):
         # dry-pass gets a shell-run-tagged email. The gate's Default is the
         # unchanged NotifyComplete, so the REAL Saturday run (no shell_run
         # input) still ends at NotifyComplete — strict superset preserved.
+        #
+        # The non-fatal ReportCard state (evaluator Report Card v2) now sits
+        # between the substrate poll and the notify gate; both its Next and its
+        # Catch land on CheckShellRunNotify, preserving the success edge.
         assert (
-            states["WaitForWeeklySubstrateHealthCheck"]["Next"]
-            == "CheckShellRunNotify"
+            states["WaitForWeeklySubstrateHealthCheck"]["Next"] == "ReportCard"
         )
+        assert states["ReportCard"]["Next"] == "CheckShellRunNotify"
+        assert all(c["Next"] == "CheckShellRunNotify" for c in states["ReportCard"]["Catch"])
         assert states["CheckShellRunNotify"]["Default"] == "NotifyComplete"