From c430ee7bc805650bd02095142b2237169f61edca Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Thu, 4 Jun 2026 16:15:29 -0700 Subject: [PATCH] =?UTF-8?q?fix(infra):=20pass=20SF=20definition=20via=20fi?= =?UTF-8?q?le://=20=E2=80=94=20inline=20arg=20blew=20ARG=5FMAX=20(deploy?= =?UTF-8?q?=20broke)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Director SF state (2026-06-04) pushed the Saturday ASL to ~131 KB; the deploy-infrastructure.yml auto-deploy aborted at update-state-machine with "aws: Argument list too long" (exit 126) because the definition was passed inline via --definition "$(cat ...)". That silently left BOTH live state machines stamped behind origin/main HEAD → the deploy-drift preflight would halt the next weekday/Saturday pipeline run. Fix: --definition "file://$STAMPED" for both the Saturday + weekday updates. file:// reads from disk, so the bound is the SF service limit (1 MB), not the runner's ARG_MAX (env + args). No behavior change otherwise. Co-Authored-By: Claude Opus 4.8 (1M context) --- infrastructure/deploy-infrastructure.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/infrastructure/deploy-infrastructure.sh b/infrastructure/deploy-infrastructure.sh index f3dc9b1..b912fd0 100755 --- a/infrastructure/deploy-infrastructure.sh +++ b/infrastructure/deploy-infrastructure.sh @@ -101,10 +101,19 @@ echo "==> Updating Step Function definitions..." SAT_ARN="arn:aws:states:$REGION:${ACCOUNT_ID}:stateMachine:alpha-engine-saturday-pipeline" DAILY_ARN="arn:aws:states:$REGION:${ACCOUNT_ID}:stateMachine:alpha-engine-weekday-pipeline" -aws stepfunctions update-state-machine --state-machine-arn "$SAT_ARN" --definition "$(cat "$SAT_STAMPED")" --query "updateDate" --output text +# Pass the definition via file:// — NOT inline "$(cat ...)". 
The Saturday ASL is +# ~131 KB and growing (one state per pipeline step). An inline arg must fit the +# exec limits — ARG_MAX for args + env combined (the AWS session-token env on the +# CI runner counts) and, on Linux, 128 KiB per single argument — or the runner +# aborts with "aws: Argument list too long" (exit 126), silently leaving the live +# SF stamp behind origin/main HEAD and tripping the deploy-drift preflight on the +# next pipeline run. file:// reads from disk, so the definition size is bounded +# only by the SF service limit (1 MB), not by the exec argument limits. +# Regression: 2026-06-04 — the Director SF state pushed the Saturday ASL over the +# line and broke this deploy. +aws stepfunctions update-state-machine --state-machine-arn "$SAT_ARN" --definition "file://$SAT_STAMPED" --query "updateDate" --output text echo " Saturday pipeline updated." -aws stepfunctions update-state-machine --state-machine-arn "$DAILY_ARN" --definition "$(cat "$DAILY_STAMPED")" --query "updateDate" --output text +aws stepfunctions update-state-machine --state-machine-arn "$DAILY_ARN" --definition "file://$DAILY_STAMPED" --query "updateDate" --output text echo " Weekday pipeline updated." # ── 4. Deploy/update CloudFormation stack ────────────────────────────────────