From 0a5276f306e737832d960c78288218cfada02cc6 Mon Sep 17 00:00:00 2001
From: Webster Bei Yijie <beiyijie@fireworks.ai>
Date: Mon, 18 May 2026 17:49:43 -0700
Subject: [PATCH 01/10] Allow private Fireworks deployment shape versions for
 deployment creation (#26060)

---
 .github/workflows/ci-e2e.yml                  |  40 ++++
 .github/workflows/ci-security.yml             |  28 +++
 .github/workflows/ci.yml                      | 180 ++++++++++++++++--
 .github/workflows/post-publish.yml            |   8 +-
 .github/workflows/release-doctor.yml          |   2 +-
 .github/workflows/release-tag.yml             | 142 --------------
 .release-please-manifest.json                 |   3 +
 CHANGELOG.md                                  |  12 --
 noxfile.py                                    |  22 +--
 pyproject.toml                                |   2 +-
 release-please-config.json                    |  66 +++++++
 requirements-dev.lock                         |   2 +-
 requirements.lock                             |   2 +-
 scripts/utils/upload-artifact.sh              |  27 +++
 src/fireworks/_version.py                     |   2 +-
 src/fireworks/training/sdk/deployment.py      |   3 -
 .../training/sdk/tests/test_deployment.py     |  21 --
 .../training/sdk/tests/test_trainer.py        |  52 -----
 src/fireworks/training/sdk/trainer.py         |  49 -----
 19 files changed, 337 insertions(+), 326 deletions(-)
 create mode 100644 .github/workflows/ci-e2e.yml
 create mode 100644 .github/workflows/ci-security.yml
 delete mode 100644 .github/workflows/release-tag.yml
 create mode 100644 .release-please-manifest.json
 create mode 100644 release-please-config.json
 create mode 100755 scripts/utils/upload-artifact.sh

diff --git a/.github/workflows/ci-e2e.yml b/.github/workflows/ci-e2e.yml
new file mode 100644
index 00000000..2c4c0023
--- /dev/null
+++ b/.github/workflows/ci-e2e.yml
@@ -0,0 +1,40 @@
+name: E2E Tests  # end-to-end suite: runs on a schedule, on manual dispatch, and on pushes to main
+on:
+  schedule:
+    - cron: '0 6 * * *'  # every day at 06:00 (GitHub evaluates cron schedules in UTC)
+  workflow_dispatch:  # manual trigger
+  push:
+    branches:
+      - main
+
+jobs:
+  e2e:  # skipped unless the repository is one of the two named in the condition below
+    if: github.repository == 'stainless-sdks/fireworks-ai-python' || github.repository == 'fw-ai-external/python-sdk'
+    timeout-minutes: 15
+    name: e2e (${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.11', '3.13']  # quoted so each version stays a string
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-python@v5  # NOTE(review): tests run via `rye run`; confirm Rye uses this interpreter
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:  # presumably read by the installer script piped into bash above; verify against Rye docs
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Install dependencies
+        run: rye sync --all-features
+
+      - name: Run e2e tests
+        run: rye run pytest tests/ -m e2e --timeout=60 -v  # -m e2e selects only tests marked "e2e"
+        env:
+          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_E2E_API_KEY }}
diff --git a/.github/workflows/ci-security.yml b/.github/workflows/ci-security.yml
new file mode 100644
index 00000000..ce93b812
--- /dev/null
+++ b/.github/workflows/ci-security.yml
@@ -0,0 +1,28 @@
+name: Security Audit  # audits the locked runtime dependencies with pip-audit
+
+on:
+  schedule:
+    - cron: "0 0 * * 1"  # Mondays at 00:00 (GitHub evaluates cron schedules in UTC)
+  pull_request:
+    paths:  # only pull requests that touch these dependency files trigger the audit
+      - "requirements*.lock"
+      - "pyproject.toml"
+  workflow_dispatch:  # manual trigger
+
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install pip-audit
+        run: pip install pip-audit
+
+      - name: Run pip-audit  # NOTE(review): requirements-dev.lock matches the paths filter above but is not audited here
+        run: pip-audit -r requirements.lock --desc on
+        continue-on-error: false  # explicit default: a failing audit fails the job
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bf6dc9a6..b9757d58 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,22 +1,25 @@
 name: CI
-
-# Minimal safety net for direct PRs. The authoritative CI suite
-# (lint, build, unit + mock-server matrix, coverage, security audit,
-# e2e) runs upstream before promotion, so this workflow only needs
-# to catch the rare direct edit.
-
 on:
   push:
     branches:
       - '**'
+      - '!integrated/**'
+      - '!stl-preview-head/**'
+      - '!stl-preview-base/**'
+      - '!generated'
+      - '!codegen/**'
+      - 'codegen/stl/**'
   pull_request:
+    branches-ignore:
+      - 'stl-preview-head/**'
+      - 'stl-preview-base/**'
 
 jobs:
   lint:
-    name: lint
-    runs-on: ubuntu-latest
     timeout-minutes: 10
-    if: github.event.head_commit.message != 'codegen metadata'
+    name: lint
+    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
     steps:
       - uses: actions/checkout@v6
 
@@ -34,20 +37,159 @@ jobs:
       - name: Run lints
         run: ./scripts/lint
 
-  import-smoke:
-    name: import-smoke
+  build:  # builds the package with Rye; the OIDC and upload steps run only on the Stainless repository
+    if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
+    timeout-minutes: 10
+    name: build
+    permissions:
+      contents: read
+      id-token: write  # lets the github-script step below request an OIDC ID token
+    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:  # presumably read by the installer script piped into bash above; verify against Rye docs
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Install dependencies
+        run: rye sync --all-features
+
+      - name: Run build
+        run: rye build
+
+      - name: Get GitHub OIDC Token  # only on the Stainless repo and when the ref is not under refs/heads/stl/
+        if: |-
+          github.repository == 'stainless-sdks/fireworks-ai-python' &&
+          !startsWith(github.ref, 'refs/heads/stl/')
+        id: github-oidc  # output is read below as steps.github-oidc.outputs.github_token
+        uses: actions/github-script@v8
+        with:
+          script: core.setOutput('github_token', await core.getIDToken());
+
+      - name: Upload tarball  # NOTE(review): upload-artifact.sh uploads dist/*.whl (a wheel), despite this step name
+        if: |-
+          github.repository == 'stainless-sdks/fireworks-ai-python' &&
+          !startsWith(github.ref, 'refs/heads/stl/')
+        env:  # URL, AUTH and SHA are the variables upload-artifact.sh reads
+          URL: https://pkg.stainless.com/s
+          AUTH: ${{ steps.github-oidc.outputs.github_token }}
+          SHA: ${{ github.sha }}
+        run: ./scripts/utils/upload-artifact.sh
+
+  test:  # runs on pushes, and on pull requests only when the head repository is a fork
+    timeout-minutes: 10
+    name: test
+    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:  # presumably read by the installer script piped into bash above; verify against Rye docs
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Bootstrap  # delegates environment setup to the repository's bootstrap script
+        run: ./scripts/bootstrap
+
+      - name: Run tests  # delegates to the repository's test script
+        run: ./scripts/test
+
+  test-mock-server:  # runs tests/api_resources against a local Prism mock on each matrix Python version
+    timeout-minutes: 15
+    name: test-mock-server (Python ${{ matrix.python-version }})
     runs-on: ubuntu-latest
-    timeout-minutes: 5
-    if: github.event.head_commit.message != 'codegen metadata'
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+    strategy:
+      fail-fast: false  # a failure on one Python version does not cancel the others
+      matrix:
+        python-version: ['3.9', '3.11', '3.13']  # quoted so each version stays a string
     steps:
       - uses: actions/checkout@v6
 
-      - uses: actions/setup-python@v6
+      - uses: actions/setup-python@v5  # NOTE(review): replaces setup-python@v6 — confirm the downgrade is intended
         with:
-          python-version: '3.11'
+          python-version: ${{ matrix.python-version }}
+
+      - uses: actions/setup-node@v4  # Node is needed for the npm-installed Prism CLI below
+        with:
+          node-version: '20'
+
+      - name: Install Prism mock server
+        run: npm install -g @stoplight/prism-cli
+
+      - name: Download OpenAPI spec  # the URL is taken from the openapi_spec_url entry in .stats.yml
+        run: |
+          SPEC_URL=$(grep 'openapi_spec_url' .stats.yml | sed 's/openapi_spec_url: //')
+          curl -sSL "$SPEC_URL" -o openapi-spec.yml
 
-      - name: Install package
-        run: pip install -e . 2>/dev/null || pip install -e .[training]
+      - name: Start Prism mock server  # backgrounds Prism, then polls 127.0.0.1:4010 up to 30 times, one second apart
+        run: |
+          prism mock openapi-spec.yml --port 4010 --host 127.0.0.1 &
+          # Wait for Prism to be ready
+          for i in $(seq 1 30); do
+            if curl -s http://127.0.0.1:4010 > /dev/null 2>&1; then
+              echo "Prism is ready"
+              break
+            fi
+            sleep 1
+          done
+          curl -s http://127.0.0.1:4010 > /dev/null 2>&1 || { echo "Prism failed to start"; exit 1; }
 
-      - name: Smoke import
-        run: python -c "import fireworks; print('fireworks', getattr(fireworks, '__version__', '<unknown>'))"
+      - name: Install dependencies  # NOTE(review): 2>/dev/null hides why the [training] install failed before the fallback
+        run: |
+          pip install -e ".[training]" 2>/dev/null || pip install -e .
+          pip install pytest pytest-asyncio pytest-timeout respx aiohttp httpx_aiohttp dirty-equals
+
+      - name: Run mock server tests  # -x stops at the first failure; each test is limited to 30 seconds
+        env:
+          RUN_MOCK_SERVER_TESTS: 'true'
+          TEST_API_BASE_URL: http://127.0.0.1:4010
+          DEFER_PYDANTIC_BUILD: 'false'
+        run: |
+          python -m pytest tests/api_resources/ -v --timeout=30 -x
+
+  test-coverage:  # runs the test suite under coverage and publishes coverage.xml as an artifact
+    timeout-minutes: 15
+    name: test-coverage
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:  # presumably read by the installer script piped into bash above; verify against Rye docs
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Install dependencies
+        run: rye sync --all-features
+
+      - name: Run tests with coverage  # excludes tests/api_resources; fails when total coverage is below 70%
+        env:
+          DEFER_PYDANTIC_BUILD: 'false'
+        run: |
+          rye run pytest tests/ \
+            --ignore=tests/api_resources \
+            --cov=fireworks \
+            --cov-report=term-missing \
+            --cov-report=xml:coverage.xml \
+            --cov-fail-under=70
+
+      - name: Upload coverage report
+        if: always()  # upload the report even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage.xml
diff --git a/.github/workflows/post-publish.yml b/.github/workflows/post-publish.yml
index 5f2849aa..a3e9c632 100644
--- a/.github/workflows/post-publish.yml
+++ b/.github/workflows/post-publish.yml
@@ -22,12 +22,8 @@ jobs:
 
       - name: Wait for PyPI propagation and install
         run: |
-          # --upgrade --pre is required: the runner toolcache ships an older
-          # fireworks-ai which would otherwise satisfy the requirement and skip
-          # the install, so the smoke test would silently exercise the stale
-          # cached version instead of the release we just published.
           for i in $(seq 1 10); do
-            if pip install --upgrade --pre fireworks-ai 2>/dev/null; then
+            if pip install --upgrade --pre fireworks-ai 2>/dev/null; then  # --pre: releases are pre-release versions; --upgrade: do not accept an older cached install
               echo "Package available"
               exit 0
             fi
@@ -43,5 +39,5 @@ jobs:
       - name: Verify training extras
         if: matrix.python-version != '3.9'
         run: |
-          pip install --upgrade --pre "fireworks-ai[training]"
+          pip install --upgrade --pre "fireworks-ai[training]"  # same flags as the base install, so the extras check targets the same release
           python -c "from fireworks.training import sdk; print('Training SDK OK')"
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
index a386ef5e..87c04a8d 100644
--- a/.github/workflows/release-doctor.yml
+++ b/.github/workflows/release-doctor.yml
@@ -9,7 +9,7 @@ jobs:
   release_doctor:
     name: release doctor
     runs-on: ubuntu-latest
-    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'autorelease/') || github.head_ref == 'next')
+    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
 
     steps:
       - uses: actions/checkout@v6
diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml
deleted file mode 100644
index 0f043f06..00000000
--- a/.github/workflows/release-tag.yml
+++ /dev/null
@@ -1,142 +0,0 @@
-name: Release Tag
-
-# Fires on every push to main. If pyproject.toml carries a version that has
-# not yet been tagged, this workflow creates the matching git tag vX.Y.Z and a
-# GitHub Release, which is what publish-pypi.yml listens for. pyproject.toml
-# is the single source of truth for the released version — no commit-subject
-# regex, no manifest, no workflow_dispatch override. The GitHub Release MUST
-# be created with FW_AI_BOT_TOKEN (a PAT) rather than github.token, because
-# Release events triggered by github.token do not cascade into other
-# workflows. Idempotent — the tag/release won't be recreated if they already
-# exist.
-
-on:
-  push:
-    branches:
-      - main
-
-permissions:
-  contents: write
-  pull-requests: write
-
-jobs:
-  tag:
-    if: github.repository == 'fw-ai-external/python-sdk'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FW_AI_BOT_TOKEN }}
-
-      - name: Determine version
-        id: version
-        run: |
-          set -euo pipefail
-
-          # Single source of truth: pyproject.toml [project] version on the
-          # merged commit. No commit-subject parsing, no manifest, no fallback.
-          version="$(python3 -c '
-          import sys
-          try:
-              import tomllib
-          except ModuleNotFoundError:
-              import tomli as tomllib
-          with open("pyproject.toml", "rb") as f:
-              print(tomllib.load(f)["project"]["version"])
-          ')"
-
-          if [[ ! "${version}" =~ ^[0-9][0-9A-Za-z._-]*$ ]]; then
-            echo "::error::Invalid version in pyproject.toml: ${version}"
-            exit 1
-          fi
-
-          {
-            echo "version=${version}"
-            echo "tag=v${version}"
-          } >> "$GITHUB_OUTPUT"
-
-      - name: Check if tag exists
-        id: tag
-        env:
-          TAG: ${{ steps.version.outputs.tag }}
-        run: |
-          set -euo pipefail
-          if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null; then
-            echo "Tag ${TAG} already exists."
-            echo "exists=true" >> "$GITHUB_OUTPUT"
-          else
-            echo "exists=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Require FW_AI_BOT_TOKEN
-        env:
-          BOT_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
-        run: |
-          [[ -n "${BOT_TOKEN}" ]] || { echo "::error::FW_AI_BOT_TOKEN is required"; exit 1; }
-
-      - name: Create tag and GitHub Release
-        if: steps.tag.outputs.exists == 'false'
-        env:
-          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
-          VERSION: ${{ steps.version.outputs.version }}
-          TAG: ${{ steps.version.outputs.tag }}
-        run: |
-          set -euo pipefail
-
-          # Extract this version's CHANGELOG section as the release notes.
-          notes_file="$(mktemp)"
-          awk -v ver="${VERSION}" '
-            $0 ~ "^## " ver "($| )" { capture = 1; print; next }
-            capture && /^## / { exit }
-            capture { print }
-          ' CHANGELOG.md > "${notes_file}"
-
-          # Tag this commit and push.
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git tag -a "${TAG}" -m "${TAG}"
-          git push origin "${TAG}"
-
-          # Pre-1.0 prereleases stay marked as prerelease.
-          prerelease_flag=""
-          if [[ "${VERSION}" == *-alpha.* || "${VERSION}" == *-beta.* || "${VERSION}" == *-rc.* ]]; then
-            prerelease_flag="--prerelease"
-          fi
-
-          gh release create "${TAG}" \
-            --title "${TAG}" \
-            --notes-file "${notes_file}" \
-            ${prerelease_flag}
-
-      - name: Mark release PR as tagged
-        # Runs unconditionally so that recovering from a partial prior failure
-        # (tag created, label flip failed) just needs a workflow rerun. The
-        # step is a no-op on commits that aren't release-PR merges (no PR
-        # found) and on PRs already flipped to 'tagged' (no pending label).
-        # Loud failure only when this run just created the tag but no PR is
-        # associated — that indicates a misconfigured release commit.
-        env:
-          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
-          TAG_JUST_CREATED: ${{ steps.tag.outputs.exists == 'false' }}
-        run: |
-          set -euo pipefail
-          pr_number="$(gh api "repos/${GITHUB_REPOSITORY}/commits/${GITHUB_SHA}/pulls" \
-            --jq '.[0].number // empty')"
-          if [[ -z "${pr_number}" ]]; then
-            if [[ "${TAG_JUST_CREATED}" == "true" ]]; then
-              echo "::error::Tag was created this run but no PR is associated with ${GITHUB_SHA}."
-              exit 1
-            fi
-            echo "No PR associated with ${GITHUB_SHA}; nothing to flip."
-            exit 0
-          fi
-          labels="$(gh pr view "${pr_number}" --repo "${GITHUB_REPOSITORY}" \
-            --json labels --jq '[.labels[].name] | join(",")')"
-          if [[ ",${labels}," != *",autorelease: pending,"* ]]; then
-            echo "PR #${pr_number} does not carry 'autorelease: pending'; nothing to flip."
-            exit 0
-          fi
-          gh pr edit "${pr_number}" --repo "${GITHUB_REPOSITORY}" \
-            --remove-label "autorelease: pending" \
-            --add-label "autorelease: tagged"
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
new file mode 100644
index 00000000..b789ab70
--- /dev/null
+++ b/.release-please-manifest.json
@@ -0,0 +1,3 @@
+{
+  ".": "1.2.0-alpha.70"
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7c9c918..d3b5d6b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,17 +1,5 @@
 # Changelog
 
-## 1.2.0-alpha.72 (2026-05-21)
-
-Full Changelog: [v1.2.0-alpha.71...v1.2.0-alpha.72](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.71...v1.2.0-alpha.72)
-
-### Chores
-* bootstrap public SDK release workflows (#83) ([7dcb03e](https://github.com/fw-ai-external/python-sdk/commit/7dcb03e5380a70b28dd7d438a6e10f55be0e493f))
-* bootstrap promotion pipeline workflows (#86) ([e0042ab](https://github.com/fw-ai-external/python-sdk/commit/e0042ab3117a02a7ca5266b4a8400f47627c5402))
-
-## 1.2.0-alpha.71 (2026-05-19)
-
-Full Changelog: [v1.2.0-alpha.70...v1.2.0-alpha.71](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.70...v1.2.0-alpha.71)
-
 ## 1.2.0-alpha.70 (2026-05-15)
 
 Full Changelog: [v1.2.0-alpha.69...v1.2.0-alpha.70](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.69...v1.2.0-alpha.70)
diff --git a/noxfile.py b/noxfile.py
index c2293764..25dd61f3 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -3,28 +3,16 @@
 
 import nox
 
-_TRAINING_ONLY_PACKAGES = (
-    "tinker==",
-    "tinker-cookbook==",
-    "torch==",
-    "triton==",
-    "transformers==",
-    "datasets==",
-    "tiktoken==",
-    "wandb==",
-    "nvidia-",
-)
+_TRAINING_ONLY_PACKAGES = ("tinker==", "tinker-cookbook==")
 
 
 def _install_dev_deps_without_training_extras(session: nox.Session) -> None:
     """Install lockfile deps while skipping training-only extras.
 
-    `test-pydantic-v1` validates base SDK compatibility on Python 3.9; it does
-    not need the training stack. Skip tinker / tinker-cookbook (which require
-    newer Python) plus the heavy ML wheels (torch, triton, transformers,
-    datasets, tiktoken, wandb, and the nvidia-* CUDA wheels) that would
-    otherwise be reinstalled into a second venv on top of the .venv
-    `rye sync --all-features` already produced, overflowing runner disk.
+    `test-pydantic-v1` runs on Python 3.9 to validate base SDK compatibility.
+    Training extras (`tinker`, `tinker-cookbook`) currently require newer
+    Python versions, so we exclude only those packages from the shared dev
+    lockfile for this session.
     """
     lockfile_lines = Path("requirements-dev.lock").read_text(encoding="utf-8").splitlines()
     filtered_lines = [
diff --git a/pyproject.toml b/pyproject.toml
index d2215a0f..6fde7e46 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fireworks-ai"
-version = "1.2.0-alpha.72"
+version = "1.2.0-alpha.70"
 description = "The official Python library for the fireworks API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/release-please-config.json b/release-please-config.json
new file mode 100644
index 00000000..c5eb8a35
--- /dev/null
+++ b/release-please-config.json
@@ -0,0 +1,66 @@
+{
+  "packages": {
+    ".": {}
+  },
+  "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json",
+  "include-v-in-tag": true,
+  "include-component-in-tag": false,
+  "versioning": "prerelease",
+  "prerelease": true,
+  "bump-minor-pre-major": true,
+  "bump-patch-for-minor-pre-major": false,
+  "pull-request-header": "Automated Release PR",
+  "pull-request-title-pattern": "release: ${version}",
+  "changelog-sections": [
+    {
+      "type": "feat",
+      "section": "Features"
+    },
+    {
+      "type": "fix",
+      "section": "Bug Fixes"
+    },
+    {
+      "type": "perf",
+      "section": "Performance Improvements"
+    },
+    {
+      "type": "revert",
+      "section": "Reverts"
+    },
+    {
+      "type": "chore",
+      "section": "Chores"
+    },
+    {
+      "type": "docs",
+      "section": "Documentation"
+    },
+    {
+      "type": "style",
+      "section": "Styles"
+    },
+    {
+      "type": "refactor",
+      "section": "Refactors"
+    },
+    {
+      "type": "test",
+      "section": "Tests",
+      "hidden": true
+    },
+    {
+      "type": "build",
+      "section": "Build System"
+    },
+    {
+      "type": "ci",
+      "section": "Continuous Integration",
+      "hidden": true
+    }
+  ],
+  "release-type": "python",
+  "extra-files": [
+    "src/fireworks/_version.py"
+  ]
+}
\ No newline at end of file
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 0acb3e70..27ab5b89 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -65,7 +65,7 @@ httpx-aiohttp==0.1.12
     # via fireworks-ai
 humanize==4.13.0
     # via nox
-idna==3.15
+idna==3.11
     # via anyio
     # via httpx
     # via yarl
diff --git a/requirements.lock b/requirements.lock
index bbc5e6d5..9fb2d809 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -45,7 +45,7 @@ httpx==0.28.1
     # via httpx-aiohttp
 httpx-aiohttp==0.1.12
     # via fireworks-ai
-idna==3.15
+idna==3.11
     # via anyio
     # via httpx
     # via yarl
diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
new file mode 100755
index 00000000..b041dbb4
--- /dev/null
+++ b/scripts/utils/upload-artifact.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -exuo pipefail
+# Upload the wheel from dist/ to Stainless storage. Reads URL, AUTH and SHA from the environment.
+FILENAME=$(basename dist/*.whl)
+
+RESPONSE=$(curl -X POST "$URL?filename=$FILENAME" \
+  -H "Authorization: Bearer $AUTH" \
+  -H "Content-Type: application/json")
+
+SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url')  # jq -r prints "null" when .url is missing
+# Also reject an empty value, so the PUT below is never attempted against a blank URL.
+if [[ -z "$SIGNED_URL" || "$SIGNED_URL" == "null" ]]; then
+  echo -e "\033[31mFailed to get signed URL.\033[0m"
+  exit 1
+fi
+# curl -v with 2>&1 puts the response status line into the captured output checked below.
+UPLOAD_RESPONSE=$(curl -v -X PUT \
+  -H "Content-Type: binary/octet-stream" \
+  --data-binary "@dist/$FILENAME" "$SIGNED_URL" 2>&1)
+
+if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
+  echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
+  echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/fireworks-ai-python/$SHA/$FILENAME'\033[0m"
+else
+  echo -e "\033[31mFailed to upload artifact.\033[0m"
+  exit 1
+fi
diff --git a/src/fireworks/_version.py b/src/fireworks/_version.py
index 3cc8870e..e56cc03c 100644
--- a/src/fireworks/_version.py
+++ b/src/fireworks/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "fireworks"
-__version__ = "1.2.0-alpha.72"  # x-release-please-version
+__version__ = "1.2.0-alpha.70"  # x-release-please-version
diff --git a/src/fireworks/training/sdk/deployment.py b/src/fireworks/training/sdk/deployment.py
index f0cc5b97..fb1ef170 100644
--- a/src/fireworks/training/sdk/deployment.py
+++ b/src/fireworks/training/sdk/deployment.py
@@ -174,7 +174,6 @@ class DeploymentConfig:
     disable_speculative_decoding: bool = False
     extra_args: list[str] | None = None
     extra_values: dict[str, str] | None = None
-    annotations: dict[str, str] | None = None
 
 
 class DeploymentManager(_RestClient):
@@ -319,8 +318,6 @@ def _create_deployment(self, config: DeploymentConfig) -> dict:
             body["extraArgs"] = flat
         if config.extra_values:
             body["extraValues"] = config.extra_values
-        if config.annotations:
-            body["annotations"] = config.annotations
 
         logger.info("Creating deployment: %s", config.deployment_id)
         resp = self._post(path, json=body)
diff --git a/src/fireworks/training/sdk/tests/test_deployment.py b/src/fireworks/training/sdk/tests/test_deployment.py
index bd1b5d5a..f26a6ad3 100644
--- a/src/fireworks/training/sdk/tests/test_deployment.py
+++ b/src/fireworks/training/sdk/tests/test_deployment.py
@@ -151,27 +151,6 @@ def test_create_omits_placement_when_region_unset(self, mgr):
         body = mgr._post.call_args[1]["json"]
         assert "placement" not in body
 
-    def test_create_includes_annotations(self, mgr):
-        resp = MagicMock()
-        resp.status_code = 200
-        resp.is_success = True
-        resp.json.return_value = {
-            "name": "accounts/test-acct/deployments/dep-1",
-            "state": "CREATING",
-        }
-        mgr._post = MagicMock(return_value=resp)
-
-        mgr._create_deployment(
-            DeploymentConfig(
-                deployment_id="dep-1",
-                base_model="accounts/test/models/qwen3-1p7b",
-                annotations={"purpose": "test"},
-            )
-        )
-
-        body = mgr._post.call_args[1]["json"]
-        assert body["annotations"] == {"purpose": "test"}
-
     def test_409_is_not_raised(self, mgr, deploy_config):
         resp = MagicMock()
         resp.status_code = 409
diff --git a/src/fireworks/training/sdk/tests/test_trainer.py b/src/fireworks/training/sdk/tests/test_trainer.py
index a11c8326..d06d0341 100644
--- a/src/fireworks/training/sdk/tests/test_trainer.py
+++ b/src/fireworks/training/sdk/tests/test_trainer.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import logging
-from datetime import timedelta
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -138,41 +137,6 @@ def test_manual_path_sends_all_fields(self, mgr):
         assert payload["nodeCount"] == 4
         assert tc["region"] == "US_OHIO_1"
 
-    def test_inactivity_cleanup_fields(self, mgr):
-        config = TrainerJobConfig(
-            base_model="accounts/test/models/m",
-            training_shape_ref="accounts/test-account/trainingShapes/ts-test/versions/shape-v1",
-            inactivity_timeout=timedelta(minutes=30),
-            disable_inactivity_cleanup=True,
-        )
-        resp = MagicMock()
-        resp.is_success = True
-        resp.status_code = 200
-        resp.json.return_value = {"name": "j"}
-        mgr._post = MagicMock(return_value=resp)
-
-        mgr._create(config)
-
-        payload = mgr._post.call_args[1]["json"]
-        assert payload["inactivityTimeout"] == "1800s"
-        assert payload["disableInactivityCleanup"] is True
-
-    def test_inactivity_timeout_accepts_proto_duration_string(self, mgr):
-        config = TrainerJobConfig(
-            base_model="accounts/test/models/m",
-            inactivity_timeout="7200s",
-        )
-        resp = MagicMock()
-        resp.is_success = True
-        resp.status_code = 200
-        resp.json.return_value = {"name": "j"}
-        mgr._post = MagicMock(return_value=resp)
-
-        mgr._create(config)
-
-        payload = mgr._post.call_args[1]["json"]
-        assert payload["inactivityTimeout"] == "7200s"
-
     def test_extra_args_flattened(self, mgr):
         config = TrainerJobConfig(
             base_model="accounts/test/models/m",
@@ -702,22 +666,6 @@ def test_explicit_one_gradient_accumulation_steps_warns(self, caplog):
             for rec in caplog.records
         )
 
-    def test_rejects_negative_inactivity_timeout(self):
-        config = TrainerJobConfig(
-            base_model="accounts/test/models/m",
-            inactivity_timeout=timedelta(seconds=-1),
-        )
-        with pytest.raises(ValueError, match="inactivity_timeout"):
-            config.validate()
-
-    def test_rejects_invalid_inactivity_timeout_string(self):
-        config = TrainerJobConfig(
-            base_model="accounts/test/models/m",
-            inactivity_timeout="30m",
-        )
-        with pytest.raises(ValueError, match="protobuf JSON duration"):
-            config.validate()
-
 
 # ---------------------------------------------------------------------------
 # _check_healthz — uses persistent session
diff --git a/src/fireworks/training/sdk/trainer.py b/src/fireworks/training/sdk/trainer.py
index cf823dfa..3d039c15 100644
--- a/src/fireworks/training/sdk/trainer.py
+++ b/src/fireworks/training/sdk/trainer.py
@@ -12,7 +12,6 @@
 import time
 import logging
 from typing import Any
-from datetime import timedelta
 from dataclasses import dataclass
 from urllib.parse import urlencode
 
@@ -29,30 +28,6 @@
 
 _SHAPE_OWNED_FIELDS = ("accelerator_type", "accelerator_count", "custom_image_tag", "node_count")
 _POLL_LOG_HEARTBEAT_S = 60.0
-_PROTO_DURATION_RE = re.compile(r"^(?P<sign>-?)(?P<seconds>\d+)(\.\d{1,9})?s$")
-
-
-def _format_proto_duration(value: timedelta | str) -> str:
-    """Format a non-negative duration for protobuf JSON REST fields."""
-    if isinstance(value, timedelta):
-        total_seconds = value.total_seconds()
-        if total_seconds < 0:
-            raise ValueError("must be non-negative")
-        if total_seconds.is_integer():
-            return f"{int(total_seconds)}s"
-        return f"{total_seconds:.9f}".rstrip("0").rstrip(".") + "s"
-
-    if isinstance(value, str):
-        if not _PROTO_DURATION_RE.match(value):
-            raise ValueError(
-                "must be a protobuf JSON duration string such as '1800s'; "
-                "use datetime.timedelta for minute/hour values"
-            )
-        if value.startswith("-"):
-            raise ValueError("must be non-negative")
-        return value
-
-    raise TypeError("must be datetime.timedelta or protobuf JSON duration string")
 
 
 def _extract_job_status_message(job: dict[str, Any]) -> str:
@@ -169,21 +144,6 @@ class TrainerJobConfig:
     must not be set.
     """
     forward_only: bool = False
-    inactivity_timeout: timedelta | str | None = None
-    """Trainer inactivity timeout.
-
-    The trainer reports tracked activity, including trainer API operations and
-    active-session heartbeats. If no tracked activity is observed for this
-    duration, the trainer is automatically stopped. When unset or 0, Fireworks
-    uses the 60-minute default. Use ``disable_inactivity_cleanup=True`` to
-    disable automatic cleanup.
-    """
-    disable_inactivity_cleanup: bool = False
-    """Disable trainer inactivity cleanup.
-
-    When true, the trainer is not automatically stopped due to inactivity. GPU
-    usage continues to accrue while the trainer is running.
-    """
     skip_validations: bool = False
     """Skip server-side shape validation. Requires superuser API key."""
     purpose: str | None = None
@@ -212,11 +172,6 @@ def validate(self) -> None:
                 "(multiple forward_backward calls per optim_step) and pass "
                 "grad_accumulation_normalization on the optim_step request."
             )
-        if self.inactivity_timeout is not None:
-            try:
-                _format_proto_duration(self.inactivity_timeout)
-            except (TypeError, ValueError) as e:
-                errors.append(f"inactivity_timeout {e}")
         if self.training_shape_ref:
             for field in _SHAPE_OWNED_FIELDS:
                 val = getattr(self, field)
@@ -373,10 +328,6 @@ def _create(self, config: TrainerJobConfig) -> dict:
             payload["purpose"] = config.purpose
         if config.managed_by:
             payload["managedBy"] = config.managed_by
-        if config.inactivity_timeout is not None:
-            payload["inactivityTimeout"] = _format_proto_duration(config.inactivity_timeout)
-        if config.disable_inactivity_cleanup:
-            payload["disableInactivityCleanup"] = True
 
         logger.info("Creating RLOR job: POST %s (model=%s) (payload=%s)", f"{self.base_url}{path}", config.base_model, payload)
         resp = self._post(path, json=payload, timeout=60)

From 8eb9f95014dd449f038d6e78fad9df4ca02bed34 Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Tue, 19 May 2026 10:36:28 -0700
Subject: [PATCH 02/10] chore: stage SDK deployment annotations (#26077)

---
 src/fireworks/training/sdk/deployment.py      |  3 +++
 .../training/sdk/tests/test_deployment.py     | 21 +++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/fireworks/training/sdk/deployment.py b/src/fireworks/training/sdk/deployment.py
index fb1ef170..f0cc5b97 100644
--- a/src/fireworks/training/sdk/deployment.py
+++ b/src/fireworks/training/sdk/deployment.py
@@ -174,6 +174,7 @@ class DeploymentConfig:
     disable_speculative_decoding: bool = False
     extra_args: list[str] | None = None
     extra_values: dict[str, str] | None = None
+    annotations: dict[str, str] | None = None
 
 
 class DeploymentManager(_RestClient):
@@ -318,6 +319,8 @@ def _create_deployment(self, config: DeploymentConfig) -> dict:
             body["extraArgs"] = flat
         if config.extra_values:
             body["extraValues"] = config.extra_values
+        if config.annotations:
+            body["annotations"] = config.annotations
 
         logger.info("Creating deployment: %s", config.deployment_id)
         resp = self._post(path, json=body)
diff --git a/src/fireworks/training/sdk/tests/test_deployment.py b/src/fireworks/training/sdk/tests/test_deployment.py
index f26a6ad3..bd1b5d5a 100644
--- a/src/fireworks/training/sdk/tests/test_deployment.py
+++ b/src/fireworks/training/sdk/tests/test_deployment.py
@@ -151,6 +151,27 @@ def test_create_omits_placement_when_region_unset(self, mgr):
         body = mgr._post.call_args[1]["json"]
         assert "placement" not in body
 
+    def test_create_includes_annotations(self, mgr):
+        resp = MagicMock()
+        resp.status_code = 200
+        resp.is_success = True
+        resp.json.return_value = {
+            "name": "accounts/test-acct/deployments/dep-1",
+            "state": "CREATING",
+        }
+        mgr._post = MagicMock(return_value=resp)
+
+        mgr._create_deployment(
+            DeploymentConfig(
+                deployment_id="dep-1",
+                base_model="accounts/test/models/qwen3-1p7b",
+                annotations={"purpose": "test"},
+            )
+        )
+
+        body = mgr._post.call_args[1]["json"]
+        assert body["annotations"] == {"purpose": "test"}
+
     def test_409_is_not_raised(self, mgr, deploy_config):
         resp = MagicMock()
         resp.status_code = 409

From 8037b48f7242072256841345eef9ec14142ed189 Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Tue, 19 May 2026 16:04:07 -0700
Subject: [PATCH 03/10] chore: clean up staged public repo references (#26200)

---
 .github/workflows/ci-e2e.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-e2e.yml b/.github/workflows/ci-e2e.yml
index 2c4c0023..50e619b6 100644
--- a/.github/workflows/ci-e2e.yml
+++ b/.github/workflows/ci-e2e.yml
@@ -9,7 +9,7 @@ on:
 
 jobs:
   e2e:
-    if: github.repository == 'stainless-sdks/fireworks-ai-python' || github.repository == 'fw-ai-external/python-sdk'
+    if: github.repository == 'fw-ai-external/python-sdk'
     timeout-minutes: 15
     name: e2e (${{ matrix.python-version }})
     runs-on: ubuntu-latest

From 79a1c79157d74e0443666344ef872bf5609cfd4c Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Tue, 19 May 2026 18:03:57 -0700
Subject: [PATCH 04/10] chore: unblock public SDK promotion flow (#26242)

---
 .github/workflows/ci.yml             | 37 +++-------------------------
 .github/workflows/release-doctor.yml |  2 +-
 .release-please-manifest.json        |  2 +-
 CHANGELOG.md                         |  4 +++
 pyproject.toml                       |  2 +-
 release-please-config.json           |  4 +--
 requirements-dev.lock                |  2 +-
 requirements.lock                    |  2 +-
 scripts/utils/upload-artifact.sh     | 27 --------------------
 src/fireworks/_version.py            |  2 +-
 10 files changed, 15 insertions(+), 69 deletions(-)
 delete mode 100755 scripts/utils/upload-artifact.sh

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b9757d58..d43f8726 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,22 +3,13 @@ on:
   push:
     branches:
       - '**'
-      - '!integrated/**'
-      - '!stl-preview-head/**'
-      - '!stl-preview-base/**'
-      - '!generated'
-      - '!codegen/**'
-      - 'codegen/stl/**'
   pull_request:
-    branches-ignore:
-      - 'stl-preview-head/**'
-      - 'stl-preview-base/**'
 
 jobs:
   lint:
     timeout-minutes: 10
     name: lint
-    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    runs-on: ubuntu-latest
     if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
     steps:
       - uses: actions/checkout@v6
@@ -41,10 +32,7 @@ jobs:
     if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
     timeout-minutes: 10
     name: build
-    permissions:
-      contents: read
-      id-token: write
-    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v6
 
@@ -62,29 +50,10 @@ jobs:
       - name: Run build
         run: rye build
 
-      - name: Get GitHub OIDC Token
-        if: |-
-          github.repository == 'stainless-sdks/fireworks-ai-python' &&
-          !startsWith(github.ref, 'refs/heads/stl/')
-        id: github-oidc
-        uses: actions/github-script@v8
-        with:
-          script: core.setOutput('github_token', await core.getIDToken());
-
-      - name: Upload tarball
-        if: |-
-          github.repository == 'stainless-sdks/fireworks-ai-python' &&
-          !startsWith(github.ref, 'refs/heads/stl/')
-        env:
-          URL: https://pkg.stainless.com/s
-          AUTH: ${{ steps.github-oidc.outputs.github_token }}
-          SHA: ${{ github.sha }}
-        run: ./scripts/utils/upload-artifact.sh
-
   test:
     timeout-minutes: 10
     name: test
-    runs-on: ${{ github.repository == 'stainless-sdks/fireworks-ai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
+    runs-on: ubuntu-latest
     if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
     steps:
       - uses: actions/checkout@v6
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
index 87c04a8d..745714d7 100644
--- a/.github/workflows/release-doctor.yml
+++ b/.github/workflows/release-doctor.yml
@@ -9,7 +9,7 @@ jobs:
   release_doctor:
     name: release doctor
     runs-on: ubuntu-latest
-    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
+    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
 
     steps:
       - uses: actions/checkout@v6
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index b789ab70..8e7a7098 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.2.0-alpha.70"
+  ".": "1.2.0-alpha.71"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d3b5d6b9..d0cf6e85 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 1.2.0-alpha.71 (2026-05-19)
+
+Full Changelog: [v1.2.0-alpha.70...v1.2.0-alpha.71](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.70...v1.2.0-alpha.71)
+
 ## 1.2.0-alpha.70 (2026-05-15)
 
 Full Changelog: [v1.2.0-alpha.69...v1.2.0-alpha.70](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.69...v1.2.0-alpha.70)
diff --git a/pyproject.toml b/pyproject.toml
index 6fde7e46..23bc6930 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fireworks-ai"
-version = "1.2.0-alpha.70"
+version = "1.2.0-alpha.71"
 description = "The official Python library for the fireworks API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/release-please-config.json b/release-please-config.json
index c5eb8a35..c24a3935 100644
--- a/release-please-config.json
+++ b/release-please-config.json
@@ -2,7 +2,7 @@
   "packages": {
     ".": {}
   },
-  "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json",
+  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
   "include-v-in-tag": true,
   "include-component-in-tag": false,
   "versioning": "prerelease",
@@ -63,4 +63,4 @@
   "extra-files": [
     "src/fireworks/_version.py"
   ]
-}
\ No newline at end of file
+}
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 27ab5b89..0acb3e70 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -65,7 +65,7 @@ httpx-aiohttp==0.1.12
     # via fireworks-ai
 humanize==4.13.0
     # via nox
-idna==3.11
+idna==3.15
     # via anyio
     # via httpx
     # via yarl
diff --git a/requirements.lock b/requirements.lock
index 9fb2d809..bbc5e6d5 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -45,7 +45,7 @@ httpx==0.28.1
     # via httpx-aiohttp
 httpx-aiohttp==0.1.12
     # via fireworks-ai
-idna==3.11
+idna==3.15
     # via anyio
     # via httpx
     # via yarl
diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
deleted file mode 100755
index b041dbb4..00000000
--- a/scripts/utils/upload-artifact.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-set -exuo pipefail
-
-FILENAME=$(basename dist/*.whl)
-
-RESPONSE=$(curl -X POST "$URL?filename=$FILENAME" \
-  -H "Authorization: Bearer $AUTH" \
-  -H "Content-Type: application/json")
-
-SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url')
-
-if [[ "$SIGNED_URL" == "null" ]]; then
-  echo -e "\033[31mFailed to get signed URL.\033[0m"
-  exit 1
-fi
-
-UPLOAD_RESPONSE=$(curl -v -X PUT \
-  -H "Content-Type: binary/octet-stream" \
-  --data-binary "@dist/$FILENAME" "$SIGNED_URL" 2>&1)
-
-if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
-  echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
-  echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/fireworks-ai-python/$SHA/$FILENAME'\033[0m"
-else
-  echo -e "\033[31mFailed to upload artifact.\033[0m"
-  exit 1
-fi
diff --git a/src/fireworks/_version.py b/src/fireworks/_version.py
index e56cc03c..6e176a76 100644
--- a/src/fireworks/_version.py
+++ b/src/fireworks/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "fireworks"
-__version__ = "1.2.0-alpha.70"  # x-release-please-version
+__version__ = "1.2.0-alpha.71"  # x-release-please-version

From bd458774a30a83160489b93133baa02cbb89c4be Mon Sep 17 00:00:00 2001
From: Yinghan Ma <yinghan.ma@fireworks.ai>
Date: Thu, 21 May 2026 02:59:12 +0800
Subject: [PATCH 05/10] [codex] Add trainer SDK inactivity cleanup options
 (#26144)

---
 .../training/sdk/tests/test_trainer.py        | 52 ++++++++++++++++++
 src/fireworks/training/sdk/trainer.py         | 53 ++++++++++++++++++-
 2 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/fireworks/training/sdk/tests/test_trainer.py b/src/fireworks/training/sdk/tests/test_trainer.py
index d06d0341..a11c8326 100644
--- a/src/fireworks/training/sdk/tests/test_trainer.py
+++ b/src/fireworks/training/sdk/tests/test_trainer.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+from datetime import timedelta
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -137,6 +138,41 @@ def test_manual_path_sends_all_fields(self, mgr):
         assert payload["nodeCount"] == 4
         assert tc["region"] == "US_OHIO_1"
 
+    def test_inactivity_cleanup_fields(self, mgr):
+        config = TrainerJobConfig(
+            base_model="accounts/test/models/m",
+            training_shape_ref="accounts/test-account/trainingShapes/ts-test/versions/shape-v1",
+            inactivity_timeout=timedelta(minutes=30),
+            disable_inactivity_cleanup=True,
+        )
+        resp = MagicMock()
+        resp.is_success = True
+        resp.status_code = 200
+        resp.json.return_value = {"name": "j"}
+        mgr._post = MagicMock(return_value=resp)
+
+        mgr._create(config)
+
+        payload = mgr._post.call_args[1]["json"]
+        assert payload["inactivityTimeout"] == "1800s"
+        assert payload["disableInactivityCleanup"] is True
+
+    def test_inactivity_timeout_accepts_proto_duration_string(self, mgr):
+        config = TrainerJobConfig(
+            base_model="accounts/test/models/m",
+            inactivity_timeout="7200s",
+        )
+        resp = MagicMock()
+        resp.is_success = True
+        resp.status_code = 200
+        resp.json.return_value = {"name": "j"}
+        mgr._post = MagicMock(return_value=resp)
+
+        mgr._create(config)
+
+        payload = mgr._post.call_args[1]["json"]
+        assert payload["inactivityTimeout"] == "7200s"
+
     def test_extra_args_flattened(self, mgr):
         config = TrainerJobConfig(
             base_model="accounts/test/models/m",
@@ -666,6 +702,22 @@ def test_explicit_one_gradient_accumulation_steps_warns(self, caplog):
             for rec in caplog.records
         )
 
+    def test_rejects_negative_inactivity_timeout(self):
+        config = TrainerJobConfig(
+            base_model="accounts/test/models/m",
+            inactivity_timeout=timedelta(seconds=-1),
+        )
+        with pytest.raises(ValueError, match="inactivity_timeout"):
+            config.validate()
+
+    def test_rejects_invalid_inactivity_timeout_string(self):
+        config = TrainerJobConfig(
+            base_model="accounts/test/models/m",
+            inactivity_timeout="30m",
+        )
+        with pytest.raises(ValueError, match="protobuf JSON duration"):
+            config.validate()
+
 
 # ---------------------------------------------------------------------------
 # _check_healthz — uses persistent session
diff --git a/src/fireworks/training/sdk/trainer.py b/src/fireworks/training/sdk/trainer.py
index 3d039c15..e704ad2b 100644
--- a/src/fireworks/training/sdk/trainer.py
+++ b/src/fireworks/training/sdk/trainer.py
@@ -8,11 +8,12 @@
 
 from __future__ import annotations
 
+import logging
 import re
 import time
-import logging
-from typing import Any
 from dataclasses import dataclass
+from datetime import timedelta
+from typing import Any
 from urllib.parse import urlencode
 
 from fireworks.training.sdk.errors import (
@@ -28,6 +29,30 @@
 
 _SHAPE_OWNED_FIELDS = ("accelerator_type", "accelerator_count", "custom_image_tag", "node_count")
 _POLL_LOG_HEARTBEAT_S = 60.0
+_PROTO_DURATION_RE = re.compile(r"^(?P<sign>-?)(?P<seconds>\d+)(\.\d{1,9})?s$")
+
+
+def _format_proto_duration(value: timedelta | str) -> str:
+    """Format a non-negative duration for protobuf JSON REST fields."""
+    if isinstance(value, timedelta):
+        total_seconds = value.total_seconds()
+        if total_seconds < 0:
+            raise ValueError("must be non-negative")
+        if total_seconds.is_integer():
+            return f"{int(total_seconds)}s"
+        return f"{total_seconds:.9f}".rstrip("0").rstrip(".") + "s"
+
+    if isinstance(value, str):
+        if not _PROTO_DURATION_RE.match(value):
+            raise ValueError(
+                "must be a protobuf JSON duration string such as '1800s'; "
+                "use datetime.timedelta for minute/hour values"
+            )
+        if value.startswith("-"):
+            raise ValueError("must be non-negative")
+        return value
+
+    raise TypeError("must be datetime.timedelta or protobuf JSON duration string")
 
 
 def _extract_job_status_message(job: dict[str, Any]) -> str:
@@ -144,6 +169,21 @@ class TrainerJobConfig:
     must not be set.
     """
     forward_only: bool = False
+    inactivity_timeout: timedelta | str | None = None
+    """Trainer inactivity timeout.
+
+    The trainer reports tracked activity, including trainer API operations and
+    active-session heartbeats. If no tracked activity is observed for this
+    duration, the trainer is automatically stopped. When unset or 0, Fireworks
+    uses the 60-minute default. Use ``disable_inactivity_cleanup=True`` to
+    disable automatic cleanup.
+    """
+    disable_inactivity_cleanup: bool = False
+    """Disable trainer inactivity cleanup.
+
+    When true, the trainer is not automatically stopped due to inactivity. GPU
+    usage continues to accrue while the trainer is running.
+    """
     skip_validations: bool = False
     """Skip server-side shape validation. Requires superuser API key."""
     purpose: str | None = None
@@ -172,6 +212,11 @@ def validate(self) -> None:
                 "(multiple forward_backward calls per optim_step) and pass "
                 "grad_accumulation_normalization on the optim_step request."
             )
+        if self.inactivity_timeout is not None:
+            try:
+                _format_proto_duration(self.inactivity_timeout)
+            except (TypeError, ValueError) as e:
+                errors.append(f"inactivity_timeout {e}")
         if self.training_shape_ref:
             for field in _SHAPE_OWNED_FIELDS:
                 val = getattr(self, field)
@@ -328,6 +373,10 @@ def _create(self, config: TrainerJobConfig) -> dict:
             payload["purpose"] = config.purpose
         if config.managed_by:
             payload["managedBy"] = config.managed_by
+        if config.inactivity_timeout is not None:
+            payload["inactivityTimeout"] = _format_proto_duration(config.inactivity_timeout)
+        if config.disable_inactivity_cleanup:
+            payload["disableInactivityCleanup"] = True
 
         logger.info("Creating RLOR job: POST %s (model=%s) (payload=%s)", f"{self.base_url}{path}", config.base_model, payload)
         resp = self._post(path, json=payload, timeout=60)

From fadcec507fa24427fb5cc6ad1e8d0f238ab34a37 Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Wed, 20 May 2026 22:14:14 -0700
Subject: [PATCH 06/10] ci: finish public SDK release workflow (#26259)

---
 .github/workflows/ci-e2e.yml          |  40 -------
 .github/workflows/ci-security.yml     |  28 -----
 .github/workflows/ci.yml              | 147 ++++----------------------
 .github/workflows/release-tag.yml     | 121 +++++++++++++++++++++
 noxfile.py                            |  22 +++-
 src/fireworks/training/sdk/trainer.py |   6 +-
 6 files changed, 159 insertions(+), 205 deletions(-)
 delete mode 100644 .github/workflows/ci-e2e.yml
 delete mode 100644 .github/workflows/ci-security.yml
 create mode 100644 .github/workflows/release-tag.yml

diff --git a/.github/workflows/ci-e2e.yml b/.github/workflows/ci-e2e.yml
deleted file mode 100644
index 50e619b6..00000000
--- a/.github/workflows/ci-e2e.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-name: E2E Tests
-on:
-  schedule:
-    - cron: '0 6 * * *'
-  workflow_dispatch:
-  push:
-    branches:
-      - main
-
-jobs:
-  e2e:
-    if: github.repository == 'fw-ai-external/python-sdk'
-    timeout-minutes: 15
-    name: e2e (${{ matrix.python-version }})
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ['3.9', '3.11', '3.13']
-    steps:
-      - uses: actions/checkout@v6
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Install dependencies
-        run: rye sync --all-features
-
-      - name: Run e2e tests
-        run: rye run pytest tests/ -m e2e --timeout=60 -v
-        env:
-          FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_E2E_API_KEY }}
diff --git a/.github/workflows/ci-security.yml b/.github/workflows/ci-security.yml
deleted file mode 100644
index ce93b812..00000000
--- a/.github/workflows/ci-security.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Security Audit
-
-on:
-  schedule:
-    - cron: "0 0 * * 1"
-  pull_request:
-    paths:
-      - "requirements*.lock"
-      - "pyproject.toml"
-  workflow_dispatch:
-
-jobs:
-  audit:
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - uses: actions/checkout@v6
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install pip-audit
-        run: pip install pip-audit
-
-      - name: Run pip-audit
-        run: pip-audit -r requirements.lock --desc on
-        continue-on-error: false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d43f8726..bf6dc9a6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,4 +1,10 @@
 name: CI
+
+# Minimal safety net for direct PRs. The authoritative CI suite
+# (lint, build, unit + mock-server matrix, coverage, security audit,
+# e2e) runs upstream before promotion, so this workflow only needs
+# to catch the rare direct edit.
+
 on:
   push:
     branches:
@@ -7,10 +13,10 @@ on:
 
 jobs:
   lint:
-    timeout-minutes: 10
     name: lint
     runs-on: ubuntu-latest
-    if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
+    timeout-minutes: 10
+    if: github.event.head_commit.message != 'codegen metadata'
     steps:
       - uses: actions/checkout@v6
 
@@ -28,137 +34,20 @@ jobs:
       - name: Run lints
         run: ./scripts/lint
 
-  build:
-    if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata')
-    timeout-minutes: 10
-    name: build
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Install dependencies
-        run: rye sync --all-features
-
-      - name: Run build
-        run: rye build
-
-  test:
-    timeout-minutes: 10
-    name: test
+  import-smoke:
+    name: import-smoke
     runs-on: ubuntu-latest
-    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
+    timeout-minutes: 5
+    if: github.event.head_commit.message != 'codegen metadata'
     steps:
       - uses: actions/checkout@v6
 
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Bootstrap
-        run: ./scripts/bootstrap
-
-      - name: Run tests
-        run: ./scripts/test
-
-  test-mock-server:
-    timeout-minutes: 15
-    name: test-mock-server (Python ${{ matrix.python-version }})
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ['3.9', '3.11', '3.13']
-    steps:
-      - uses: actions/checkout@v6
-
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: '3.11'
 
-      - uses: actions/setup-node@v4
-        with:
-          node-version: '20'
+      - name: Install package
+        run: pip install -e . 2>/dev/null || pip install -e .[training]
 
-      - name: Install Prism mock server
-        run: npm install -g @stoplight/prism-cli
-
-      - name: Download OpenAPI spec
-        run: |
-          SPEC_URL=$(grep 'openapi_spec_url' .stats.yml | sed 's/openapi_spec_url: //')
-          curl -sSL "$SPEC_URL" -o openapi-spec.yml
-
-      - name: Start Prism mock server
-        run: |
-          prism mock openapi-spec.yml --port 4010 --host 127.0.0.1 &
-          # Wait for Prism to be ready
-          for i in $(seq 1 30); do
-            if curl -s http://127.0.0.1:4010 > /dev/null 2>&1; then
-              echo "Prism is ready"
-              break
-            fi
-            sleep 1
-          done
-          curl -s http://127.0.0.1:4010 > /dev/null 2>&1 || { echo "Prism failed to start"; exit 1; }
-
-      - name: Install dependencies
-        run: |
-          pip install -e ".[training]" 2>/dev/null || pip install -e .
-          pip install pytest pytest-asyncio pytest-timeout respx aiohttp httpx_aiohttp dirty-equals
-
-      - name: Run mock server tests
-        env:
-          RUN_MOCK_SERVER_TESTS: 'true'
-          TEST_API_BASE_URL: http://127.0.0.1:4010
-          DEFER_PYDANTIC_BUILD: 'false'
-        run: |
-          python -m pytest tests/api_resources/ -v --timeout=30 -x
-
-  test-coverage:
-    timeout-minutes: 15
-    name: test-coverage
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' || github.event.pull_request.head.repo.fork
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Install dependencies
-        run: rye sync --all-features
-
-      - name: Run tests with coverage
-        env:
-          DEFER_PYDANTIC_BUILD: 'false'
-        run: |
-          rye run pytest tests/ \
-            --ignore=tests/api_resources \
-            --cov=fireworks \
-            --cov-report=term-missing \
-            --cov-report=xml:coverage.xml \
-            --cov-fail-under=70
-
-      - name: Upload coverage report
-        if: always()
-        uses: actions/upload-artifact@v4
-        with:
-          name: coverage-report
-          path: coverage.xml
+      - name: Smoke import
+        run: python -c "import fireworks; print('fireworks', getattr(fireworks, '__version__', '<unknown>'))"
diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml
new file mode 100644
index 00000000..bc70052a
--- /dev/null
+++ b/.github/workflows/release-tag.yml
@@ -0,0 +1,121 @@
+name: Release Tag
+
+# Fires when a "release: X.Y.Z" commit lands on main (typically via
+# squash-merge of an upstream-prepared release PR; subject has no "v"
+# prefix, the tag does). Creates the matching git tag vX.Y.Z and a
+# GitHub Release, which is what publish-pypi.yml listens for. The
+# GitHub Release MUST be created with FW_AI_BOT_TOKEN (a PAT) rather
+# than github.token, because Release events triggered by github.token
+# do not cascade into other workflows — using github.token would
+# silently break the publish chain. Idempotent — the tag/release won't
+# be recreated if they already exist.
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+    inputs:
+      version:
+        description: Version to tag (e.g. 1.2.0-alpha.72); leave blank to read manifest
+        type: string
+        required: false
+
+permissions:
+  contents: write
+
+jobs:
+  tag:
+    if: github.repository == 'fw-ai-external/python-sdk'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FW_AI_BOT_TOKEN || github.token }}
+
+      - name: Determine version
+        id: version
+        env:
+          DISPATCH_VERSION: ${{ github.event.inputs.version }}
+        run: |
+          set -euo pipefail
+
+          if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${DISPATCH_VERSION}" ]]; then
+            version="${DISPATCH_VERSION}"
+          elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
+            # Manual recovery dispatch with no version: read the manifest.
+            version="$(python3 -c 'import json; print(json.load(open(".release-please-manifest.json"))["."])')"
+          else
+            # Push to main: the commit subject is the source of truth.
+            # The optional " (#NNN)" suffix is what GitHub appends on
+            # squash-merge; capture only the version token.
+            subject="$(git log -1 --format=%s)"
+            if [[ ! "${subject}" =~ ^release:\ +([0-9][0-9A-Za-z._-]*)(\ +\(#[0-9]+\))?$ ]]; then
+              echo "Most recent commit is not a release commit; nothing to tag."
+              echo "skip=true" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+            version="${BASH_REMATCH[1]}"
+          fi
+
+          # Belt-and-suspenders: validate the version regardless of source
+          # (push subject, explicit dispatch input, or manifest fallback).
+          if [[ ! "${version}" =~ ^[0-9][0-9A-Za-z._-]*$ ]]; then
+            echo "::error::Invalid release version: ${version}"
+            exit 1
+          fi
+
+          tag="v${version}"
+          if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null; then
+            echo "Tag ${tag} already exists; skipping."
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          {
+            echo "skip=false"
+            echo "version=${version}"
+            echo "tag=${tag}"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Require FW_AI_BOT_TOKEN
+        if: steps.version.outputs.skip != 'true'
+        env:
+          BOT_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
+        run: |
+          [[ -n "${BOT_TOKEN}" ]] || { echo "::error::FW_AI_BOT_TOKEN is required"; exit 1; }
+
+      - name: Create tag and GitHub Release
+        if: steps.version.outputs.skip != 'true'
+        env:
+          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
+          TAG: ${{ steps.version.outputs.tag }}
+        run: |
+          set -euo pipefail
+
+          # Extract this version's CHANGELOG section as the release notes.
+          notes_file="$(mktemp)"
+          awk -v ver="${VERSION}" '
+            $0 ~ "^## " ver "($| )" { capture = 1; print; next }
+            capture && /^## / { exit }
+            capture { print }
+          ' CHANGELOG.md > "${notes_file}"
+
+          # Tag this commit and push.
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git tag -a "${TAG}" -m "${TAG}"
+          git push origin "${TAG}"
+
+          # Pre-1.0 prereleases stay marked as prerelease.
+          prerelease_flag=""
+          if [[ "${VERSION}" == *-alpha.* || "${VERSION}" == *-beta.* || "${VERSION}" == *-rc.* ]]; then
+            prerelease_flag="--prerelease"
+          fi
+
+          gh release create "${TAG}" \
+            --title "${TAG}" \
+            --notes-file "${notes_file}" \
+            ${prerelease_flag}
diff --git a/noxfile.py b/noxfile.py
index 25dd61f3..c2293764 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -3,16 +3,28 @@
 
 import nox
 
-_TRAINING_ONLY_PACKAGES = ("tinker==", "tinker-cookbook==")
+_TRAINING_ONLY_PACKAGES = (
+    "tinker==",
+    "tinker-cookbook==",
+    "torch==",
+    "triton==",
+    "transformers==",
+    "datasets==",
+    "tiktoken==",
+    "wandb==",
+    "nvidia-",
+)
 
 
 def _install_dev_deps_without_training_extras(session: nox.Session) -> None:
     """Install lockfile deps while skipping training-only extras.
 
-    `test-pydantic-v1` runs on Python 3.9 to validate base SDK compatibility.
-    Training extras (`tinker`, `tinker-cookbook`) currently require newer
-    Python versions, so we exclude only those packages from the shared dev
-    lockfile for this session.
+    `test-pydantic-v1` validates base SDK compatibility on Python 3.9; it does
+    not need the training stack. Skip tinker / tinker-cookbook (which require
+    newer Python) plus the heavy ML wheels (torch, triton, transformers,
+    datasets, tiktoken, wandb, and the nvidia-* CUDA wheels) that would
+    otherwise be reinstalled into a second venv on top of the .venv
+    `rye sync --all-features` already produced, overflowing runner disk.
     """
     lockfile_lines = Path("requirements-dev.lock").read_text(encoding="utf-8").splitlines()
     filtered_lines = [
diff --git a/src/fireworks/training/sdk/trainer.py b/src/fireworks/training/sdk/trainer.py
index e704ad2b..cf823dfa 100644
--- a/src/fireworks/training/sdk/trainer.py
+++ b/src/fireworks/training/sdk/trainer.py
@@ -8,12 +8,12 @@
 
 from __future__ import annotations
 
-import logging
 import re
 import time
-from dataclasses import dataclass
-from datetime import timedelta
+import logging
 from typing import Any
+from datetime import timedelta
+from dataclasses import dataclass
 from urllib.parse import urlencode
 
 from fireworks.training.sdk.errors import (

From 7ac0ed8d35854733f31cc607d7a798f0bdc5c62d Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Wed, 20 May 2026 23:09:59 -0700
Subject: [PATCH 07/10] fix(public-sdk): restore embedding client wiped by
 1.2.0-alpha.72 release (#26423)

---
 src/fireworks/training/sdk/client.py          | 298 +++++++++++++++++-
 .../training/sdk/tests/test_client.py         | 271 +++++++++++++++-
 2 files changed, 548 insertions(+), 21 deletions(-)

diff --git a/src/fireworks/training/sdk/client.py b/src/fireworks/training/sdk/client.py
index d081bd86..7f8057b7 100644
--- a/src/fireworks/training/sdk/client.py
+++ b/src/fireworks/training/sdk/client.py
@@ -19,17 +19,20 @@
 import uuid
 import logging
 from enum import Enum
-from typing import Literal, TypeVar, Callable, Optional
+from typing import Any, Literal, TypeVar, Callable, Optional
 from dataclasses import dataclass
 
 from tinker import types
 from pydantic import BaseModel
-from tinker.lib.api_future_impl import _APIFuture
+from tinker.lib.api_future_impl import _APIFuture, _CombinedAPIFuture
 from tinker.lib.queue_state_logger import QueueStateLogger
 from tinker.lib.client_connection_pool_type import ClientConnectionPoolType
 from tinker.lib.public_interfaces.api_future import APIFuture
 from tinker.lib.public_interfaces.service_client import ServiceClient
-from tinker.lib.public_interfaces.training_client import TrainingClient
+from tinker.lib.public_interfaces.training_client import (
+    TrainingClient,
+    combine_fwd_bwd_output_results,
+)
 
 
 class LoadAdapterResponse(BaseModel):
@@ -41,6 +44,7 @@ class LoadAdapterResponse(BaseModel):
 
     model_config = {"protected_namespaces": ()}
 
+
 logger = logging.getLogger(__name__)
 T = TypeVar("T")
 
@@ -86,8 +90,12 @@ def make_cross_job_checkpoint_ref(*, source_job_id: str, checkpoint_name: str) -
         raise ValueError("source_job_id cannot be empty")
     if not normalized_checkpoint_name:
         raise ValueError("checkpoint_name cannot be empty")
-    if normalized_checkpoint_name.startswith("gs://") or normalized_checkpoint_name.startswith("/"):
-        raise ValueError("checkpoint_name must be a logical checkpoint name, not a full path")
+    if normalized_checkpoint_name.startswith(
+        "gs://"
+    ) or normalized_checkpoint_name.startswith("/"):
+        raise ValueError(
+            "checkpoint_name must be a logical checkpoint name, not a full path"
+        )
     return f"{CROSS_JOB_CHECKPOINT_REF_PREFIX}{normalized_source_job_id}/{normalized_checkpoint_name}"
 
 
@@ -163,6 +171,42 @@ def _add_cross_entropy_response_tokens(
     return output
 
 
+def _dump_tinker_model(obj: Any) -> Any:  # best-effort: model object -> plain data
+    if hasattr(obj, "model_dump"):  # preferred path: JSON-mode dump of set fields
+        return obj.model_dump(exclude_unset=True, mode="json")
+    if hasattr(obj, "dict"):  # fallback for objects that only expose .dict()
+        return obj.dict(exclude_unset=True)
+    return obj  # no dump method available: return the value untouched
+
+
+def _text_token_count(datum: types.Datum) -> int:  # tokens across text chunks only
+    raw_datum = _dump_tinker_model(datum)  # NOTE(review): assumes a dict comes back
+    model_input = raw_datum.get("model_input", {})
+    return sum(
+        len(chunk.get("tokens", []))  # chunk without "tokens" contributes 0
+        for chunk in model_input.get("chunks", [])
+        if chunk.get("type", "encoded_text") == "encoded_text"  # no type => text
+    )
+
+
+def _pool_embedding_tensor(  # reduce a multi-row embedding to a single row
+    embedding,  # a torch tensor in the one caller visible in this file
+    datum: types.Datum,
+    pooling: Literal["mean", "last"],
+):
+    if embedding.ndim <= 1:  # nothing to slice or reduce: return unchanged
+        return embedding
+    token_count = _text_token_count(datum)
+    if token_count <= 0:
+        raise ValueError("Cannot pool embedding from an empty text sequence")
+    token_embeddings = embedding[:token_count]  # rows past token_count are dropped
+    if pooling == "mean":
+        return token_embeddings.mean(dim=0)  # average over the leading axis
+    if pooling == "last":
+        return token_embeddings[-1]  # last of the kept rows
+    raise ValueError(f"Unsupported pooling={pooling!r}; expected 'mean' or 'last'")
+
+
 # -- SaveSamplerResult ---------------------------------------------------------
 
 
@@ -209,7 +253,12 @@ class FiretitanTrainingClient(TrainingClient):
     tinker.TrainingClient.
     """
 
-    def __init__(self, holder, model_seq_id: int, model_id):
+    def __init__(
+        self,
+        holder,
+        model_seq_id: int,
+        model_id,
+    ):
         super().__init__(holder=holder, model_seq_id=model_seq_id, model_id=model_id)
         # Track checkpoint names to detect reuse within a session.
         # Sampler and state names are tracked separately because the same name
@@ -260,7 +309,9 @@ def optim_step(
         """
         extra_body: dict = {}
         if grad_accumulation_normalization is not None:
-            extra_body["grad_accumulation_normalization"] = grad_accumulation_normalization.value
+            extra_body["grad_accumulation_normalization"] = (
+                grad_accumulation_normalization.value
+            )
         request_id = self._get_request_id()
 
         async def _optim_step_async():
@@ -306,6 +357,231 @@ def forward_backward(
             lambda output: _add_cross_entropy_response_tokens(output, data=data),
         )
 
+    async def _send_single_forward_embedding_request(  # forward call for one chunk
+        self,
+        request_id: int,
+        data: list[types.Datum],
+        pooling: Literal["mean", "last"],
+    ):
+        request = types.ForwardRequest(
+            forward_input=types.ForwardBackwardInput(
+                data=data,
+                loss_fn="cross_entropy",
+                loss_fn_config=None,  # embedding config is carried in extra_body
+            ),
+            model_id=self._guaranteed_model_id(),
+            seq_id=request_id + 1,  # NOTE(review): reason for +1 not visible — confirm
+        )
+        extra_body = {  # NOTE(review): presumably merged over the typed body — confirm
+            "forward_input": {
+                "data": [_dump_tinker_model(datum) for datum in data],
+                "loss_fn": "cross_entropy",
+                "loss_fn_config": {"output": "embedding", "pooling": pooling},
+            }
+        }
+        with self.holder.aclient(ClientConnectionPoolType.TRAIN) as client:
+            return await client.training.forward(  # awaited result is returned as-is
+                request=request,
+                extra_body=extra_body,
+            )
+
+    async def _send_single_forward_backward_embedding_request(  # fwd+bwd, one chunk
+        self,
+        request_id: int,
+        data: list[types.Datum],
+        pooling: Literal["mean", "last"],
+    ):
+        request = types.ForwardBackwardRequest(
+            forward_backward_input=types.ForwardBackwardInput(
+                data=data,
+                loss_fn="cross_entropy",
+                loss_fn_config=None,  # embedding config is carried in extra_body
+            ),
+            model_id=self._guaranteed_model_id(),
+            seq_id=request_id + 1,  # NOTE(review): reason for +1 not visible — confirm
+        )
+        extra_body = {  # NOTE(review): presumably merged over the typed body — confirm
+            "forward_backward_input": {
+                "data": [_dump_tinker_model(datum) for datum in data],
+                "loss_fn": "cross_entropy",
+                "loss_fn_config": {"output": "embedding", "pooling": pooling},
+            }
+        }
+        with self.holder.aclient(ClientConnectionPoolType.TRAIN) as client:
+            return await client.training.forward_backward(  # returned as-is
+                request=request,
+                extra_body=extra_body,
+            )
+
+    async def _forward_embedding_async(  # one embedding forward request per chunk
+        self,
+        data: list[types.Datum],
+        pooling: Literal["mean", "last"],
+    ) -> APIFuture[types.ForwardBackwardOutput]:
+        requests = self._chunked_requests(data)  # iterated as (request_id, chunk)
+        futures = []
+        start_time = time.time()  # one start timestamp shared by all chunk futures
+        for request_id, chunk in requests:
+            async with self._take_turn(request_id):  # NOTE(review): presumably orders sends — confirm
+                untyped_future = await self.holder.execute_with_retries(
+                    self._send_single_forward_embedding_request,
+                    request_id,
+                    chunk,
+                    pooling,
+                )
+            futures.append(  # wrapped after leaving the _take_turn block
+                _APIFuture(
+                    types.ForwardBackwardOutput,
+                    self.holder,
+                    untyped_future,
+                    request_start_time=start_time,
+                    request_type="Forward",
+                    queue_state_observer=self._queue_state_logger,
+                )
+            )
+        return _CombinedAPIFuture(futures, combine_fwd_bwd_output_results, self.holder)
+
+    async def _forward_backward_embedding_async(  # one fwd+bwd request per chunk
+        self,
+        data: list[types.Datum],
+        pooling: Literal["mean", "last"],
+    ) -> APIFuture[types.ForwardBackwardOutput]:
+        requests = self._chunked_requests(data)  # iterated as (request_id, chunk)
+        futures = []
+        start_time = time.time()  # one start timestamp shared by all chunk futures
+        for request_id, chunk in requests:
+            async with self._take_turn(request_id):  # NOTE(review): presumably orders sends — confirm
+                untyped_future = await self.holder.execute_with_retries(
+                    self._send_single_forward_backward_embedding_request,
+                    request_id,
+                    chunk,
+                    pooling,
+                )
+            futures.append(  # wrapped after leaving the _take_turn block
+                _APIFuture(
+                    types.ForwardBackwardOutput,
+                    self.holder,
+                    untyped_future,
+                    request_start_time=start_time,
+                    request_type="ForwardBackward",
+                    queue_state_observer=self._queue_state_logger,
+                )
+            )
+        return _CombinedAPIFuture(futures, combine_fwd_bwd_output_results, self.holder)
+
+    async def forward_backward_custom_async(  # custom loss on logprobs or embeddings
+        self,
+        data: list[types.Datum],
+        loss_fn: Callable,  # embedding path calls loss_fn(data, embeddings) -> (loss, metrics)
+        *,
+        loss_type_input: Literal["logprobs"] = "logprobs",
+        output: Literal["logprobs", "embedding"] = "logprobs",
+        pooling: Literal["mean", "last"] = "mean",
+    ) -> APIFuture[types.ForwardBackwardOutput]:
+        if output == "logprobs":  # default path: defer entirely to the parent class
+            return await super().forward_backward_custom_async(
+                data,
+                loss_fn,
+                loss_type_input=loss_type_input,
+            )
+        if output != "embedding":
+            raise ValueError(
+                f"Unsupported output={output!r}; expected 'logprobs' or 'embedding'"
+            )
+        if loss_type_input != "logprobs":  # only the default is accepted on this path
+            raise ValueError(
+                "Set output='embedding' instead of loss_type_input for embedding custom loss."
+            )
+        if pooling not in ("mean", "last"):
+            raise ValueError(
+                f"Unsupported pooling={pooling!r}; expected 'mean' or 'last'"
+            )
+
+        try:
+            import torch  # imported lazily; only the embedding path reaches this
+        except ImportError as err:
+            raise ImportError(
+                "PyTorch is not installed. Cannot run custom forward_backward."
+            ) from err
+
+        forward_future = await self._forward_embedding_async(data, pooling)
+        forward_result = await forward_future.result_async()  # wait for embeddings
+
+        embeddings = []
+        for datum, out in zip(data, forward_result.loss_fn_outputs, strict=True):  # strict: lengths must match
+            if "embedding" not in out:
+                raise ValueError("Embedding response missing 'embedding' tensor")
+            embedding_data = out["embedding"]
+            embedding = torch.tensor(embedding_data.data, dtype=torch.float32)
+            if embedding_data.shape is not None:  # restore the declared shape
+                embedding = embedding.reshape(embedding_data.shape)
+            embedding = _pool_embedding_tensor(embedding, datum, pooling)
+            embeddings.append(embedding.clone().detach().requires_grad_(True))  # leaf tensor so .grad is filled
+
+        loss, metrics = loss_fn(data, embeddings)  # user loss runs locally
+        loss.backward()  # fills .grad on each embedding that fed into loss
+
+        backward_data = []
+        for datum, embedding in zip(data, embeddings, strict=True):
+            if embedding.grad is None:  # embedding did not contribute to the loss
+                raise ValueError("No gradient computed for embedding tensor")
+            grad = (  # flat float32 list; the shape is sent separately below
+                embedding.grad.detach()
+                .to(dtype=torch.float32)
+                .reshape(-1)
+                .cpu()
+                .tolist()
+            )
+            backward_data.append(
+                types.Datum(
+                    model_input=datum.model_input,  # same input as the forward pass
+                    loss_fn_inputs={
+                        "embedding_grads": types.TensorData(
+                            data=grad,
+                            dtype="float32",
+                            shape=list(embedding.grad.shape),
+                        )
+                    },
+                )
+            )
+
+        backward_future = await self._forward_backward_embedding_async(
+            backward_data, pooling
+        )
+
+        def add_custom_metrics(  # merges loss_fn's metrics into the server output
+            output_value: types.ForwardBackwardOutput,
+        ) -> types.ForwardBackwardOutput:
+            output_value.metrics.update(metrics)  # mutates in place; custom keys win
+            return output_value
+
+        return _MappedAPIFuture(backward_future, add_custom_metrics)
+
+    def forward_backward_custom(  # sync counterpart of forward_backward_custom_async
+        self,
+        data: list[types.Datum],
+        loss_fn: Callable,
+        *,
+        loss_type_input: Literal["logprobs"] = "logprobs",
+        output: Literal["logprobs", "embedding"] = "logprobs",
+        pooling: Literal["mean", "last"] = "mean",
+    ) -> APIFuture[types.ForwardBackwardOutput]:
+        if output == "logprobs":  # default path: defer entirely to the parent class
+            return super().forward_backward_custom(
+                data,
+                loss_fn,
+                loss_type_input=loss_type_input,
+            )
+        return self.holder.run_coroutine_threadsafe(  # any other value is validated by the async method
+            self.forward_backward_custom_async(
+                data,
+                loss_fn,
+                loss_type_input=loss_type_input,
+                output=output,
+                pooling=pooling,
+            )
+        ).result()  # waits for the coroutine; its return value is the APIFuture
+
     def list_checkpoints(self) -> list[str]:
         """List available DCP checkpoints from the trainer.
 
@@ -659,7 +935,9 @@ async def _create():
                 future,
                 request_start_time=start,
                 request_type="CreateModel",
-                queue_state_observer=QueueStateLogger(base_model, "Base model creation"),
+                queue_state_observer=QueueStateLogger(
+                    base_model, "Base model creation"
+                ),
             ).result_async()
             return resp.model_id
 
@@ -678,4 +956,6 @@ def create_sampling_client(
         base_model=None,
         retry_config=None,
     ):
-        raise NotImplementedError("FiretitanServiceClient.create_sampling_client() is not supported")
+        raise NotImplementedError(
+            "FiretitanServiceClient.create_sampling_client() is not supported"
+        )
diff --git a/src/fireworks/training/sdk/tests/test_client.py b/src/fireworks/training/sdk/tests/test_client.py
index 5dde414e..e536bfec 100644
--- a/src/fireworks/training/sdk/tests/test_client.py
+++ b/src/fireworks/training/sdk/tests/test_client.py
@@ -2,9 +2,11 @@
 
 from __future__ import annotations
 
+import asyncio
 import logging
 from unittest.mock import MagicMock, patch
 
+import torch
 import pytest
 from tinker import types
 
@@ -40,7 +42,9 @@ def test_unique_across_calls(self):
 
 class TestQualifySnapshotName:
     def test_basic(self):
-        assert qualify_snapshot_name("a1b2c3d4", "step-0-base") == "step-0-base-a1b2c3d4"
+        assert (
+            qualify_snapshot_name("a1b2c3d4", "step-0-base") == "step-0-base-a1b2c3d4"
+        )
 
     def test_separator_is_dash(self):
         result = qualify_snapshot_name("deadbeef", "ckpt")
@@ -64,14 +68,18 @@ def _make_client(self):
     def test_first_use_no_warning(self, caplog):
         client = self._make_client()
         with caplog.at_level(logging.WARNING):
-            client._warn_if_name_reused("step-0", client._saved_sampler_names, "Sampler")
+            client._warn_if_name_reused(
+                "step-0", client._saved_sampler_names, "Sampler"
+            )
         assert "already used" not in caplog.text
 
     def test_duplicate_warns(self, caplog):
         client = self._make_client()
         client._saved_sampler_names.add("step-0")
         with caplog.at_level(logging.WARNING):
-            client._warn_if_name_reused("step-0", client._saved_sampler_names, "Sampler")
+            client._warn_if_name_reused(
+                "step-0", client._saved_sampler_names, "Sampler"
+            )
         assert "already used" in caplog.text
 
 
@@ -153,8 +161,12 @@ def _make_client(self):
         client.session_id = "test1234"
         return client
 
-    @patch("tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward")
-    def test_cross_entropy_adds_response_tokens_from_weights(self, mock_forward_backward):
+    @patch(
+        "tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward"
+    )
+    def test_cross_entropy_adds_response_tokens_from_weights(
+        self, mock_forward_backward
+    ):
         client = self._make_client()
         future = MagicMock()
         future.result.return_value = types.ForwardBackwardOutput(
@@ -165,8 +177,12 @@ def test_cross_entropy_adds_response_tokens_from_weights(self, mock_forward_back
         mock_forward_backward.return_value = future
         datum = MagicMock()
         datum.loss_fn_inputs = {
-            "weights": types.TensorData(data=[0.0, 1.0, 1.0, 0.0], dtype="float32", shape=[4]),
-            "target_tokens": types.TensorData(data=[10, 11, 12, 13], dtype="int64", shape=[4]),
+            "weights": types.TensorData(
+                data=[0.0, 1.0, 1.0, 0.0], dtype="float32", shape=[4]
+            ),
+            "target_tokens": types.TensorData(
+                data=[10, 11, 12, 13], dtype="int64", shape=[4]
+            ),
         }
 
         result = client.forward_backward([datum], "cross_entropy").result()
@@ -174,8 +190,12 @@ def test_cross_entropy_adds_response_tokens_from_weights(self, mock_forward_back
         assert result.metrics["response_tokens"] == 2.0
         mock_forward_backward.assert_called_once_with([datum], "cross_entropy", None)
 
-    @patch("tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward")
-    def test_cross_entropy_falls_back_to_target_token_length(self, mock_forward_backward):
+    @patch(
+        "tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward"
+    )
+    def test_cross_entropy_falls_back_to_target_token_length(
+        self, mock_forward_backward
+    ):
         client = self._make_client()
         future = MagicMock()
         future.result.return_value = types.ForwardBackwardOutput(
@@ -186,14 +206,18 @@ def test_cross_entropy_falls_back_to_target_token_length(self, mock_forward_back
         mock_forward_backward.return_value = future
         datum = MagicMock()
         datum.loss_fn_inputs = {
-            "target_tokens": types.TensorData(data=[10, 11, 12], dtype="int64", shape=[3]),
+            "target_tokens": types.TensorData(
+                data=[10, 11, 12], dtype="int64", shape=[3]
+            ),
         }
 
         result = client.forward_backward([datum], "cross_entropy").result()
 
         assert result.metrics["response_tokens"] == 3.0
 
-    @patch("tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward")
+    @patch(
+        "tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward"
+    )
     def test_existing_response_tokens_metric_is_preserved(self, mock_forward_backward):
         client = self._make_client()
         future = MagicMock()
@@ -211,6 +235,227 @@ def test_existing_response_tokens_metric_is_preserved(self, mock_forward_backwar
         assert result.metrics["response_tokens"] == 7.0
 
 
+class TestForwardBackwardCustomEmbedding:  # forward_backward_custom: logprobs vs embedding output
+    def _make_client(self):
+        client = FiretitanTrainingClient.__new__(FiretitanTrainingClient)  # bypass __init__
+        client._saved_sampler_names = set()
+        client._saved_state_names = set()
+        client.session_id = "test1234"
+        return client
+
+    @patch(
+        "tinker.lib.public_interfaces.training_client.TrainingClient.forward_backward_custom"
+    )
+    def test_logprob_output_delegates_to_upstream_tinker(
+        self, mock_forward_backward_custom
+    ):
+        client = self._make_client()
+        future = MagicMock()
+        mock_forward_backward_custom.return_value = future
+
+        result = client.forward_backward_custom([], MagicMock())  # output defaults to "logprobs"
+
+        assert result is future
+        mock_forward_backward_custom.assert_called_once()
+
+    def test_embedding_output_calls_loss_and_sends_embedding_grads(self, monkeypatch):
+        client = self._make_client()
+        datum = types.Datum(
+            model_input=types.ModelInput.from_ints([1, 2]),
+            loss_fn_inputs={},
+        )
+        forward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="forward",
+            loss_fn_outputs=[
+                {
+                    "embedding": types.TensorData(
+                        data=[1.0, 2.0],
+                        dtype="float32",
+                        shape=[2],  # 1-D embedding: the pooling helper returns it unchanged
+                    )
+                }
+            ],
+            metrics={},
+        )
+        backward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="cross_entropy",
+            loss_fn_outputs=[],
+            metrics={"loss:sum": 0.0},
+        )
+        captured = {}  # records what the fakes below were called with
+
+        class _ImmediateFuture:  # stand-in future that resolves to a fixed value
+            def __init__(self, value):
+                self._value = value
+
+            async def result_async(self, timeout=None):
+                return self._value
+
+            def result(self, timeout=None):
+                return self._value
+
+        async def fake_forward(data, pooling):
+            captured["forward_pooling"] = pooling
+            return _ImmediateFuture(forward_output)
+
+        async def fake_backward(data, pooling):
+            captured["backward_pooling"] = pooling
+            captured["backward_data"] = data
+            return _ImmediateFuture(backward_output)
+
+        monkeypatch.setattr(client, "_forward_embedding_async", fake_forward)
+        monkeypatch.setattr(client, "_forward_backward_embedding_async", fake_backward)
+
+        def loss_fn(data, embeddings):
+            assert data == [datum]
+            return (embeddings[0] * torch.tensor([3.0, -1.0])).sum(), {"custom": 2.0}
+
+        future = asyncio.run(
+            client.forward_backward_custom_async(
+                [datum],
+                loss_fn,
+                output="embedding",
+                pooling="last",
+            )
+        )
+        result = future.result()
+
+        assert result.metrics["custom"] == 2.0  # loss_fn metrics merged into the output
+        assert captured["forward_pooling"] == "last"
+        assert captured["backward_pooling"] == "last"
+        grad_data = captured["backward_data"][0].loss_fn_inputs["embedding_grads"]
+        assert grad_data.data == [3.0, -1.0]  # gradient of sum(emb * w) w.r.t. emb is w
+        assert grad_data.shape == [2]
+
+    def test_embedding_output_pools_sequence_hidden_states(self, monkeypatch):
+        client = self._make_client()
+        datum = types.Datum(
+            model_input=types.ModelInput.from_ints([1, 2]),  # two text tokens
+            loss_fn_inputs={},
+        )
+        forward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="forward",
+            loss_fn_outputs=[
+                {
+                    "embedding": types.TensorData(
+                        data=[1.0, 2.0, 3.0, 4.0, 100.0, 200.0],
+                        dtype="float32",
+                        shape=[3, 2],  # three rows for a two-token input
+                    )
+                }
+            ],
+            metrics={},
+        )
+        backward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="cross_entropy",
+            loss_fn_outputs=[],
+            metrics={"loss:sum": 0.0},
+        )
+        captured = {}
+
+        class _ImmediateFuture:  # stand-in future that resolves to a fixed value
+            def __init__(self, value):
+                self._value = value
+
+            async def result_async(self, timeout=None):
+                return self._value
+
+            def result(self, timeout=None):
+                return self._value
+
+        async def fake_forward(data, pooling):
+            return _ImmediateFuture(forward_output)
+
+        async def fake_backward(data, pooling):
+            captured["backward_data"] = data
+            return _ImmediateFuture(backward_output)
+
+        monkeypatch.setattr(client, "_forward_embedding_async", fake_forward)
+        monkeypatch.setattr(client, "_forward_backward_embedding_async", fake_backward)
+
+        def loss_fn(data, embeddings):
+            assert embeddings[0].tolist() == [3.0, 4.0]  # row 2 of 3; [100, 200] row is cut
+            return (embeddings[0] * torch.tensor([5.0, -2.0])).sum(), {}
+
+        future = asyncio.run(
+            client.forward_backward_custom_async(
+                [datum],
+                loss_fn,
+                output="embedding",
+                pooling="last",
+            )
+        )
+        future.result()
+
+        grad_data = captured["backward_data"][0].loss_fn_inputs["embedding_grads"]
+        assert grad_data.data == [5.0, -2.0]
+        assert grad_data.shape == [2]  # gradient has the pooled shape, not [3, 2]
+
+    def test_embedding_output_pools_shaped_sequence_hidden_states(self, monkeypatch):  # NOTE(review): same inputs as the previous test; only the loss weights differ
+        client = self._make_client()
+        datum = types.Datum(
+            model_input=types.ModelInput.from_ints([1, 2]),
+            loss_fn_inputs={},
+        )
+        forward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="forward",
+            loss_fn_outputs=[
+                {
+                    "embedding": types.TensorData(
+                        data=[1.0, 2.0, 3.0, 4.0, 100.0, 200.0],
+                        dtype="float32",
+                        shape=[3, 2],
+                    )
+                }
+            ],
+            metrics={},
+        )
+        backward_output = types.ForwardBackwardOutput(
+            loss_fn_output_type="cross_entropy",
+            loss_fn_outputs=[],
+            metrics={"loss:sum": 0.0},
+        )
+        captured = {}
+
+        class _ImmediateFuture:  # stand-in future that resolves to a fixed value
+            def __init__(self, value):
+                self._value = value
+
+            async def result_async(self, timeout=None):
+                return self._value
+
+            def result(self, timeout=None):
+                return self._value
+
+        async def fake_forward(data, pooling):
+            return _ImmediateFuture(forward_output)
+
+        async def fake_backward(data, pooling):
+            captured["backward_data"] = data
+            return _ImmediateFuture(backward_output)
+
+        monkeypatch.setattr(client, "_forward_embedding_async", fake_forward)
+        monkeypatch.setattr(client, "_forward_backward_embedding_async", fake_backward)
+
+        def loss_fn(data, embeddings):
+            assert embeddings[0].tolist() == [3.0, 4.0]
+            return (embeddings[0] * torch.tensor([7.0, -3.0])).sum(), {}
+
+        future = asyncio.run(
+            client.forward_backward_custom_async(
+                [datum],
+                loss_fn,
+                output="embedding",
+                pooling="last",
+            )
+        )
+        future.result()
+
+        grad_data = captured["backward_data"][0].loss_fn_inputs["embedding_grads"]
+        assert grad_data.data == [7.0, -3.0]
+        assert grad_data.shape == [2]
+
+
 # ---------------------------------------------------------------------------
 # FiretitanServiceClient.create_training_client — duplicate detection
 # ---------------------------------------------------------------------------
@@ -236,7 +481,9 @@ def test_different_lora_rank_ok(self):
         svc.holder = MagicMock()
         svc.holder.get_session_id.return_value = 1
         svc.holder.get_training_client_id.return_value = 1
-        svc.holder.run_coroutine_threadsafe.return_value.result.return_value = "model-id"
+        svc.holder.run_coroutine_threadsafe.return_value.result.return_value = (
+            "model-id"
+        )
 
         # Should not raise — different lora_rank is a different config
         try:

From 9d62fe4c45fa91d3780093bb15524ec26600408c Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Thu, 21 May 2026 00:52:16 -0700
Subject: [PATCH 08/10] chore: catch up cookbook + SDK from public main; delete
 dead Stainless CI; autorelease SST + guards (#26426)

---
 .github/workflows/ci.yml             |  53 ------------
 .github/workflows/post-publish.yml   |  43 ----------
 .github/workflows/publish-pypi.yml   |  31 -------
 .github/workflows/release-doctor.yml |  21 -----
 .github/workflows/release-tag.yml    | 121 ---------------------------
 .release-please-manifest.json        |   3 -
 CHANGELOG.md                         |   8 ++
 pyproject.toml                       |   2 +-
 release-please-config.json           |  66 ---------------
 src/fireworks/_version.py            |   2 +-
 10 files changed, 10 insertions(+), 340 deletions(-)
 delete mode 100644 .github/workflows/ci.yml
 delete mode 100644 .github/workflows/post-publish.yml
 delete mode 100644 .github/workflows/publish-pypi.yml
 delete mode 100644 .github/workflows/release-doctor.yml
 delete mode 100644 .github/workflows/release-tag.yml
 delete mode 100644 .release-please-manifest.json
 delete mode 100644 release-please-config.json

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index bf6dc9a6..00000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-name: CI
-
-# Minimal safety net for direct PRs. The authoritative CI suite
-# (lint, build, unit + mock-server matrix, coverage, security audit,
-# e2e) runs upstream before promotion, so this workflow only needs
-# to catch the rare direct edit.
-
-on:
-  push:
-    branches:
-      - '**'
-  pull_request:
-
-jobs:
-  lint:
-    name: lint
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    if: github.event.head_commit.message != 'codegen metadata'
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Install dependencies
-        run: rye sync --all-features
-
-      - name: Run lints
-        run: ./scripts/lint
-
-  import-smoke:
-    name: import-smoke
-    runs-on: ubuntu-latest
-    timeout-minutes: 5
-    if: github.event.head_commit.message != 'codegen metadata'
-    steps:
-      - uses: actions/checkout@v6
-
-      - uses: actions/setup-python@v6
-        with:
-          python-version: '3.11'
-
-      - name: Install package
-        run: pip install -e . 2>/dev/null || pip install -e .[training]
-
-      - name: Smoke import
-        run: python -c "import fireworks; print('fireworks', getattr(fireworks, '__version__', '<unknown>'))"
diff --git a/.github/workflows/post-publish.yml b/.github/workflows/post-publish.yml
deleted file mode 100644
index a3e9c632..00000000
--- a/.github/workflows/post-publish.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: Post-Publish Smoke Test
-
-on:
-  workflow_run:
-    workflows: ["Publish PyPI"]
-    types:
-      - completed
-  workflow_dispatch:
-
-jobs:
-  smoke-test:
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
-    strategy:
-      matrix:
-        python-version: ["3.9", "3.11", "3.13"]
-    steps:
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Wait for PyPI propagation and install
-        run: |
-          for i in $(seq 1 10); do
-            if pip install fireworks-ai 2>/dev/null; then
-              echo "Package available"
-              exit 0
-            fi
-            echo "Attempt $i: not available yet, waiting 30s..."
-            sleep 30
-          done
-          echo "Package not available after 5 minutes"
-          exit 1
-
-      - name: Verify import and version
-        run: python -c "import fireworks; print(f'fireworks-ai {fireworks.__version__} installed successfully')"
-
-      - name: Verify training extras
-        if: matrix.python-version != '3.9'
-        run: |
-          pip install "fireworks-ai[training]"
-          python -c "from fireworks.training import sdk; print('Training SDK OK')"
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
deleted file mode 100644
index f3da1ba3..00000000
--- a/.github/workflows/publish-pypi.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-# This workflow is triggered when a GitHub release is created.
-# It can also be run manually to re-publish to PyPI in case it failed for some reason.
-# You can run this workflow by navigating to https://www.github.com/fw-ai-external/python-sdk/actions/workflows/publish-pypi.yml
-name: Publish PyPI
-on:
-  workflow_dispatch:
-
-  release:
-    types: [published]
-
-jobs:
-  publish:
-    name: publish
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install Rye
-        run: |
-          curl -sSf https://rye.astral.sh/get | bash
-          echo "$HOME/.rye/shims" >> $GITHUB_PATH
-        env:
-          RYE_VERSION: '0.44.0'
-          RYE_INSTALL_OPTION: '--yes'
-
-      - name: Publish to PyPI
-        run: |
-          bash ./bin/publish-pypi
-        env:
-          PYPI_TOKEN: ${{ secrets.FIREWORKS_PYPI_TOKEN || secrets.PYPI_TOKEN }}
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
deleted file mode 100644
index 745714d7..00000000
--- a/.github/workflows/release-doctor.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-name: Release Doctor
-on:
-  pull_request:
-    branches:
-      - main
-  workflow_dispatch:
-
-jobs:
-  release_doctor:
-    name: release doctor
-    runs-on: ubuntu-latest
-    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next')
-
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Check release environment
-        run: |
-          bash ./bin/check-release-environment
-        env:
-          PYPI_TOKEN: ${{ secrets.FIREWORKS_PYPI_TOKEN || secrets.PYPI_TOKEN }}
diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml
deleted file mode 100644
index bc70052a..00000000
--- a/.github/workflows/release-tag.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-name: Release Tag
-
-# Fires when a "release: X.Y.Z" commit lands on main (typically via
-# squash-merge of an upstream-prepared release PR; subject has no "v"
-# prefix, the tag does). Creates the matching git tag vX.Y.Z and a
-# GitHub Release, which is what publish-pypi.yml listens for. The
-# GitHub Release MUST be created with FW_AI_BOT_TOKEN (a PAT) rather
-# than github.token, because Release events triggered by github.token
-# do not cascade into other workflows — using github.token would
-# silently break the publish chain. Idempotent — the tag/release won't
-# be recreated if they already exist.
-
-on:
-  push:
-    branches:
-      - main
-  workflow_dispatch:
-    inputs:
-      version:
-        description: Version to tag (e.g. 1.2.0-alpha.72); leave blank to read manifest
-        type: string
-        required: false
-
-permissions:
-  contents: write
-
-jobs:
-  tag:
-    if: github.repository == 'fw-ai-external/python-sdk'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-        with:
-          fetch-depth: 0
-          token: ${{ secrets.FW_AI_BOT_TOKEN || github.token }}
-
-      - name: Determine version
-        id: version
-        env:
-          DISPATCH_VERSION: ${{ github.event.inputs.version }}
-        run: |
-          set -euo pipefail
-
-          if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${DISPATCH_VERSION}" ]]; then
-            version="${DISPATCH_VERSION}"
-          elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
-            # Manual recovery dispatch with no version: read the manifest.
-            version="$(python3 -c 'import json; print(json.load(open(".release-please-manifest.json"))["."])')"
-          else
-            # Push to main: the commit subject is the source of truth.
-            # The optional " (#NNN)" suffix is what GitHub appends on
-            # squash-merge; capture only the version token.
-            subject="$(git log -1 --format=%s)"
-            if [[ ! "${subject}" =~ ^release:\ +([0-9][0-9A-Za-z._-]*)(\ +\(#[0-9]+\))?$ ]]; then
-              echo "Most recent commit is not a release commit; nothing to tag."
-              echo "skip=true" >> "$GITHUB_OUTPUT"
-              exit 0
-            fi
-            version="${BASH_REMATCH[1]}"
-          fi
-
-          # Belt-and-suspenders: validate the version regardless of source
-          # (push subject, explicit dispatch input, or manifest fallback).
-          if [[ ! "${version}" =~ ^[0-9][0-9A-Za-z._-]*$ ]]; then
-            echo "::error::Invalid release version: ${version}"
-            exit 1
-          fi
-
-          tag="v${version}"
-          if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null; then
-            echo "Tag ${tag} already exists; skipping."
-            echo "skip=true" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          {
-            echo "skip=false"
-            echo "version=${version}"
-            echo "tag=${tag}"
-          } >> "$GITHUB_OUTPUT"
-
-      - name: Require FW_AI_BOT_TOKEN
-        if: steps.version.outputs.skip != 'true'
-        env:
-          BOT_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
-        run: |
-          [[ -n "${BOT_TOKEN}" ]] || { echo "::error::FW_AI_BOT_TOKEN is required"; exit 1; }
-
-      - name: Create tag and GitHub Release
-        if: steps.version.outputs.skip != 'true'
-        env:
-          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
-          VERSION: ${{ steps.version.outputs.version }}
-          TAG: ${{ steps.version.outputs.tag }}
-        run: |
-          set -euo pipefail
-
-          # Extract this version's CHANGELOG section as the release notes.
-          notes_file="$(mktemp)"
-          awk -v ver="${VERSION}" '
-            $0 ~ "^## " ver "($| )" { capture = 1; print; next }
-            capture && /^## / { exit }
-            capture { print }
-          ' CHANGELOG.md > "${notes_file}"
-
-          # Tag this commit and push.
-          git config user.name "github-actions[bot]"
-          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git tag -a "${TAG}" -m "${TAG}"
-          git push origin "${TAG}"
-
-          # Pre-1.0 prereleases stay marked as prerelease.
-          prerelease_flag=""
-          if [[ "${VERSION}" == *-alpha.* || "${VERSION}" == *-beta.* || "${VERSION}" == *-rc.* ]]; then
-            prerelease_flag="--prerelease"
-          fi
-
-          gh release create "${TAG}" \
-            --title "${TAG}" \
-            --notes-file "${notes_file}" \
-            ${prerelease_flag}
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
deleted file mode 100644
index 8e7a7098..00000000
--- a/.release-please-manifest.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
-  ".": "1.2.0-alpha.71"
-}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0cf6e85..c7c9c918 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.2.0-alpha.72 (2026-05-21)
+
+Full Changelog: [v1.2.0-alpha.71...v1.2.0-alpha.72](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.71...v1.2.0-alpha.72)
+
+### Chores
+* bootstrap public SDK release workflows (#83) ([7dcb03e](https://github.com/fw-ai-external/python-sdk/commit/7dcb03e5380a70b28dd7d438a6e10f55be0e493f))
+* bootstrap promotion pipeline workflows (#86) ([e0042ab](https://github.com/fw-ai-external/python-sdk/commit/e0042ab3117a02a7ca5266b4a8400f47627c5402))
+
 ## 1.2.0-alpha.71 (2026-05-19)
 
 Full Changelog: [v1.2.0-alpha.70...v1.2.0-alpha.71](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.70...v1.2.0-alpha.71)
diff --git a/pyproject.toml b/pyproject.toml
index 23bc6930..d2215a0f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fireworks-ai"
-version = "1.2.0-alpha.71"
+version = "1.2.0-alpha.72"
 description = "The official Python library for the fireworks API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/release-please-config.json b/release-please-config.json
deleted file mode 100644
index c24a3935..00000000
--- a/release-please-config.json
+++ /dev/null
@@ -1,66 +0,0 @@
-{
-  "packages": {
-    ".": {}
-  },
-  "$schema": "https://raw.githubusercontent.com/googleapis/release-please/main/schemas/config.json",
-  "include-v-in-tag": true,
-  "include-component-in-tag": false,
-  "versioning": "prerelease",
-  "prerelease": true,
-  "bump-minor-pre-major": true,
-  "bump-patch-for-minor-pre-major": false,
-  "pull-request-header": "Automated Release PR",
-  "pull-request-title-pattern": "release: ${version}",
-  "changelog-sections": [
-    {
-      "type": "feat",
-      "section": "Features"
-    },
-    {
-      "type": "fix",
-      "section": "Bug Fixes"
-    },
-    {
-      "type": "perf",
-      "section": "Performance Improvements"
-    },
-    {
-      "type": "revert",
-      "section": "Reverts"
-    },
-    {
-      "type": "chore",
-      "section": "Chores"
-    },
-    {
-      "type": "docs",
-      "section": "Documentation"
-    },
-    {
-      "type": "style",
-      "section": "Styles"
-    },
-    {
-      "type": "refactor",
-      "section": "Refactors"
-    },
-    {
-      "type": "test",
-      "section": "Tests",
-      "hidden": true
-    },
-    {
-      "type": "build",
-      "section": "Build System"
-    },
-    {
-      "type": "ci",
-      "section": "Continuous Integration",
-      "hidden": true
-    }
-  ],
-  "release-type": "python",
-  "extra-files": [
-    "src/fireworks/_version.py"
-  ]
-}
diff --git a/src/fireworks/_version.py b/src/fireworks/_version.py
index 6e176a76..3cc8870e 100644
--- a/src/fireworks/_version.py
+++ b/src/fireworks/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "fireworks"
-__version__ = "1.2.0-alpha.71"  # x-release-please-version
+__version__ = "1.2.0-alpha.72"  # x-release-please-version

From e7061ec4917051188ab44a71463f59bc8fd3e0e2 Mon Sep 17 00:00:00 2001
From: Chengxi Li <114854555+Hecate0821@users.noreply.github.com>
Date: Thu, 21 May 2026 16:04:32 -0700
Subject: [PATCH 09/10] hotfix(promote): drop overwrite guard, add PR-time
 diverge check, mirror public .github/ (#26425)

---
 .github/workflows/ci.yml             |  59 +++++++++++
 .github/workflows/post-publish.yml   |  47 +++++++++
 .github/workflows/publish-pypi.yml   |  31 ++++++
 .github/workflows/release-doctor.yml |  21 ++++
 .github/workflows/release-tag.yml    | 142 +++++++++++++++++++++++++++
 5 files changed, 300 insertions(+)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/post-publish.yml
 create mode 100644 .github/workflows/publish-pypi.yml
 create mode 100644 .github/workflows/release-doctor.yml
 create mode 100644 .github/workflows/release-tag.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..18303cfe
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,59 @@
+name: CI
+
+# Minimal safety net for direct PRs. The authoritative CI suite
+# (lint, build, unit + mock-server matrix, coverage, security audit,
+# e2e) runs upstream before promotion, so this workflow only needs
+# to catch the rare direct edit.
+
+on:
+  push:
+    branches:
+      - '**'
+  pull_request:
+
+jobs:
+  lint:
+    name: lint
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    if: github.event.head_commit.message != 'codegen metadata'
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Install dependencies
+        run: rye sync --all-features
+
+      - name: Run lints
+        run: ./scripts/lint
+
+  import-smoke:
+    name: import-smoke
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    if: github.event.head_commit.message != 'codegen metadata'
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+
+      - name: Install package
+        # Smoke import only exercises the base package (`import fireworks`),
+        # so install without extras. The previous `pip install -e . 2>/dev/null
+        # || pip install -e .[training]` chain swallowed real install errors
+        # and silently chose between two install modes -- exactly the
+        # silent-fallback pattern the rest of this hotfix tightens. Pinned to
+        # plain install for predictable, fast CI.
+        run: pip install -e .
+
+      - name: Smoke import
+        run: python -c "import fireworks; print('fireworks', getattr(fireworks, '__version__', '<unknown>'))"
diff --git a/.github/workflows/post-publish.yml b/.github/workflows/post-publish.yml
new file mode 100644
index 00000000..5f2849aa
--- /dev/null
+++ b/.github/workflows/post-publish.yml
@@ -0,0 +1,47 @@
+name: Post-Publish Smoke Test
+
+on:
+  workflow_run:
+    workflows: ["Publish PyPI"]
+    types:
+      - completed
+  workflow_dispatch:
+
+jobs:
+  smoke-test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.11", "3.13"]
+    steps:
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Wait for PyPI propagation and install
+        run: |
+          # --upgrade --pre is required: the runner toolcache ships an older
+          # fireworks-ai that would otherwise satisfy the requirement, leaving
+          # the smoke test on the stale cached version instead of the new
+          # release. stderr stays visible so real pip failures can be diagnosed.
+          for i in $(seq 1 10); do
+            if pip install --upgrade --pre fireworks-ai; then
+              echo "Package available"
+              exit 0
+            fi
+            echo "Attempt $i: not available yet, waiting 30s..."
+            sleep 30
+          done
+          echo "Package not available after 5 minutes"
+          exit 1
+
+      - name: Verify import and version
+        run: python -c "import fireworks; print(f'fireworks-ai {fireworks.__version__} installed successfully')"
+
+      - name: Verify training extras
+        if: matrix.python-version != '3.9'
+        run: |
+          pip install --upgrade --pre "fireworks-ai[training]"
+          python -c "from fireworks.training import sdk; print('Training SDK OK')"
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
new file mode 100644
index 00000000..f3da1ba3
--- /dev/null
+++ b/.github/workflows/publish-pypi.yml
@@ -0,0 +1,31 @@
+# This workflow is triggered when a GitHub release is created.
+# It can also be run manually to re-publish to PyPI in case it failed for some reason.
+# You can run this workflow by navigating to https://www.github.com/fw-ai-external/python-sdk/actions/workflows/publish-pypi.yml
+name: Publish PyPI
+on:
+  workflow_dispatch:
+
+  release:
+    types: [published]
+
+jobs:
+  publish:
+    name: publish
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install Rye
+        run: |
+          curl -sSf https://rye.astral.sh/get | bash
+          echo "$HOME/.rye/shims" >> $GITHUB_PATH
+        env:
+          RYE_VERSION: '0.44.0'
+          RYE_INSTALL_OPTION: '--yes'
+
+      - name: Publish to PyPI
+        run: |
+          bash ./bin/publish-pypi
+        env:
+          PYPI_TOKEN: ${{ secrets.FIREWORKS_PYPI_TOKEN || secrets.PYPI_TOKEN }}
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
new file mode 100644
index 00000000..a386ef5e
--- /dev/null
+++ b/.github/workflows/release-doctor.yml
@@ -0,0 +1,21 @@
+name: Release Doctor
+on:
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  release_doctor:
+    name: release doctor
+    runs-on: ubuntu-latest
+    if: github.repository == 'fw-ai-external/python-sdk' && (github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'autorelease/') || github.head_ref == 'next')
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Check release environment
+        run: |
+          bash ./bin/check-release-environment
+        env:
+          PYPI_TOKEN: ${{ secrets.FIREWORKS_PYPI_TOKEN || secrets.PYPI_TOKEN }}
diff --git a/.github/workflows/release-tag.yml b/.github/workflows/release-tag.yml
new file mode 100644
index 00000000..0f043f06
--- /dev/null
+++ b/.github/workflows/release-tag.yml
@@ -0,0 +1,142 @@
+name: Release Tag
+
+# Fires on every push to main. If pyproject.toml carries a version that has
+# not yet been tagged, this workflow creates the matching git tag vX.Y.Z and a
+# GitHub Release, which is what publish-pypi.yml listens for. pyproject.toml
+# is the single source of truth for the released version — no commit-subject
+# regex, no manifest, no workflow_dispatch override. The GitHub Release MUST
+# be created with FW_AI_BOT_TOKEN (a PAT) rather than github.token, because
+# Release events triggered by github.token do not cascade into other
+# workflows. Idempotent — the tag/release won't be recreated if they already
+# exist.
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  tag:
+    if: github.repository == 'fw-ai-external/python-sdk'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Require FW_AI_BOT_TOKEN
+        # Runs before checkout so an unset secret fails with a clear message.
+        env:
+          BOT_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
+        run: |
+          [[ -n "${BOT_TOKEN}" ]] || { echo "::error::FW_AI_BOT_TOKEN is required"; exit 1; }
+
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.FW_AI_BOT_TOKEN }}
+
+      - name: Determine version
+        id: version
+        run: |
+          set -euo pipefail
+
+          # Single source of truth: pyproject.toml [project] version on the
+          # merged commit. No commit-subject parsing, no manifest, no fallback.
+          version="$(python3 -c '
+          try:
+              import tomllib
+          except ModuleNotFoundError:
+              import tomli as tomllib
+          with open("pyproject.toml", "rb") as f:
+              print(tomllib.load(f)["project"]["version"])
+          ')"
+
+          if [[ ! "${version}" =~ ^[0-9][0-9A-Za-z._-]*$ ]]; then
+            echo "::error::Invalid version in pyproject.toml: ${version}"
+            exit 1
+          fi
+
+          {
+            echo "version=${version}"
+            echo "tag=v${version}"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Check if tag exists
+        id: tag
+        env:
+          TAG: ${{ steps.version.outputs.tag }}
+        run: |
+          set -euo pipefail
+          if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null; then
+            echo "Tag ${TAG} already exists."
+            echo "exists=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "exists=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Create tag and GitHub Release
+        if: steps.tag.outputs.exists == 'false'
+        env:
+          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
+          VERSION: ${{ steps.version.outputs.version }}
+          TAG: ${{ steps.version.outputs.tag }}
+        run: |
+          set -euo pipefail
+
+          # Extract this version's CHANGELOG section as the release notes.
+          notes_file="$(mktemp)"
+          awk -v ver="${VERSION}" '
+            $0 ~ "^## " ver "($| )" { capture = 1; print; next }
+            capture && /^## / { exit }
+            capture { print }
+          ' CHANGELOG.md > "${notes_file}"
+
+          # Tag this commit and push.
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git tag -a "${TAG}" -m "${TAG}"
+          git push origin "${TAG}"
+
+          # Mark -alpha.N / -beta.N / -rc.N versions as GitHub prereleases.
+          prerelease_flag=""
+          if [[ "${VERSION}" == *-alpha.* || "${VERSION}" == *-beta.* || "${VERSION}" == *-rc.* ]]; then
+            prerelease_flag="--prerelease"
+          fi
+
+          gh release create "${TAG}" \
+            --title "${TAG}" \
+            --notes-file "${notes_file}" \
+            ${prerelease_flag}
+
+      - name: Mark release PR as tagged
+        # Runs unconditionally so that recovering from a partial prior failure
+        # (tag created, label flip failed) just needs a workflow rerun. The
+        # step is a no-op on commits that aren't release-PR merges (no PR
+        # found) and on PRs already flipped to 'tagged' (no pending label).
+        # Loud failure only when this run just created the tag but no PR is
+        # associated — that indicates a misconfigured release commit.
+        env:
+          GH_TOKEN: ${{ secrets.FW_AI_BOT_TOKEN }}
+          TAG_JUST_CREATED: ${{ steps.tag.outputs.exists == 'false' }}
+        run: |
+          set -euo pipefail
+          pr_number="$(gh api "repos/${GITHUB_REPOSITORY}/commits/${GITHUB_SHA}/pulls" \
+            --jq '.[0].number // empty')"
+          if [[ -z "${pr_number}" ]]; then
+            if [[ "${TAG_JUST_CREATED}" == "true" ]]; then
+              echo "::error::Tag was created this run but no PR is associated with ${GITHUB_SHA}."
+              exit 1
+            fi
+            echo "No PR associated with ${GITHUB_SHA}; nothing to flip."
+            exit 0
+          fi
+          labels="$(gh pr view "${pr_number}" --repo "${GITHUB_REPOSITORY}" \
+            --json labels --jq '[.labels[].name] | join(",")')"
+          if [[ ",${labels}," != *",autorelease: pending,"* ]]; then
+            echo "PR #${pr_number} does not carry 'autorelease: pending'; nothing to flip."
+            exit 0
+          fi
+          gh pr edit "${pr_number}" --repo "${GITHUB_REPOSITORY}" \
+            --remove-label "autorelease: pending" \
+            --add-label "autorelease: tagged"

From 7699791385943d11ac9dc08cdd70cb5465bf3245 Mon Sep 17 00:00:00 2001
From: Hecate0821 <114854555+Hecate0821@users.noreply.github.com>
Date: Fri, 22 May 2026 03:10:05 +0000
Subject: [PATCH 10/10] release: 1.2.0-alpha.73

---
 CHANGELOG.md              | 14 ++++++++++++++
 pyproject.toml            |  2 +-
 src/fireworks/_version.py |  2 +-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7c9c918..90e9e886 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## 1.2.0-alpha.73 (2026-05-22)
+
+Full Changelog: [v1.2.0-alpha.72...v1.2.0-alpha.73](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.72...v1.2.0-alpha.73)
+
+### Bug Fixes
+* **public-sdk:** restore embedding client wiped by 1.2.0-alpha.72 release (#26423) ([7ac0ed8](https://github.com/fw-ai-external/python-sdk/commit/7ac0ed8d35854733f31cc607d7a798f0bdc5c62d))
+
+### Chores
+* autorelease single source of truth (#90) ([fc3f8c7](https://github.com/fw-ai-external/python-sdk/commit/fc3f8c72d7a72f40d95d32658488bea44e443d18))
+* stage SDK deployment annotations (#26077) ([8eb9f95](https://github.com/fw-ai-external/python-sdk/commit/8eb9f95014dd449f038d6e78fad9df4ca02bed34))
+* clean up staged public repo references (#26200) ([8037b48](https://github.com/fw-ai-external/python-sdk/commit/8037b48f7242072256841345eef9ec14142ed189))
+* unblock public SDK promotion flow (#26242) ([79a1c79](https://github.com/fw-ai-external/python-sdk/commit/79a1c79157d74e0443666344ef872bf5609cfd4c))
+* catch up cookbook + SDK from public main; delete dead Stainless CI; autorelease SST + guards (#26426) ([9d62fe4](https://github.com/fw-ai-external/python-sdk/commit/9d62fe4c45fa91d3780093bb15524ec26600408c))
+
 ## 1.2.0-alpha.72 (2026-05-21)
 
 Full Changelog: [v1.2.0-alpha.71...v1.2.0-alpha.72](https://github.com/fw-ai-external/python-sdk/compare/v1.2.0-alpha.71...v1.2.0-alpha.72)
diff --git a/pyproject.toml b/pyproject.toml
index d2215a0f..ea01178e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fireworks-ai"
-version = "1.2.0-alpha.72"
+version = "1.2.0-alpha.73"
 description = "The official Python library for the fireworks API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/fireworks/_version.py b/src/fireworks/_version.py
index 3cc8870e..6eb492c7 100644
--- a/src/fireworks/_version.py
+++ b/src/fireworks/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "fireworks"
-__version__ = "1.2.0-alpha.72"  # x-release-please-version
+__version__ = "1.2.0-alpha.73"  # x-release-please-version