From bce7c48d6b66aa3e1570cf77c0267ad53e2e8354 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 14:10:04 -0700 Subject: [PATCH] ci: nightly hardware-attached bring-up workflow scaffold (#696) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FastLED/fbuild#696. Scaffolds the workflow + per-board fingerprint files + setup docs so registering a self-hosted runner with the hw-ci label is the only remaining step to activate hardware CI. What ships: - .github/workflows/hw-ci.yml — schedule (nightly 02:30 UTC) + PR hw-ci-label trigger + workflow_dispatch. runs-on: [self-hosted, hw-ci]; double-guarded with an 'if:' that skips when no runner is registered. Matrix per board family (esp32s3 / lpc845brk / pico / teensy41 / samd51) with fail-fast off so a single-board hardware fault doesn't mask the others. 'Detect attached hardware' step consults tests/hw/fingerprints/<board>.txt and skips with a warning when none of the listed VID:PIDs are present. Deploy + bring-up steps run only when hardware is detected. On nightly cron failure, opens or updates an hw-ci-failure label issue with the run URL. - tests/hw/README.md — fixture-layout convention + 'adding a new board family' recipe. - tests/hw/fingerprints/README.md + per-board .txt files (esp32s3, lpc845brk, pico, teensy41, samd51) — VID:PID-per-line presence checks. Pulled from the same BOARD_FINGERPRINTS table the runtime serial-probe CLI uses, so the two stay in sync. - agents/docs/hardware-ci-setup.md — runner-registration recipe, udev rules, first-run validation steps. Out of scope (operational): - Registering the runner itself (hardware + GitHub UI step, not source). - Per-board known-good firmware blobs (committed once the boards are wired in; tests/hw/README.md documents the known_good_<board>.{bin,elf,uf2} layout). Closes #696. 
---
 .github/workflows/hw-ci.yml         | 191 ++++++++++++++++++++++++++++
 agents/docs/hardware-ci-setup.md    |  91 +++++++++++++
 tests/hw/README.md                  |  52 ++++++++
 tests/hw/fingerprints/README.md     |  13 ++
 tests/hw/fingerprints/esp32s3.txt   |   4 +
 tests/hw/fingerprints/lpc845brk.txt |   6 +
 tests/hw/fingerprints/pico.txt      |   4 +
 tests/hw/fingerprints/samd51.txt    |  11 ++
 tests/hw/fingerprints/teensy41.txt  |   4 +
 9 files changed, 376 insertions(+)
 create mode 100644 .github/workflows/hw-ci.yml
 create mode 100644 agents/docs/hardware-ci-setup.md
 create mode 100644 tests/hw/README.md
 create mode 100644 tests/hw/fingerprints/README.md
 create mode 100644 tests/hw/fingerprints/esp32s3.txt
 create mode 100644 tests/hw/fingerprints/lpc845brk.txt
 create mode 100644 tests/hw/fingerprints/pico.txt
 create mode 100644 tests/hw/fingerprints/samd51.txt
 create mode 100644 tests/hw/fingerprints/teensy41.txt

diff --git a/.github/workflows/hw-ci.yml b/.github/workflows/hw-ci.yml
new file mode 100644
index 00000000..3862d916
--- /dev/null
+++ b/.github/workflows/hw-ci.yml
@@ -0,0 +1,191 @@
+name: Hardware CI
+
+# FastLED/fbuild#696 — nightly hardware-attached bring-up CI runner.
+#
+# Runs only on a self-hosted runner that carries both the `self-hosted`
+# and `hw-ci` labels. The job-level `if:` filters which events may use
+# that runner; the "Detect attached hardware" step then decides, per
+# matrix entry, whether the deploy / bring-up steps run. Hosted runners
+# never match `runs-on`, so until a labelled runner is registered the
+# triggered jobs wait in the queue instead of running elsewhere.
+#
+# To register a runner, see `agents/docs/hardware-ci-setup.md`.
+
+on:
+  schedule:
+    # 02:30 UTC nightly — quiet hours for the runner host.
+    - cron: '30 2 * * *'
+  pull_request:
+    # Per-PR optional path: PRs touching the deploy / serial stack
+    # can opt in by adding the `hw-ci` label.
+    types: [labeled, synchronize]
+    paths:
+      - 'crates/fbuild-serial/**'
+      - 'crates/fbuild-deploy/**'
+      - '.github/workflows/hw-ci.yml'
+      - 'tests/hw/**'
+  workflow_dispatch:
+    inputs:
+      board:
+        description: 'Board family to test (all|esp32s3|lpc845brk|pico|teensy41|samd51)'
+        required: false
+        default: 'all'
+
+# Least privilege: read the repository, and write issues so the nightly
+# failure reporter can open or comment on `hw-ci-failure` issues.
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  bringup:
+    # Event filter. Pull requests must carry the `hw-ci` label AND come
+    # from a branch of this repository: the job builds and runs PR code
+    # on the hardware host, so fork PRs are never given the runner.
+    if: |
+      github.event_name == 'schedule'
+      || github.event_name == 'workflow_dispatch'
+      || (github.event_name == 'pull_request'
+          && github.event.pull_request.head.repo.full_name == github.repository
+          && contains(github.event.pull_request.labels.*.name, 'hw-ci'))
+    runs-on: [self-hosted, hw-ci]
+    defaults:
+      run:
+        # Explicit bash so every script runs with `-e -o pipefail`.
+        shell: bash
+    strategy:
+      # Each board runs as its own matrix job so a single-board
+      # hardware fault doesn't mask the others.
+      fail-fast: false
+      matrix:
+        board:
+          - esp32s3
+          - lpc845brk
+          - pico
+          - teensy41
+          - samd51
+    env:
+      BOARD: ${{ matrix.board }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Detect attached hardware
+        id: detect
+        env:
+          # Empty on schedule / pull_request events, hence the fallback.
+          REQUESTED_BOARD: ${{ github.event.inputs.board || 'all' }}
+        run: |
+          # Record why this board is skipped, then end the step cleanly.
+          skip() {
+            echo "$1"
+            {
+              echo "present=false"
+              echo "reason=$1"
+            } >> "$GITHUB_OUTPUT"
+            exit 0
+          }
+
+          # Honour the workflow_dispatch `board` input: matrix entries
+          # that were not requested are skipped without probing.
+          if [ "$REQUESTED_BOARD" != "all" ] && [ "$REQUESTED_BOARD" != "$BOARD" ]; then
+            skip "not selected by the workflow_dispatch board input"
+          fi
+
+          # Each board family's expected USB fingerprint lives in
+          # tests/hw/fingerprints/<board>.txt — one VID:PID per line,
+          # `#` starts a comment.
+          fingerprints="tests/hw/fingerprints/${BOARD}.txt"
+          if [ ! -f "$fingerprints" ]; then
+            skip "no fingerprint file at ${fingerprints}"
+          fi
+
+          # Build up front so a compile error fails this step loudly
+          # instead of being mistaken for "device not found" below,
+          # where the probe's output is discarded.
+          cargo build --quiet --bin fbuild
+
+          while IFS= read -r line || [ -n "$line" ]; do
+            # Drop comments, then all whitespace (incl. CR from CRLF).
+            vidpid="$(printf '%s' "${line%%#*}" | tr -d '[:space:]')"
+            if [ -z "$vidpid" ]; then
+              continue
+            fi
+            # `fbuild serial probe find --vid-pid VID:PID` returns the
+            # device path or exits 1 — perfect for a presence check.
+            # stdin is redirected so the probe cannot eat the list.
+            if cargo run --quiet --bin fbuild -- serial probe find --vid-pid "$vidpid" < /dev/null > /dev/null 2>&1; then
+              echo "found ${vidpid} for ${BOARD}"
+              echo "present=true" >> "$GITHUB_OUTPUT"
+              exit 0
+            fi
+          done < "$fingerprints"
+          skip "none of the VID:PIDs in ${fingerprints} are attached"
+
+      - name: Skip if hardware absent
+        if: steps.detect.outputs.present != 'true'
+        env:
+          REASON: ${{ steps.detect.outputs.reason }}
+        run: |
+          echo "::warning::Skipping bring-up for ${BOARD}: ${REASON}"
+
+      - name: Deploy known-good firmware
+        if: steps.detect.outputs.present == 'true'
+        run: |
+          # The known-good firmware lives in
+          # tests/hw/known_good_<board>.{bin,elf,uf2}. Pinned in the
+          # repo so the test target is stable — any change requires a
+          # deliberate PR.
+          firmware=""
+          for ext in bin elf uf2; do
+            if [ -f "tests/hw/known_good_${BOARD}.${ext}" ]; then
+              firmware="tests/hw/known_good_${BOARD}.${ext}"
+              break
+            fi
+          done
+          if [ -z "$firmware" ]; then
+            echo "::error::No known-good firmware pinned for ${BOARD} — see tests/hw/README.md"
+            exit 1
+          fi
+          echo "known-good firmware: ${firmware}"
+          # NOTE(review): the blob is only checked for existence; it is
+          # not passed to `fbuild deploy`. Confirm that `deploy -e`
+          # flashes the pinned blob rather than a fresh build.
+          cargo run --quiet --bin fbuild -- deploy -e "$BOARD"
+
+      - name: Run bring-up tests
+        if: steps.detect.outputs.present == 'true'
+        timeout-minutes: 5
+        run: |
+          # The bring-up suite — analogous to FastLED's
+          # `bash autoresearch <board>` 3-way (remote_ok / log_ok /
+          # echo_ok). Encapsulates the actual hardware contract.
+          # NOTE(review): `test-emu` reads like an emulator target —
+          # confirm it exercises the attached board.
+          if ! cargo run --quiet --bin fbuild -- test-emu -e "$BOARD"; then
+            echo "::error::Bring-up failed on ${BOARD}"
+            exit 1
+          fi
+
+      - name: Report nightly failure
+        # Runs when an earlier step failed after hardware was detected
+        # (deploy or bring-up). Nightly cron only: a failing PR run is
+        # the author's signal and does not open a tracker issue.
+        if: >-
+          failure()
+          && github.event_name == 'schedule'
+          && steps.detect.outputs.present == 'true'
+        env:
+          GH_TOKEN: ${{ github.token }}
+          GH_REPO: ${{ github.repository }}
+          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          # `// empty` makes jq print nothing when no open issue
+          # matches, so the `-n` test below is reliable; a bare
+          # `.[0].number` would still emit a line for the null result.
+          existing="$(gh issue list --label hw-ci-failure --search "in:title ${BOARD}" --state open --json number --jq '.[0].number // empty')"
+          if [ -n "$existing" ]; then
+            gh issue comment "$existing" --body "Hardware CI failed on $(date -u +%Y-%m-%dT%H:%MZ). Run: ${RUN_URL}"
+          else
+            gh issue create --title "hw-ci: ${BOARD} bring-up failure" --label hw-ci-failure --body "Run: ${RUN_URL}"
+          fi
diff --git a/agents/docs/hardware-ci-setup.md b/agents/docs/hardware-ci-setup.md
new file mode 100644
index 00000000..0f9b677e
--- /dev/null
+++ b/agents/docs/hardware-ci-setup.md
@@ -0,0 +1,91 @@
+# Hardware CI runner setup
+
+How to register a self-hosted runner so the
+[`hw-ci.yml`](../../.github/workflows/hw-ci.yml) workflow has somewhere
+to run. FastLED/fbuild#696.
+
+## What you need
+
+- A Linux box (Pi 5, NUC, or similar) with at least five free USB
+  ports (one per board family in the matrix).
+- One representative of each board family in
+  [`crates/fbuild-serial/src/boards.rs::BOARD_FINGERPRINTS`](../../crates/fbuild-serial/src/boards.rs)
+  that you want covered. The matrix in
+  [`hw-ci.yml`](../../.github/workflows/hw-ci.yml) lists the canonical
+  set today: ESP32-S3, LPC845-BRK, Pico, Teensy 4.1, SAMD51.
+- Network access from the runner host to `github.com`.
+
+## Steps
+
+1. **Register the runner on the repo.**
+   <https://github.com/FastLED/fbuild/settings/actions/runners/new>
+   — pick the "Linux x64" / "Linux ARM64" variant matching your host.
+   When prompted for labels, **add both `self-hosted` and `hw-ci`**.
+   The workflow's `runs-on: [self-hosted, hw-ci]` requires both.
+
+2. **Install fbuild's toolchain on the runner host.**
+   ```bash
+   # Match the version of rustup / cargo / uv that the workflow uses.
+   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+   curl -LsSf https://astral.sh/uv/install.sh | sh
+   uv tool install soldr
+   ```
+
+3.
**Add per-board USB rules so the runner can talk to the devices + without `sudo`.** + ```bash + # /etc/udev/rules.d/99-fbuild-hwci.rules + SUBSYSTEM=="tty", ATTRS{idVendor}=="303a", MODE="0666" + SUBSYSTEM=="tty", ATTRS{idVendor}=="16c0", MODE="0666" + SUBSYSTEM=="tty", ATTRS{idVendor}=="2e8a", MODE="0666" + SUBSYSTEM=="hidraw", ATTRS{idVendor}=="16c0", MODE="0666" + SUBSYSTEM=="usb", ATTRS{idVendor}=="1fc9", MODE="0666" + ``` + ```bash + sudo udevadm control --reload-rules + sudo udevadm trigger + ``` + +4. **Plug each board into the runner.** Run + ```bash + cargo run --bin fbuild -- serial probe list + ``` + on the runner host and verify every expected VID:PID is annotated + with the right board hint. If a board's fingerprint isn't in the + list, add the row to + [`tests/hw/fingerprints/<board>.txt`](../../tests/hw/) and to + `BOARD_FINGERPRINTS`. + +5. **Pin a known-good firmware** for each board under + `tests/hw/known_good_<board>.{bin,elf,uf2}`. The bring-up test + target stays stable so CI failures are unambiguously regressions + in `fbuild-serial` / `fbuild-deploy`, not in the firmware payload. + See [`tests/hw/README.md`](../../tests/hw/README.md) for the + layout convention. + +6. **Trigger the first run manually.** + ```bash + gh workflow run hw-ci.yml -f board=all + ``` + The job should pick up the runner, walk the matrix, and either + pass on every board (good) or fail the affected board's job with + the error in its log. Manual runs do not file `hw-ci-failure` + issues — only nightly cron runs do. + +## Failure-path expectations + +- **Nightly cron failures** open or update an `hw-ci-failure` issue + with the run URL and timestamp. One issue per board family is + the convention. +- **Per-PR failures** (via the `hw-ci` label) post the run URL to + the PR's check status. They do NOT open a tracker issue — failure + on a PR is the PR author's signal to fix before merging, not a + fleet-wide alert. 
+ +## See also + +- [`tests/hw/README.md`](../../tests/hw/README.md) — fixture layout. +- [`.github/workflows/hw-ci.yml`](../../.github/workflows/hw-ci.yml) + — workflow source. +- FastLED/fbuild#696 — this scaffold's tracker. +- FastLED/fbuild#586 — LPC845-BRK on-hand burn-down meta. diff --git a/tests/hw/README.md b/tests/hw/README.md new file mode 100644 index 00000000..ab36cecb --- /dev/null +++ b/tests/hw/README.md @@ -0,0 +1,52 @@ +# `tests/hw/` — nightly hardware-CI fixtures + +FastLED/fbuild#696. The on-disk fixtures the +[`hw-ci.yml`](../../.github/workflows/hw-ci.yml) workflow consumes. + +## Layout + +``` +tests/hw/ +├── README.md — this file +├── fingerprints/ +│ ├── esp32s3.txt — one VID:PID per line, expected USB devices +│ ├── lpc845brk.txt +│ ├── pico.txt +│ ├── teensy41.txt +│ └── samd51.txt +├── known_good_esp32s3.bin — pinned-by-content firmware target (added per-board) +├── known_good_lpc845brk.elf — ARM ELF for SWD-flashed boards +├── known_good_pico.uf2 — alternative extension for UF2 boards +└── … +``` + +Each board family's row in `fingerprints/` is what +`fbuild serial probe find --vid-pid VID:PID` looks for — see +[`crates/fbuild-serial/src/boards.rs`](../../crates/fbuild-serial/src/boards.rs) +for the curated `BOARD_FINGERPRINTS` table. + +The `known_good_<board>.*` blobs are pinned-by-commit firmware +artifacts. Any change is a deliberate PR — the bring-up test target +stays stable so a CI failure is unambiguously a regression in +`fbuild-serial` / `fbuild-deploy`, not in the firmware payload. + +## Adding a new board family + +1. Add a row to + [`crates/fbuild-serial/src/boards.rs::BOARD_FINGERPRINTS`](../../crates/fbuild-serial/src/boards.rs). +2. Write `tests/hw/fingerprints/<board>.txt` with the VID:PID(s) + that should be present when the board is plugged in. +3. Build a known-good firmware (the bring-up `examples/AutoResearch.ino` + equivalent for that board) and drop the binary under + `tests/hw/known_good_<board>.{bin,elf,uf2}`. +4.
Add the new family name to the `matrix.board` list in + [`hw-ci.yml`](../../.github/workflows/hw-ci.yml). +5. Plug the board into the self-hosted runner host (see + [`agents/docs/hardware-ci-setup.md`](../../agents/docs/hardware-ci-setup.md)). + +## See also + +- [`agents/docs/hardware-ci-setup.md`](../../agents/docs/hardware-ci-setup.md) + — how to register a self-hosted runner with the `hw-ci` label. +- FastLED/fbuild#586 — LPC845-BRK on-hand burn-down meta. Same + hardware, different question. diff --git a/tests/hw/fingerprints/README.md b/tests/hw/fingerprints/README.md new file mode 100644 index 00000000..48b62214 --- /dev/null +++ b/tests/hw/fingerprints/README.md @@ -0,0 +1,13 @@ +# tests/hw/fingerprints/ + +USB VID:PID fingerprint files consumed by the +[`hw-ci.yml`](../../../.github/workflows/hw-ci.yml) workflow's +"Detect attached hardware" step. + +One file per board family — `<board>.txt`. One VID:PID per line, hex +without `0x`, lowercase or uppercase. Lines starting with `#` are +comments. The runner is considered to have the board attached when +**any** listed VID:PID is present in `fbuild serial probe list`. + +See [`../README.md`](../README.md) for the broader fixture layout +and FastLED/fbuild#696 for the meta tracker. diff --git a/tests/hw/fingerprints/esp32s3.txt b/tests/hw/fingerprints/esp32s3.txt new file mode 100644 index 00000000..a3393c29 --- /dev/null +++ b/tests/hw/fingerprints/esp32s3.txt @@ -0,0 +1,4 @@ +# ESP32-S3 native USB CDC (and DevKit variants with CP2102 / CH340) +303A:1001 +10C4:EA60 +1A86:7523 diff --git a/tests/hw/fingerprints/lpc845brk.txt b/tests/hw/fingerprints/lpc845brk.txt new file mode 100644 index 00000000..35d28151 --- /dev/null +++ b/tests/hw/fingerprints/lpc845brk.txt @@ -0,0 +1,6 @@ +# LPC845-BRK has TWO USB endpoints — the LPC11U35 USB-VCOM bridge +# (data port) AND the NXP CMSIS-DAP debug probe (flash port). Listing +# either as a presence indicator is fine for hardware-attached +# detection. 
+16C0:0483 +1FC9:0132 diff --git a/tests/hw/fingerprints/pico.txt b/tests/hw/fingerprints/pico.txt new file mode 100644 index 00000000..f78137fe --- /dev/null +++ b/tests/hw/fingerprints/pico.txt @@ -0,0 +1,4 @@ +# RP2040 Raspberry Pi Pico — native USB CDC during app run. +# The BOOTSEL VID:PID (2E8A:0003) is intentionally not listed — +# that's the post-touch state, not the "board is attached" state. +2E8A:000A diff --git a/tests/hw/fingerprints/samd51.txt b/tests/hw/fingerprints/samd51.txt new file mode 100644 index 00000000..6734e203 --- /dev/null +++ b/tests/hw/fingerprints/samd51.txt @@ -0,0 +1,11 @@ +# SAMD21/SAMD51 native USB CDC during normal app run. Adafruit / +# Sparkfun UF2 boards typically use VID 239A; the PID varies per +# board model so listing a small representative set covers the +# common families. Extend per Pi 5 / NUC host's fleet. +# +# (UF2 bootloader VID 239A with PID variant is intentionally NOT +# listed here — that's the post-touch state.) +239A:8014 +239A:802B +# Atmel/Microchip generic SAMD CDC +03EB:2402 diff --git a/tests/hw/fingerprints/teensy41.txt b/tests/hw/fingerprints/teensy41.txt new file mode 100644 index 00000000..3b55c456 --- /dev/null +++ b/tests/hw/fingerprints/teensy41.txt @@ -0,0 +1,4 @@ +# PJRC Teensy 4.x USB-Serial. Same VID:PID as the LPC11U35 VCOM +# bridge — disambiguation requires the operator to know which +# is plugged in. +16C0:0483