From 3aff5506ff6ce200278d733012542af46a003084 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 17:23:02 -0700 Subject: [PATCH 1/6] feat(www): SQLite-over-HTTP site + nightly publish to www branch (#718) Adds a new orphan branch www (GH Pages source) carrying a day-rotated SQLite database (.db) built from the merged online-data JSON, plus a static sql.js front-end with 5 pre-canned parameterized queries (no free-form SQL). Headline use case: enter a USB VID:PID, get the top-10 ranked candidate boards. All workflow logic lives in Python orchestrators under online-data-tools/ so the YAML is thin and the pipeline is unit-testable: - build_sqlite.py - constructs the .db from four JSON inputs - update_www.py - whole www-side flow in one entry point - setup_www_worktree.py - orphan-bootstrap aware worktree setup - publish_branch.py - commit + 200-commit history prune + push (force-with-lease with plain-push fallback) - rotate_www_dbs.py - keeps only current + previous day .db files - build_www_manifest.py - emits manifest.json with current/previous_db - annotate_online_manifest.py - links online-data manifest to website + dbs - seed_mcu_to_vid.json - curated MCU-family -> likely VID(s) heuristic (50+ entries: ESP32, STM32, RP2040, Teensy, Adafruit, Arduino, Sparkfun, WCH, NXP, ...) - www_static/ - index.html, app.js, style.css served verbatim by GH Pages; sql-wasm.{js,wasm} downloaded by workflow from pinned sql.js v1.10.3 release 30 unit tests cover schema round-trips, all 5 canned queries, rotation edge cases, manifest annotation, orphan-worktree bootstrap, and the publish path against a local bare-remote git repo (no network). The existing update-data.yml workflow gains ~30 lines (all single-line uv-run invocations of the orchestrators above); first run bootstraps mcu_to_vid.json on online-data and bootstraps the www branch as an empty orphan. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-data.yml | 120 +++--- online-data-tools/README.md | 27 ++ online-data-tools/annotate_online_manifest.py | 95 +++++ online-data-tools/build_sqlite.py | 251 ++++++++++++ online-data-tools/build_www_manifest.py | 74 ++++ online-data-tools/publish_branch.py | 206 ++++++++++ online-data-tools/rotate_www_dbs.py | 62 +++ online-data-tools/seed_mcu_to_vid.json | 68 ++++ online-data-tools/setup_www_worktree.py | 90 +++++ online-data-tools/test_build_sqlite.py | 367 ++++++++++++++++++ online-data-tools/test_helpers.py | 193 +++++++++ online-data-tools/test_orchestrators.py | 331 ++++++++++++++++ online-data-tools/update_www.py | 247 ++++++++++++ online-data-tools/www_static/README.md | 28 ++ online-data-tools/www_static/app.js | 228 +++++++++++ online-data-tools/www_static/index.html | 84 ++++ online-data-tools/www_static/style.css | 72 ++++ 17 files changed, 2498 insertions(+), 45 deletions(-) create mode 100644 online-data-tools/README.md create mode 100644 online-data-tools/annotate_online_manifest.py create mode 100644 online-data-tools/build_sqlite.py create mode 100644 online-data-tools/build_www_manifest.py create mode 100644 online-data-tools/publish_branch.py create mode 100644 online-data-tools/rotate_www_dbs.py create mode 100644 online-data-tools/seed_mcu_to_vid.json create mode 100644 online-data-tools/setup_www_worktree.py create mode 100644 online-data-tools/test_build_sqlite.py create mode 100644 online-data-tools/test_helpers.py create mode 100644 online-data-tools/test_orchestrators.py create mode 100644 online-data-tools/update_www.py create mode 100644 online-data-tools/www_static/README.md create mode 100644 online-data-tools/www_static/app.js create mode 100644 online-data-tools/www_static/index.html create mode 100644 online-data-tools/www_static/style.css diff --git a/.github/workflows/update-data.yml b/.github/workflows/update-data.yml index c363cfe8..aa09f067 100644 --- 
a/.github/workflows/update-data.yml +++ b/.github/workflows/update-data.yml @@ -1,10 +1,13 @@ -# Nightly refresh of the `online-data` branch's published datasets. +# Nightly refresh of the `online-data` and `www` branches. # -# Today the branch carries two datasets — USB VID:PID name resolution and -# the PlatformIO board catalog. The workflow file lives on `main` only -# because GitHub Actions requires `schedule` / `workflow_dispatch` to be -# defined on the default branch. All actual data + the merger scripts -# live on the orphan `online-data` branch (see `docs/online-data.md`). +# `online-data` (orphan) carries the merged JSON datasets — USB VID:PID name +# resolution and the PlatformIO board catalog — plus their merger scripts. +# `www` (orphan, GH Pages source) carries a day-rotated SQLite database +# (`.db`) built from the same JSON plus the static-site front-end +# that serves it via sql.js. See FastLED/fbuild#718 for the www design. +# +# This workflow file lives on `main` only because GitHub Actions requires +# `schedule` / `workflow_dispatch` to be defined on the default branch. # # At runtime the job: # @@ -62,6 +65,15 @@ env: ONLINE_WORKTREE: ${{ github.workspace }}/.online-data BRANCH_BASE_URL: https://raw.githubusercontent.com/${{ github.repository }}/online-data HISTORY_LIMIT: 200 + # www branch (GH Pages source): see FastLED/fbuild#718. + WWW_BRANCH: www + WWW_WORKTREE: ${{ github.workspace }}/.www + # sql.js is downloaded from a pinned release with an SRI check (below) + # and staged onto the www branch fresh on every run. + SQLJS_VERSION: "1.10.3" + SQLJS_BASE_URL: https://github.com/sql-js/sql.js/releases/download/v1.10.3/sqljs-wasm.zip + # Public site URL (overridden if GitHub Pages is configured elsewhere). 
+ WEBSITE_URL: https://fastled.github.io/fbuild/ jobs: update: @@ -92,6 +104,14 @@ jobs: mkdir -p "${ONLINE_WORKTREE}/data" ls -la "${ONLINE_WORKTREE}" + - name: Setup www worktree (orphan, GH Pages source) + # Wraps fetch / orphan-bootstrap logic in Python — unit-tested in + # online-data-tools/test_orchestrators.py. + run: | + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/setup_www_worktree.py" \ + --worktree "${WWW_WORKTREE}" --branch "${WWW_BRANCH}" + - uses: astral-sh/setup-uv@v3 - name: Setup soldr @@ -244,50 +264,58 @@ jobs: --out "${ONLINE_WORKTREE}/manifest.json" \ "${fragments[@]}" - - name: Commit + push if data actually changed - id: commit + # ──────────────────────────────────────────────────────────────────── + # www branch: build today's SQLite, refresh static assets, download + # sql.js, rotate old DBs, write www/manifest.json, annotate online + # manifest with the link-out. All seven sub-steps run inside one + # Python orchestrator (online-data-tools/update_www.py) and are + # exercised end-to-end in test_orchestrators.py. + # ──────────────────────────────────────────────────────────────────── + + - name: Refresh www (sqlite + static site + manifests) + id: build-sqlite + # Only attempt if at least one upstream merger produced fresh JSON — + # otherwise we'd be rebuilding yesterday's DB under a new filename, + # which the rotation step would then evict tomorrow. if: steps.build-manifest.outcome == 'success' - working-directory: ${{ env.ONLINE_WORKTREE }} run: | - set -euo pipefail - git add manifest.json data/ - if git diff --cached --quiet; then - echo "no changes to commit" - echo "changed=false" >> "$GITHUB_OUTPUT" - exit 0 - fi - ts="$(date -u +%Y-%m-%d)" - # Include which datasets actually refreshed in the commit body. 
- parts=() - [ "${{ steps.merge-usb.outcome }}" = "success" ] && parts+=("usb-vid") - [ "${{ steps.merge-pio.outcome }}" = "success" ] && parts+=("pio-boards") - body="$(printf 'datasets: %s' "$(IFS=, ; echo "${parts[*]}")")" - git commit -m "chore(online-data): nightly refresh ${ts}" -m "${body}" - echo "changed=true" >> "$GITHUB_OUTPUT" + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/update_www.py" \ + --workspace "${{ github.workspace }}" \ + --online-worktree "${ONLINE_WORKTREE}" \ + --www-worktree "${WWW_WORKTREE}" \ + --website-url "${WEBSITE_URL}" \ + --sqljs-zip-url "${SQLJS_BASE_URL}" - - name: Prune history to last ${{ env.HISTORY_LIMIT }} commits - if: steps.commit.outputs.changed == 'true' - working-directory: ${{ env.ONLINE_WORKTREE }} + # ──────────────────────────────────────────────────────────────────── + # Publish both branches via the same Python orchestrator. It handles + # `git add` / commit-if-changed / 200-commit history prune / + # first-push-falls-back-to-plain. End-to-end tested in + # test_orchestrators.py against a bare local remote. 
+ # ──────────────────────────────────────────────────────────────────── + + - name: Publish online-data branch + id: commit + if: steps.build-manifest.outcome == 'success' run: | - set -euo pipefail - total="$(git rev-list --count HEAD)" - echo "current history length: ${total}" - if [ "${total}" -le "${HISTORY_LIMIT}" ]; then - echo "no prune needed (<= ${HISTORY_LIMIT} commits)" - exit 0 - fi - target="$(git rev-list --max-count="${HISTORY_LIMIT}" HEAD | tail -n 1)" - git replace --graft "${target}" - pip install --quiet git-filter-repo - git filter-repo --force --refs HEAD - git for-each-ref --format='delete %(refname)' refs/replace/ | \ - git update-ref --stdin + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/publish_branch.py" \ + --worktree "${ONLINE_WORKTREE}" \ + --branch "${ONLINE_BRANCH}" \ + --message "chore(online-data): nightly refresh" \ + --history-limit "${HISTORY_LIMIT}" - - name: Push - if: steps.commit.outputs.changed == 'true' - working-directory: ${{ env.ONLINE_WORKTREE }} + - name: Publish www branch + id: commit-www + if: steps.build-sqlite.outcome == 'success' run: | - git push --force-with-lease origin "${ONLINE_BRANCH}" + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/publish_branch.py" \ + --worktree "${WWW_WORKTREE}" \ + --branch "${WWW_BRANCH}" \ + --message "chore(www): nightly refresh" \ + --body "sqlite + static site rebuild from latest online-data" \ + --history-limit "${HISTORY_LIMIT}" - name: Summary if: always() @@ -304,5 +332,7 @@ jobs: echo "| merge usb-vid | ${{ steps.merge-usb.outcome }} |" echo "| merge pio-boards | ${{ steps.merge-pio.outcome }} |" echo "| build manifest | ${{ steps.build-manifest.outcome }} |" - echo "| committed | ${{ steps.commit.outputs.changed || 'n/a' }} |" + echo "| build sqlite (www) | ${{ steps.build-sqlite.outcome }} |" + echo "| committed (online-data) | ${{ steps.commit.outputs.changed || 'n/a' }} |" + echo "| committed (www) | ${{ 
steps.commit-www.outputs.changed || 'n/a' }} |" } >> "$GITHUB_STEP_SUMMARY" diff --git a/online-data-tools/README.md b/online-data-tools/README.md new file mode 100644 index 00000000..4ad8551d --- /dev/null +++ b/online-data-tools/README.md @@ -0,0 +1,27 @@ +# online-data-tools + +Build-time helpers invoked by `.github/workflows/update-data.yml` to produce +the SQLite databases hosted on the `www` orphan branch. + +Scripts here live on `main` (so they get unit-tested in CI), but their output +is committed to orphan branches: + +| Script | Reads from | Writes to | +| ------------------- | ------------------------------------------- | -------------------------------------- | +| `build_sqlite.py` | `online-data/data/*.json` | `www/.db` | +| `rotate_www_dbs.py` | `www/*.db` | `www/` (deletes every `.db` except the current and previous day's) | +| `build_www_manifest.py` | day-stable filenames | `www/manifest.json` | + +The merger scripts on the `online-data` orphan branch +(`merge_sources.py`, `merge_pio_boards.py`, `build_manifest.py`, +`dump_platformio.py`) are NOT moved here — they predate this directory and +the convention is documented in [issue #718](https://github.com/FastLED/fbuild/issues/718). 
+ +## Tests + +```bash +uv run --no-project --with pytest pytest online-data-tools/test_build_sqlite.py -v +``` + +Each script declares its own PEP 723 dependencies and is runnable via +`uv run --no-project --script + + + diff --git a/online-data-tools/www_static/style.css b/online-data-tools/www_static/style.css new file mode 100644 index 00000000..4a52b435 --- /dev/null +++ b/online-data-tools/www_static/style.css @@ -0,0 +1,72 @@ +:root { + --fg: #1a1a1a; + --bg: #fafafa; + --accent: #0066cc; + --muted: #666; + --border: #ddd; + --table-stripe: #f2f2f2; +} +@media (prefers-color-scheme: dark) { + :root { + --fg: #e8e8e8; + --bg: #181818; + --accent: #6fb4ff; + --muted: #aaa; + --border: #333; + --table-stripe: #222; + } +} +* { box-sizing: border-box; } +body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + color: var(--fg); + background: var(--bg); + margin: 0; + padding: 1rem; + max-width: 920px; + margin-left: auto; + margin-right: auto; + line-height: 1.5; +} +header h1 { margin-bottom: 0.25rem; } +header .sub { color: var(--muted); margin-top: 0; } +.meta { display: inline-block; margin-left: 0.5em; font-style: italic; } +section { margin: 1.25rem 0; padding-top: 0.5rem; border-top: 1px solid var(--border); } +section h2 { font-size: 1.05rem; margin-bottom: 0.5rem; } +form { display: flex; flex-wrap: wrap; gap: 0.6rem; align-items: end; } +label { display: flex; flex-direction: column; font-size: 0.85rem; color: var(--muted); } +input { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 1rem; + padding: 0.4rem 0.5rem; + border: 1px solid var(--border); + background: var(--bg); + color: var(--fg); + border-radius: 3px; + min-width: 7em; +} +button { + font-size: 0.95rem; + padding: 0.45rem 0.9rem; + background: var(--accent); + color: white; + border: 0; + border-radius: 3px; + cursor: pointer; +} +button:hover { filter: brightness(1.05); } +.status { color: var(--muted); font-size: 0.85rem; 
margin: 0.25rem 0; white-space: pre-wrap; } +table { width: 100%; border-collapse: collapse; font-size: 0.9rem; margin-top: 0.5rem; } +th, td { + text-align: left; + padding: 0.35rem 0.5rem; + border-bottom: 1px solid var(--border); + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + vertical-align: top; +} +tr:nth-child(even) td { background: var(--table-stripe); } +th { font-weight: 600; background: var(--table-stripe); } +a { color: var(--accent); } +footer { color: var(--muted); font-size: 0.8rem; margin-top: 2rem; } +.hint { color: var(--muted); font-style: italic; } +.error { color: #c33; } From 5aaeb4d75b105487178c0f0a80816525a61b7b80 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 17:27:40 -0700 Subject: [PATCH 2/6] fix(ci): reorder setup-uv before setup_www_worktree (#719) Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-data.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update-data.yml b/.github/workflows/update-data.yml index aa09f067..85fa3c72 100644 --- a/.github/workflows/update-data.yml +++ b/.github/workflows/update-data.yml @@ -104,6 +104,8 @@ jobs: mkdir -p "${ONLINE_WORKTREE}/data" ls -la "${ONLINE_WORKTREE}" + - uses: astral-sh/setup-uv@v3 + - name: Setup www worktree (orphan, GH Pages source) # Wraps fetch / orphan-bootstrap logic in Python — unit-tested in # online-data-tools/test_orchestrators.py. 
@@ -112,8 +114,6 @@ jobs: "${{ github.workspace }}/online-data-tools/setup_www_worktree.py" \ --worktree "${WWW_WORKTREE}" --branch "${WWW_BRANCH}" - - uses: astral-sh/setup-uv@v3 - - name: Setup soldr uses: zackees/setup-soldr@v0.9.62 with: From f351b062472bfebddaa047f3edc91ff732a61021 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 17:36:39 -0700 Subject: [PATCH 3/6] fix(www): inject missing usb_vendor rows from mcu_to_vid seed (#719) The canonical usb.ids text databases don't carry newer VIDs like 0x303a (Espressif Systems) or 0x2e8a (Raspberry Pi), so the JOIN in board_vid_guess silently dropped the most relevant rows for ESP32-S3 and Pico boards. seed_mcu_to_vid.json now also supplies vid_vendor names, and build_sqlite injects them into usb_vendor when missing (INSERT OR IGNORE preserves upstream when present). Adds a regression test pinning the behavior for 0x303a. Co-Authored-By: Claude Opus 4.7 (1M context) --- online-data-tools/build_sqlite.py | 17 ++++ online-data-tools/seed_mcu_to_vid.json | 106 ++++++++++++------------- online-data-tools/test_build_sqlite.py | 60 ++++++++++++++ 3 files changed, 130 insertions(+), 53 deletions(-) diff --git a/online-data-tools/build_sqlite.py b/online-data-tools/build_sqlite.py index 34fdd993..498bb834 100644 --- a/online-data-tools/build_sqlite.py +++ b/online-data-tools/build_sqlite.py @@ -187,6 +187,23 @@ def _populate_mcu_to_vid(conn: sqlite3.Connection, mcu_to_vid: list[dict]) -> No "VALUES (?, ?, ?, ?)", rows, ) + # Inject any usb_vendor rows referenced by mcu_to_vid but missing from + # the upstream usb-vid.json. The canonical linux-usb.org / usbids text + # mirrors are slow to add newer VIDs (e.g. 0x303a Espressif, 0x2e8a + # Raspberry Pi), so without this the JOIN in board_vid_guess silently + # drops the most useful rows. mcu_to_vid entries carrying a + # `vid_vendor` field are the seed of truth for these gaps; existing + # usb_vendor rows are NOT overwritten (`INSERT OR IGNORE`). 
+ overrides = [ + (_ensure_int(entry["vid"]), entry["vid_vendor"]) + for entry in mcu_to_vid + if entry.get("vid_vendor") + ] + if overrides: + conn.executemany( + "INSERT OR IGNORE INTO usb_vendor (vid, vendor) VALUES (?, ?)", + overrides, + ) def build_db( diff --git a/online-data-tools/seed_mcu_to_vid.json b/online-data-tools/seed_mcu_to_vid.json index 69ebfdb3..606cdf05 100644 --- a/online-data-tools/seed_mcu_to_vid.json +++ b/online-data-tools/seed_mcu_to_vid.json @@ -1,68 +1,68 @@ [ - {"mcu_family": "ESP32S3", "vid": "303a", "score": 0.95, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32S2", "vid": "303a", "score": 0.95, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32C3", "vid": "303a", "score": 0.90, "reason": "Espressif native USB / USB-Serial-JTAG"}, - {"mcu_family": "ESP32C6", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32H2", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32P4", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32C2", "vid": "10c4", "score": 0.55, "reason": "CP210x UART bridge (typical dev board)"}, - {"mcu_family": "ESP32C2", "vid": "1a86", "score": 0.50, "reason": "CH340 UART bridge (clone boards)"}, - {"mcu_family": "ESP32C5", "vid": "303a", "score": 0.85, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32S3", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.95, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32S2", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.95, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32C3", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB / USB-Serial-JTAG"}, + {"mcu_family": "ESP32C6", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32H2", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 
0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32P4", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32C2", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.55, "reason": "CP210x UART bridge (typical dev board)"}, + {"mcu_family": "ESP32C2", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.50, "reason": "CH340 UART bridge (clone boards)"}, + {"mcu_family": "ESP32C5", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.85, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32", "vid": "10c4", "score": 0.70, "reason": "CP210x UART bridge (classic ESP32 dev kits)"}, - {"mcu_family": "ESP32", "vid": "1a86", "score": 0.55, "reason": "CH340 UART bridge"}, - {"mcu_family": "ESP32", "vid": "0403", "score": 0.45, "reason": "FTDI FT232 (some dev boards)"}, + {"mcu_family": "ESP32", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.70, "reason": "CP210x UART bridge (classic ESP32 dev kits)"}, + {"mcu_family": "ESP32", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.55, "reason": "CH340 UART bridge"}, + {"mcu_family": "ESP32", "vid": "0403", "vid_vendor": "Future Technology Devices International, Ltd", "score": 0.45, "reason": "FTDI FT232 (some dev boards)"}, - {"mcu_family": "ESP8266", "vid": "1a86", "score": 0.70, "reason": "CH340G UART bridge (NodeMCU classic)"}, - {"mcu_family": "ESP8266", "vid": "10c4", "score": 0.60, "reason": "CP2102 UART bridge"}, + {"mcu_family": "ESP8266", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.70, "reason": "CH340G UART bridge (NodeMCU classic)"}, + {"mcu_family": "ESP8266", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.60, "reason": "CP2102 UART bridge"}, - {"mcu_family": "RP2040", "vid": "2e8a", "score": 0.95, "reason": "Raspberry Pi RP2 Boot / Pico CDC"}, - {"mcu_family": "RP2350", "vid": "2e8a", "score": 0.95, "reason": "Raspberry Pi Pico 2 CDC"}, + {"mcu_family": 
"RP2040", "vid": "2e8a", "vid_vendor": "Raspberry Pi", "score": 0.95, "reason": "Raspberry Pi RP2 Boot / Pico CDC"}, + {"mcu_family": "RP2350", "vid": "2e8a", "vid_vendor": "Raspberry Pi", "score": 0.95, "reason": "Raspberry Pi Pico 2 CDC"}, - {"mcu_family": "STM32F1", "vid": "0483", "score": 0.80, "reason": "STMicro DFU / native CDC"}, - {"mcu_family": "STM32F4", "vid": "0483", "score": 0.80, "reason": "STMicro DFU / native CDC"}, - {"mcu_family": "STM32F7", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32H7", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32L4", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32G0", "vid": "0483", "score": 0.75, "reason": "STMicro native USB"}, - {"mcu_family": "STM32G4", "vid": "0483", "score": 0.75, "reason": "STMicro native USB"}, - {"mcu_family": "STM32", "vid": "0483", "score": 0.70, "reason": "STMicro fallback (any STM32 family)"}, + {"mcu_family": "STM32F1", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro DFU / native CDC"}, + {"mcu_family": "STM32F4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro DFU / native CDC"}, + {"mcu_family": "STM32F7", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32H7", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32L4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32G0", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.75, "reason": "STMicro native USB"}, + {"mcu_family": "STM32G4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.75, "reason": "STMicro native USB"}, + {"mcu_family": "STM32", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.70, "reason": 
"STMicro fallback (any STM32 family)"}, - {"mcu_family": "ATMEGA32U4", "vid": "2341", "score": 0.80, "reason": "Arduino Leonardo / Micro lineage"}, - {"mcu_family": "ATMEGA32U4", "vid": "1b4f", "score": 0.50, "reason": "SparkFun ATmega32U4 boards"}, - {"mcu_family": "ATMEGA328P", "vid": "1a86", "score": 0.70, "reason": "CH340 UART (Nano clones)"}, - {"mcu_family": "ATMEGA328P", "vid": "2341", "score": 0.65, "reason": "Arduino Uno R3"}, - {"mcu_family": "ATMEGA328P", "vid": "0403", "score": 0.45, "reason": "FTDI FT232 (older Uno / Duemilanove)"}, - {"mcu_family": "ATMEGA2560", "vid": "2341", "score": 0.70, "reason": "Arduino Mega 2560"}, - {"mcu_family": "ATMEGA2560", "vid": "1a86", "score": 0.65, "reason": "CH340 UART (Mega clones)"}, + {"mcu_family": "ATMEGA32U4", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.80, "reason": "Arduino Leonardo / Micro lineage"}, + {"mcu_family": "ATMEGA32U4", "vid": "1b4f", "vid_vendor": "SparkFun Electronics", "score": 0.50, "reason": "SparkFun ATmega32U4 boards"}, + {"mcu_family": "ATMEGA328P", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.70, "reason": "CH340 UART (Nano clones)"}, + {"mcu_family": "ATMEGA328P", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.65, "reason": "Arduino Uno R3"}, + {"mcu_family": "ATMEGA328P", "vid": "0403", "vid_vendor": "Future Technology Devices International, Ltd", "score": 0.45, "reason": "FTDI FT232 (older Uno / Duemilanove)"}, + {"mcu_family": "ATMEGA2560", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.70, "reason": "Arduino Mega 2560"}, + {"mcu_family": "ATMEGA2560", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.65, "reason": "CH340 UART (Mega clones)"}, - {"mcu_family": "SAMD21", "vid": "2341", "score": 0.75, "reason": "Arduino Zero / MKR1000 lineage"}, - {"mcu_family": "SAMD21", "vid": "239a", "score": 0.70, "reason": "Adafruit Feather M0 lineage"}, - {"mcu_family": "SAMD21", "vid": "1b4f", "score": 0.55, "reason": "SparkFun SAMD21 
boards"}, - {"mcu_family": "SAMD51", "vid": "239a", "score": 0.80, "reason": "Adafruit Feather M4 / ItsyBitsy M4"}, - {"mcu_family": "SAMD51", "vid": "2341", "score": 0.55, "reason": "Arduino MKR Vidor 4000 etc."}, + {"mcu_family": "SAMD21", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.75, "reason": "Arduino Zero / MKR1000 lineage"}, + {"mcu_family": "SAMD21", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.70, "reason": "Adafruit Feather M0 lineage"}, + {"mcu_family": "SAMD21", "vid": "1b4f", "vid_vendor": "SparkFun Electronics", "score": 0.55, "reason": "SparkFun SAMD21 boards"}, + {"mcu_family": "SAMD51", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.80, "reason": "Adafruit Feather M4 / ItsyBitsy M4"}, + {"mcu_family": "SAMD51", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.55, "reason": "Arduino MKR Vidor 4000 etc."}, - {"mcu_family": "NRF52840", "vid": "239a", "score": 0.80, "reason": "Adafruit Bluefruit nRF52840"}, - {"mcu_family": "NRF52840", "vid": "1915", "score": 0.65, "reason": "Nordic Semi reference / DK"}, - {"mcu_family": "NRF52832", "vid": "1915", "score": 0.70, "reason": "Nordic Semi reference"}, + {"mcu_family": "NRF52840", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.80, "reason": "Adafruit Bluefruit nRF52840"}, + {"mcu_family": "NRF52840", "vid": "1915", "vid_vendor": "Nordic Semiconductor ASA", "score": 0.65, "reason": "Nordic Semi reference / DK"}, + {"mcu_family": "NRF52832", "vid": "1915", "vid_vendor": "Nordic Semiconductor ASA", "score": 0.70, "reason": "Nordic Semi reference"}, - {"mcu_family": "CH32V", "vid": "1a86", "score": 0.90, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "CH32X", "vid": "1a86", "score": 0.90, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "CH32L", "vid": "1a86", "score": 0.85, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32V", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.90, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32X", 
"vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.90, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32L", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.85, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "MIMXRT", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 4.x"}, - {"mcu_family": "MK64", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.5"}, - {"mcu_family": "MK66", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.6"}, - {"mcu_family": "MK20", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.0/3.1/3.2"}, - {"mcu_family": "MKL26", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy LC"}, + {"mcu_family": "MIMXRT", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 4.x"}, + {"mcu_family": "MK64", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.5"}, + {"mcu_family": "MK66", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.6"}, + {"mcu_family": "MK20", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.0/3.1/3.2"}, + {"mcu_family": "MKL26", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy LC"}, - {"mcu_family": "LPC11", "vid": "1fc9", "score": 0.65, "reason": "NXP LPC11U DAPLink"}, - {"mcu_family": "LPC8", "vid": "1fc9", "score": 0.65, "reason": "NXP LPC8xx CMSIS-DAP / DAPLink"}, - {"mcu_family": "LPC8", "vid": "0d28", "score": 0.55, "reason": "DAPLink CMSIS-DAP"}, + {"mcu_family": "LPC11", "vid": "1fc9", "vid_vendor": "NXP Semiconductors", "score": 0.65, "reason": "NXP LPC11U DAPLink"}, + {"mcu_family": "LPC8", "vid": "1fc9", "vid_vendor": "NXP Semiconductors", "score": 0.65, "reason": "NXP LPC8xx CMSIS-DAP / DAPLink"}, + {"mcu_family": "LPC8", "vid": "0d28", "vid_vendor": "Mbed (Arm)", 
"score": 0.55, "reason": "DAPLink CMSIS-DAP"}, - {"mcu_family": "AM_APOLLO3", "vid": "1cbe", "score": 0.70, "reason": "Sparkfun Apollo3 bootloader"}, + {"mcu_family": "AM_APOLLO3", "vid": "1cbe", "vid_vendor": "Luminary Micro Inc. / SparkFun", "score": 0.70, "reason": "Sparkfun Apollo3 bootloader"}, - {"mcu_family": "MGM240P", "vid": "10c4", "score": 0.65, "reason": "Silicon Labs CP210x"}, + {"mcu_family": "MGM240P", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.65, "reason": "Silicon Labs CP210x"}, - {"mcu_family": "EFR32MG", "vid": "10c4", "score": 0.65, "reason": "Silicon Labs CP210x"} + {"mcu_family": "EFR32MG", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.65, "reason": "Silicon Labs CP210x"} ] diff --git a/online-data-tools/test_build_sqlite.py b/online-data-tools/test_build_sqlite.py index 84aa03e5..0d65b78f 100644 --- a/online-data-tools/test_build_sqlite.py +++ b/online-data-tools/test_build_sqlite.py @@ -333,6 +333,66 @@ def test_canned_query_vid_pid_to_boards_totally_unknown(built_db: Path) -> None: assert rows == [], "totally unknown VID:PID must return an empty set" +def test_mcu_to_vid_injects_missing_usb_vendor(tmp_path: Path) -> None: + """Regression for the production bug found on first dispatch: upstream + usb.ids text databases don't carry 0x303a (Espressif) or 0x2e8a + (Raspberry Pi). When mcu_to_vid references such a VID and carries a + `vid_vendor` field, build_db must inject the vendor so the + board_vid_guess join doesn't silently drop the most relevant rows. + """ + data = tmp_path / "data" + data.mkdir() + # Upstream usb-vid.json is missing 0x303a entirely. 
+ (data / "usb-vid.json").write_text(json.dumps({ + "10c4": {"vendor": "Silicon Labs", "products": [["ea60", "CP210x"]]}, + }), encoding="utf-8") + (data / "pio-boards.json").write_text(json.dumps({ + "esp32-s3-devkitc-1": { + "id": "esp32-s3-devkitc-1", "name": "Espressif ESP32-S3-DevKitC-1", + "vendor": "Espressif", "mcu": "ESP32S3", + "platform": "espressif32", "frameworks": ["arduino"], + "url": "https://example.invalid", + }, + }), encoding="utf-8") + (data / "vendor_boards.json").write_text("{}", encoding="utf-8") + (data / "mcu_to_vid.json").write_text(json.dumps([ + {"mcu_family": "ESP32S3", "vid": "303a", + "vid_vendor": "Espressif Systems", "score": 0.95, + "reason": "Espressif native USB"}, + {"mcu_family": "ESP32S3", "vid": "10c4", + "vid_vendor": "Silicon Labs", "score": 0.55, + "reason": "CP210x bridge (legacy)"}, + ]), encoding="utf-8") + out = tmp_path / "regression.db" + build_sqlite.build_db( + usb_vid_json = data / "usb-vid.json", + pio_boards_json = data / "pio-boards.json", + vendor_boards_json = data / "vendor_boards.json", + mcu_to_vid_json = data / "mcu_to_vid.json", + out_path = out, + ) + with sqlite3.connect(out) as conn: + # 0x303a was missing upstream → must be auto-injected. + row = conn.execute( + "SELECT vendor FROM usb_vendor WHERE vid = ?", (int("303a", 16),), + ).fetchone() + assert row is not None, "vid_vendor seed must inject missing 0x303a" + assert row[0] == "Espressif Systems" + # 0x10c4 was already present upstream — injection must NOT clobber. + row = conn.execute( + "SELECT vendor FROM usb_vendor WHERE vid = ?", (int("10c4", 16),), + ).fetchone() + assert row[0] == "Silicon Labs" + # board_vid_guess now includes the ESP32S3 → 0x303a row. 
+ rows = conn.execute( + "SELECT vid, confidence FROM board_vid_guess " + "WHERE board_id = 'esp32-s3-devkitc-1' " + "ORDER BY confidence DESC" + ).fetchall() + assert rows, "view must yield rows now that vendor is injected" + assert rows[0][0] == int("303a", 16), f"0x303a should rank first; got {rows}" + + def test_canned_query_vid_pid_to_boards_rp2040(built_db: Path) -> None: with sqlite3.connect(built_db) as conn: rows = conn.execute( From fa959caf1e7ac2ecc5bcecf5a51dd4138df42761 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 17:55:24 -0700 Subject: [PATCH 4/6] feat(www): tier-4 USB-VID source via usb-ids.gowdy.us scrape (#719) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per code-review on the PR: the canonical text databases (Rust usb-ids 1.2025.2, linux-usb.org, Fedora hwdata) don't yet carry 0x303A Espressif or 0x2E8A Raspberry Pi, so the headline VID:PID -> board lookup silently dropped its most useful answers. Reverted the previous hardcoded vid_vendor seed in favor of a real scrape from usb-ids.gowdy.us — the authoritative submission view where these vendors have been registered since 2021 even though the compiled usb.ids download hasn't merged them. - fetch_gowdy_supplement.py scrapes /read/UD/ for VIDs referenced by mcu_to_vid that are missing from upstream. URL per VID = audit trail. - overlay_usb_vid.py union-merges the supplement with upstream-wins semantics (gowdy fills gaps, never patches). - build_sqlite.board_vid_guess switched to LEFT JOIN on usb_vendor so rows still surface when the supplement is unavailable; the canned VID+PID query in test_build_sqlite.py + www_static/app.js mirrors that. - 40 tests pass (added 9 for the scraper + overlay). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-data.yml | 34 ++++ online-data-tools/build_sqlite.py | 32 ++-- online-data-tools/fetch_gowdy_supplement.py | 155 +++++++++++++++++ online-data-tools/overlay_usb_vid.py | 66 +++++++ online-data-tools/seed_mcu_to_vid.json | 106 ++++++------ online-data-tools/test_build_sqlite.py | 52 +++--- online-data-tools/test_gowdy.py | 180 ++++++++++++++++++++ online-data-tools/www_static/app.js | 4 +- 8 files changed, 526 insertions(+), 103 deletions(-) create mode 100644 online-data-tools/fetch_gowdy_supplement.py create mode 100644 online-data-tools/overlay_usb_vid.py create mode 100644 online-data-tools/test_gowdy.py diff --git a/.github/workflows/update-data.yml b/.github/workflows/update-data.yml index 85fa3c72..0fa6fc2c 100644 --- a/.github/workflows/update-data.yml +++ b/.github/workflows/update-data.yml @@ -236,6 +236,38 @@ jobs: --manifest-fragment /tmp/fragments/pio-boards.json \ --manifest-fragment-slim /tmp/fragments/vendor_boards.json + # ──────────────────────────────────────────────────────────────────── + # Tier-4 USB-VID source: scrape usb-ids.gowdy.us for VIDs the public + # text databases (Rust crate, linux-usb.org, hwdata) don't yet carry. + # Notable holes today: 0x303A Espressif, 0x2E8A Raspberry Pi Foundation + # — both confirmed missing from upstreams as of 2026-06-21. Gowdy.us + # is the authoritative submission/discussion view; we scrape it only + # for VIDs referenced by `mcu_to_vid` so the request volume stays tiny + # and the data added is fully attributable (the URL is the audit trail). 
+ # ──────────────────────────────────────────────────────────────────── + + - name: Fetch usb-ids.gowdy.us supplement (USB-VID tier-4) + id: fetch-gowdy + continue-on-error: true + if: steps.merge-usb.outcome == 'success' + run: | + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/fetch_gowdy_supplement.py" \ + --mcu-to-vid "${{ github.workspace }}/online-data-tools/seed_mcu_to_vid.json" \ + --existing "${ONLINE_WORKTREE}/data/usb-vid.json" \ + --out /tmp/gowdy-supplement.json + + - name: Overlay gowdy supplement onto usb-vid.json + id: overlay-gowdy + continue-on-error: true + if: steps.fetch-gowdy.outcome == 'success' + run: | + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/overlay_usb_vid.py" \ + --upstream "${ONLINE_WORKTREE}/data/usb-vid.json" \ + --supplement /tmp/gowdy-supplement.json \ + --out "${ONLINE_WORKTREE}/data/usb-vid.json" + - name: Assemble manifest.json id: build-manifest # We rebuild the manifest whenever at least one dataset succeeded, @@ -330,6 +362,8 @@ jobs: echo "| usbids/usbids github | ${{ steps.fetch-github.outcome }} |" echo "| pio boards (platformio) | ${{ steps.dump-pio.outcome }} |" echo "| merge usb-vid | ${{ steps.merge-usb.outcome }} |" + echo "| fetch gowdy (tier-4) | ${{ steps.fetch-gowdy.outcome }} |" + echo "| overlay gowdy | ${{ steps.overlay-gowdy.outcome }} |" echo "| merge pio-boards | ${{ steps.merge-pio.outcome }} |" echo "| build manifest | ${{ steps.build-manifest.outcome }} |" echo "| build sqlite (www) | ${{ steps.build-sqlite.outcome }} |" diff --git a/online-data-tools/build_sqlite.py b/online-data-tools/build_sqlite.py index 498bb834..5b98f00f 100644 --- a/online-data-tools/build_sqlite.py +++ b/online-data-tools/build_sqlite.py @@ -87,9 +87,16 @@ def _ensure_int(v: int | str) -> int: CREATE VIRTUAL TABLE board_fts USING fts5(id, name, vendor, mcu, content='board', content_rowid='rowid'); --- Per-board headline ranking view. 
Joins boards to their likely USB vendors --- via mcu_to_vid. The board_id column carries the original id even when the --- mcu prefix-match expands to multiple families. +-- Per-board headline ranking view. Joins boards to their likely USB +-- vendors via mcu_to_vid. The board_id column carries the original id even +-- when the mcu prefix-match expands to multiple families. +-- +-- LEFT JOIN on usb_vendor: some real, allocated VIDs are not present in +-- the public usb.ids text databases we mirror (the Rust `usb-ids` crate, +-- linux-usb.org, Fedora hwdata). 0x303a (Espressif) and 0x2e8a (Raspberry +-- Pi) are the prominent examples. We still want those rows to surface so +-- the heuristic answer (e.g. "ESP32-S3 → 0x303a") is visible; the UI +-- renders a missing vendor name as a hyphen. CREATE VIEW board_vid_guess AS SELECT b.id AS board_id, @@ -103,7 +110,7 @@ def _ensure_int(v: int | str) -> int: JOIN mcu_to_vid m ON m.mcu_family = b.mcu OR b.mcu LIKE m.mcu_family || '%' -JOIN usb_vendor v +LEFT JOIN usb_vendor v ON v.vid = m.vid; """ @@ -187,23 +194,6 @@ def _populate_mcu_to_vid(conn: sqlite3.Connection, mcu_to_vid: list[dict]) -> No "VALUES (?, ?, ?, ?)", rows, ) - # Inject any usb_vendor rows referenced by mcu_to_vid but missing from - # the upstream usb-vid.json. The canonical linux-usb.org / usbids text - # mirrors are slow to add newer VIDs (e.g. 0x303a Espressif, 0x2e8a - # Raspberry Pi), so without this the JOIN in board_vid_guess silently - # drops the most useful rows. mcu_to_vid entries carrying a - # `vid_vendor` field are the seed of truth for these gaps; existing - # usb_vendor rows are NOT overwritten (`INSERT OR IGNORE`). 
- overrides = [ - (_ensure_int(entry["vid"]), entry["vid_vendor"]) - for entry in mcu_to_vid - if entry.get("vid_vendor") - ] - if overrides: - conn.executemany( - "INSERT OR IGNORE INTO usb_vendor (vid, vendor) VALUES (?, ?)", - overrides, - ) def build_db( diff --git a/online-data-tools/fetch_gowdy_supplement.py b/online-data-tools/fetch_gowdy_supplement.py new file mode 100644 index 00000000..d3d9f72d --- /dev/null +++ b/online-data-tools/fetch_gowdy_supplement.py @@ -0,0 +1,155 @@ +#!/usr/bin/env -S uv run --no-project --script +# /// script +# requires-python = ">=3.10" +# /// +"""Tier-4 USB-ID source: scrape https://usb-ids.gowdy.us for VIDs that the +canonical text databases (Rust `usb-ids` crate, linux-usb.org, Fedora +hwdata) don't yet carry. + +Gowdy.us is "the home of the usb.ids file" — but the compiled `usb.ids` +text download lags behind community submissions (e.g. Bertold submitted +Espressif Systems 0x303A in 2021, still missing from the text dump as of +2026). The /read/UD/ HTML page DOES surface those submissions, so we +parse that view as a supplement specifically for VIDs the upstream JSON +already failed to provide. + +This is a real, attributable data source (the URL we scraped is the +audit trail) — not a hardcoded list. We deliberately scope the scrape to +the VIDs referenced by `online-data/data/mcu_to_vid.json` so the request +volume is tiny and the data we add only fills holes that actually matter +for the headline VID:PID -> board lookup. 
+ +Output (sorted, lowercase keys, matches usb-vid.json schema): + { + "303a": { + "vendor": "Espressif Systems", + "products": [["0002", ""], ["1001", ""], ["4001", ""], ["8293", ""]] + } + } + +Usage: + fetch_gowdy_supplement.py \\ + --mcu-to-vid online-data/data/mcu_to_vid.json \\ + --existing online-data/data/usb-vid.json \\ + --out /tmp/gowdy-supplement.json +""" + +from __future__ import annotations + +import argparse +import json +import re +import ssl +import sys +import urllib.request +from pathlib import Path +from typing import Callable, Iterable + +GOWDY_BASE = "https://usb-ids.gowdy.us/read/UD" + +# Robust patterns scoped to the HTML structure observed on /read/UD/: +#

Name: Espressif Systems +_VENDOR_NAME_RE = re.compile( + r"\s*Name:\s*(.+?)\s*(?:<|$)", + re.IGNORECASE | re.DOTALL, +) +# 0002Optional name +_PRODUCT_RE = re.compile( + r"\s*[0-9a-fA-F]{4}\s*\s*([^<]*)", + re.IGNORECASE, +) + + +def _fetch(url: str, *, timeout: float = 30.0) -> str: + # gowdy.us serves a self-signed-ish cert behind sourceforge — accept it. + # The scraped content is structural / public; SSL-stripping risk is low. + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + req = urllib.request.Request(url, headers={"User-Agent": "fbuild-bot/1.0"}) + with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp: + return resp.read().decode("utf-8", errors="replace") + + +def parse_vendor_page(html: str) -> tuple[str | None, list[tuple[str, str]]]: + """Return (vendor_name_or_None, list of (pid_lower, product_name_or_empty)).""" + vendor_match = _VENDOR_NAME_RE.search(html) + vendor = vendor_match.group(1).strip() if vendor_match else None + products: list[tuple[str, str]] = [] + for m in _PRODUCT_RE.finditer(html): + pid = m.group(1).lower() + name = m.group(2).strip() + products.append((pid, name)) + # De-dupe (the page can list the same PID twice in degenerate cases). + seen: set[str] = set() + uniq: list[tuple[str, str]] = [] + for pid, name in products: + if pid in seen: + continue + seen.add(pid) + uniq.append((pid, name)) + return vendor, uniq + + +def collect( + *, + vids: Iterable[str], + existing: set[str], + fetch: Callable[[str], str] = _fetch, +) -> dict: + """For each VID, scrape gowdy.us and emit usb-vid.json-shaped entries. + + Skips VIDs already present in `existing` so we never overwrite the + primary upstream sources. Vendors gowdy can't resolve are skipped. 
+ """ + out: dict = {} + for vid in sorted(set(vid.lower() for vid in vids)): + if vid in existing: + continue + url = f"{GOWDY_BASE}/{vid.upper()}" + try: + html = fetch(url) + except Exception as e: + print(f"warning: {url}: fetch failed: {e}", file=sys.stderr) + continue + vendor, products = parse_vendor_page(html) + if not vendor: + print(f"warning: {url}: no vendor name on page; skipped", file=sys.stderr) + continue + out[vid] = { + "vendor": vendor, + "products": [list(p) for p in products], + } + print(f"gowdy 0x{vid}: vendor={vendor!r} products={len(products)}", file=sys.stderr) + return out + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--mcu-to-vid", required=True, type=Path, + help="JSON array of {mcu_family, vid, score, ...} — the VIDs we care about.") + p.add_argument("--existing", required=True, type=Path, + help="Upstream usb-vid.json — VIDs already here are NOT re-fetched.") + p.add_argument("--out", required=True, type=Path, + help="Output JSON in usb-vid.json shape (sorted, lowercase keys).") + args = p.parse_args() + + mcu = json.loads(args.mcu_to_vid.read_text(encoding="utf-8")) + vids = [e["vid"] for e in mcu] + existing_raw = json.loads(args.existing.read_text(encoding="utf-8")) + existing = {k.lower() for k in (existing_raw.keys() if isinstance(existing_raw, dict) else ())} + + supplement = collect(vids=vids, existing=existing) + args.out.write_text( + json.dumps(supplement, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + print( + f"wrote {args.out}: {len(supplement)} supplementary VID(s) " + f"(asked={len(set(vids))}, already-upstream={len(set(v.lower() for v in vids) & existing)})" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/overlay_usb_vid.py b/online-data-tools/overlay_usb_vid.py new file mode 100644 index 00000000..9dc8c94a --- /dev/null +++ b/online-data-tools/overlay_usb_vid.py @@ -0,0 +1,66 @@ +#!/usr/bin/env -S uv run 
--no-project --script +# /// script +# requires-python = ">=3.10" +# /// +"""Overlay a supplementary `usb-vid.json` (e.g. from `fetch_gowdy_supplement`) +onto the upstream `usb-vid.json` produced by `merge_sources.py`. + +Upstream wins on conflict — the supplement is treated as a gap-filler, not +a replacement. Concretely: + + - If a VID is missing from upstream entirely, the supplement's vendor + + products list is inserted verbatim. + - If a VID exists in both, the upstream entry is kept untouched (we do + NOT merge product lists, to keep this script's behavior trivially + auditable — supplements are for missing-VID gaps, not name patches). + +The merged result is written to `--out`; the workflow passes the upstream path there, so it is updated in place. +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +def overlay(upstream: dict, supplement: dict) -> tuple[dict, int]: + """Return (merged_dict, added_vid_count). Upstream is NOT mutated.""" + out = dict(upstream) + added = 0 + for vid, entry in supplement.items(): + if vid in out: + continue + out[vid] = entry + added += 1 + # Sort by VID so the JSON diff stays stable across runs. 
+ return dict(sorted(out.items())), added + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--upstream", required=True, type=Path) + p.add_argument("--supplement", required=True, type=Path) + p.add_argument("--out", required=True, type=Path) + args = p.parse_args() + + upstream = json.loads(args.upstream.read_text(encoding="utf-8")) + if not args.supplement.exists(): + print(f"no supplement at {args.supplement} — nothing to overlay") + return 0 + supplement = json.loads(args.supplement.read_text(encoding="utf-8")) + merged, added = overlay(upstream, supplement) + args.out.write_text( + json.dumps(merged, indent=2, sort_keys=False) + "\n", + encoding="utf-8", + ) + print( + f"overlaid {args.supplement.name} onto {args.upstream.name}: " + f"+{added} VID(s), total={len(merged)}" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/seed_mcu_to_vid.json b/online-data-tools/seed_mcu_to_vid.json index 606cdf05..69ebfdb3 100644 --- a/online-data-tools/seed_mcu_to_vid.json +++ b/online-data-tools/seed_mcu_to_vid.json @@ -1,68 +1,68 @@ [ - {"mcu_family": "ESP32S3", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.95, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32S2", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.95, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32C3", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB / USB-Serial-JTAG"}, - {"mcu_family": "ESP32C6", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32H2", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32P4", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.90, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32C2", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 
0.55, "reason": "CP210x UART bridge (typical dev board)"}, - {"mcu_family": "ESP32C2", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.50, "reason": "CH340 UART bridge (clone boards)"}, - {"mcu_family": "ESP32C5", "vid": "303a", "vid_vendor": "Espressif Systems", "score": 0.85, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32S3", "vid": "303a", "score": 0.95, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32S2", "vid": "303a", "score": 0.95, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32C3", "vid": "303a", "score": 0.90, "reason": "Espressif native USB / USB-Serial-JTAG"}, + {"mcu_family": "ESP32C6", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32H2", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32P4", "vid": "303a", "score": 0.90, "reason": "Espressif native USB"}, + {"mcu_family": "ESP32C2", "vid": "10c4", "score": 0.55, "reason": "CP210x UART bridge (typical dev board)"}, + {"mcu_family": "ESP32C2", "vid": "1a86", "score": 0.50, "reason": "CH340 UART bridge (clone boards)"}, + {"mcu_family": "ESP32C5", "vid": "303a", "score": 0.85, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.70, "reason": "CP210x UART bridge (classic ESP32 dev kits)"}, - {"mcu_family": "ESP32", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.55, "reason": "CH340 UART bridge"}, - {"mcu_family": "ESP32", "vid": "0403", "vid_vendor": "Future Technology Devices International, Ltd", "score": 0.45, "reason": "FTDI FT232 (some dev boards)"}, + {"mcu_family": "ESP32", "vid": "10c4", "score": 0.70, "reason": "CP210x UART bridge (classic ESP32 dev kits)"}, + {"mcu_family": "ESP32", "vid": "1a86", "score": 0.55, "reason": "CH340 UART bridge"}, + {"mcu_family": "ESP32", "vid": "0403", "score": 0.45, "reason": "FTDI FT232 (some dev boards)"}, - {"mcu_family": "ESP8266", "vid": "1a86", 
"vid_vendor": "QinHeng Electronics", "score": 0.70, "reason": "CH340G UART bridge (NodeMCU classic)"}, - {"mcu_family": "ESP8266", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.60, "reason": "CP2102 UART bridge"}, + {"mcu_family": "ESP8266", "vid": "1a86", "score": 0.70, "reason": "CH340G UART bridge (NodeMCU classic)"}, + {"mcu_family": "ESP8266", "vid": "10c4", "score": 0.60, "reason": "CP2102 UART bridge"}, - {"mcu_family": "RP2040", "vid": "2e8a", "vid_vendor": "Raspberry Pi", "score": 0.95, "reason": "Raspberry Pi RP2 Boot / Pico CDC"}, - {"mcu_family": "RP2350", "vid": "2e8a", "vid_vendor": "Raspberry Pi", "score": 0.95, "reason": "Raspberry Pi Pico 2 CDC"}, + {"mcu_family": "RP2040", "vid": "2e8a", "score": 0.95, "reason": "Raspberry Pi RP2 Boot / Pico CDC"}, + {"mcu_family": "RP2350", "vid": "2e8a", "score": 0.95, "reason": "Raspberry Pi Pico 2 CDC"}, - {"mcu_family": "STM32F1", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro DFU / native CDC"}, - {"mcu_family": "STM32F4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro DFU / native CDC"}, - {"mcu_family": "STM32F7", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32H7", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32L4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.80, "reason": "STMicro native USB"}, - {"mcu_family": "STM32G0", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.75, "reason": "STMicro native USB"}, - {"mcu_family": "STM32G4", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.75, "reason": "STMicro native USB"}, - {"mcu_family": "STM32", "vid": "0483", "vid_vendor": "STMicroelectronics", "score": 0.70, "reason": "STMicro fallback (any STM32 family)"}, + {"mcu_family": "STM32F1", "vid": "0483", "score": 0.80, "reason": 
"STMicro DFU / native CDC"}, + {"mcu_family": "STM32F4", "vid": "0483", "score": 0.80, "reason": "STMicro DFU / native CDC"}, + {"mcu_family": "STM32F7", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32H7", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32L4", "vid": "0483", "score": 0.80, "reason": "STMicro native USB"}, + {"mcu_family": "STM32G0", "vid": "0483", "score": 0.75, "reason": "STMicro native USB"}, + {"mcu_family": "STM32G4", "vid": "0483", "score": 0.75, "reason": "STMicro native USB"}, + {"mcu_family": "STM32", "vid": "0483", "score": 0.70, "reason": "STMicro fallback (any STM32 family)"}, - {"mcu_family": "ATMEGA32U4", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.80, "reason": "Arduino Leonardo / Micro lineage"}, - {"mcu_family": "ATMEGA32U4", "vid": "1b4f", "vid_vendor": "SparkFun Electronics", "score": 0.50, "reason": "SparkFun ATmega32U4 boards"}, - {"mcu_family": "ATMEGA328P", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.70, "reason": "CH340 UART (Nano clones)"}, - {"mcu_family": "ATMEGA328P", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.65, "reason": "Arduino Uno R3"}, - {"mcu_family": "ATMEGA328P", "vid": "0403", "vid_vendor": "Future Technology Devices International, Ltd", "score": 0.45, "reason": "FTDI FT232 (older Uno / Duemilanove)"}, - {"mcu_family": "ATMEGA2560", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.70, "reason": "Arduino Mega 2560"}, - {"mcu_family": "ATMEGA2560", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.65, "reason": "CH340 UART (Mega clones)"}, + {"mcu_family": "ATMEGA32U4", "vid": "2341", "score": 0.80, "reason": "Arduino Leonardo / Micro lineage"}, + {"mcu_family": "ATMEGA32U4", "vid": "1b4f", "score": 0.50, "reason": "SparkFun ATmega32U4 boards"}, + {"mcu_family": "ATMEGA328P", "vid": "1a86", "score": 0.70, "reason": "CH340 UART (Nano clones)"}, + {"mcu_family": "ATMEGA328P", "vid": 
"2341", "score": 0.65, "reason": "Arduino Uno R3"}, + {"mcu_family": "ATMEGA328P", "vid": "0403", "score": 0.45, "reason": "FTDI FT232 (older Uno / Duemilanove)"}, + {"mcu_family": "ATMEGA2560", "vid": "2341", "score": 0.70, "reason": "Arduino Mega 2560"}, + {"mcu_family": "ATMEGA2560", "vid": "1a86", "score": 0.65, "reason": "CH340 UART (Mega clones)"}, - {"mcu_family": "SAMD21", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.75, "reason": "Arduino Zero / MKR1000 lineage"}, - {"mcu_family": "SAMD21", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.70, "reason": "Adafruit Feather M0 lineage"}, - {"mcu_family": "SAMD21", "vid": "1b4f", "vid_vendor": "SparkFun Electronics", "score": 0.55, "reason": "SparkFun SAMD21 boards"}, - {"mcu_family": "SAMD51", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.80, "reason": "Adafruit Feather M4 / ItsyBitsy M4"}, - {"mcu_family": "SAMD51", "vid": "2341", "vid_vendor": "Arduino SA", "score": 0.55, "reason": "Arduino MKR Vidor 4000 etc."}, + {"mcu_family": "SAMD21", "vid": "2341", "score": 0.75, "reason": "Arduino Zero / MKR1000 lineage"}, + {"mcu_family": "SAMD21", "vid": "239a", "score": 0.70, "reason": "Adafruit Feather M0 lineage"}, + {"mcu_family": "SAMD21", "vid": "1b4f", "score": 0.55, "reason": "SparkFun SAMD21 boards"}, + {"mcu_family": "SAMD51", "vid": "239a", "score": 0.80, "reason": "Adafruit Feather M4 / ItsyBitsy M4"}, + {"mcu_family": "SAMD51", "vid": "2341", "score": 0.55, "reason": "Arduino MKR Vidor 4000 etc."}, - {"mcu_family": "NRF52840", "vid": "239a", "vid_vendor": "Adafruit", "score": 0.80, "reason": "Adafruit Bluefruit nRF52840"}, - {"mcu_family": "NRF52840", "vid": "1915", "vid_vendor": "Nordic Semiconductor ASA", "score": 0.65, "reason": "Nordic Semi reference / DK"}, - {"mcu_family": "NRF52832", "vid": "1915", "vid_vendor": "Nordic Semiconductor ASA", "score": 0.70, "reason": "Nordic Semi reference"}, + {"mcu_family": "NRF52840", "vid": "239a", "score": 0.80, "reason": "Adafruit Bluefruit 
nRF52840"}, + {"mcu_family": "NRF52840", "vid": "1915", "score": 0.65, "reason": "Nordic Semi reference / DK"}, + {"mcu_family": "NRF52832", "vid": "1915", "score": 0.70, "reason": "Nordic Semi reference"}, - {"mcu_family": "CH32V", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.90, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "CH32X", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.90, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "CH32L", "vid": "1a86", "vid_vendor": "QinHeng Electronics", "score": 0.85, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32V", "vid": "1a86", "score": 0.90, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32X", "vid": "1a86", "score": 0.90, "reason": "WCH-LinkE / native USB"}, + {"mcu_family": "CH32L", "vid": "1a86", "score": 0.85, "reason": "WCH-LinkE / native USB"}, - {"mcu_family": "MIMXRT", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 4.x"}, - {"mcu_family": "MK64", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.5"}, - {"mcu_family": "MK66", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.6"}, - {"mcu_family": "MK20", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy 3.0/3.1/3.2"}, - {"mcu_family": "MKL26", "vid": "16c0", "vid_vendor": "Van Ooijen Technische Informatica (PJRC)", "score": 0.80, "reason": "PJRC Teensy LC"}, + {"mcu_family": "MIMXRT", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 4.x"}, + {"mcu_family": "MK64", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.5"}, + {"mcu_family": "MK66", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.6"}, + {"mcu_family": "MK20", "vid": "16c0", "score": 0.80, "reason": "PJRC Teensy 3.0/3.1/3.2"}, + {"mcu_family": "MKL26", "vid": "16c0", 
"score": 0.80, "reason": "PJRC Teensy LC"}, - {"mcu_family": "LPC11", "vid": "1fc9", "vid_vendor": "NXP Semiconductors", "score": 0.65, "reason": "NXP LPC11U DAPLink"}, - {"mcu_family": "LPC8", "vid": "1fc9", "vid_vendor": "NXP Semiconductors", "score": 0.65, "reason": "NXP LPC8xx CMSIS-DAP / DAPLink"}, - {"mcu_family": "LPC8", "vid": "0d28", "vid_vendor": "Mbed (Arm)", "score": 0.55, "reason": "DAPLink CMSIS-DAP"}, + {"mcu_family": "LPC11", "vid": "1fc9", "score": 0.65, "reason": "NXP LPC11U DAPLink"}, + {"mcu_family": "LPC8", "vid": "1fc9", "score": 0.65, "reason": "NXP LPC8xx CMSIS-DAP / DAPLink"}, + {"mcu_family": "LPC8", "vid": "0d28", "score": 0.55, "reason": "DAPLink CMSIS-DAP"}, - {"mcu_family": "AM_APOLLO3", "vid": "1cbe", "vid_vendor": "Luminary Micro Inc. / SparkFun", "score": 0.70, "reason": "Sparkfun Apollo3 bootloader"}, + {"mcu_family": "AM_APOLLO3", "vid": "1cbe", "score": 0.70, "reason": "Sparkfun Apollo3 bootloader"}, - {"mcu_family": "MGM240P", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.65, "reason": "Silicon Labs CP210x"}, + {"mcu_family": "MGM240P", "vid": "10c4", "score": 0.65, "reason": "Silicon Labs CP210x"}, - {"mcu_family": "EFR32MG", "vid": "10c4", "vid_vendor": "Silicon Labs", "score": 0.65, "reason": "Silicon Labs CP210x"} + {"mcu_family": "EFR32MG", "vid": "10c4", "score": 0.65, "reason": "Silicon Labs CP210x"} ] diff --git a/online-data-tools/test_build_sqlite.py b/online-data-tools/test_build_sqlite.py index 0d65b78f..5ce6f79f 100644 --- a/online-data-tools/test_build_sqlite.py +++ b/online-data-tools/test_build_sqlite.py @@ -270,6 +270,10 @@ def test_mcu_to_vid_round_trip(built_db: Path, sample_mcu_to_vid: list[dict]) -> # --------------------------------------------------------------------------- # # The headline query: given a VID + PID, what board is most likely? 
+# LEFT JOIN on usb_vendor / usb_product because some real VIDs are missing +# from the public usb.ids text databases (see board_vid_guess in +# build_sqlite.py). We want the heuristic answer ("ESP32-S3 → 0x303a") +# to surface even when no friendly vendor name is available. QUERY_VID_PID_TO_BOARDS = """ SELECT b.id AS board_id, @@ -281,10 +285,10 @@ def test_mcu_to_vid_round_trip(built_db: Path, sample_mcu_to_vid: list[dict]) -> ( m.score + CASE WHEN p.pid IS NOT NULL THEN 0.25 ELSE 0.0 END - + CASE WHEN LOWER(b.vendor) = LOWER(v.vendor) THEN 0.10 ELSE 0.0 END + + CASE WHEN v.vendor IS NOT NULL AND LOWER(b.vendor) = LOWER(v.vendor) THEN 0.10 ELSE 0.0 END ) AS score FROM mcu_to_vid m -JOIN usb_vendor v +LEFT JOIN usb_vendor v ON v.vid = m.vid LEFT JOIN usb_product p ON p.vid = m.vid AND p.pid = ?2 @@ -333,16 +337,19 @@ def test_canned_query_vid_pid_to_boards_totally_unknown(built_db: Path) -> None: assert rows == [], "totally unknown VID:PID must return an empty set" -def test_mcu_to_vid_injects_missing_usb_vendor(tmp_path: Path) -> None: - """Regression for the production bug found on first dispatch: upstream - usb.ids text databases don't carry 0x303a (Espressif) or 0x2e8a - (Raspberry Pi). When mcu_to_vid references such a VID and carries a - `vid_vendor` field, build_db must inject the vendor so the - board_vid_guess join doesn't silently drop the most relevant rows. +def test_board_vid_guess_survives_missing_usb_vendor(tmp_path: Path) -> None: + """Regression: upstream usb.ids text databases (Rust usb-ids crate, + linux-usb.org, Fedora hwdata) don't carry 0x303a (Espressif) or + 0x2e8a (Raspberry Pi). The www workflow plugs that gap via the + gowdy.us tier-4 scraper (fetch_gowdy_supplement.py + overlay_usb_vid). + If for any reason the supplement is unavailable, the SQLite view must + still surface the heuristic answer with usb_vendor = NULL — never + silently drop the row. 
""" data = tmp_path / "data" data.mkdir() - # Upstream usb-vid.json is missing 0x303a entirely. + # Upstream usb-vid.json is missing 0x303a entirely (simulates the case + # where the gowdy.us tier-4 fetch failed this run). (data / "usb-vid.json").write_text(json.dumps({ "10c4": {"vendor": "Silicon Labs", "products": [["ea60", "CP210x"]]}, }), encoding="utf-8") @@ -356,11 +363,9 @@ def test_mcu_to_vid_injects_missing_usb_vendor(tmp_path: Path) -> None: }), encoding="utf-8") (data / "vendor_boards.json").write_text("{}", encoding="utf-8") (data / "mcu_to_vid.json").write_text(json.dumps([ - {"mcu_family": "ESP32S3", "vid": "303a", - "vid_vendor": "Espressif Systems", "score": 0.95, + {"mcu_family": "ESP32S3", "vid": "303a", "score": 0.95, "reason": "Espressif native USB"}, - {"mcu_family": "ESP32S3", "vid": "10c4", - "vid_vendor": "Silicon Labs", "score": 0.55, + {"mcu_family": "ESP32S3", "vid": "10c4", "score": 0.55, "reason": "CP210x bridge (legacy)"}, ]), encoding="utf-8") out = tmp_path / "regression.db" @@ -372,25 +377,18 @@ def test_mcu_to_vid_injects_missing_usb_vendor(tmp_path: Path) -> None: out_path = out, ) with sqlite3.connect(out) as conn: - # 0x303a was missing upstream → must be auto-injected. - row = conn.execute( - "SELECT vendor FROM usb_vendor WHERE vid = ?", (int("303a", 16),), - ).fetchone() - assert row is not None, "vid_vendor seed must inject missing 0x303a" - assert row[0] == "Espressif Systems" - # 0x10c4 was already present upstream — injection must NOT clobber. - row = conn.execute( - "SELECT vendor FROM usb_vendor WHERE vid = ?", (int("10c4", 16),), - ).fetchone() - assert row[0] == "Silicon Labs" - # board_vid_guess now includes the ESP32S3 → 0x303a row. + # board_vid_guess still yields the 0x303a row, just with NULL vendor. 
rows = conn.execute( - "SELECT vid, confidence FROM board_vid_guess " + "SELECT vid, usb_vendor, confidence FROM board_vid_guess " "WHERE board_id = 'esp32-s3-devkitc-1' " "ORDER BY confidence DESC" ).fetchall() - assert rows, "view must yield rows now that vendor is injected" + assert rows, "view must yield rows even when vendor is missing upstream" assert rows[0][0] == int("303a", 16), f"0x303a should rank first; got {rows}" + # The headline row's vendor name is None — UI renders as hyphen. + assert rows[0][1] is None + # The 0x10c4 fallback row also surfaces, with its real vendor name. + assert any(r[0] == int("10c4", 16) and r[1] == "Silicon Labs" for r in rows) def test_canned_query_vid_pid_to_boards_rp2040(built_db: Path) -> None: diff --git a/online-data-tools/test_gowdy.py b/online-data-tools/test_gowdy.py new file mode 100644 index 00000000..8b0753d3 --- /dev/null +++ b/online-data-tools/test_gowdy.py @@ -0,0 +1,180 @@ +#!/usr/bin/env -S uv run --no-project --with pytest --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["pytest"] +# /// +"""Tests for fetch_gowdy_supplement + overlay_usb_vid. + +The HTML fixture in `parse_vendor_page` tests is captured verbatim from +https://usb-ids.gowdy.us/read/UD/303A (the 0x303A Espressif page) so the +parser regression is grounded in real production HTML, not a synthetic +sample. No network in the test suite — `fetch` is stubbed via a callable. +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +HERE = Path(__file__).resolve().parent +sys.path.insert(0, str(HERE)) +import fetch_gowdy_supplement # noqa: E402 +import overlay_usb_vid # noqa: E402 + + +GOWDY_303A_HTML = """\ + +USB Devices +

+

Discussion

+

Name: Espressif Systems +

Bertold +

2021-11-10 14:17:35 +

+

Discuss +

+

Devices

+ +
0002 +
1001USB JTAG/serial debug unit +
4001 +
8293 +
+ +""" + +GOWDY_BARE_PAGE = """\ +

No such vendor

+""" + + +# --------------------------------------------------------------------------- # +# parse_vendor_page +# --------------------------------------------------------------------------- # + +def test_parse_vendor_extracts_name_and_pids() -> None: + vendor, products = fetch_gowdy_supplement.parse_vendor_page(GOWDY_303A_HTML) + assert vendor == "Espressif Systems" + pids = [p for p, _ in products] + assert pids == ["0002", "1001", "4001", "8293"] + # Product names are captured when present. + names = {p: n for p, n in products} + assert names["1001"] == "USB JTAG/serial debug unit" + assert names["0002"] == "" + + +def test_parse_vendor_page_with_no_vendor_returns_none() -> None: + vendor, products = fetch_gowdy_supplement.parse_vendor_page(GOWDY_BARE_PAGE) + assert vendor is None + assert products == [] + + +def test_parse_vendor_de_dupes_pids() -> None: + html = GOWDY_303A_HTML.replace( + '4001', + '40014001', + ) + _vendor, products = fetch_gowdy_supplement.parse_vendor_page(html) + pids = [p for p, _ in products] + assert pids.count("4001") == 1 + + +# --------------------------------------------------------------------------- # +# collect — wire parse + the existing-VID skip-set + the injected fetch +# --------------------------------------------------------------------------- # + +def test_collect_skips_existing_vids() -> None: + captured: list[str] = [] + + def fake_fetch(url: str) -> str: + captured.append(url) + return GOWDY_303A_HTML + + out = fetch_gowdy_supplement.collect( + vids=["303a", "10c4"], + existing={"10c4"}, # already in upstream — skip + fetch=fake_fetch, + ) + assert list(out.keys()) == ["303a"] + assert out["303a"]["vendor"] == "Espressif Systems" + assert len(out["303a"]["products"]) == 4 + assert captured == ["https://usb-ids.gowdy.us/read/UD/303A"] + + +def test_collect_swallows_per_vid_failures() -> None: + def flaky_fetch(url: str) -> str: + if "BAD0" in url: + raise RuntimeError("simulated network error") + return GOWDY_303A_HTML + 
+ out = fetch_gowdy_supplement.collect( + vids=["303a", "BAD0"], existing=set(), fetch=flaky_fetch, + ) + assert list(out.keys()) == ["303a"], "good VID must still land despite a sibling failure" + + +def test_collect_skips_vendors_with_no_name_on_page() -> None: + def fetch(url: str) -> str: + return GOWDY_BARE_PAGE + + out = fetch_gowdy_supplement.collect( + vids=["beef"], existing=set(), fetch=fetch, + ) + assert out == {} + + +# --------------------------------------------------------------------------- # +# overlay_usb_vid +# --------------------------------------------------------------------------- # + +def test_overlay_adds_missing_vids_only() -> None: + upstream = { + "10c4": {"vendor": "Silicon Labs", "products": [["ea60", "CP210x"]]}, + } + supplement = { + "303a": {"vendor": "Espressif Systems", "products": [["4002", ""]]}, + "10c4": {"vendor": "WRONG NAME", "products": [["dead", "bad"]]}, # must NOT win + } + merged, added = overlay_usb_vid.overlay(upstream, supplement) + assert added == 1 + # Upstream wins for 10c4 — no merging of name OR products. + assert merged["10c4"]["vendor"] == "Silicon Labs" + assert merged["10c4"]["products"] == [["ea60", "CP210x"]] + # 303a is freshly added. + assert merged["303a"]["vendor"] == "Espressif Systems" + # Sorted by VID. 
+ assert list(merged.keys()) == ["10c4", "303a"] + + +def test_overlay_does_not_mutate_input() -> None: + upstream = {"10c4": {"vendor": "Silicon Labs", "products": []}} + overlay_usb_vid.overlay(upstream, {"303a": {"vendor": "X", "products": []}}) + assert "303a" not in upstream + + +def test_overlay_main_emits_file(tmp_path: Path) -> None: + up = tmp_path / "upstream.json" + sup = tmp_path / "sup.json" + out = tmp_path / "out.json" + up.write_text(json.dumps({"10c4": {"vendor": "Silicon Labs", "products": []}}), + encoding="utf-8") + sup.write_text(json.dumps({"303a": {"vendor": "Espressif Systems", + "products": [["4002", ""]]}}), + encoding="utf-8") + sys.argv = [ + "overlay_usb_vid.py", + "--upstream", str(up), + "--supplement", str(sup), + "--out", str(out), + ] + rc = overlay_usb_vid.main() + assert rc == 0 + data = json.loads(out.read_text(encoding="utf-8")) + assert "303a" in data and data["303a"]["vendor"] == "Espressif Systems" + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-v"])) diff --git a/online-data-tools/www_static/app.js b/online-data-tools/www_static/app.js index d54b5926..54c9381c 100644 --- a/online-data-tools/www_static/app.js +++ b/online-data-tools/www_static/app.js @@ -83,10 +83,10 @@ ( m.score + CASE WHEN p.pid IS NOT NULL THEN 0.25 ELSE 0.0 END - + CASE WHEN LOWER(b.vendor) = LOWER(v.vendor) THEN 0.10 ELSE 0.0 END + + CASE WHEN v.vendor IS NOT NULL AND LOWER(b.vendor) = LOWER(v.vendor) THEN 0.10 ELSE 0.0 END ) AS score FROM mcu_to_vid m - JOIN usb_vendor v + LEFT JOIN usb_vendor v ON v.vid = m.vid LEFT JOIN usb_product p ON p.vid = m.vid AND p.pid = ?2 From 40ab5a00d371297e9dbd051f8046ea562337d5d4 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 19:34:00 -0700 Subject: [PATCH 5/6] feat(usb): embedded vendor archive replaces runtime usb-ids crate (#719) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the curated-overlay-aware vendor map into fbuild-core via a 
compile-time-embedded tar.zst (~22 KB) instead of pulling vendor lookups from the runtime usb-ids crate snapshot. Key pieces: - online-data-tools/vendor_names_inlined.py: 253-entry {vid: vendor} literal, generated from the manually-triaged ids4.json (which traces back to a usb-ids.gowdy.us scrape -> ids2 -> ids3 dedupe pipeline, preserved in repo for audit). - online-data-tools/build_vendor_archive.py: packs the merged usb-vid.json's vendor map into a compact 'vid:name,...' TXT (with %-escaped commas/percents), wraps in tar, compresses to zstd lvl 19. - online-data-tools/overlay_usb_vid.py: new vendor-override mode so the curated inlined names WIN over the upstream text databases (linux-usb.org and friends still miss 0x303A Espressif / 0x2E8A Raspberry Pi). - update-data.yml: replaces the live gowdy.us scrape step with the inlined supplement; adds a tar.zst packaging step that commits usb-vendors.tar.zst alongside usb-vid.json on online-data. - crates/fbuild-core/src/usb/embedded.rs: lazy OnceLock-cached decompression + parse with a Rust mirror of the compact format, plus schema-version guard against forward incompatibility. - crates/fbuild-core/src/usb/resolver.rs: tier order reversed (overlay has full vendor+product now; embedded is vendor-only tier-1 fallback with synthetic 'Device 0xPPPP' product placeholder). Espressif resolution test pins the round-trip from curated overlay -> archive -> fbuild runtime. usb-ids Rust crate is intentionally KEPT in fbuild-core deps — used only by examples/dump_usb_ids.rs which the workflow runs as the tier-1 aggregator source for online-data. The runtime resolver no longer touches it. 61 Python tests + 15 fbuild-core::usb Rust tests passing locally. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/update-data.yml | 58 +- Cargo.lock | 2 + crates/fbuild-core/Cargo.toml | 11 +- crates/fbuild-core/data/README.md | 29 + crates/fbuild-core/data/usb-vendors.tar.zst | Bin 0 -> 22677 bytes crates/fbuild-core/src/usb/embedded.rs | 260 +++++++ crates/fbuild-core/src/usb/mod.rs | 22 +- crates/fbuild-core/src/usb/resolver.rs | 130 ++-- ids.json | 255 +++++++ ids.txt | 253 +++++++ ids2.json | 789 ++++++++++++++++++++ ids3.json | 761 +++++++++++++++++++ ids4.json | 255 +++++++ online-data-tools/build_vendor_archive.py | 186 +++++ online-data-tools/keep.py | 59 ++ online-data-tools/next_dual.py | 40 + online-data-tools/overlay_usb_vid.py | 54 +- online-data-tools/scrape_ids.py | 328 ++++++++ online-data-tools/test_archive.py | 148 ++++ online-data-tools/test_gowdy.py | 57 +- online-data-tools/test_inlined.py | 121 +++ online-data-tools/vendor_names_inlined.py | 330 ++++++++ 22 files changed, 4050 insertions(+), 98 deletions(-) create mode 100644 crates/fbuild-core/data/README.md create mode 100644 crates/fbuild-core/data/usb-vendors.tar.zst create mode 100644 crates/fbuild-core/src/usb/embedded.rs create mode 100644 ids.json create mode 100644 ids.txt create mode 100644 ids2.json create mode 100644 ids3.json create mode 100644 ids4.json create mode 100644 online-data-tools/build_vendor_archive.py create mode 100644 online-data-tools/keep.py create mode 100644 online-data-tools/next_dual.py create mode 100644 online-data-tools/scrape_ids.py create mode 100644 online-data-tools/test_archive.py create mode 100644 online-data-tools/test_inlined.py create mode 100644 online-data-tools/vendor_names_inlined.py diff --git a/.github/workflows/update-data.yml b/.github/workflows/update-data.yml index 0fa6fc2c..6f9c2449 100644 --- a/.github/workflows/update-data.yml +++ b/.github/workflows/update-data.yml @@ -237,36 +237,53 @@ jobs: --manifest-fragment-slim /tmp/fragments/vendor_boards.json # 
──────────────────────────────────────────────────────────────────── - # Tier-4 USB-VID source: scrape usb-ids.gowdy.us for VIDs the public - # text databases (Rust crate, linux-usb.org, hwdata) don't yet carry. - # Notable holes today: 0x303A Espressif, 0x2E8A Raspberry Pi Foundation - # — both confirmed missing from upstreams as of 2026-06-21. Gowdy.us - # is the authoritative submission/discussion view; we scrape it only - # for VIDs referenced by `mcu_to_vid` so the request volume stays tiny - # and the data added is fully attributable (the URL is the audit trail). + # Tier-4 USB-VID source: inlined supplement curated from + # usb-ids.gowdy.us. The public text databases (Rust crate, + # linux-usb.org, hwdata) don't carry newer VIDs like 0x303A Espressif + # or 0x2E8A Raspberry Pi Foundation. The 253-entry overlay lives in + # online-data-tools/vendor_names_inlined.py — committed to main so + # the workflow is reproducible offline (no nightly live-scrape) and + # auditable (each entry traces back to ids.txt -> ids4.json). 
# ──────────────────────────────────────────────────────────────────── - - name: Fetch usb-ids.gowdy.us supplement (USB-VID tier-4) - id: fetch-gowdy + - name: Emit inlined vendor-name supplement (USB-VID tier-4) + id: emit-inlined continue-on-error: true if: steps.merge-usb.outcome == 'success' run: | uv run --no-project --script \ - "${{ github.workspace }}/online-data-tools/fetch_gowdy_supplement.py" \ - --mcu-to-vid "${{ github.workspace }}/online-data-tools/seed_mcu_to_vid.json" \ - --existing "${ONLINE_WORKTREE}/data/usb-vid.json" \ - --out /tmp/gowdy-supplement.json + "${{ github.workspace }}/online-data-tools/vendor_names_inlined.py" \ + --out /tmp/inlined-supplement.json - - name: Overlay gowdy supplement onto usb-vid.json - id: overlay-gowdy + - name: Overlay inlined supplement onto usb-vid.json + id: overlay-inlined continue-on-error: true - if: steps.fetch-gowdy.outcome == 'success' + if: steps.emit-inlined.outcome == 'success' + # vendor-override: the curated inlined names WIN over the upstream + # text databases. Upstream products lists are preserved untouched — + # only the vendor name field gets replaced for VIDs present in both. run: | uv run --no-project --script \ "${{ github.workspace }}/online-data-tools/overlay_usb_vid.py" \ --upstream "${ONLINE_WORKTREE}/data/usb-vid.json" \ - --supplement /tmp/gowdy-supplement.json \ - --out "${ONLINE_WORKTREE}/data/usb-vid.json" + --supplement /tmp/inlined-supplement.json \ + --out "${ONLINE_WORKTREE}/data/usb-vid.json" \ + --mode vendor-override + + - name: Package usb-vendors.tar.zst (embeddable into fbuild) + id: package-archive + continue-on-error: true + if: steps.overlay-inlined.outcome == 'success' + # Compact {vid: vendor} dict in tar.zst form. fbuild include_bytes!()s + # this at compile time so its USB-vendor lookup needs no runtime + # network access and no `usb-ids` Rust crate dependency. PID-level + # lookups live in the www-branch SQLite-over-HTTP DB. 
+ run: | + uv run --no-project --script \ + "${{ github.workspace }}/online-data-tools/build_vendor_archive.py" \ + --upstream "${ONLINE_WORKTREE}/data/usb-vid.json" \ + --out "${ONLINE_WORKTREE}/data/usb-vendors.tar.zst" + ls -la "${ONLINE_WORKTREE}/data/usb-vendors.tar.zst" - name: Assemble manifest.json id: build-manifest @@ -362,8 +379,9 @@ jobs: echo "| usbids/usbids github | ${{ steps.fetch-github.outcome }} |" echo "| pio boards (platformio) | ${{ steps.dump-pio.outcome }} |" echo "| merge usb-vid | ${{ steps.merge-usb.outcome }} |" - echo "| fetch gowdy (tier-4) | ${{ steps.fetch-gowdy.outcome }} |" - echo "| overlay gowdy | ${{ steps.overlay-gowdy.outcome }} |" + echo "| emit inlined supplement | ${{ steps.emit-inlined.outcome }} |" + echo "| overlay inlined supplement | ${{ steps.overlay-inlined.outcome }} |" + echo "| package vendor archive | ${{ steps.package-archive.outcome }} |" echo "| merge pio-boards | ${{ steps.merge-pio.outcome }} |" echo "| build manifest | ${{ steps.build-manifest.outcome }} |" echo "| build sqlite (www) | ${{ steps.build-sqlite.outcome }} |" diff --git a/Cargo.lock b/Cargo.lock index dd2515a3..69c14eec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -970,11 +970,13 @@ dependencies = [ "serde", "serde_json", "sha2", + "tar", "tempfile", "thiserror 2.0.18", "tokio", "tracing", "usb-ids", + "zstd", ] [[package]] diff --git a/crates/fbuild-core/Cargo.toml b/crates/fbuild-core/Cargo.toml index ae451c04..f89393bb 100644 --- a/crates/fbuild-core/Cargo.toml +++ b/crates/fbuild-core/Cargo.toml @@ -12,8 +12,17 @@ tracing = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } sha2 = { workspace = true } -# Tier-1 USB VID:PID resolver — see `crate::usb`. +# Aggregator backend for the `online-data` workflow only: `examples/dump_usb_ids.rs` +# uses this to feed tier-1 into `online-data/data/usb-vid.json`. 
The fbuild +# runtime resolver no longer touches this crate — it goes through the +# compile-time-embedded `usb-vendors.tar.zst` archive instead (see +# `crate::usb::embedded`). usb-ids = { workspace = true } +# Embedded USB-vendor archive decompression + extraction at first use. +# Pulled in as workspace deps so other crates can share the same zstd / tar +# wire format without per-crate version drift. +zstd = { workspace = true } +tar = { workspace = true } # Process containment primitive (Job Objects on Windows; process groups + # PR_SET_PDEATHSIG on Linux; process groups on macOS). The single global # `ContainedProcessGroup` owned by the daemon ensures every child process diff --git a/crates/fbuild-core/data/README.md b/crates/fbuild-core/data/README.md new file mode 100644 index 00000000..ff55bfb4 --- /dev/null +++ b/crates/fbuild-core/data/README.md @@ -0,0 +1,29 @@ +# fbuild-core embedded data + +Binary blobs `include_bytes!`'d into `fbuild-core` at compile time. + +| File | Purpose | +| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `usb-vendors.tar.zst` | USB Vendor-ID → vendor-name map. Produced by `online-data-tools/build_vendor_archive.py` from the merged `online-data/data/usb-vid.json` (which already incorporates the curated `vendor_names_inlined.py` overlay). See `crate::usb::embedded`. | + +## How to refresh the vendor archive + +The nightly `Update data` workflow on `main` produces a fresh +`usb-vendors.tar.zst` under `online-data/data/`. To bump the embedded +copy here (a deliberate manual step — see issue #718): + +```bash +# 1. Pull the latest from the online-data branch. +curl -sSLo crates/fbuild-core/data/usb-vendors.tar.zst \ + https://raw.githubusercontent.com/FastLED/fbuild/online-data/data/usb-vendors.tar.zst + +# 2. 
Run the fbuild-core tests to confirm the archive parses + the +# well-known entries still resolve. +soldr cargo test -p fbuild-core usb::embedded +``` + +`fbuild-core` will refuse to load the archive if its embedded +`manifest.json` reports a schema version newer than the consumer knows +about — bump `EMBEDDED_SCHEMA_VERSION` in `src/usb/embedded.rs` whenever +the archive format changes (in lock-step with +`online-data-tools/build_vendor_archive.py::SCHEMA_VERSION`). diff --git a/crates/fbuild-core/data/usb-vendors.tar.zst b/crates/fbuild-core/data/usb-vendors.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..633ce94dcc657daaebde520998febc2c398a0bfe GIT binary patch literal 22677 zcmV(tKqfg0<5X zjW%XO0ugg&o5K#4KIlK_Kj71QQIcp!A)X!9qKSWaPe^9bg@ zX?^UVTP;QXI4VqyEL{B8D`CC3;*H#SI;mIl`>F;`;f^i`#Jc0`_(Gz-{Y3BQ9*poOl5t+15?bw5bqnBr%GU0ngjygJ64~VsAe>?{?`2_KL*a)j_B?+ zZT^M!in?g)CuIKxvQFyWcwLhX%^e;Wov@Oi8%BDOTJ<`Zozc{L%e7y5aS$uXti|!h z3ga=L0H$1DAmLexhyLJ~|0bgSMoWMHZ}T6XAoF6)P7f5IE*_Ax`uJh$;RIWYgI4?$ zMDY`$gLtZMY3jm~7U#g4(m9E=A6)3$S9nT6PxO-_I`Mb^7f;jAt)l(0WeaWrldGR^*NH-MciPt{*Cv5>kEa-XQ68vym zUkk5ieDHn$K%qEtK)i1Gi!IOjXWYLr$gr*rtox>tOP9x#j34qtIvPQ+Ye$MGo@9(ls@q*XpTSoc+TP%C%pPxQBld76bj{^TFl& z=mA;3m#9A=rhL~^QvYkz>YtN!k@%1N4;WJa6UC8Y+yaL71z4lO0R`j40r}sw5NP0= z=xVeVUQ79E1FNh1)rM0`4#F}d=$4BV5+MBzgvP~q{3>_!?{h`_M8r99{qa0cAU)i8 zJ)ims9dRe|hxPPQySSFJTYe&D{E!#>mF)GvMGyTtq@98pT~U^@^;F4NYpFW*{zYq# zUZ-3?f>Q9a2@;Ew-VO1hb+kK;sAwNJX)S*6Py`NvHWV(pLT-rp;E}Gv3MQ$`;mA0L z)EL{mXwSTuVkdMh#)_^N-^%e<>(EsG5A8xvAkNiWKvD}%=!#R|I<6ne^HY3jwBr*~o_L>* zj{K5~#qieedqvaPZn3v1oWS>yjwXD$&NVb$oC8S1Ehc;LIl8hvJIQ+;(%jo&zH?QomaqR~mju>0eXeyGQ<$8+zS}iHG zsGZO)-*pp(u1&i3o?=BjaYS128^sYyXpfdI{_vtN`nfpFSUb}l4P-4G{g#@*u-AT; zxMcLMrLginm(V)Y#jG0(T89*G1I$CM{ux>a&kK6pfs#lKh7A3~Si3llqmVN{OHCd<2Zc2`kh)!hTx0gh4UnL3_)+^wG1syX!Z|3BCEfnpM&o;dHM2# z8G8pL!u81H=CBeH6cXWED_iJV?`QFQhMp9!xY2%LkFH$2J}PJ`a)o&m$Ao^Y!-6sv z=~yp*?g3e@{cJ;5ue*GM_5)!w<5{r(dr-7`iq~RDy;xIUPd`>bS#SPCJ3_?Qa=8JI7G8TA1JAIuyi35|9Qc5{LvC 
zEJT7+Kr|#6&=3O@YDEsQ&`I&{e@<9H9{i^`pjp?Cc+x;}ozYbDl5%~96+5{ay4Hb! zFb>$XYB;o}Q~fe#koY#Oc(vq1hT(soErVomG!!)v1W9B)In4q>LVkYpi@ECv!Bgw- z3YjSq3=+bi(3Mw3hrFBsTqO9zcS66DIMETsPzWE|$i))RimoUxQmZvJS^oM5=_kVW zvp-y})eO`iw+KWo8WI6t@&L)0;6hRh`n}ZI6oq;gEmo71@zIbYW(PlKXzzf_v;#5} z?}K9&!~B=mdb$tzQm#Tb^+nIPk})p7Bmf?1Vj9Phn5cmui2;CU5fQ?S>wO2r498!);wt&M?&FwKS87a=2encP-z==B?nV15T1u`N-Lo(2JT`LbhOy-ahU-gadRimtdT zYe^jOZ|F>h&C7-0#3;kXaPqTI!QIC2%b{3i*!-Y}51x~}d?tv{bhXd;#S$`Ka6*Yk>|b5(EoP| z4e{;G{Ihmhov4@~F_<6M|8P^2YnZ7yoCx7~WRZUS^XL_C&@_K#we<6u%57+Q)P$|a zKDZ4-OnGqWA3hST6h7z+`i+qE4dgDXw~)8hK3wWAJnV}Pp-Y_CYKSG1dFoNCr$<9x z%@};oI{Lw$zP(bXEC?q-JSvb(ot{u>{p@1`xRx5OZAkj{6bD#M>tnknF6)Sy5xYhE8=Qu6G*4(MYJ{y5BMpp_ZuSCq5*r^6&Hd7 z!u&??iaZn5>iUC}of1?n|9LjVI9t%Y+7i=r3Ki3&m0)q20tC(eT}_49k`MNvmF?*1 za}(-fwJ%YqCz$s`Z~Qh-G*mt3iBdY-osyX)Gycw}1EA+UwRYIj>B3n^S01JMZ7hNqA-Pt`Gz~pbNH#exBK_&ri4N$PPS3v~uX{2>Abb^zDd>-Ory;8%mNqR| zOHZ}kP!8|Y$4nFS@OzS^mt4d%l( z<#EB|z+%7adNHq@rTFZD zLAG|<6@3JQrs(J4=lVaaeMY(qb7Yt|_&ULnt{~2y=*pdPFXL*lbn=QTB6qnhu%>d^ zPq|K=`(Z=9rBFyVxyR^fzn>tI=lKlUqt$8j-}EPPuu$=WKe{F0AVKa<@n`Y)m_U3Q zg4XQxGHD!lNKaS?kq6uXkL2kz`5p3eT`8WQ@fm*Pi4%$a60qlK2pNJWGpq6a=P~q$ ztoCX-uQ(-4LZ^xNle&waCviR|sk(d>45PoF zl~|b~TL>e3eL;Jb@sresBRDQ1TcfJB@|jwGTC60G{)OL-PebQ<8nDNLbtUk${si-T z_YW*vgX7_?!&9P$t_*`0XO4{K!zv4AU-VbkE8It4;hF=#*uVZ@y`$JxKu2^%b=*SL z-Ji5q^V>UBu!t93Wkmk?CH(7Ku&xds4tx&&qNRU+%oha|P@unxoz>6)g*g#Y6>BXR z3PEfW^829-@_qv=+NIIOP!DL2vJ^NBJyTe#9gG{HcUhbZ{Nt;e0X|T6_NRKDCbTs-K3!T`8SV(KQ){IA1*{ zmH!yZqh!CAy4REAJ-F`Rvo2ab<_TAauId(0z)Pj1(L;RMtC6M9qG`(#NwS8eYE1T?uD0)1y7x zU6ZWFy}DPYQ+297zY^H4ZqYQXRGiVX!ZbwJIY1la39aJ#Uy$p!wH{PYc;NiuoUWK1 z(sbrZdJ%Lf3_n3jFWLwq|HQVA7DBWm%~tdX^v@zEKlBRW3*~Mt-Rt1@VF!9gs3351 zZ0g=X49AE|z!E+5M+|>d@cj5pmj{X#1V`J?)BB8rb+ry+MbDS6H5MN>A4%T5oD;r)c;TxVcFt%9B|c6b{8oA~iJJH>E-Q|r<6lF|JcC%^rx zX9EUeilbTRo~D8$O>I|H?Z=r2-Gb%C7c|8-uU(-a~d=ddoVPFnHj`S;+DZEJWa+Mm$5byLy%-j6d|>*_i( z=y{o41B1s?QY8NW_OP^!@!!={e62t4Xw~^KkCN-zhFBq)MkesgS0W0J0r5x00+Q*e zdD_5Jxyn>^;@UG+g@j5m1#4RbC9Kb{6B;Gv}eN2fm02ed-%0w8 
zo;VS`&8sKcsea#p?b={m=&!jUmmnStwRQAZ4<|+>qGY!c$oqV&wx|4wB??mlH4Y{p zgo1BRCh&A?`1&b065;5R&oc8diD32H$LkOcJ-zqjAhm7{4pTUi`R<=-eg}7{k=?>C zc%gC|srMPzHk6Mah1v&75gYpa;!mG0T8TSj0WBh}t|GtC=Pu4#Xvl&YYEX#y;}l_G zOW?_i+XQj(l?fTb8TSx;(XQCM{8@s=X_ltA7$nwJ5$F8V=jhPN6Y?weh~?E+A~&s# zOpUi(#HgzN=_g)7$!{-X)4-(kXeGLa*3T0^Gj4!9xys*!5#)JY{UeJgSX}!YFVCzvkTX7cy*#SwpM$9C3l+=1w=G~0r#<=g2b?obLNW&f zUXYK2;r_*=4is4wAaqRF8R+rAI0apCq@97sL0$_eN^lAcaser68U%-- zdb$>?CCHg(XK9E`!AuWty!Jl`-877l5yES8B&^86&ka}Go!)Q2;!R$s($9~k$k~SG z^dx7hrq!fv8^ze6`S3!-@=|2i0eNshdX;{s>GOs}A*@Aa!O@XMic7rG{8oGi3boi= zK^-L)l>E+t9_-KwgImO_ag&tfu)}Unz>4>)DXlRC7 z9`qSi?uGx^6%NSCJCyiW@xXrC6g(nW)F8W-z{U5q0~G!9uXehy;4gttnE4w#2b{w3 zW$hcoOEpVDL6%VHDukSGcvVZ^N7aTcmWf2??C0vxW))WC@iy05DThMNbmmw0_KL#d z9mlcqz{tZ7Ye=+CeMO<)Lk#+ZaHjhSiB0r$M3=mYLi?#y>_gh8O$1TcA`65B2v!mO zb&+l}L}2bt2}9{kKc~n(3)H-Quk;!1f1h5j6UF(^^ET?#HLbZG)@@xvpZa@9rUs02RGEyo+9S)YiZ~S?Rf}~%8%xZc-*lWV@t7!&+DkxpC0bPHrj(DYSWh zg9e$Y>95VMNu1x}6Srwyd-SU;_;h_r`!SN$SqcP=oDduf*j$di)@vm9uTxtoGRZN{fhqFp_tu;BFHZ1BS5cxFy6{jO5b0mA*c{tK zGC0eTAQ{x%9s1#@yL}jQ&#L;TNYw$A0*V9jp%{v3JV`z*g#`^cUeUB?eMXdXATOks z>+F~Sr<)t8PV=}Q9PsMb-Qp4C5?=p%p(fP-6Z6<*Y%PrEwIrhuM2JYl#p~w@QEaOJ zUnu-Mq~7f6Oi%|3R2*=)>bkdwp}42E}HG~|wr`_M0?sc@nxVqTx!HC_ICd34cM z6c${MG#&ma^cP)GKY2_SB*Kw_1>}9`t?fDl_+Bjz2LM=r0SE_|?_!BS01!X`0Rl2S zARok)5RvsOPQyn8;6taMtOx`oLn{%00EvJA0s;bpuGRqo*%a3UG!6)dkRn9-p!Mz> zz@Td!gv1~u14KW-8z2!lvJE(Zg9a#If}jQvJPQEtApZfc@$e9f$HwrsY*W040ORUK z;T{49C$CEEiz4K8*Y$XfhkNo6V8#?#8ASWgwdgZ$cpkwNP3IBJYeTPAG&4^9oPj<~ z#V+gWr%o@~IuMBvm7yRD+C#DE?2(#83=mj4R79 zZULE4U`W|a(@UzYl|wLJkzMUo^BA-X!>bdkQEs< zk_2K6n4*%L@kEY*gnd#LF1 z&kaoxBCc?EYxi5hqDvu)s&%d@gsuGV62GW(q?!f#!G-SD_(UmuGT_^qlASU z7$NF;5IFZKZ(-1C0e4_A*gmX}uda|6S`4D$no#J0F1I~LpeD~!YF#LNmGa;d2WjP} zG=-J^$l(Cf9Qj8Hgu&`y*?`Lw3fe;>ne;dk2!_Nc7zq#u*3Vo>fY=#CRD0C7}MK}Hl3Q_fr%|9!lN7^ZgtfZIo zZN?%NLr+Zfhzorz1#ZCDK+%uWiv4n-9**_B@IoP4u?&xc9ftm_wZgg5pEGYA z(fd?YQ8R(!nG}z7WX1`SW@;!fXbSx^FW;!VSe;2JR$z)f=1ACt2Z={6VL3J|f_1a8 
z6br~nh^d!3wnzeK^iB1NXJ1hKr8R&lI+ZIVP<{aN9f6lmav6C8=1|bEo{oxrY+&Cn z#WY11p=XQ~DwcBi$T|Vl*6QLA0z|tW=#MR&7oR7+od*`AAbWKW9jm5_-`Cj?|ef zh}||J^*S+=5(3SH@*@DlJ z%Y0S~RSb$=$EK)D+Uqt*q@&{$qd9rRAe+hsWB~}!UKtURhKjZXt0gGoFbh>Y?I|#t zT1NjoAH+8!z2)M>M+ypziK2wZ5FdWyIK$6+1ARX|KPDiw2?d1pup%6eqKHMf`ZMV2 z4)4t?5F)fn*C~W?#XkXBZ>JCx2H83)i6P_?D{0D~h7V!fPs9X&VTczt#zTr2$Z3S0 ze_tg2L>UxPmnCFw$LzURmnu=uf7P>t`V0^`I8K^{C0s&C{60mJF2ywfNlnq#$R6!j z+*lzB;vEMI;%aS(NHnfxKr&kQ6O8jf9N6J_kovvM^A&?~K|&ri(hr+?3+=f4>xcdMYfQWFGTAl`z#{M8d$y0S%rp((ryguTzHH!^HG(UZf8JF}a+=g32k4O$}=aI^hB zr?8R~4>Lp3E?7OMK#QuQhnnHnP*?>B`C{15zB5a5196Jz=2G~wYT*#7U&z_ITFzR; zAeLD2$&M;gr`OrB!-oQ77*wypuTRZdfn$I;_Jh|21RbDu}6lRx{kN?1B8sQAM|#9W3WjZ~lA5pXoiP1Y~C>4F$71 z)vA!t>Q>NP;14b}EsMZt*fIa_{cRrB)?97%Mfgg2gug=L6kj}d^-CBFA++636pz_@M#8u~^Fp z>%iF+Lq^UA3N>7oaZ&EW$38eOe3Te`PeG3x?5p@mQ z_#mV}(%cztr4kwsmKpt4!DBc8J0R7O36}l97kf%Ew9IsLHY3gwutW^mRW88=+$&X z8gr2u@u)<iIn$;p4B@rHH{HvAFncY74JB)#%xCbJP>hQ@=>>Z4S02h#h{vm&n zkH3iJ9F0)-hm2!so)FDeanHKG zYfG{-(|SR33QL>}FW%Lcv;@wYR_PuM!(8lfxS_xqqW$r>*^5i(dOmQoti!-PZVz1G zBBKfP>tu~|z?RZfRVQ>@ksd<+E!_CBG=YlVL7(*Xt`_ov{Ahp0P`yRKrm9isEIn!t z{QVwv>k!O|9128om%KsDeHV5e4SKP^O0<37CBG32`r&ZzDn+-}Yo zO}*+UCcrj!(pmE9t5lD(pvoWGdH7k9roiYrC*9Wz*}GG)qIN;!1cL=|GozvOlXgp% zAB>mQV3Ve9>xY!+%}zlXXa~&JnJHf{l8sUEXcyVlV;&{gU69P%RTIZWimtXfFtBMQ zXT)`>x;f6-4@B?;%F5`jB&Nng#~-n4FdoLNf^P*iNeOL8*+LF6Ir_$V$_O1|N+GQ| zSbeJx@1>94M+tz-iL8^{HJu^*2}<5TgRe3L>(%5DklCAmr$FI^FN9F*1VH!>2Dp?{NVQr|x+QcxsE|1s(F#T6%THtyeH zZ8%60L8!xe!uTh5ZeYRNv&qgVr2mRniRQGF7@8gqN*@J{_FN+PU!HdtCGGI!&8tR8MVZvr4b{wZ?Ii7n^e;H2I`~#*Zys62AF`(@OwJ2 zq+M`M3ozS9&e*g<0@mV_xB|SO> zxq}$w$)8Mw%TGnrNUy^xipZwx;D|q|x~`VFw&c+|-t%Gg`@7cu^kCZP^RXu0MmZZ` zWu((1;+3}FLS;c$m&Z?C{HZO?v(n51&vI=4lygx}Ff1H~*+|A%sp%^QidG4a=3HqY zC-MR*7YxiO0JGyD=tJFy#!np9TKNwfiVnEr;wt+E?H@JVfww~Aa;)rN1`%}lg}@kc z*lKTaEZUo_F-i4^n&;zrfK9l8(F-z!TEQnDhks*mvK(Yl4j}z;T`5ohjO+VCiiipU zghIO92Y2du&~Ut{2+tidZ9;s0!TYs)@TN=IubE&bRxXj$;S_3+x&|o_+Q`Yk^Ay=1 
zW&q);uPmCFAqhO|MI?Z~2j*YjrB6kB9fLOj5#nAk+#$J-lMPa&fdRgC(pK*|*5f;S zd^AcN_R==hq7^(Rn5*5V&<={rdx;n2D2TT8w!T>K^K$dl!X>ep7LVxEQ!ddqFiEQO@kK ztJ`8#N;@V_na6zMdLSL>;R<3b(ppYbe|Gq64g6cszEGE(LT=g;v7JrMOWhF&;luIT z>L?KtT62c~IAB-;o#DBpu4kF{Bl!>~j}5^io4PdFLhW{xo0GL2uB&-%)rXmUy!D^1eQc&d6s0d4mz zH^H{fb5@7GLX|(w5O4BAF}$BPA0GBXx!?sZR2h&9=7qB~@7;s*m~C<@JO$#Lo~GI1 z<&VKXF!l)8UD|zm3q`$9yao&I6g~_$L<~C5?KElslmoON#w#84DuYqM|9R8PbsJGT zd^c$(jQ?E7Wm>ZGt35tfzN#lo;U9JZR92~Ub8lxA7I4zl(t)`zr{t};hX12bGY#`V zD&R8A8Epsm!bxyN>Ez&zHG558T?Oz2l(yR)=iTdE8#+(y!Zig%pbp!PRa6--vpTJ^ z0Pty_Ld5n0w_^2uHG~o#l{GKt@kP~aS$t2l!E0+aF`0tuyMbd~mmBH4ab;#6FpRNR z9IwyLKBJe2Ny>Xz&0>b0SM-k2lnPiL)ReSvECLCDJEnQ;rJiC|-UFNk-U0fV4?yx< z48eK#PB8Do&*`br(#=gI|7%UUv5U=rxV@`W)Hss zyzitzzaH+ML@H)rL*h$&Z4=If={Ot!QHZj;Ld-7}H#B)El_Qs}@GL^NSkEe!V)!lq z;|^TYJ!C-M3WV%rg;WTEFA6Noh)qWv%HvJ4AQlea6*W6*l{}n@6)J%tswi~5mYa8| znsb6R)<}I@t#5`b0PO5Ig$Tpf`F}yAoxwBxJFL|!N?^Xiqomqvu6JZn{7~`saqV|o z64=}_B+iM&LyjVV3J4{G;4agWZU6laV?j=iUz&XKx$MpgWVlYbWZihnbQg9r%Lgc3huxbi%Mmm4qjP>gLCdGfL4ZF|E3z zqu|EuiLu5Q9b3hwfl~mRdJiA|S{kcHh(fYBkS5gfnSrOX%Tqm~N95djDO&+h$^`4+ z1{E=F0r(HPc-q8ElIDg;B8?`s(S}Frc|qKQ*dV2~)|Y*UpHoP33_h1XWk`H3PN?ys zf%`I`^Omii#uoobo7A_SNAxfdydmQDE| z&{ro3(jvranhIYGR<~1nsaHtEJuuvW?QI(7SfD^gfs-|IE16d?9*#VPw+mQT&woPI zHWTazypp?3N>GXL?jYo;fQb~wtFZeMyV1(~5IYtZmuPa>AR*=Q-SXr*arCvR67my%?D|g<>tUCYzAJ@D?iJ1NiD`s` zG_@wNxKnYk>&o6ahMs~nMk$UnX7abgs9d;^Z)jg^@%;xmVSgYSjx!mfcS<}0vLI)w zgl-94irt^ZaCHFC6sGksM)@+;8Ujl{__e@)@mZJUk3Ue)x{pUwdSj0ui|28P35Wy? 
zPnQPVc|lKZR2N={=EkiAV_63>;8&A?fR*M2i%>&SRh{M>Pap81Nw#ynwiK;-pu07) z|BI{yQp8@eZzJd2p`r5%LhD>#FK{l1ueX?hOD`{S56(t(dZL~aRYDI4LEn?t@a$40>g7JyaJ&J&VC7 z^P=KGe2!O}N?3|l(w}wOWxhDk4Lb*iVaSh0jfp?Hd#0i#Y%FFtLQ%s0h)Cf14*Z$d zLcLC*`nlC=o%ra3{ zSGyf`PHJnwUVk*caZPN1LJ;}fP_G`k155hY1P4n_)&c76DFrBZ4_=+by+n{LC|&0L zR&*>kq-f=@w(%0WE2OriC4)MNYGAFVEH|{=<66gtv~{lXT_b}MB8Rg@2{0`TOJv;A zQg;G6y2e`(ZbU;%cTv6{}1nKuao zs$)H(eTHO3Owq^>j+Er$g!BBV&N-zcr#p(t2Srq1cuJ>R`7fR zmiymu`C7kuxpT5fC|0A~ShtY%0M=0GF;vvw{PgA^*CcoMY$Qr64mUuZtnO~9R<4+?vuH`hyv zWMPVOUFhp@E>KpqSEcVm9obV~vVgdPJ&gT`8Tr|U7EXO1^u_^cs~^U&m2kxCLc z98bwB1bx){zKqe*K002;DBU*o^ zpMcmNbf!y{JYSu|NAj0cW9fZM)JnO2Dajpmuw6N3HdSvH24z>S8YDVJ9owfrBP&{g z-n)XrC81LztXU5rSqBUOkaw&D8+ai(d*>ZWd11C$!{%CmKQF)mM9@PC=NMgpy3K=l z!~nN1K`p622j+uLr#3{=JdKT@z(~jCUo3~U6V*{wG`TL@1xNOt0|_RIJ0`=1U$V;C zcq^|q!sU$!n zqopN*jhslDSCEgeR)zlIFEq_ByutxJMXlz%2d-nu*?}%EF5B#gdZ=c6sXb`ycy0j- z^x%Di$6q9m10l)$Ekot{K(T`BN`4K@8o!^n|7a9ofGtd$u$zvsC#iHThQ+`3gGFap zLGGYKMco9NUcu2B zLEV|Eco9rx4$7cFBOFDVk0ec+PLDV^*WjKHg^a^N+o=C#mqn-x( zQim!{ldz8#*X{PbvYUmuDDUa@XX>n@>$d@eV(*l5h?6)tOJ8SW{#}_>_HBDT+P}y| z`yt25z^&g1O_ea@Bh-9tf|21!Wp;YyRZRBf3#~x;_)++K?B%6!J`aMSb?7(ApF2f$ zORQt9jA86nE++raeLc};5)(s)MeNnnJ1-v|xN&HBJS=PPU}%pa`bWq(80Ad?C}`!W zjdE;ULK~8RTv>sjH|6}3u>AWP9BJG{a9Z{@i5v;jbfQ>JWnhg#!_#%e^s!-a^uF7% z4>-|9HX!%|h9S4ic*C!no|yT01X}**N*nb0+vZ$o^Td;XU`%je2Mqy3Erq zRUWAbGr*Es9eWvK%Ft|8UBkfrDEi#G8;~NYfc3B8DUeQ)^nvCouwkYuj&3!>e+~;@%EqcT>REPHcs4N-m z@x2EmahHw2)CdOqsoagPq(edl&njSpZg*0jl&X7LuUs$UH{u#h59u+|C=;%~hFAzM zDJNQ~7M6a>4GEu}dJSE_%rK6=Y6)Grb2-!aYb~(Dqo~D0T1KQ1dv7GCJWx}`RvQeJ zcBy!l0&m&_KHyLDlND)#Wg_Ew=8BNDsUYzzptI`*3o*M~gq56=m!rX)O{ZB4>Y1oe z4u!Z{P1y&PGiD)pNiQbJx*LyWvWLT4C6sAPH%MzOjN{PJ;xxSYm*#4j?wuD#f2w0r zG_eh2#$@UuN`1C_?O&fF2AbuE*B^}}2 zu5N`s_<29Rxp)XRu89C%{bUx@{`0KH2uBi*5uw}OTBR$ItSHRr;Q<<0liK>wxvZR_ zA)sye`|`?q(MtPJ%wTeS6zrG%{y{+)2X+rSP-6nOm$}^Oy{|OvtOKMTUNzZ$E7GYUp?0QYHVCN^QM&Vs_&1saBdwB7(|Wy+VhzaPB`(i}x}S@2aIU20j|oH; ziJUD)mcwkq8?dOcmA_SD_qHssAPF56g1-}TZ;8EcM_lrvZ$1T>ndwvlr;dx4O*n-O 
z`}D+F8kES#f=S-+1Q^NVvLq_lGn}0nDCl8Y<=}sY-W!Z&Nt7yQY7u$%2Q7S2F`w^) zgTqTavXYw_aKVt0WZ#2QyB zP2jVLh2Pxz3WS$tTbKeGBF;Ec5tHg$^S57E?iZdyjja;LY=&7)7|O4l?MoMinRSG% z`<;0XHhH2X&6M!OAFJDj87Qw-kAFdV6dLK1Z&;fJX0&Y~7dr}jqWG>vKxY9_?+eWo zu6@Fi)L(B0Irt0G17J#$RNvnBr0qD06?xKSEs9$gACyeXIuu{rj>KXt_t8iFo)8y4 z(MKM_>jf7!{l%PyI+K8^qBZ_n2-A1K=FQ!y#^a9XM44&blJbHRENNvlR!CDzlXbq zQfo)80z3KePoZVGMH!ugtN?AQn1NWBtnM|#e_X?t+t?v7)e0G$U9+FMSj)h}iTOaS zcu&rbL8`i4zav>Cpm+TQmh_UnA(q<<&kfLzN$&9$PB(?w0Vs_IAzfy z$;W*t4=tn~gIh;c{R$;i2r~KvAG@v#09=b`uhG%z)Cq@ic#`R=yN)K7b*6=bE(y`P z{ERPpCLtkl+D`z17EnN+w9@A!>>F2WYWB~cC2RtI+hVbIqe3{T<`sSDOVFc;!=a~k zPmq}25eQvZUSMhB&d!WPnX)%T&V&PeH*`G(NHNlU%F{a=V`rfC*i%+T11R&MWXem=$&9Xr#xL6ACSP-*n`sl9HcQD6!VTo+t;?cnM9SRKHY zm753m+wLNzOSs7u44lvh^F$t+gCx<&BN3O(G~n$Wslcq63!4jJsf?(7#JJ48MyhosQ%-P56+5OF-GQB2^ zZ62*Miu-BTOc5f$XbkT7;i39z24mwzgw)=CeT+89^1SDh-G6sF4OLF*p=9$fMTdH{ z-w=o!SOfK*3*t)XhIzhtFj6(=@51`7i%n8A+Id%x@aJ8ut$x|Jw}3pe;3OgkKZct? zD3ZodG+8*Vo?eQtH|KD(5y(6r&Dw3_;Wh&zA+n$&s3+d#vPcL~C15^%PfJU$!ZX-Z zB+oOY3fcwddYG#8wapetZ06)*ftVP~dTbQrM4YS6eDl89!!>QCob0E_pFA}6pd2ZS?c8~ws*3Bqp^KAb zRO@lTWe3GJ3!k_>k&RMU{kL;Q|Ez3fNfwDY^TOsCEqUM%gDkT?Lc*;`4$9&(=Hp|N zQP+z9dA!pB)uYCfO(d?(K;E;hkO)-dQiUd|U=UI5HM4g_OiveK&<2eM#seNH!0VReMoyl782C_qqZ?6-+y9MJPJ;8i zoq<{4V{p86poQLB&I!`EtPj?J9Y~9u4FI|&I24slm`B}>F$5jxkAVbZqafipRM{<> zuqJg5x|{%E=3tR!o8%R|vF(+S(*>>|Fc~+SjF1}L1d1s9_{4_#&}ctpeD5goE$~^5 zh^I_)yI<=&&9i^ln$R;6bvx@`PnIwE_(wJ+RjG&b0ZW0oH6{562 zOYcO{9iCRZz@M(OfCF>>#PGGtEeKQmf}G4uh>&gyFk$gpEjKAYzx@71y>s{ zkb}OI!1{$1i)4C`RgrO&F}rWbH?UQ*-aOv-r2a`8Cen73WkY=F-EZ7=ZTlEu>@$n; zp)^~olVfU(7-H;Y$E4}H6`qxL>NBc%jk<{Ze?gY=4)BrrC=l$gJ}Ina(m~am8%VgH zykMJPi?_0Rh2&xG&GwvzHMV#;Z@`-2iQPOfG!yF+PaGeIVh62!1sO(2rj6eC!)S9r zj7RWkHcWx$N}1k&grZPASF+y2?HzqN)A3IF4B$sA?W1gQR&zjMxHb(MtJAwI!4BFP zxyCQcL5JkwhCa>R^;mn^#ta#+nd%EFXdOY3#?T1DS;v?0>NB0}{gMbCa5Ugo-j+@k z1V1(MiKnDU>h#e)d<;`9oJ+$5e$xe>WClggo34t)H>Hab3czcC>={JhI|%Fq2@K+K zqB@z%ySEXBLO+KG-I4Cchs9yD1>oG6vYcv0c8)Nw-KtD;j2G*7YrYg=vEgg2?ZAUS 
z_t78ylxH-a4FKvvvmn`5Ex~%MxxAh9tPbge_PHdFKb<*a$42mwR@fsS5$CYGdzn@a zq2$~ViIcyp5@8O}D)&@)THM|G3qiN3GZBwM+~$(KjLnTuGIbJSdT#O@sXVj%@FUHK zD8Lt3+BFx(e;`KIdMhLlWtx?gBRl6-Pu^~1x3jAR8YNP`SK`f7D@nh}RFE+$3 z)kn8GPq*9>*75{CWA^k^S`X~?D%JqE0*t4?sRG=RV%Rh&V=fVUgk=8G!Be#Kz)PiLA1nqSD}=J zXNX!nluu#6MOu{cb_ZJV*fpPd>on1Mp+D~Q=EU$fkdG5qZl_I$m;?ze*~>|>`aZI3 z)(Ce(>Of|v(_+sooOkxtu)tEk)=)oz0&p%RMRnPu2V}I^3Ku9Ne8UlqN%ukV9;^vS z3-Vy}2#4AIC5^sVqmhCSMkI|fTx4S3T{4YYl2jLE;m8x>uy@p+%N{fU>r_Fd!y9N~ zGn;VS8nj?ppH)J>Qj&K%B8h2fibDO}5qg5TTi>&2pmK(w4}6YBjwLG4ERC#zFlm*NHkNAo z;0!_eM!ceTCTy}WT31*d3&5?E!c1ITzBo5Ez|+j;qYWr@($Eh$G~_2TykOh>%>gXZ zO7&=g5)ESe;wMJ|$0YJxw2IU);(&~9pL*z|kFSLWsy8u$W2 z3XD021Dso7x@wfd%5t~n>N>A67P`2QOzRNSklM!GSTYW6gCsFZ`m3PIe=U!Ss(1+= zxjA-s$b_X%{&vmHSbLHx2Zwb95`gaK@3|*4d_Yu$OvE(&viX%Rr(PpPAdok!qp7zj zWMCzhvs6|%FTdc^Y*M-0U%dX#CaBC=kc|3Izxv9@m1R$oj*oN(C4<+5MTF#DA zvhRA4_7lm-!Dd(i@f3Ry+tab37PuXu* zhl6Vz>TH3&2|G<7F?^f*m~dWK(or8|N0L9#(H{YV7=7oyktjk10DNNwCsb8o%bVbp zJ0}a&fTkq3$hj3@0DlpEC*Nps|6ihaq)chsU&0^E$1IGlb-0|38A;h}^Pm5~Xs>yo zee_##EYHSK*mdQEJ2P!M_K}LU40Z~HV~X&kGbCp?%A$22-oJejx08s^68k|}##INN zcbZw0iko`f`7&1^IZ2gdmf<|?cSzb=I@i$2B?W0hpWyMG83R@Ff2To7Hzlo!@1|0; zqlDUr?if_@&ap#Sp@*dNz*(K~0>+dAS}!t>frA5bpm`sCzOT`mKR1Bm?Eu(Sl zuzBdf7$O?tGMcFBw&F~yPJ)KM8|a+K%9cn*S-U{kN!z7oYABhe+eI+QVKTppz;{uo zgbheLSRlH$u>JPQ(V@@Uonv#kydB?(DVDiYjHDSTTvjv39F6<_ODg5@<^C_+@)rF; zL-+3#)2*vA9kz3!wfKS5Vs}=IE{;Yhu}4t4c6tG|v(N1tz{74%wb0EdJLK=S+a)9Z<_fuPoM1Fw#aGM&j+!q%$PO`pk|e7%@S_;^>ljCJ|JM0Yg}#@bJ+{jGj8je+shL+hj72&A4<6v;S$f^_HR(SYif)_ zH*W32{|@*^pPeXPV?CK2AyI_?4QUa69Xb z@uY;4uy=qFV5CxOhWT~CN-SAXu&|?@$CRi6=z8Ms)1aEy1A4gBora!eNg%^3f9>R} zObyRo7}Qr82>$l81I&otk}=Y@Cli07>mNlDz!?57?>64X+_B0QdP|B1CKpnMx0G5> zCvSfs%6~QeQ;@7Vu(#&!8C;3GM6F{bLy3AQ5YIvDSFl+(&xK4hZh0}V$HA9}3s~Hh z$EPmWJC;xS0|Xvth!P0-6;+_M&~^0i^-$F#k@~uBIi^KO;Ara|A{f1-_bolrLcfU> zV)%`BKu6Y>6jvdwILTn641H>KzL7H=(Y}eQ)^4%`hGW9hxGlV3dF7^KhQR@r<918M>&|BD2PKW?q&(u2NG@uOQ$TjV;81o8KLm zeas7K82G+|bn3^l&~RXw4JjvPNB>Y?14)Oi9GLly597iE^DkFj^uxWxT-QA^m4>x$ 
z`TM()de>dT1uJ}jzxjQJUXkz{<&{M0^w)H4>Q7A$%$;C@uAHONtsp(ienvn{=u>o@ zWG;~ze4KOdVw@Yx6V#jvEZ2l-2tLU@0?CMi;_(YnFjH#kwyDGHp;-F<8q#uPH;BoC zp;7-XDt5tA^Z}Yl_Fr>Ln#}HB+;(%HLn${fwZ6e+8#vy&a8oQokMeK6rTjAX!RN~g z9dOik75H8^;5QL1*m9oycNiH+52fC1{F*JfBn(X#F`fn(Q(sX~y*xn^c-)_Ku;Skakf;_`BGG_G<#(U&9Idje>XafS|e z#+&)cyCFx`LocQa8>M45c&u{>+5yuDXF-H_TXRJ1tusP=PMwjN*=j&K+?XhKd) z(e{EJEL}`W<0u_i7BS_AU70a8C9ZSE3y}{}<1ihdIqg~y%~^F)Tw5|mqIJ#JiBO6A zy$3?p=-7>r4|c?bViL!X;FQaCM+3xDE5H_8_=o zF2!3ON13scAD=4>mix{jg!r|EdM#x{B-s=D|pt zcpC~z@YD1Od=x|e-LGVPc0B_rRr^9us6xvBOZDXUgZ%<+*Q_^xYhoh6mLQDb5qu0k z#oV^D^W{&>9~XKqYmfrV6D_cJT}_gpJS$Zv3V$_eGeJ08IjGfR{P@H^HkjF&jwP9j zLYqs6+lP$;JND-(BTKjG;wJcbv*Q5qYT46+z0hRVanZ^=0Bgy^r zEr1#p-p>bAWVQQ|D0j6+JN7Wkg<6l)C!uK}PC>?9e@YZW6NJencT!~l9GCl`NW!7}h7ZJh zIk_R{XX$B-vyEY}iCIsvQlnPc0QVb4O2G>oucCG-Rth&HA(C#f-)3C)6&77&b zsKJUt$m_w(3H|wtSk8lDu8Ki%KI^=pzD2Dtr45mW$n**31ia(425M=XV5^1A?E@CU z7(pq1&K24)aQ1w$9zl~F)Duy9v#YOe%BQ(6kT)%$39dq@c2Kdbn+Vq~;T92yHAm$@ zvqI0Y?^}DM`@rzMO-7|4Z$8Xh8EJq?rAo~=8jXaU$l1*~luu~nvtdUyu5M9dKvm$F z2yyDr;0zKG4hda|pb&TG>39GjUvGi{X)DiHLe+@}P;7!dLNVUMF_EeTj2KqU)6{?y zR%A;s3TQqZD0EN0cGAQ8AkeG1U)rM`N@Lh}2}Fv|0EZs*VLc_e00~cc6JTe1e^hR|S^q&*&^&wE1S$^3(hAQv!2py-UL ztd|RB4(|b*aJ2kJpeZA{n<)SoIm#jGfbist5{e6MOhs2~S(NCR)+Wi5L@RQHJGnI= zLu{0eGJ_Wb35hV)Fu75K_>`vuR{3}q3baSt;Ff!sTo%jBRbPObNUdOAVyIH zMGDT11P?UuD!wwOz>_=$087J?;b%itUbz6TymfcdWRTP^gK2_!Lejw+Z`Exg^Qv*<&!JdCoWQKulNY-Ii-PiYb^UP+(>8=KH5D5 zzV^gH=#nv@h|`r)&@oq`mEm(G=e4D+O$VR932Ba)h(+KA7`;IDt5GXMNdeVOIG#xS zN%eL?#PAZLSFzRyi|YtuW0=Ispu(D)7SY9VdBr)O+o;SL0iTdbuD=c6HsARiJ&e#d ztsXphZT4{3ty)`yc<{$#F9f0$6X$)KB2_ViXT&c-jzAIJWQ3OqmjdGR&v8?|$niY= z@C3WX_f`2G)Q=B~qA+YmGwR!Uq-HS5YhWJ#rUGVPLZP^S)(bUQI2C(X&mbX5@V#d# z2Jo78sex-Uzxcv|cH#`NNLsggXY39qT+VN4j~~wN$%?`Ok~!FFIOivrH`@`@EZ60b z7Tv@u7y9fKX@kirGM95mCiDLw&MiJAg2~)EvB3rttIGRiJ?+Ru37A@3Jrd}%=s`&U zK1j+X0rxU6D{=EoN&m>NLYeqi>VK8=w*P-{M5QpYScAEO%$yI)!cRE9FM*6mhz)-< zUpBZJ7BVN69s@}5o`bcaUz$>5BKFV>$Jp<>yLe&Uw+~Gf#ivZ>oM>~;2$H9 
zyLYjj61YJ%+{A9_KLahVrV~3hwT*kCV+?7Fljm9X*LXZ_HbbOgVo)aI<({yx#J z_lbd^Fu)WP%)a;L=ipPjaOGc72(7NpUIt9@p)o?@o2tT&Xc`J@d13>{&qp8H^cEFg zSS`kGJ?B)^%lt|tp;XY#9cqqYeE=0kG2<2 z-gPQw%*b}akdztqHcI7ME(R<09mW(a&Bsq;i3HFfwv#0ZK@^rHBF?Ke+r)RgE?!2luNEc21yG1{^ zlM5Rzuny{rKasg6(Gmko?N~BfITuIbF}fDN`51r{C{^7pTZsBDz5-eENo`^1??n_P z2D&^+y0(EgDCai*?VZz8g0tS25x6hPzk$|8FoHtY)5}GE!Y$8`|KTJ@;Q9o=_ z$lCfQqGuEQ@62%2LI1K+h?O?u46yz;%HH*==?Zv(-GsIUMF`j@f4Tez!t`y2p|Ck_ zkWio4Bvbw{o3HZDDj}zn@ut%uz=qNlze!1dLneMMW39%LsJvWPoN>egO(Dq7F%eGT zCO>IVlOPWbT=4~>#M@@cWwjf4vk0dZ+~^R31}BhSaGTL0aw$&VBCDe!aLx8nL1g>J zstSCJ%*{sZ{R7}Q@l8;~LSRO4wzj|^Qh(;c4dNz*Mr3_8kqPEi4K}w33AnTeiu|Rt zQD9}z(a4LlJLnUXJ0GctwFFnc7ku8Zhhh4uth~y98yPHRD;;FnH`~XvRyLqHVP@oo z6&U@NK`6kBaE@z}9J%w;a)~W55CdDzm+I+pY60w^0$e(D?Dl-!2}E)2iBSx2J~{84 zqB%kxhNRTUdIvpDCP&zH7&aaze}{mYh!fc_-9gJC0LNPSB8{;C6(GL=?y>sJK!}5a zQ;b}ochqY_5wB%~oKAy5Gb$Ls=B26sX)#8LTKpk!*jKLaD?PRRT`Y@73i~e&hCkYd z1}M=Nik*=nlbD`*`pfzr^8TSLOl(T#RBfusp(%|!2Qg`!cvnZR!64}0*cxrHiP>KD z&T2X&>)7(-=m18A{(=4XomX(vQ|n!DMzkz58%TVqUSO&eT-LO@RwE56=Ch3x{ohIk ztM8hvDEy*O2d_^v@`P?PV{nPpp$`t6YU|{|aRLgfu#$LT4)|)Hq9qxxawW866Qk1M z&|hms*1l+NVBDXldZhN#!cZ%~Tb;(5AK`4AOKi*-bb3N(5Fn{1by$Jj{cYG6LOT0W(@M&7<&<>?9pWWv`>;2!n^LC`}Q#-((9S(blMjwk$R~ z{5U#^7(kjs-O1Emc>&cr39>kjvj`%Tb_GN@$*rm;yZoh{h$ulBb-=>rKw>CpNnm!s z?Abw>+?m73hBu{%Ww*e{QFHdLi9}L18F}?JO&@jdsmRoq){zbUJre{S$Xml^?P6;P z&!v#oO97`0sI#SV(5WxOX|aVR*Jm#wADM-{j9DlDraUc%)~QrZ`~3Mme)qEYXeNa* zF-+%>E$NY%FrX!_1?{1;RiUf8zNSx9N&iL3NI z6aN>(P~XB6HV2Ob*OR3T?GW~`KWoZHXQSx|VvCa^W8*FNtx;~f8G;tNbV+v!j%T@t z;j0+SW*pWA&>Vs+<`wwwBvKdw>M1#;<9#@un>Ko5SC{i7KF{N;hK)K0Wqc^ zeBgX&HlTb{b(OM3O+3;%A$F|+vO~x(*Q_k{kTZ`+t1G5k6{sT%+p>p{wS8v{D4MZ% z1o70D_k$|nYqajLrGr%NDG?7Uy@9VxO7ayHMF1O{7@bMhE9!6AF!1aNT(E)xRDryk zTgDmI&lr+Qr*OPCqXMNj^vVC64Z!O}Ddii8L^jZCbk}J?DR51Ofbpo0M8RYY{{KQ$ z$9=n%yHJO>ZaK1iX$WOO7S4x45oVI~M@TLfT2fAG9f8iqa6L8zBKnO9$COt5Yw`dF zm0+RN+XpKA*b>fv0Db2s}qGd^A%bRKBvCCVDIp?f$*MhX6Fp7-eZc^R*hmBI7y= zsxf1fC`##cl`*7)4dxN!&TDBN(DT)4NX3;?&_`{7#+6&lsPKRd$i036`go;GE13$!<7fwGFg|!% 
zZDzVFSFhrG9iMvKF3WX1Ob3*fwvMc4n2@wHw{g&@v9F3#XLiE5eY}*uO19axMLf^L zOioBb6vHxW=%fH0@q}KX5wZi6<%pk}zF_;XBYan0&>&)q7iPU( z0KZ9Ep)x&@Gu~&A>J+I77{an`HHD9vR<2{GKaaF~vC_4wpr|=vf zcBhEwyIiHMTYB-^4=73*T-F&sI?T@6ifr~6=GZg3SXLf0)lu@Xb#A5AE_|X~ENC;i z`THub8=M|G6sSs2Vx&}P@KN{iDrifB{$LE!o-Clr{Z*%2vfcIe2f2tH z^@)m78T@UM5Hb-USEV~_C|9E^&-3n?9yIpOXYk~?DURTC8}!14C)Zf7I+kow9*~ay zR|$n^jKo|To$S(8u+creP#&AL3FpXSha5weNj@@h8&FKCWH#*NVe2Bvw1t9s@}9(P zkpxQJKxom{U28guWQDwy_b4BezS>T)2I$4{*6NRy`V(m#$cA0^;eYPv5VHu|s%(^3 zgjBRAdp?onoMx&G@=H-uD!v8Xuy7ZvBZo0j^iVdDA zB}Wg9GF6G)4g#7hSO67}q#pycp%?T$!rag|W-xyhYBv|sYg$Dp5a>Y2%|SFz?b`_W Uyu5y<-^dX*T}OE42>PS?1L@V5_y7O^ literal 0 HcmV?d00001 diff --git a/crates/fbuild-core/src/usb/embedded.rs b/crates/fbuild-core/src/usb/embedded.rs new file mode 100644 index 00000000..01dff7d9 --- /dev/null +++ b/crates/fbuild-core/src/usb/embedded.rs @@ -0,0 +1,260 @@ +//! Compile-time-embedded USB VID → vendor-name map. +//! +//! Replaces the runtime dependency on the `usb-ids` Rust crate. The blob is +//! produced by `online-data-tools/build_vendor_archive.py` and lives at +//! `crates/fbuild-core/data/usb-vendors.tar.zst`. See that script + the +//! `data/README.md` for the refresh workflow. +//! +//! Compact format inside the tar (`usb-vendors.txt`): +//! ```text +//! vid:vendor,vid:vendor,... +//! ``` +//! where `vid` is 4-hex-digit lowercase and `vendor` has `,` and `%` +//! escaped per RFC 3986. See `parse_compact` for the inflater and +//! `online-data-tools/build_vendor_archive.py::pack_compact` for the +//! producer counterpart. +//! +//! Lookup is `O(1)` after the first call: the tar is decompressed + +//! parsed exactly once into a `HashMap` behind a `OnceLock`. +//! Decompression cost is paid lazily — callers that never touch USB +//! resolution don't pay it at all. + +use std::collections::HashMap; +use std::io::Read; +use std::sync::OnceLock; + +/// Lock-step with `build_vendor_archive.py::SCHEMA_VERSION`. 
Bump both +/// sides whenever the archive layout changes; the consumer refuses to +/// load an archive whose schema is newer than this constant. +pub const EMBEDDED_SCHEMA_VERSION: u64 = 2; + +const RAW_ARCHIVE: &[u8] = include_bytes!("../../data/usb-vendors.tar.zst"); + +static VENDOR_MAP: OnceLock> = OnceLock::new(); + +/// Look up the vendor name for a USB VID. Returns `None` if the embedded +/// archive doesn't carry that VID — callers should fall through to the +/// online overlay (`usb::data::lookup`) before reporting "unknown". +pub fn vendor_name(vid: u16) -> Option<&'static str> { + VENDOR_MAP + .get_or_init(load_or_panic) + .get(&vid) + .map(|s| s.as_str()) +} + +/// Number of vendor entries in the embedded archive. Mostly useful in +/// tests to detect accidental truncation. +pub fn embedded_vendor_count() -> usize { + VENDOR_MAP.get_or_init(load_or_panic).len() +} + +fn load_or_panic() -> HashMap { + match load() { + Ok(m) => m, + Err(e) => { + // A corrupt embedded archive is a build-config bug, not a + // runtime condition we can recover from. Panicking here surfaces + // it loudly the first time anything in fbuild touches a USB + // device rather than silently degrading to "unknown vendor". 
+ panic!("fbuild-core: embedded usb-vendors.tar.zst is unusable: {e}"); + } + } +} + +#[derive(Debug)] +enum LoadError { + Zstd(String), + Tar(String), + MissingPayload, + SchemaTooNew { found: u64, max: u64 }, + BadManifest(String), +} + +impl std::fmt::Display for LoadError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Zstd(s) => write!(f, "zstd decompress failed: {s}"), + Self::Tar(s) => write!(f, "tar extract failed: {s}"), + Self::MissingPayload => f.write_str("archive missing usb-vendors.txt"), + Self::SchemaTooNew { found, max } => write!( + f, + "embedded archive schema_version={found} exceeds consumer max={max}; \ + bump EMBEDDED_SCHEMA_VERSION in fbuild-core::usb::embedded after \ + confirming the consumer supports the new format" + ), + Self::BadManifest(s) => write!(f, "manifest.json invalid: {s}"), + } + } +} + +fn load() -> Result, LoadError> { + let mut decoded = Vec::with_capacity(RAW_ARCHIVE.len() * 8); + zstd::stream::copy_decode(RAW_ARCHIVE, &mut decoded) + .map_err(|e| LoadError::Zstd(e.to_string()))?; + + let mut payload: Option = None; + let mut manifest: Option = None; + let mut archive = tar::Archive::new(decoded.as_slice()); + for entry in archive.entries().map_err(|e| LoadError::Tar(e.to_string()))? { + let mut entry = entry.map_err(|e| LoadError::Tar(e.to_string()))?; + let path = entry + .path() + .map_err(|e| LoadError::Tar(e.to_string()))? 
+ .to_string_lossy() + .into_owned(); + let mut buf = String::new(); + entry + .read_to_string(&mut buf) + .map_err(|e| LoadError::Tar(e.to_string()))?; + match path.as_str() { + "usb-vendors.txt" => payload = Some(buf), + "manifest.json" => manifest = Some(buf), + _ => {} // forward-compat — ignore unknown extras + } + } + + if let Some(m) = manifest { + let parsed: serde_json::Value = + serde_json::from_str(&m).map_err(|e| LoadError::BadManifest(e.to_string()))?; + let v = parsed + .get("schema_version") + .and_then(|x| x.as_u64()) + .ok_or_else(|| LoadError::BadManifest("schema_version missing".into()))?; + if v > EMBEDDED_SCHEMA_VERSION { + return Err(LoadError::SchemaTooNew { + found: v, + max: EMBEDDED_SCHEMA_VERSION, + }); + } + } + + let payload = payload.ok_or(LoadError::MissingPayload)?; + Ok(parse_compact(&payload)) +} + +/// Parse the compact `vid:name,vid:name,...` format into a lookup table. +/// Mirror of `build_vendor_archive.py::parse_compact` — keep in sync. +fn parse_compact(s: &str) -> HashMap { + let mut out = HashMap::new(); + for chunk in s.split(',') { + if chunk.is_empty() { + continue; + } + let Some((vid_hex, name_esc)) = chunk.split_once(':') else { + continue; + }; + let Ok(vid) = u16::from_str_radix(vid_hex, 16) else { + continue; + }; + out.insert(vid, unescape(name_esc)); + } + out +} + +fn unescape(s: &str) -> String { + // Inverse of `_ESCAPE_RE` in build_vendor_archive.py. The producer only + // ever emits ASCII `%XX` escapes (for `,` and `%`); we intentionally do + // NOT decode multi-byte `%XX` runs here because that would require + // assembling UTF-8 byte sequences and the producer never generates + // them anyway — non-ASCII characters always pass through as raw UTF-8. 
+ let mut out = String::with_capacity(s.len()); + let bytes = s.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if bytes[i] == b'%' && i + 2 < bytes.len() { + if let (Some(hi), Some(lo)) = + (hex_nibble(bytes[i + 1]), hex_nibble(bytes[i + 2])) + { + let byte = hi * 16 + lo; + if byte < 0x80 { + out.push(byte as char); + i += 3; + continue; + } + // 0x80..=0xFF: leave the `%XX` as a literal — see comment. + } + } + // Step by one UTF-8 char so multi-byte sequences stay intact. + let ch_start = i; + let mut ch_end = i + 1; + while ch_end < bytes.len() && (bytes[ch_end] & 0xC0) == 0x80 { + ch_end += 1; + } + out.push_str(&s[ch_start..ch_end]); + i = ch_end; + } + out +} + +fn hex_nibble(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'a'..=b'f' => Some(b - b'a' + 10), + b'A'..=b'F' => Some(b - b'A' + 10), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn embedded_archive_loads_and_parses() { + let n = embedded_vendor_count(); + assert!( + n > 500, + "embedded archive looks truncated: only {n} vendor entries" + ); + } + + #[test] + fn embedded_resolves_well_known_vids() { + // These are the headline VIDs the curated overlay was created to + // ensure — see issue FastLED/fbuild#718. If they vanish, the www + // page's headline "what board is this VID:PID?" query degrades. + // These need to be substrings the canonical upstream `usb.ids` + // text database actually emits (since vendor-override mode does + // not REPLACE names the upstream already has — see overlay + // mode semantics in online-data-tools/overlay_usb_vid.py). VIDs + // 0x303a and 0x2e8a are the ones the inlined supplement contributes. 
+ for (vid, expected_substr) in [ + (0x303a_u16, "Espressif"), // inlined supplement only + (0x2e8a, "Raspberry Pi"), // inlined supplement only + (0x0403, "Future Technology"), // upstream + (0x10c4, "Silicon Lab"), // upstream may say "Cygnal" + (0x1a86, "QinHeng"), // upstream + (0x16c0, "Van Ooijen Technische"), // PJRC/Teensy via VOTI alloc + ] { + let name = vendor_name(vid).unwrap_or_else(|| { + panic!("embedded archive missing vendor for VID 0x{vid:04X}") + }); + assert!( + name.to_lowercase().contains(&expected_substr.to_lowercase()), + "VID 0x{vid:04X}: expected substring {expected_substr:?}, got {name:?}" + ); + } + } + + #[test] + fn unknown_vid_returns_none() { + // 0xBADD is in the unallocated portion of the USB-IF range as of + // the 2026 snapshot. If a future archive picks it up the test + // can move to another reserved range. + assert!(vendor_name(0xBADD).is_none(), + "0xBADD unexpectedly present: {:?}", vendor_name(0xBADD)); + } + + #[test] + fn parse_compact_handles_escapes_and_unicode() { + // The producer only ever escapes `,` and `%` (both ASCII). Non-ASCII + // text passes through as raw UTF-8 — we verify both round-trip. + let s = "0001:plain,0002:has%2Ccomma,0003:has%25percent,0004:em\u{2014}dash"; + let m = parse_compact(s); + assert_eq!(m.get(&1).map(|s| s.as_str()), Some("plain")); + assert_eq!(m.get(&2).map(|s| s.as_str()), Some("has,comma")); + assert_eq!(m.get(&3).map(|s| s.as_str()), Some("has%percent")); + let v = m.get(&4).expect("vid 4 missing"); + assert!(v.contains('—'), "missing em-dash: {v:?}"); + } +} diff --git a/crates/fbuild-core/src/usb/mod.rs b/crates/fbuild-core/src/usb/mod.rs index 6669d240..56733eae 100644 --- a/crates/fbuild-core/src/usb/mod.rs +++ b/crates/fbuild-core/src/usb/mod.rs @@ -2,15 +2,17 @@ //! //! Three resolution tiers, queried in order: //! -//! 1. **Bundled** — the [`usb-ids`](https://crates.io/crates/usb-ids) crate, -//! compiled in at build time as a `phf` perfect-hash table. Zero IO, zero -//! 
allocations for the lookup itself. Tracks the upstream -//! `linux-usb.org` snapshot the crate was published against. -//! 2. **Online overlay** — an optional `{ "VVVV:PPPP": {vendor, product} }` -//! JSON map loaded at runtime (typically from a daemon-managed cache file -//! that mirrors the `online-data` branch of this repo). The overlay -//! provides newly-assigned VID/PID pairs that the bundled snapshot -//! doesn't yet know about. +//! 1. **Online overlay** — an optional `{ "VVVV:PPPP": {vendor, product} }` +//! JSON map loaded at runtime (typically from a daemon-managed cache +//! file that mirrors the `online-data` branch of this repo). This is +//! the richest source — it has both vendor AND product names — and is +//! queried first. +//! 2. **Embedded vendor archive** — a 22 KB `tar.zst` blob compiled in +//! via `include_bytes!` (see [`embedded`]). Vendor names only — for +//! VIDs the overlay doesn't carry, we resolve the vendor offline and +//! synthesize `"Device 0xPPPP"` as the product placeholder. Per-PID +//! detail is intentionally not bundled — clients can hit the +//! SQLite-over-HTTP database on the `www` branch for that. //! 3. **Fallback** — synthetic `"Unknown vendor 0xVVVV"` placeholder so //! callers can always print something deterministic. //! @@ -25,7 +27,9 @@ //! branch — see [`MANIFEST_URL`] and [`USB_VID_JSON_URL`]. pub mod data; +pub mod embedded; pub mod resolver; pub use data::{install_online_cache, MANIFEST_URL, USB_VID_JSON_URL}; +pub use embedded::vendor_name as embedded_vendor_name; pub use resolver::{pretty, resolve, resolve_bundled, try_resolve, UsbInfo}; diff --git a/crates/fbuild-core/src/usb/resolver.rs b/crates/fbuild-core/src/usb/resolver.rs index 5c3d917d..f6ff7dd0 100644 --- a/crates/fbuild-core/src/usb/resolver.rs +++ b/crates/fbuild-core/src/usb/resolver.rs @@ -3,6 +3,8 @@ use serde::{Deserialize, Serialize}; +use super::embedded; + /// Resolved USB device identity. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct UsbInfo { @@ -12,7 +14,8 @@ pub struct UsbInfo { /// Best-effort lookup. Never returns `None`: a synthetic /// `"Unknown vendor 0xVVVV"` / `"Unknown product 0xPPPP"` is produced -/// when both tier-1 (bundled) and tier-2 (online overlay) miss. +/// when both tier-1 (online overlay) and tier-2 (embedded vendor archive) +/// miss. pub fn resolve(vid: u16, pid: u16) -> UsbInfo { try_resolve(vid, pid).unwrap_or_else(|| UsbInfo { vendor: format!("Unknown vendor 0x{vid:04X}"), @@ -21,18 +24,31 @@ pub fn resolve(vid: u16, pid: u16) -> UsbInfo { } /// Tier-1 + tier-2 only. Returns `None` if neither knows this pair. +/// +/// Tier order is reversed from the old `usb-ids`-backed implementation: +/// the online overlay carries the full `{vendor, product}` aggregate +/// (it ingests the bundled Rust crate dump on the `online-data` branch +/// at workflow time), while the embedded vendor archive is intentionally +/// vendor-name-only. We consult the overlay first because it has more +/// information; we only fall through to the embedded archive when the +/// overlay misses the VID entirely. pub fn try_resolve(vid: u16, pid: u16) -> Option { - resolve_bundled(vid, pid).or_else(|| super::data::lookup(vid, pid)) + if let Some(info) = super::data::lookup(vid, pid) { + return Some(info); + } + resolve_bundled(vid, pid) } -/// Tier-1 only (the bundled `usb-ids` crate). Use when callers need to -/// distinguish "the offline snapshot knows this" from "we had to fall -/// through to the online overlay" — diagnostics, attribution, etc. +/// Tier-2 only (the compile-time-embedded vendor archive). The embedded +/// archive carries vendor names only — see +/// `crates/fbuild-core/data/usb-vendors.tar.zst`.
For VIDs present in the +/// archive, the returned `UsbInfo.product` is a synthetic `"Device 0xPPPP"` +/// placeholder since per-PID resolution lives in the runtime overlay +/// (tier-1) and the www-branch SQLite-over-HTTP database. pub fn resolve_bundled(vid: u16, pid: u16) -> Option { - let device = usb_ids::Device::from_vid_pid(vid, pid)?; - Some(UsbInfo { - vendor: device.vendor().name().to_string(), - product: device.name().to_string(), + embedded::vendor_name(vid).map(|vendor| UsbInfo { + vendor: vendor.to_string(), + product: format!("Device 0x{pid:04X}"), }) } @@ -54,87 +70,78 @@ mod tests { static OVERLAY_LOCK: Mutex<()> = Mutex::new(()); #[test] - fn bundled_resolves_ftdi_ft232() { - let info = resolve_bundled(0x0403, 0x6001).expect("FTDI FT232 in bundled DB"); + fn embedded_resolves_ftdi_vendor() { + let info = resolve_bundled(0x0403, 0x6001).expect("FTDI VID in embedded archive"); assert!( - info.vendor.to_lowercase().contains("future technology"), + info.vendor.to_lowercase().contains("future technology") + || info.vendor.to_lowercase().contains("ftdi"), "vendor: {}", info.vendor ); - assert!( - info.product.to_lowercase().contains("ft232"), - "product: {}", - info.product - ); + // Tier-2 product is intentionally synthetic — the real product + // name lives in the runtime overlay (tier-1).
+ assert_eq!(info.product, "Device 0x6001"); } #[test] - fn bundled_resolves_silabs_cp210x() { - let info = resolve_bundled(0x10C4, 0xEA60).expect("Silicon Labs CP210x in bundled DB"); + fn embedded_resolves_silabs_vendor() { + let info = resolve_bundled(0x10C4, 0xEA60).expect("Silicon Labs VID in embedded archive"); assert!( - info.vendor.to_lowercase().contains("silicon labs") + info.vendor.to_lowercase().contains("silicon lab") || info.vendor.to_lowercase().contains("cygnal"), "vendor: {}", info.vendor ); - assert!( - info.product.to_lowercase().contains("cp210"), - "product: {}", - info.product - ); } #[test] - fn bundled_resolves_wch_ch340() { - let info = resolve_bundled(0x1A86, 0x7523).expect("WCH CH340 in bundled DB"); + fn embedded_resolves_espressif_via_inlined_supplement() { + // 0x303a is missing from every canonical text database we mirror + // — the curated inlined supplement (online-data-tools/ + // vendor_names_inlined.py) injects it during the workflow's + // merge step, and the resulting tar.zst is the embedded archive. + // This test pins the round-trip: curated overlay → embedded + // archive → fbuild runtime resolution. + let info = resolve_bundled(0x303A, 0x4002).expect("Espressif in embedded archive"); assert!( - info.vendor.to_lowercase().contains("qinheng") - || info.vendor.to_lowercase().contains("wch") - || info.vendor.to_lowercase().contains("nanjing"), + info.vendor.to_lowercase().contains("espressif"), "vendor: {}", info.vendor ); - assert!( - info.product.to_lowercase().contains("ch340") - || info.product.to_lowercase().contains("serial"), - "product: {}", - info.product - ); } #[test] fn unknown_pair_returns_synthetic_placeholder() { - // 0xFFFE:0xFFFE is reserved and will not be assigned by USB-IF; - // safe sentinel for "we expect tier-3 to fire." 
- let info = resolve(0xFFFE, 0xFFFE); - assert_eq!(info.vendor, "Unknown vendor 0xFFFE"); - assert_eq!(info.product, "Unknown product 0xFFFE"); + // 0xBADD:0xBADD is reserved and will not be assigned by USB-IF. + let info = resolve(0xBADD, 0xBADD); + assert_eq!(info.vendor, "Unknown vendor 0xBADD"); + assert_eq!(info.product, "Unknown product 0xBADD"); } #[test] fn pretty_format_uses_canonical_shape() { - // FTDI FT232 is one of the most stable VID:PIDs in the bundled DB - // (it's the de-facto USB-serial chip used in every Arduino clone). + // FTDI is in the embedded archive — vendor resolves, product is + // synthetic so the tail is deterministic. let s = pretty(0x0403, 0x6001); assert!(s.ends_with("(0403:6001)"), "tail format wrong: {s}"); assert!( - s.to_lowercase().contains("future technology"), + s.to_lowercase().contains("future technology") + || s.to_lowercase().contains("ftdi"), "missing vendor: {s}" ); - // Pretty also handles the unknown path deterministically. - let unknown = pretty(0xFFFE, 0xFFFE); + // Unknown path stays deterministic. + let unknown = pretty(0xBADD, 0xBADD); assert_eq!( unknown, - "Unknown vendor 0xFFFE Unknown product 0xFFFE (FFFE:FFFE)" + "Unknown vendor 0xBADD Unknown product 0xBADD (BADD:BADD)" ); } #[test] - fn online_overlay_resolves_when_bundled_misses() { + fn online_overlay_resolves_when_embedded_misses() { let _guard = OVERLAY_LOCK.lock().unwrap(); - // Use a VID:PID that the bundled `usb-ids` crate cannot resolve - // (0xFFFD:0xABCD is reserved). Install an overlay entry for it and - // confirm `resolve()` picks tier-2 instead of falling to tier-3. + // Pick a VID:PID that the embedded archive cannot resolve. + // 0xFFFD is reserved by USB-IF. 
assert!( resolve_bundled(0xFFFD, 0xABCD).is_none(), "test fixture assumed an unallocated VID:PID; pick a different one" @@ -153,7 +160,26 @@ mod tests { assert_eq!(info.vendor, "Acme Test Devices"); assert_eq!(info.product, "Test Widget 9000"); - // Reset so unrelated tests don't observe this entry. + super::super::data::clear_online_cache_for_tests(); + } + + #[test] + fn online_overlay_wins_over_embedded_for_same_vid() { + // Overlay has tier priority — if a VID is in BOTH the embedded + // archive and the overlay, the overlay's richer entry wins. + let _guard = OVERLAY_LOCK.lock().unwrap(); + let mut map = HashMap::new(); + map.insert( + super::super::data::pack(0x0403, 0x6001), + UsbInfo { + vendor: "FTDI Official".to_string(), + product: "FT232 Serial Converter".to_string(), + }, + ); + super::super::data::install_online_cache_map(map); + let info = resolve(0x0403, 0x6001); + assert_eq!(info.vendor, "FTDI Official"); + assert_eq!(info.product, "FT232 Serial Converter"); super::super::data::clear_online_cache_for_tests(); } } diff --git a/ids.json b/ids.json new file mode 100644 index 00000000..9e800a29 --- /dev/null +++ b/ids.json @@ -0,0 +1,255 @@ +{ + "0000": "Wrong vendor ID", + "0010": "TSI Incorporated", + "0017": "Meyer Instruments (MIS)", + "0024": "Numark Mixtrack", + "0028": "beyerdynamic GmbH & Co. 
KG beyerdynamic PRO X", + "00f9": "UWP WBDI Device", + "0154": "LW154 Wireless 150N Adapter", + "015c": "Tecno World", + "0280": "CAM(Dongle) [Freenet TV-Stick]", + "0284": "\"FX-USB-AW/-BD\" USB/RS482 Converters, Mitsubishi Electric Corp.", + "0b9a": "Namco Limited", + "0c7c": "TMS International BV", + "0cc7": "Kontron Medical AG", + "0ea3": "RION NL-52 Sound Level Meter", + "0fb4": "TiiTuii Co., Ltd.", + "1021": "Western Digital External HDD", + "103e": "Aim-TTi", + "10a4": "Gunding Cosmopolit 7 Web", + "1105": "Sigma Designs Inc.", + "1106": "VIA Technologies, Inc.", + "1180": "Ricoh Company, Ltd.", + "12c9": "Newmen Tech., Ltd.", + "1305": "Star Micronics", + "1354": "FACTS Engineering LLC", + "14b7": "In2Games Limited", + "14e4": "Broadcom Corp.", + "1556": "CERN", + "15d3": "Symmetric Research", + "1609": "Flash", + "1642": "DataTraveler 101 8GB", + "1662": "Quantum Mini", + "16bd": "Leica Geosystems AG", + "1747": "CML Microcircuits", + "1768": "Unify Software and Solutions GmbH & Co. 
KG OpenStage WL3 VoWLAN IP phone", + "1778": "IPEVO Inc.", + "1802": "TS5000 series", + "1825": "STAR-Dundee Ltd.", + "182d": "Sitecom Europe B.V.", + "1856": "PIXMA TS6250", + "1902": "Endoscope Camera HD", + "1912": "Renesas Technology Corp.", + "19d9": "Denso Ten Limited", + "1a17": "Oticon A/S", + "1a29": "ABOV Semiconductor Co., Ltd.", + "1a59": "000A RM01 [Haag Streit]", + "1a90": "Corsair Voyager GT 16GB", + "1ac2": "DESKO GmbH", + "1b17": "EXO S.A.", + "1b21": "ASMedia Technology Inc.", + "1b3d": "Matrix Orbital", + "1b4f": "SparkFun", + "1cd7": "GMC-Instruments GmbH", + "1d37": "Signal Processing Devices Sweden AB", + "1d6c": "AUKEY Technology Co., Ltd.", + "1d73": "Signal Processing Devices AB", + "1de7": "0113 [Duet Executive]", + "1e3a": "Continental Automotive Systems Inc.", + "1e9b": "NetCom Sicherheitstechnik GmbH", + "1f18": "TESEQ", + "1f29": "Analogix Semiconductor, Inc.", + "1f36": "ddm hopt+schuler", + "1f71": "Gadmei Electronic Technology Corporation", + "1f85": "Netronix, Inc.", + "1fb9": "Lake Shore Cryotronics, Inc.", + "1fd2": "MELFAS Co. Ltd.", + "2008": "Novanta Inc.", + "2017": "OSMC Remote Controller", + "201b": "Shenzhen Hui Mao Technology (Sinocan)", + "2020": "BroadMobi", + "2022": "Antec", + "20d6": "Bensussen Deutsch & Associates", + "20e7": "Atik CCD Camera", + "2164": "Witek System Inc.", + "2179": "Flex design tablet", + "217c": "TempTale, Sensitech", + "21b0": "Grace Industries", + "21c4": "Longsys Electronics (HK) Co., Ltd.", + "21e1": "CAEN S.p.A.", + "222d": "Leifheit - Soehnle", + "2239": "PEIKER acustic GmbH & Co., KG", + "223b": "Crystalfontz America, Inc.", + "2252": "HBGIC Technology Co., Ltd.", + "2257": "On-The-Go-Video", + "22f4": "Olive V-ME102 CDMA modem", + "230a": "DataLocker", + "2312": "LP320B Wireless Presenter [August International]", + "231d": "VKB-sim", + "2321": "iKingdom Corp. 
[iConnectivity]", + "2342": "NIKO", + "2358": "NuTesla Composite HID+CDC", + "23e3": "Christie Digital Systems", + "23e5": "Antelope Audio", + "23e8": "Propellerhead", + "2472": "TOP", + "247f": "Lynx", + "2541": "Chipsailing", + "2550": "Shenzhen EDUP Electronics Technology Co., Ltd.", + "256c": "HUION", + "258a": "AUKEY", + "25bc": "CETRTA POT", + "2622": "MISSION", + "263c": "SCHULTES", + "2660": "Test", + "2669": "M4S PSK Series Device [M4S PSK]", + "26ce": "ASRock", + "2752": "miniDSP", + "2763": "Primes GmbH", + "276d": "276d:1101", + "2853": "Ralston Instruments, LLC", + "28ba": "Materialise Motion NV", + "2909": "Game Golf Live", + "2914": "Kent Displays, Inc.", + "291a": "Anker Innovation Ltd", + "2947": "Kapelse", + "2982": "Ableton AG", + "2983": "Coyote System SAS", + "29cc": "Kodak Alaris, Inc", + "29df": "CAM(Dongle) [Freenet TV-Stick]", + "29f3": "Resonessence Labs", + "29fe": "Geo Semiconductor", + "2a52": "L Card, LLC", + "2a65": "FreeWave Technologies", + "2a94": "G2touch Co., LTD.", + "2afd": "McIntosh HD USB Audio [McIntosh DA1]", + "2b04": "Duo with WiFi and BLE", + "2b16": "Doccamera", + "2b71": "Flashforge [FlashForge Creator Pro 2 3D Printer]", + "2b73": "Pioneer DJ Corporation", + "2b89": "Unknown", + "2b98": "Glenair Inc.", + "2bd9": "Kubicam", + "2beb": "Gateworks Corporation", + "2c33": "Wizapply", + "2cc8": "Hewlett Packard Enterprise", + "2ce4": "ESMART CCID Device", + "2d01": "Guangdong Zike Technology Co., Ltd", + "2dbc": "Mikroelektronika d.o.o", + "2dee": "QUALCOMM MeigLink", + "2e3c": "Joy-IT", + "2e50": "beyerdynamic GmbH & Co. KG", + "2e8a": "Raspberry Pi Foundation", + "2ea1": "DASAN Electron Co", + "2eb9": "Realtek or Sabrent?", + "2efd": "Filco Co., Ltd.", + "2f68": "Hoksi Technology", + "2fd0": "C*Core Technology Co., Ltd.", + "2fe9": "Shenzhen Xintai Technology Co. 
Ltd", + "2fee": "Holitech", + "300c": "Gyrfalcon Technology Inc.", + "303a": "Espressif Systems", + "30b1": "Bitmain Technologies Inc.", + "30be": "Schiit Audio", + "30d6": "Chroma-Q", + "30de": "KIOXIA EXCERIA PLUS", + "30fa": "Manhattan", + "311f": "TrustKey Co., Ltd.", + "3131": "Jose Correa", + "3151": "Unknown", + "31b1": "SELPHY CP530", + "31b2": "KTMicro", + "31e3": "Wooting", + "31e9": "Solid State Logic, Ltd", + "320f": "Glorious LLC", + "3231": "Kneron, Inc.", + "3232": "CCTV Dome Camera", + "3274": "MicroArray", + "3285": "Nacon", + "3297": "ZSA Technology Labs Inc.", + "32a3": "GoTrust", + "32ac": "Framework Computer BV", + "32cd": "NECパーソナルコンピュータ株式会社", + "32e4": "ELP-USBFHD06H-BL36IR", + "32e6": "IcSpring Technology", + "332d": "Verbatim GmbH", + "3346": "Cvitek Co. Ltd.", + "335e": "Eight Amps", + "33be": "Syncopated Engineering, Inc.", + "33c4": "Tomahawk Robotics", + "33c8": "Seidl Technologies UG", + "33dd": "Zuki Inc", + "33f7": "Linux Automation GmbH", + "33f8": "Rolling Wireless S.a.r.l.", + "33ff": "nyantec GmbH", + "3434": "Keychron", + "344f": "SCX-3400 Series", + "3455": "Atomos Global Pty Ltd", + "345f": "MacroSilicon", + "3464": "Senscomm Semiconductor, Inc", + "346d": "VendorCo", + "346e": "Gudsen Technology (HK) Co., Ltd (MOZA)", + "349c": "Zhuhai Hongxin Technology Co., Ltd", + "349e": "Token2", + "3542": "Sonova Consumer Hearing", + "3544": "Rusoku technologijos UAB", + "3553": "PCsensor", + "359f": "Shenzhen Sipeed Technology Co., Ltd.", + "35b6": "Orqa d.o.o", + "35f0": "Bitcraze AB", + "35f1": "INFICON", + "369a": "HighSecLabs, Ltd", + "36da": "Record Sure Limited [Recordsure]", + "36e9": "ifanr Inc.", + "3760": "CIN-ergy B.V.", + "37c5": "OpenMV, LLC", + "3802": "LDA Technologies LTD", + "3817": "SleepImage", + "3842": "EVGA", + "386e": "XTX Markets", + "3876": "Fenice Power Co., Ltd", + "38c5": "JetHome LLC", + "3938": "MOSART Semiconductor", + "3c93": "QingDao Topscomm", + "413d": "RDing Tech Co aka PCsensor", + "4816": 
"Integrated Webcam", + "4c4a": "JieLi Technology", + "4e4c": "NieL™ TechSolution", + "5041": "Linksys (?)", + "5246": "bladeRF Software Defined Radio", + "5262": "X.Tips", + "5325": "Woolworth GmbH", + "573c": "Xreal Light Microcontroller", + "5888": "3TR EMU", + "6004": "ISD-V4 Tablet Pen", + "6005": "Hewlett-Packard", + "6495": "GoDEX International Co.", + "6964": "Idobo", + "7374": "DATA MODUL", + "7712": "2711 Temperature sensor HUB [SEIICHI]", + "7777": "SEIICHI Technology Co., Ltd.", + "8347": "VisTrend Co., Ltd.", + "8888": "inLight", + "9048": "NuTesla CDC Serial Emulator", + "9e8f": "Plug Computer Basic [SheevaPlug]", + "a69c": "AICSEMI", + "a8f8": "Bastard Keyboards", + "b2c3": "GNDHog", + "b711": "VuPlus", + "c069": "M500 Laser Mouse", + "c07c": "M-R0017 [G700s Rechargeable Gaming Mouse]", + "c0f4": "DualMiner", + "c580": "HID UNIKEYdongle [F-Response]", + "c5cb": "ARTECH (Artech Technology Design Co., Ltd.)", + "d13e": "Coldcard Wallet", + "dff0": "shapinb", + "e2b5": "JieLi Technology", + "e3b5": "JieLi Technology", + "e5b7": "JieLi Technology", + "eb57": "ZhuHai JieLi Technology", + "eba4": "Aoboco", + "feed": "DOIO Keyboard", + "ffd2": "ZHONG-HUI ELECTRONICS CORP.", + "fffe": "Inland (MicroCenter brand)", + "ffff": "Unknown Thickness Gage" +} \ No newline at end of file diff --git a/ids.txt b/ids.txt new file mode 100644 index 00000000..81cc49eb --- /dev/null +++ b/ids.txt @@ -0,0 +1,253 @@ +291a +2947 +2982 +2983 +29cc +29df +29f3 +29fe +2a52 +2a65 +2a94 +2afd +2b04 +2b16 +2b71 +2b73 +2b89 +2b98 +2bd9 +2beb +2c33 +2cc8 +2ce4 +2d01 +2dbc +2dee +2e3c +2e50 +2e8a +2ea1 +2eb9 +2efd +2f68 +2fd0 +2fe9 +2fee +300c +303a +30b1 +30be +30d6 +30de +30fa +311f +3131 +3151 +31b2 +31e3 +31e9 +320f +3231 +3232 +3274 +3285 +3297 +32a3 +32ac +32cd +32e4 +32e6 +332d +3346 +335e +33be +33c4 +33c8 +33dd +33f7 +33f8 +33ff +3434 +3455 +345f +3464 +346d +346e +349c +349e +3542 +3544 +3553 +359f +35b6 +35f0 +35f1 +369a +36da +36e9 +3760 +37c5 +3802 +3817 +3842 +386e +3876 
+38c5 +3c93 +413d +4c4a +4e4c +5041 +5246 +5262 +5325 +573c +5888 +6005 +6495 +6964 +7374 +7712 +7777 +8347 +8888 +a69c +a8f8 +b2c3 +b711 +c07c +c0f4 +c580 +c5cb +d13e +dff0 +e2b5 +e3b5 +e5b7 +eb57 +eba4 +feed +ffd2 +fffe +ffff +1cd7 +1d37 +1d6c +1d73 +1de7 +1e3a +1e9b +1f18 +1f29 +1f36 +1f71 +1f85 +1fb9 +1fd2 +2008 +2017 +201b +2020 +2022 +20d6 +20e7 +2164 +2179 +217c +21b0 +21c4 +21e1 +222d +2239 +223b +2252 +2257 +22f4 +230a +2312 +231d +2321 +2342 +2358 +23e3 +23e5 +23e8 +2472 +247f +2541 +2550 +256c +258a +25bc +2622 +263c +2660 +2669 +26ce +2752 +2763 +276d +2853 +28ba +2909 +2914 +0c7c +0ea3 +0b9a +14b7 +1556 +15d3 +1609 +1662 +16bd +1747 +1768 +1778 +1802 +1825 +182d +1856 +1902 +1912 +19d9 +1a17 +1a29 +1a59 +1ac2 +1b17 +1b21 +1b3d +1b4f +0000 +0017 +0028 +00f9 +015c +0fb4 +1021 +103e +10a4 +1106 +12c9 +1305 +1354 +1105 +1180 +0cc7 +31b1 +344f +3938 +4816 +6004 +9048 +9e8f +c069 +0010 +0024 +0154 +0280 +0284 +14e4 +1642 +1a90 \ No newline at end of file diff --git a/ids2.json b/ids2.json new file mode 100644 index 00000000..818caa5f --- /dev/null +++ b/ids2.json @@ -0,0 +1,789 @@ +{ + "0000": [ + "Wrong vendor ID" + ], + "0010": [ + "TSI Incorporated", + "Test" + ], + "0017": [ + "Meyer Instruments (MIS)" + ], + "0024": [ + "Numark Mixtrack" + ], + "0028": [ + "beyerdynamic GmbH & Co. KG beyerdynamic PRO X" + ], + "00f9": [ + "UWP WBDI Device" + ], + "0154": [ + "LW154 Wireless 150N Adapter" + ], + "015c": [ + "Tecno World" + ], + "0280": [ + "CAM(Dongle) [Freenet TV-Stick]" + ], + "0284": [ + "\"FX-USB-AW/-BD\" USB/RS482 Converters, Mitsubishi Electric Corp." + ], + "0b9a": [ + "Namco Limited" + ], + "0c7c": [ + "TMS International BV" + ], + "0cc7": [ + "Kontron Medical AG" + ], + "0ea3": [ + "RION NL-52 Sound Level Meter" + ], + "0fb4": [ + "TiiTuii Co., Ltd." + ], + "1021": [ + "Western Digital External HDD" + ], + "103e": [ + "Aim-TTi" + ], + "10a4": [ + "Gunding Cosmopolit 7 Web" + ], + "1105": [ + "Sigma Designs Inc." 
+ ], + "1106": [ + "VIA Technologies, Inc." + ], + "1180": [ + "Ricoh Company, Ltd." + ], + "12c9": [ + "Newmen Tech., Ltd." + ], + "1305": [ + "Star Micronics" + ], + "1354": [ + "FACTS Engineering LLC" + ], + "14b7": [ + "In2Games Limited" + ], + "14e4": [ + "Broadcom Corp." + ], + "1556": [ + "CERN" + ], + "15d3": [ + "Symmetric Research" + ], + "1609": [ + "Flash" + ], + "1642": [ + "DataTraveler 101 8GB" + ], + "1662": [ + "Quantum Mini" + ], + "16bd": [ + "Leica Geosystems AG" + ], + "1747": [ + "CML Microcircuits" + ], + "1768": [ + "Unify Software and Solutions GmbH & Co. KG OpenStage WL3 VoWLAN IP phone" + ], + "1778": [ + "IPEVO Inc." + ], + "1802": [ + "TS5000 series" + ], + "1825": [ + "STAR-Dundee Ltd." + ], + "182d": [ + "Sitecom Europe B.V." + ], + "1856": [ + "PIXMA TS6250" + ], + "1902": [ + "Endoscope Camera HD" + ], + "1912": [ + "Renesas Technology Corp." + ], + "19d9": [ + "Denso Ten Limited" + ], + "1a17": [ + "Oticon A/S" + ], + "1a29": [ + "ABOV Semiconductor Co., Ltd." + ], + "1a59": [ + "000A RM01 [Haag Streit]" + ], + "1a90": [ + "Corsair Voyager GT 16GB" + ], + "1ac2": [ + "DESKO GmbH" + ], + "1b17": [ + "EXO S.A." + ], + "1b21": [ + "ASMedia Technology Inc." + ], + "1b3d": [ + "Matrix Orbital" + ], + "1b4f": [ + "SparkFun", + "Arduino Leonardo ATmega32U4 USB IO Board [MakeyMakey]" + ], + "1cd7": [ + "GMC-Instruments GmbH" + ], + "1d37": [ + "Signal Processing Devices Sweden AB" + ], + "1d6c": [ + "AUKEY Technology Co., Ltd." + ], + "1d73": [ + "Signal Processing Devices AB", + "Signal Processing Devices Sweden AB" + ], + "1de7": [ + "0113 [Duet Executive]" + ], + "1e3a": [ + "Continental Automotive Systems Inc." + ], + "1e9b": [ + "NetCom Sicherheitstechnik GmbH" + ], + "1f18": [ + "TESEQ" + ], + "1f29": [ + "Analogix Semiconductor, Inc." + ], + "1f36": [ + "ddm hopt+schuler" + ], + "1f71": [ + "Gadmei Electronic Technology Corporation" + ], + "1f85": [ + "Netronix, Inc.", + "Netronix, Inc. / Obreey", + "Rakuten, Inc." 
+ ], + "1fb9": [ + "Lake Shore Cryotronics, Inc." + ], + "1fd2": [ + "MELFAS Co. Ltd." + ], + "2008": [ + "Novanta Inc." + ], + "2017": [ + "OSMC Remote Controller", + "NAL Research Corporation" + ], + "201b": [ + "Shenzhen Hui Mao Technology (Sinocan)", + "UNI-TEC Electronics" + ], + "2020": [ + "BroadMobi" + ], + "2022": [ + "Antec" + ], + "20d6": [ + "Bensussen Deutsch & Associates" + ], + "20e7": [ + "Atik CCD Camera" + ], + "2164": [ + "Witek System Inc." + ], + "2179": [ + "Flex design tablet" + ], + "217c": [ + "TempTale, Sensitech" + ], + "21b0": [ + "Grace Industries" + ], + "21c4": [ + "Longsys Electronics (HK) Co., Ltd." + ], + "21e1": [ + "CAEN S.p.A." + ], + "222d": [ + "Leifheit - Soehnle" + ], + "2239": [ + "PEIKER acustic GmbH & Co., KG" + ], + "223b": [ + "Crystalfontz America, Inc." + ], + "2252": [ + "HBGIC Technology Co., Ltd." + ], + "2257": [ + "On-The-Go-Video" + ], + "22f4": [ + "Olive V-ME102 CDMA modem" + ], + "230a": [ + "DataLocker" + ], + "2312": [ + "LP320B Wireless Presenter [August International]" + ], + "231d": [ + "VKB-sim" + ], + "2321": [ + "iKingdom Corp. [iConnectivity]" + ], + "2342": [ + "NIKO" + ], + "2358": [ + "NuTesla Composite HID+CDC" + ], + "23e3": [ + "Christie Digital Systems" + ], + "23e5": [ + "Antelope Audio" + ], + "23e8": [ + "Propellerhead" + ], + "2472": [ + "TOP" + ], + "247f": [ + "Lynx" + ], + "2541": [ + "Chipsailing" + ], + "2550": [ + "Shenzhen EDUP Electronics Technology Co., Ltd." 
+ ], + "256c": [ + "HUION", + "Unknown tablet device", + "Huion / Gaomon", + "Paul M" + ], + "258a": [ + "AUKEY", + " [USB chips by: Sino Wealth Electronic Ltd.]" + ], + "25bc": [ + "CETRTA POT" + ], + "2622": [ + "MISSION" + ], + "263c": [ + "SCHULTES" + ], + "2660": [ + "Test" + ], + "2669": [ + "M4S PSK Series Device [M4S PSK]" + ], + "26ce": [ + "ASRock" + ], + "2752": [ + "miniDSP" + ], + "2763": [ + "Primes GmbH" + ], + "276d": [ + "276d:1101" + ], + "2853": [ + "Ralston Instruments, LLC" + ], + "28ba": [ + "Materialise Motion NV" + ], + "2909": [ + "Game Golf Live" + ], + "2914": [ + "Kent Displays, Inc.", + "Improv Electronics" + ], + "291a": [ + "Anker Innovation Ltd" + ], + "2947": [ + "Kapelse" + ], + "2982": [ + "Ableton AG" + ], + "2983": [ + "Coyote System SAS" + ], + "29cc": [ + "Kodak Alaris, Inc" + ], + "29df": [ + "CAM(Dongle) [Freenet TV-Stick]" + ], + "29f3": [ + "Resonessence Labs" + ], + "29fe": [ + "Geo Semiconductor", + "GEO Semiconductor" + ], + "2a52": [ + "L Card, LLC" + ], + "2a65": [ + "FreeWave Technologies" + ], + "2a94": [ + "G2touch Co., LTD." + ], + "2afd": [ + "McIntosh HD USB Audio [McIntosh DA1]" + ], + "2b04": [ + "Duo with WiFi and BLE" + ], + "2b16": [ + "Doccamera" + ], + "2b71": [ + "Flashforge [FlashForge Creator Pro 2 3D Printer]" + ], + "2b73": [ + "Pioneer DJ Corporation" + ], + "2b89": [ + "Unknown" + ], + "2b98": [ + "Glenair Inc." + ], + "2bd9": [ + "Kubicam", + "Huddly" + ], + "2beb": [ + "Gateworks Corporation" + ], + "2c33": [ + "Wizapply" + ], + "2cc8": [ + "Hewlett Packard Enterprise" + ], + "2ce4": [ + "ESMART CCID Device", + "ESMART" + ], + "2d01": [ + "Guangdong Zike Technology Co., Ltd" + ], + "2dbc": [ + "Mikroelektronika d.o.o" + ], + "2dee": [ + "QUALCOMM MeigLink" + ], + "2e3c": [ + "Joy-IT" + ], + "2e50": [ + "beyerdynamic GmbH & Co. KG" + ], + "2e8a": [ + "Raspberry Pi Foundation" + ], + "2ea1": [ + "DASAN Electron Co" + ], + "2eb9": [ + "Realtek or Sabrent?" + ], + "2efd": [ + "Filco Co., Ltd." 
+ ], + "2f68": [ + "Hoksi Technology" + ], + "2fd0": [ + "C*Core Technology Co., Ltd." + ], + "2fe9": [ + "Shenzhen Xintai Technology Co. Ltd" + ], + "2fee": [ + "Holitech" + ], + "300c": [ + "Gyrfalcon Technology Inc." + ], + "303a": [ + "Espressif Systems" + ], + "30b1": [ + "Bitmain Technologies Inc." + ], + "30be": [ + "Schiit Audio" + ], + "30d6": [ + "Chroma-Q" + ], + "30de": [ + "KIOXIA EXCERIA PLUS" + ], + "30fa": [ + "Manhattan", + "Wuxi Instant Microelectronics Co., Ltd." + ], + "311f": [ + "TrustKey Co., Ltd." + ], + "3131": [ + "Jose Correa", + "Authentik Systems" + ], + "3151": [ + "Unknown", + "Dailan Bon Auto Electronic Co., Ltd.", + "Yichip Microelectronics (Hangzhou) Co., Ltd" + ], + "31b1": [ + "SELPHY CP530", + "Shenzhen Jinduan Electronics Co., Ltd." + ], + "31b2": [ + "KTMicro" + ], + "31e3": [ + "Wooting" + ], + "31e9": [ + "Solid State Logic, Ltd" + ], + "320f": [ + "Glorious LLC" + ], + "3231": [ + "Kneron, Inc." + ], + "3232": [ + "CCTV Dome Camera", + "Shenzhen Trusda Industrial Co., Ltd." + ], + "3274": [ + "MicroArray" + ], + "3285": [ + "Nacon" + ], + "3297": [ + "ZSA Technology Labs Inc." + ], + "32a3": [ + "GoTrust" + ], + "32ac": [ + "Framework Computer BV" + ], + "32cd": [ + "NECパーソナルコンピュータ株式会社", + "NEC" + ], + "32e4": [ + "ELP-USBFHD06H-BL36IR" + ], + "32e6": [ + "IcSpring Technology" + ], + "332d": [ + "Verbatim GmbH" + ], + "3346": [ + "Cvitek Co. Ltd." + ], + "335e": [ + "Eight Amps" + ], + "33be": [ + "Syncopated Engineering, Inc." + ], + "33c4": [ + "Tomahawk Robotics" + ], + "33c8": [ + "Seidl Technologies UG", + "Seidl Technologies UG (haftungsbeschraenkt)" + ], + "33dd": [ + "Zuki Inc" + ], + "33f7": [ + "Linux Automation GmbH" + ], + "33f8": [ + "Rolling Wireless S.a.r.l." 
+ ], + "33ff": [ + "nyantec GmbH" + ], + "3434": [ + "Keychron" + ], + "344f": [ + "SCX-3400 Series" + ], + "3455": [ + "Atomos Global Pty Ltd" + ], + "345f": [ + "MacroSilicon" + ], + "3464": [ + "Senscomm Semiconductor, Inc" + ], + "346d": [ + "VendorCo" + ], + "346e": [ + "Gudsen Technology (HK) Co., Ltd (MOZA)" + ], + "349c": [ + "Zhuhai Hongxin Technology Co., Ltd" + ], + "349e": [ + "Token2" + ], + "3542": [ + "Sonova Consumer Hearing" + ], + "3544": [ + "Rusoku technologijos UAB" + ], + "3553": [ + "PCsensor" + ], + "359f": [ + "Shenzhen Sipeed Technology Co., Ltd." + ], + "35b6": [ + "Orqa d.o.o" + ], + "35f0": [ + "Bitcraze AB" + ], + "35f1": [ + "INFICON" + ], + "369a": [ + "HighSecLabs, Ltd" + ], + "36da": [ + "Record Sure Limited [Recordsure]" + ], + "36e9": [ + "ifanr Inc." + ], + "3760": [ + "CIN-ergy B.V." + ], + "37c5": [ + "OpenMV, LLC" + ], + "3802": [ + "LDA Technologies LTD" + ], + "3817": [ + "SleepImage" + ], + "3842": [ + "EVGA" + ], + "386e": [ + "XTX Markets" + ], + "3876": [ + "Fenice Power Co., Ltd" + ], + "38c5": [ + "JetHome LLC" + ], + "3938": [ + "MOSART Semiconductor" + ], + "3c93": [ + "QingDao Topscomm" + ], + "413d": [ + "RDing Tech Co aka PCsensor", + "RDing Technology Ltd [PCsensor]" + ], + "4816": [ + "Integrated Webcam" + ], + "4c4a": [ + "JieLi Technology" + ], + "4e4c": [ + "NieL™ TechSolution" + ], + "5041": [ + "Linksys (?)" + ], + "5246": [ + "bladeRF Software Defined Radio" + ], + "5262": [ + "X.Tips" + ], + "5325": [ + "Woolworth GmbH" + ], + "573c": [ + "Xreal Light Microcontroller" + ], + "5888": [ + "3TR EMU", + "3Tronics MU30" + ], + "6004": [ + "ISD-V4 Tablet Pen" + ], + "6005": [ + "Hewlett-Packard" + ], + "6495": [ + "GoDEX International Co." + ], + "6964": [ + "Idobo" + ], + "7374": [ + "DATA MODUL" + ], + "7712": [ + "2711 Temperature sensor HUB [SEIICHI]" + ], + "7777": [ + "SEIICHI Technology Co., Ltd." + ], + "8347": [ + "VisTrend Co., Ltd." 
+ ], + "8888": [ + "inLight" + ], + "9048": [ + "NuTesla CDC Serial Emulator" + ], + "9e8f": [ + "Plug Computer Basic [SheevaPlug]" + ], + "a69c": [ + "AICSEMI" + ], + "a8f8": [ + "Bastard Keyboards" + ], + "b2c3": [ + "GNDHog" + ], + "b711": [ + "VuPlus" + ], + "c069": [ + "M500 Laser Mouse" + ], + "c07c": [ + "M-R0017 [G700s Rechargeable Gaming Mouse]" + ], + "c0f4": [ + "DualMiner", + "Generic USB Keyboard" + ], + "c580": [ + "HID UNIKEYdongle [F-Response]" + ], + "c5cb": [ + "ARTECH (Artech Technology Design Co., Ltd.)" + ], + "d13e": [ + "Coldcard Wallet" + ], + "dff0": [ + "shapinb" + ], + "e2b5": [ + "JieLi Technology" + ], + "e3b5": [ + "JieLi Technology" + ], + "e5b7": [ + "JieLi Technology" + ], + "eb57": [ + "ZhuHai JieLi Technology" + ], + "eba4": [ + "Aoboco" + ], + "feed": [ + "DOIO Keyboard", + "Unknown" + ], + "ffd2": [ + "ZHONG-HUI ELECTRONICS CORP." + ], + "fffe": [ + "Inland (MicroCenter brand)" + ], + "ffff": [ + "Unknown Thickness Gage", + "Wrong vendor ID" + ] +} diff --git a/ids3.json b/ids3.json new file mode 100644 index 00000000..1b9b0c8a --- /dev/null +++ b/ids3.json @@ -0,0 +1,761 @@ +{ + "0000": [ + "Wrong vendor ID" + ], + "0010": [ + "TSI Incorporated" + ], + "0017": [ + "Meyer Instruments (MIS)" + ], + "0024": [ + "Numark Mixtrack" + ], + "0028": [ + "beyerdynamic GmbH & Co. KG beyerdynamic PRO X" + ], + "00f9": [ + "UWP WBDI Device" + ], + "0154": [ + "LW154 Wireless 150N Adapter" + ], + "015c": [ + "Tecno World" + ], + "0280": [ + "CAM(Dongle) [Freenet TV-Stick]" + ], + "0284": [ + "\"FX-USB-AW/-BD\" USB/RS482 Converters, Mitsubishi Electric Corp." + ], + "0b9a": [ + "Namco Limited" + ], + "0c7c": [ + "TMS International BV" + ], + "0cc7": [ + "Kontron Medical AG" + ], + "0ea3": [ + "RION NL-52 Sound Level Meter" + ], + "0fb4": [ + "TiiTuii Co., Ltd." + ], + "1021": [ + "Western Digital External HDD" + ], + "103e": [ + "Aim-TTi" + ], + "10a4": [ + "Gunding Cosmopolit 7 Web" + ], + "1105": [ + "Sigma Designs Inc." 
+ ], + "1106": [ + "VIA Technologies, Inc." + ], + "1180": [ + "Ricoh Company, Ltd." + ], + "12c9": [ + "Newmen Tech., Ltd." + ], + "1305": [ + "Star Micronics" + ], + "1354": [ + "FACTS Engineering LLC" + ], + "14b7": [ + "In2Games Limited" + ], + "14e4": [ + "Broadcom Corp." + ], + "1556": [ + "CERN" + ], + "15d3": [ + "Symmetric Research" + ], + "1609": [ + "Flash" + ], + "1642": [ + "DataTraveler 101 8GB" + ], + "1662": [ + "Quantum Mini" + ], + "16bd": [ + "Leica Geosystems AG" + ], + "1747": [ + "CML Microcircuits" + ], + "1768": [ + "Unify Software and Solutions GmbH & Co. KG OpenStage WL3 VoWLAN IP phone" + ], + "1778": [ + "IPEVO Inc." + ], + "1802": [ + "TS5000 series" + ], + "1825": [ + "STAR-Dundee Ltd." + ], + "182d": [ + "Sitecom Europe B.V." + ], + "1856": [ + "PIXMA TS6250" + ], + "1902": [ + "Endoscope Camera HD" + ], + "1912": [ + "Renesas Technology Corp." + ], + "19d9": [ + "Denso Ten Limited" + ], + "1a17": [ + "Oticon A/S" + ], + "1a29": [ + "ABOV Semiconductor Co., Ltd." + ], + "1a59": [ + "000A RM01 [Haag Streit]" + ], + "1a90": [ + "Corsair Voyager GT 16GB" + ], + "1ac2": [ + "DESKO GmbH" + ], + "1b17": [ + "EXO S.A." + ], + "1b21": [ + "ASMedia Technology Inc." + ], + "1b3d": [ + "Matrix Orbital" + ], + "1b4f": [ + "SparkFun" + ], + "1cd7": [ + "GMC-Instruments GmbH" + ], + "1d37": [ + "Signal Processing Devices Sweden AB" + ], + "1d6c": [ + "AUKEY Technology Co., Ltd." + ], + "1d73": [ + "Signal Processing Devices Sweden AB" + ], + "1de7": [ + "0113 [Duet Executive]" + ], + "1e3a": [ + "Continental Automotive Systems Inc." + ], + "1e9b": [ + "NetCom Sicherheitstechnik GmbH" + ], + "1f18": [ + "TESEQ" + ], + "1f29": [ + "Analogix Semiconductor, Inc." + ], + "1f36": [ + "ddm hopt+schuler" + ], + "1f71": [ + "Gadmei Electronic Technology Corporation" + ], + "1f85": [ + "Netronix, Inc. / Obreey" + ], + "1fb9": [ + "Lake Shore Cryotronics, Inc." + ], + "1fd2": [ + "MELFAS Co. Ltd." + ], + "2008": [ + "Novanta Inc." 
+ ], + "2017": [ + "NAL Research Corporation" + ], + "201b": [ + "UNI-TEC Electronics" + ], + "2020": [ + "BroadMobi" + ], + "2022": [ + "Antec" + ], + "20d6": [ + "Bensussen Deutsch & Associates" + ], + "20e7": [ + "Atik CCD Camera" + ], + "2164": [ + "Witek System Inc." + ], + "2179": [ + "Flex design tablet" + ], + "217c": [ + "TempTale, Sensitech" + ], + "21b0": [ + "Grace Industries" + ], + "21c4": [ + "Longsys Electronics (HK) Co., Ltd." + ], + "21e1": [ + "CAEN S.p.A." + ], + "222d": [ + "Leifheit - Soehnle" + ], + "2239": [ + "PEIKER acustic GmbH & Co., KG" + ], + "223b": [ + "Crystalfontz America, Inc." + ], + "2252": [ + "HBGIC Technology Co., Ltd." + ], + "2257": [ + "On-The-Go-Video" + ], + "22f4": [ + "Olive V-ME102 CDMA modem" + ], + "230a": [ + "DataLocker" + ], + "2312": [ + "LP320B Wireless Presenter [August International]" + ], + "231d": [ + "VKB-sim" + ], + "2321": [ + "iKingdom Corp. [iConnectivity]" + ], + "2342": [ + "NIKO" + ], + "2358": [ + "NuTesla Composite HID+CDC" + ], + "23e3": [ + "Christie Digital Systems" + ], + "23e5": [ + "Antelope Audio" + ], + "23e8": [ + "Propellerhead" + ], + "2472": [ + "TOP" + ], + "247f": [ + "Lynx" + ], + "2541": [ + "Chipsailing" + ], + "2550": [ + "Shenzhen EDUP Electronics Technology Co., Ltd." + ], + "256c": [ + "HUION" + ], + "258a": [ + " [USB chips by: Sino Wealth Electronic Ltd.]" + ], + "25bc": [ + "CETRTA POT" + ], + "2622": [ + "MISSION" + ], + "263c": [ + "SCHULTES" + ], + "2660": [ + "Test" + ], + "2669": [ + "M4S PSK Series Device [M4S PSK]" + ], + "26ce": [ + "ASRock" + ], + "2752": [ + "miniDSP" + ], + "2763": [ + "Primes GmbH" + ], + "276d": [ + "276d:1101" + ], + "2853": [ + "Ralston Instruments, LLC" + ], + "28ba": [ + "Materialise Motion NV" + ], + "2909": [ + "Game Golf Live" + ], + "2914": [ + "Kent Displays, Inc." 
+ ], + "291a": [ + "Anker Innovation Ltd" + ], + "2947": [ + "Kapelse" + ], + "2982": [ + "Ableton AG" + ], + "2983": [ + "Coyote System SAS" + ], + "29cc": [ + "Kodak Alaris, Inc" + ], + "29df": [ + "CAM(Dongle) [Freenet TV-Stick]" + ], + "29f3": [ + "Resonessence Labs" + ], + "29fe": [ + "Geo Semiconductor" + ], + "2a52": [ + "L Card, LLC" + ], + "2a65": [ + "FreeWave Technologies" + ], + "2a94": [ + "G2touch Co., LTD." + ], + "2afd": [ + "McIntosh HD USB Audio [McIntosh DA1]" + ], + "2b04": [ + "Duo with WiFi and BLE" + ], + "2b16": [ + "Doccamera" + ], + "2b71": [ + "Flashforge [FlashForge Creator Pro 2 3D Printer]" + ], + "2b73": [ + "Pioneer DJ Corporation" + ], + "2b89": [ + "Unknown" + ], + "2b98": [ + "Glenair Inc." + ], + "2bd9": [ + "Huddly" + ], + "2beb": [ + "Gateworks Corporation" + ], + "2c33": [ + "Wizapply" + ], + "2cc8": [ + "Hewlett Packard Enterprise" + ], + "2ce4": [ + "ESMART" + ], + "2d01": [ + "Guangdong Zike Technology Co., Ltd" + ], + "2dbc": [ + "Mikroelektronika d.o.o" + ], + "2dee": [ + "QUALCOMM MeigLink" + ], + "2e3c": [ + "Joy-IT" + ], + "2e50": [ + "beyerdynamic GmbH & Co. KG" + ], + "2e8a": [ + "Raspberry Pi Foundation" + ], + "2ea1": [ + "DASAN Electron Co" + ], + "2eb9": [ + "Realtek or Sabrent?" + ], + "2efd": [ + "Filco Co., Ltd." + ], + "2f68": [ + "Hoksi Technology" + ], + "2fd0": [ + "C*Core Technology Co., Ltd." + ], + "2fe9": [ + "Shenzhen Xintai Technology Co. Ltd" + ], + "2fee": [ + "Holitech" + ], + "300c": [ + "Gyrfalcon Technology Inc." + ], + "303a": [ + "Espressif Systems" + ], + "30b1": [ + "Bitmain Technologies Inc." + ], + "30be": [ + "Schiit Audio" + ], + "30d6": [ + "Chroma-Q" + ], + "30de": [ + "KIOXIA EXCERIA PLUS" + ], + "30fa": [ + "Wuxi Instant Microelectronics Co., Ltd." + ], + "311f": [ + "TrustKey Co., Ltd." + ], + "3131": [ + "Authentik Systems" + ], + "3151": [ + "Yichip Microelectronics (Hangzhou) Co., Ltd" + ], + "31b1": [ + "Shenzhen Jinduan Electronics Co., Ltd." 
+ ], + "31b2": [ + "KTMicro" + ], + "31e3": [ + "Wooting" + ], + "31e9": [ + "Solid State Logic, Ltd" + ], + "320f": [ + "Glorious LLC" + ], + "3231": [ + "Kneron, Inc." + ], + "3232": [ + "Shenzhen Trusda Industrial Co., Ltd." + ], + "3274": [ + "MicroArray" + ], + "3285": [ + "Nacon" + ], + "3297": [ + "ZSA Technology Labs Inc." + ], + "32a3": [ + "GoTrust" + ], + "32ac": [ + "Framework Computer BV" + ], + "32cd": [ + "NEC" + ], + "32e4": [ + "ELP-USBFHD06H-BL36IR" + ], + "32e6": [ + "IcSpring Technology" + ], + "332d": [ + "Verbatim GmbH" + ], + "3346": [ + "Cvitek Co. Ltd." + ], + "335e": [ + "Eight Amps" + ], + "33be": [ + "Syncopated Engineering, Inc." + ], + "33c4": [ + "Tomahawk Robotics" + ], + "33c8": [ + "Seidl Technologies UG" + ], + "33dd": [ + "Zuki Inc" + ], + "33f7": [ + "Linux Automation GmbH" + ], + "33f8": [ + "Rolling Wireless S.a.r.l." + ], + "33ff": [ + "nyantec GmbH" + ], + "3434": [ + "Keychron" + ], + "344f": [ + "SCX-3400 Series" + ], + "3455": [ + "Atomos Global Pty Ltd" + ], + "345f": [ + "MacroSilicon" + ], + "3464": [ + "Senscomm Semiconductor, Inc" + ], + "346d": [ + "VendorCo" + ], + "346e": [ + "Gudsen Technology (HK) Co., Ltd (MOZA)" + ], + "349c": [ + "Zhuhai Hongxin Technology Co., Ltd" + ], + "349e": [ + "Token2" + ], + "3542": [ + "Sonova Consumer Hearing" + ], + "3544": [ + "Rusoku technologijos UAB" + ], + "3553": [ + "PCsensor" + ], + "359f": [ + "Shenzhen Sipeed Technology Co., Ltd." + ], + "35b6": [ + "Orqa d.o.o" + ], + "35f0": [ + "Bitcraze AB" + ], + "35f1": [ + "INFICON" + ], + "369a": [ + "HighSecLabs, Ltd" + ], + "36da": [ + "Record Sure Limited [Recordsure]" + ], + "36e9": [ + "ifanr Inc." + ], + "3760": [ + "CIN-ergy B.V." 
+ ], + "37c5": [ + "OpenMV, LLC" + ], + "3802": [ + "LDA Technologies LTD" + ], + "3817": [ + "SleepImage" + ], + "3842": [ + "EVGA" + ], + "386e": [ + "XTX Markets" + ], + "3876": [ + "Fenice Power Co., Ltd" + ], + "38c5": [ + "JetHome LLC" + ], + "3938": [ + "MOSART Semiconductor" + ], + "3c93": [ + "QingDao Topscomm" + ], + "413d": [ + "RDing Technology Ltd [PCsensor]" + ], + "4816": [ + "Integrated Webcam" + ], + "4c4a": [ + "JieLi Technology" + ], + "4e4c": [ + "NieLTM TechSolution" + ], + "5041": [ + "Linksys (?)" + ], + "5246": [ + "bladeRF Software Defined Radio" + ], + "5262": [ + "X.Tips" + ], + "5325": [ + "Woolworth GmbH" + ], + "573c": [ + "Xreal Light Microcontroller" + ], + "5888": [ + "3Tronics MU30" + ], + "6004": [ + "ISD-V4 Tablet Pen" + ], + "6005": [ + "Hewlett-Packard" + ], + "6495": [ + "GoDEX International Co." + ], + "6964": [ + "Idobo" + ], + "7374": [ + "DATA MODUL" + ], + "7712": [ + "2711 Temperature sensor HUB [SEIICHI]" + ], + "7777": [ + "SEIICHI Technology Co., Ltd." + ], + "8347": [ + "VisTrend Co., Ltd." + ], + "8888": [ + "inLight" + ], + "9048": [ + "NuTesla CDC Serial Emulator" + ], + "9e8f": [ + "Plug Computer Basic [SheevaPlug]" + ], + "a69c": [ + "AICSEMI" + ], + "a8f8": [ + "Bastard Keyboards" + ], + "b2c3": [ + "GNDHog" + ], + "b711": [ + "VuPlus" + ], + "c069": [ + "M500 Laser Mouse" + ], + "c07c": [ + "M-R0017 [G700s Rechargeable Gaming Mouse]" + ], + "c0f4": [ + "DualMiner" + ], + "c580": [ + "HID UNIKEYdongle [F-Response]" + ], + "c5cb": [ + "ARTECH (Artech Technology Design Co., Ltd.)" + ], + "d13e": [ + "Coldcard Wallet" + ], + "dff0": [ + "shapinb" + ], + "e2b5": [ + "JieLi Technology" + ], + "e3b5": [ + "JieLi Technology" + ], + "e5b7": [ + "JieLi Technology" + ], + "eb57": [ + "ZhuHai JieLi Technology" + ], + "eba4": [ + "Aoboco" + ], + "feed": [ + "DOIO Keyboard" + ], + "ffd2": [ + "ZHONG-HUI ELECTRONICS CORP." 
+ ], + "fffe": [ + "Inland (MicroCenter brand)" + ], + "ffff": [ + "Wrong vendor ID" + ] +} diff --git a/ids4.json b/ids4.json new file mode 100644 index 00000000..d25b63d2 --- /dev/null +++ b/ids4.json @@ -0,0 +1,255 @@ +{ + "0000": "Wrong vendor ID", + "0010": "TSI Incorporated", + "0017": "Meyer Instruments (MIS)", + "0024": "Numark Mixtrack", + "0028": "beyerdynamic GmbH & Co. KG beyerdynamic PRO X", + "00f9": "UWP WBDI Device", + "0154": "LW154 Wireless 150N Adapter", + "015c": "Tecno World", + "0280": "CAM(Dongle) [Freenet TV-Stick]", + "0284": "\"FX-USB-AW/-BD\" USB/RS482 Converters, Mitsubishi Electric Corp.", + "0b9a": "Namco Limited", + "0c7c": "TMS International BV", + "0cc7": "Kontron Medical AG", + "0ea3": "RION NL-52 Sound Level Meter", + "0fb4": "TiiTuii Co., Ltd.", + "1021": "Western Digital External HDD", + "103e": "Aim-TTi", + "10a4": "Gunding Cosmopolit 7 Web", + "1105": "Sigma Designs Inc.", + "1106": "VIA Technologies, Inc.", + "1180": "Ricoh Company, Ltd.", + "12c9": "Newmen Tech., Ltd.", + "1305": "Star Micronics", + "1354": "FACTS Engineering LLC", + "14b7": "In2Games Limited", + "14e4": "Broadcom Corp.", + "1556": "CERN", + "15d3": "Symmetric Research", + "1609": "Flash", + "1642": "DataTraveler 101 8GB", + "1662": "Quantum Mini", + "16bd": "Leica Geosystems AG", + "1747": "CML Microcircuits", + "1768": "Unify Software and Solutions GmbH & Co. 
KG OpenStage WL3 VoWLAN IP phone", + "1778": "IPEVO Inc.", + "1802": "TS5000 series", + "1825": "STAR-Dundee Ltd.", + "182d": "Sitecom Europe B.V.", + "1856": "PIXMA TS6250", + "1902": "Endoscope Camera HD", + "1912": "Renesas Technology Corp.", + "19d9": "Denso Ten Limited", + "1a17": "Oticon A/S", + "1a29": "ABOV Semiconductor Co., Ltd.", + "1a59": "000A RM01 [Haag Streit]", + "1a90": "Corsair Voyager GT 16GB", + "1ac2": "DESKO GmbH", + "1b17": "EXO S.A.", + "1b21": "ASMedia Technology Inc.", + "1b3d": "Matrix Orbital", + "1b4f": "SparkFun", + "1cd7": "GMC-Instruments GmbH", + "1d37": "Signal Processing Devices Sweden AB", + "1d6c": "AUKEY Technology Co., Ltd.", + "1d73": "Signal Processing Devices Sweden AB", + "1de7": "0113 [Duet Executive]", + "1e3a": "Continental Automotive Systems Inc.", + "1e9b": "NetCom Sicherheitstechnik GmbH", + "1f18": "TESEQ", + "1f29": "Analogix Semiconductor, Inc.", + "1f36": "ddm hopt+schuler", + "1f71": "Gadmei Electronic Technology Corporation", + "1f85": "Netronix, Inc. / Obreey", + "1fb9": "Lake Shore Cryotronics, Inc.", + "1fd2": "MELFAS Co. Ltd.", + "2008": "Novanta Inc.", + "2017": "NAL Research Corporation", + "201b": "UNI-TEC Electronics", + "2020": "BroadMobi", + "2022": "Antec", + "20d6": "Bensussen Deutsch & Associates", + "20e7": "Atik CCD Camera", + "2164": "Witek System Inc.", + "2179": "Flex design tablet", + "217c": "TempTale, Sensitech", + "21b0": "Grace Industries", + "21c4": "Longsys Electronics (HK) Co., Ltd.", + "21e1": "CAEN S.p.A.", + "222d": "Leifheit - Soehnle", + "2239": "PEIKER acustic GmbH & Co., KG", + "223b": "Crystalfontz America, Inc.", + "2252": "HBGIC Technology Co., Ltd.", + "2257": "On-The-Go-Video", + "22f4": "Olive V-ME102 CDMA modem", + "230a": "DataLocker", + "2312": "LP320B Wireless Presenter [August International]", + "231d": "VKB-sim", + "2321": "iKingdom Corp. 
[iConnectivity]", + "2342": "NIKO", + "2358": "NuTesla Composite HID+CDC", + "23e3": "Christie Digital Systems", + "23e5": "Antelope Audio", + "23e8": "Propellerhead", + "2472": "TOP", + "247f": "Lynx", + "2541": "Chipsailing", + "2550": "Shenzhen EDUP Electronics Technology Co., Ltd.", + "256c": "HUION", + "258a": " [USB chips by: Sino Wealth Electronic Ltd.]", + "25bc": "CETRTA POT", + "2622": "MISSION", + "263c": "SCHULTES", + "2660": "Test", + "2669": "M4S PSK Series Device [M4S PSK]", + "26ce": "ASRock", + "2752": "miniDSP", + "2763": "Primes GmbH", + "276d": "276d:1101", + "2853": "Ralston Instruments, LLC", + "28ba": "Materialise Motion NV", + "2909": "Game Golf Live", + "2914": "Kent Displays, Inc.", + "291a": "Anker Innovation Ltd", + "2947": "Kapelse", + "2982": "Ableton AG", + "2983": "Coyote System SAS", + "29cc": "Kodak Alaris, Inc", + "29df": "CAM(Dongle) [Freenet TV-Stick]", + "29f3": "Resonessence Labs", + "29fe": "Geo Semiconductor", + "2a52": "L Card, LLC", + "2a65": "FreeWave Technologies", + "2a94": "G2touch Co., LTD.", + "2afd": "McIntosh HD USB Audio [McIntosh DA1]", + "2b04": "Duo with WiFi and BLE", + "2b16": "Doccamera", + "2b71": "Flashforge [FlashForge Creator Pro 2 3D Printer]", + "2b73": "Pioneer DJ Corporation", + "2b89": "Unknown", + "2b98": "Glenair Inc.", + "2bd9": "Huddly", + "2beb": "Gateworks Corporation", + "2c33": "Wizapply", + "2cc8": "Hewlett Packard Enterprise", + "2ce4": "ESMART", + "2d01": "Guangdong Zike Technology Co., Ltd", + "2dbc": "Mikroelektronika d.o.o", + "2dee": "QUALCOMM MeigLink", + "2e3c": "Joy-IT", + "2e50": "beyerdynamic GmbH & Co. KG", + "2e8a": "Raspberry Pi Foundation", + "2ea1": "DASAN Electron Co", + "2eb9": "Realtek or Sabrent?", + "2efd": "Filco Co., Ltd.", + "2f68": "Hoksi Technology", + "2fd0": "C*Core Technology Co., Ltd.", + "2fe9": "Shenzhen Xintai Technology Co. 
Ltd", + "2fee": "Holitech", + "300c": "Gyrfalcon Technology Inc.", + "303a": "Espressif Systems", + "30b1": "Bitmain Technologies Inc.", + "30be": "Schiit Audio", + "30d6": "Chroma-Q", + "30de": "KIOXIA EXCERIA PLUS", + "30fa": "Wuxi Instant Microelectronics Co., Ltd.", + "311f": "TrustKey Co., Ltd.", + "3131": "Authentik Systems", + "3151": "Yichip Microelectronics (Hangzhou) Co., Ltd", + "31b1": "Shenzhen Jinduan Electronics Co., Ltd.", + "31b2": "KTMicro", + "31e3": "Wooting", + "31e9": "Solid State Logic, Ltd", + "320f": "Glorious LLC", + "3231": "Kneron, Inc.", + "3232": "Shenzhen Trusda Industrial Co., Ltd.", + "3274": "MicroArray", + "3285": "Nacon", + "3297": "ZSA Technology Labs Inc.", + "32a3": "GoTrust", + "32ac": "Framework Computer BV", + "32cd": "NEC", + "32e4": "ELP-USBFHD06H-BL36IR", + "32e6": "IcSpring Technology", + "332d": "Verbatim GmbH", + "3346": "Cvitek Co. Ltd.", + "335e": "Eight Amps", + "33be": "Syncopated Engineering, Inc.", + "33c4": "Tomahawk Robotics", + "33c8": "Seidl Technologies UG", + "33dd": "Zuki Inc", + "33f7": "Linux Automation GmbH", + "33f8": "Rolling Wireless S.a.r.l.", + "33ff": "nyantec GmbH", + "3434": "Keychron", + "344f": "SCX-3400 Series", + "3455": "Atomos Global Pty Ltd", + "345f": "MacroSilicon", + "3464": "Senscomm Semiconductor, Inc", + "346d": "VendorCo", + "346e": "Gudsen Technology (HK) Co., Ltd (MOZA)", + "349c": "Zhuhai Hongxin Technology Co., Ltd", + "349e": "Token2", + "3542": "Sonova Consumer Hearing", + "3544": "Rusoku technologijos UAB", + "3553": "PCsensor", + "359f": "Shenzhen Sipeed Technology Co., Ltd.", + "35b6": "Orqa d.o.o", + "35f0": "Bitcraze AB", + "35f1": "INFICON", + "369a": "HighSecLabs, Ltd", + "36da": "Record Sure Limited [Recordsure]", + "36e9": "ifanr Inc.", + "3760": "CIN-ergy B.V.", + "37c5": "OpenMV, LLC", + "3802": "LDA Technologies LTD", + "3817": "SleepImage", + "3842": "EVGA", + "386e": "XTX Markets", + "3876": "Fenice Power Co., Ltd", + "38c5": "JetHome LLC", + "3938": "MOSART 
Semiconductor", + "3c93": "QingDao Topscomm", + "413d": "RDing Technology Ltd [PCsensor]", + "4816": "Integrated Webcam", + "4c4a": "JieLi Technology", + "4e4c": "NieLTM TechSolution", + "5041": "Linksys (?)", + "5246": "bladeRF Software Defined Radio", + "5262": "X.Tips", + "5325": "Woolworth GmbH", + "573c": "Xreal Light Microcontroller", + "5888": "3Tronics MU30", + "6004": "ISD-V4 Tablet Pen", + "6005": "Hewlett-Packard", + "6495": "GoDEX International Co.", + "6964": "Idobo", + "7374": "DATA MODUL", + "7712": "2711 Temperature sensor HUB [SEIICHI]", + "7777": "SEIICHI Technology Co., Ltd.", + "8347": "VisTrend Co., Ltd.", + "8888": "inLight", + "9048": "NuTesla CDC Serial Emulator", + "9e8f": "Plug Computer Basic [SheevaPlug]", + "a69c": "AICSEMI", + "a8f8": "Bastard Keyboards", + "b2c3": "GNDHog", + "b711": "VuPlus", + "c069": "M500 Laser Mouse", + "c07c": "M-R0017 [G700s Rechargeable Gaming Mouse]", + "c0f4": "DualMiner", + "c580": "HID UNIKEYdongle [F-Response]", + "c5cb": "ARTECH (Artech Technology Design Co., Ltd.)", + "d13e": "Coldcard Wallet", + "dff0": "shapinb", + "e2b5": "JieLi Technology", + "e3b5": "JieLi Technology", + "e5b7": "JieLi Technology", + "eb57": "ZhuHai JieLi Technology", + "eba4": "Aoboco", + "feed": "DOIO Keyboard", + "ffd2": "ZHONG-HUI ELECTRONICS CORP.", + "fffe": "Inland (MicroCenter brand)", + "ffff": "Wrong vendor ID" +} diff --git a/online-data-tools/build_vendor_archive.py b/online-data-tools/build_vendor_archive.py new file mode 100644 index 00000000..703a5ec6 --- /dev/null +++ b/online-data-tools/build_vendor_archive.py @@ -0,0 +1,186 @@ +#!/usr/bin/env -S uv run --no-project --with zstandard --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["zstandard"] +# /// +"""Package the merged USB-vendor catalog as `usb-vendors.tar.zst`. 
+ +The archive contains a single flat-shape JSON file: + + usb-vendors.json: + {"303a": "Espressif Systems", "0483": "STMicroelectronics", ...} + +This is what `fbuild` embeds at compile time (via `include_bytes!`) to +resolve a USB VID to its vendor name without depending on the `usb-ids` +Rust crate. PID-level resolution is deliberately NOT in the archive — +clients go to the www-branch SQLite-over-HTTP for that. + +Input: a merged `usb-vid.json` in the per-VID schema used elsewhere in +this repo (`{vid: {"vendor": str, "products": [...]}}`); we drop the +products list when emitting the archive. + +Compression: zstd level 19 (high ratio, still fast to decompress in Rust +via the `zstd` crate). The whole thing should be a few KB after +compression — call it "build-time embeddable" without bloating the binary. +""" + +from __future__ import annotations + +import argparse +import io +import json +import re +import sys +import tarfile +from pathlib import Path + +try: + import zstandard as zstd +except ImportError as e: # pragma: no cover + raise SystemExit(f"zstandard missing — uv should auto-install. {e}") + + +# Bumped whenever the embedded archive shape changes — fbuild reads this +# alongside the data so it can refuse to load an incompatible blob. +SCHEMA_VERSION = 2 + +# In-archive payload format (v2): +# +# usb-vendors.txt: "vid:name,vid:name,vid:name,..." +# +# Where `vid` is the 4-hex-digit VID (lowercase) and `name` is the vendor +# name with `,` and `%` percent-escaped (RFC 3986 style, two upper-hex +# digits). Compact for embedding and "inflate on first use" in the Rust +# consumer — see build_archive() for the round-trip invariant and +# parse_compact() below for the reference inflater. 
+_ESCAPE_RE = re.compile(r"[%,]") +_UNESCAPE_RE = re.compile(r"%([0-9A-Fa-f]{2})") + + +def _esc(s: str) -> str: + return _ESCAPE_RE.sub(lambda m: f"%{ord(m.group(0)):02X}", s) + + +def _unesc(s: str) -> str: + return _UNESCAPE_RE.sub(lambda m: chr(int(m.group(1), 16)), s) + + +def flatten_vendors(usb_vid: dict) -> dict[str, str]: + """{vid: {"vendor": str, "products": [...]}} → {vid: vendor}. + + Skips entries whose vendor name is missing / blank so the consumer + never has to special-case an empty value. + """ + out: dict[str, str] = {} + for vid, entry in usb_vid.items(): + if not isinstance(entry, dict): + continue + v = entry.get("vendor") + if not isinstance(v, str) or not v.strip(): + continue + out[vid.lower()] = v.strip() + return dict(sorted(out.items())) + + +def pack_compact(vendors: dict[str, str]) -> str: + """{vid: name} -> 'vid:name,vid:name,...' with %-escaped name fields. + + Round-trip invariant: parse_compact(pack_compact(v)) == v for any v + where keys are lowercase 4-hex VIDs and values are arbitrary unicode + strings (commas and percent signs are safely escaped). + """ + return ",".join( + f"{vid}:{_esc(name)}" for vid, name in sorted(vendors.items()) + ) + + +def parse_compact(s: str) -> dict[str, str]: + """Reference inflater (also used to assert the round-trip in tests). + + The Rust side (`fbuild-core::usb_vendor_db`) implements the same + parser — keep these two in lock-step on any format change. + """ + out: dict[str, str] = {} + if not s: + return out + for chunk in s.split(","): + if not chunk: + continue + vid, sep, name_esc = chunk.partition(":") + if not sep: + continue + out[vid] = _unesc(name_esc) + return out + + +def build_archive(*, vendors: dict[str, str], generated_at: str) -> bytes: + """Return the raw bytes of `usb-vendors.tar.zst`. 
+ + The tar contains: + - usb-vendors.txt (compact `vid:name,vid:name,...` per pack_compact) + - manifest.json (schema_version + generated_at + entry count) + """ + payload = pack_compact(vendors).encode("utf-8") + manifest = json.dumps({ + "schema_version": SCHEMA_VERSION, + "generated_at": generated_at, + "entries": len(vendors), + "filename": "usb-vendors.txt", + "format": "compact-csv-v1", + "format_doc": ( + "ASCII: 'vid:name,vid:name,...'. `vid` is 4-hex-digit lowercase. " + "`name` is %-escaped per RFC-3986 (chars ',' and '%' only)." + ), + }, ensure_ascii=False).encode("utf-8") + + tar_buf = io.BytesIO() + with tarfile.open(fileobj=tar_buf, mode="w") as tf: + for name, blob in (("usb-vendors.txt", payload), + ("manifest.json", manifest)): + info = tarfile.TarInfo(name=name) + info.size = len(blob) + info.mtime = 0 # deterministic — byte-identical archive for unchanged input + tf.addfile(info, io.BytesIO(blob)) + raw = tar_buf.getvalue() + + cctx = zstd.ZstdCompressor(level=19) + return cctx.compress(raw) + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--upstream", required=True, type=Path, + help="Merged usb-vid.json (per-VID schema with products).") + p.add_argument("--out", required=True, type=Path, + help="Output `.tar.zst` path. Overwritten if present.") + p.add_argument("--generated-at", + help="UTC timestamp embedded in manifest.json. 
Defaults to now.") + args = p.parse_args() + + if not args.upstream.is_file(): + print(f"error: {args.upstream} not found", file=sys.stderr) + return 2 + + upstream = json.loads(args.upstream.read_text(encoding="utf-8")) + vendors = flatten_vendors(upstream) + if not vendors: + print(f"error: no vendor entries found in {args.upstream}", file=sys.stderr) + return 2 + + import datetime as _dt + ts = args.generated_at or _dt.datetime.now(_dt.timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) + + blob = build_archive(vendors=vendors, generated_at=ts) + args.out.parent.mkdir(parents=True, exist_ok=True) + args.out.write_bytes(blob) + print( + f"wrote {args.out}: {len(vendors)} vendors, " + f"{len(blob)} bytes (zstd 19, schema={SCHEMA_VERSION})" + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/keep.py b/online-data-tools/keep.py new file mode 100644 index 00000000..91a83408 --- /dev/null +++ b/online-data-tools/keep.py @@ -0,0 +1,59 @@ +#!/usr/bin/env -S uv run --no-project --script +# /// script +# requires-python = ">=3.10" +# /// +"""Collapse a VID's name list in ids3.json down to one chosen entry. + + keep.py + +`` matches the 4-hex-digit key; `` is 0-based into the +existing list at that VID. The chosen name becomes the entry's sole +member (still wrapped in a list so the file shape stays consistent). + +Designed for the iterative dedupe loop driven by `next_dual.py`. Both +files write the same indent=2, sort-by-vid JSON shape so diffs stay +minimal between steps. 
+""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("vid") + p.add_argument("index", type=int) + p.add_argument("--input", default="ids3.json", type=Path) + args = p.parse_args() + + data = json.loads(args.input.read_text(encoding="utf-8")) + vid = args.vid.lower() + if vid not in data: + print(f"error: vid {vid!r} not in {args.input}", file=sys.stderr) + return 2 + entry = data[vid] + if not isinstance(entry, list): + print(f"error: {vid} entry is not a list ({type(entry).__name__})", + file=sys.stderr) + return 2 + if not 0 <= args.index < len(entry): + print(f"error: index {args.index} out of range for {vid} " + f"(len={len(entry)})", file=sys.stderr) + return 2 + + kept = entry[args.index] + data[vid] = [kept] + args.input.write_text( + json.dumps(dict(sorted(data.items())), indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + print(f"{vid}: kept [{args.index}] {kept!r}; dropped {len(entry) - 1} other(s)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/next_dual.py b/online-data-tools/next_dual.py new file mode 100644 index 00000000..ec87e860 --- /dev/null +++ b/online-data-tools/next_dual.py @@ -0,0 +1,40 @@ +#!/usr/bin/env -S uv run --no-project --script +# /// script +# requires-python = ">=3.10" +# /// +"""Print the first VID in a list-shaped IDs JSON that has >=2 candidate +vendor names. Companion to `keep.py` — together they implement the manual +dedupe loop the user asked for: + + next_dual.py # shows the next multi-entry to triage + keep.py # collapses that entry to a single chosen name + next_dual.py # … repeat until empty + +Exits 0 with an empty stdout when no multi-entries remain. 
+""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--input", default="ids3.json", type=Path) + args = p.parse_args() + data = json.loads(args.input.read_text(encoding="utf-8")) + for vid in sorted(data): + v = data[vid] + if isinstance(v, list) and len(v) >= 2: + payload = {"vid": vid, "names": v} + print(json.dumps(payload, indent=2, ensure_ascii=False)) + return 0 + # No multi-entries left. + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/overlay_usb_vid.py b/online-data-tools/overlay_usb_vid.py index 9dc8c94a..fa1e8dc5 100644 --- a/online-data-tools/overlay_usb_vid.py +++ b/online-data-tools/overlay_usb_vid.py @@ -25,17 +25,41 @@ from pathlib import Path -def overlay(upstream: dict, supplement: dict) -> tuple[dict, int]: - """Return (merged_dict, added_vid_count). Upstream is NOT mutated.""" - out = dict(upstream) - added = 0 - for vid, entry in supplement.items(): - if vid in out: +def overlay( + upstream: dict, supplement: dict, *, mode: str = "gap-fill" +) -> tuple[dict, int]: + """Return (merged_dict, changed_vid_count). Upstream is NOT mutated. + + Modes: + - "gap-fill" (default): supplement is consulted only for VIDs missing + from upstream. Use when the supplement is a less-authoritative + gap-filler (live scrape, second-tier source, etc.). + - "vendor-override": supplement is the HIGHER-authority source for + vendor names. For any VID present in both, the supplement's vendor + name replaces upstream's, but the upstream products list is kept + untouched. For VIDs only in supplement, the entry is added verbatim + (products usually empty). This is what the workflow uses for the + curated `vendor_names_inlined.py` supplement. + + `changed_vid_count` is the number of VIDs added OR vendor-renamed. 
+ """ + if mode not in ("gap-fill", "vendor-override"): + raise ValueError(f"unknown mode: {mode!r}") + out = {k: dict(v) for k, v in upstream.items()} # deep-ish copy + changed = 0 + for vid, sup_entry in supplement.items(): + if vid not in out: + out[vid] = dict(sup_entry) + changed += 1 + continue + if mode == "gap-fill": continue - out[vid] = entry - added += 1 - # Sort by VID so the JSON diff stays stable across runs. - return dict(sorted(out.items())), added + # vendor-override: replace name, keep products intact. + sup_name = sup_entry.get("vendor") + if sup_name and out[vid].get("vendor") != sup_name: + out[vid]["vendor"] = sup_name + changed += 1 + return dict(sorted(out.items())), changed def main() -> int: @@ -43,6 +67,9 @@ def main() -> int: p.add_argument("--upstream", required=True, type=Path) p.add_argument("--supplement", required=True, type=Path) p.add_argument("--out", required=True, type=Path) + p.add_argument("--mode", default="gap-fill", + choices=("gap-fill", "vendor-override"), + help="See overlay() docstring for semantics.") args = p.parse_args() upstream = json.loads(args.upstream.read_text(encoding="utf-8")) @@ -50,14 +77,15 @@ def main() -> int: print(f"no supplement at {args.supplement} — nothing to overlay") return 0 supplement = json.loads(args.supplement.read_text(encoding="utf-8")) - merged, added = overlay(upstream, supplement) + merged, changed = overlay(upstream, supplement, mode=args.mode) args.out.write_text( json.dumps(merged, indent=2, sort_keys=False) + "\n", encoding="utf-8", ) print( - f"overlaid {args.supplement.name} onto {args.upstream.name}: " - f"+{added} VID(s), total={len(merged)}" + f"overlaid {args.supplement.name} onto {args.upstream.name} " + f"(mode={args.mode}): {changed} VID(s) added or renamed, " + f"total={len(merged)}" ) return 0 diff --git a/online-data-tools/scrape_ids.py b/online-data-tools/scrape_ids.py new file mode 100644 index 00000000..98d01b41 --- /dev/null +++ b/online-data-tools/scrape_ids.py @@ 
-0,0 +1,328 @@ +#!/usr/bin/env -S uv run --no-project --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["requests", "beautifulsoup4"] +# /// +"""Scrape vendor names for a list of USB VIDs from usb-ids.gowdy.us. + +Reads a flat list of 4-hex-digit VIDs (one per line, whitespace-trimmed) +from `--input`, fetches https://usb-ids.gowdy.us/read/UD/ for each, +parses the page with BeautifulSoup, and writes/updates a JSON mapping +`{vid_lower_hex: vendor_name_string}` to `--output`. + +Behavior: + +- **Single-threaded** with a polite 0.5 s base delay between requests. +- **Incremental save**: after each successful scrape the JSON is rewritten, + so a Ctrl-C never loses prior work. Re-running resumes where we left off + (entries already in the JSON are skipped unless `--refetch` is passed). +- **Vendor-not-found** pages → "not found". +- **HTTP 404 / other transient errors** → "error". +- **Exponential backoff** (1 → 2 → 4 → 8 → 16 → 32 → 60 s, capped) on + network errors and 5xx; the request is retried up to 5 times before + giving up with "error". +- **Fail2ban probe**: after 3 consecutive 404s, we pause and re-fetch a + list of canary VIDs (known-good entries from a recent run). If the + canaries also 404, we assume rate-limit / IP-block and back off 5 min + before continuing; if they succeed, the 404s were real and we continue. + +Run: + scrape_ids.py --input ids.txt --output ids.json +""" + +from __future__ import annotations + +import argparse +import json +import re +import ssl +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Callable + +try: + from bs4 import BeautifulSoup # type: ignore # noqa: F401 (kept for fallback) +except ImportError as e: # pragma: no cover — uv installs it from PEP 723 + raise SystemExit(f"bs4 missing — uv should auto-install. 
{e}") + + +BASE_URL = "https://usb-ids.gowdy.us/read/UD" + +# Known-good canary VIDs, used to detect fail2ban / IP-block when we see +# a burst of 404s. These are deliberately stable, well-known vendors. +CANARY_VIDS = ("303a", "0483", "10c4", "1a86", "2341", "239a", "16c0") + +# Polite delay between successful requests, in seconds. +BASE_DELAY = 0.5 + +# Backoff schedule for retries (caps at 60 s per spec). +BACKOFF_STEPS = (1, 2, 4, 8, 16, 32, 60) + +# Number of consecutive 404s that triggers a canary probe. +CANARY_TRIGGER = 3 + +# Sleep duration when fail2ban is suspected, in seconds. +FAIL2BAN_SLEEP = 300 + + +def _make_ssl_ctx() -> ssl.SSLContext: + # gowdy.us has had self-signed / expired-cert episodes in the past. + # The scraped page is structural / public, so accept the cert. + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return ctx + + +def _fetch_html(url: str, *, timeout: float = 30.0) -> tuple[int, str]: + """Return (status_code, body) for the URL. Raises on network errors.""" + req = urllib.request.Request(url, headers={ + "User-Agent": "fbuild-bot/1.0 (+https://github.com/FastLED/fbuild)", + "Accept": "text/html", + }) + try: + with urllib.request.urlopen(req, timeout=timeout, context=_make_ssl_ctx()) as resp: + return resp.status, resp.read().decode("utf-8", errors="replace") + except urllib.error.HTTPError as e: + # 4xx and 5xx come back here. Read the body anyway so the caller can + # decide what to log; the status is what matters for the policy. + try: + body = e.read().decode("utf-8", errors="replace") + except Exception: + body = "" + return e.code, body + + +# The vendor name on a /read/UD/ page lives in: +#
+#

Discussion

+#
+#

Name: +#

Bertold +#

2021-11-10 14:17:35 +# +# The `

` tags use HTML 4.01 implicit-close style. BeautifulSoup's +# html.parser concatenates the children, polluting the captured name +# with the author + timestamp. Regex against the raw HTML stops at the +# next `<` (the next `

` opening), which is exactly what we want. +_VENDOR_NAME_RE = re.compile( + r"\s*Name:\s*(.+?)\s*(?:<|$)", + re.IGNORECASE | re.DOTALL, +) + + +def parse_vendor_name(html: str) -> str | None: + """First-match vendor name (legacy single-value mode).""" + m = _VENDOR_NAME_RE.search(html) + if not m: + return None + name = m.group(1).strip() + return name or None + + +def parse_all_names(html: str) -> list[str]: + """Every `Name: ...` row on the page, in document order, de-duped while + preserving first-seen order. Used by --all-names mode where the caller + wants to see every submission / revision the gowdy.us page carries for + a given VID.""" + seen: set[str] = set() + out: list[str] = [] + for m in _VENDOR_NAME_RE.finditer(html): + name = m.group(1).strip() + if not name or name in seen: + continue + seen.add(name) + out.append(name) + return out + + +def scrape_one( + vid: str, + *, + all_names: bool = False, + fetch: Callable[[str], tuple[int, str]] = _fetch_html, +) -> str | list[str]: + """Return the vendor verdict for a single VID. + + Modes: + - single (default): returns the first-match vendor name as `str`, or + "not found" / "error". + - all_names=True: returns every `Name:` row as `list[str]`. Empty + list = page loaded but had no names. `["error"]` + sentinel = HTTP/network failure. + + A 404 returns "error" immediately (no retries — the resource genuinely + doesn't exist; the caller looks at consecutive 404 counts to decide + whether to canary-probe). 5xx + network errors retry with exponential + backoff per BACKOFF_STEPS. 
+ """ + url = f"{BASE_URL}/{vid.upper()}" + for attempt, sleep_seconds in enumerate((0, *BACKOFF_STEPS), start=1): + if sleep_seconds: + time.sleep(sleep_seconds) + try: + status, body = fetch(url) + except (urllib.error.URLError, TimeoutError, ConnectionError) as e: + print(f" attempt {attempt} {url}: network error: {e}", file=sys.stderr) + continue + if status == 200: + if all_names: + return parse_all_names(body) + name = parse_vendor_name(body) + return name if name else "not found" + if status == 404: + return ["error"] if all_names else "error" + # 5xx etc — retry with the next backoff step. + print(f" attempt {attempt} {url}: HTTP {status}", file=sys.stderr) + return ["error"] if all_names else "error" + + +def canary_probe(*, fetch: Callable[[str], tuple[int, str]] = _fetch_html) -> bool: + """Re-fetch a small set of known-good VIDs to decide whether the + server is genuinely 404-ing or whether we've been IP-blocked. + + Returns True if at least one canary returns 200 (we are NOT blocked), + False if every canary 404s (probably fail2ban). + """ + print("canary probe: checking known-good VIDs…", file=sys.stderr) + for vid in CANARY_VIDS: + try: + status, _body = fetch(f"{BASE_URL}/{vid.upper()}") + except Exception as e: + print(f" canary {vid}: {e}", file=sys.stderr) + continue + print(f" canary {vid}: HTTP {status}", file=sys.stderr) + if status == 200: + return True + time.sleep(BASE_DELAY) + return False + + +def load_ids(path: Path) -> list[str]: + """Parse the input file. 
Tolerates trailing whitespace / tabs per line.""" + out: list[str] = [] + for raw in path.read_text(encoding="utf-8").splitlines(): + tok = raw.strip().split()[0] if raw.strip() else "" + tok = tok.strip().lower() + if not tok: + continue + if not re.fullmatch(r"[0-9a-f]{1,4}", tok): + print(f"warning: skipping malformed line: {raw!r}", file=sys.stderr) + continue + out.append(tok.zfill(4)) + return out + + +def load_resume(path: Path) -> dict: + """Returns a dict of either {vid: str} (single mode) or {vid: list[str]} + (all-names mode); the value shape is preserved verbatim from disk.""" + if not path.is_file(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + print(f"warning: {path} not valid JSON — starting fresh", file=sys.stderr) + return {} + if not isinstance(data, dict): + return {} + return {str(k).lower(): v for k, v in data.items()} + + +def save(path: Path, data: dict) -> None: + path.write_text( + json.dumps(dict(sorted(data.items())), indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + + +def _is_error(verdict) -> bool: + """Treat both the single- and list-mode error sentinels as 'error' + for the consecutive-404 / canary policy.""" + return verdict == "error" or verdict == ["error"] + + +def _bucket(verdict) -> str: + if _is_error(verdict): + return "error" + if verdict == "not found": + return "not found" + if isinstance(verdict, list) and not verdict: + return "not found" + return "named" + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--input", default="ids.txt", type=Path) + p.add_argument("--output", default="ids.json", type=Path) + p.add_argument("--all-names", action="store_true", + help="Collect every `Name:` row per page and write values " + "as `list[str]`. 
Default emits the first match as a " + "single string.") + p.add_argument("--refetch", action="store_true", + help="Ignore existing entries in --output and re-scrape every VID.") + p.add_argument("--delay", type=float, default=BASE_DELAY, + help="Polite delay between successful requests, in seconds.") + args = p.parse_args() + + vids = load_ids(args.input) + results: dict = {} if args.refetch else load_resume(args.output) + + todo = [v for v in vids if v not in results] + print(f"input: {len(vids)} VID(s), {len(todo)} to scrape " + f"(resuming {len(results)}, mode={'all-names' if args.all_names else 'single'})", + file=sys.stderr) + + consecutive_404 = 0 + for i, vid in enumerate(todo, start=1): + url = f"{BASE_URL}/{vid.upper()}" + print(f"[{i}/{len(todo)}] {url}", file=sys.stderr) + verdict = scrape_one(vid, all_names=args.all_names) + results[vid] = verdict + save(args.output, results) + + if _is_error(verdict): + consecutive_404 += 1 + else: + consecutive_404 = 0 + + if consecutive_404 >= CANARY_TRIGGER: + if canary_probe(): + print(f" canaries OK — the 404 streak is genuine, continuing", + file=sys.stderr) + consecutive_404 = 0 + else: + print(f" every canary 404'd — assuming fail2ban; sleeping " + f"{FAIL2BAN_SLEEP}s before continuing", file=sys.stderr) + time.sleep(FAIL2BAN_SLEEP) + # After the cooldown, retry the canary once more. If still + # bad, exit so a human can investigate rather than burning + # the whole list against a blocked endpoint. + if not canary_probe(): + print("ERROR: canaries still blocked after cooldown — exiting", + file=sys.stderr) + return 2 + consecutive_404 = 0 + + time.sleep(args.delay) + + # Summary. + counts: dict[str, int] = {} + for v in results.values(): + b = _bucket(v) + counts[b] = counts.get(b, 0) + 1 + print( + f"\ndone. 
{len(results)} total: " + f"named={counts.get('named', 0)} " + f"not_found={counts.get('not found', 0)} " + f"error={counts.get('error', 0)}", + file=sys.stderr, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/online-data-tools/test_archive.py b/online-data-tools/test_archive.py new file mode 100644 index 00000000..020c93a1 --- /dev/null +++ b/online-data-tools/test_archive.py @@ -0,0 +1,148 @@ +#!/usr/bin/env -S uv run --no-project --with pytest --with zstandard --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["pytest", "zstandard"] +# /// +"""Tests for build_vendor_archive: tar.zst packaging of the flat vendor map. + +The archive shape is a contract: `fbuild-core` will `include_bytes!` the +output, decompress with the `zstd` crate, untar with `tar`, and parse +`usb-vendors.json`. Any drift here breaks fbuild's USB-name lookup. +""" + +from __future__ import annotations + +import io +import json +import sys +import tarfile +from pathlib import Path + +import pytest +import zstandard as zstd + +HERE = Path(__file__).resolve().parent +sys.path.insert(0, str(HERE)) +import build_vendor_archive # noqa: E402 + + +SAMPLE_USB_VID = { + "303a": {"vendor": "Espressif Systems", "products": [["4002", "ESP32-S3"]]}, + "10c4": {"vendor": "Silicon Labs", "products": [["ea60", "CP210x"]]}, + "0403": {"vendor": "FTDI", "products": [["6001", "FT232"]]}, + "dead": {"vendor": "", "products": []}, # blank → skipped + "BEEF": {"vendor": "Mixed Case In", "products": []}, # key lowered +} + + +def _extract(blob: bytes) -> dict[str, bytes]: + """Decompress + untar `blob` → {filename: file_bytes}.""" + raw = zstd.ZstdDecompressor().decompress(blob) + out: dict[str, bytes] = {} + with tarfile.open(fileobj=io.BytesIO(raw), mode="r") as tf: + for member in tf.getmembers(): + f = tf.extractfile(member) + assert f is not None + out[member.name] = f.read() + return out + + +def test_flatten_drops_blank_vendor_and_lowercases_keys() -> None: + 
flat = build_vendor_archive.flatten_vendors(SAMPLE_USB_VID) + assert flat == { + "0403": "FTDI", + "10c4": "Silicon Labs", + "303a": "Espressif Systems", + "beef": "Mixed Case In", + } + + +def test_pack_compact_round_trip() -> None: + """Round-trip the compact format through pack + parse for tricky inputs.""" + tricky = { + "0001": "plain ascii", + "0002": "comma, in, the, name", # commas must be escaped + "0003": "percent 100% off", # literal % must be escaped + "0004": "both 50%, off", # both + "0005": "unicode emdash — and é", # arbitrary unicode passes through + "0006": "", # empty value preserved + "0007": "trailing %25 literal", # literal "%25" in input must survive + } + packed = build_vendor_archive.pack_compact(tricky) + # No bare comma can appear inside a name field. + for chunk in packed.split(","): + vid, sep, name = chunk.partition(":") + assert sep == ":", f"chunk missing colon: {chunk!r}" + assert "," not in name, f"raw comma leaked into name: {chunk!r}" + recovered = build_vendor_archive.parse_compact(packed) + assert recovered == tricky + + +def test_pack_compact_handles_empty() -> None: + assert build_vendor_archive.pack_compact({}) == "" + assert build_vendor_archive.parse_compact("") == {} + + +def test_archive_round_trip_decompress() -> None: + flat = build_vendor_archive.flatten_vendors(SAMPLE_USB_VID) + blob = build_vendor_archive.build_archive( + vendors=flat, generated_at="2026-06-21T00:00:00Z", + ) + files = _extract(blob) + # Two well-known files inside. + assert set(files) == {"usb-vendors.txt", "manifest.json"} + # Compact payload round-trips through parse_compact. + recovered = build_vendor_archive.parse_compact( + files["usb-vendors.txt"].decode("utf-8") + ) + assert recovered == flat + # Manifest carries the contract metadata. 
+ manifest = json.loads(files["manifest.json"]) + assert manifest["schema_version"] == build_vendor_archive.SCHEMA_VERSION + assert manifest["entries"] == len(flat) + assert manifest["generated_at"] == "2026-06-21T00:00:00Z" + assert manifest["filename"] == "usb-vendors.txt" + assert manifest["format"] == "compact-csv-v1" + + +def test_archive_is_deterministic_for_same_input() -> None: + """zstd is deterministic given identical input; tarfile gets mtime=0 to + match. Same vendors + same timestamp => byte-identical archive (so git + sees no diff on no-op nightly runs).""" + flat = build_vendor_archive.flatten_vendors(SAMPLE_USB_VID) + a = build_vendor_archive.build_archive(vendors=flat, generated_at="X") + b = build_vendor_archive.build_archive(vendors=flat, generated_at="X") + assert a == b + + +def test_main_emits_file(tmp_path: Path) -> None: + src = tmp_path / "usb-vid.json" + src.write_text(json.dumps(SAMPLE_USB_VID), encoding="utf-8") + out = tmp_path / "vendors.tar.zst" + sys.argv = [ + "build_vendor_archive.py", + "--upstream", str(src), + "--out", str(out), + "--generated-at", "2026-06-21T00:00:00Z", + ] + rc = build_vendor_archive.main() + assert rc == 0 + assert out.stat().st_size > 0 + files = _extract(out.read_bytes()) + flat = build_vendor_archive.parse_compact(files["usb-vendors.txt"].decode("utf-8")) + assert "303a" in flat and flat["303a"] == "Espressif Systems" + + +def test_main_rejects_empty_input(tmp_path: Path) -> None: + src = tmp_path / "empty.json" + src.write_text(json.dumps({"dead": {"vendor": "", "products": []}}), + encoding="utf-8") + out = tmp_path / "x.tar.zst" + sys.argv = ["build_vendor_archive.py", + "--upstream", str(src), "--out", str(out)] + rc = build_vendor_archive.main() + assert rc == 2 # refuse to write an empty archive + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-v"])) diff --git a/online-data-tools/test_gowdy.py b/online-data-tools/test_gowdy.py index 8b0753d3..cefda5cb 100644 --- 
a/online-data-tools/test_gowdy.py +++ b/online-data-tools/test_gowdy.py @@ -130,7 +130,7 @@ def fetch(url: str) -> str: # overlay_usb_vid # --------------------------------------------------------------------------- # -def test_overlay_adds_missing_vids_only() -> None: +def test_overlay_gap_fill_adds_missing_vids_only() -> None: upstream = { "10c4": {"vendor": "Silicon Labs", "products": [["ea60", "CP210x"]]}, } @@ -138,8 +138,8 @@ def test_overlay_adds_missing_vids_only() -> None: "303a": {"vendor": "Espressif Systems", "products": [["4002", ""]]}, "10c4": {"vendor": "WRONG NAME", "products": [["dead", "bad"]]}, # must NOT win } - merged, added = overlay_usb_vid.overlay(upstream, supplement) - assert added == 1 + merged, changed = overlay_usb_vid.overlay(upstream, supplement, mode="gap-fill") + assert changed == 1 # Upstream wins for 10c4 — no merging of name OR products. assert merged["10c4"]["vendor"] == "Silicon Labs" assert merged["10c4"]["products"] == [["ea60", "CP210x"]] @@ -149,10 +149,61 @@ def test_overlay_adds_missing_vids_only() -> None: assert list(merged.keys()) == ["10c4", "303a"] +def test_overlay_vendor_override_replaces_name_keeps_products() -> None: + """In vendor-override mode the supplement is the higher-authority source + for the vendor NAME but never disturbs the upstream products list.""" + upstream = { + "10c4": { + "vendor": "Silicon Labs", + "products": [["ea60", "CP210x"], ["ea71", "CP2102N"]], + }, + "0403": {"vendor": "Future Technology Devices", "products": [["6001", "FT232"]]}, + } + supplement = { + "10c4": {"vendor": "Silicon Laboratories Inc.", "products": []}, + "303a": {"vendor": "Espressif Systems", "products": []}, + } + merged, changed = overlay_usb_vid.overlay( + upstream, supplement, mode="vendor-override", + ) + # 10c4 renamed; 303a added → 2 changes. + assert changed == 2 + # Renamed vendor; products preserved verbatim. + assert merged["10c4"]["vendor"] == "Silicon Laboratories Inc." 
+ assert merged["10c4"]["products"] == [["ea60", "CP210x"], ["ea71", "CP2102N"]] + # Untouched upstream entry stays as-is. + assert merged["0403"]["vendor"] == "Future Technology Devices" + # New entry added (products empty because supplement is vendor-only). + assert merged["303a"]["vendor"] == "Espressif Systems" + assert merged["303a"]["products"] == [] + + +def test_overlay_vendor_override_skips_when_name_unchanged() -> None: + """If the supplement repeats the upstream name verbatim, no change.""" + upstream = {"10c4": {"vendor": "Silicon Labs", "products": []}} + supplement = {"10c4": {"vendor": "Silicon Labs", "products": []}} + _merged, changed = overlay_usb_vid.overlay( + upstream, supplement, mode="vendor-override", + ) + assert changed == 0 + + +def test_overlay_invalid_mode_rejected() -> None: + with pytest.raises(ValueError): + overlay_usb_vid.overlay({}, {}, mode="bogus") + + def test_overlay_does_not_mutate_input() -> None: upstream = {"10c4": {"vendor": "Silicon Labs", "products": []}} overlay_usb_vid.overlay(upstream, {"303a": {"vendor": "X", "products": []}}) assert "303a" not in upstream + # vendor-override case + upstream2 = {"10c4": {"vendor": "Silicon Labs", "products": []}} + overlay_usb_vid.overlay( + upstream2, {"10c4": {"vendor": "Other", "products": []}}, + mode="vendor-override", + ) + assert upstream2["10c4"]["vendor"] == "Silicon Labs" def test_overlay_main_emits_file(tmp_path: Path) -> None: diff --git a/online-data-tools/test_inlined.py b/online-data-tools/test_inlined.py new file mode 100644 index 00000000..da539afa --- /dev/null +++ b/online-data-tools/test_inlined.py @@ -0,0 +1,121 @@ +#!/usr/bin/env -S uv run --no-project --with pytest --script +# /// script +# requires-python = ">=3.10" +# dependencies = ["pytest"] +# /// +"""Tests for vendor_names_inlined.py — the canonical curated overlay. 
+ +Locks in the known-critical VID -> vendor name pairs that motivated +introducing the overlay in the first place (issue #718): 0x303A +(Espressif), 0x2E8A (Raspberry Pi Foundation), plus several other +common-MCU VIDs referenced by mcu_to_vid.json. If anyone removes one +of these from the inlined dict the headline VID:PID -> board query on +the www page silently breaks, so the regression here is intentional. + +Also asserts shape invariants: keys are 4-hex-digit lowercase, values +are non-empty strings, no entry duplicates, no HTML entities or NBSP +characters slipped past the curation pipeline. +""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path + +import pytest + +HERE = Path(__file__).resolve().parent +sys.path.insert(0, str(HERE)) +import vendor_names_inlined # noqa: E402 + + +_HEX4_LOWER = re.compile(r"^[0-9a-f]{4}$") + + +# --------------------------------------------------------------------------- # +# Critical-VID locks (will fail loudly if a curator drops them). 
+# --------------------------------------------------------------------------- # + +@pytest.mark.parametrize("vid, expected_substring", [ + ("303a", "Espressif"), + ("2e8a", "Raspberry Pi"), + ("1b4f", "SparkFun"), + ("2914", "Kent"), # Kent Displays + ("ffff", "Wrong vendor ID"), # all-bits-set sentinel +]) +def test_critical_vids_present(vid: str, expected_substring: str) -> None: + assert vid in vendor_names_inlined.VENDOR_NAMES, ( + f"critical VID 0x{vid} dropped from inlined overlay; restore from " + f"ids4.json" + ) + name = vendor_names_inlined.VENDOR_NAMES[vid] + assert expected_substring.lower() in name.lower(), ( + f"0x{vid} now maps to {name!r}, expected substring {expected_substring!r}" + ) + + +# --------------------------------------------------------------------------- # +# Shape invariants +# --------------------------------------------------------------------------- # + +def test_all_keys_are_4_hex_lower() -> None: + bad = [k for k in vendor_names_inlined.VENDOR_NAMES if not _HEX4_LOWER.match(k)] + assert not bad, f"keys must be 4-hex-digit lowercase; bad: {bad[:10]}" + + +def test_all_values_are_non_empty_strings() -> None: + bad = { + k: v for k, v in vendor_names_inlined.VENDOR_NAMES.items() + if not (isinstance(v, str) and v.strip()) + } + assert not bad, f"values must be non-empty strings; bad: {list(bad.items())[:5]}" + + +def test_no_html_entities_or_nbsp_survive() -> None: + """The curation pipeline html.unescape()s and NFKCs all values; if any + entity-encoded or NBSP-containing strings re-appear, we regressed.""" + entity_re = re.compile(r"&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);") + bad = [] + for k, v in vendor_names_inlined.VENDOR_NAMES.items(): + if "\xa0" in v or entity_re.search(v): + bad.append((k, v)) + assert not bad, f"raw HTML entities / NBSP survived curation: {bad[:5]}" + + +def test_no_duplicate_keys_or_blank_entries() -> None: + keys = list(vendor_names_inlined.VENDOR_NAMES.keys()) + assert len(set(keys)) == len(keys), 
"duplicate keys (post-dict — impossible?)" + # Round-trip via JSON to catch any non-serializable garbage that + # snuck in via copy-paste. + blob = json.dumps(vendor_names_inlined.VENDOR_NAMES, ensure_ascii=False) + rt = json.loads(blob) + assert rt == vendor_names_inlined.VENDOR_NAMES + + +# --------------------------------------------------------------------------- # +# as_supplement() — the overlay-compatible export +# --------------------------------------------------------------------------- # + +def test_as_supplement_shape() -> None: + sup = vendor_names_inlined.as_supplement() + assert isinstance(sup, dict) + assert len(sup) == len(vendor_names_inlined.VENDOR_NAMES) + # Spot-check the 303a entry shape matches usb-vid.json. + e = sup["303a"] + assert e == {"vendor": "Espressif Systems", "products": []} + + +def test_as_supplement_main_writes_overlay_file(tmp_path: Path) -> None: + out = tmp_path / "inlined.json" + sys.argv = ["vendor_names_inlined.py", "--out", str(out)] + rc = vendor_names_inlined.main() + assert rc == 0 + data = json.loads(out.read_text(encoding="utf-8")) + assert "303a" in data and data["303a"]["vendor"] == "Espressif Systems" + assert data["303a"]["products"] == [] + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-v"])) diff --git a/online-data-tools/vendor_names_inlined.py b/online-data-tools/vendor_names_inlined.py new file mode 100644 index 00000000..1500513f --- /dev/null +++ b/online-data-tools/vendor_names_inlined.py @@ -0,0 +1,330 @@ +#!/usr/bin/env -S uv run --no-project --script +# /// script +# requires-python = ">=3.10" +# /// +"""Inlined USB-vendor-name supplement, curated from usb-ids.gowdy.us. + +This file is the canonical source of the VID -> vendor-name overlay we ship +to fill gaps in the public usb.ids text databases (Rust `usb-ids` crate, +linux-usb.org, Fedora hwdata) — notably 0x303A Espressif, 0x2E8A Raspberry +Pi Foundation, and ~250 other newer VIDs. 
+ +Curation history is preserved in the repo: + - `ids.txt` — input list of VIDs missing from the upstream sources + - `ids.json` — first-pass scrape (single first-match name per VID) + - `ids2.json` — second-pass scrape (every `Name:` revision per VID) + - `ids3.json` — manually triaged dedupe of multi-name entries + - `ids4.json` — flattened {vid: name} after dedupe + - `vendor_names_inlined.py` (this file) — generated from ids4.json, + committed as a Python literal so the workflow does not need to hit + gowdy.us live every nightly run. + +To regenerate after editing ids4.json: + + uv run --no-project --script - <<'EOF' + import json + d = json.load(open("ids4.json")) + ... # see header comment in the generator + EOF + +CLI: emit the inlined data as a `usb-vid.json`-shaped JSON dict so +`overlay_usb_vid.py` can union it onto the upstream merge: + + vendor_names_inlined.py --out /tmp/inlined-vendor-supplement.json +""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path + + +# {vid_lower_hex: vendor_name} +VENDOR_NAMES: dict[str, str] = { + "0000": "Wrong vendor ID", + "0010": "TSI Incorporated", + "0017": "Meyer Instruments (MIS)", + "0024": "Numark Mixtrack", + "0028": "beyerdynamic GmbH & Co. 
KG beyerdynamic PRO X", + "00f9": "UWP WBDI Device", + "0154": "LW154 Wireless 150N Adapter", + "015c": "Tecno World", + "0280": "CAM(Dongle) [Freenet TV-Stick]", + "0284": "\"FX-USB-AW/-BD\" USB/RS482 Converters, Mitsubishi Electric Corp.", + "0b9a": "Namco Limited", + "0c7c": "TMS International BV", + "0cc7": "Kontron Medical AG", + "0ea3": "RION NL-52 Sound Level Meter", + "0fb4": "TiiTuii Co., Ltd.", + "1021": "Western Digital External HDD", + "103e": "Aim-TTi", + "10a4": "Gunding Cosmopolit 7 Web", + "1105": "Sigma Designs Inc.", + "1106": "VIA Technologies, Inc.", + "1180": "Ricoh Company, Ltd.", + "12c9": "Newmen Tech., Ltd.", + "1305": "Star Micronics", + "1354": "FACTS Engineering LLC", + "14b7": "In2Games Limited", + "14e4": "Broadcom Corp.", + "1556": "CERN", + "15d3": "Symmetric Research", + "1609": "Flash", + "1642": "DataTraveler 101 8GB", + "1662": "Quantum Mini", + "16bd": "Leica Geosystems AG", + "1747": "CML Microcircuits", + "1768": "Unify Software and Solutions GmbH & Co. 
KG OpenStage WL3 VoWLAN IP phone", + "1778": "IPEVO Inc.", + "1802": "TS5000 series", + "1825": "STAR-Dundee Ltd.", + "182d": "Sitecom Europe B.V.", + "1856": "PIXMA TS6250", + "1902": "Endoscope Camera HD", + "1912": "Renesas Technology Corp.", + "19d9": "Denso Ten Limited", + "1a17": "Oticon A/S", + "1a29": "ABOV Semiconductor Co., Ltd.", + "1a59": "000A RM01 [Haag Streit]", + "1a90": "Corsair Voyager GT 16GB", + "1ac2": "DESKO GmbH", + "1b17": "EXO S.A.", + "1b21": "ASMedia Technology Inc.", + "1b3d": "Matrix Orbital", + "1b4f": "SparkFun", + "1cd7": "GMC-Instruments GmbH", + "1d37": "Signal Processing Devices Sweden AB", + "1d6c": "AUKEY Technology Co., Ltd.", + "1d73": "Signal Processing Devices Sweden AB", + "1de7": "0113 [Duet Executive]", + "1e3a": "Continental Automotive Systems Inc.", + "1e9b": "NetCom Sicherheitstechnik GmbH", + "1f18": "TESEQ", + "1f29": "Analogix Semiconductor, Inc.", + "1f36": "ddm hopt+schuler", + "1f71": "Gadmei Electronic Technology Corporation", + "1f85": "Netronix, Inc. / Obreey", + "1fb9": "Lake Shore Cryotronics, Inc.", + "1fd2": "MELFAS Co. Ltd.", + "2008": "Novanta Inc.", + "2017": "NAL Research Corporation", + "201b": "UNI-TEC Electronics", + "2020": "BroadMobi", + "2022": "Antec", + "20d6": "Bensussen Deutsch & Associates", + "20e7": "Atik CCD Camera", + "2164": "Witek System Inc.", + "2179": "Flex design tablet", + "217c": "TempTale, Sensitech", + "21b0": "Grace Industries", + "21c4": "Longsys Electronics (HK) Co., Ltd.", + "21e1": "CAEN S.p.A.", + "222d": "Leifheit - Soehnle", + "2239": "PEIKER acustic GmbH & Co., KG", + "223b": "Crystalfontz America, Inc.", + "2252": "HBGIC Technology Co., Ltd.", + "2257": "On-The-Go-Video", + "22f4": "Olive V-ME102 CDMA modem", + "230a": "DataLocker", + "2312": "LP320B Wireless Presenter [August International]", + "231d": "VKB-sim", + "2321": "iKingdom Corp. 
[iConnectivity]", + "2342": "NIKO", + "2358": "NuTesla Composite HID+CDC", + "23e3": "Christie Digital Systems", + "23e5": "Antelope Audio", + "23e8": "Propellerhead", + "2472": "TOP", + "247f": "Lynx", + "2541": "Chipsailing", + "2550": "Shenzhen EDUP Electronics Technology Co., Ltd.", + "256c": "HUION", + "258a": " [USB chips by: Sino Wealth Electronic Ltd.]", + "25bc": "CETRTA POT", + "2622": "MISSION", + "263c": "SCHULTES", + "2660": "Test", + "2669": "M4S PSK Series Device [M4S PSK]", + "26ce": "ASRock", + "2752": "miniDSP", + "2763": "Primes GmbH", + "276d": "276d:1101", + "2853": "Ralston Instruments, LLC", + "28ba": "Materialise Motion NV", + "2909": "Game Golf Live", + "2914": "Kent Displays, Inc.", + "291a": "Anker Innovation Ltd", + "2947": "Kapelse", + "2982": "Ableton AG", + "2983": "Coyote System SAS", + "29cc": "Kodak Alaris, Inc", + "29df": "CAM(Dongle) [Freenet TV-Stick]", + "29f3": "Resonessence Labs", + "29fe": "Geo Semiconductor", + "2a52": "L Card, LLC", + "2a65": "FreeWave Technologies", + "2a94": "G2touch Co., LTD.", + "2afd": "McIntosh HD USB Audio [McIntosh DA1]", + "2b04": "Duo with WiFi and BLE", + "2b16": "Doccamera", + "2b71": "Flashforge [FlashForge Creator Pro 2 3D Printer]", + "2b73": "Pioneer DJ Corporation", + "2b89": "Unknown", + "2b98": "Glenair Inc.", + "2bd9": "Huddly", + "2beb": "Gateworks Corporation", + "2c33": "Wizapply", + "2cc8": "Hewlett Packard Enterprise", + "2ce4": "ESMART", + "2d01": "Guangdong Zike Technology Co., Ltd", + "2dbc": "Mikroelektronika d.o.o", + "2dee": "QUALCOMM MeigLink", + "2e3c": "Joy-IT", + "2e50": "beyerdynamic GmbH & Co. KG", + "2e8a": "Raspberry Pi Foundation", + "2ea1": "DASAN Electron Co", + "2eb9": "Realtek or Sabrent?", + "2efd": "Filco Co., Ltd.", + "2f68": "Hoksi Technology", + "2fd0": "C*Core Technology Co., Ltd.", + "2fe9": "Shenzhen Xintai Technology Co. 
Ltd", + "2fee": "Holitech", + "300c": "Gyrfalcon Technology Inc.", + "303a": "Espressif Systems", + "30b1": "Bitmain Technologies Inc.", + "30be": "Schiit Audio", + "30d6": "Chroma-Q", + "30de": "KIOXIA EXCERIA PLUS", + "30fa": "Wuxi Instant Microelectronics Co., Ltd.", + "311f": "TrustKey Co., Ltd.", + "3131": "Authentik Systems", + "3151": "Yichip Microelectronics (Hangzhou) Co., Ltd", + "31b1": "Shenzhen Jinduan Electronics Co., Ltd.", + "31b2": "KTMicro", + "31e3": "Wooting", + "31e9": "Solid State Logic, Ltd", + "320f": "Glorious LLC", + "3231": "Kneron, Inc.", + "3232": "Shenzhen Trusda Industrial Co., Ltd.", + "3274": "MicroArray", + "3285": "Nacon", + "3297": "ZSA Technology Labs Inc.", + "32a3": "GoTrust", + "32ac": "Framework Computer BV", + "32cd": "NEC", + "32e4": "ELP-USBFHD06H-BL36IR", + "32e6": "IcSpring Technology", + "332d": "Verbatim GmbH", + "3346": "Cvitek Co. Ltd.", + "335e": "Eight Amps", + "33be": "Syncopated Engineering, Inc.", + "33c4": "Tomahawk Robotics", + "33c8": "Seidl Technologies UG", + "33dd": "Zuki Inc", + "33f7": "Linux Automation GmbH", + "33f8": "Rolling Wireless S.a.r.l.", + "33ff": "nyantec GmbH", + "3434": "Keychron", + "344f": "SCX-3400 Series", + "3455": "Atomos Global Pty Ltd", + "345f": "MacroSilicon", + "3464": "Senscomm Semiconductor, Inc", + "346d": "VendorCo", + "346e": "Gudsen Technology (HK) Co., Ltd (MOZA)", + "349c": "Zhuhai Hongxin Technology Co., Ltd", + "349e": "Token2", + "3542": "Sonova Consumer Hearing", + "3544": "Rusoku technologijos UAB", + "3553": "PCsensor", + "359f": "Shenzhen Sipeed Technology Co., Ltd.", + "35b6": "Orqa d.o.o", + "35f0": "Bitcraze AB", + "35f1": "INFICON", + "369a": "HighSecLabs, Ltd", + "36da": "Record Sure Limited [Recordsure]", + "36e9": "ifanr Inc.", + "3760": "CIN-ergy B.V.", + "37c5": "OpenMV, LLC", + "3802": "LDA Technologies LTD", + "3817": "SleepImage", + "3842": "EVGA", + "386e": "XTX Markets", + "3876": "Fenice Power Co., Ltd", + "38c5": "JetHome LLC", + "3938": "MOSART 
Semiconductor", + "3c93": "QingDao Topscomm", + "413d": "RDing Technology Ltd [PCsensor]", + "4816": "Integrated Webcam", + "4c4a": "JieLi Technology", + "4e4c": "NieLTM TechSolution", + "5041": "Linksys (?)", + "5246": "bladeRF Software Defined Radio", + "5262": "X.Tips", + "5325": "Woolworth GmbH", + "573c": "Xreal Light Microcontroller", + "5888": "3Tronics MU30", + "6004": "ISD-V4 Tablet Pen", + "6005": "Hewlett-Packard", + "6495": "GoDEX International Co.", + "6964": "Idobo", + "7374": "DATA MODUL", + "7712": "2711 Temperature sensor HUB [SEIICHI]", + "7777": "SEIICHI Technology Co., Ltd.", + "8347": "VisTrend Co., Ltd.", + "8888": "inLight", + "9048": "NuTesla CDC Serial Emulator", + "9e8f": "Plug Computer Basic [SheevaPlug]", + "a69c": "AICSEMI", + "a8f8": "Bastard Keyboards", + "b2c3": "GNDHog", + "b711": "VuPlus", + "c069": "M500 Laser Mouse", + "c07c": "M-R0017 [G700s Rechargeable Gaming Mouse]", + "c0f4": "DualMiner", + "c580": "HID UNIKEYdongle [F-Response]", + "c5cb": "ARTECH (Artech Technology Design Co., Ltd.)", + "d13e": "Coldcard Wallet", + "dff0": "shapinb", + "e2b5": "JieLi Technology", + "e3b5": "JieLi Technology", + "e5b7": "JieLi Technology", + "eb57": "ZhuHai JieLi Technology", + "eba4": "Aoboco", + "feed": "DOIO Keyboard", + "ffd2": "ZHONG-HUI ELECTRONICS CORP.", + "fffe": "Inland (MicroCenter brand)", + "ffff": "Wrong vendor ID", +} + + +def as_supplement() -> dict: + """Return the inlined data in the `usb-vid.json` overlay shape: + {vid: {"vendor": str, "products": []}}. + + Products are intentionally empty — this overlay only carries vendor + names. The upstream merger fills in products from the canonical + sources (Rust crate + linux-usb.org + github mirror). 
+ """ + return { + vid: {"vendor": name, "products": []} + for vid, name in VENDOR_NAMES.items() + } + + +def main() -> int: + p = argparse.ArgumentParser(description=__doc__) + p.add_argument("--out", required=True, type=Path, + help="Write the inlined supplement as a usb-vid.json-shaped JSON file.") + args = p.parse_args() + args.out.write_text( + json.dumps(as_supplement(), indent=2, ensure_ascii=False, sort_keys=True) + "\n", + encoding="utf-8", + ) + print(f"wrote {args.out}: {len(VENDOR_NAMES)} inlined VID(s)") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From da12438ff329066462874851abf40b21b6a727a9 Mon Sep 17 00:00:00 2001 From: zackees Date: Sat, 20 Jun 2026 19:40:41 -0700 Subject: [PATCH 6/6] feat(sqlite): add string-keyed vid_vendor + vidpid tables w/ FTS5 (#719) Per follow-up on #719: the daily SQLite database now ships two extra tables with string keys so consumers can do prefix-LIKE + fuzzy FTS5 search without going through the integer-PK path: - vid_vendor(vid TEXT PK, vendor TEXT) + vid_vendor_fts Key is 4-hex-digit lowercase (e.g. '303a'). Indexed on vendor. - vidpid(vidpid TEXT PK, name TEXT) + vidpid_fts Key is the concatenated 8-hex-digit VVVVPPPP (e.g. '303a4002'). Indexed on name. The integer-keyed usb_vendor / usb_product tables remain in place so the existing canned queries keep working unchanged. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- online-data-tools/build_sqlite.py | 61 +++++++++++++++++++++++++- online-data-tools/test_build_sqlite.py | 59 +++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) diff --git a/online-data-tools/build_sqlite.py b/online-data-tools/build_sqlite.py index 5b98f00f..7056d4d4 100644 --- a/online-data-tools/build_sqlite.py +++ b/online-data-tools/build_sqlite.py @@ -87,6 +87,39 @@ def _ensure_int(v: int | str) -> int: CREATE VIRTUAL TABLE board_fts USING fts5(id, name, vendor, mcu, content='board', content_rowid='rowid'); +-- ────────────────────────────────────────────────────────────────────── +-- String-keyed fuzzy-search tables (see #719 follow-up): +-- vid_vendor: 4-hex-digit VID -> vendor name. PK is the string itself +-- so prefix LIKE queries hit the implicit B-tree index. A +-- sibling FTS5 table (vid_vendor_fts) covers token search +-- over the vendor name (case-insensitive, stemming-free). +-- vidpid: 8-hex-digit `VVVVPPPP` (concatenated, no separator) -> +-- product name. Same prefix-index + fuzzy-FTS pattern. +-- Both tables shadow the integer-keyed `usb_vendor` / `usb_product` so +-- the existing canned queries keep working; consumers that prefer +-- string keys + fuzzy search use these. +-- ────────────────────────────────────────────────────────────────────── + +CREATE TABLE vid_vendor ( + vid TEXT PRIMARY KEY, -- 4-hex-digit lowercase (e.g. "303a") + vendor TEXT NOT NULL +); +CREATE INDEX idx_vid_vendor_vendor ON vid_vendor (vendor); + +CREATE VIRTUAL TABLE vid_vendor_fts + USING fts5(vid UNINDEXED, vendor, + content='vid_vendor', content_rowid='rowid'); + +CREATE TABLE vidpid ( + vidpid TEXT PRIMARY KEY, -- 8-hex-digit lowercase (e.g. "303a4002") + name TEXT NOT NULL +); +CREATE INDEX idx_vidpid_name ON vidpid (name); + +CREATE VIRTUAL TABLE vidpid_fts + USING fts5(vidpid UNINDEXED, name, + content='vidpid', content_rowid='rowid'); + -- Per-board headline ranking view. 
Joins boards to their likely USB -- vendors via mcu_to_vid. The board_id column carries the original id even -- when the mcu prefix-match expands to multiple families. @@ -118,9 +151,13 @@ def _ensure_int(v: int | str) -> int: def _populate_usb(conn: sqlite3.Connection, usb_vid: dict) -> None: vendor_rows = [] product_rows = [] + vid_vendor_str_rows: list[tuple[str, str]] = [] + vidpid_str_rows: list[tuple[str, str]] = [] for vid_str, payload in usb_vid.items(): vid = _ensure_int(vid_str) vendor_rows.append((vid, payload["vendor"])) + vid_lower = f"{vid:04x}" + vid_vendor_str_rows.append((vid_lower, payload["vendor"])) for pid_entry in payload.get("products", []): # The online-data JSON uses [pid_hex, name] pairs; tolerate the # alternate dict shape just in case the upstream format drifts. @@ -128,7 +165,9 @@ def _populate_usb(conn: sqlite3.Connection, usb_vid: dict) -> None: pid_str, name = pid_entry[0], pid_entry[1] else: pid_str, name = pid_entry["pid"], pid_entry["name"] - product_rows.append((vid, _ensure_int(pid_str), name)) + pid = _ensure_int(pid_str) + product_rows.append((vid, pid, name)) + vidpid_str_rows.append((f"{vid:04x}{pid:04x}", name)) conn.executemany( "INSERT INTO usb_vendor (vid, vendor) VALUES (?, ?)", vendor_rows ) @@ -136,6 +175,26 @@ def _populate_usb(conn: sqlite3.Connection, usb_vid: dict) -> None: "INSERT INTO usb_product (vid, pid, product) VALUES (?, ?, ?)", product_rows, ) + # String-keyed parallel tables (see schema for rationale). + conn.executemany( + "INSERT INTO vid_vendor (vid, vendor) VALUES (?, ?)", + vid_vendor_str_rows, + ) + conn.executemany( + "INSERT OR IGNORE INTO vidpid (vidpid, name) VALUES (?, ?)", + vidpid_str_rows, + ) + # Push into external-content FTS5 mirrors. The triggers approach would + # need separate INSERT/UPDATE/DELETE triggers; for a build-once DB, + # an explicit SELECT-INTO is simpler and equally fast. 
+ conn.execute( + "INSERT INTO vid_vendor_fts (rowid, vid, vendor) " + "SELECT rowid, vid, vendor FROM vid_vendor" + ) + conn.execute( + "INSERT INTO vidpid_fts (rowid, vidpid, name) " + "SELECT rowid, vidpid, name FROM vidpid" + ) def _populate_boards( diff --git a/online-data-tools/test_build_sqlite.py b/online-data-tools/test_build_sqlite.py index 5ce6f79f..f0bfafc1 100644 --- a/online-data-tools/test_build_sqlite.py +++ b/online-data-tools/test_build_sqlite.py @@ -192,6 +192,65 @@ def test_db_creates_expected_tables(built_db: Path) -> None: assert not missing, f"missing tables/views: {missing}; got {names}" +def test_vid_vendor_string_table_is_populated(built_db: Path, sample_usb_vid: dict) -> None: + with sqlite3.connect(built_db) as conn: + for vid_hex, payload in sample_usb_vid.items(): + row = conn.execute( + "SELECT vendor FROM vid_vendor WHERE vid = ?", + (vid_hex.lower(),), + ).fetchone() + assert row is not None, f"vid {vid_hex!r} missing from vid_vendor" + assert row[0] == payload["vendor"] + + +def test_vidpid_concat_table_is_populated(built_db: Path, sample_usb_vid: dict) -> None: + with sqlite3.connect(built_db) as conn: + for vid_hex, payload in sample_usb_vid.items(): + for pid_hex, product in payload["products"]: + key = f"{int(vid_hex, 16):04x}{int(pid_hex, 16):04x}" + row = conn.execute( + "SELECT name FROM vidpid WHERE vidpid = ?", (key,), + ).fetchone() + assert row is not None, f"vidpid {key!r} missing" + assert row[0] == product + + +def test_vid_vendor_fts5_matches_partial_vendor_name(built_db: Path) -> None: + # 'Espressif' is the canonical Espressif Systems entry in our fixture. 
+ with sqlite3.connect(built_db) as conn: + rows = conn.execute( + "SELECT vid, vendor FROM vid_vendor " + "WHERE rowid IN (SELECT rowid FROM vid_vendor_fts WHERE vendor MATCH ?)", + ("Espressif",), + ).fetchall() + assert rows, "FTS5 vendor search must match 'Espressif'" + assert any(v == "303a" for v, _ in rows) + + +def test_vidpid_fts5_matches_partial_product_name(built_db: Path) -> None: + with sqlite3.connect(built_db) as conn: + rows = conn.execute( + "SELECT vidpid, name FROM vidpid " + "WHERE rowid IN (SELECT rowid FROM vidpid_fts WHERE name MATCH ?)", + ('"ESP32-S3"',), # quote-wrap so FTS5 parses the hyphenated term as a string, not query syntax + ).fetchall() + assert rows, "FTS5 name search must match 'ESP32-S3'" + # Headline mapping: ESP32-S3 product lives under 303a:4002 → '303a4002'. + assert any(k == "303a4002" for k, _ in rows) + + +def test_vid_vendor_prefix_lookup_via_like(built_db: Path) -> None: + """Smoke-test prefix lookup on the vendor name via `LIKE 'prefix%'`. + The column has an explicit btree index (`idx_vid_vendor_vendor`); this + test only checks the returned row, not that the planner uses the index.""" + with sqlite3.connect(built_db) as conn: + rows = conn.execute( + "SELECT vid FROM vid_vendor WHERE vendor LIKE ? ORDER BY vid", + ("Espressif%",), + ).fetchall() + assert ("303a",) in rows + + def test_db_has_fts5_index(built_db: Path) -> None: with sqlite3.connect(built_db) as conn: names = {