From 39194fe1cd4671d23ff24c9dbf2bf10607106b67 Mon Sep 17 00:00:00 2001 From: Mariia Zueva Date: Wed, 27 May 2026 17:37:11 +0200 Subject: [PATCH 1/2] Download and pack model weights during python-3.12.10-atls env build --- .gitignore | 4 ++ python-3.12.10-atls/config.json | 8 +++ python-3.12.10-atls/package.json | 3 +- .../download-immunebuilder-weights.mjs | 67 +++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs diff --git a/.gitignore b/.gitignore index 0cc34a9..d763779 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,7 @@ test-dry-run.json *.tar.gz *.tgz *.zip + +# ImmuneBuilder model weights are downloaded by python-3.12.10-atls/scripts/ +# download-immunebuilder-weights.mjs at build time; ~1.4 GB, not for git. +python-3.12.10-atls/shared/immunebuilder-weights/ diff --git a/python-3.12.10-atls/config.json b/python-3.12.10-atls/config.json index 1ecb45a..9b5a91c 100644 --- a/python-3.12.10-atls/config.json +++ b/python-3.12.10-atls/config.json @@ -76,6 +76,10 @@ { "from": "linux-x64/site-packages/", "to": "{site-packages}/" + }, + { + "from": "shared/immunebuilder-weights/", + "to": "share/immunebuilder-weights/" } ] }, @@ -92,6 +96,10 @@ { "from": "macosx-aarch64/site-packages/", "to": "{site-packages}/" + }, + { + "from": "shared/immunebuilder-weights/", + "to": "share/immunebuilder-weights/" } ] } diff --git a/python-3.12.10-atls/package.json b/python-3.12.10-atls/package.json index a08b161..6cf974a 100644 --- a/python-3.12.10-atls/package.json +++ b/python-3.12.10-atls/package.json @@ -5,7 +5,8 @@ "scripts": { "cleanup": "rm -rf ./pkg-*.tgz ./pydist ./dist/ ./build/", "reset": "pnpm run cleanup && rm -rf ./node_modules ./.turbo", - "build": "pl-py-builder", + "fetch-weights": "node scripts/download-immunebuilder-weights.mjs", + "build": "pnpm run fetch-weights && pl-py-builder", "after-prebuild": "pl-pkg publish packages", "before-publish": "pl-pkg 
prepublish" }, diff --git a/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs b/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs new file mode 100644 index 0000000..2f961ab --- /dev/null +++ b/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs @@ -0,0 +1,67 @@ +// Pre-fetch ImmuneBuilder model weights into shared/immunebuilder-weights/ +// so the runenv build can stage them via copyFiles into share/immunebuilder-weights/ +// of the published environment. Avoids the unstable on-first-use download from +// Zenodo when the 3d-structure-prediction block runs against a package-deployed +// runtime. +// +// Idempotent: re-runs skip files whose on-disk size already matches the +// expected Zenodo blob size. Partial downloads land in a .part sidecar and +// are atomically renamed on success. + +import { createWriteStream, existsSync, mkdirSync, statSync, unlinkSync } from 'node:fs'; +import { rename } from 'node:fs/promises'; +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { pipeline } from 'node:stream/promises'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const targetDir = resolve(__dirname, '..', 'shared', 'immunebuilder-weights'); + +// Sizes are the byte length of the Zenodo blobs (record 7258553). They double +// as an integrity check + early-fail signal for truncated downloads; ImmuneBuilder +// itself only checks that the file is non-empty and doesn't start with "EMPTY", +// which is too loose to catch partial downloads. 
+const FILES = [ + { name: 'antibody_model_1', size: 61050011 }, + { name: 'antibody_model_2', size: 214267291 }, + { name: 'antibody_model_3', size: 214267291 }, + { name: 'antibody_model_4', size: 214267291 }, + { name: 'nanobody_model_1', size: 61050011 }, + { name: 'nanobody_model_2', size: 214267291 }, + { name: 'nanobody_model_3', size: 214267291 }, + { name: 'nanobody_model_4', size: 214267291 }, +]; + +const BASE = 'https://zenodo.org/record/7258553/files'; + +async function downloadOne({ name, size }) { + const dest = join(targetDir, name); + if (existsSync(dest) && statSync(dest).size === size) { + console.log(` ✓ ${name} (cached)`); + return; + } + const url = `${BASE}/${name}?download=1`; + console.log(` ↓ ${name} from ${url}`); + const res = await fetch(url, { redirect: 'follow' }); + if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); + const tmp = `${dest}.part`; + try { + await pipeline(res.body, createWriteStream(tmp)); + const got = statSync(tmp).size; + if (got !== size) { + throw new Error(`${name}: expected ${size} bytes, got ${got}`); + } + await rename(tmp, dest); + console.log(` ✓ ${name} (${got} bytes)`); + } catch (err) { + if (existsSync(tmp)) unlinkSync(tmp); + throw err; + } +} + +mkdirSync(targetDir, { recursive: true }); +console.log(`Fetching ImmuneBuilder weights into ${targetDir}`); +for (const f of FILES) { + await downloadOne(f); +} +console.log('ImmuneBuilder weights ready'); From 9ba9fa908df7da68f93e0e42aae3991695fdac31 Mon Sep 17 00:00:00 2001 From: Mariia Zueva Date: Wed, 27 May 2026 18:02:42 +0200 Subject: [PATCH 2/2] Fixes according to review --- .../download-immunebuilder-weights.mjs | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs b/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs index 2f961ab..8cd4d7f 100644 --- a/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs +++ 
b/python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs @@ -8,8 +8,8 @@ // expected Zenodo blob size. Partial downloads land in a .part sidecar and // are atomically renamed on success. -import { createWriteStream, existsSync, mkdirSync, statSync, unlinkSync } from 'node:fs'; -import { rename } from 'node:fs/promises'; +import { createWriteStream } from 'node:fs'; +import { mkdir, rename, stat, unlink } from 'node:fs/promises'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { pipeline } from 'node:stream/promises'; @@ -34,32 +34,52 @@ const FILES = [ const BASE = 'https://zenodo.org/record/7258553/files'; +// Zenodo can stall mid-transfer. One AbortController covers both the fetch +// (headers) and the pipeline (streamed body) — clearing the timer right after +// fetch() resolves would still leave a mid-body stall hanging indefinitely. +const DOWNLOAD_TIMEOUT_MS = 15 * 60 * 1000; + +async function statOrNull(path) { + try { + return await stat(path); + } catch (err) { + if (err.code === 'ENOENT') return null; + throw err; + } +} + async function downloadOne({ name, size }) { const dest = join(targetDir, name); - if (existsSync(dest) && statSync(dest).size === size) { + const existing = await statOrNull(dest); + if (existing && existing.size === size) { console.log(` ✓ ${name} (cached)`); return; } const url = `${BASE}/${name}?download=1`; console.log(` ↓ ${name} from ${url}`); - const res = await fetch(url, { redirect: 'follow' }); - if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), DOWNLOAD_TIMEOUT_MS); const tmp = `${dest}.part`; try { + const res = await fetch(url, { redirect: 'follow', signal: controller.signal }); + if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); await pipeline(res.body, createWriteStream(tmp)); - const got = statSync(tmp).size; + const got = (await 
stat(tmp)).size; if (got !== size) { throw new Error(`${name}: expected ${size} bytes, got ${got}`); } await rename(tmp, dest); console.log(` ✓ ${name} (${got} bytes)`); } catch (err) { - if (existsSync(tmp)) unlinkSync(tmp); + // Best-effort cleanup; ENOENT is fine if the .part was never created. + await unlink(tmp).catch(() => {}); throw err; + } finally { + clearTimeout(timer); } } -mkdirSync(targetDir, { recursive: true }); +await mkdir(targetDir, { recursive: true }); console.log(`Fetching ImmuneBuilder weights into ${targetDir}`); for (const f of FILES) { await downloadOne(f);