Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@ test-dry-run.json
*.tar.gz
*.tgz
*.zip

# ImmuneBuilder model weights are downloaded by python-3.12.10-atls/scripts/
# download-immunebuilder-weights.mjs at build time; ~700 MB, not for git.
python-3.12.10-atls/shared/immunebuilder-weights/
8 changes: 8 additions & 0 deletions python-3.12.10-atls/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@
{
"from": "linux-x64/site-packages/",
"to": "{site-packages}/"
},
{
"from": "shared/immunebuilder-weights/",
"to": "share/immunebuilder-weights/"
}
]
},
Expand All @@ -92,6 +96,10 @@
{
"from": "macosx-aarch64/site-packages/",
"to": "{site-packages}/"
},
{
"from": "shared/immunebuilder-weights/",
"to": "share/immunebuilder-weights/"
}
]
}
Expand Down
3 changes: 2 additions & 1 deletion python-3.12.10-atls/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"scripts": {
"cleanup": "rm -rf ./pkg-*.tgz ./pydist ./dist/ ./build/",
"reset": "pnpm run cleanup && rm -rf ./node_modules ./.turbo",
"build": "pl-py-builder",
"fetch-weights": "node scripts/download-immunebuilder-weights.mjs",
"build": "pnpm run fetch-weights && pl-py-builder",
"after-prebuild": "pl-pkg publish packages",
"before-publish": "pl-pkg prepublish"
},
Expand Down
87 changes: 87 additions & 0 deletions python-3.12.10-atls/scripts/download-immunebuilder-weights.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Pre-fetch ImmuneBuilder model weights into shared/immunebuilder-weights/
// so the runenv build can stage them via copyFiles into share/immunebuilder-weights/
// of the published environment. Avoids the unstable on-first-use download from
// Zenodo when the 3d-structure-prediction block runs against a package-deployed
// runtime.
//
// Idempotent: re-runs skip files whose on-disk size already matches the
// expected Zenodo blob size. Partial downloads land in a .part sidecar and
// are atomically renamed on success.

import { createWriteStream } from 'node:fs';
import { mkdir, rename, stat, unlink } from 'node:fs/promises';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { pipeline } from 'node:stream/promises';

// Anchor every path to this script's own location so the caller's working
// directory is irrelevant.
const __dirname = dirname(fileURLToPath(import.meta.url));
const targetDir = resolve(__dirname, '../shared/immunebuilder-weights');

// Expected byte lengths of the Zenodo blobs (record 7258553). In both model
// families the first model is the small blob and models 2-4 share the larger
// size. The sizes serve as an integrity check and fail a truncated download
// early; ImmuneBuilder's own check (file non-empty and not starting with
// "EMPTY") is too loose to notice a partial download.
const FIRST_MODEL_BYTES = 61050011;
const OTHER_MODEL_BYTES = 214267291;
const BYTES_BY_MODEL = [
  FIRST_MODEL_BYTES,
  OTHER_MODEL_BYTES,
  OTHER_MODEL_BYTES,
  OTHER_MODEL_BYTES,
];

// One { name, size } entry per weight file: antibody_model_1..4 followed by
// nanobody_model_1..4.
const FILES = ['antibody', 'nanobody'].flatMap((family) =>
  BYTES_BY_MODEL.map((size, index) => ({
    name: `${family}_model_${index + 1}`,
    size,
  })),
);

const BASE = 'https://zenodo.org/record/7258553/files';

// Upper bound for one complete download (response headers plus streamed body).
// Zenodo can stall mid-transfer, so the timer has to stay armed until the body
// has been fully written; stopping it once fetch() resolves would leave a
// mid-body stall hanging indefinitely.
const DOWNLOAD_TIMEOUT_MS = 15 * 60_000;

/**
 * Stat a path, treating "does not exist" as an ordinary answer.
 *
 * @param {string} path - Filesystem path to inspect.
 * @returns {Promise<import('node:fs').Stats | null>} The stat result, or
 *   `null` when the path does not exist (ENOENT).
 * @throws Any stat failure other than ENOENT is propagated unchanged.
 */
async function statOrNull(path) {
  return stat(path).catch((failure) => {
    // Only a missing entry maps to null; permission errors and the like
    // must still reach the caller.
    if (failure.code !== 'ENOENT') {
      throw failure;
    }
    return null;
  });
}

/**
 * Download one ImmuneBuilder weight file into `targetDir`, unless a file of
 * the expected size is already there.
 *
 * The body is streamed into a `<name>.part` sidecar and renamed onto the final
 * name only after its size has been verified, so an interrupted or truncated
 * transfer never leaves a plausible-looking file at the destination.
 *
 * @param {{ name: string, size: number }} file - File name under the Zenodo
 *   record and its expected size in bytes.
 * @returns {Promise<void>} Resolves once the file is in place (or was cached).
 * @throws {Error} On a non-2xx HTTP status, a size mismatch, a timeout
 *   (the underlying abort error is attached as `cause`), or any other
 *   network/filesystem failure.
 */
async function downloadOne({ name, size }) {
  const dest = join(targetDir, name);
  const existing = await statOrNull(dest);
  if (existing && existing.size === size) {
    console.log(`  ✓ ${name} (cached)`);
    return;
  }

  const url = `${BASE}/${name}?download=1`;
  console.log(`  ↓ ${name} from ${url}`);

  // A single controller bounds the whole transfer: it is handed to fetch()
  // for the request and to pipeline() for the streamed body, so a stall at
  // either stage is cut off by the same timer.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), DOWNLOAD_TIMEOUT_MS);
  const tmp = `${dest}.part`;
  try {
    const res = await fetch(url, { redirect: 'follow', signal: controller.signal });
    if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
    await pipeline(res.body, createWriteStream(tmp), { signal: controller.signal });
    const got = (await stat(tmp)).size;
    if (got !== size) {
      throw new Error(`${name}: expected ${size} bytes, got ${got}`);
    }
    await rename(tmp, dest);
    console.log(`  ✓ ${name} (${got} bytes)`);
  } catch (err) {
    // Best-effort cleanup; ENOENT is fine if the .part was never created.
    await unlink(tmp).catch(() => {});
    if (controller.signal.aborted) {
      // Only our timer aborts this controller, so an aborted signal means the
      // download ran out of time. Report that explicitly instead of a bare
      // AbortError that names neither the file nor the limit.
      throw new Error(
        `${name}: download from ${url} timed out after ${DOWNLOAD_TIMEOUT_MS} ms`,
        { cause: err },
      );
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Script entry point: make sure the staging directory exists, then fetch each
 * weight file listed in FILES one after another. The first failing download
 * rejects and stops the run.
 */
async function main() {
  await mkdir(targetDir, { recursive: true });
  console.log(`Fetching ImmuneBuilder weights into ${targetDir}`);
  for (const weightFile of FILES) {
    // Deliberately sequential: each download finishes before the next starts.
    await downloadOne(weightFile);
  }
  console.log('ImmuneBuilder weights ready');
}

await main();
Loading