From 093c325cf6258c338d055e53ac37f0431be10a35 Mon Sep 17 00:00:00 2001 From: emilberntsne-ui Date: Mon, 15 Jun 2026 16:04:42 +0200 Subject: [PATCH] Day 6: fix single-structure overfit templates - single_structure.yaml: set train.pdb to null instead of probability 0.0. The loader instantiates every train dataset regardless of probability, so probability:0 still tries to read the unavailable interfaces_df.parquet; a null dataset is explicitly skipped. - submit_train.sh: fix conda activation path (miniforge -> miniforge3); export HYDRA_FULL_ERROR=1 so Hydra job failures show the full Python traceback; default DATA to $PWD/single_structure_data (the script is run from the foundry repo root) instead of $HOME/single_structure_data. - monomer.json: use "length" instead of "contig" for the unconditional spec. --- Day_6/templates/monomer.json | 2 +- Day_6/templates/single_structure.yaml | 7 +++++-- Day_6/templates/submit_train.sh | 7 +++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/Day_6/templates/monomer.json b/Day_6/templates/monomer.json index 202142b..0382880 100644 --- a/Day_6/templates/monomer.json +++ b/Day_6/templates/monomer.json @@ -1,6 +1,6 @@ { "uncond_92mer": { "input": null, - "contig": "92-92" + "length": "92-92" } } diff --git a/Day_6/templates/single_structure.yaml b/Day_6/templates/single_structure.yaml index 91a5de4..1616740 100644 --- a/Day_6/templates/single_structure.yaml +++ b/Day_6/templates/single_structure.yaml @@ -38,8 +38,11 @@ datasets: crop_size: 128 # >= structure length (Top7 ~92) so it is never cropped max_atoms_in_crop: 1536 train: - pdb: - probability: 0.0 # turn OFF the (unavailable) PDB datasets + # Set pdb to null (not probability 0): the loader instantiates every train dataset + # regardless of probability, so probability:0 would still try to read the unavailable + # interfaces_df.parquet. A null dataset is explicitly skipped (see + # recursively_instantiate_datasets_and_samplers). + pdb: null # remove the (unavailable) PDB datasets entirely monomer_distillation: probability: 1.0 # train only on our single structure # The monomer config sets `b_factor_min: 70` — a pLDDT threshold meant for AF2 models. 
diff --git a/Day_6/templates/submit_train.sh b/Day_6/templates/submit_train.sh index e6fe96f..0bb8577 100755 --- a/Day_6/templates/submit_train.sh +++ b/Day_6/templates/submit_train.sh @@ -25,16 +25,19 @@ ##BSUB -N # --- activate the course environment (same two lines as Day 1) --- -source /dtu/blackhole/00/c27666/miniforge/bin/activate +source /dtu/blackhole/00/c27666/miniforge3/bin/activate conda activate protein-design # --- (optional) Weights & Biases: uncomment and set your key to log live curves --- # export WANDB_API_KEY=... +# --- show the full Python traceback on any Hydra job failure (debugging) --- +export HYDRA_FULL_ERROR=1 + # --- paths --- # Run this script FROM the foundry repo root. DATA is the folder you made in Exercise 4, # containing both af2_distillation_facebook.parquet and monomer.json. EDIT this path. -DATA="$HOME/single_structure_data" +DATA="$PWD/single_structure_data" # foundry repo root holds single_structure_data/ LOGS="$PWD/logs" # --- launch training ---