diff --git a/.actlignore b/.actlignore
new file mode 100644
index 00000000..047b4971
--- /dev/null
+++ b/.actlignore
@@ -0,0 +1,13 @@
+# Keep ACTL sync focused on source. Large data/results should live under
+# /mnt/diffuse-shared or the pod home PVC, not in the synced checkout.
+.pixi/
+grid_search_results/
+outputs/
+data/
+initial_dataset_40*/
+checkpoints/
+release_data/
+*.ckpt
+*.pt
+*.tar.gz
+*.tgz
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index d5123f31..9e5350c4 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -1,7 +1,7 @@
 name: Build and Push Docker Images
 
-# CI builds pull all model checkpoints (~10 GB) from Docker Hub automatically via:
-#   COPY --from=diffuseproject/sampleworks-checkpoints:latest /checkpoints/ /checkpoints/
+# CI builds pull all model checkpoints (~10 GB) from Harbor automatically via:
+#   COPY --from=harbor.astera.sh/library/sampleworks-checkpoints:latest /checkpoints/ /checkpoints/
 # No checkpoint files are needed on the CI runner. The checkpoints base image is
 # pre-built and pushed from the GPU server. See Dockerfile comments for details.
 
@@ -13,8 +13,8 @@ on:
   workflow_dispatch:
 
 env:
-  DOCKERHUB_ORG: diffuseproject
-  IMAGE_NAME: sampleworks
+  REGISTRY: harbor.astera.sh
+  IMAGE_NAME: library/pixi-with-checkpoints
 
 jobs:
   build:
@@ -39,24 +39,25 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v4
 
-      - name: Login to Docker Hub
+      - name: Login to Harbor
         uses: docker/login-action@v4
         with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          registry: ${{ env.REGISTRY }}
+          username: ${{ secrets.HARBOR_USERNAME }}
+          password: ${{ secrets.HARBOR_PASSWORD }}
 
-      # The Dockerfile uses COPY --from=diffuseproject/sampleworks-checkpoints:latest
-      # which Docker automatically pulls from Docker Hub during the build.
+      # The Dockerfile uses COPY --from=harbor.astera.sh/library/sampleworks-checkpoints:latest
+      # which Docker automatically pulls from Harbor during the build.
       # No checkpoint files are needed in the CI build context.
 
       - name: Docker metadata
         id: meta
         uses: docker/metadata-action@v6
         with:
-          images: ${{ env.DOCKERHUB_ORG }}/${{ env.IMAGE_NAME }}
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
           tags: |
-            type=raw,value=latest
-            type=sha,prefix=
+            type=raw,value=sampleworks
+            type=sha,prefix=sha-
             type=semver,pattern={{version}}
             type=semver,pattern=v{{version}}
       - name: Build and push Docker image
diff --git a/AGENTS.md b/AGENTS.md
index b56d51d6..7e1b9436 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -154,6 +154,7 @@ src/sampleworks/
 ├── metrics/               # Quality metrics (LDDT, sidechain)
 ├── eval/                  # Evaluation utilities
 ├── data/                  # Reference data (protein configs)
+├── runs/                  # `sampleworks-runs` CLI + TOML preset orchestrator
 └── utils/                 # Shared utilities
 ```
 
diff --git a/Dockerfile b/Dockerfile
index a51f8651..439421e3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,10 +5,10 @@
 # Checkpoints are baked into the image at /checkpoints/ via a pre-built base image.
 #
 # Build:
-#   docker build -t sampleworks .
+#   docker build -t pixi-with-checkpoints .
 #
-# CI builds pull checkpoints automatically from Docker Hub via:
-#   COPY --from=diffuseproject/sampleworks-checkpoints:latest
+# CI builds pull checkpoints automatically from Harbor via:
+#   COPY --from=harbor.astera.sh/library/sampleworks-checkpoints:latest
 # No checkpoint files are needed in the build context or on the CI runner.
 #
 # To rebuild the checkpoints base image (only needed when checkpoints change):
@@ -16,10 +16,10 @@
 #
 # Run examples:
 #   # Show help
-#   docker run sampleworks --help
+#   docker run pixi-with-checkpoints --help
 #
 #   # Run grid search with Boltz1 (checkpoint baked in)
-#   docker run --gpus all -v /data:/data sampleworks \
+#   docker run --gpus all -v /data:/data pixi-with-checkpoints \
 #     -e boltz run_grid_search.py \
 #     --proteins /data/proteins.csv \
 #     --models boltz1 \
@@ -33,7 +33,7 @@
 #     --align-to-input
 #
 #   # Run grid search with Boltz2 (checkpoint baked in)
-#   docker run --gpus all -v /data:/data sampleworks \
+#   docker run --gpus all -v /data:/data pixi-with-checkpoints \
 #     -e boltz run_grid_search.py \
 #     --proteins /data/proteins.csv \
 #     --models boltz2 \
@@ -45,7 +45,7 @@
 #     --use-tweedie
 #
 #   # Interactive shell
-#   docker run --gpus all -it sampleworks bash
+#   docker run --gpus all -it pixi-with-checkpoints bash
 #
-# Baked-in checkpoints (from diffuseproject/sampleworks-checkpoints:latest):
+# Baked-in checkpoints (from harbor.astera.sh/library/sampleworks-checkpoints:latest):
 #   /checkpoints/boltz1_conf.ckpt                   - Boltz1 model (~3.5GB)
@@ -56,7 +56,7 @@
 #   /checkpoints/protenix_base_default_v0.5.0.pt     - Protenix model (~1.4GB)
 #
 # Checkpoints base image:
-#   All checkpoints live in diffuseproject/sampleworks-checkpoints:latest on Docker Hub.
+#   All checkpoints live in harbor.astera.sh/library/sampleworks-checkpoints:latest.
 #   To rebuild that image, see /data/users/diffuse/checkpoint-build/ on the GPU server.
 
 # ============================================================================
@@ -97,6 +97,7 @@ WORKDIR /app
 # Copy all project files - needed because sampleworks is installed as editable package
 # The pypi-dependencies section has: sampleworks = {editable = true, path = "."}
 COPY pyproject.toml pixi.lock ./
+COPY experiments/ ./experiments/
 COPY src/ ./src/
 COPY scripts/ ./scripts/
 COPY run_grid_search.py ./
@@ -104,11 +105,11 @@ COPY docker-entrypoint.sh /usr/local/bin/entrypoint.sh
 RUN chmod +x /usr/local/bin/entrypoint.sh
 
 # ============================================================================
-# Bake in model checkpoints from pre-built base image on Docker Hub
+# Bake in model checkpoints from pre-built Harbor image
 # ============================================================================
 # Checkpoints (~10 GB) rarely change, so this layer is placed before pixi
 # installs to stay cached even when dependencies update.
-COPY --from=diffuseproject/sampleworks-checkpoints:latest /checkpoints/ /checkpoints/
+COPY --from=harbor.astera.sh/library/sampleworks-checkpoints:latest /checkpoints/ /checkpoints/
 
 # ============================================================================
 # Install all three environments: boltz, protenix, rf3
@@ -129,6 +130,19 @@ RUN pixi run -e boltz python -c "\
 from sampleworks.core.forward_models.xray.real_space_density_deps.ops import dilate_atom_centric; \
 print('CUDA extensions compiled successfully')" || echo "CUDA extension pre-compilation skipped (no GPU during build)"
 
+# This image carries pixi environments and checkpoints. Runtime source should
+# come from ACTL's synced checkout at /home/dev/workspace, not from stale code
+# baked into /app during image construction.
+RUN rm -rf /app/src /app/scripts /app/experiments /app/run_grid_search.py \
+    && mkdir -p /home/dev/workspace
+
+COPY --chmod=755 run_experiments run_experiments.sh run_all_models.sh /usr/local/bin/
+RUN printf '\n# ACTL scientist workflow: land in the synced Sampleworks checkout.\nif [[ $- == *i* ]] && [ -z "${SAMPLEWORKS_NO_AUTO_CD:-}" ] && [ -d /home/dev/workspace ]; then\n    cd /home/dev/workspace\nfi\n' >> /root/.bashrc
+
+ENV SAMPLEWORKS_PIXI_PROJECT_DIR=/app \
+    SAMPLEWORKS_APP_DIR= \
+    SAMPLEWORKS_REQUIRE_PREBUILT_PIXI=1
+
 # Set default checkpoint paths via environment variables
 ENV BOLTZ1_CHECKPOINT=/checkpoints/boltz1_conf.ckpt \
     BOLTZ2_CHECKPOINT=/checkpoints/boltz2_conf.ckpt \
diff --git a/GRID_SEARCH.md b/GRID_SEARCH.md
index 154d00c5..b8f9aa73 100644
--- a/GRID_SEARCH.md
+++ b/GRID_SEARCH.md
@@ -6,14 +6,14 @@ and how to find and read logs if you need to debug the process.
 
 ## Optional: Setting up the docker container
 It is often useful to have a docker container with all the dependencies installed.
-Our script `run_all_models.sh` for instance uses a docker container to manage all
+Our script `run_experiments` for instance uses a docker container to manage all
 dependencies. To run that script, you will need to have docker installed. Build
 the container with
 ```shell
-docker build -t diffuseproject/sampleworks .
+docker build -t pixi-with-checkpoints .
 ```
 which will add an image to your local docker repository called
-`diffuseproject/sampleworks:latest`. The top of the `Dockerfile` contains
+`pixi-with-checkpoints:latest`. The top of the `Dockerfile` contains
 instructions on how to use the container as well. The container entrypoint
 (`docker-entrypoint`) is fairly generic and is used to call the `run_grid_search.py`
 script described below.
diff --git a/README.md b/README.md
index 0b123355..f9f31aa8 100644
--- a/README.md
+++ b/README.md
@@ -152,6 +152,100 @@ Output layout: `grid_search_results/<protein>/<model>[_<method>]/<scaler>/ens<N>
 Instructions for running evaluation and metrics scripts are coming soon.
 
 
+## Running preset experiments on ACTL (`run_experiments`)
+
+This section is Astera-specific: it assumes access to ACTL, the internal Harbor
+image registry, and the `diffuse-shared` PVC. External users can run the same
+TOML presets with `sampleworks-runs` or `python -m sampleworks.runs.cli` after
+setting equivalent local paths for `DATA_DIR`, `PROTEINS_CSV`, `RESULTS_DIR`,
+`MSA_CACHE_DIR`, and model checkpoints.
+
+Start an 8-GPU ACTL machine named `sampleworks` with the Sampleworks image and
+the shared data volume mounted:
+
+```bash
+actl pod up sampleworks --profile 8x --image harbor.astera.sh/library/pixi-with-checkpoints:sampleworks --storage shared --pvc-size 200Gi --mount diffuse-shared --yes
+```
+
+Keep that terminal open; it maintains sync and SSH. From another terminal:
+
+```bash
+actl pod status sampleworks
+# copy the `ssh:` line, then run it, for example:
+ssh workspace.actl-ws-<user>-sampleworks.devspace
+cd /home/dev/workspace
+```
+
+The main command is `run_experiments`. It reads TOML presets and launches the
+right `run_grid_search.py` jobs, pixi environments, GPU assignments, logs,
+results directory, and MSA cache.
+
+```bash
+export DATA_DIR=/mnt/diffuse-shared/raw/sampleworks/initial_dataset_40_occ_sweeps
+export PROTEINS_CSV="$DATA_DIR/proteins.csv"
+export SAMPLEWORKS_ACTL_RUN_NAME="$(hostname -s)"
+
+run_experiments --list        # show available presets (does not require DATA_DIR)
+run_experiments --show rf3    # inspect what will run
+run_experiments --dry-run rf3 # print commands without running
+run_experiments rf3           # run the standalone RF3 preset
+run_experiments boltz         # run Boltz2 X-ray + Boltz2 MD
+run_experiments boltz1        # run standalone Boltz1
+run_experiments protenix      # run the standalone Protenix preset
+run_experiments full_8gpu     # run the full 8-GPU comparison preset
+```
+
+The default `full_8gpu` preset runs Boltz2 XRD, Boltz2 MD, RF3, and Protenix in
+parallel. Run a subset with:
+
+```bash
+run_experiments full_8gpu --jobs rf3,protenix
+```
+
+Standalone presets are available for each model/model family: `boltz`,
+`boltz1`, `boltz2`, `boltz2_xrd`, `boltz2_md`, `rf3`, and `protenix`.
+Additional comparison presets include `protenix_dual`, `rf3_protenix`, and RF3
+variants. Single-job presets default to `gpu_count = 8`, so on an 8-GPU pod
+they use the whole machine.
+
+Presets live in `experiments/*.toml` in your local checkout and on the pod at
+`/home/dev/workspace/experiments/*.toml`. To modify an experiment, edit or copy
+a preset locally, let ACTL sync it, then run it by name or path:
+
+```bash
+cp experiments/rf3_partial.toml experiments/my_rf3.toml
+# edit experiments/my_rf3.toml locally
+run_experiments --preset my_rf3
+```
+
+For one-off changes, use `--set` instead of editing TOML:
+
+```bash
+run_experiments rf3 --set jobs.rf3.gpu_count=4
+run_experiments rf3 --set jobs.rf3.args.gradient-weights="0.0 0.01 0.02"
+```
+
+Presets usually declare `gpu_count = N`, not fixed GPU IDs. The runner assigns
+visible GPUs automatically in job order, so the same preset works on different
+pod sizes and fails fast if the pod has fewer visible GPUs than requested. Use
+explicit `gpus = "0,1"` only when you need to pin a job to specific devices; the
+runner validates those IDs before launching jobs.
+
+Set `DATA_DIR` and `PROTEINS_CSV` explicitly for each run so they are captured in
+the shell history and launch logs. Checkpoints default to
+`/mnt/diffuse-shared/raw/checkpoints` when those files exist, results go to
+`/mnt/diffuse-shared/results/sampleworks/<pod>/<target>/`, and MSA caches go to
+`/mnt/diffuse-shared/cache/sampleworks/msa`. Override with `RESULTS_DIR`,
+`MSA_CACHE_DIR`, or model-specific checkpoint variables before running.
+
+The ACTL image contains baked pixi environments under `/app/.pixi`. If your
+synced branch changes `pyproject.toml` or `pixi.lock`, `run_experiments` stops
+with a clear error instead of mutating the baked environment. For dependency
+debugging only, opt into an on-pod pixi update with
+`RUNTIME_PIXI=1 run_experiments ...`; reproducible scientist runs should use a
+rebuilt `pixi-with-checkpoints:sampleworks` image instead.
+
+
 ## Docker
 
 TODO: Docker container documentation
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 957c351d..0477a7dc 100755
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -2,15 +2,15 @@
 # Sampleworks Docker Entrypoint
 #
 # Usage:
-#   docker run sampleworks -e <pixi_env> <script> [args...]
-#   docker run sampleworks -e boltz run_grid_search.py --proteins /data/proteins.csv ...
-#   docker run sampleworks bash  # interactive shell
+#   docker run pixi-with-checkpoints -e <pixi_env> <script> [args...]
+#   docker run pixi-with-checkpoints -e boltz run_grid_search.py --proteins /data/proteins.csv ...
+#   docker run pixi-with-checkpoints bash  # interactive shell
 #
 # Available pixi environments: boltz, protenix, rf3
 #
 # Examples:
 #   # Run grid search with RF3
-#   docker run --gpus all -v /data:/data sampleworks \
+#   docker run --gpus all -v /data:/data pixi-with-checkpoints \
 #     -e rf3 run_grid_search.py \
 #     --proteins /data/proteins.csv \
 #     --models rf3 \
@@ -28,9 +28,9 @@ show_help() {
 Sampleworks - Protein structure prediction with diffusion model guidance
 
 USAGE:
-    docker run --gpus all --shm-size=16g sampleworks -e <environment> <script> [arguments...]
-    docker run sampleworks bash
-    docker run sampleworks --help
+    docker run --gpus all --shm-size=16g pixi-with-checkpoints -e <environment> <script> [arguments...]
+    docker run pixi-with-checkpoints bash
+    docker run pixi-with-checkpoints --help
 
 IMPORTANT:
     Always use --shm-size=16g (or larger) to avoid shared memory errors with DataLoaders.
@@ -47,7 +47,7 @@ ENVIRONMENTS:
 
 EXAMPLES:
     # Run grid search with RF3 model
-    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+    docker run --gpus all --shm-size=16g -v /data:/data pixi-with-checkpoints \
       -e rf3 run_grid_search.py \
       --proteins /data/proteins.csv \
       --models rf3 \
@@ -62,7 +62,7 @@ EXAMPLES:
       --rf3-checkpoint /data/checkpoints/rf3_foundry_01_24_latest.ckpt
 
     # Run grid search with Boltz1 model
-    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+    docker run --gpus all --shm-size=16g -v /data:/data pixi-with-checkpoints \
       -e boltz run_grid_search.py \
       --proteins /data/proteins.csv \
       --models boltz1 \
@@ -74,7 +74,7 @@ EXAMPLES:
       --boltz1-checkpoint /data/checkpoints/boltz1_conf.ckpt
 
     # Run grid search with Boltz2 model
-    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+    docker run --gpus all --shm-size=16g -v /data:/data pixi-with-checkpoints \
       -e boltz run_grid_search.py \
       --proteins /data/proteins.csv \
       --models boltz2 \
@@ -87,7 +87,7 @@ EXAMPLES:
       --boltz2-checkpoint /data/checkpoints/boltz2_conf.ckpt
 
     # Run grid search with Protenix model
-    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+    docker run --gpus all --shm-size=16g -v /data:/data pixi-with-checkpoints \
       -e protenix run_grid_search.py \
       --proteins /data/proteins.csv \
       --models protenix \
@@ -99,10 +99,10 @@ EXAMPLES:
       --protenix-checkpoint /data/checkpoints/protenix_base_default_v0.5.0.pt
 
     # Interactive shell
-    docker run --gpus all --shm-size=16g -it sampleworks bash
+    docker run --gpus all --shm-size=16g -it pixi-with-checkpoints bash
 
     # Run a custom script
-    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+    docker run --gpus all --shm-size=16g -v /data:/data pixi-with-checkpoints \
       -e boltz scripts/boltz2_pure_guidance.py \
       --structure /data/structure.cif \
       --density /data/density.ccp4 \
@@ -191,7 +191,7 @@ PROTEINS CSV FORMAT:
       2xyz,/data/structures/2xyz.cif,/data/maps/2xyz.mrc,1.8
 
 For full argument details, run:
-    docker run sampleworks -e boltz run_grid_search.py --help
+    docker run pixi-with-checkpoints -e boltz run_grid_search.py --help
 EOF
 }
 
@@ -201,7 +201,11 @@ if [ $# -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
     exit 0
 fi
 
-# Handle interactive shell
+# Handle scientist workflow helpers and interactive shells
+if [ "$1" = "run_experiments" ] || [ "$1" = "run_experiments.sh" ] || [ "$1" = "run_all_models.sh" ]; then
+    exec "$@"
+fi
+
 if [ "$1" = "bash" ] || [ "$1" = "sh" ]; then
     exec "$@"
 fi
@@ -222,9 +226,9 @@ while [[ $# -gt 0 ]]; do
         *)
             echo "Error: First argument must be -e <environment>, bash, or --help"
             echo ""
-            echo "Usage: docker run sampleworks -e <env> <script> [args...]"
-            echo "       docker run sampleworks bash"
-            echo "       docker run sampleworks --help"
+            echo "Usage: docker run pixi-with-checkpoints -e <env> <script> [args...]"
+            echo "       docker run pixi-with-checkpoints bash"
+            echo "       docker run pixi-with-checkpoints --help"
             exit 1
             ;;
     esac
@@ -234,12 +238,12 @@ done
 if [[ -z "$ENV" ]]; then
     echo "Error: Environment not specified. Use -e <env> where env is boltz, protenix, or rf3"
     echo ""
-    echo "Usage: docker run sampleworks -e <env> <script> [args...]"
+    echo "Usage: docker run pixi-with-checkpoints -e <env> <script> [args...]"
     echo ""
     echo "Examples:"
-    echo "  docker run sampleworks -e boltz run_grid_search.py --proteins /data/proteins.csv"
-    echo "  docker run sampleworks -e rf3 run_grid_search.py --help"
-    echo "  docker run sampleworks bash"
+    echo "  docker run pixi-with-checkpoints -e boltz run_grid_search.py --proteins /data/proteins.csv"
+    echo "  docker run pixi-with-checkpoints -e rf3 run_grid_search.py --help"
+    echo "  docker run pixi-with-checkpoints bash"
     exit 1
 fi
 
@@ -255,7 +259,7 @@ esac
 # Get the script to run
 if [[ $# -eq 0 ]]; then
     echo "Error: No script specified"
-    echo "Usage: docker run sampleworks -e <env> <script> [args...]"
+    echo "Usage: docker run pixi-with-checkpoints -e <env> <script> [args...]"
     exit 1
 fi
 
diff --git a/experiments/boltz.toml b/experiments/boltz.toml
new file mode 100644
index 00000000..6dde9089
--- /dev/null
+++ b/experiments/boltz.toml
@@ -0,0 +1,30 @@
+description = "Boltz2 X-ray and MD canonical occ-sweep jobs."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpu_count = 4
+output_subdir = "boltz2_xrd"
+args = { model = "boltz2", method = "X-RAY DIFFRACTION", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpu_count = 4
+output_subdir = "boltz2_md"
+args = { model = "boltz2", method = "MD", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
diff --git a/experiments/boltz1.toml b/experiments/boltz1.toml
new file mode 100644
index 00000000..f1a8a2cf
--- /dev/null
+++ b/experiments/boltz1.toml
@@ -0,0 +1,27 @@
+description = "Boltz1 canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz1"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+BOLTZ1_CHECKPOINT = "/checkpoints/boltz1_conf.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz1"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+model-checkpoint = "${BOLTZ1_CHECKPOINT}"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz1"
+env = "boltz"
+gpu_count = 8
+output_subdir = "boltz1"
+args = {}
diff --git a/experiments/boltz2.toml b/experiments/boltz2.toml
new file mode 100644
index 00000000..551ca0d5
--- /dev/null
+++ b/experiments/boltz2.toml
@@ -0,0 +1,30 @@
+description = "Boltz2 X-ray and MD canonical occ-sweep jobs."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpu_count = 4
+output_subdir = "boltz2_xrd"
+args = { model = "boltz2", method = "X-RAY DIFFRACTION", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpu_count = 4
+output_subdir = "boltz2_md"
+args = { model = "boltz2", method = "MD", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
diff --git a/experiments/boltz2_md.toml b/experiments/boltz2_md.toml
new file mode 100644
index 00000000..0db59a82
--- /dev/null
+++ b/experiments/boltz2_md.toml
@@ -0,0 +1,26 @@
+description = "Boltz2 MD canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2_md"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz2"
+method = "MD"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpu_count = 8
+output_subdir = "boltz2_md"
+args = {}
diff --git a/experiments/boltz2_xrd.toml b/experiments/boltz2_xrd.toml
new file mode 100644
index 00000000..10bdf3fb
--- /dev/null
+++ b/experiments/boltz2_xrd.toml
@@ -0,0 +1,26 @@
+description = "Boltz2 X-ray canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2_xrd"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz2"
+method = "X-RAY DIFFRACTION"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpu_count = 8
+output_subdir = "boltz2_xrd"
+args = {}
diff --git a/experiments/full_8gpu.toml b/experiments/full_8gpu.toml
new file mode 100644
index 00000000..62c1cb06
--- /dev/null
+++ b/experiments/full_8gpu.toml
@@ -0,0 +1,44 @@
+description = "Full 8-GPU panel: Boltz2 X-ray, Boltz2 MD, RF3, and Protenix."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/full_8gpu"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpu_count = 2
+output_subdir = "boltz2_xrd"
+args = { model = "boltz2", method = "X-RAY DIFFRACTION", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpu_count = 2
+output_subdir = "boltz2_md"
+args = { model = "boltz2", method = "MD", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpu_count = 2
+output_subdir = "rf3"
+args = { model = "rf3", gradient-weights = "0.0 0.005 0.01 0.02 0.035 0.05 0.1" }
+
+[[jobs]]
+name = "protenix"
+env = "protenix"
+gpu_count = 2
+output_subdir = "protenix"
+args = { model = "protenix", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
diff --git a/experiments/protenix.toml b/experiments/protenix.toml
new file mode 100644
index 00000000..1c94a364
--- /dev/null
+++ b/experiments/protenix.toml
@@ -0,0 +1,25 @@
+description = "Protenix canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/protenix"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "protenix"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "protenix"
+env = "protenix"
+gpu_count = 8
+output_subdir = "protenix"
+args = {}
diff --git a/experiments/protenix_dual.toml b/experiments/protenix_dual.toml
new file mode 100644
index 00000000..50f61e8b
--- /dev/null
+++ b/experiments/protenix_dual.toml
@@ -0,0 +1,34 @@
+description = "Run Protenix tiny and mini variants in parallel (different checkpoints, same sweep)."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/protenix_dual"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+PROTENIX_TINY_CHECKPOINT = "/extra_checkpoints/protenix_tiny_default_v0.5.0.pt"
+PROTENIX_MINI_CHECKPOINT = "/extra_checkpoints/protenix_mini_default_v0.5.0.pt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "protenix"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "protenix_tiny"
+env = "protenix"
+gpu_count = 4
+output_subdir = "protenix_tiny"
+args = { model-checkpoint = "${PROTENIX_TINY_CHECKPOINT}" }
+
+[[jobs]]
+name = "protenix_mini"
+env = "protenix"
+gpu_count = 4
+output_subdir = "protenix_mini"
+args = { model-checkpoint = "${PROTENIX_MINI_CHECKPOINT}" }
diff --git a/experiments/rf3.toml b/experiments/rf3.toml
new file mode 100644
index 00000000..4e63c128
--- /dev/null
+++ b/experiments/rf3.toml
@@ -0,0 +1,27 @@
+description = "RF3 canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/rf3"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+RF3_CHECKPOINT = "/checkpoints/rf3_foundry_01_24_latest.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "rf3"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.005 0.01 0.02 0.035 0.05 0.1"
+model-checkpoint = "${RF3_CHECKPOINT}"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpu_count = 8
+output_subdir = "rf3"
+args = {}
diff --git a/experiments/rf3_partial.toml b/experiments/rf3_partial.toml
new file mode 100644
index 00000000..7937e9a6
--- /dev/null
+++ b/experiments/rf3_partial.toml
@@ -0,0 +1,24 @@
+description = "RF3 partial-diffusion canonical occ-sweep on 8 GPUs (7 gradient weights)."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/rf3_partial"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+RF3_CHECKPOINT = "/checkpoints/rf3_foundry_01_24_latest.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpu_count = 8
+output_subdir = "rf3"
+args = { model = "rf3", gradient-weights = "0.0 0.005 0.01 0.02 0.035 0.05 0.1", model-checkpoint = "${RF3_CHECKPOINT}" }
diff --git a/experiments/rf3_partial_chiral_off.toml b/experiments/rf3_partial_chiral_off.toml
new file mode 100644
index 00000000..562ae2e7
--- /dev/null
+++ b/experiments/rf3_partial_chiral_off.toml
@@ -0,0 +1,26 @@
+description = "RF3 occ-sweep with --disable-chiral-features and a wider 10-weight sweep."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/rf3_partial_chiral_off"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+RF3_CHECKPOINT = "/checkpoints/rf3_foundry_01_24_latest.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+force-all = true
+disable-chiral-features = true
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpu_count = 8
+output_subdir = "rf3"
+args = { model = "rf3", gradient-weights = "0.0 0.005 0.01 0.02 0.035 0.05 0.1 0.2 0.35 0.5", model-checkpoint = "${RF3_CHECKPOINT}" }
diff --git a/experiments/rf3_protenix.toml b/experiments/rf3_protenix.toml
new file mode 100644
index 00000000..7b9996cb
--- /dev/null
+++ b/experiments/rf3_protenix.toml
@@ -0,0 +1,29 @@
+description = "RF3 + Protenix combo on the occ-sweep dataset (4 GPUs each; on an 8-GPU pod RF3 gets GPUs 0-3, Protenix 4-7)."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/rf3_protenix"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpu_count = 4
+output_subdir = "rf3"
+args = { model = "rf3", gradient-weights = "0.0 0.01 0.02 0.05 0.1" }
+
+[[jobs]]
+name = "protenix"
+env = "protenix"
+gpu_count = 4
+output_subdir = "protenix"
+args = { model = "protenix", partial-diffusion-step = 120, gradient-weights = "0.0 0.1 0.2 0.5" }
diff --git a/pyproject.toml b/pyproject.toml
index b99dd5a2..2b958183 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,10 @@ version = "0.6.3"
 
 [project.scripts]
 sampleworks-guidance = "sampleworks.cli.guidance:main"
+sampleworks-runs = "sampleworks.runs.cli:main"
+
+[tool.hatch.build.targets.wheel.force-include]
+"experiments" = "experiments"
 
 [tool.hatch.metadata]
 allow-direct-references = true
diff --git a/run_all_models.sh b/run_all_models.sh
index 5b90f81f..f1f81b8c 100755
--- a/run_all_models.sh
+++ b/run_all_models.sh
@@ -1,164 +1,17 @@
-#!/bin/bash
-# Run all 4 model grid searches in parallel, 2 GPUs each
-# Total: 8 GPUs used (4 jobs x 2 GPUs each)
-#
-# Models:
-#   - Boltz2 X-ray diffraction (GPUs 0,1)
-#   - Boltz2 MD               (GPUs 2,3)
-#   - RosettaFold3             (GPUs 4,5)
-#   - Protenix                 (GPUs 6,7)
-#
-# Checkpoints are BAKED INTO the Docker image at /checkpoints/.
-# If missing, the code auto-falls back to mounted paths.
-#
-# Usage:
-#   ./run_all_models.sh
-
-set -e
-
-# Configuration
-DATA_DIR="/mnt/diffuse-private/raw/sampleworks/initial_dataset_40_occ_sweeps"
-RESULTS_DIR="${RESULTS_DIR:-/data/sampleworks-exp/occ_sweep/grid_search_results}"
-MSA_CACHE_DIR="${MSA_CACHE_DIR:-/data/sampleworks-exp/msa_cache}"
-
-# Create directories
-mkdir -p "$RESULTS_DIR"
-mkdir -p "$MSA_CACHE_DIR"
-
-# Pull latest image (no-op if already up to date)
-echo "Pulling latest Docker image..."
-docker pull diffuseproject/sampleworks:latest
-
-# Common docker options
-DOCKER_OPTS="--rm --shm-size=16g"
-
-echo "=========================================="
-echo "Starting all model grid searches (4 jobs x 2 GPUs)"
-echo "Data: $DATA_DIR"
-echo "Results: $RESULTS_DIR"
-echo "MSA Cache: $MSA_CACHE_DIR"
-echo "Checkpoints: BAKED INTO IMAGE (with mount fallback)"
-echo ""
-echo "Models:"
-echo "  - Boltz2 X-ray (GPUs 0,1)"
-echo "  - Boltz2 MD    (GPUs 2,3)"
-echo "  - RF3          (GPUs 4,5)"
-echo "  - Protenix     (GPUs 6,7)"
-echo "=========================================="
-
-PIDS=()
-
-# --- Boltz2 X-ray Diffraction (GPUs 0,1) ---
-echo "[$(date)] Starting Boltz2 X-ray on GPUs 0,1"
-docker run $DOCKER_OPTS \
-    --gpus '"device=0,1"' \
-    -v "$DATA_DIR:/data/inputs:ro" \
-    -v "$RESULTS_DIR:/data/results" \
-    -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
-    -e SAMPLEWORKS_HOST_INPUT_DIR="$DATA_DIR" \
-    -e SAMPLEWORKS_HOST_RESULTS_DIR="$RESULTS_DIR" \
-    diffuseproject/sampleworks:latest \
-    -e boltz run_grid_search.py \
-    --proteins "/data/inputs/proteins.csv" \
-    --model boltz2 \
-    --method "X-RAY DIFFRACTION" \
-    --scalers pure_guidance \
-    --partial-diffusion-step 120 \
-    --ensemble-sizes "8" \
-    --gradient-weights "0.1 0.2 0.5" \
-    --gradient-normalization --augmentation --align-to-input \
-    --output-dir /data/results \
-    2>&1 | tee "$RESULTS_DIR/boltz2_xrd_run.log" &
-PIDS+=($!)
-echo "[$(date)] Boltz2 X-ray job started (PID: ${PIDS[-1]})"
-
-# --- Boltz2 MD (GPUs 2,3) ---
-echo "[$(date)] Starting Boltz2 MD on GPUs 2,3"
-docker run $DOCKER_OPTS \
-    --gpus '"device=2,3"' \
-    -v "$DATA_DIR:/data/inputs:ro" \
-    -v "$RESULTS_DIR:/data/results" \
-    -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
-    -e SAMPLEWORKS_HOST_INPUT_DIR="$DATA_DIR" \
-    -e SAMPLEWORKS_HOST_RESULTS_DIR="$RESULTS_DIR" \
-    diffuseproject/sampleworks:latest \
-    -e boltz run_grid_search.py \
-    --proteins "/data/inputs/proteins.csv" \
-    --model boltz2 \
-    --method "MD" \
-    --scalers pure_guidance \
-    --partial-diffusion-step 120 \
-    --ensemble-sizes "8" \
-    --gradient-weights "0.1 0.2 0.5" \
-    --gradient-normalization --augmentation --align-to-input \
-    --output-dir /data/results \
-    2>&1 | tee "$RESULTS_DIR/boltz2_md_run.log" &
-PIDS+=($!)
-echo "[$(date)] Boltz2 MD job started (PID: ${PIDS[-1]})"
-
-# --- RosettaFold3 (GPUs 4,5) ---
-echo "[$(date)] Starting RosettaFold3 on GPUs 4,5"
-docker run $DOCKER_OPTS \
-    --gpus '"device=4,5"' \
-    -v "$DATA_DIR:/data/inputs:ro" \
-    -v "$RESULTS_DIR:/data/results" \
-    -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
-    -e SAMPLEWORKS_HOST_INPUT_DIR="$DATA_DIR" \
-    -e SAMPLEWORKS_HOST_RESULTS_DIR="$RESULTS_DIR" \
-    diffuseproject/sampleworks:latest \
-    -e rf3 run_grid_search.py \
-    --proteins "/data/inputs/proteins.csv" \
-    --model rf3 \
-    --partial-diffusion-step 120 \
-    --scalers pure_guidance \
-    --ensemble-sizes "8" \
-    --gradient-weights "0.01 0.02 0.05" \
-    --gradient-normalization --augmentation --align-to-input \
-    --output-dir /data/results \
-    2>&1 | tee "$RESULTS_DIR/rf3_run.log" &
-PIDS+=($!)
-echo "[$(date)] RosettaFold3 job started (PID: ${PIDS[-1]})"
-
-# --- Protenix (GPUs 6,7) ---
-echo "[$(date)] Starting Protenix on GPUs 6,7"
-docker run $DOCKER_OPTS \
-    --gpus '"device=6,7"' \
-    -v "$DATA_DIR:/data/inputs:ro" \
-    -v "$RESULTS_DIR:/data/results" \
-    -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
-    -e SAMPLEWORKS_HOST_INPUT_DIR="$DATA_DIR" \
-    -e SAMPLEWORKS_HOST_RESULTS_DIR="$RESULTS_DIR" \
-    diffuseproject/sampleworks:latest \
-    -e protenix run_grid_search.py \
-    --proteins "/data/inputs/proteins.csv" \
-    --model protenix \
-    --scalers pure_guidance \
-    --partial-diffusion-step 120 \
-    --ensemble-sizes "8" \
-    --gradient-weights "0.1 0.2 0.5" \
-    --gradient-normalization --augmentation --align-to-input \
-    --output-dir /data/results \
-    2>&1 | tee "$RESULTS_DIR/protenix_run.log" &
-PIDS+=($!)
-echo "[$(date)] Protenix job started (PID: ${PIDS[-1]})"
-
-echo ""
-echo "=========================================="
-echo "All 4 jobs launched! PIDs: ${PIDS[*]}"
-echo "Logs:"
-echo "  - $RESULTS_DIR/boltz2_xrd_run.log"
-echo "  - $RESULTS_DIR/boltz2_md_run.log"
-echo "  - $RESULTS_DIR/rf3_run.log"
-echo "  - $RESULTS_DIR/protenix_run.log"
-echo ""
-echo "Monitor GPU usage: nvidia-smi -l 1"
-echo "Waiting for all jobs to complete..."
-echo "=========================================="
-
-# Wait for all background jobs
-wait
-
-echo ""
-echo "=========================================="
-echo "[$(date)] All jobs completed!"
-echo "=========================================="
+#!/usr/bin/env bash
+# Backward-compatible alias. Prefer run_experiments for new docs/usage.
+set -euo pipefail
+
+script_path="${BASH_SOURCE[0]}"
+while [[ -L "$script_path" ]]; do
+    script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+    script_target="$(readlink "$script_path")"
+    if [[ "$script_target" == /* ]]; then
+        script_path="$script_target"
+    else
+        script_path="$script_dir/$script_target"
+    fi
+done
+script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+
+exec "$script_dir/run_experiments" "$@"
diff --git a/run_experiments b/run_experiments
new file mode 100755
index 00000000..08d2700d
--- /dev/null
+++ b/run_experiments
@@ -0,0 +1,423 @@
+#!/usr/bin/env bash
+# ACTL-native entry point for Sampleworks preset runs.
+#
+# The TOML preset is the source of truth. This wrapper uses the synced ACTL
+# checkout at /home/dev/workspace for Sampleworks code, while reusing the
+# prebuilt pixi environments/checkpoints from the image at /app.
+
+set -euo pipefail
+
+# Resolve the physical location of this wrapper even when it is invoked through
+# the /usr/local/bin symlink baked into the ACTL image. Later fallbacks use this
+# directory to find a checkout when /home/dev/workspace is not available.
+script_path="${BASH_SOURCE[0]}"
+while [[ -L "$script_path" ]]; do
+    script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+    script_target="$(readlink "$script_path")"
+    if [[ "$script_target" == /* ]]; then
+        script_path="$script_target"
+    else
+        script_path="$script_dir/$script_target"
+    fi
+done
+script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+
+is_sampleworks_root() {
+    local candidate="$1"
+    [[ -f "$candidate/pyproject.toml" && -d "$candidate/src/sampleworks" && -f "$candidate/run_grid_search.py" ]]
+}
+
+find_sampleworks_root_upwards() {
+    local candidate="$1"
+    while [[ -n "$candidate" && "$candidate" != "/" ]]; do
+        if is_sampleworks_root "$candidate"; then
+            printf '%s\n' "$candidate"
+            return 0
+        fi
+        candidate="$(dirname -- "$candidate")"
+    done
+    return 1
+}
+
+truthy_env() {  # Succeeds when the variable named by $1 holds 1/true/yes, in any letter case.
+    local value="${!1:-}"  # indirect lookup; an unset variable reads as empty
+    [[ "${value,,}" =~ ^(1|true|yes)$ ]]
+}
+
+require_env_var() {
+    local name="$1"
+    local help_text="$2"
+    if [[ -z "${!name:-}" ]]; then
+        cat >&2 <<EOF
+$name must be set explicitly for run_experiments.
+
+$help_text
+EOF
+        return 2
+    fi
+}
+
+pixi_inputs_match_image() {
+    # Succeed when pyproject.toml and pixi.lock are byte-identical in both trees.
+    local baked="$1" synced="$2" manifest
+    # If either tree lacks one of the files there is nothing to compare, so
+    # report a match and leave the decision to the prebuilt-env checks.
+    for manifest in "$baked/pyproject.toml" "$baked/pixi.lock" \
+        "$synced/pyproject.toml" "$synced/pixi.lock"; do
+        [[ -f "$manifest" ]] || return 0
+    done
+    cmp -s "$baked/pyproject.toml" "$synced/pyproject.toml" || return
+    cmp -s "$baked/pixi.lock" "$synced/pixi.lock"
+}
+
+resolve_repo_root() {
+    local source_override="${SAMPLEWORKS_SOURCE_DIR:-}"
+    if [[ -n "$source_override" ]]; then
+        if ! is_sampleworks_root "$source_override"; then
+            cat >&2 <<EOF
+SAMPLEWORKS_SOURCE_DIR does not point to a Sampleworks checkout:
+  $source_override
+EOF
+            return 2
+        fi
+        printf '%s\n' "$source_override"
+        return 0
+    fi
+
+    # ACTL syncs the user's local checkout here. Prefer it over any stale code
+    # that may have been used only to build the image's pixi environments.
+    if is_sampleworks_root "/home/dev/workspace"; then
+        printf '%s\n' "/home/dev/workspace"
+        return 0
+    fi
+
+    find_sampleworks_root_upwards "$PWD" && return 0
+
+    # Legacy fallback for older images/users. This intentionally comes after
+    # /home/dev/workspace so inherited SAMPLEWORKS_APP_DIR=/app cannot mask the
+    # synced checkout.
+    local app_override="${SAMPLEWORKS_APP_DIR:-}"
+    if [[ -n "$app_override" ]]; then
+        if is_sampleworks_root "$app_override"; then
+            printf '%s\n' "$app_override"
+            return 0
+        fi
+    fi
+
+    if is_sampleworks_root "$script_dir"; then
+        printf '%s\n' "$script_dir"
+        return 0
+    fi
+
+    cat >&2 <<'EOF'
+Could not find the synced Sampleworks checkout.
+
+Expected ACTL to sync the repo to /home/dev/workspace. If you are using a
+custom layout, set SAMPLEWORKS_SOURCE_DIR=/path/to/sampleworks before running
+run_experiments.
+EOF
+    return 2
+}
+
+repo_root="$(resolve_repo_root)"
+
+env_preset="${SAMPLEWORKS_PRESET:-}"
+default_target="$env_preset"
+target=""
+explicit_preset=""
+explicit_jobs=""
+explicit_results_dir=""
+expect_value_for=""
+for arg in "$@"; do
+    if [[ -n "$expect_value_for" ]]; then
+        case "$expect_value_for" in
+            preset)
+                explicit_preset="$arg"
+                ;;
+            results-dir)
+                explicit_results_dir="$arg"
+                ;;
+            jobs)
+                explicit_jobs="$arg"
+                ;;
+        esac
+        expect_value_for=""
+        continue
+    fi
+
+    case "$arg" in
+        --preset)
+            expect_value_for="preset"
+            ;;
+        --preset=*)
+            explicit_preset="${arg#--preset=}"
+            ;;
+        --results-dir)
+            expect_value_for="results-dir"
+            ;;
+        --results-dir=*)
+            explicit_results_dir="${arg#--results-dir=}"
+            ;;
+        --jobs)
+            expect_value_for="jobs"
+            ;;
+        --jobs=*)
+            explicit_jobs="${arg#--jobs=}"
+            ;;
+        -*)
+            ;;
+        *)
+            if [[ -z "$target" ]]; then
+                target="$arg"
+            fi
+            ;;
+    esac
+done
+
+needs_run_config=1
+for arg in "$@"; do
+    case "$arg" in
+        --list|-h|--help)
+            needs_run_config=0
+            ;;
+    esac
+done
+
+if [[
+    "$needs_run_config" -eq 1 &&
+    -z "$target" &&
+    -z "$explicit_preset" &&
+    -z "$explicit_jobs" &&
+    -z "$env_preset"
+]]; then
+    cat >&2 <<'EOF'
+run_experiments requires an explicit preset or job selector.
+
+Examples:
+  run_experiments rf3
+  run_experiments full_8gpu --jobs rf3,protenix
+  run_experiments --preset experiments/my_rf3.toml
+EOF
+    exit 2
+fi
+
+label_source="$default_target"
+if [[ -n "$explicit_preset" ]]; then
+    label_source="$explicit_preset"
+elif [[ -n "$explicit_jobs" && ( -z "$target" || "$target" == "all" || "$target" == "full" || "$target" == "full_8gpu" ) ]]; then
+    label_source="$explicit_jobs"
+elif [[ -n "$target" ]]; then
+    label_source="$target"
+fi
+case "$label_source" in
+    all|full)
+        label_source="full_8gpu"
+        ;;
+esac
+if [[ "$label_source" == *.toml || "$label_source" == */* ]]; then
+    if [[ "$label_source" != /* ]]; then
+        label_source="$repo_root/$label_source"
+    fi
+fi
+run_label="${label_source##*/}"
+run_label="${run_label%.toml}"
+run_label="${run_label//,/_}"
+
+run_name="${SAMPLEWORKS_ACTL_RUN_NAME:-$(hostname -s 2>/dev/null || printf 'sampleworks')}"
+default_results_dir="/mnt/diffuse-shared/results/sampleworks/${run_name}/${run_label}"
+default_msa_cache_dir="/mnt/diffuse-shared/cache/sampleworks/msa"
+
+export DATA_DIR="${DATA_DIR:-${SAMPLEWORKS_DATA_DIR:-}}"
+export PROTEINS_CSV="${PROTEINS_CSV:-${SAMPLEWORKS_PROTEINS_CSV:-}}"
+if [[ "$needs_run_config" -eq 1 ]]; then
+    require_env_var DATA_DIR \
+        "Set DATA_DIR to the dataset directory for this run, e.g. /mnt/diffuse-shared/raw/sampleworks/initial_dataset_40_occ_sweeps."
+    require_env_var PROTEINS_CSV \
+        "Set PROTEINS_CSV to the input manifest for this run, e.g. \$DATA_DIR/proteins.csv."
+fi
+if [[ -n "$explicit_results_dir" ]]; then
+    export RESULTS_DIR="$explicit_results_dir"
+else
+    export RESULTS_DIR="${RESULTS_DIR:-${SAMPLEWORKS_RESULTS_DIR:-$default_results_dir}}"
+fi
+export MSA_CACHE_DIR="${MSA_CACHE_DIR:-${SAMPLEWORKS_MSA_CACHE_DIR:-$default_msa_cache_dir}}"
+export SAMPLEWORKS_GRID_SEARCH_SCRIPT="${SAMPLEWORKS_GRID_SEARCH_SCRIPT:-$repo_root/run_grid_search.py}"
+# Append the old PYTHONPATH only when it is non-empty. This avoids a trailing
+# colon, which Python treats as the current working directory.
+export PYTHONPATH="$repo_root/src${PYTHONPATH:+:$PYTHONPATH}"
+export PIXI_CACHE_DIR="${PIXI_CACHE_DIR:-/tmp/pixi-cache}"
+export UV_CACHE_DIR="${UV_CACHE_DIR:-/tmp/uv-cache}"
+
+shared_checkpoint_dir="/mnt/diffuse-shared/raw/checkpoints"
+for checkpoint_var_and_file in \
+    "BOLTZ1_CHECKPOINT boltz1_conf.ckpt" \
+    "BOLTZ2_CHECKPOINT boltz2_conf.ckpt" \
+    "RF3_CHECKPOINT rf3_foundry_01_24_latest.ckpt" \
+    "PROTENIX_CHECKPOINT protenix_base_default_v0.5.0.pt"; do
+    read -r checkpoint_var checkpoint_file <<<"$checkpoint_var_and_file"
+    checkpoint_path="$shared_checkpoint_dir/$checkpoint_file"
+    if [[ -z "${!checkpoint_var:-}" && -f "$checkpoint_path" ]]; then
+        export "$checkpoint_var=$checkpoint_path"
+    fi
+done
+
+needs_runtime_paths=1
+for arg in "$@"; do
+    case "$arg" in
+        --dry-run|--show|--list|-h|--help)
+            needs_runtime_paths=0
+            ;;
+    esac
+done
+
+source_proteins_csv="$PROTEINS_CSV"
+if [[ "$needs_runtime_paths" -eq 1 && -f "$source_proteins_csv" ]]; then
+    # The shared proteins.csv currently contains absolute /data/inputs paths,
+    # while ACTL mounts the dataset at /mnt/diffuse-shared. Rewrite a per-run
+    # manifest instead of requiring non-root scientists to create /data symlinks.
+    manifest_dir="$RESULTS_DIR/_input_manifest"
+    manifest_proteins_csv="$manifest_dir/proteins.csv"
+    mkdir -p "$manifest_dir"
+    legacy_data_dir="/data/inputs"
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        printf '%s\n' "${line//$legacy_data_dir/$DATA_DIR}"
+    done <"$source_proteins_csv" >"$manifest_proteins_csv"
+    export PROTEINS_CSV="$manifest_proteins_csv"
+fi
+
+runner_env="${SAMPLEWORKS_RUNNER_ENV:-rf3}"
+# RUNTIME_PIXI=1 is the short scientist-facing escape hatch for branches whose
+# pyproject.toml or pixi.lock no longer match the baked image. Normalize it to
+# the internal flag that both this wrapper and sampleworks.runs.runner consume.
+if truthy_env RUNTIME_PIXI; then
+    export SAMPLEWORKS_ALLOW_RUNTIME_PIXI=1
+fi
+pixi_project_dir="${SAMPLEWORKS_PIXI_PROJECT_DIR:-}"
+if [[ -z "$pixi_project_dir" ]]; then
+    if ! pixi_inputs_match_image /app "$repo_root" && truthy_env SAMPLEWORKS_ALLOW_RUNTIME_PIXI; then
+        pixi_project_dir="$repo_root"
+    elif [[ -f /app/pyproject.toml && -d /app/.pixi ]]; then
+        pixi_project_dir="/app"
+    else
+        pixi_project_dir="$repo_root"
+    fi
+fi
+export SAMPLEWORKS_PIXI_PROJECT_DIR="$pixi_project_dir"
+
+if ! pixi_inputs_match_image /app "$repo_root"; then
+    if truthy_env SAMPLEWORKS_ALLOW_RUNTIME_PIXI; then
+        cat >&2 <<EOF
+Synced pyproject.toml or pixi.lock differs from the baked image. Runtime pixi
+updates are enabled, so using the synced checkout as the pixi project:
+  $repo_root
+EOF
+        export SAMPLEWORKS_REQUIRE_PREBUILT_PIXI="${SAMPLEWORKS_REQUIRE_PREBUILT_PIXI:-0}"
+        export SAMPLEWORKS_SKIP_ENV_PREPARE="${SAMPLEWORKS_SKIP_ENV_PREPARE:-0}"
+    else
+        cat >&2 <<EOF
+Synced pyproject.toml or pixi.lock differs from the baked pixi-with-checkpoints image.
+
+Rebuild/use an image produced from this checkout, or intentionally update pixi
+inside this pod by running with:
+
+  RUNTIME_PIXI=1 run_experiments ...
+
+Runtime pixi updates can be slow and may rebuild CUDA packages, so they are
+disabled by default for reproducible scientist runs.
+EOF
+        exit 2
+    fi
+else
+    # The ACTL image is expected to provide ready-to-use pixi envs under /app/.pixi.
+    # Do not let sampleworks.runs.runner call `pixi run` just to "prepare" envs;
+    # that can reinstall the CUDA stack inside the pod or spend a long time
+    # refreshing caches. Use RUNTIME_PIXI=1 for dependency
+    # debugging against the synced checkout.
+    export SAMPLEWORKS_REQUIRE_PREBUILT_PIXI="${SAMPLEWORKS_REQUIRE_PREBUILT_PIXI:-1}"
+    export SAMPLEWORKS_SKIP_ENV_PREPARE="${SAMPLEWORKS_SKIP_ENV_PREPARE:-1}"
+fi
+runner_python="${SAMPLEWORKS_RUNNER_PYTHON:-$pixi_project_dir/.pixi/envs/$runner_env/bin/python}"
+
+extra_cli_args=()  # forward SAMPLEWORKS_PRESET whenever the command line names no preset, target or jobs
+if [[ "$needs_run_config" -eq 1 && -z "$target" && -z "$explicit_preset" && -z "$explicit_jobs" && -n "$env_preset" ]]; then
+    extra_cli_args=(--preset "$env_preset")
+fi
+
+display_target="${target:-${explicit_preset:-$default_target}}"
+if [[ -n "$explicit_jobs" ]]; then
+    display_target="$display_target --jobs $explicit_jobs"
+fi
+
+if [[ "$needs_runtime_paths" -eq 1 ]]; then  # real runs only; --dry-run/--show/--list/--help skip this
+    if [[ ! -f "${PROTEINS_CSV:-$source_proteins_csv}" ]]; then  # manifest (possibly rewritten) must exist
+        cat >&2 <<EOF
+Sampleworks input dataset was not found.
+
+Expected: $source_proteins_csv
+
+On an ACTL Sampleworks pod, make sure the diffuse-shared PVC is mounted at
+/mnt/diffuse-shared, or override the dataset paths, for example:
+
+  DATA_DIR=/mnt/diffuse-shared/raw/sampleworks/<dataset> PROTEINS_CSV=<manifest.csv> run_experiments <preset>
+
+EOF
+        exit 2
+    fi
+    mkdir -p "$RESULTS_DIR" "$MSA_CACHE_DIR"
+fi
+
+cat >&2 <<EOF
+Sampleworks preset run
+  target:        $display_target
+  data:          $DATA_DIR
+  results:       $RESULTS_DIR
+  msa cache:     $MSA_CACHE_DIR
+  source:        $repo_root
+  pixi project:  $pixi_project_dir
+  runner env:    $runner_env
+  runner python: $runner_python
+
+EOF
+
+if [[ -x "$runner_python" ]]; then
+    runner_env_dir="$(cd -- "$(dirname -- "$runner_python")/.." && pwd)"
+    export PATH="$runner_env_dir/bin${PATH:+:$PATH}"
+    export CONDA_PREFIX="$runner_env_dir"
+    export CUDA_HOME="${CUDA_HOME:-$runner_env_dir}"
+    export PYTHONNOUSERSITE=1
+    cd "$repo_root"
+    if [[ "${#extra_cli_args[@]}" -gt 0 ]]; then
+        exec "$runner_python" -m sampleworks.runs.cli \
+            --results-dir "$RESULTS_DIR" \
+            "${extra_cli_args[@]}" \
+            "$@"
+    fi
+    exec "$runner_python" -m sampleworks.runs.cli \
+        --results-dir "$RESULTS_DIR" \
+        "$@"
+fi
+
+if ! truthy_env SAMPLEWORKS_ALLOW_RUNTIME_PIXI; then
+    cat >&2 <<EOF
+Prebuilt runner pixi environment is missing: $runner_python
+
+run_experiments is for the ACTL pixi-with-checkpoints image, which must contain
+ready-to-use environments under /app/.pixi. Refusing to run 'pixi run' because
+that would install or refresh packages inside the pod.
+
+Recreate the pod with the current pixi-with-checkpoints image. If you are
+intentionally debugging runtime pixi setup, set RUNTIME_PIXI=1.
+EOF
+    exit 2
+fi
+
+cd "$pixi_project_dir"
+if [[ "${#extra_cli_args[@]}" -gt 0 ]]; then
+    exec pixi run -e "$runner_env" python -m sampleworks.runs.cli \
+        --results-dir "$RESULTS_DIR" \
+        "${extra_cli_args[@]}" \
+        "$@"
+fi
+exec pixi run -e "$runner_env" python -m sampleworks.runs.cli \
+    --results-dir "$RESULTS_DIR" \
+    "$@"
diff --git a/run_experiments.sh b/run_experiments.sh
new file mode 100755
index 00000000..f4feba11
--- /dev/null
+++ b/run_experiments.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Backward-compatible alias for the extensionless run_experiments command.
+set -euo pipefail
+
+script_path="${BASH_SOURCE[0]}"
+while [[ -L "$script_path" ]]; do
+    script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+    script_target="$(readlink "$script_path")"
+    if [[ "$script_target" == /* ]]; then
+        script_path="$script_target"
+    else
+        script_path="$script_dir/$script_target"
+    fi
+done
+script_dir="$(cd -- "$(dirname -- "$script_path")" && pwd)"
+
+exec "$script_dir/run_experiments" "$@"
diff --git a/run_grid_search.py b/run_grid_search.py
index 66ed9c8a..3301a4c7 100755
--- a/run_grid_search.py
+++ b/run_grid_search.py
@@ -25,6 +25,8 @@
 
 @dataclass
 class GridSearchConfig:
+    """Serializable summary of the grid-search dimensions and output location."""
+
     model: str
     scalers: list[str]
     ensemble_sizes: list[int]
@@ -66,24 +68,99 @@ def get_job_status(job: JobConfig) -> str:
         return "failed"
 
 
-def detect_gpus() -> list[str]:
-    cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", "")
-    if cuda_visible:
-        return [g.strip() for g in cuda_visible.split(",") if g.strip()]
+def _gpu_indices_from_torch() -> list[str] | None:
+    """Ask PyTorch, if it can be imported, which CUDA ordinals this process sees.
+
+    Returns
+    -------
+    list of str or None
+        ``"0"`` up to ``device_count - 1`` as strings, or ``[]`` without CUDA.
+        ``None`` when the import fails or a CUDA query raises.
+    """
+    try:
+        import torch
+    except ImportError:
+        return None
+
+    try:
+        cuda_ready = torch.cuda.is_available()
+        device_total = torch.cuda.device_count() if cuda_ready else 0
+    except Exception as exc:
+        log.debug(f"PyTorch CUDA discovery failed: {exc}")
+        return None
+    return [str(ordinal) for ordinal in range(device_total)]
+
+
+def _gpu_indices_from_nvidia_smi() -> list[str] | None:
+    """Return visible CUDA ordinals using ``nvidia-smi`` as a fallback.
+
+    Returns
+    -------
+    list of str or None
+        GPU ordinals reported by ``nvidia-smi``. ``None`` means the command is
+        absent or failed.
+    """
     try:
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=index", "--format=csv,noheader"],
             capture_output=True,
             text=True,
         )
-        if result.returncode == 0:
-            return [g.strip() for g in result.stdout.strip().split("\n") if g.strip()]
     except FileNotFoundError:
-        pass
+        return None
+    if result.returncode != 0:
+        return None
+    return [g.strip() for g in result.stdout.strip().split("\n") if g.strip()]
+
+
+def _discover_gpu_indices() -> list[str] | None:
+    """Probe PyTorch for GPU ordinals and fall back to ``nvidia-smi``.
+
+    Returns
+    -------
+    list of str or None
+        The PyTorch answer unless it is ``None``; else the ``nvidia-smi`` one.
+    """
+    from_torch = _gpu_indices_from_torch()
+    if from_torch is None:
+        return _gpu_indices_from_nvidia_smi()
+    return from_torch
+
+
+def detect_gpus() -> list[str]:
+    """Return CUDA GPU identifiers visible to this grid-search process.
+
+    ``CUDA_VISIBLE_DEVICES`` wins when set; its no-device sentinels return ``[]``.
+    Its numeric entries are checked against ``nvidia-smi`` only: PyTorch reports
+    remapped ordinals (``4,5`` -> ``0,1``) and would reject valid selections.
+    Otherwise PyTorch, then ``nvidia-smi``, is probed; ``["0"]`` is the fallback.
+    """
+    cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
+    cuda_visible_key = cuda_visible.lower()
+    if cuda_visible_key in {"none", "void", "nodevfiles"}:
+        return []
+    if cuda_visible_key == "all":
+        return _discover_gpu_indices() or ["0"]
+    if cuda_visible:
+        gpus = [g.strip() for g in cuda_visible.split(",") if g.strip()]
+        visible = _gpu_indices_from_nvidia_smi()
+        if visible and all(g.isdigit() for g in gpus + visible):
+            missing = sorted(set(gpus).difference(visible), key=int)
+            if missing:
+                raise ValueError(
+                    "CUDA_VISIBLE_DEVICES references GPUs that are not visible "
+                    f"in this container: {missing}. Visible GPUs: {visible}. "
+                    "Check the preset jobs.*.gpus values for this pod size."
+                )
+        return gpus
+    discovered = _discover_gpu_indices()
+    if discovered is not None:
+        return discovered
     return ["0"]
 
 
 def get_pixi_env(model: str) -> str:
+    """Return the pixi environment name needed to run a model family."""
     if model in (StructurePredictor.BOLTZ_1, StructurePredictor.BOLTZ_2):
         return "boltz"
     elif model == StructurePredictor.PROTENIX:
@@ -98,6 +175,7 @@ def get_pixi_env(model: str) -> str:
 def build_args_for_process_pool(
     job: JobConfig, args: argparse.Namespace, device_num: int | None = None
 ) -> GuidanceConfig:
+    """Convert a grid-search job into the picklable guidance config for a worker."""
     guidance_config = GuidanceConfig(
         protein=job.protein,
         structure=job.structure_path,
@@ -190,7 +268,11 @@ def run_grid_search(
         for worker_num, job_queue_path in enumerate(job_queue_paths):
             model = worker_job_queues[worker_num][0].model
             future = executor.submit(
-                run_guidance_queue_script, (job_queue_path, max_workers, model, worker_num)
+                run_guidance_queue_script,
+                job_queue_path,
+                model,
+                worker_num,
+                gpus,
             )
             futures[future] = job_queue_path
 
@@ -219,20 +301,127 @@ def run_grid_search(
     return results
 
 
-def run_guidance_queue_script(args: tuple[str, int, str, int]):
-    job_queue_path, max_workers, model, worker_num = args
-    pixi_env = get_pixi_env(model)
+def run_guidance_queue_script(
+    job_queue_path: str,
+    model: str,
+    worker_num: int,
+    gpus: list[str],
+) -> subprocess.CompletedProcess[Any]:
+    """Run one pickled guidance job queue in the model's pixi environment.
+
+    Parameters
+    ----------
+    job_queue_path : str
+        Pickled queue of guidance jobs assigned to this worker.
+    model : str
+        Structure predictor name used to select the pixi environment.
+    worker_num : int
+        Zero-based worker index. This determines the local CUDA ordinal.
+    gpus : list of str
+        Selected GPU entries. CUDA remaps entries such as ``4,5`` to local
+        process indices ``0,1``.
+
+    Returns
+    -------
+    subprocess.CompletedProcess
+        Result from the subprocess that ran the worker queue.
+    """
+    pixi_env_name = get_pixi_env(model)
     script_path = Path(__file__).parent / "scripts" / "run_guidance_pipeline.py"
-    cmd = f"pixi run -e {pixi_env} python {script_path} --job-queue-path {job_queue_path}"
-    cmd = cmd.split()
-    log.info(f"Running worker {worker_num}: {cmd} on GPU {worker_num % max_workers}")
-    # env = os.environ.copy()
+    env_python = get_pixi_env_python(pixi_env_name)
+    if env_python:
+        cmd = [env_python, str(script_path), "--job-queue-path", job_queue_path]
+        process_env = get_pixi_env_process_env(env_python)
+    else:
+        cmd = [
+            "pixi",
+            "run",
+            "-e",
+            pixi_env_name,
+            "python",
+            str(script_path),
+            "--job-queue-path",
+            job_queue_path,
+        ]
+        process_env = os.environ.copy()
+    local_gpu = worker_num % len(gpus)
+    requested_gpu = gpus[local_gpu]
+    if os.environ.get("CUDA_VISIBLE_DEVICES"):
+        gpu_source = "CUDA_VISIBLE_DEVICES"
+    else:
+        gpu_source = "GPU detection"
+    log.info(
+        f"Running worker {worker_num}: {cmd} on local CUDA GPU {local_gpu} "
+        f"(selected GPU {requested_gpu} via {gpu_source})"
+    )
 
     with open(job_queue_path.replace(".pkl", ".log"), "w") as log_file:
-        result = subprocess.run(cmd, stdout=log_file, stderr=subprocess.STDOUT)
+        result = subprocess.run(
+            cmd,
+            stdout=log_file,
+            stderr=subprocess.STDOUT,
+            env=process_env,
+        )
     return result
 
 
+def get_pixi_env_process_env(env_python: str) -> dict[str, str]:
+    """Return process environment values for a direct pixi Python executable.
+
+    Parameters
+    ----------
+    env_python : str
+        Python executable under ``.pixi/envs/<env>/bin/python``.
+
+    Returns
+    -------
+    dict of str to str
+        Environment with the env's ``bin`` first on ``PATH`` (never adding an
+        empty entry, which means the cwd), ``CONDA_PREFIX``, and ``CUDA_HOME``
+        set so extensions find ``ninja`` and CUDA without ``pixi run``.
+    """
+    env_dir = Path(env_python).resolve().parent.parent
+    bin_dir = env_dir / "bin"
+    env = os.environ.copy()
+    env["PATH"] = os.pathsep.join(p for p in (str(bin_dir), env.get("PATH", "")) if p)
+    env["CONDA_PREFIX"] = str(env_dir)
+    env.setdefault("CUDA_HOME", str(env_dir))
+    env["PYTHONNOUSERSITE"] = "1"
+    return env
+
+
+def get_pixi_env_python(pixi_env: str) -> str | None:
+    """Locate the interpreter of an already-installed pixi environment.
+
+    Lookup order: ``SAMPLEWORKS_FORCE_PIXI`` (1/true/yes) disables the shortcut,
+    then ``SAMPLEWORKS_<ENV>_PYTHON`` is returned as-is, then the executable
+    under ``$SAMPLEWORKS_PIXI_PROJECT_DIR`` (default ``/app``) is tried.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Pixi environment name such as ``boltz``, ``protenix``, or ``rf3``.
+
+    Returns
+    -------
+    str or None
+        Interpreter path, or ``None`` when the caller should use ``pixi run``.
+    """
+    force_flag = os.environ.get("SAMPLEWORKS_FORCE_PIXI", "").lower()
+    if force_flag in {"1", "true", "yes"}:
+        return None
+
+    override_var = "SAMPLEWORKS_" + pixi_env.upper().replace("-", "_") + "_PYTHON"
+    if os.environ.get(override_var):
+        return os.environ[override_var]
+
+    project_root = os.environ.get("SAMPLEWORKS_PIXI_PROJECT_DIR", "/app")
+    interpreter = Path(project_root, ".pixi", "envs", pixi_env, "bin", "python")
+    if interpreter.is_file() and os.access(interpreter, os.X_OK):
+        return str(interpreter)
+    return None
+
+
 def main(args: argparse.Namespace):
     """
     Main pipeline for running grid search trials.
@@ -243,6 +432,10 @@ def main(args: argparse.Namespace):
     log.info(f"Detected {len(gpus)} GPUs: {gpus}")
     if args.max_parallel != "auto":
         gpus = gpus[: int(args.max_parallel)]
+    if not gpus:
+        raise ValueError(
+            "No CUDA GPUs are visible; unset CUDA_VISIBLE_DEVICES=none or use a GPU pod"
+        )
 
     log_args(args, gpus)
 
@@ -276,6 +469,7 @@ def main(args: argparse.Namespace):
 
 
 def generate_jobs(args: argparse.Namespace) -> list[JobConfig]:
+    """Expand CLI grid dimensions into concrete per-protein guidance jobs."""
     jobs = []
 
     proteins = ProteinInput.from_csv(Path(args.proteins))
@@ -361,6 +555,7 @@ def save_results(
     output_dir: str,
     total_time: float,
 ):
+    """Merge the latest job results into ``results.json`` under ``output_dir``."""
     os.makedirs(output_dir, exist_ok=True)
     results_path = os.path.join(output_dir, "results.json")
 
@@ -419,6 +614,7 @@ def save_results(
 
 
 def parse_args() -> argparse.Namespace:
+    """Parse command-line arguments for one model-specific grid search."""
     parser = argparse.ArgumentParser(
         description="Run grid search across scalers, and parameters for a single "
         "protein structure predictor model."
@@ -555,6 +751,7 @@ def parse_args() -> argparse.Namespace:
 
 
 def log_args(args: argparse.Namespace, gpus: list[str]):
+    """Log the resolved grid-search configuration before jobs are generated."""
     log.info("=" * 50)
     log.info("Starting grid search")
     log.info(f"Model: {args.model}")
@@ -576,6 +773,7 @@ def log_args(args: argparse.Namespace, gpus: list[str]):
 # TODO make job statuses a proper class
 # TODO: there are many constants here like "not_run" that should be defined in only one place.
 def generate_and_filter_jobs(args: argparse.Namespace) -> tuple[list[JobConfig], dict[Any, Any]]:
+    """Generate jobs and filter them according to prior status and rerun flags."""
     jobs = generate_jobs(args)
     log.info(f"Generated {len(jobs)} total jobs")
 
diff --git a/src/sampleworks/models/boltz/wrapper.py b/src/sampleworks/models/boltz/wrapper.py
index c257511e..d75b6ab5 100644
--- a/src/sampleworks/models/boltz/wrapper.py
+++ b/src/sampleworks/models/boltz/wrapper.py
@@ -320,7 +320,7 @@ class BoltzConfig:
     """
 
     out_dir: str | Path | None = None
-    num_workers: int = 8
+    num_workers: int = 0
     ensemble_size: int = 1
     recycling_steps: int = 3
 
@@ -329,7 +329,7 @@ def process_structure_for_boltz(
     structure: dict,
     *,
     out_dir: str | Path | None = None,
-    num_workers: int = 8,
+    num_workers: int = 0,
     ensemble_size: int = 1,
     recycling_steps: int | None = 3,
 ) -> dict:
@@ -360,6 +360,13 @@ def process_structure_for_boltz(
     if recycling_steps is None:
         recycling_steps = 3
 
+    # Keep Boltz dataloading in-process by default. Kubernetes pods usually get
+    # a small /dev/shm, and torch DataLoader workers can exhaust it while
+    # sharing large featurized batches back to the parent process. This is
+    # Boltz-specific because the Protenix/RF3 wrappers do not expose an
+    # equivalent preprocessing worker pool here; callers can still pass
+    # ``num_workers`` explicitly when profiling shows that multiprocessing is
+    # worth the shared-memory tradeoff.
     config = BoltzConfig(
         out_dir=out_dir or structure.get("metadata", {}).get("id", "boltz_output"),
         num_workers=num_workers,
@@ -567,7 +574,7 @@ def _setup_data_module(
         self,
         input_path: str | Path,
         out_dir: str | Path,
-        num_workers: int = 8,
+        num_workers: int = 0,
     ):
         """Create the Lightning data module used by Boltz to serve data to the model.
 
@@ -628,7 +635,7 @@ def _setup_data_module(
             target_dir=processed.targets_dir,
             msa_dir=processed.msa_dir,
             mol_dir=mol_dir,
-            num_workers=num_workers if num_workers is not None else 8,
+            num_workers=num_workers if num_workers is not None else 0,
             constraints_dir=processed.constraints_dir,
             template_dir=processed_dir / "templates"
             if (processed_dir / "templates").exists()
@@ -1032,7 +1039,7 @@ def _setup_data_module(
         self,
         input_path: str | Path,
         out_dir: str | Path,
-        num_workers: int = 2,
+        num_workers: int = 0,
     ):
         """Create the Lightning data module used by Boltz to serve data to the model.
 
@@ -1090,7 +1097,7 @@ def _setup_data_module(
             manifest=processed.manifest,
             target_dir=processed.targets_dir,
             msa_dir=processed.msa_dir,
-            num_workers=num_workers if num_workers is not None else 2,
+            num_workers=num_workers if num_workers is not None else 0,
             constraints_dir=processed.constraints_dir,
         )
 
diff --git a/src/sampleworks/runs/__init__.py b/src/sampleworks/runs/__init__.py
new file mode 100644
index 00000000..8bb071ac
--- /dev/null
+++ b/src/sampleworks/runs/__init__.py
@@ -0,0 +1,5 @@
+"""Preset-driven orchestrator for parallel run_grid_search.py invocations.
+
+Replaces the previous ACTL-native bash wrapper scripts with TOML presets +
+a thin Python runner. See ``sampleworks-runs --help``.
+"""
diff --git a/src/sampleworks/runs/cli.py b/src/sampleworks/runs/cli.py
new file mode 100644
index 00000000..008d3f99
--- /dev/null
+++ b/src/sampleworks/runs/cli.py
@@ -0,0 +1,263 @@
+"""Command-line entry point for ``sampleworks-runs``."""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+from . import loader, runner
+from .schema import Preset
+
+
+DEFAULT_PRESET = "full_8gpu"  # preset used when no target or --preset picks one
+DEFAULT_PRESET_ALIASES = frozenset({"all", "full", "full_8gpu"})  # targets meaning DEFAULT_PRESET
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Entry point for the ``sampleworks-runs`` console script.
+
+    Parameters
+    ----------
+    argv : list of str or None, optional
+        Command-line arguments excluding the program name. When ``None``
+        (the default), :mod:`argparse` reads from :data:`sys.argv`.
+
+    Returns
+    -------
+    int
+        Exit code suitable for ``sys.exit``: ``0`` on success, non-zero on
+        job failure or fatal CLI error.
+    """
+    parser = _build_parser()
+    args = parser.parse_args(argv)
+
+    if args.list:
+        for name in loader.list_presets():
+            print(name)
+        return 0
+
+    preset_name, job_filter = _resolve_target(args.target, args.preset, args.jobs, parser)
+    preset = loader.load_preset(preset_name, overrides=args.set)
+    if job_filter:
+        preset = _filter_jobs(preset, job_filter)
+
+    if args.show:
+        _print_show(preset)
+        return 0
+
+    results_dir = Path(args.results_dir or _default_results_dir(preset))
+    try:
+        return runner.run(preset, results_dir=results_dir, dry_run=args.dry_run)
+    except RuntimeError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 2
+
+
+def _build_parser() -> argparse.ArgumentParser:
+    """Construct the :mod:`argparse` parser for ``sampleworks-runs``.
+
+    Returns
+    -------
+    argparse.ArgumentParser
+        Parser covering preset selection, overrides, and execution flags.
+    """
+    parser = argparse.ArgumentParser(
+        prog="sampleworks-runs",
+        description=(
+            "Run Sampleworks experiment presets. With no target, runs the "
+            "full_8gpu preset. A target like 'rf3', 'boltz', or 'protenix' "
+            "runs that preset; comma-separated targets like 'rf3,protenix' "
+            "select jobs from full_8gpu."
+        ),
+    )
+    parser.add_argument(
+        "target",
+        nargs="?",
+        help=(
+            "Preset name from experiments/ (rf3, boltz, protenix, etc.), "
+            "comma-separated job shortcut from full_8gpu, or 'full'/'full_8gpu'."
+        ),
+    )
+    parser.add_argument(
+        "--preset",
+        default="",
+        help="Preset name from experiments/ or path to a .toml file. Default: full_8gpu.",
+    )
+    parser.add_argument(
+        "--list",
+        action="store_true",
+        help="List experiments/*.toml presets and exit",
+    )
+    parser.add_argument("--show", action="store_true", help="Print the resolved preset and exit")
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print the resolved job commands instead of executing them",
+    )
+    parser.add_argument(
+        "--jobs",
+        default="",
+        help="Comma-separated job names to run from the selected preset. Default: all jobs.",
+    )
+    parser.add_argument(
+        "--set",
+        action="append",
+        default=[],
+        metavar="DOTTED_KEY=VALUE",
+        help=(
+            "Override a value in the loaded preset. Examples: "
+            "--set defaults.DATA_DIR=/data/foo, "
+            "--set jobs.rf3.args.gradient-weights='0.0 0.01', "
+            "--set jobs.0.gpu_count=4"
+        ),
+    )
+    parser.add_argument(
+        "--results-dir",
+        default=None,
+        help="Override RESULTS_DIR for this run (also controls per-job log location).",
+    )
+    return parser
+
+
+def _resolve_target(
+    target: str | None,
+    preset: str,
+    jobs: str,
+    parser: argparse.ArgumentParser,
+) -> tuple[str, str]:
+    """Resolve the user-facing target grammar into preset plus job filter.
+
+    Parameters
+    ----------
+    target : str or None
+        Optional positional target. Without ``--preset`` this is either a
+        default preset alias (``full``/``full_8gpu``/``all``) or a job selector
+        from :data:`DEFAULT_PRESET`. With ``--preset`` it is a shorthand job
+        selector for that explicit preset.
+    preset : str
+        Explicit preset name/path from ``--preset``.
+    jobs : str
+        Explicit comma-separated job selector from ``--jobs``.
+    parser : argparse.ArgumentParser
+        Parser used to report grammar errors.
+
+    Returns
+    -------
+    tuple of str, str
+        ``(preset_name_or_path, comma_separated_job_filter)``.
+    """
+    if preset:
+        if target and jobs:
+            parser.error("pass jobs either as the positional target or with --jobs, not both")
+        return preset, jobs or target or ""
+
+    if target is None or target in DEFAULT_PRESET_ALIASES:
+        return DEFAULT_PRESET, jobs
+
+    if jobs:
+        parser.error("pass jobs either as the positional target or with --jobs, not both")
+
+    if target.endswith(".toml") or "/" in target:
+        parser.error("pass custom preset paths with --preset path/to/preset.toml")
+
+    if "," not in target and target in loader.list_presets():
+        return target, ""
+
+    return DEFAULT_PRESET, target
+
+
+def _filter_jobs(preset: Preset, jobs: str) -> Preset:
+    """Return a new :class:`Preset` containing only the named jobs.
+
+    Parameters
+    ----------
+    preset : Preset
+        Source preset.
+    jobs : str
+        Comma-separated list of job names to keep.
+
+    Returns
+    -------
+    Preset
+        New preset with the same ``description``, ``defaults``, and
+        ``shared_args`` and only the filtered jobs.
+
+    Raises
+    ------
+    SystemExit
+        If any name in ``jobs`` does not match a job in ``preset``.
+    """
+    names = [n.strip() for n in jobs.split(",") if n.strip()]
+    keep = [j for j in preset.jobs if j.name in names]
+    missing = set(names) - {j.name for j in keep}
+    if missing:
+        raise SystemExit(f"job selector references unknown jobs: {sorted(missing)}")
+    description = f"Subset of {preset.name}: {', '.join(names)}"
+    name = f"{preset.name}:{','.join(names)}"
+    return Preset(
+        name=name,
+        description=description,
+        defaults=preset.defaults,
+        shared_args=preset.shared_args,
+        jobs=keep,
+    )
+
+
+def _print_show(preset: Preset) -> None:
+    """Print a human-readable rendering of a resolved preset to stdout.
+
+    Parameters
+    ----------
+    preset : Preset
+        Resolved preset to display (used by ``--show``).
+    """
+    print(f"name: {preset.name}")
+    if preset.description:
+        print(f"description: {preset.description}")
+    if preset.defaults:
+        print("defaults:")
+        for k, v in preset.defaults.items():
+            print(f"  {k} = {v}")
+    print("jobs:")
+    for j in preset.jobs:
+        print(f"  - name: {j.name}")
+        print(f"    env: {j.env}")
+        if j.gpus:
+            print(f"    gpus: {j.gpus}")
+        else:
+            print(f"    gpu_count: {j.gpu_count}")
+        print(f"    output_subdir: {j.output_subdir}")
+        print("    args:")
+        for k, v in j.args.items():
+            print(f"      {k} = {v!r}")
+
+
+def _default_results_dir(preset: Preset) -> str:
+    """Pick a sensible default ``--results-dir`` when none is given.
+
+    Order of preference:
+      1. The preset's ``[defaults]`` ``RESULTS_DIR``.
+      2. The ``RESULTS_DIR`` environment variable.
+      3. ``./grid_search_results``.
+
+    Parameters
+    ----------
+    preset : Preset
+        Resolved preset (its ``defaults`` have already been merged with env).
+
+    Returns
+    -------
+    str
+        Path to use as the run's root output directory.
+    """
+    return (
+        preset.defaults.get("RESULTS_DIR")
+        or os.environ.get("RESULTS_DIR")
+        or "./grid_search_results"
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/sampleworks/runs/loader.py b/src/sampleworks/runs/loader.py
new file mode 100644
index 00000000..fcec6441
--- /dev/null
+++ b/src/sampleworks/runs/loader.py
@@ -0,0 +1,526 @@
+"""Load presets from TOML and apply runtime overrides.
+
+Resolution order for every string value (defaults block and ``args``):
+  1. ``--set <dotted-path>=<value>`` CLI overrides are applied to the raw TOML
+     dict by :func:`load_preset`, so overridden values participate in
+     interpolation.
+  2. ``${VAR}`` references are resolved against the process environment,
+     with the preset's ``[defaults]`` block filling in any unset keys.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import tomllib
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Any
+
+from .schema import Job, Preset
+
+
+_EXPERIMENTS_DIR_NAME = "experiments"  # directory name searched for *.toml presets
+_MAX_EXPAND_ITERATIONS = 32  # cap on repeated ${VAR} substitution passes
+_VAR_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)\}")  # matches ${NAME}
+_TOP_LEVEL_KEYS = frozenset({"description", "defaults", "shared_args", "jobs"})  # --set roots
+
+
+def list_presets() -> list[str]:
+    """List experiment preset names from the top-level ``experiments`` directory.
+
+    Returns
+    -------
+    list of str
+        Preset names (filename stems, no ``.toml`` extension), sorted
+        alphabetically. If multiple experiment directories are visible, the
+        first directory in the resolution order wins for duplicate names.
+    """
+    names: dict[str, Path] = {}
+    for directory in _experiment_dirs():
+        if not directory.is_dir():
+            continue
+        for path in directory.iterdir():
+            if path.is_file() and path.suffix == ".toml":
+                names.setdefault(path.stem, path)
+    return sorted(names)
+
+
+def load_preset(name_or_path: str, *, overrides: Iterable[str] = ()) -> Preset:
+    """Load a preset by experiment name or filesystem path.
+
+    Parameters
+    ----------
+    name_or_path : str
+        Either the name of a preset in the top-level ``experiments`` directory
+        (as returned by :func:`list_presets`) or a path ending in ``.toml``.
+    overrides : Iterable of str, optional
+        ``KEY=VALUE`` strings as accepted by ``--set``. Applied before
+        variable interpolation.
+
+    Returns
+    -------
+    Preset
+        Fully resolved preset ready for :func:`runner.run`.
+
+    Raises
+    ------
+    FileNotFoundError
+        If ``name_or_path`` matches no experiment preset and no file on disk.
+    KeyError
+        If an override path begins with an unknown top-level key, or if a
+        ``${VAR}`` reference cannot be resolved against the environment or
+        the ``[defaults]`` block.
+    ValueError
+        If an override is malformed (missing ``=``).
+    """
+    raw = _read_toml(name_or_path)
+    overrides_list = list(overrides)
+    raw = _apply_overrides(raw, overrides_list)
+    raw = _resolve_variables(raw)
+    return _build_preset(name=_preset_name(name_or_path), raw=raw)
+
+
+def _read_toml(name_or_path: str) -> dict[str, Any]:
+    """Read raw TOML from a filesystem path or an experiment preset name.
+
+    Parameters
+    ----------
+    name_or_path : str
+        Experiment preset name or filesystem path ending in ``.toml``.
+
+    Returns
+    -------
+    dict of str to Any
+        Parsed TOML, before override application or interpolation.
+
+    Raises
+    ------
+    FileNotFoundError
+        If neither location yields a TOML file.
+    """
+    path = _find_preset_path(name_or_path)
+    if path is not None:
+        return tomllib.loads(path.read_text())
+    raise FileNotFoundError(
+        f"No preset {name_or_path!r}. Experiments: {list_presets()}. "
+        "Put TOML presets in ./experiments or pass a path to a .toml file."
+    )
+
+
+def _find_preset_path(name_or_path: str) -> Path | None:
+    """Resolve a preset name or path to a TOML file.
+
+    Parameters
+    ----------
+    name_or_path : str
+        Preset name (``full_8gpu``), TOML filename (``full_8gpu.toml``), or
+        filesystem path.
+
+    Returns
+    -------
+    pathlib.Path or None
+        Existing TOML path if found, otherwise ``None``.
+    """
+    path = Path(name_or_path)
+    if path.suffix == ".toml" and path.is_file():
+        return path
+
+    preset_filename = path.name if path.suffix == ".toml" else f"{name_or_path}.toml"
+    for directory in _experiment_dirs():
+        candidate = directory / preset_filename
+        if candidate.is_file():
+            return candidate
+    return None
+
+
+def _experiment_dirs() -> list[Path]:
+    """Return candidate top-level experiment directories in precedence order.
+
+    Returns
+    -------
+    list of pathlib.Path
+        Existing or candidate ``experiments`` directories. Duplicates are
+        removed while preserving order.
+    """
+    candidates: list[Path] = []
+
+    explicit = os.environ.get("SAMPLEWORKS_EXPERIMENTS_DIR")
+    if explicit:
+        candidates.append(Path(explicit))
+
+    source_dir = os.environ.get("SAMPLEWORKS_SOURCE_DIR")
+    if source_dir:
+        candidates.append(Path(source_dir) / _EXPERIMENTS_DIR_NAME)
+
+    candidates.append(Path("/home/dev/workspace") / _EXPERIMENTS_DIR_NAME)
+    candidates.extend(_find_upward_experiment_dirs(Path.cwd()))
+    candidates.extend(_find_upward_experiment_dirs(Path(__file__).resolve()))
+
+    seen: set[Path] = set()
+    unique: list[Path] = []
+    for candidate in candidates:
+        resolved = candidate.expanduser().resolve(strict=False)
+        if resolved not in seen:
+            seen.add(resolved)
+            unique.append(resolved)
+    return unique
+
+
+def _find_upward_experiment_dirs(start: Path) -> list[Path]:
+    """Search parents of ``start`` for top-level ``experiments`` directories.
+
+    Parameters
+    ----------
+    start : pathlib.Path
+        Directory or file path to begin searching from.
+
+    Returns
+    -------
+    list of pathlib.Path
+        Candidate experiment directories nearest to farthest.
+    """
+    current = start if start.is_dir() else start.parent
+    dirs: list[Path] = []
+    for parent in [current, *current.parents]:
+        candidate = parent / _EXPERIMENTS_DIR_NAME
+        if candidate.is_dir():
+            dirs.append(candidate)
+    return dirs
+
+
+def _preset_name(name_or_path: str) -> str:
+    """Return the canonical preset name for an experiment name or path argument.
+
+    Parameters
+    ----------
+    name_or_path : str
+        Either an experiment name or a path ending in ``.toml``.
+
+    Returns
+    -------
+    str
+        Filename stem if ``name_or_path`` looks like a path; otherwise the
+        argument unchanged.
+    """
+    return Path(name_or_path).stem if name_or_path.endswith(".toml") else name_or_path
+
+
+def _apply_overrides(raw: dict[str, Any], overrides: list[str]) -> dict[str, Any]:
+    """Apply each ``KEY=VALUE`` override to the raw preset dict in place.
+
+    Parameters
+    ----------
+    raw : dict of str to Any
+        Parsed TOML to mutate.
+    overrides : list of str
+        Each entry must contain exactly one ``=``.
+
+    Returns
+    -------
+    dict of str to Any
+        The same ``raw`` dict (mutated).
+
+    Raises
+    ------
+    ValueError
+        If an override is missing the ``=`` separator.
+    KeyError
+        If an override's top-level key is unknown.
+    """
+    for spec in overrides:
+        if "=" not in spec:
+            raise ValueError(f"--set expects KEY=VALUE, got {spec!r}")
+        key, value = spec.split("=", 1)
+        _set_dotted(raw, key.strip(), _coerce(value))
+    return raw
+
+
+def _set_dotted(obj: dict[str, Any], dotted: str, value: Any) -> None:
+    """Set a nested value in ``obj`` addressed by a dotted path.
+
+    Job-list elements can be addressed by job name or by integer index.
+
+    Parameters
+    ----------
+    obj : dict of str to Any
+        Root dict to mutate.
+    dotted : str
+        Dotted path, e.g. ``"jobs.rf3.args.gradient-weights"`` or
+        ``"defaults.DATA_DIR"``.
+    value : Any
+        Coerced value to write at the leaf.
+
+    Raises
+    ------
+    KeyError
+        If the first segment is not one of :data:`_TOP_LEVEL_KEYS`, or if a
+        list segment references a missing job name or index.
+    TypeError
+        If the path attempts to descend through a non-container value.
+    """
+    parts = dotted.split(".")
+    if parts[0] not in _TOP_LEVEL_KEYS:
+        raise KeyError(
+            f"--set: unknown top-level key {parts[0]!r} in {dotted!r}. "
+            f"Valid top-level keys: {sorted(_TOP_LEVEL_KEYS)}"
+        )
+    cursor: Any = obj
+    for i, part in enumerate(parts[:-1]):
+        cursor = _index(cursor, part, where=".".join(parts[: i + 1]))
+    leaf_parent = cursor
+    leaf_key = parts[-1]
+    if isinstance(leaf_parent, list):
+        leaf_parent[_find_in_list(leaf_parent, leaf_key, where=dotted)] = value
+    else:
+        leaf_parent[leaf_key] = value
+        if parts[0] == "jobs" and len(parts) == 3 and leaf_key == "gpus":
+            leaf_parent.pop("gpu_count", None)
+        elif parts[0] == "jobs" and len(parts) == 3 and leaf_key == "gpu_count":
+            leaf_parent.pop("gpus", None)
+
+
+def _index(cursor: Any, part: str, *, where: str) -> Any:
+    """Descend one level into a dict or list, auto-creating empty intermediates.
+
+    Parameters
+    ----------
+    cursor : Any
+        Current node in the traversal.
+    part : str
+        Next segment of the dotted path.
+    where : str
+        Path so far, used in error messages.
+
+    Returns
+    -------
+    Any
+        The child node.
+
+    Raises
+    ------
+    TypeError
+        If ``cursor`` is neither a dict nor a list.
+    """
+    if isinstance(cursor, list):
+        return cursor[_find_in_list(cursor, part, where=where)]
+    if isinstance(cursor, dict):
+        if part not in cursor:
+            cursor[part] = {}
+        return cursor[part]
+    raise TypeError(f"Cannot descend into {type(cursor).__name__} at {where!r}")
+
+
+def _find_in_list(items: list[Any], key: str, *, where: str) -> int:
+    """Locate a list element by integer index or by ``name`` field.
+
+    Parameters
+    ----------
+    items : list of Any
+        List to search.
+    key : str
+        Numeric string (positive or negative index) or a name to match against
+        each element's ``"name"`` key.
+    where : str
+        Path so far, used in error messages.
+
+    Returns
+    -------
+    int
+        Index of the matching element.
+
+    Raises
+    ------
+    KeyError
+        If no element with the given name or index exists.
+    """
+    if key.isdigit() or (key.startswith("-") and key[1:].isdigit()):
+        index = int(key)
+        try:
+            items[index]
+        except IndexError:
+            raise KeyError(f"No list element at index {index} at {where!r}") from None
+        return index
+    for i, item in enumerate(items):
+        if isinstance(item, dict) and item.get("name") == key:
+            return i
+    raise KeyError(f"No list element named {key!r} at {where!r}")
+
+
+def _coerce(value: str) -> Any:
+    """Convert a string CLI override value to bool, int, float, or leave as str.
+
+    Parameters
+    ----------
+    value : str
+        Right-hand side of ``KEY=VALUE``.
+
+    Returns
+    -------
+    Any
+        ``True``/``False`` for ``"true"``/``"false"`` (case-insensitive);
+        ``int`` or ``float`` if parseable; otherwise the original string.
+    """
+    if value.lower() in ("true", "false"):
+        return value.lower() == "true"
+    try:
+        return int(value)
+    except ValueError:
+        pass
+    try:
+        return float(value)
+    except ValueError:
+        pass
+    return value
+
+
+def _resolve_variables(raw: dict[str, Any]) -> dict[str, Any]:
+    """Expand ``${VAR}`` references throughout the raw preset.
+
+    Defaults are resolved in TOML order, so later defaults can reference
+    earlier ones (e.g. ``PROTEINS_CSV = "${DATA_DIR}/proteins.csv"``). Process
+    environment variables take precedence over the ``[defaults]`` block.
+
+    Parameters
+    ----------
+    raw : dict of str to Any
+        Parsed TOML, after override application.
+
+    Returns
+    -------
+    dict of str to Any
+        New dict with all string values fully expanded and ``defaults``
+        replaced with the resolved values.
+
+    Raises
+    ------
+    KeyError
+        If any ``${VAR}`` cannot be resolved.
+    """
+    defaults: dict[str, str] = dict(raw.get("defaults", {}))
+    accumulated: dict[str, str] = dict(os.environ)
+    resolved_defaults: dict[str, str] = {}
+    for key, default_value in defaults.items():
+        if key in os.environ:
+            resolved_defaults[key] = os.environ[key]
+        else:
+            resolved_defaults[key] = _expand(default_value, accumulated)
+        accumulated[key] = resolved_defaults[key]
+    resolved = _walk(raw, accumulated)
+    resolved["defaults"] = resolved_defaults
+    return resolved
+
+
+def _walk(obj: Any, env: dict[str, str]) -> Any:
+    """Recursively expand ``${VAR}`` in every string within ``obj``.
+
+    Parameters
+    ----------
+    obj : Any
+        Arbitrary nested dict/list/scalar.
+    env : dict of str to str
+        Resolved variable map.
+
+    Returns
+    -------
+    Any
+        Structurally identical copy with strings expanded.
+    """
+    if isinstance(obj, dict):
+        return {k: _walk(v, env) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_walk(item, env) for item in obj]
+    if isinstance(obj, str):
+        return _expand(obj, env)
+    return obj
+
+
+def _expand(text: str, env: dict[str, str]) -> str:
+    """Substitute ``${VAR}`` references in ``text`` until a fixed point.
+
+    Parameters
+    ----------
+    text : str
+        String potentially containing ``${VAR}`` references.
+    env : dict of str to str
+        Variable map.
+
+    Returns
+    -------
+    str
+        Fully expanded string.
+
+    Raises
+    ------
+    KeyError
+        If a referenced variable is not in ``env``.
+    ValueError
+        If recursive variable interpolation does not converge.
+    """
+
+    def repl(match: re.Match[str]) -> str:
+        """Return the configured value for one ``${VAR}`` interpolation match."""
+        var = match.group(1)
+        if var not in env:
+            raise KeyError(f"Undefined variable ${{{var}}} in preset (no env var, no default)")
+        return env[var]
+
+    current = text
+    for _ in range(_MAX_EXPAND_ITERATIONS):
+        expanded = _VAR_PATTERN.sub(repl, current)
+        if expanded == current:
+            return expanded
+        current = expanded
+    raise ValueError(
+        f"Variable expansion did not converge for {text!r}; check for circular "
+        "${VAR} references in [defaults], environment variables, or --set overrides."
+    )
+
+
+def _build_preset(*, name: str, raw: dict[str, Any]) -> Preset:
+    """Construct a :class:`Preset` from a resolved raw dict.
+
+    Parameters
+    ----------
+    name : str
+        Preset name (assigned to :attr:`Preset.name`).
+    raw : dict of str to Any
+        Resolved TOML.
+
+    Returns
+    -------
+    Preset
+        Validated preset.
+
+    Raises
+    ------
+    ValueError
+        If ``raw['jobs']`` is not a list, or if any :class:`Job` /
+        :class:`Preset` invariant fails (see their docstrings).
+    """
+    raw_jobs = raw.get("jobs", [])
+    if not isinstance(raw_jobs, list):
+        raise ValueError(f"Preset {name!r}: 'jobs' must be a list")
+    jobs = [
+        Job(
+            name=str(j["name"]),
+            env=str(j["env"]),
+            output_subdir=str(j["output_subdir"]),
+            gpus=str(j.get("gpus", "")),
+            gpu_count=_optional_int(j.get("gpu_count")),
+            args=dict(j.get("args", {})),
+        )
+        for j in raw_jobs
+    ]
+    return Preset(
+        name=name,
+        description=str(raw.get("description", "")),
+        defaults=dict(raw.get("defaults", {})),
+        shared_args=dict(raw.get("shared_args", {})),
+        jobs=jobs,
+    )
+
+
+def _optional_int(value: Any) -> int | None:
+    """Return ``value`` as an int, preserving ``None`` for absent fields."""
+    return None if value is None else int(value)
diff --git a/src/sampleworks/runs/runner.py b/src/sampleworks/runs/runner.py
new file mode 100644
index 00000000..4f6a44d9
--- /dev/null
+++ b/src/sampleworks/runs/runner.py
@@ -0,0 +1,765 @@
+"""Build job argv and orchestrate parallel subprocess execution."""
+
+from __future__ import annotations
+
+import os
+import shlex
+import subprocess
+import sys
+import threading
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from .schema import Job, Preset
+
+
+DEFAULT_GRID_SEARCH_SCRIPT = "/app/run_grid_search.py"
+WORKSPACE_GRID_SEARCH_SCRIPT = "/home/dev/workspace/run_grid_search.py"
+PROCESS_SHUTDOWN_TIMEOUT_SECONDS = 10
+TEE_THREAD_JOIN_TIMEOUT_SECONDS = 5
+
+
+@dataclass(frozen=True)
+class JobInvocation:
+    """The fully resolved command to launch for one job.
+
+    Parameters
+    ----------
+    job : Job
+        Originating :class:`Job` (kept for introspection in logs).
+    argv : list of str
+        Subprocess command line: the baked pixi env Python or ``pixi run -e
+        <env> python``, followed by the grid-search script path and its flags.
+    env : dict of str to str
+        Process environment, including ``CUDA_VISIBLE_DEVICES``.
+    gpus : str
+        Resolved ``CUDA_VISIBLE_DEVICES`` value. For jobs that declare
+        ``gpu_count``, this is the concrete auto-assigned GPU list.
+    log_path : Path
+        File to tee stdout+stderr into.
+    output_dir : Path
+        Resolved ``--output-dir`` value. The runner creates this directory
+        before it launches the subprocess.
+    """
+
+    job: Job
+    argv: list[str]
+    env: dict[str, str]
+    gpus: str
+    log_path: Path
+    output_dir: Path
+
+
+def build_invocations(preset: Preset, *, results_dir: Path) -> list[JobInvocation]:
+    """Turn each job of ``preset`` into a ready-to-launch :class:`JobInvocation`.
+
+    Shared args are overlaid with the job's own args; when the merged result
+    has no ``output-dir`` key, ``results_dir / job.output_subdir`` is used.
+    Each job's log file is ``results_dir / "<job name>_run.log"``.
+
+    Parameters
+    ----------
+    preset : Preset
+        Resolved preset whose jobs should be launched.
+    results_dir : Path
+        Base directory for job outputs and per-job log files.
+
+    Returns
+    -------
+    list of JobInvocation
+        Invocations in the same order as ``preset.jobs``.
+    """
+    assigned_gpus = _resolve_gpu_assignments(preset.jobs)
+    built: list[JobInvocation] = []
+    for job in preset.jobs:
+        merged = preset.effective_args(job)
+        # An explicit output-dir (shared or per-job) always wins over the default.
+        if "output-dir" not in merged:
+            merged["output-dir"] = str(results_dir / job.output_subdir)
+        argv = _build_argv(job.env, merged)
+        visible = assigned_gpus[job.name]
+        base_env = {**os.environ, "CUDA_VISIBLE_DEVICES": visible}
+        built.append(
+            JobInvocation(
+                job=job,
+                argv=argv,
+                env=_job_env(job.env, base_env),
+                gpus=visible,
+                log_path=results_dir / f"{job.name}_run.log",
+                output_dir=Path(merged["output-dir"]),
+            )
+        )
+    return built
+
+
+def _resolve_gpu_assignments(jobs: list[Job]) -> dict[str, str]:
+    """Map each job name to its ``CUDA_VISIBLE_DEVICES`` string.
+
+    Tokens named by explicit ``gpus`` values are set aside first; ``gpu_count``
+    jobs are then handed the leftover visible GPUs in declaration order. If no
+    GPUs are detected (and none are explicitly disabled), made-up ordinals are
+    used instead so that command construction remains deterministic.
+    """
+    pinned: dict[str, str] = {job.name: job.gpus for job in jobs if job.gpus}
+    reserved = {gpu for value in pinned.values() for gpu in _split_gpu_list(value)}
+    auto_total = sum(job.gpu_count or 0 for job in jobs)
+    visible = _detect_available_gpus()
+    if not visible:
+        if _cuda_visible_devices_disables_gpus() and auto_total:
+            raise RuntimeError(
+                "CUDA_VISIBLE_DEVICES disables GPU access, so gpu_count auto-assignment "
+                "cannot allocate any GPUs."
+            )
+        # Discovery unavailable: fall back to synthetic ordinals.
+        pool = _synthetic_gpu_pool(reserved, auto_total)
+    else:
+        pool = [gpu for gpu in visible if gpu not in reserved]
+        if len(pool) < auto_total:
+            raise RuntimeError(
+                "Not enough visible GPUs for preset auto-assignment. "
+                f"Visible GPUs: {visible}. Reserved GPUs: {sorted(reserved)}. "
+                f"Auto-requested GPUs: {auto_total}."
+            )
+
+    assignments: dict[str, str] = {}
+    offset = 0
+    for job in jobs:
+        if job.gpus:
+            assignments[job.name] = job.gpus
+        else:
+            width = job.gpu_count or 0
+            assignments[job.name] = ",".join(pool[offset : offset + width])
+            offset += width
+    return assignments
+
+
+def _synthetic_gpu_pool(reserved: set[str], count: int) -> list[str]:
+    """Produce ``count`` ascending ordinal strings from 0, skipping ``reserved`` tokens."""
+    ordinals: list[str] = []
+    next_ordinal = 0
+    while count > len(ordinals):
+        label = str(next_ordinal)
+        next_ordinal += 1
+        if label not in reserved:
+            ordinals.append(label)
+    return ordinals
+
+
+def _split_gpu_list(value: str) -> list[str]:
+    """Break a comma-separated GPU assignment into trimmed, non-empty tokens.
+
+    Parameters
+    ----------
+    value : str
+        Comma-separated GPU assignment, e.g. ``"0,1"``.
+
+    Returns
+    -------
+    list of str
+        Tokens in input order; blank or whitespace-only pieces are dropped.
+    """
+    return [token for token in map(str.strip, value.split(",")) if token]
+
+
+def _all_integer_tokens(values: list[str]) -> bool:
+    """Report whether every GPU token consists solely of digits.
+
+    Parameters
+    ----------
+    values : list of str
+        GPU tokens to check.
+
+    Returns
+    -------
+    bool
+        False if any token fails ``str.isdigit``; True otherwise (also if empty).
+    """
+    return not any(not token.isdigit() for token in values)
+
+
+def _detect_available_gpus() -> list[str]:
+    """Return GPU identifiers visible to the runner (env var first, else nvidia-smi).
+
+    Returns
+    -------
+    list of str
+        Visible GPU identifiers, or an empty list when GPU discovery is not
+        available. Empty means validation should be skipped.
+    """
+    cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", "").strip()
+    if _cuda_visible_devices_disables_gpus():
+        return []
+    if cuda_visible and cuda_visible.lower() != "all":
+        return _split_gpu_list(cuda_visible)
+
+    try:
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=index", "--format=csv,noheader"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+    except OSError:
+        # Covers a missing binary as well as one that exists but cannot be executed.
+        return []
+    if result.returncode != 0:
+        return []
+    return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+
+
+def _cuda_visible_devices_disables_gpus() -> bool:
+    """Tell whether ``CUDA_VISIBLE_DEVICES`` is one of the hide-everything keywords."""
+    setting = os.environ.get("CUDA_VISIBLE_DEVICES", "")
+    # Compared case-insensitively after trimming surrounding whitespace.
+    normalized = setting.strip().lower()
+    hiding_values = ("none", "void", "nodevfiles")
+    return normalized in hiding_values
+
+
+def _validate_gpu_assignments(invocations: list[JobInvocation]) -> None:
+    """Reject GPU assignments this pod cannot satisfy before anything launches.
+
+    Parameters
+    ----------
+    invocations : list of JobInvocation
+        Invocations whose resolved ``gpus`` strings are inspected.
+
+    Raises
+    ------
+    RuntimeError
+        When an integer GPU ID is requested that is not among the visible
+        ones, or when several jobs share a GPU and
+        ``SAMPLEWORKS_ALLOW_GPU_OVERSUBSCRIPTION`` is not truthy.
+    """
+    visible = _detect_available_gpus()
+    if not visible:
+        # No discovery information: nothing to validate against.
+        return
+
+    # Invert the assignment: GPU token -> names of the jobs that asked for it.
+    claims: dict[str, list[str]] = {}
+    for inv in invocations:
+        for gpu in _split_gpu_list(inv.gpus):
+            claims.setdefault(gpu, []).append(inv.job.name)
+
+    # Only plain CUDA ordinals on both sides can be compared reliably.
+    if not (_all_integer_tokens(visible) and _all_integer_tokens(list(claims))):
+        return
+
+    def describe(entries: dict[str, list[str]]) -> str:
+        """Render ``GPU <id> requested by <jobs>`` entries, sorted by GPU token."""
+        return ", ".join(
+            f"GPU {gpu} requested by {', '.join(names)}"
+            for gpu, names in sorted(entries.items())
+        )
+
+    # Requested IDs that the pod does not expose.
+    visible_set = set(visible)
+    missing = {gpu: names for gpu, names in claims.items() if gpu not in visible_set}
+    if missing:
+        raise RuntimeError(
+            "Preset requests GPUs that are not visible in this pod. "
+            f"Visible GPUs: {', '.join(visible)}. {describe(missing)}. "
+            "Edit the preset's jobs.*.gpus/gpu_count values or run a smaller --jobs subset."
+        )
+
+    # Any GPU claimed by more than one job counts as oversubscription.
+    shared = {gpu: names for gpu, names in claims.items() if len(names) > 1}
+    if shared and not _truthy_env("SAMPLEWORKS_ALLOW_GPU_OVERSUBSCRIPTION"):
+        raise RuntimeError(
+            "Preset assigns the same GPU to multiple jobs. "
+            f"{describe(shared)}. Set SAMPLEWORKS_ALLOW_GPU_OVERSUBSCRIPTION=1 to allow this."
+        )
+
+
+def _build_argv(pixi_env: str, args: dict[str, Any]) -> list[str]:
+    """Compose one job's command line (baked env Python, else ``pixi run``).
+
+    ``True`` values turn into bare flags, ``False``/``None`` entries are
+    skipped, and every other value is passed through ``str``.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Pixi environment name (passed to ``-e`` on the ``pixi run`` path).
+    args : dict of str to Any
+        Flag-name to value map (kebab-case keys, no leading ``--``).
+
+    Returns
+    -------
+    list of str
+        Subprocess argv.
+    """
+    interpreter = _pixi_env_python(pixi_env)
+    if interpreter:
+        command = [interpreter, _grid_search_script()]
+    else:
+        # No baked interpreter: only use pixi when runtime fallback is permitted.
+        if _require_prebuilt_envs():
+            raise RuntimeError(_missing_prebuilt_env_message(pixi_env))
+        command = ["pixi", "run", "-e", pixi_env, "python", _grid_search_script()]
+    for name, value in args.items():
+        # bool is tested by identity so that ints such as 0/1 are still stringified.
+        if value is None or value is False:
+            continue
+        if value is True:
+            command.append(f"--{name}")
+        else:
+            command.extend((f"--{name}", str(value)))
+    return command
+
+
+def _pixi_env_python(pixi_env: str) -> str | None:
+    """Return the direct Python binary for a baked pixi environment when available.
+
+    ``SAMPLEWORKS_FORCE_PIXI`` disables the lookup; ``SAMPLEWORKS_<ENV>_PYTHON``
+    overrides it; otherwise ``<project>/.pixi/envs/<env>/bin/python`` is used
+    when it is an executable file. Direct use avoids going through ``pixi run``.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Pixi environment name from the preset job.
+
+    Returns
+    -------
+    str or None
+        Executable Python path, or ``None`` to fall back to ``pixi run``.
+    """
+    if _truthy_env("SAMPLEWORKS_FORCE_PIXI"):
+        return None
+
+    env_key = pixi_env.upper().replace("-", "_")
+    override = os.environ.get(f"SAMPLEWORKS_{env_key}_PYTHON")
+    if override:
+        return override
+
+    candidate = _pixi_project_dir() / ".pixi" / "envs" / pixi_env / "bin" / "python"
+    if candidate.is_file() and os.access(candidate, os.X_OK):
+        return str(candidate)
+    return None
+
+
+def _truthy_env(name: str) -> bool:
+    """Interpret an environment variable as an on/off switch.
+
+    Parameters
+    ----------
+    name : str
+        Name of the environment variable to read.
+
+    Returns
+    -------
+    bool
+        True only for ``1``, ``true`` or ``yes`` (any letter case); unset is False.
+    """
+    return os.getenv(name, "").lower() in ("1", "true", "yes")
+
+
+def _require_prebuilt_envs() -> bool:
+    """Decide whether falling back to ``pixi run`` is forbidden.
+
+    Returns
+    -------
+    bool
+        True when ``SAMPLEWORKS_REQUIRE_PREBUILT_PIXI`` is truthy and neither
+        ``RUNTIME_PIXI`` nor ``SAMPLEWORKS_ALLOW_RUNTIME_PIXI`` is truthy.
+    """
+    if not _truthy_env("SAMPLEWORKS_REQUIRE_PREBUILT_PIXI"):
+        return False
+    opt_in_names = ("RUNTIME_PIXI", "SAMPLEWORKS_ALLOW_RUNTIME_PIXI")
+    return not any(_truthy_env(flag) for flag in opt_in_names)
+
+
+def _missing_prebuilt_env_message(pixi_env: str) -> str:
+    """Compose the error text raised when a baked pixi environment is absent.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Name of the pixi environment that could not be found.
+
+    Returns
+    -------
+    str
+        Message naming the expected interpreter path and the ways to recover.
+    """
+    target = _pixi_project_dir() / ".pixi" / "envs" / pixi_env / "bin" / "python"
+    headline = f"Prebuilt pixi environment is missing for job env {pixi_env!r}: {target}. "
+    return headline + (
+        "The pixi-with-checkpoints image must contain ready-to-use boltz, "
+        "protenix, and rf3 environments. Refusing to fall back to 'pixi run' "
+        "because that would install or refresh packages inside the pod. "
+        "Recreate the pod with the current image, or set RUNTIME_PIXI=1 only "
+        "when intentionally debugging pixi."
+    )
+
+
+def _job_env(pixi_env: str, env: dict[str, str]) -> dict[str, str]:
+    """Mimic pixi activation for jobs launched through a direct env Python.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Pixi environment name used by the job.
+    env : dict of str to str
+        Base process environment; returned as-is when no direct Python exists.
+
+    Returns
+    -------
+    dict of str to str
+        Copy of ``env`` with the env's ``bin`` first on ``PATH``, ``CONDA_PREFIX``
+        set, ``CUDA_HOME`` defaulted to the env root, and ``PYTHONNOUSERSITE=1``.
+    """
+    interpreter = _pixi_env_python(pixi_env)
+    if interpreter is None:
+        return env
+
+    prefix = Path(interpreter).resolve().parent.parent
+    overrides = {
+        "PATH": f"{prefix / 'bin'}{os.pathsep}{env.get('PATH', '')}",
+        "CONDA_PREFIX": str(prefix),
+        "CUDA_HOME": env.get("CUDA_HOME", str(prefix)),
+        "PYTHONNOUSERSITE": "1",
+    }
+    return {**env, **overrides}
+
+
+def _pixi_project_dir() -> Path:
+    """Locate the pixi project directory used for env lookup and pixi fallback runs.
+
+    Returns
+    -------
+    Path
+        ``SAMPLEWORKS_PIXI_PROJECT_DIR`` when set; else ``/app`` if it holds a
+        ``pyproject.toml``; else the current working directory.
+    """
+    configured = os.environ.get("SAMPLEWORKS_PIXI_PROJECT_DIR")
+    if configured:
+        return Path(configured)
+    baked_root = Path("/app")
+    # The image marker is /app/pyproject.toml; without it, use the working directory.
+    has_project = (baked_root / "pyproject.toml").exists()
+    return baked_root if has_project else Path.cwd()
+
+
+def _grid_search_script() -> str:
+    """Pick the ``run_grid_search.py`` path that worker jobs execute.
+
+    ``SAMPLEWORKS_GRID_SEARCH_SCRIPT`` wins when set. Otherwise the synced
+    workspace copy is used if it exists, falling back to the baked ``/app``
+    script.
+
+    Returns
+    -------
+    str
+        Script path handed to the job's Python interpreter.
+    """
+    explicit = os.environ.get("SAMPLEWORKS_GRID_SEARCH_SCRIPT")
+    if explicit:
+        return explicit
+    # Prefer the synced checkout over the copy baked into the image.
+    if Path(WORKSPACE_GRID_SEARCH_SCRIPT).exists():
+        return str(Path(WORKSPACE_GRID_SEARCH_SCRIPT))
+    return DEFAULT_GRID_SEARCH_SCRIPT
+
+
+def run(preset: Preset, *, results_dir: Path, dry_run: bool = False) -> int:
+    """Launch every job in parallel and wait for completion.
+
+    Each job's stdout+stderr is teed to a per-job log under ``results_dir``
+    and echoed to stderr with a ``[job_name]`` prefix. If spawning or waiting
+    is interrupted, already-launched jobs are terminated before re-raising.
+
+    Parameters
+    ----------
+    preset : Preset
+        Preset to launch.
+    results_dir : Path
+        Root directory for outputs and logs. Created if missing.
+    dry_run : bool, optional
+        If True, print the resolved commands instead of launching anything.
+
+    Returns
+    -------
+    int
+        ``0`` if all jobs exited 0 (or ``dry_run`` was set), ``1`` otherwise.
+    """
+    results_dir = results_dir.resolve()
+    results_dir.mkdir(parents=True, exist_ok=True)
+    invocations = build_invocations(preset, results_dir=results_dir)
+    _validate_gpu_assignments(invocations)
+
+    if dry_run:
+        for inv in invocations:
+            _print_dry_run(inv)
+        return 0
+
+    for pixi_env in sorted({inv.job.env for inv in invocations}):
+        _prepare_pixi_env(pixi_env)
+    # Rebuilt after preparation, presumably so argv/env pick up a freshly installed env.
+    invocations = build_invocations(preset, results_dir=results_dir)
+
+    _print_launch_summary(preset, invocations)
+    processes: list[_RunningJob] = []
+    try:
+        for inv in invocations:
+            processes.append(_spawn(inv))
+        return _wait_all(processes)
+    except BaseException:
+        _terminate_all(processes)
+        raise
+
+
+def _terminate_all(jobs: list[_RunningJob]) -> None:
+    """Stop every given job and reap it (e.g. after a later spawn failed).
+
+    Parameters
+    ----------
+    jobs : list of _RunningJob
+        Jobs to terminate; each is given a grace period, then killed, and its
+        tee thread is joined with a bounded wait.
+    """
+    for running in jobs:
+        if running.proc.poll() is None:
+            running.proc.terminate()
+    # Second pass: reap each job, escalating to a kill after the grace period.
+    for running in jobs:
+        proc = running.proc
+        try:
+            proc.wait(timeout=PROCESS_SHUTDOWN_TIMEOUT_SECONDS)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            try:
+                proc.wait(timeout=PROCESS_SHUTDOWN_TIMEOUT_SECONDS)
+            except subprocess.TimeoutExpired:
+                label = running.inv.job.name
+                print(f"[{_ts()}] {label} did not exit after SIGKILL", file=sys.stderr)
+        running.tee_thread.join(timeout=TEE_THREAD_JOIN_TIMEOUT_SECONDS)
+
+
+def _prepare_pixi_env(pixi_env: str) -> None:
+    """Prepare a pixi environment before parallel job launch.
+
+    Nothing is done when a baked interpreter is already available or when
+    ``SAMPLEWORKS_SKIP_ENV_PREPARE`` is truthy. Otherwise ``pixi run`` is
+    called once for the environment, from the pixi project directory.
+
+    Parameters
+    ----------
+    pixi_env : str
+        Pixi environment to prepare.
+
+    Raises
+    ------
+    RuntimeError
+        If the environment is missing and prebuilt environments are required.
+    subprocess.CalledProcessError
+        If pixi cannot prepare the environment.
+    """
+    if _pixi_env_python(pixi_env) is not None:
+        return
+
+    if _require_prebuilt_envs():
+        raise RuntimeError(_missing_prebuilt_env_message(pixi_env))
+
+    if _truthy_env("SAMPLEWORKS_SKIP_ENV_PREPARE"):
+        return
+
+    # Default both caches to /tmp unless the caller already configured them.
+    env = {
+        **os.environ,
+        "PIXI_CACHE_DIR": os.environ.get("PIXI_CACHE_DIR", "/tmp/pixi-cache"),
+        "UV_CACHE_DIR": os.environ.get("UV_CACHE_DIR", "/tmp/uv-cache"),
+    }
+    cmd = ["pixi", "run", "-e", pixi_env, "python", "-c", "print('ready')"]
+    print(
+        f"[{_ts()}] preparing pixi env {pixi_env!r} with {shlex.join(cmd)}",
+        file=sys.stderr,
+    )
+    # check=True surfaces a failed preparation as CalledProcessError.
+    subprocess.run(cmd, cwd=str(_pixi_project_dir()), env=env, check=True)
+
+
+def _print_dry_run(inv: JobInvocation) -> None:
+    """Write one job's resolved command to stdout, with context lines on stderr.
+
+    Parameters
+    ----------
+    inv : JobInvocation
+        Invocation to print; nothing is launched.
+    """
+    print(f"# job: {inv.job.name}  (env={inv.job.env}, gpus={inv.gpus})", file=sys.stderr)
+    print(f"# log: {inv.log_path}", file=sys.stderr)
+    print(f"CUDA_VISIBLE_DEVICES={inv.gpus}", shlex.join(inv.argv))
+    print(file=sys.stderr)
+
+
+def _print_launch_summary(preset: Preset, invocations: list[JobInvocation]) -> None:
+    """Announce on stderr which preset and jobs are about to start.
+
+    Parameters
+    ----------
+    preset : Preset
+        Preset being launched; its name and optional description are shown.
+    invocations : list of JobInvocation
+        Jobs about to be spawned, listed with env, GPUs and log path.
+    """
+    rule = "=" * 60
+    lines = [rule, f"preset: {preset.name}"]
+    if preset.description:
+        lines.append(f"  {preset.description}")
+    lines.extend(
+        f"  - {inv.job.name}: env={inv.job.env}, gpus={inv.gpus}, log={inv.log_path}"
+        for inv in invocations
+    )
+    lines.append(rule)
+    for line in lines:
+        print(line, file=sys.stderr)
+
+
+@dataclass(frozen=True)
+class _RunningJob:
+    """Internal handle pairing a spawned subprocess with its log-tee thread.
+
+    Parameters
+    ----------
+    inv : JobInvocation
+        Invocation the subprocess was started from.
+    proc : subprocess.Popen
+        The subprocess; its stdout is a pipe with stderr merged into it.
+    tee_thread : threading.Thread
+        Daemon thread copying ``proc.stdout`` to the log file and to
+        ``sys.stderr`` with a per-job prefix.
+    """
+
+    inv: JobInvocation
+    proc: subprocess.Popen[bytes]
+    tee_thread: threading.Thread
+
+
+def _spawn(inv: JobInvocation) -> _RunningJob:
+    """Start one subprocess and a daemon thread that tees its output to the log.
+
+    Parameters
+    ----------
+    inv : JobInvocation
+        Invocation to spawn; its log and output directories are created first.
+
+    Returns
+    -------
+    _RunningJob
+        Handle covering the subprocess and the tee thread.
+
+    Raises
+    ------
+    OSError, RuntimeError
+        If the subprocess cannot be started, or starts without a stdout pipe.
+    """
+    inv.log_path.parent.mkdir(parents=True, exist_ok=True)
+    inv.output_dir.mkdir(parents=True, exist_ok=True)
+    log_file = open(inv.log_path, "wb")
+    proc: subprocess.Popen[bytes] | None = None
+    thread: threading.Thread | None = None
+    try:
+        proc = subprocess.Popen(
+            inv.argv,
+            env=inv.env,
+            cwd=str(_pixi_project_dir()),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            bufsize=0,
+        )
+        if proc.stdout is None:
+            raise RuntimeError(f"Job {inv.job.name!r} started without a stdout pipe")
+        thread = threading.Thread(
+            target=_tee,
+            args=(inv.job.name, proc.stdout, log_file),
+            daemon=True,
+        )
+        thread.start()
+    except BaseException:
+        log_file.close()
+        if proc is not None and proc.poll() is None:
+            proc.kill()
+            try:
+                proc.wait(timeout=PROCESS_SHUTDOWN_TIMEOUT_SECONDS)
+            except subprocess.TimeoutExpired:
+                print(
+                    f"[{_ts()}] {inv.job.name} did not exit after failed spawn cleanup",
+                    file=sys.stderr,
+                )
+        raise
+    if proc is None or thread is None:
+        raise RuntimeError(f"Job {inv.job.name!r} failed to initialize")
+    print(f"[{_ts()}] launched {inv.job.name} (pid {proc.pid})", file=sys.stderr)
+    return _RunningJob(inv=inv, proc=proc, tee_thread=thread)
+
+
+def _wait_all(jobs: list[_RunningJob]) -> int:
+    """Block until every job has exited, then summarise the outcome.
+
+    Parameters
+    ----------
+    jobs : list of _RunningJob
+        Jobs to wait on, one after another in list order.
+
+    Returns
+    -------
+    int
+        ``0`` when every exit code was 0, otherwise ``1``.
+    """
+    any_failed = False
+    for running in jobs:
+        status = running.proc.wait()
+        running.tee_thread.join()
+        if status != 0:
+            any_failed = True
+            print(f"[{_ts()}] {running.inv.job.name} FAILED (exit {status})", file=sys.stderr)
+        else:
+            print(f"[{_ts()}] {running.inv.job.name} succeeded", file=sys.stderr)
+    return 1 if any_failed else 0
+
+
+def _tee(prefix: str, src: Any, dest: Any) -> None:
+    """Copy lines from ``src`` to ``dest`` and echo them to stderr with a label.
+
+    Parameters
+    ----------
+    prefix : str
+        Label put in ``[...]`` before each echoed line (e.g. job name).
+    src, dest : file-like
+        Readable byte stream and writable byte log; ``dest`` is always closed.
+    """
+    try:
+        for line in iter(src.readline, b""):
+            dest.write(line)
+            dest.flush()
+            sys.stderr.write(f"[{prefix}] {line.decode('utf-8', errors='replace')}")
+            sys.stderr.flush()
+    finally:
+        # Close the log even if a read/write fails, so the handle never leaks.
+        dest.close()
+
+
+def _ts() -> str:
+    """Format the current local time as ``YYYY-MM-DD HH:MM:SS`` for log prefixes."""
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+
+
+def _shell_join(argv: list[str]) -> str:
+    """Render ``argv`` as one command line safe to paste into a POSIX shell.
+
+    Parameters
+    ----------
+    argv : list of str
+        Argument vector to quote.
+
+    Returns
+    -------
+    str
+        The arguments, individually shell-quoted and joined by single spaces.
+    """
+    return " ".join(shlex.quote(part) for part in argv)
diff --git a/src/sampleworks/runs/schema.py b/src/sampleworks/runs/schema.py
new file mode 100644
index 00000000..9cb99fac
--- /dev/null
+++ b/src/sampleworks/runs/schema.py
@@ -0,0 +1,151 @@
+"""Dataclasses for the preset schema.
+
+A preset describes one or more parallel ``run_grid_search.py`` jobs. Each job
+runs in its configured model environment, either through ``pixi run`` or a
+baked environment Python, with ``CUDA_VISIBLE_DEVICES`` set from an explicit
+GPU assignment or an automatically allocated ``gpu_count``.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+VALID_PIXI_ENVS = ("boltz", "protenix", "rf3")
+
+
+@dataclass(frozen=True)
+class Job:
+    """One parallel ``run_grid_search.py`` invocation within a preset.
+
+    Parameters
+    ----------
+    name : str
+        Identifier used for per-job log files and ``--jobs`` selection. Must be
+        unique within the parent :class:`Preset`.
+    env : str
+        Pixi environment to run the job in. Must be one of
+        :data:`VALID_PIXI_ENVS`.
+    output_subdir : str
+        Path appended to the run's ``results_dir`` to form the job's
+        ``--output-dir`` argument, when one is not given explicitly in ``args``.
+    gpus : str, optional
+        Explicit value to set as ``CUDA_VISIBLE_DEVICES`` for the subprocess
+        (e.g. ``"4"`` or ``"0,1"``). Mutually exclusive with ``gpu_count``.
+    gpu_count : int or None, optional
+        Number of visible GPUs to auto-assign for this job. The runner assigns
+        concrete GPU IDs in declaration order.
+    args : dict of str to Any, optional
+        Per-job overrides merged on top of the preset's
+        :attr:`Preset.shared_args`. Keys are CLI flag names (without the
+        leading ``--``); bools become bare flags (``True``) or omitted
+        (``False``).
+
+    Raises
+    ------
+    ValueError
+        If ``env`` is invalid, if not exactly one of ``gpus``/``gpu_count`` is
+        set, if ``gpu_count`` is not positive, or if ``output_subdir`` is empty.
+    """
+
+    name: str
+    env: str
+    output_subdir: str
+    gpus: str = ""
+    gpu_count: int | None = None
+    args: dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        """Validate ``env``, the ``gpus``/``gpu_count`` choice, and ``output_subdir``."""
+        if self.env not in VALID_PIXI_ENVS:
+            raise ValueError(
+                f"Job {self.name!r}: env must be one of {VALID_PIXI_ENVS}, got {self.env!r}"
+            )
+        if bool(self.gpus) == (self.gpu_count is not None):
+            raise ValueError(f"Job {self.name!r}: set exactly one of gpus or gpu_count")
+        if self.gpu_count is not None and self.gpu_count <= 0:
+            raise ValueError(f"Job {self.name!r}: gpu_count must be positive")
+        if not self.output_subdir:
+            raise ValueError(f"Job {self.name!r}: output_subdir must be non-empty")
+
+
+@dataclass(frozen=True)
+class Preset:
+    """A named group of jobs that are launched together as one unit.
+
+    Parameters
+    ----------
+    name : str
+        Identifier (the experiment TOML filename without its ``.toml`` suffix,
+        or the stem of a user-supplied path).
+    description : str
+        Human-readable summary shown by ``--list`` and the launch banner.
+    defaults : dict of str to str, optional
+        Default values for ``${VAR}`` interpolation. The process environment
+        takes precedence; this block only fills in unset keys.
+    shared_args : dict of str to Any, optional
+        Args applied to every job before argv is built. A job's own ``args``
+        win whenever both define the same key.
+    jobs : list of Job
+        Jobs to launch in parallel. Must be non-empty and have unique names.
+
+    Raises
+    ------
+    ValueError
+        If ``jobs`` is empty or two jobs share a name.
+    """
+
+    name: str
+    description: str
+    defaults: dict[str, str] = field(default_factory=dict)
+    shared_args: dict[str, Any] = field(default_factory=dict)
+    jobs: list[Job] = field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        """Reject an empty job list and repeated job names."""
+        if not self.jobs:
+            raise ValueError(f"Preset {self.name!r}: must declare at least one job")
+        names_so_far: set[str] = set()
+        for name in (job.name for job in self.jobs):
+            if name in names_so_far:
+                raise ValueError(f"Preset {self.name!r}: duplicate job name {name!r}")
+            names_so_far.add(name)
+
+    def job(self, name: str) -> Job:
+        """Look up a job of this preset by name.
+
+        Parameters
+        ----------
+        name : str
+            Name of the wanted job.
+
+        Returns
+        -------
+        Job
+            The first job whose ``name`` equals ``name``.
+
+        Raises
+        ------
+        KeyError
+            If the preset contains no job with that name.
+        """
+        found = next((j for j in self.jobs if j.name == name), None)
+        if found is None:
+            raise KeyError(f"Preset {self.name!r} has no job {name!r}")
+        return found
+
+    def effective_args(self, job: Job) -> dict[str, Any]:
+        """Overlay a job's own ``args`` on top of :attr:`shared_args`.
+
+        Parameters
+        ----------
+        job : Job
+            Job whose ``args`` take priority over the shared values.
+
+        Returns
+        -------
+        dict of str to Any
+            Fresh dict; changing it leaves the preset and the job untouched.
+        """
+        return dict([*self.shared_args.items(), *job.args.items()])
diff --git a/src/sampleworks/utils/guidance_script_arguments.py b/src/sampleworks/utils/guidance_script_arguments.py
index 9c2601fa..29827add 100644
--- a/src/sampleworks/utils/guidance_script_arguments.py
+++ b/src/sampleworks/utils/guidance_script_arguments.py
@@ -9,16 +9,34 @@
 from sampleworks.utils.guidance_constants import GuidanceType, StructurePredictor
 
 
-# Baked-in checkpoint paths (Docker image) with legacy fallbacks
+# Baked-in checkpoint paths (Docker image), ACTL shared-storage paths, and
+# legacy fallbacks. Environment variables win when present.
+_CHECKPOINT_ENV_VARS = {
+    "boltz1": "BOLTZ1_CHECKPOINT",
+    "boltz2": "BOLTZ2_CHECKPOINT",
+    "rf3": "RF3_CHECKPOINT",
+    "protenix": "PROTENIX_CHECKPOINT",
+}
+
 _CHECKPOINT_CANDIDATES = {
-    "boltz1": ["/checkpoints/boltz1_conf.ckpt", "~/.boltz/boltz1_conf.ckpt"],
-    "boltz2": ["/checkpoints/boltz2_conf.ckpt", "~/.boltz/boltz2_conf.ckpt"],
+    "boltz1": [
+        "/checkpoints/boltz1_conf.ckpt",
+        "/mnt/diffuse-shared/raw/checkpoints/boltz1_conf.ckpt",
+        "~/.boltz/boltz1_conf.ckpt",
+    ],
+    "boltz2": [
+        "/checkpoints/boltz2_conf.ckpt",
+        "/mnt/diffuse-shared/raw/checkpoints/boltz2_conf.ckpt",
+        "~/.boltz/boltz2_conf.ckpt",
+    ],
     "rf3": [
         "/checkpoints/rf3_foundry_01_24_latest.ckpt",
+        "/mnt/diffuse-shared/raw/checkpoints/rf3_foundry_01_24_latest.ckpt",
         "~/.foundry/checkpoints/rf3_foundry_01_24_latest.ckpt",
     ],
     "protenix": [
         "/checkpoints/protenix_base_default_v0.5.0.pt",
+        "/mnt/diffuse-shared/raw/checkpoints/protenix_base_default_v0.5.0.pt",
         ".pixi/envs/protenix-dev/lib/python3.12/site-packages/release_data/checkpoint/protenix_base_default_v0.5.0.pt",
     ],
 }
@@ -27,11 +45,16 @@
 def _resolve_checkpoint(model_key: str) -> str:
     """Return the first checkpoint path that exists on disk for *model_key*.
 
-    Tries baked-in Docker paths first (``/checkpoints/``), then falls back to
-    legacy development paths.  If none are found the first candidate is returned
-    so that downstream validation produces a clear error message.
+    Model-specific environment variables from :data:`_CHECKPOINT_ENV_VARS` win
+    when set. Otherwise, candidates from :data:`_CHECKPOINT_CANDIDATES` are
+    tried in order, starting with baked-in ``/checkpoints/`` paths and then
+    ACTL shared-storage and legacy development locations.
     """
-    candidates = _CHECKPOINT_CANDIDATES.get(model_key, [])
+    env_var = _CHECKPOINT_ENV_VARS.get(model_key)
+    candidates = []
+    if env_var and os.environ.get(env_var):
+        candidates.append(os.environ[env_var])
+    candidates.extend(_CHECKPOINT_CANDIDATES.get(model_key, []))
     for candidate in candidates:
         resolved = Path(candidate).expanduser()
         if resolved.exists():
@@ -45,9 +68,10 @@ def _resolve_checkpoint(model_key: str) -> str:
             f"Provide --model-checkpoint or bake checkpoints into /checkpoints/."
         )
     if not Path(resolved).exists():
+        env_hint = _CHECKPOINT_ENV_VARS.get(model_key, "a checkpoint env var")
         raise ValueError(
-            f"Model checkpoint '{resolved}' does not exist. "
-            f"Provide a valid path via --model-checkpoint."
+            f"Model checkpoint for '{model_key}' was not found. Checked: {candidates}. "
+            f"Provide --model-checkpoint or set {env_hint}."
         )
 
     return resolved
@@ -341,6 +365,7 @@ def __post_init__(self):
             raise ValueError(f"Unknown model type: {self.model}")
 
     def populate_config_for_guidance_type(self, job: JobConfig, args: argparse.Namespace):
+        """Apply per-job grid-search values onto this guidance configuration."""
         checkpoint = get_checkpoint(args)
         if checkpoint is not None:
             self.model_checkpoint = checkpoint
@@ -384,6 +409,7 @@ def as_dict(self) -> dict[str, Any]:
 
 
 def add_generic_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments shared by all models and guidance methods."""
     parser.add_argument("--structure", type=str, required=True, help="Input structure")
     parser.add_argument("--density", type=str, required=True, help="Input density map")
     parser.add_argument("--output-dir", type=str, default="output", help="Output directory")
@@ -450,6 +476,7 @@ def add_generic_args(parser: argparse.ArgumentParser | GuidanceConfig):
 # Guidance type specific arguments
 ######################
 def add_pure_guidance_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to pure guidance sampling."""
     parser.add_argument("--step-size", type=float, default=0.1, help="Gradient step")
     parser.add_argument(
         "--step-scaler-type",
@@ -462,6 +489,7 @@ def add_pure_guidance_args(parser: argparse.ArgumentParser | GuidanceConfig):
 
 
 def add_fk_steering_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to Feynman-Kac steering."""
     parser.add_argument(
         "--num-particles",
         type=int,
@@ -504,6 +532,7 @@ def add_fk_steering_args(parser: argparse.ArgumentParser | GuidanceConfig):
 # Model specific arguments
 ###########
 def add_boltz2_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to Boltz2 guidance runs."""
     parser.add_argument(
         "--model-checkpoint",
         type=str,
@@ -519,6 +548,7 @@ def add_boltz2_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
 
 
 def add_protenix_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to Protenix guidance runs."""
     parser.add_argument(
         "--model-checkpoint",
         type=str,
@@ -528,6 +558,7 @@ def add_protenix_specific_args(parser: argparse.ArgumentParser | GuidanceConfig)
 
 
 def add_boltz1_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to Boltz1 guidance runs."""
     parser.add_argument(
         "--model-checkpoint",
         type=str,
@@ -537,6 +568,7 @@ def add_boltz1_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
 
 
 def add_rf3_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
+    """Add CLI arguments specific to RF3 guidance runs."""
     parser.add_argument(
         "--model-checkpoint",
         type=str,
@@ -576,6 +608,8 @@ def add_rf3_specific_args(parser: argparse.ArgumentParser | GuidanceConfig):
 
 @dataclass
 class JobConfig:
+    """Resolved inputs and grid-search settings for one guidance job."""
+
     protein: str
     structure_path: Path | str
     density_path: Path | str
@@ -592,6 +626,8 @@ class JobConfig:
 
 @dataclass
 class JobResult:
+    """Serializable status record produced after a guidance job finishes."""
+
     protein: str
     model: str
     method: str | None
diff --git a/src/sampleworks/utils/guidance_script_utils.py b/src/sampleworks/utils/guidance_script_utils.py
index a72492ee..3832dfb2 100644
--- a/src/sampleworks/utils/guidance_script_utils.py
+++ b/src/sampleworks/utils/guidance_script_utils.py
@@ -71,6 +71,7 @@ def save_trajectory(
     subdir_name,
     save_every=10,
 ):
+    """Dispatch trajectory serialization to the handler for the selected scaler."""
     if scaler_type == GuidanceType.PURE_GUIDANCE:
         _save_trajectory(trajectory, atom_array, output_dir, subdir_name, save_every)
     elif scaler_type == GuidanceType.FK_STEERING:
@@ -100,6 +101,7 @@ def _write_coords_into_array(
 
 
 def _save_trajectory(trajectory, atom_array, output_dir, subdir_name, save_every):
+    """Save a pure-guidance coordinate trajectory as sampled multi-model CIFs."""
     output_dir = Path(output_dir / "trajectory" / subdir_name)
     output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -122,6 +124,7 @@ def _save_trajectory(trajectory, atom_array, output_dir, subdir_name, save_every
 
 
 def _save_fk_steering_trajectory(trajectory, atom_array, output_dir, subdir_name, save_every):
+    """Save the first-particle FK-steering trajectory as sampled multi-model CIFs."""
     output_dir = Path(output_dir / "trajectory" / subdir_name)
     output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -146,6 +149,7 @@ def _save_fk_steering_trajectory(trajectory, atom_array, output_dir, subdir_name
 
 
 def save_losses(losses, output_dir):
+    """Write per-step guidance losses to ``losses.txt`` in ``output_dir``."""
     output_dir = Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
 
@@ -165,6 +169,7 @@ def get_model_and_device(
     method: str | None = None,
     model: Any = None,
 ) -> tuple[torch.device, Any]:
+    """Validate a checkpoint, choose a device, and construct the model wrapper."""
     validated_checkpoint_path = validate_model_checkpoint(model_type, model_checkpoint_path)
 
     device = torch.device(device_str) if device_str else try_gpu()
@@ -225,6 +230,7 @@ def get_reward_function_and_structure(
     resolution,
     structure_path: str | Path,
 ) -> tuple[RealSpaceRewardFunction, dict[str, Any]]:
+    """Load structure and density inputs and build the real-space reward function."""
     logger.debug(f"Loading structure from {structure_path}")
     safe_structure_path = resolve_mixed_hetatm_atom_altlocs(Path(structure_path))
     structure = parse(
@@ -418,6 +424,7 @@ def run_guidance(args: GuidanceConfig, guidance_type: str, model_wrapper, device
 
 # "guidance_type" is also called "scaler" in many places
 def _run_guidance(args: GuidanceConfig, guidance_type: str, model_wrapper, device):
+    """Run one configured guidance trajectory and save its outputs."""
     reward_function, structure = get_reward_function_and_structure(
         args.density,  # str/path to a map file.
         device,  # this needs to come from the global context, not the args object.
@@ -459,8 +466,14 @@ def _run_guidance(args: GuidanceConfig, guidance_type: str, model_wrapper, devic
     elif "Boltz" in wrapper_class_name:
         from sampleworks.models.boltz.wrapper import process_structure_for_boltz
 
+        # Boltz preprocessing writes manifest/NPZ/MSA files as a side effect.
+        # Keep those under the per-job output directory so concurrent grid jobs
+        # for the same protein do not race on a shared metadata-derived path.
         structure = process_structure_for_boltz(
-            structure, ensemble_size=args.ensemble_size, recycling_steps=recycling_steps
+            structure,
+            out_dir=args.output_dir,
+            ensemble_size=args.ensemble_size,
+            recycling_steps=recycling_steps,
         )
     else:
         raise ValueError(f"Unknown model wrapper class: {wrapper_class_name}")
@@ -588,6 +601,7 @@ def _run_guidance(args: GuidanceConfig, guidance_type: str, model_wrapper, devic
 
 
 def epoch_seconds(time_to_convert: datetime) -> float:
+    """Convert a naive UTC :class:`datetime.datetime` to seconds since the Unix epoch."""
     return (time_to_convert - datetime(1970, 1, 1)).total_seconds()
 
 
@@ -599,6 +613,7 @@ def get_job_result(
     exit_code: int,
     status: str,
 ) -> JobResult:
+    """Build the serializable result record for a completed guidance job."""
     start_time = epoch_seconds(started_at)
     end_time = epoch_seconds(ended_at)
     result = JobResult(
@@ -621,6 +636,7 @@ def get_job_result(
 
 
 def run_guidance_job_queue(job_queue_path: str) -> list[JobResult]:
+    """Load a pickled job queue, reuse one model wrapper, and run all jobs."""
     with open(job_queue_path, "rb") as fp:
         job_queue: list[GuidanceConfig] = pickle.load(fp)
 
diff --git a/tests/models/boltz/test_boltz_wrapper.py b/tests/models/boltz/test_boltz_wrapper.py
index 47630314..94c7d085 100644
--- a/tests/models/boltz/test_boltz_wrapper.py
+++ b/tests/models/boltz/test_boltz_wrapper.py
@@ -112,7 +112,7 @@ def test_annotate_preserves_original_structure(
     def test_annotate_default_values(self, structure_6b8x: dict, temp_output_dir: Path):
         result = process_structure_for_boltz(structure_6b8x, out_dir=temp_output_dir)
         config = result["_boltz_config"]
-        assert config.num_workers == 8
+        assert config.num_workers == 0
         assert config.ensemble_size == 1
         assert config.recycling_steps == 3
 
@@ -142,7 +142,7 @@ class TestBoltzConfig:
     def test_boltz_config_default_values(self):
         config = BoltzConfig()
         assert config.out_dir is None
-        assert config.num_workers == 8
+        assert config.num_workers == 0
         assert config.ensemble_size == 1
         assert config.recycling_steps == 3
 
diff --git a/tests/runs/__init__.py b/tests/runs/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/runs/conftest.py b/tests/runs/conftest.py
new file mode 100644
index 00000000..70cc7919
--- /dev/null
+++ b/tests/runs/conftest.py
@@ -0,0 +1,18 @@
+"""Shared test fixtures for preset-runner tests."""
+
+from __future__ import annotations
+
+import os
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def force_pixi_argv(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Clear SAMPLEWORKS_* launcher overrides and force pixi for stable argv checks."""
+    for name in ("SAMPLEWORKS_GRID_SEARCH_SCRIPT", "SAMPLEWORKS_PIXI_PROJECT_DIR"):
+        monkeypatch.delenv(name, raising=False)
+    prefixed = [name for name in os.environ if name.startswith("SAMPLEWORKS_")]
+    for name in (candidate for candidate in prefixed if candidate.endswith("_PYTHON")):
+        monkeypatch.delenv(name, raising=False)
+    monkeypatch.setenv("SAMPLEWORKS_FORCE_PIXI", "1")
diff --git a/tests/runs/test_cli.py b/tests/runs/test_cli.py
new file mode 100644
index 00000000..7af38081
--- /dev/null
+++ b/tests/runs/test_cli.py
@@ -0,0 +1,152 @@
+"""End-to-end CLI tests (--list, --show, --dry-run, job shortcuts)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from sampleworks.runs import cli, runner
+
+
+def test_list_prints_all_experiment_presets(capsys: pytest.CaptureFixture[str]) -> None:
+    """``--list`` prints every bundled experiment preset exactly once."""
+    exit_code = cli.main(["--list"])
+    assert exit_code == 0
+    out = capsys.readouterr().out.splitlines()
+    assert sorted(out) == [  # a list, not a set, so a duplicated name fails
+        "boltz",
+        "boltz1",
+        "boltz2",
+        "boltz2_md",
+        "boltz2_xrd",
+        "full_8gpu",
+        "protenix",
+        "protenix_dual",
+        "rf3",
+        "rf3_partial",
+        "rf3_partial_chiral_off",
+        "rf3_protenix",
+    ]
+
+
+def test_show_prints_resolved_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """``--show`` exits 0 and prints the preset name and its gradient-weights arg."""
+    monkeypatch.setenv("HOME", "/home/test")
+    status = cli.main(["--preset", "rf3_partial", "--show"])
+    rendered = capsys.readouterr().out
+    assert status == 0
+    for expected in ("name: rf3_partial", "gradient-weights"):
+        assert expected in rendered
+
+
+def test_dry_run_does_not_invoke_subprocess(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """``--dry-run`` exits 0 and prints the pixi command plus the CUDA device list.
+
+    NOTE(review): nothing here fails if a subprocess is actually spawned; the
+    "does not invoke" part of the test name rests on the printed output alone.
+    """
+    monkeypatch.setenv("HOME", str(tmp_path))
+    # Pretend the host exposes eight GPUs, ids "0".."7".
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: list(map(str, range(8))))
+    cli_args = ["--preset", "rf3_partial", "--dry-run", "--results-dir", str(tmp_path)]
+    status = cli.main(cli_args)
+    printed = capsys.readouterr().out
+    assert status == 0
+    expected_command = "pixi run -e rf3 python /app/run_grid_search.py"
+    expected_devices = "CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7"
+    assert expected_command in printed
+    assert expected_devices in printed
+
+
+def test_job_shortcut_filters_default_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """Positional ``rf3,protenix`` keeps only those jobs of the default full_8gpu preset."""
+    monkeypatch.setenv("HOME", "/home/test")
+    status = cli.main(["rf3,protenix", "--show"])
+    rendered = capsys.readouterr().out
+    assert status == 0
+    kept = ("name: full_8gpu:rf3,protenix", "name: rf3", "name: protenix")
+    dropped = ("boltz2_xrd", "boltz2_md")
+    missing = [text for text in kept if text not in rendered]
+    leaked = [text for text in dropped if text in rendered]
+    assert not missing and not leaked, (missing, leaked)
+
+
+def test_model_target_uses_named_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """Target ``boltz`` shows the boltz preset with its boltz2_xrd and boltz2_md jobs."""
+    monkeypatch.setenv("HOME", "/home/test")
+    status = cli.main(["boltz", "--show"])
+    rendered = capsys.readouterr().out
+    assert status == 0
+    # NOTE(review): "name: boltz" is a substring of both job names, so it is redundant.
+    for expected in ("name: boltz", "name: boltz2_xrd", "name: boltz2_md"):
+        assert expected in rendered
+
+
+def test_boltz1_target_uses_named_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """Target ``boltz1`` shows a preset named boltz1 that writes to the boltz1 subdir."""
+    monkeypatch.setenv("HOME", "/home/test")
+    status = cli.main(["boltz1", "--show"])
+    rendered = capsys.readouterr().out
+    assert status == 0
+    for expected in ("name: boltz1", "output_subdir: boltz1"):
+        assert expected in rendered
+
+
+def test_jobs_filters_explicit_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """``--jobs rf3`` narrows an explicit ``--preset full_8gpu`` to the rf3 job."""
+    monkeypatch.setenv("HOME", "/home/test")
+    cli_args = ["--preset", "full_8gpu", "--jobs", "rf3", "--show"]
+    status = cli.main(cli_args)
+    rendered = capsys.readouterr().out
+    assert status == 0
+    assert all(text in rendered for text in ("name: full_8gpu:rf3", "name: rf3"))
+    assert "protenix" not in rendered
+
+
+def test_job_shortcut_with_unknown_job_errors(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A positional target naming no known job raises ``SystemExit`` ("unknown jobs")."""
+    monkeypatch.setenv("HOME", "/home/test")
+    with pytest.raises(SystemExit, match="unknown jobs"):
+        cli.main(["nonexistent", "--show"])
+
+
+def test_set_override_propagates_through_cli(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """``--set`` overrides reach the rendered preset.
+
+    The rf3 job's ``gradient-weights`` arg is overridden on the command line
+    and the new value must appear in the ``--show`` output.
+    """
+    monkeypatch.setenv("HOME", "/home/test")
+    override = "jobs.rf3.args.gradient-weights=0.0 0.01"
+    cli_args = ["--preset", "rf3_partial", "--set", override, "--show"]
+
+    status = cli.main(cli_args)
+    rendered = capsys.readouterr().out
+
+    assert status == 0
+    assert "0.0 0.01" in rendered
+
+
+def test_no_target_defaults_to_full_8gpu(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """With no target or ``--preset``, ``--show`` renders the full_8gpu preset."""
+    monkeypatch.setenv("HOME", "/home/test")
+    status = cli.main(["--show"])
+    rendered = capsys.readouterr().out
+    assert status == 0
+    assert "name: full_8gpu" in rendered
diff --git a/tests/runs/test_loader.py b/tests/runs/test_loader.py
new file mode 100644
index 00000000..0d3dd672
--- /dev/null
+++ b/tests/runs/test_loader.py
@@ -0,0 +1,249 @@
+"""Unit tests for sampleworks.runs.loader."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from sampleworks.runs import loader
+
+
+BUNDLED = [  # preset names that loader.list_presets() is expected to return
+    "boltz",
+    "boltz1",
+    "boltz2",
+    "boltz2_md",
+    "boltz2_xrd",
+    "full_8gpu",
+    "protenix",
+    "protenix_dual",
+    "rf3",
+    "rf3_partial",
+    "rf3_partial_chiral_off",
+    "rf3_protenix",
+]
+
+
+def test_list_presets_returns_bundled_experiments() -> None:
+    """``loader.list_presets()`` yields exactly the names listed in ``BUNDLED``."""
+    discovered = loader.list_presets()
+    assert set(BUNDLED) == set(discovered), f"unexpected experiment presets: {discovered}"
+
+
+@pytest.mark.parametrize("name", BUNDLED)
+def test_each_experiment_preset_loads(name: str, monkeypatch: pytest.MonkeyPatch) -> None:
+    """Each bundled preset loads under its own name with jobs in a known pixi env."""
+    monkeypatch.setenv("HOME", "/home/test")
+    loaded = loader.load_preset(name)
+    assert loaded.name == name
+    assert loaded.jobs, f"{name} has no jobs"
+    supported_envs = ("boltz", "protenix", "rf3")
+    assert all(job.env in supported_envs for job in loaded.jobs)
+
+
+def test_env_var_wins_over_defaults_block(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A ``DATA_DIR`` environment variable beats the preset's ``[defaults]`` value."""
+    for key, value in (("HOME", "/home/test"), ("DATA_DIR", "/from/env")):
+        monkeypatch.setenv(key, value)
+    loaded = loader.load_preset("rf3_partial")
+    # The proteins path is built from DATA_DIR, so the env value must flow into it.
+    resolved_data_dir = loaded.defaults["DATA_DIR"]
+    assert resolved_data_dir == "/from/env"
+    assert loaded.shared_args["proteins"] == "/from/env/proteins.csv"
+
+
+def test_defaults_used_when_env_unset(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Without ``DATA_DIR`` in the environment, rf3_partial falls back to /data/inputs."""
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.delenv("DATA_DIR", raising=False)
+    resolved_defaults = loader.load_preset("rf3_partial").defaults
+    assert resolved_defaults["DATA_DIR"] == "/data/inputs"
+
+
+def test_full_8gpu_uses_canonical_inputs_dir(monkeypatch: pytest.MonkeyPatch) -> None:
+    """full_8gpu must resolve its inputs under /data/inputs, matching the ACTL wrapper."""
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.delenv("DATA_DIR", raising=False)
+    loaded = loader.load_preset("full_8gpu")
+    observed = (loaded.defaults["DATA_DIR"], loaded.shared_args["proteins"])
+    assert observed == ("/data/inputs", "/data/inputs/proteins.csv")
+
+
+def test_set_override_at_defaults(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A ``defaults.DATA_DIR`` override also changes values interpolated from it."""
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.delenv("DATA_DIR", raising=False)
+    loaded = loader.load_preset("rf3_partial", overrides=["defaults.DATA_DIR=/custom"])
+    observed = (loaded.defaults["DATA_DIR"], loaded.shared_args["proteins"])
+    assert observed == ("/custom", "/custom/proteins.csv")
+
+
+def test_set_override_at_job_by_name(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Override ``jobs.rf3.gpus=7`` gives the rf3 job gpus "7" and gpu_count None."""
+    monkeypatch.setenv("HOME", "/home/test")
+    loaded = loader.load_preset("full_8gpu", overrides=["jobs.rf3.gpus=7"])
+    rf3_job = loaded.job("rf3")
+    assert rf3_job.gpus == "7" and rf3_job.gpu_count is None
+
+
+def test_set_override_at_job_by_index(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Override ``jobs.0.gpus=9`` targets the first job by its list index."""
+    monkeypatch.setenv("HOME", "/home/test")
+    loaded = loader.load_preset("full_8gpu", overrides=["jobs.0.gpus=9"])
+    first_job = loaded.jobs[0]
+    assert first_job.gpus == "9" and first_job.gpu_count is None
+
+
+def test_set_override_gpu_count_clears_gpus(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Overriding ``gpu_count`` on a job that pins ``gpus = "0"`` empties ``gpus``."""
+    monkeypatch.setenv("HOME", "/home/test")
+    toml_lines = [
+        'description = "custom"',
+        "[[jobs]]",
+        'name = "j1"',
+        'env = "rf3"',
+        'gpus = "0"',
+        'output_subdir = "j1"',
+        "args = {}",
+    ]
+    preset_file = tmp_path / "gpu_count.toml"
+    preset_file.write_text("".join(f"{line}\n" for line in toml_lines))
+    j1 = loader.load_preset(str(preset_file), overrides=["jobs.j1.gpu_count=2"]).job("j1")
+    assert j1.gpu_count == 2 and j1.gpus == ""
+
+
+def test_set_override_at_args_inside_job(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``jobs.rf3.args.<key>=<value>`` lands in the rf3 job's ``args`` mapping."""
+    monkeypatch.setenv("HOME", "/home/test")
+    override = "jobs.rf3.args.gradient-weights=0.0 0.01"
+    loaded = loader.load_preset("rf3_partial", overrides=[override])
+    rf3_args = loaded.job("rf3").args
+    assert rf3_args["gradient-weights"] == "0.0 0.01"
+
+
+def test_set_coerces_bool_and_int(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``false`` and ``200`` in overrides load as ``False`` and a value equal to 200."""
+    monkeypatch.setenv("HOME", "/home/test")
+    bool_override = "shared_args.gradient-normalization=false"
+    int_override = "jobs.rf3.args.partial-diffusion-step=200"
+    loaded = loader.load_preset("rf3_partial", overrides=[bool_override, int_override])
+
+    normalization = loaded.shared_args["gradient-normalization"]
+    assert normalization is False
+
+    # Per the original author, rf3_partial's job args have no partial-diffusion-step
+    # by default, so this also covers creating a key through an override.
+    step = loaded.job("rf3").args["partial-diffusion-step"]
+    assert step == 200
+
+
+def test_load_preset_from_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    """A filesystem TOML path loads as a custom preset named after the file stem."""
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.delenv("DATA_DIR", raising=False)  # an inherited value beats [defaults]
+    custom = tmp_path / "mycustom.toml"
+    custom.write_text(
+        'description = "custom"\n'
+        "[defaults]\n"
+        'DATA_DIR = "/x"\n'
+        "[shared_args]\n"
+        'model = "rf3"\n'
+        "[[jobs]]\n"
+        'name = "j1"\n'
+        'env = "rf3"\n'
+        'gpus = "0"\n'
+        'output_subdir = "j1"\n'
+        "args = {}\n"
+    )
+    preset = loader.load_preset(str(custom))
+    assert (preset.name, preset.defaults["DATA_DIR"]) == ("mycustom", "/x")
+
+
+def test_load_preset_from_experiments_dir_override(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """``SAMPLEWORKS_EXPERIMENTS_DIR`` lets a preset name resolve in a custom directory."""
+    toml_lines = [
+        'description = "custom"',
+        "[shared_args]",
+        'model = "rf3"',
+        "[[jobs]]",
+        'name = "j1"',
+        'env = "rf3"',
+        'gpus = "0"',
+        'output_subdir = "j1"',
+        "args = {}",
+    ]
+    override_dir = tmp_path / "experiments"
+    override_dir.mkdir()
+    (override_dir / "custom.toml").write_text("".join(f"{line}\n" for line in toml_lines))
+    monkeypatch.setenv("SAMPLEWORKS_EXPERIMENTS_DIR", str(override_dir))
+
+    loaded = loader.load_preset("custom")
+
+    assert (loaded.name, loaded.job("j1").env) == ("custom", "rf3")
+
+
+def test_unknown_preset_raises() -> None:
+    """``load_preset("does_not_exist")`` raises ``FileNotFoundError``."""
+    with pytest.raises(FileNotFoundError):
+        loader.load_preset("does_not_exist")
+
+
+def test_undefined_variable_raises(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    """A ``${VAR}`` with no env or defaults value raises ``KeyError`` naming the variable."""
+    monkeypatch.delenv("NEVER_DEFINED_VAR", raising=False)
+    shared_toml = '[shared_args]\nproteins = "${NEVER_DEFINED_VAR}/x"\n'
+    jobs_toml = '[[jobs]]\nname = "j"\nenv = "rf3"\ngpus = "0"\noutput_subdir = "j"\nargs = {}\n'
+    preset_file = tmp_path / "bad.toml"
+    preset_file.write_text(shared_toml + jobs_toml)
+
+    with pytest.raises(KeyError, match="NEVER_DEFINED_VAR"):
+        loader.load_preset(str(preset_file))
+
+
+def test_set_without_equals_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An override lacking ``=`` raises ``ValueError`` whose message mentions KEY=VALUE."""
+    monkeypatch.setenv("HOME", "/home/test")
+    with pytest.raises(ValueError, match="KEY=VALUE"):
+        loader.load_preset("rf3_partial", overrides=["bogus_no_equals"])
+
+
+def test_set_with_unknown_top_level_key_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``--set job.rf3.gpus=0`` (typo for ``jobs``) raises ``KeyError``, not a no-op."""
+    monkeypatch.setenv("HOME", "/home/test")
+    with pytest.raises(KeyError, match="unknown top-level key"):
+        loader.load_preset("rf3_partial", overrides=["job.rf3.gpus=0"])
+
+
+def test_set_with_out_of_range_job_index_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Override ``jobs.99.*`` on rf3_partial raises ``KeyError`` mentioning "index 99"."""
+    monkeypatch.setenv("HOME", "/home/test")
+    with pytest.raises(KeyError, match="index 99"):
+        loader.load_preset("rf3_partial", overrides=["jobs.99.gpus=0"])
+
+
+def test_cyclic_variable_expansion_raises(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    """``A`` and ``B`` referencing each other raise ``ValueError`` ("did not converge")."""
+    # Environment values that expand into each other form the cycle.
+    for name, value in (("A", "${B}"), ("B", "${A}")):
+        monkeypatch.setenv(name, value)
+    shared_toml = '[shared_args]\nproteins = "${A}"\n'
+    jobs_toml = '[[jobs]]\nname = "j"\nenv = "rf3"\ngpus = "0"\noutput_subdir = "j"\nargs = {}\n'
+    preset_file = tmp_path / "cycle.toml"
+    preset_file.write_text(shared_toml + jobs_toml)
+
+    with pytest.raises(ValueError, match="did not converge"):
+        loader.load_preset(str(preset_file))
+
+
+def test_bad_env_rejected(tmp_path: Path) -> None:
+    """A job whose ``env`` is not a supported name raises ``ValueError`` on load."""
+    job_fields = ['name = "j"', 'env = "not_a_real_env"', 'gpus = "0"', 'output_subdir = "j"']
+    toml_lines = ["[[jobs]]", *job_fields, "args = {}"]
+    preset_file = tmp_path / "bad.toml"
+    preset_file.write_text("".join(f"{line}\n" for line in toml_lines))
+    with pytest.raises(ValueError, match="env must be one of"):
+        loader.load_preset(str(preset_file))
diff --git a/tests/runs/test_runner.py b/tests/runs/test_runner.py
new file mode 100644
index 00000000..04a4b1b0
--- /dev/null
+++ b/tests/runs/test_runner.py
@@ -0,0 +1,283 @@
+"""Unit tests for sampleworks.runs.runner argv builder."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from sampleworks.runs import loader, runner
+
+
+def test_argv_for_rf3_partial_matches_bash(monkeypatch: pytest.MonkeyPatch) -> None:
+    """RF3 partial builds the canonical argv and auto-assigns all GPUs."""
+    monkeypatch.setenv("HOME", "/home/test")
+    # Clear ambient overrides so the expected paths below do not depend on the caller's shell.
+    for var in ("DATA_DIR", "RESULTS_DIR", "SAMPLEWORKS_GRID_SEARCH_SCRIPT"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: [str(i) for i in range(8)])
+    preset = loader.load_preset("rf3_partial")
+    invocations = runner.build_invocations(preset, results_dir=Path("/results"))
+
+    assert len(invocations) == 1
+    inv = invocations[0]
+    assert inv.job.name == "rf3"
+    assert inv.env["CUDA_VISIBLE_DEVICES"] == "0,1,2,3,4,5,6,7"
+    assert inv.log_path == Path("/results/rf3_run.log")
+
+    assert inv.argv[:6] == ["pixi", "run", "-e", "rf3", "python", "/app/run_grid_search.py"]
+    pairs = _argv_to_dict(inv.argv[6:])
+    assert pairs["--proteins"] == "/data/inputs/proteins.csv"
+    assert pairs["--model"] == "rf3"
+    assert pairs["--scalers"] == "pure_guidance"
+    assert pairs["--partial-diffusion-step"] == "120"
+    assert pairs["--ensemble-sizes"] == "8"
+    assert pairs["--gradient-weights"] == "0.0 0.005 0.01 0.02 0.035 0.05 0.1"
+    assert pairs["--model-checkpoint"] == "/checkpoints/rf3_foundry_01_24_latest.ckpt"
+    assert pairs["--output-dir"] == "/results/rf3"
+    # store_true flags appear as bare keys (value=True in our dict)
+    assert pairs["--gradient-normalization"] is True
+    assert pairs["--augmentation"] is True
+    assert pairs["--align-to-input"] is True
+
+
+def test_argv_omits_false_bool_flags(monkeypatch: pytest.MonkeyPatch) -> None:
+    """A boolean arg set to false is dropped from argv rather than emitted as a bare flag."""
+    monkeypatch.setenv("HOME", "/home/test")
+    disable_flag = ["shared_args.gradient-normalization=false"]
+    preset = loader.load_preset("rf3_partial", overrides=disable_flag)
+    built = runner.build_invocations(preset, results_dir=Path("/results"))
+    first = built[0]
+    assert "--gradient-normalization" not in first.argv
+
+
+def test_explicit_output_dir_in_args_wins_over_subdir_default(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A job's own ``output-dir`` arg takes precedence over the ``output_subdir`` default."""
+    monkeypatch.setenv("HOME", "/home/test")
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        "[shared_args]\n"
+        '[[jobs]]\nname = "j"\nenv = "rf3"\ngpus = "0"\noutput_subdir = "sub"\n'
+        'args = { "output-dir" = "/explicit/path" }\n'
+    )
+    preset = loader.load_preset(str(preset_file))
+    first = runner.build_invocations(preset, results_dir=Path("/results"))[0]
+    flags = _argv_to_dict(first.argv[6:])
+    assert flags["--output-dir"] == "/explicit/path"
+
+
+def test_full_8gpu_has_four_jobs_with_distinct_gpus(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """With eight GPUs visible, ``full_8gpu`` yields four jobs on consecutive GPU pairs."""
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: [str(i) for i in range(8)])
+    monkeypatch.setenv("HOME", "/home/test")
+    built = runner.build_invocations(loader.load_preset("full_8gpu"), results_dir=Path("/r"))
+    job_names = [inv.job.name for inv in built]
+    assert job_names == ["boltz2_xrd", "boltz2_md", "rf3", "protenix"]
+    devices = [inv.env["CUDA_VISIBLE_DEVICES"] for inv in built]
+    assert devices == ["0,1", "2,3", "4,5", "6,7"]
+
+
+def test_single_job_presets_use_all_eight_gpus(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Each standalone single-job preset gets every one of the eight visible GPUs."""
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: [str(i) for i in range(8)])
+    every_gpu = "0,1,2,3,4,5,6,7"
+    for preset_name in ("boltz1", "boltz2_xrd", "boltz2_md", "rf3", "protenix"):
+        preset = loader.load_preset(preset_name)
+        built = runner.build_invocations(preset, results_dir=Path("/r"))
+        assert len(built) == 1
+        assert built[0].env["CUDA_VISIBLE_DEVICES"] == every_gpu
+
+
+def test_gpu_count_uses_visible_gpus_in_order(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """``gpu_count`` jobs draw from the visible GPU IDs sequentially, in job order."""
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        "[shared_args]\n"
+        '[[jobs]]\nname = "a"\nenv = "rf3"\ngpu_count = 2\noutput_subdir = "a"\n'
+        '[[jobs]]\nname = "b"\nenv = "rf3"\ngpu_count = 1\noutput_subdir = "b"\n'
+    )
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: ["4", "5", "6"])
+    preset = loader.load_preset(str(preset_file))
+    built = runner.build_invocations(preset, results_dir=tmp_path / "results")
+
+    assert [inv.gpus for inv in built] == ["4,5", "6"]
+    assert [inv.env["CUDA_VISIBLE_DEVICES"] for inv in built] == ["4,5", "6"]
+
+
+def test_gpu_count_respects_explicit_gpu_reservations(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """A ``gpu_count`` job is given GPUs other than the one an earlier job pinned via ``gpus``."""
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        "[shared_args]\n"
+        '[[jobs]]\nname = "manual"\nenv = "rf3"\ngpus = "2"\noutput_subdir = "manual"\n'
+        '[[jobs]]\nname = "auto"\nenv = "rf3"\ngpu_count = 2\noutput_subdir = "auto"\n'
+    )
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: ["0", "1", "2", "3"])
+    preset = loader.load_preset(str(preset_file))
+    built = runner.build_invocations(preset, results_dir=tmp_path / "results")
+
+    assert [inv.gpus for inv in built] == ["2", "0,1"]
+
+
+def test_gpu_count_rejects_insufficient_visible_gpus(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Requesting more ``gpu_count`` GPUs than are visible raises ``RuntimeError``."""
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        '[shared_args]\n[[jobs]]\nname = "a"\nenv = "rf3"\ngpu_count = 2\noutput_subdir = "a"\n'
+    )
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: ["0"])
+    preset = loader.load_preset(str(preset_file))
+
+    with pytest.raises(RuntimeError, match="Not enough visible GPUs"):
+        runner.build_invocations(preset, results_dir=tmp_path / "results")
+
+
+def test_protenix_dual_uses_different_checkpoints(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``protenix_dual`` gives its first job the tiny checkpoint and its second the mini one."""
+    monkeypatch.setenv("HOME", "/home/test")
+    preset = loader.load_preset("protenix_dual")
+    built = runner.build_invocations(preset, results_dir=Path("/r"))
+    flags = [_argv_to_dict(inv.argv[6:]) for inv in built]
+    assert flags[0]["--model-checkpoint"] == "/extra_checkpoints/protenix_tiny_default_v0.5.0.pt"
+    assert flags[1]["--model-checkpoint"] == "/extra_checkpoints/protenix_mini_default_v0.5.0.pt"
+
+
+def test_rf3_partial_chiral_off_flag_present(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``rf3_partial_chiral_off`` emits both ``--disable-chiral-features`` and ``--force-all``."""
+    monkeypatch.setenv("HOME", "/home/test")
+    preset = loader.load_preset("rf3_partial_chiral_off")
+    argv = runner.build_invocations(preset, results_dir=Path("/r"))[0].argv
+    for flag in ("--disable-chiral-features", "--force-all"):
+        assert flag in argv
+
+
+def test_build_invocations_records_output_dir(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Invocations record ``output_dir`` (run_grid_search.py assumes the runner has created it)."""
+    monkeypatch.setenv("HOME", "/home/test")
+    built = runner.build_invocations(loader.load_preset("rf3_partial"), results_dir=Path("/r"))
+    first = built[0]
+    assert first.output_dir == Path("/r/rf3")
+
+
+def test_grid_search_script_can_be_overridden(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``SAMPLEWORKS_GRID_SEARCH_SCRIPT`` replaces the script path in the built argv.
+
+    This lets ACTL wrappers run the synced checkout instead of the baked /app copy.
+    """
+    script_override = "/home/dev/workspace/run_grid_search.py"
+    monkeypatch.setenv("HOME", "/home/test")
+    monkeypatch.setenv("SAMPLEWORKS_GRID_SEARCH_SCRIPT", script_override)
+    preset = loader.load_preset("rf3_partial")
+    first = runner.build_invocations(preset, results_dir=Path("/r"))[0]
+
+    # Only the final element changes; the ``pixi run -e rf3 python`` prefix is kept.
+    expected_head = ["pixi", "run", "-e", "rf3", "python", script_override]
+    assert first.argv[:6] == expected_head
+
+
+def test_gpu_validation_rejects_unavailable_gpu_ids(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Explicit ``gpus`` IDs missing from the visible set are rejected by GPU validation."""
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        "[shared_args]\n"
+        '[[jobs]]\nname = "ok"\nenv = "rf3"\ngpus = "0,1"\noutput_subdir = "ok"\n'
+        '[[jobs]]\nname = "bad"\nenv = "rf3"\ngpus = "4,5"\noutput_subdir = "bad"\n'
+    )
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: ["0", "1", "2", "3"])
+    preset = loader.load_preset(str(preset_file))
+    built = runner.build_invocations(preset, results_dir=tmp_path / "results")
+
+    with pytest.raises(RuntimeError, match="not visible"):
+        runner._validate_gpu_assignments(built)
+
+
+def test_gpu_validation_rejects_duplicate_gpu_ids(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Two jobs pinned to the same GPU fail validation (oversubscription env var unset)."""
+    preset_file = tmp_path / "custom.toml"
+    preset_file.write_text(
+        "[shared_args]\n"
+        '[[jobs]]\nname = "a"\nenv = "rf3"\ngpus = "0"\noutput_subdir = "a"\n'
+        '[[jobs]]\nname = "b"\nenv = "rf3"\ngpus = "0"\noutput_subdir = "b"\n'
+    )
+    monkeypatch.delenv("SAMPLEWORKS_ALLOW_GPU_OVERSUBSCRIPTION", raising=False)
+    monkeypatch.setattr(runner, "_detect_available_gpus", lambda: ["0", "1"])
+    preset = loader.load_preset(str(preset_file))
+    built = runner.build_invocations(preset, results_dir=tmp_path / "results")
+
+    with pytest.raises(RuntimeError, match="same GPU"):
+        runner._validate_gpu_assignments(built)
+
+
+def test_uses_baked_env_python_when_available(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """ACTL image runs bypass pixi cache refreshes by calling env Python directly."""
+    # Drop ambient overrides so the launcher and script asserted below are not caller-dependent.
+    for var in ("SAMPLEWORKS_FORCE_PIXI", "SAMPLEWORKS_GRID_SEARCH_SCRIPT"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setenv("HOME", "/home/test")
+    python_bin = tmp_path / "app" / ".pixi" / "envs" / "rf3" / "bin" / "python"
+    python_bin.parent.mkdir(parents=True)
+    python_bin.write_text("#!/bin/sh\n")
+    python_bin.chmod(0o755)
+    monkeypatch.setenv("SAMPLEWORKS_PIXI_PROJECT_DIR", str(tmp_path / "app"))
+    preset = loader.load_preset("rf3_partial")
+    inv = runner.build_invocations(preset, results_dir=Path("/r"))[0]
+    assert inv.argv[:2] == [str(python_bin), "/app/run_grid_search.py"]
+
+
+def test_prebuilt_env_required_rejects_runtime_pixi(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """When prebuilt envs are required, a missing env raises instead of falling back to pixi."""
+    for var in ("SAMPLEWORKS_FORCE_PIXI", "SAMPLEWORKS_ALLOW_RUNTIME_PIXI"):
+        monkeypatch.delenv(var, raising=False)
+    monkeypatch.setenv("SAMPLEWORKS_REQUIRE_PREBUILT_PIXI", "1")
+    missing_project = tmp_path / "app"  # never created, so no prebuilt env exists under it
+    monkeypatch.setenv("SAMPLEWORKS_PIXI_PROJECT_DIR", str(missing_project))
+    preset = loader.load_preset("rf3_partial")
+    with pytest.raises(RuntimeError, match="Refusing to fall back to 'pixi run'"):
+        runner.build_invocations(preset, results_dir=Path("/r"))
+
+
+def test_dry_run_does_not_create_directories(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """--dry-run must not create per-job output directories."""
+    monkeypatch.setenv("HOME", str(tmp_path))
+    preset = loader.load_preset("rf3_partial")
+    results_root = tmp_path / "results"
+    runner.run(preset, results_dir=results_root, dry_run=True)
+    # run() may create results_dir itself (the log files live there), but the
+    # per-job output subdirectory must still be absent after a dry run.
+    assert not (results_root / "rf3").exists()
+
+
+def _argv_to_dict(tail: list[str]) -> dict[str, object]:
+    """Map each ``--flag`` in ``tail`` to its value, or to ``True`` when it is a bare switch."""
+    parsed: dict[str, object] = {}
+    pos = 0
+    while pos < len(tail):
+        flag = tail[pos]
+        assert flag.startswith("--"), f"unexpected positional: {flag}"
+        value = tail[pos + 1] if pos + 1 < len(tail) else None
+        if value is None or value.startswith("--"):
+            # Bare switch: nothing follows, or the next token is another flag.
+            parsed[flag], pos = True, pos + 1
+        else:
+            parsed[flag], pos = value, pos + 2
+    return parsed