From ef691f90f0f17a98a49a3cc854b03c67f4ac1aac Mon Sep 17 00:00:00 2001 From: xraymemory Date: Wed, 22 Apr 2026 10:53:44 -0400 Subject: [PATCH 1/4] Add diffuse.yaml for Diffuse platform integration Declares sampleworks run profiles (boltz2-xrd, boltz2-md, protenix, rf3) with input schemas, args templates, volume mounts, and metadata fields. Applied to Diffuse via `diffuse apply`. --- diffuse.yaml | 342 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 diffuse.yaml diff --git a/diffuse.yaml b/diffuse.yaml new file mode 100644 index 00000000..c39acd3f --- /dev/null +++ b/diffuse.yaml @@ -0,0 +1,342 @@ +# diffuse.yaml -- the contract between Sampleworks and Diffuse. +# +# Apply with: diffuse apply +# Run with: diffuse run trigger --profile boltz2-xrd +# +# Idempotent -- safe to apply repeatedly. + +version: 1 + +type: sampleworks +name: Sampleworks +description: > + Protein structure prediction via grid search. + Runs Boltz2, Protenix, or RoseTTAFold3 models with configurable + gradient guidance, ensemble sizes, and diffusion parameters. + +profiles: + - slug: boltz2-xrd + name: Sampleworks Boltz2 (X-Ray Diffraction) + container: + image: diffuseproject/sampleworks + tag: latest + gpus: + min: 2 + entrypoint: null + input_schema: + inputs: + - key: proteins_path + type: text + default: /data/input/proteins.csv + - key: models + type: enum + required: true + allowed_values: [boltz2] + default: boltz2 + - key: methods + type: text + default: "X-RAY DIFFRACTION" + - key: scalers + type: text + default: pure_guidance + - key: partial_diffusion_step + type: number + default: "120" + - key: gradient_weights + type: text + default: "0.1 0.2 0.5" + - key: ensemble_sizes + type: text + default: "8" + - key: gradient_normalization + type: boolean + default: true + - key: augmentation + type: boolean + default: true + - key: align_to_input + type: boolean + default: true + run_config_defaults: + shared_memory_size: "16Gi" + image_pull_policy: IfNotPresent + poll_max_retries: 720 + volumes: + - name: data-input + hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 + mountPath: /data/input + readOnly: true + - name: data-results + hostPath: /home/ubuntu/sampleworks-exp/grid_search_results + mountPath: /data/results + - name: msa-cache + hostPath: /home/ubuntu/sampleworks-exp/msa_cache + mountPath: /root/.sampleworks/msa + - name: checkpoints + hostPath: /mnt/diffuse-private + mountPath: /mnt/diffuse-private + readOnly: true + args_template: + base_command: [-e, boltz, run_grid_search.py] + flag_args: + proteins_path: --proteins + models: --models + scalers: --scalers + partial_diffusion_step: --partial-diffusion-step + ensemble_sizes: --ensemble-sizes + gradient_weights: --gradient-weights + methods: --methods + boolean_args: + gradient_normalization: --gradient-normalization + augmentation: --augmentation + align_to_input: --align-to-input + static_args: [--output-dir, /data/results] + + - slug: boltz2-md + name: Sampleworks Boltz2 (MD) + container: + image: diffuseproject/sampleworks + tag: latest + gpus: + min: 2 + entrypoint: null + input_schema: + inputs: + - key: proteins_path + type: text + default: /data/input/proteins.csv + - key: models + type: enum + required: true + allowed_values: [boltz2] + default: boltz2 + - key: methods + type: text + default: MD + - key: scalers + type: text + default: pure_guidance + - key: partial_diffusion_step + type: number + default: "120" + - key: gradient_weights + type: text + default: "0.1 0.2 0.5" + - key: ensemble_sizes + type: text + default: "8" + - key: gradient_normalization + type: boolean + default: true + - key: augmentation + type: boolean + default: true + - key: align_to_input + type: boolean + default: true + run_config_defaults: + shared_memory_size: "16Gi" + image_pull_policy: IfNotPresent + poll_max_retries: 720 + volumes: + - name: data-input + hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 + mountPath: /data/input + readOnly: true + - name: data-results + hostPath: /home/ubuntu/sampleworks-exp/grid_search_results + mountPath: /data/results + - name: msa-cache + hostPath: /home/ubuntu/sampleworks-exp/msa_cache + mountPath: /root/.sampleworks/msa + - name: checkpoints + hostPath: /mnt/diffuse-private + mountPath: /mnt/diffuse-private + readOnly: true + args_template: + base_command: [-e, boltz, run_grid_search.py] + flag_args: + proteins_path: --proteins + models: --models + scalers: --scalers + partial_diffusion_step: --partial-diffusion-step + ensemble_sizes: --ensemble-sizes + gradient_weights: --gradient-weights + methods: --methods + boolean_args: + gradient_normalization: --gradient-normalization + augmentation: --augmentation + align_to_input: --align-to-input + static_args: [--output-dir, /data/results] + + - slug: protenix + name: Sampleworks Protenix + container: + image: diffuseproject/sampleworks + tag: latest + gpus: + min: 2 + entrypoint: null + input_schema: + inputs: + - key: proteins_path + type: text + default: /data/input/proteins.csv + - key: models + type: enum + required: true + allowed_values: [protenix] + default: protenix + - key: scalers + type: text + default: pure_guidance + - key: partial_diffusion_step + type: number + default: "120" + - key: gradient_weights + type: text + default: "0.1 0.2 0.5" + - key: ensemble_sizes + type: text + default: "8" + - key: gradient_normalization + type: boolean + default: true + - key: augmentation + type: boolean + default: true + - key: align_to_input + type: boolean + default: true + run_config_defaults: + shared_memory_size: "16Gi" + image_pull_policy: IfNotPresent + poll_max_retries: 720 + volumes: + - name: data-input + hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 + mountPath: /data/input + readOnly: true + - name: data-results + hostPath: /home/ubuntu/sampleworks-exp/grid_search_results + mountPath: /data/results + - name: msa-cache + hostPath: /home/ubuntu/sampleworks-exp/msa_cache + mountPath: /root/.sampleworks/msa + - name: checkpoints + hostPath: /mnt/diffuse-private + mountPath: /mnt/diffuse-private + readOnly: true + args_template: + base_command: [-e, protenix, run_grid_search.py] + flag_args: + proteins_path: --proteins + models: --models + scalers: --scalers + partial_diffusion_step: --partial-diffusion-step + ensemble_sizes: --ensemble-sizes + gradient_weights: --gradient-weights + boolean_args: + gradient_normalization: --gradient-normalization + augmentation: --augmentation + align_to_input: --align-to-input + static_args: + - --output-dir + - /data/results + - --protenix-checkpoint + - /mnt/diffuse-private/raw/checkpoints/protenix_base_default_v0.5.0.pt + + - slug: rf3 + name: Sampleworks RoseTTAFold3 + container: + image: diffuseproject/sampleworks + tag: latest + gpus: + min: 2 + entrypoint: null + input_schema: + inputs: + - key: proteins_path + type: text + default: /data/input/proteins.csv + - key: models + type: enum + required: true + allowed_values: [rf3] + default: rf3 + - key: scalers + type: text + default: pure_guidance + - key: partial_diffusion_step + type: number + default: "120" + - key: gradient_weights + type: text + default: "0.01 0.02 0.05" + - key: ensemble_sizes + type: text + default: "8" + - key: gradient_normalization + type: boolean + default: true + - key: augmentation + type: boolean + default: true + - key: align_to_input + type: boolean + default: true + run_config_defaults: + shared_memory_size: "16Gi" + image_pull_policy: IfNotPresent + poll_max_retries: 720 + volumes: + - name: data-input + hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 + mountPath: /data/input + readOnly: true + - name: data-results + hostPath: /home/ubuntu/sampleworks-exp/grid_search_results + mountPath: /data/results + - name: msa-cache + hostPath: /home/ubuntu/sampleworks-exp/msa_cache + mountPath: /root/.sampleworks/msa + - name: checkpoints + hostPath: /mnt/diffuse-private + mountPath: /mnt/diffuse-private + readOnly: true + args_template: + base_command: [-e, rf3, run_grid_search.py] + flag_args: + proteins_path: --proteins + models: --models + scalers: --scalers + partial_diffusion_step: --partial-diffusion-step + ensemble_sizes: --ensemble-sizes + gradient_weights: --gradient-weights + boolean_args: + gradient_normalization: --gradient-normalization + augmentation: --augmentation + align_to_input: --align-to-input + static_args: + - --output-dir + - /data/results + - --rf3-checkpoint + - /mnt/diffuse-private/raw/checkpoints/rf3_foundry_01_24_latest.ckpt + +fields: + - key: sampleworks_model + type: enum + display_name: Model + allowed_values: [boltz2, protenix, rf3] + required: true + - key: sampleworks_methods + type: text + display_name: Methods + - key: sampleworks_gradient_weights + type: text + display_name: Gradient Weights + default: "0.1 0.2 0.5" + - key: sampleworks_ensemble_sizes + type: text + display_name: Ensemble Sizes + default: "8" From d8540026d1430bbf9cdd7042d7b404b3ea6cc23b Mon Sep 17 00:00:00 2001 From: xraymemory Date: Thu, 23 Apr 2026 15:22:13 -0400 Subject: [PATCH 2/4] Fix CLI flag names to match run_grid_search.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - --models → --model (renamed in #150) - --methods → --method (renamed in #151) - --protenix-checkpoint/--rf3-checkpoint → --model-checkpoint (standardized) - partial_diffusion_step default: "120" → 120 (number not string) --- diffuse.yaml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/diffuse.yaml b/diffuse.yaml index c39acd3f..273e6b0c 100644 --- a/diffuse.yaml +++ b/diffuse.yaml @@ -41,7 +41,7 @@ profiles: default: pure_guidance - key: partial_diffusion_step type: number - default: "120" + default: 120 - key: gradient_weights type: text default: "0.1 0.2 0.5" @@ -80,12 +80,12 @@ profiles: base_command: [-e, boltz, run_grid_search.py] flag_args: proteins_path: --proteins - models: --models + models: --model scalers: --scalers partial_diffusion_step: --partial-diffusion-step ensemble_sizes: --ensemble-sizes gradient_weights: --gradient-weights - methods: --methods + methods: --method boolean_args: gradient_normalization: --gradient-normalization augmentation: --augmentation @@ -118,7 +118,7 @@ profiles: default: pure_guidance - key: partial_diffusion_step type: number - default: "120" + default: 120 - key: gradient_weights type: text default: "0.1 0.2 0.5" @@ -157,12 +157,12 @@ profiles: base_command: [-e, boltz, run_grid_search.py] flag_args: proteins_path: --proteins - models: --models + models: --model scalers: --scalers partial_diffusion_step: --partial-diffusion-step ensemble_sizes: --ensemble-sizes gradient_weights: --gradient-weights - methods: --methods + methods: --method boolean_args: gradient_normalization: --gradient-normalization augmentation: --augmentation @@ -192,7 +192,7 @@ profiles: default: pure_guidance - key: partial_diffusion_step type: number - default: "120" + default: 120 - key: gradient_weights type: text default: "0.1 0.2 0.5" @@ -231,7 +231,7 @@ profiles: base_command: [-e, protenix, run_grid_search.py] flag_args: proteins_path: --proteins - models: --models + models: --model scalers: --scalers partial_diffusion_step: --partial-diffusion-step ensemble_sizes: --ensemble-sizes @@ -243,7 +243,7 @@ profiles: static_args: - --output-dir - /data/results - - --protenix-checkpoint + - --model-checkpoint - /mnt/diffuse-private/raw/checkpoints/protenix_base_default_v0.5.0.pt - slug: rf3 @@ -269,7 +269,7 @@ profiles: default: pure_guidance - key: partial_diffusion_step type: number - default: "120" + default: 120 - key: gradient_weights type: text default: "0.01 0.02 0.05" @@ -308,7 +308,7 @@ profiles: base_command: [-e, rf3, run_grid_search.py] flag_args: proteins_path: --proteins - models: --models + models: --model scalers: --scalers partial_diffusion_step: --partial-diffusion-step ensemble_sizes: --ensemble-sizes @@ -320,7 +320,7 @@ profiles: static_args: - --output-dir - /data/results - - --rf3-checkpoint + - --model-checkpoint - /mnt/diffuse-private/raw/checkpoints/rf3_foundry_01_24_latest.ckpt fields: From a39bb0dcddfd67ca762d5e0716ab94dbffc513b0 Mon Sep 17 00:00:00 2001 From: xraymemory Date: Mon, 27 Apr 2026 13:05:39 -0400 Subject: [PATCH 3/4] =?UTF-8?q?Fix=20CLI=20command=20in=20header=20comment?= =?UTF-8?q?:=20trigger=20=E2=86=92=20start?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- diffuse.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diffuse.yaml b/diffuse.yaml index 273e6b0c..15355ef0 100644 --- a/diffuse.yaml +++ b/diffuse.yaml @@ -1,7 +1,7 @@ # diffuse.yaml -- the contract between Sampleworks and Diffuse. # # Apply with: diffuse apply -# Run with: diffuse run trigger --profile boltz2-xrd +# Run with: diffuse run start --profile boltz2-xrd # # Idempotent -- safe to apply repeatedly. From 09feea531bc509b1fd093b06c35e4a2da045e4ca Mon Sep 17 00:00:00 2001 From: xraymemory Date: Thu, 30 Apr 2026 13:09:10 -0400 Subject: [PATCH 4/4] Add flexible Diffuse params runs --- Dockerfile | 14 +- GRID_SEARCH.md | 10 +- README.md | 10 +- diffuse.yaml | 326 +------- docker-entrypoint.sh | 84 ++- run_grid_search.py | 152 +++- .../utils/guidance_script_utils.py | 6 +- src/sampleworks/utils/run_params.py | 698 ++++++++++++++++++ tests/utils/test_run_params.py | 338 +++++++++ 9 files changed, 1277 insertions(+), 361 deletions(-) create mode 100644 src/sampleworks/utils/run_params.py create mode 100644 tests/utils/test_run_params.py diff --git a/Dockerfile b/Dockerfile index 20a3d675..45ca0ae0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,6 @@ # --ensemble-sizes "1 4" \ # --gradient-weights "0.1 0.2" \ # --output-dir /data/results \ -# --use-tweedie \ # --gradient-normalization \ # --augmentation \ # --align-to-input @@ -41,8 +40,7 @@ # --methods "X-RAY DIFFRACTION" \ # --ensemble-sizes "1 4" \ # --gradient-weights "0.1 0.2" \ -# --output-dir /data/results \ -# --use-tweedie +# --output-dir /data/results # # # Interactive shell # docker run --gpus all -it sampleworks bash @@ -64,6 +62,9 @@ # ============================================================================ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS base +ARG SAMPLEWORKS_GIT_SHA=unknown +ARG SAMPLEWORKS_IMAGE_TAG=unknown + ENV DEBIAN_FRONTEND=noninteractive \ # Pixi configuration PIXI_HOME=/root/.pixi \ @@ -74,7 +75,12 @@ ENV DEBIAN_FRONTEND=noninteractive \ # Disable user site packages (isolation) PYTHONNOUSERSITE=1 \ # Optimize CUDA compilation for H100 - TORCH_CUDA_ARCH_LIST="9.0" + TORCH_CUDA_ARCH_LIST="9.0" \ + SAMPLEWORKS_GIT_SHA=${SAMPLEWORKS_GIT_SHA} \ + SAMPLEWORKS_IMAGE_TAG=${SAMPLEWORKS_IMAGE_TAG} + +LABEL org.opencontainers.image.revision=${SAMPLEWORKS_GIT_SHA} \ + org.opencontainers.image.version=${SAMPLEWORKS_IMAGE_TAG} # Install system dependencies required for building scientific packages RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/GRID_SEARCH.md b/GRID_SEARCH.md index 154d00c5..e50bfb16 100644 --- a/GRID_SEARCH.md +++ b/GRID_SEARCH.md @@ -26,8 +26,8 @@ protein structure, density map, and resolution columns, described below. ```bash pixi run -e boltz python run_grid_search.py \ --proteins proteins.csv \ - --models boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) - --methods "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise + --model boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) + --method "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise --scalers pure_guidance \ # options: pure_guidance, fk_steering, or both as space-separated list --ensemble-sizes "1 4" \ --gradient-weights "0.1 0.2" \ @@ -51,11 +51,11 @@ name,structure,density,resolution | Argument | Description | Default | |----------------------|------------------------------------------------------------|-----------------------------| | `--proteins` | CSV with structure/density/resolution columns | required | -| `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | +| `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | | `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` | | `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` | | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` | -| `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | +| `--method` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | | `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` | | `--dry-run` | Print jobs without running them | off | | `--force-all` | Re-run including already-successful jobs | off | @@ -65,7 +65,7 @@ name,structure,density,resolution | `--track-chiral-features` | Track RF3 chiral gradient magnitude | off | > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. -> Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the +> Some flags (e.g., `--gradient-normalization`) are not reflected in the > directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to > re-run all jobs regardless. This is under active development and will likely change soon. diff --git a/README.md b/README.md index 7087c139..84d63cdf 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,8 @@ Output files appear in `output/boltz2_pure_guidance/`: `refined.cif` (final ense ```bash pixi run -e boltz python run_grid_search.py \ --proteins proteins.csv \ - --models boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) - --methods "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise + --model boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) + --method "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise --scalers pure_guidance \ # options: pure_guidance, fk_steering, or both as space-separated list --ensemble-sizes "1 4" \ --gradient-weights "0.1 0.2" \ @@ -113,11 +113,11 @@ name,structure,density,resolution | Argument | Description | Default | |---|---|---| | `--proteins` | CSV with structure/density/resolution columns | required | -| `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | +| `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | | `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` | | `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` | | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` | -| `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | +| `--method` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | | `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` | | `--dry-run` | Print jobs without running them | off | | `--force-all` | Re-run including already-successful jobs | off | @@ -126,7 +126,7 @@ name,structure,density,resolution Output layout: `grid_search_results//[_]//ens_gw/` -> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon. +> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon. Instructions for running evaluation and metrics scripts are coming soon. diff --git a/diffuse.yaml b/diffuse.yaml index 15355ef0..bdec6330 100644 --- a/diffuse.yaml +++ b/diffuse.yaml @@ -1,7 +1,7 @@ # diffuse.yaml -- the contract between Sampleworks and Diffuse. # # Apply with: diffuse apply -# Run with: diffuse run start --profile boltz2-xrd +# Run with: diffuse run sampleworks --params params.json --output-dir /data/results/run-001 # # Idempotent -- safe to apply repeatedly. @@ -10,13 +10,13 @@ version: 1 type: sampleworks name: Sampleworks description: > - Protein structure prediction via grid search. - Runs Boltz2, Protenix, or RoseTTAFold3 models with configurable - gradient guidance, ensemble sizes, and diffusion parameters. + Generic Sampleworks experiment runner. Diffuse accepts a flexible params.json + blob, stores it for traceability, materializes it inside the pod, and lets + Sampleworks own validation and model-specific execution. profiles: - - slug: boltz2-xrd - name: Sampleworks Boltz2 (X-Ray Diffraction) + - slug: sampleworks + name: Sampleworks container: image: diffuseproject/sampleworks tag: latest @@ -25,318 +25,36 @@ profiles: entrypoint: null input_schema: inputs: - - key: proteins_path - type: text - default: /data/input/proteins.csv - - key: models - type: enum + - key: params_json + type: json required: true - allowed_values: [boltz2] - default: boltz2 - - key: methods - type: text - default: "X-RAY DIFFRACTION" - - key: scalers - type: text - default: pure_guidance - - key: partial_diffusion_step - type: number - default: 120 - - key: gradient_weights + description: Arbitrary Sampleworks-owned params.json object. + - key: output_dir type: text - default: "0.1 0.2 0.5" - - key: ensemble_sizes - type: text - default: "8" - - key: gradient_normalization - type: boolean - default: true - - key: augmentation - type: boolean - default: true - - key: align_to_input - type: boolean - default: true - run_config_defaults: - shared_memory_size: "16Gi" - image_pull_policy: IfNotPresent - poll_max_retries: 720 - volumes: - - name: data-input - hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 - mountPath: /data/input - readOnly: true - - name: data-results - hostPath: /home/ubuntu/sampleworks-exp/grid_search_results - mountPath: /data/results - - name: msa-cache - hostPath: /home/ubuntu/sampleworks-exp/msa_cache - mountPath: /root/.sampleworks/msa - - name: checkpoints - hostPath: /mnt/diffuse-private - mountPath: /mnt/diffuse-private - readOnly: true - args_template: - base_command: [-e, boltz, run_grid_search.py] - flag_args: - proteins_path: --proteins - models: --model - scalers: --scalers - partial_diffusion_step: --partial-diffusion-step - ensemble_sizes: --ensemble-sizes - gradient_weights: --gradient-weights - methods: --method - boolean_args: - gradient_normalization: --gradient-normalization - augmentation: --augmentation - align_to_input: --align-to-input - static_args: [--output-dir, /data/results] - - - slug: boltz2-md - name: Sampleworks Boltz2 (MD) - container: - image: diffuseproject/sampleworks - tag: latest - gpus: - min: 2 - entrypoint: null - input_schema: - inputs: - - key: proteins_path - type: text - default: /data/input/proteins.csv - - key: models - type: enum required: true - allowed_values: [boltz2] - default: boltz2 - - key: methods - type: text - default: MD - - key: scalers - type: text - default: pure_guidance - - key: partial_diffusion_step - type: number - default: 120 - - key: gradient_weights - type: text - default: "0.1 0.2 0.5" - - key: ensemble_sizes - type: text - default: "8" - - key: gradient_normalization - type: boolean - default: true - - key: augmentation - type: boolean - default: true - - key: align_to_input - type: boolean - default: true + default: /data/results + description: Container output directory, usually under /data/results. run_config_defaults: shared_memory_size: "16Gi" image_pull_policy: IfNotPresent poll_max_retries: 720 + params_file: + param_key: params_json + path: /diffuse/input/params.json volumes: - name: data-input - hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 + hostPath: /data/sampleworks-exp/input mountPath: /data/input readOnly: true - name: data-results - hostPath: /home/ubuntu/sampleworks-exp/grid_search_results + hostPath: /data/sampleworks-exp/grid_search_results mountPath: /data/results - name: msa-cache - hostPath: /home/ubuntu/sampleworks-exp/msa_cache + hostPath: /data/sampleworks-exp/msa_cache mountPath: /root/.sampleworks/msa - - name: checkpoints - hostPath: /mnt/diffuse-private - mountPath: /mnt/diffuse-private - readOnly: true args_template: - base_command: [-e, boltz, run_grid_search.py] + base_command: [--params, /diffuse/input/params.json] flag_args: - proteins_path: --proteins - models: --model - scalers: --scalers - partial_diffusion_step: --partial-diffusion-step - ensemble_sizes: --ensemble-sizes - gradient_weights: --gradient-weights - methods: --method - boolean_args: - gradient_normalization: --gradient-normalization - augmentation: --augmentation - align_to_input: --align-to-input - static_args: [--output-dir, /data/results] - - - slug: protenix - name: Sampleworks Protenix - container: - image: diffuseproject/sampleworks - tag: latest - gpus: - min: 2 - entrypoint: null - input_schema: - inputs: - - key: proteins_path - type: text - default: /data/input/proteins.csv - - key: models - type: enum - required: true - allowed_values: [protenix] - default: protenix - - key: scalers - type: text - default: pure_guidance - - key: partial_diffusion_step - type: number - default: 120 - - key: gradient_weights - type: text - default: "0.1 0.2 0.5" - - key: ensemble_sizes - type: text - default: "8" - - key: gradient_normalization - type: boolean - default: true - - key: augmentation - type: boolean - default: true - - key: align_to_input - type: boolean - default: true - run_config_defaults: - shared_memory_size: "16Gi" - image_pull_policy: IfNotPresent - poll_max_retries: 720 - volumes: - - name: data-input - hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 - mountPath: /data/input - readOnly: true - - name: data-results - hostPath: /home/ubuntu/sampleworks-exp/grid_search_results - mountPath: /data/results - - name: msa-cache - hostPath: /home/ubuntu/sampleworks-exp/msa_cache - mountPath: /root/.sampleworks/msa - - name: checkpoints - hostPath: /mnt/diffuse-private - mountPath: /mnt/diffuse-private - readOnly: true - args_template: - base_command: [-e, protenix, run_grid_search.py] - flag_args: - proteins_path: --proteins - models: --model - scalers: --scalers - partial_diffusion_step: --partial-diffusion-step - ensemble_sizes: --ensemble-sizes - gradient_weights: --gradient-weights - boolean_args: - gradient_normalization: --gradient-normalization - augmentation: --augmentation - align_to_input: --align-to-input - static_args: - - --output-dir - - /data/results - - --model-checkpoint - - /mnt/diffuse-private/raw/checkpoints/protenix_base_default_v0.5.0.pt - - - slug: rf3 - name: Sampleworks RoseTTAFold3 - container: - image: diffuseproject/sampleworks - tag: latest - gpus: - min: 2 - entrypoint: null - input_schema: - inputs: - - key: proteins_path - type: text - default: /data/input/proteins.csv - - key: models - type: enum - required: true - allowed_values: [rf3] - default: rf3 - - key: scalers - type: text - default: pure_guidance - - key: partial_diffusion_step - type: number - default: 120 - - key: gradient_weights - type: text - default: "0.01 0.02 0.05" - - key: ensemble_sizes - type: text - default: "8" - - key: gradient_normalization - type: boolean - default: true - - key: augmentation - type: boolean - default: true - - key: align_to_input - type: boolean - default: true - run_config_defaults: - shared_memory_size: "16Gi" - image_pull_policy: IfNotPresent - poll_max_retries: 720 - volumes: - - name: data-input - hostPath: /mnt/diffuse-private/raw/sampleworks/initial_dataset_40 - mountPath: /data/input - readOnly: true - - name: data-results - hostPath: /home/ubuntu/sampleworks-exp/grid_search_results - mountPath: /data/results - - name: msa-cache - hostPath: /home/ubuntu/sampleworks-exp/msa_cache - mountPath: /root/.sampleworks/msa - - name: checkpoints - hostPath: /mnt/diffuse-private - mountPath: /mnt/diffuse-private - readOnly: true - args_template: - base_command: [-e, rf3, run_grid_search.py] - flag_args: - proteins_path: --proteins - models: --model - scalers: --scalers - partial_diffusion_step: --partial-diffusion-step - ensemble_sizes: --ensemble-sizes - gradient_weights: --gradient-weights - boolean_args: - gradient_normalization: --gradient-normalization - augmentation: --augmentation - align_to_input: --align-to-input - static_args: - - --output-dir - - /data/results - - --model-checkpoint - - /mnt/diffuse-private/raw/checkpoints/rf3_foundry_01_24_latest.ckpt - -fields: - - key: sampleworks_model - type: enum - display_name: Model - allowed_values: [boltz2, protenix, rf3] - required: true - - key: sampleworks_methods - type: text - display_name: Methods - - key: sampleworks_gradient_weights - type: text - display_name: Gradient Weights - default: "0.1 0.2 0.5" - - key: sampleworks_ensemble_sizes - type: text - display_name: Ensemble Sizes - default: "8" + output_dir: --output-dir + boolean_args: {} + static_args: [] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index d89abc5d..45ca3bb2 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -4,6 +4,7 @@ # Usage: # docker run sampleworks -e