diff-use · xraymemory · Jun 1, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -102,7 +102,8 @@ RUN --mount=type=cache,target=/root/.cache/pixi \
     --mount=type=cache,target=/root/.cache/uv \
     pixi install -e boltz --frozen && \
     pixi install -e protenix --frozen && \
-    pixi install -e rf3 --frozen
+    pixi install -e rf3 --frozen && \
+    pixi install -e analysis --frozen
 
 # A GPU is not required to build the image. Pre-compile CUDA extensions only when
 # the builder exposes NVIDIA devices; if present, failures should stop the build.
@@ -114,6 +115,16 @@ from sampleworks.core.forward_models.xray.real_space_density_deps.ops import dil
 print('CUDA extensions compiled successfully')"; \
     fi
 
+# This image carries pixi environments and checkpoints. Runtime source should
+# come from ACTL's synced checkout at /home/dev/workspace, not from stale code
+# baked into /app during image construction.
+RUN rm -rf /app/src /app/scripts /app/experiments /app/analyses \
+    /app/run_grid_search.py /app/run_analysis \
+    && mkdir -p /home/dev/workspace
+
+COPY --chmod=755 run_experiments run_experiments.sh run_all_models.sh run_analysis run_analysis.sh /usr/local/bin/
+RUN printf '\n# ACTL scientist workflow: land in the synced Sampleworks checkout.\nif [[ $- == *i* ]] && [ -z "${SAMPLEWORKS_NO_AUTO_CD:-}" ] && [ -d /home/dev/workspace ]; then\n    cd /home/dev/workspace\nfi\n' >> /root/.bashrc
+
 # ============================================================================
 # Public runtime: regular Sampleworks image for the public registry
 # ============================================================================

diff --git a/README.md b/README.md
@@ -149,7 +149,8 @@ Output layout: `grid_search_results/<protein>/<model>[_<method>]/<scaler>/ens<N>
 
 > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon.
 
-Instructions for running evaluation and metrics scripts are coming soon.
+Evaluation and metrics scripts can be run through `run_analysis`; see
+"Running preset analyses on ACTL" below and `scripts/eval/EVALUATION.md`.
 
 
 ## Running preset experiments on ACTL (`run_experiments`)
@@ -246,6 +247,39 @@ debugging only, opt into an on-pod pixi update with
 rebuilt `pixi-with-checkpoints:sampleworks` image instead.
 
 
+## Running preset analyses on ACTL (`run_analysis`)
+
+`run_analysis` uses the same TOML runner as `run_experiments`, but loads presets
+from `analyses/*.toml` and runs the evaluation scripts under `scripts/eval/`.
+The `analyze_grid_search`, `all`, and `external_tools` presets first run a
+sequential `patch_outputs` pre-job (`scripts/patch_output_cif_files.py`), which
+creates a `refined-patched.cif` file from each `refined.cif` before the evaluation jobs start.
+
+```bash
+export GRID_SEARCH_RESULTS_DIR=/mnt/diffuse-shared/results/sampleworks/<pod>/full_8gpu
+export GRID_SEARCH_INPUTS_DIR=/mnt/diffuse-shared/raw/sampleworks/initial_dataset_40_occ_sweeps
+export PROTEIN_CONFIGS_CSV="$GRID_SEARCH_INPUTS_DIR/protein_analysis_config.csv"
+
+run_analysis --list
+run_analysis --dry-run analyze_grid_search --jobs rscc
+run_analysis analyze_grid_search --jobs rscc,lddt
+run_analysis altloc_find
+run_analysis altloc_classify
+run_analysis all  # includes tortoize and phenix.clashscore jobs
+```
+
+Use `--set` for one-off changes, for example
+`run_analysis analyze_grid_search --jobs rscc --set jobs.rscc.gpus=0`. If your
+input layout differs from the default
+`processed/{pdb_id}/{pdb_id}_single_001_density_input.cif`, override the pattern
+used by the patch pre-job with `--set defaults.PATCH_INPUT_PDB_PATTERN='{pdb_id}/{pdb_id}_original.cif'`.
+When patched CIFs already exist, add `--skip-pre-jobs` to rerun analyses without
+repeating the patching step.
+The `altloc_find` and `altloc_classify` presets are independent of grid-search outputs;
+override `ALTLOC_INPUTS_DIR` (default `/data/inputs`) or `ALTLOC_ANALYSIS_DIR`
+(default `/data/results/altloc_analysis`) when your input or output root differs.
+
+
 ## Docker
 
 Sampleworks now has a two-layer image split:

diff --git a/analyses/all.toml b/analyses/all.toml
@@ -0,0 +1,71 @@
+description = "All grid-search evaluations, including optional external-tool checks."
+
+[defaults]
+GRID_SEARCH_RESULTS_DIR = "/data/results/grid_search_results"
+GRID_SEARCH_INPUTS_DIR = "/data/inputs"
+PROTEIN_CONFIGS_CSV = "${GRID_SEARCH_INPUTS_DIR}/protein_analysis_config.csv"
+TARGET_FILENAME = "refined-patched.cif"
+N_JOBS = "16"
+PATCH_CIF_PATTERN = "refined.cif"
+GRID_SEARCH_DEPTH = "4"
+PATCH_DEPTH = "${GRID_SEARCH_DEPTH}"
+PATCH_INPUT_PDB_PATTERN = "processed/{pdb_id}/{pdb_id}_single_001_density_input.cif"
+PATCH_RCSB_PATTERN = "${GRID_SEARCH_RESULTS_DIR}/([A-Za-z0-9]{4})"
+
+[shared_args]
+grid-search-results-path = "${GRID_SEARCH_RESULTS_DIR}"
+grid-search-inputs-path = "${GRID_SEARCH_INPUTS_DIR}"
+protein-configs-csv = "${PROTEIN_CONFIGS_CSV}"
+target-filename = "${TARGET_FILENAME}"
+occupancies = [0.0, 0.25, 0.5, 0.75, 1.0]
+n-jobs = "${N_JOBS}"
+depth = "${GRID_SEARCH_DEPTH}"
+
+[[pre_jobs]]
+name = "patch_outputs"
+env = "analysis"
+gpus = "none"
+script = "scripts/patch_output_cif_files.py"
+output_arg = ""
+output_subdir = "analysis/patch_outputs"
+args = { input-dir = "${GRID_SEARCH_RESULTS_DIR}", cif-pattern = "${PATCH_CIF_PATTERN}", rcsb-pattern = "${PATCH_RCSB_PATTERN}", depth = "${PATCH_DEPTH}", grid-search-input-dir = "${GRID_SEARCH_INPUTS_DIR}", input-pdb-pattern = "${PATCH_INPUT_PDB_PATTERN}" }
+
+[[jobs]]
+name = "rscc"
+env = "analysis"
+gpu_count = 1
+script = "scripts/eval/rscc_grid_search_script.py"
+output_arg = ""
+output_subdir = "analysis/rscc"
+
+[[jobs]]
+name = "lddt"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/lddt_evaluation_script.py"
+output_arg = ""
+output_subdir = "analysis/lddt"
+
+[[jobs]]
+name = "bond_geometry"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/bond_geometry_eval.py"
+output_arg = ""
+output_subdir = "analysis/bond_geometry"
+
+[[jobs]]
+name = "tortoize"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/run_and_process_tortoize.py"
+output_arg = ""
+output_subdir = "analysis/tortoize"
+
+[[jobs]]
+name = "phenix_clashscore"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/run_and_process_phenix_clashscore.py"
+output_arg = ""
+output_subdir = "analysis/phenix_clashscore"
diff --git a/analyses/altloc_classify.toml b/analyses/altloc_classify.toml
@@ -0,0 +1,25 @@
+description = "Classify altloc selections into side-chain, loop, and domain-shift categories."
+
+[defaults]
+ALTLOC_ANALYSIS_DIR = "/data/results/altloc_analysis"
+ALTLOC_INPUTS_DIR = "/data/inputs"
+ALTLOC_SELECTIONS_CSV = "${ALTLOC_ANALYSIS_DIR}/altloc_selections.csv"
+ALTLOC_CLASSIFICATIONS_CSV = "${ALTLOC_ANALYSIS_DIR}/altloc_region_classifications.csv"
+CIF_ROOT = "${ALTLOC_INPUTS_DIR}"
+DOMAIN_SHIFT_MIN_SPAN = "50"
+LOOP_LDDT_THRESHOLD = "0.75"
+
+[shared_args]
+input-csv = "${ALTLOC_SELECTIONS_CSV}"
+cif-root = "${CIF_ROOT}"
+output-file = "${ALTLOC_CLASSIFICATIONS_CSV}"
+domain-shift-min-span = "${DOMAIN_SHIFT_MIN_SPAN}"
+loop-lddt-threshold = "${LOOP_LDDT_THRESHOLD}"
+
+[[jobs]]
+name = "classify_altloc_regions"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/classify_altloc_regions.py"
+output_arg = ""
+output_subdir = "analysis/altloc_classify"
diff --git a/analyses/altloc_find.toml b/analyses/altloc_find.toml
@@ -0,0 +1,23 @@
+description = "Build an analysis protein-config CSV by finding altloc selections in input CIFs."
+
+[defaults]
+ALTLOC_ANALYSIS_DIR = "/data/results/altloc_analysis"
+ALTLOC_INPUTS_DIR = "/data/inputs"
+PROTEINS_CSV = "${ALTLOC_INPUTS_DIR}/proteins.csv"
+ALTLOC_SELECTIONS_CSV = "${ALTLOC_ANALYSIS_DIR}/altloc_selections.csv"
+ALTLOC_MIN_SPAN = "5"
+ALTLOC_LABEL = "label_alt_id"
+
+[shared_args]
+input-csv = "${PROTEINS_CSV}"
+output-file = "${ALTLOC_SELECTIONS_CSV}"
+min-span = "${ALTLOC_MIN_SPAN}"
+altloc-label = "${ALTLOC_LABEL}"
+
+[[jobs]]
+name = "find_altloc_selections"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/find_altloc_selections.py"
+output_arg = ""
+output_subdir = "analysis/altloc_find"
diff --git a/analyses/analyze_grid_search.toml b/analyses/analyze_grid_search.toml
@@ -0,0 +1,55 @@
+description = "Analyze grid-search outputs with RSCC, LDDT clustering, and bond geometry."
+
+[defaults]
+GRID_SEARCH_RESULTS_DIR = "/data/results/grid_search_results"
+GRID_SEARCH_INPUTS_DIR = "/data/inputs"
+PROTEIN_CONFIGS_CSV = "${GRID_SEARCH_INPUTS_DIR}/protein_analysis_config.csv"
+TARGET_FILENAME = "refined-patched.cif"
+N_JOBS = "16"
+PATCH_CIF_PATTERN = "refined.cif"
+GRID_SEARCH_DEPTH = "4"
+PATCH_DEPTH = "${GRID_SEARCH_DEPTH}"
+PATCH_INPUT_PDB_PATTERN = "processed/{pdb_id}/{pdb_id}_single_001_density_input.cif"
+PATCH_RCSB_PATTERN = "${GRID_SEARCH_RESULTS_DIR}/([A-Za-z0-9]{4})"
+
+[shared_args]
+grid-search-results-path = "${GRID_SEARCH_RESULTS_DIR}"
+grid-search-inputs-path = "${GRID_SEARCH_INPUTS_DIR}"
+protein-configs-csv = "${PROTEIN_CONFIGS_CSV}"
+target-filename = "${TARGET_FILENAME}"
+occupancies = [0.0, 0.25, 0.5, 0.75, 1.0]
+n-jobs = "${N_JOBS}"
+depth = "${GRID_SEARCH_DEPTH}"
+
+[[pre_jobs]]
+name = "patch_outputs"
+env = "analysis"
+gpus = "none"
+script = "scripts/patch_output_cif_files.py"
+output_arg = ""
+output_subdir = "analysis/patch_outputs"
+args = { input-dir = "${GRID_SEARCH_RESULTS_DIR}", cif-pattern = "${PATCH_CIF_PATTERN}", rcsb-pattern = "${PATCH_RCSB_PATTERN}", depth = "${PATCH_DEPTH}", grid-search-input-dir = "${GRID_SEARCH_INPUTS_DIR}", input-pdb-pattern = "${PATCH_INPUT_PDB_PATTERN}" }
+
+[[jobs]]
+name = "rscc"
+env = "analysis"
+gpu_count = 1
+script = "scripts/eval/rscc_grid_search_script.py"
+output_arg = ""
+output_subdir = "analysis/rscc"
+
+[[jobs]]
+name = "lddt"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/lddt_evaluation_script.py"
+output_arg = ""
+output_subdir = "analysis/lddt"
+
+[[jobs]]
+name = "bond_geometry"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/bond_geometry_eval.py"
+output_arg = ""
+output_subdir = "analysis/bond_geometry"
diff --git a/analyses/external_tools.toml b/analyses/external_tools.toml
@@ -0,0 +1,47 @@
+description = "Evaluation jobs that require external executables: tortoize and phenix.clashscore."
+
+[defaults]
+GRID_SEARCH_RESULTS_DIR = "/data/results/grid_search_results"
+GRID_SEARCH_INPUTS_DIR = "/data/inputs"
+PROTEIN_CONFIGS_CSV = "${GRID_SEARCH_INPUTS_DIR}/protein_analysis_config.csv"
+TARGET_FILENAME = "refined-patched.cif"
+N_JOBS = "16"
+PATCH_CIF_PATTERN = "refined.cif"
+GRID_SEARCH_DEPTH = "4"
+PATCH_DEPTH = "${GRID_SEARCH_DEPTH}"
+PATCH_INPUT_PDB_PATTERN = "processed/{pdb_id}/{pdb_id}_single_001_density_input.cif"
+PATCH_RCSB_PATTERN = "${GRID_SEARCH_RESULTS_DIR}/([A-Za-z0-9]{4})"
+
+[shared_args]
+grid-search-results-path = "${GRID_SEARCH_RESULTS_DIR}"
+grid-search-inputs-path = "${GRID_SEARCH_INPUTS_DIR}"
+protein-configs-csv = "${PROTEIN_CONFIGS_CSV}"
+target-filename = "${TARGET_FILENAME}"
+occupancies = [0.0, 0.25, 0.5, 0.75, 1.0]
+n-jobs = "${N_JOBS}"
+depth = "${GRID_SEARCH_DEPTH}"
+
+[[pre_jobs]]
+name = "patch_outputs"
+env = "analysis"
+gpus = "none"
+script = "scripts/patch_output_cif_files.py"
+output_arg = ""
+output_subdir = "analysis/patch_outputs"
+args = { input-dir = "${GRID_SEARCH_RESULTS_DIR}", cif-pattern = "${PATCH_CIF_PATTERN}", rcsb-pattern = "${PATCH_RCSB_PATTERN}", depth = "${PATCH_DEPTH}", grid-search-input-dir = "${GRID_SEARCH_INPUTS_DIR}", input-pdb-pattern = "${PATCH_INPUT_PDB_PATTERN}" }
+
+[[jobs]]
+name = "tortoize"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/run_and_process_tortoize.py"
+output_arg = ""
+output_subdir = "analysis/tortoize"
+
+[[jobs]]
+name = "phenix_clashscore"
+env = "analysis"
+gpus = "none"
+script = "scripts/eval/run_and_process_phenix_clashscore.py"
+output_arg = ""
+output_subdir = "analysis/phenix_clashscore"
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
@@ -6,7 +6,7 @@
 #   docker run pixi-with-checkpoints -e boltz run_grid_search.py --proteins /data/proteins.csv ...
 #   docker run pixi-with-checkpoints bash  # interactive shell
 #
-# Available pixi environments: boltz, protenix, rf3
+# Available pixi environments: boltz, protenix, rf3, analysis
 #
 # Examples:
 #   # Run grid search with RF3
@@ -36,14 +36,15 @@ IMPORTANT:
     Always use --shm-size=16g (or larger) to avoid shared memory errors with DataLoaders.
 
 OPTIONS:
-    -e, --env <env>     Pixi environment to use (boltz, protenix, rf3)
+    -e, --env <env>     Pixi environment to use (boltz, protenix, rf3, analysis)
     -h, --help          Show this help message
     bash                Start an interactive shell
 
 ENVIRONMENTS:
     boltz       For boltz1 and boltz2 models
     protenix    For protenix model  
     rf3         For RF3 model
+    analysis    For scripts/eval analysis jobs
 
 EXAMPLES:
     # Run grid search with RF3 model
@@ -190,6 +191,10 @@ PROTEINS CSV FORMAT:
       1abc,/data/structures/1abc.cif,/data/maps/1abc.ccp4,2.0
       2xyz,/data/structures/2xyz.cif,/data/maps/2xyz.mrc,1.8
 
+ACTL helper commands:
+    run_experiments         Run experiments/*.toml presets
+    run_analysis            Run analyses/*.toml presets for scripts/eval
+
 For full argument details, run:
     docker run pixi-with-checkpoints -e boltz run_grid_search.py --help
 EOF
@@ -202,7 +207,7 @@ if [ $# -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
 fi
 
 # Handle scientist workflow helpers and interactive shells
-if [ "$1" = "run_experiments" ] || [ "$1" = "run_experiments.sh" ] || [ "$1" = "run_all_models.sh" ]; then
+if [ "$1" = "run_experiments" ] || [ "$1" = "run_experiments.sh" ] || [ "$1" = "run_all_models.sh" ] || [ "$1" = "run_analysis" ] || [ "$1" = "run_analysis.sh" ]; then
     exec "$@"
 fi
 
@@ -216,7 +221,7 @@ while [[ $# -gt 0 ]]; do
     case $1 in
         -e|--env)
             if [ -z "$2" ] || [[ "$2" == -* ]]; then
-                echo "Error: -e/--env requires an environment name (boltz, protenix, rf3)"
+                echo "Error: -e/--env requires an environment name (boltz, protenix, rf3, analysis)"
                 exit 1
             fi
             ENV="$2"
@@ -236,7 +241,7 @@ done
 
 # Validate environment
 if [[ -z "$ENV" ]]; then
-    echo "Error: Environment not specified. Use -e <env> where env is boltz, protenix, or rf3"
+    echo "Error: Environment not specified. Use -e <env> where env is boltz, protenix, rf3, or analysis"
     echo ""
     echo "Usage: docker run pixi-with-checkpoints -e <env> <script> [args...]"
     echo ""
@@ -248,10 +253,10 @@ if [[ -z "$ENV" ]]; then
 fi
 
 case $ENV in
-    boltz|protenix|rf3)
+    boltz|protenix|rf3|analysis)
         ;;
     *)
-        echo "Error: Invalid environment '$ENV'. Must be one of: boltz, protenix, rf3"
+        echo "Error: Invalid environment '$ENV'. Must be one of: boltz, protenix, rf3, analysis"
         exit 1
         ;;
 esac

diff --git a/pyproject.toml b/pyproject.toml
@@ -36,10 +36,12 @@ requires-python = ">= 3.11, <3.14"
 version = "0.7.0"
 
 [project.scripts]
+sampleworks-analysis = "sampleworks.runs.analysis_cli:main"
 sampleworks-guidance = "sampleworks.cli.guidance:main"
 sampleworks-runs = "sampleworks.runs.cli:main"
 
 [tool.hatch.build.targets.wheel.force-include]
+"analyses" = "analyses"
 "experiments" = "experiments"
 
 [tool.hatch.metadata]