From 250dbb8eaad9ad1a757f6f692b351912d831c84d Mon Sep 17 00:00:00 2001
From: "Marcus D. Collins" <marcus.collins@astera.org>
Date: Wed, 25 Mar 2026 11:58:50 -0700
Subject: [PATCH 1/3] feat:add tortoize (bb and sc dihedral z-score) script

---
 EVALUATION.md                                 |   0
 scripts/eval/run_and_process_tortoize.py      | 149 ++++++++++++++++++
 ...cif_files.py => patch_output_cif_files.py} |   0
 3 files changed, 149 insertions(+)
 create mode 100644 EVALUATION.md
 create mode 100644 scripts/eval/run_and_process_tortoize.py
 rename scripts/{patch_input_cif_files.py => patch_output_cif_files.py} (100%)

diff --git a/EVALUATION.md b/EVALUATION.md
new file mode 100644
index 00000000..e69de29b
diff --git a/scripts/eval/run_and_process_tortoize.py b/scripts/eval/run_and_process_tortoize.py
new file mode 100644
index 00000000..d8edc4f7
--- /dev/null
+++ b/scripts/eval/run_and_process_tortoize.py
@@ -0,0 +1,149 @@
+import json
+import subprocess
+from pathlib import Path
+from typing import Any
+
+import joblib
+import pandas as pd
+from loguru import logger
+from pandas import DataFrame
+
+from sampleworks.eval.eval_dataclasses import Trial
+from sampleworks.eval.grid_search_eval_utils import parse_eval_args, setup_evaluation_parameters
+
+
+# TODO make more general: https://github.com/diff-use/sampleworks/issues/93
+def main(args) -> None:
+    # check that phenix is installed and available, bail early if not.
+    try:
+        subprocess.call("tortoize", stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    except FileNotFoundError:
+        raise RuntimeError(
+            "tortoize is not available, make sure you have installed it."
+        )
+    # The dropped variable is a list of ProteinConfigs, not used yet in this script
+    all_trials, _ = setup_evaluation_parameters(args)
+
+    # Now loop over trials with joblib and get back tuples of trial level metrics
+    tortoize_results = joblib.Parallel(n_jobs=args.n_jobs)(
+        joblib.delayed(get_stats_for_single_path)(trial.refined_cif_path) for trial in all_trials
+    )
+    if not tortoize_results:
+        logger.error("No trials successfully processed, check that result files are available.")
+        return
+
+    all_residue_results, all_protein_results = tuple(zip(*tortoize_results, strict=True))
+
+    output_file = f"tortoize_residues.csv"
+    pd.concat(all_residue_results).to_csv(output_file, index=False)
+    logger.info(f"Residue results saved to {output_file}")
+
+    output_file = f"tortoize_protein_stats.csv"
+    pd.concat(all_protein_results).to_csv(output_file, index=False)
+    logger.info(f"Protein-level stats saved to {output_file}")
+
+
+def flatten_residues(tortoize_json: dict[str, Any]) -> pd.DataFrame:
+    """
+    Flattens tortoize JSON into one dict per residue across all models.
+    See test/1cbs.json for an example of the tortoize output JSON
+
+    Output keys per residue:
+      - model (e.g. "1")
+      - asymID
+      - compID
+      - seqID
+      - ss_type (taken from ramachandran["ss-type"] if present, else torsion["ss-type"])
+      - ramachandran_z_score (ramachandran["z-score"])
+      - torsion_z_score (torsion["z-score"])
+    """
+    out: list[dict[str, Any]] = []
+
+    # This is possibly over-robust. Claude Sonnet wrote it and I checked/modified it.
+    model_block = tortoize_json.get("model", {})
+    for model_id, model_data in model_block.items():
+        residues = (model_data or {}).get("residues", [])
+        if not residues:
+            logger.warning(f"No residues found in model {model_id}")
+            continue
+        for r in residues:
+            rama = (r or {}).get("ramachandran", {})
+            tors = (r or {}).get("torsion", {})
+            pdb_info = (r or {}).get("pdb", {})
+
+            ss_type: str | None = rama.get("ss-type", None)
+            if ss_type is None:
+                ss_type = tors.get("ss-type", None)
+
+            out.append(
+                {
+                    "model": str(model_id),
+                    "asymID": r.get("asymID", None),
+                    "compID": r.get("compID", None),
+                    "seqID": r.get("seqID", None),
+                    "strandID": pdb_info.get("strandID", None),
+                    "insCode": pdb_info.get("insCode", None),
+                    "ss_type": ss_type,
+                    "ramachandran_z_score": rama.get("z-score", None),
+                    "torsion_z_score": tors.get("z-score", None),
+                }
+            )
+
+    return pd.DataFrame(out)
+
+
+def get_protein_level_z_scores(tortoize_json: dict[str, Any]) -> pd.DataFrame:
+    """
+    Extracts protein-level z-scores for torsion and Ramachandran angles from tortoize JSON output.
+    See test/1cbs.json for an example of the tortoize output JSON
+
+
+    :param tortoize_json:
+    :return: pd.DataFrame with keys:
+      - model (e.g. "1")
+      - ramachandran_z_score
+      - torsion_z_score
+      - ramachandran_jackknife_sd
+      - torsion_jackknife_sd
+      - residue_count
+    """
+
+    out: list[dict[str, Any]] = []
+    model_block = tortoize_json.get("model", {})
+    for model_id, model_data in model_block.items():
+        out.append({
+            "model": str(model_id),
+            "ramachandran_z_score": model_data.get("ramachandran-z", None),
+            "ramachandran_jackknife_sd": model_data.get("ramachandran-jackknife-sd", None),
+            "torsion_z_score": model_data.get("torsion-z", None),
+            "torsion_jackknife_sd": model_data.get("torsion-jackknife-sd", None)
+        })
+    return pd.DataFrame(out)
+
+
+def get_stats_for_single_path(path: Path) -> tuple[DataFrame, DataFrame]:
+    logger.info(f"Processing {path}")
+    try:
+        output = subprocess.check_output(f"tortoize {path}".split())
+        result = json.loads(output)
+    except Exception as e:
+        logger.error(f"Failed to process {path}: {e}")
+        return pd.DataFrame(), pd.DataFrame()
+
+    residues = flatten_residues(result)
+    if residues.empty:
+        logger.warning(f"No residues found in {path}")
+        return pd.DataFrame(), pd.DataFrame()
+
+    residues["path"] = path
+
+    protein_level_stats = get_protein_level_z_scores(result)
+    protein_level_stats["path"] = path
+    return residues, protein_level_stats
+
+
+if __name__ == "__main__":
+    argparse_description = "Crawl the workspace root for CIF files matching "
+    argparse_description += "--target-filename and run tortoize on them."
+    eval_args = parse_eval_args(description=argparse_description)
+    main(eval_args)
diff --git a/scripts/patch_input_cif_files.py b/scripts/patch_output_cif_files.py
similarity index 100%
rename from scripts/patch_input_cif_files.py
rename to scripts/patch_output_cif_files.py

From 3c087f1085629866b9ed6bff0804c065c5f38436 Mon Sep 17 00:00:00 2001
From: "Marcus D. Collins" <marcus.collins@astera.org>
Date: Wed, 25 Mar 2026 12:00:06 -0700
Subject: [PATCH 2/3] fix:add documentation of evaluation scripts, resolves
 https://github.com/diff-use/sampleworks/issues/123

---
 EVALUATION.md                            | 188 +++++++++++++++++++++++
 scripts/eval/run_and_process_tortoize.py |   4 +-
 2 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/EVALUATION.md b/EVALUATION.md
index e69de29b..efa77eed 100644
--- a/EVALUATION.md
+++ b/EVALUATION.md
@@ -0,0 +1,188 @@
+# Evaluation of SampleWorks Grid Search Results
+
+# External software requirements
+## tortoize
+SampleWorks relies on tortoize to compute backbone and sidechain dihedral angle outliers.
+`tortoize` is free software and can be downloaded from https://github.com/PDB-REDO/tortoize.
+You should install it following their instructions and make sure it is available in the environment
+where you run SampleWorks. The script scripts/eval/run_and_process_tortoize.py will check for the
+`tortoize` executable before running and will raise an error if it is not available.
+
+## phenix
+Information about the phenix package can be found at https://phenix-online.org/. Phenix requires a 
+license which is free to academic users. Others may have to pay a fee. Sampleworks makes use of the
+phenix.clashscore command and `run_and_process_phenix_clashscore.py` will check for it before
+running, raising an error if it is not available.
+
+# Running the evaluations
+## Preparing the output CIF files
+As of this writing, Sampleworks outputs CIF files that primarily contain the output atomic 
+coordinates, and not the additional information that many programs, like `tortoize` and 
+`phenix.clashscore`, require. Furthermore, many protein structure predictors effectively 
+renumber residues. Since our metrics are frequently calculated by comparing selections of atoms or 
+residues, we must align to the original _sequence_ of the protein as well. Future versions of 
+Sampleworks will handle these issues automatically. For now, you should run the script
+`scripts/patch_output_cif_files.py`. This will use the original PDB inputs to reconstruct proper 
+output CIF files that are numbered correctly and
+have all necessary metadata to reconstruct the protein structure correctly.
+
+You can run the following command, which assumes:
+- your sampleworks output is stored in `/home/ubuntu/grid_search_results`, 
+- the output is organized by RCSB PDB ID in directories like `/home/ubuntu/grid_search_results/1VME/...`,
+  see the `--rcsb-pattern` argument which is a regex to match the RCSB PDB ID
+- the input PDB cif files are stored in `/home/ubuntu/grid_search_inputs` as required for running the
+  the grid search (see GRID_SEARCH.md)
+- the input PDB cif files are stored in `/home/ubuntu/grid_search_inputs` as, e.g., 
+  `/home/ubuntu/grid_search_inputs/1VME/1VME_original.cif`, see the `--input-pdb-pattern` argument, which
+  is a python format string which must use the `pdb_id` variable to refer to the RCSB PDB ID.
+
+```shell
+pixi run -e analysis python scripts/patch_output_cif_files.py \
+  --input-dir /home/ubuntu/grid_search_results \
+  --rcsb-pattern 'grid_search_results/(.{4})/...' \
+  --cif-pattern 'refined.cif' \
+  --grid-search-input-dir /home/ubuntu/grid_search_inputs \
+  --input-pdb-pattern '{pdb_id}/{pdb_id}_original.cif'
+```
+
+This script searches recursively for all CIF files under the input directory, by default up to 4
+levels deep. If you organize the output more deeply, you can specify the depth with the `--depth`
+argument. It will output a patched CIF file named `refined-patched.cif` alongside each original `refined.cif`
+file. These `refined-patched.cif` files can be used as input to the remaining evaluation scripts.
+
+## Running the scripts
+The evaluation scripts have a common interface defined by the method 
+`sampleworks.eval.grid_search_eval_utils.parse_eval_args`. The general form of these commands is:
+
+```shell
+pixi run -e analysis python scripts/eval/<script> \
+--grid-search-results-path /home/ubuntu/grid_search_results \
+--grid-search-inputs-path /home/ubuntu/grid_search_inputs \
+--target-filename 'refined-patched.cif' \
+--protein-configs-csv /home/ubuntu/protein_analysis_config.csv \
+--occupancies 0.0 0.25 0.5 0.75 1.0 \
+--n-jobs 16
+```
+The `--occupancies` argument is a list of occupancy values to evaluate, which should correspond to
+what you used in the grid search. 
+
+The `--n-jobs` argument is the number of parallel jobs to run; it is not used by all scripts yet but
+speeds some up considerably, especially for the tortoize and clashscore scripts.
+
+The `--protein-configs-csv` argument is a CSV file that describes which parts of each protein to evaluate.
+Examples can be found in `sampleworks/data/`.
+The file has the following columns:
+- `protein`, the PDB id of the protein to evaluate. 
+- `selection`, a semicolon separated list of selections, with a (very) limited PyMOL like algebra 
+or a more complete atomworks-style selection syntax. See examples in the files in 
+`sampleworks/data/`. Selections are only used by the RSCC and LDDT scripts.
+- `structure_pattern`, the filename of the reference PDB file passed to the sampleworks generation script, 
+  probably through an input configuration CSV file if you used the `run_grid_search.py` script. If a 
+  different reference structure was used for different occupancies, you can use the variable `occ_str`
+  in the pattern, which is replaced with the occupancy values one by one.
+- `map_pattern`, Similar to `structure_pattern`, but for the density map files.
+- `base_map_dir` The code assumes the maps for each protein are in their own subdirectory of the inputs 
+directory specified by `grid_search_inputs_path`, e.g., `processed/1VME`.
+
+## Evaluation scripts you can run.
+All evaluation scripts are in the `scripts/eval` directory. 
+
+### `run_and_process_tortoize.py`
+Uses the PDB-REDO tortoize program to compute backbone and sidechain dihedral angle outliers.
+It produces two files in the directory where it is run: 
+- `tortoize_residues.csv`, detailed information about each residue's backbone (ramachandran) and
+sidechain (\chi_1, \chi_2) angle z-scores. See PDB-REDO tortoize documentation for more details.
+- `tortoize_protein_stats.csv` Protein-level aggregations of the tortoize results.
+
+### `run_and_process_phenix_clashscore.py`
+Uses the phenix.clashscore program to compute the clashscore of the protein. It produces a single
+file in the directory where it is run: `clashscore_metrics.csv`, which contains one row per
+_model_ in the CIF file. (If there are 8 models in the CIF file, that CIF file will have 8 rows in
+the output file.) For each model, we report the clashscore as defined by the phenix.clashscore
+program, and the number of clashes.
+
+### `bond_geometry_eval.py`
+This script computes bond-length and bond-angle outliers for every bond and angle in every model in
+each CIF file. It produces four files:
+- `bond_length_outliers.csv`, one row per outlier bond, with the CIF file, model number, bond length and the
+  RDKit upper and lower bounds for that type of bond.
+- `bond_angle_outliers.csv`, one row per outlier angle, with the CIF file and model number, where an angle outlier is
+  defined as having a distance between the two non-central atoms greater than the expected distance
+  based on the angle type and the lengths of the two bonds. It also reports the RDKit upper and lower
+  _distance_ bounds for that type of angle.
+- `bond_length_violation_fractions.csv`, one row per model per CIF file, with the fraction of outlier bonds in that model.
+- `bond_angle_violation_fractions.csv`, one row per model per CIF file, with the fraction of outlier angles in that model.
+
+### `rscc_grid_search_script.py`
+This script computes the real-space (electron density) correlation coefficient (RSCC) for every 
+selection defined in the file passed to `--protein-configs-csv`, for every occupancy defined by 
+`--occupancies`. Model electron density is computed as an average over the models in the CIF file; 
+target electron density comes from the map file defined by the `base_map_dir` column in the 
+`--protein-configs-csv` file and the occupancy. The code takes values from voxels in the maps
+within 2&#x212B; of the selected atoms' centers. It automatically aligns the maps to the original
+protein structure, but for now this requires the original PDB file (extracted again from the protein
+config file). An example row is shown in the `value` column of the table below.
+
+
+The output file, `rscc_metrics.csv`, contains one row per selection per output CIF.
+
+| column               | description                                                                        | value                                                                                                                   |
+|----------------------|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------|
+| protein              | an RCSB id                                                                         | 6dur                                                                                                                    |
+| altloc_occupancies  | a dictionary indicating occupancies used in the experiments                        | {'A': 1.0}                                                                                                              |
+| model                | the model used for generation                                                      | boltz2                                                                                                                  |
+| method               | (Boltz-2 only)                                                                     | MD                                                                                                                      |
+| scaler               | the trajectory scaler                                                              | pure_guidance                                                                                                           |
+| ensemble_size        | The number of output models in the `refined_cif_path`                              | 8                                                                                                                       |
+| guidance_weight      |                                                                                    | 0.1                                                                                                                     |
+| gd_steps             | Deprecated.                                                                        |
+| trial_dir            | the output directory used in generation                                            | /data/sampleworks-exp/occ_sweep/grid_search_results/6DUR_1.0occA/boltz2_MD/pure_guidance/ens8_gw0.1                     |
+| refined_cif_path     | the exact CIF file containing generated models                                     | /data/sampleworks-exp/occ_sweep/grid_search_results/6DUR_1.0occA/boltz2_MD/pure_guidance/ens8_gw0.1/refined-patched.cif |
+| protein_dir_name     | the subdirectory in --grid-search-results-path containing results for this protein | 6DUR_1.0occA                                                                                                            |
+| rscc                 | the real-space correlation coefficient for the voxels around selected atoms        | 0.16054469603483204                                                                                                     |
+| base_map_path        | The path to the reference/target electron density map used for guidance            | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40_occ_sweeps/processed/6DUR/6DUR_uniform_1.00A.ccp4               |
+| error                | Any error, if the calculation cannot be completed                                  |                                                                                                                         |
+| selection            | The selection string that was used, described above.                               | chain A and resi 13-46                                                                                                  |
+
+
+### `lddt_evaluation_script.py`
+Similarly to the RSCC script, this script computes the LDDT for every selection defined in the file
+passed to `--protein-configs-csv`, for every occupancy defined by `--occupancies`. 
+
+This script produces a single file, `lddt_metrics.csv`, with each row as described in the table below.
+
+The script attempts to assign selections in each of the models in the CIF file to the altlocs defined
+in the input reference structure, using as a 
+psuedo-distance the LDDT scores computed over the selected atoms in each model. 
+In the example below, the CIF file is 
+```shell
+/mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1/refined.cif
+```
+and it has 8 models. The selection is over all atoms that have altlocs in the reference structure (1VME).
+This includes a loop movement, and 5/8 models are closer, in this LDDT sense, to altloc A, while 3/8
+are closer to altloc B. The script then computes a silhouette score for this assumed clustering, 
+which is reported as the `avg_silhouette` score. We also report a psuedo-silhouette score, which is
+a measure of how well the generated conformers match the reference altlocs. 1.0 is a perfect score, 
+and 0.0 indicates a poor clustering. In the example provided, the psuedo-silhouette score is 0.0034,
+indicating that the generated conformers are not well separated and do not reflect the reference altlocs.
+
+| column                | description                                                                                                                                                                | example value                                                                                                                              |
+|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|
+| protein               | an RCSB id                                                                                                                                                                 | 1vme                                                                                                                                       |
+| altloc_occupancies    | ta dictionary indicating occupancies used in the experiments                                                                                                               | {'A': 0.5}                                                                                                                                 |
+| model                 | the model used for structure generation                                                                                                                                    | boltz2                                                                                                                                     |
+| method                | (Boltz 2 only) the particular Boltz training method                                                                                                                        | MD                                                                                                                                         |
+| scaler                | the trajectory scaler used                                                                                                                                                 | pure_guidance                                                                                                                              |
+| ensemble_size         | the number of models in the output cif file                                                                                                                                | 8                                                                                                                                          |
+| guidance_weight       |                                                                                                                                                                            | 0.1                                                                                                                                        |
+| gd_steps              | Deprecated                                                                                                                                                                 |                                                                                                                                            |
+| trial_dir             | the directory containing generation results | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1             |
+| refined_cif_path      | the exact CIF file containing generated models                                                                                                                             | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1/refined.cif |
+| protein_dir_name      | the subdirectory in --grid-search-results-path containing results for this protein                                                                                         | 1VME_native_occ                                                                                                                            |
+| rscc                  | unused                                                                                                                                                                     |
+| base_map_path         | unused                                                                                                                                                                     |
+| error                 | any error, if the calculation cannot be completed                                                                                                                          |
+| selection             | the selection string that was used, described above (full string will be available, shortened here)                                                                        | chain_id == 'A' and ((res_id == 1) or ... or (res_id >= 316 and res_id <= 324) or (res_id == 326) or (res_id == 373))                      |
+| occupancies           | space-separated list of occupancies of the identified clustered states                                                                                                     | [0.625, 0.375]                                                                                                                             |
+| avg_silhouette        | the average silhoutte score for the clustered conformers/altlocs (see above)                                                                                               | 0.16082633177992867
+| avg_silhouette_to_ref | the psuedo-silhouette score of the generated structures to the reference altlocs in the original generation input                                                          | 0.003392312238469336                                                                                                                      |
diff --git a/scripts/eval/run_and_process_tortoize.py b/scripts/eval/run_and_process_tortoize.py
index d8edc4f7..26ce54ea 100644
--- a/scripts/eval/run_and_process_tortoize.py
+++ b/scripts/eval/run_and_process_tortoize.py
@@ -34,11 +34,11 @@ def main(args) -> None:
 
     all_residue_results, all_protein_results = tuple(zip(*tortoize_results, strict=True))
 
-    output_file = f"tortoize_residues.csv"
+    output_file = "tortoize_residues.csv"
     pd.concat(all_residue_results).to_csv(output_file, index=False)
     logger.info(f"Residue results saved to {output_file}")
 
-    output_file = f"tortoize_protein_stats.csv"
+    output_file = "tortoize_protein_stats.csv"
     pd.concat(all_protein_results).to_csv(output_file, index=False)
     logger.info(f"Protein-level stats saved to {output_file}")
 

From 388e37e7627795263cc900ca05c0a978527bf677 Mon Sep 17 00:00:00 2001
From: "Marcus D. Collins" <marcus.collins@astera.org>
Date: Wed, 25 Mar 2026 15:23:28 -0700
Subject: [PATCH 3/3] f/b on https://github.com/diff-use/sampleworks/pull/186

---
 EVALUATION.md => scripts/eval/EVALUATION.md | 56 +++++++++++----------
 scripts/eval/run_and_process_tortoize.py    | 52 +++++++++++++------
 2 files changed, 67 insertions(+), 41 deletions(-)
 rename EVALUATION.md => scripts/eval/EVALUATION.md (84%)

diff --git a/EVALUATION.md b/scripts/eval/EVALUATION.md
similarity index 84%
rename from EVALUATION.md
rename to scripts/eval/EVALUATION.md
index efa77eed..c129c26e 100644
--- a/EVALUATION.md
+++ b/scripts/eval/EVALUATION.md
@@ -32,9 +32,11 @@ You can run the following command, which assumes:
   see the `--rcsb-pattern` argument which is a regex to match the RCSB PDB ID
 - the input PDB cif files are stored in `/home/ubuntu/grid_search_inputs` as required for running the
   the grid search (see GRID_SEARCH.md)
-- the input PDB cif files are stored in `/home/ubuntu/grid_search_inputs` as, e.g., 
-  `/home/ubuntu/grid_search_inputs/1VME/1VME_original.cif`, see the `--input-pdb-pattern` argument, which
-  is a python format string which must use the `pdb_id` variable to refer to the RCSB PDB ID.
+- the input PDB cif files are stored in `/home/ubuntu/grid_search_inputs` as required for running the
+  grid search (see GRID_SEARCH.md). The files will have paths like, e.g., 
+  `/home/ubuntu/grid_search_inputs/1VME/1VME_original.cif`. See also the `--input-pdb-pattern` 
+  argument, which is a python format string which must use the `pdb_id` variable to refer to the 
+  RCSB PDB ID.
 
 ```shell
 pixi run -e analysis python scripts/patch_output_cif_files.py \
@@ -135,7 +137,7 @@ The output file, `rscc_metrics.csv`, contains one row per selection per output C
 | scaler               | the trajectory scaler                                                              | pure_guidance                                                                                                           |
 | ensemble_size        | The number of output models in the `refined_cif_path`                              | 8                                                                                                                       |
 | guidance_weight      |                                                                                    | 0.1                                                                                                                     |
-| gd_steps             | Deprecated.                                                                        |
+| gd_steps             | Deprecated.                                                                        | N/A                                                                                                                     |
 | trial_dir            | the output directory used in generation                                            | /data/sampleworks-exp/occ_sweep/grid_search_results/6DUR_1.0occA/boltz2_MD/pure_guidance/ens8_gw0.1                     |
 | refined_cif_path     | the exact CIF file containing generated models                                     | /data/sampleworks-exp/occ_sweep/grid_search_results/6DUR_1.0occA/boltz2_MD/pure_guidance/ens8_gw0.1/refined-patched.cif |
 | protein_dir_name     | the subdirectory in --grid-search-results-path containing results for this protein | 6DUR_1.0occA                                                                                                            |
@@ -153,7 +155,7 @@ This script produces a single file, `lddt_metrics.csv`, with each row as describ
 
 The script attempts to assign selections in each of the models in the CIF file to the altlocs defined
 in the input reference structure, using as a 
-psuedo-distance the LDDT scores computed over the selected atoms in each model. 
+pseudo-distance the LDDT scores computed over the selected atoms in each model. 
 In the example below, the CIF file is 
 ```shell
 /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1/refined.cif
@@ -161,28 +163,28 @@ In the example below, the CIF file is
 and it has 8 models. The selection is over all atoms that have altlocs in the reference structure (1VME).
 This includes a loop movement, and 5/8 models are closer, in this LDDT sense, to altloc A, while 3/8
 are closer to altloc B. The script then computes a silhouette score for this assumed clustering, 
-which is reported as the `avg_silhouette` score. We also report a psuedo-silhouette score, which is
+which is reported as the `avg_silhouette` score. We also report a pseudo-silhouette score, which is
 a measure of how well the generated conformers match the reference altlocs. 1.0 is a perfect score, 
-and 0.0 indicates a poor clustering. In the example provided, the psuedo-silhouette score is 0.0034,
+and 0.0 indicates a poor clustering. In the example provided, the pseudo-silhouette score is 0.0034,
 indicating that the generated conformers are not well separated and do not reflect the reference altlocs.
 
-| column                | description                                                                                                                                                                | example value                                                                                                                              |
-|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|
-| protein               | an RCSB id                                                                                                                                                                 | 1vme                                                                                                                                       |
-| altloc_occupancies    | ta dictionary indicating occupancies used in the experiments                                                                                                               | {'A': 0.5}                                                                                                                                 |
-| model                 | the model used for structure generation                                                                                                                                    | boltz2                                                                                                                                     |
-| method                | (Boltz 2 only) the particular Boltz training method                                                                                                                        | MD                                                                                                                                         |
-| scaler                | the trajectory scaler used                                                                                                                                                 | pure_guidance                                                                                                                              |
-| ensemble_size         | the number of models in the output cif file                                                                                                                                | 8                                                                                                                                          |
-| guidance_weight       |                                                                                                                                                                            | 0.1                                                                                                                                        |
-| gd_steps              | Deprecated                                                                                                                                                                 |                                                                                                                                            |
-| trial_dir             | the directory containing generation results | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1             |
-| refined_cif_path      | the exact CIF file containing generated models                                                                                                                             | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1/refined.cif |
-| protein_dir_name      | the subdirectory in --grid-search-results-path containing results for this protein                                                                                         | 1VME_native_occ                                                                                                                            |
-| rscc                  | unused                                                                                                                                                                     |
-| base_map_path         | unused                                                                                                                                                                     |
-| error                 | any error, if the calculation cannot be completed                                                                                                                          |
-| selection             | the selection string that was used, described above (full string will be available, shortened here)                                                                        | chain_id == 'A' and ((res_id == 1) or ... or (res_id >= 316 and res_id <= 324) or (res_id == 326) or (res_id == 373))                      |
-| occupancies           | space-separated list of occupancies of the identified clustered states                                                                                                     | [0.625, 0.375]                                                                                                                             |
-| avg_silhouette        | the average silhoutte score for the clustered conformers/altlocs (see above)                                                                                               | 0.16082633177992867
-| avg_silhouette_to_ref | the psuedo-silhouette score of the generated structures to the reference altlocs in the original generation input                                                          | 0.003392312238469336                                                                                                                      |
+| column                | description                                                                                                       | example value                                                                                                                             |
+|-----------------------|-------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
+| protein               | an RCSB id                                                                                                        | 1vme                                                                                                                                      |
+| altloc_occupancies    | a dictionary indicating occupancies used in the experiments                                                       | {'A': 0.5}                                                                                                                                |
+| model                 | the model used for structure generation                                                                           | boltz2                                                                                                                                    |
+| method                | (Boltz 2 only) the particular Boltz training method                                                               | MD                                                                                                                                        |
+| scaler                | the trajectory scaler used                                                                                        | pure_guidance                                                                                                                             |
+| ensemble_size         | the number of models in the output cif file                                                                       | 8                                                                                                                                         |
+| guidance_weight       |                                                                                                                   | 0.1                                                                                                                                       |
+| gd_steps              | Deprecated                                                                                                        | N/A                                                                                                                                       |
+| trial_dir             | the directory containing generation results                                                                       | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1            |
+| refined_cif_path      | the exact CIF file containing generated models                                                                    | /mnt/diffuse-private/raw/sampleworks/initial_dataset_40/grid_search_results/1VME_native_occ/boltz2_MD/pure_guidance/ens8_gw0.1/refined.cif |
+| protein_dir_name      | the subdirectory in --grid-search-results-path containing results for this protein                                | 1VME_native_occ                                                                                                                           |
+| rscc                  | unused                                                                                                            | N/A                                                                                                                                       |
+| base_map_path         | unused                                                                                                            | N/A                                                                                                                                       |
+| error                 | any error, if the calculation cannot be completed                                                                 |                                                                                                                                           |
+| selection             | the selection string that was used, described above (full string will be available, shortened here)               | chain_id == 'A' and ((res_id == 1) or ... or (res_id >= 316 and res_id <= 324) or (res_id == 326) or (res_id == 373))                     |
+| occupancies           | space-separated list of occupancies of the identified clustered states                                            | [0.625, 0.375]                                                                                                                            |
+| avg_silhouette        | the average silhouette score for the clustered conformers/altlocs (see above)                                     | 0.16082633177992867                                                                                                                       |
+| avg_silhouette_to_ref | the pseudo-silhouette score of the generated structures to the reference altlocs in the original generation input | 0.003392312238469336                                                                                                                      |
diff --git a/scripts/eval/run_and_process_tortoize.py b/scripts/eval/run_and_process_tortoize.py
index 26ce54ea..1fbbb849 100644
--- a/scripts/eval/run_and_process_tortoize.py
+++ b/scripts/eval/run_and_process_tortoize.py
@@ -1,3 +1,4 @@
+import argparse
 import json
 import subprocess
 from pathlib import Path
@@ -8,19 +9,27 @@
 from loguru import logger
 from pandas import DataFrame
 
-from sampleworks.eval.eval_dataclasses import Trial
 from sampleworks.eval.grid_search_eval_utils import parse_eval_args, setup_evaluation_parameters
 
 
 # TODO make more general: https://github.com/diff-use/sampleworks/issues/93
-def main(args) -> None:
-    # check that phenix is installed and available, bail early if not.
+def main(args: argparse.Namespace) -> None:
+    """
+    Run tortoize on all trial CIF files and output residue/protein-level CSV stats.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Parsed CLI arguments from sampleworks.eval.grid_search_eval_utils.parse_eval_args().
+    """
+
+    # check that tortoize is installed and available, bail early if not.
     try:
         subprocess.call("tortoize", stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     except FileNotFoundError:
         raise RuntimeError(
             "tortoize is not available, make sure you have installed it."
-        )
+        ) from None
     # The dropped variable is a list of ProteinConfigs, not used yet in this script
     all_trials, _ = setup_evaluation_parameters(args)
 
@@ -98,14 +107,16 @@ def get_protein_level_z_scores(tortoize_json: dict[str, Any]) -> pd.DataFrame:
     See test/1cbs.json for an example of the tortoize output JSON
 
 
-    :param tortoize_json:
-    :return: pd.DataFrame with keys:
-      - model (e.g. "1")
-      - ramachandran_z_score
-      - torsion_z_score
-      - ramachandran_jackknife_sd
-      - torsion_jackknife_sd
-      - residue_count
+    Parameters
+    ----------
+    tortoize_json : dict[str, Any]
+        Parsed JSON output from the tortoize command.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with columns: model, ramachandran_z_score, torsion_z_score,
+        ramachandran_jackknife_sd, torsion_jackknife_sd.
     """
 
     out: list[dict[str, Any]] = []
@@ -122,11 +133,24 @@ def get_protein_level_z_scores(tortoize_json: dict[str, Any]) -> pd.DataFrame:
 
 
 def get_stats_for_single_path(path: Path) -> tuple[DataFrame, DataFrame]:
+    """
+    Run tortoize on a single CIF file and extract residue/protein-level stats.
+
+    Parameters
+    ----------
+    path : Path
+        Path to a CIF file to process.
+
+    Returns
+    -------
+    tuple[DataFrame, DataFrame]
+        (residue_df, protein_df). Both are empty DataFrames on failure.
+    """
     logger.info(f"Processing {path}")
     try:
-        output = subprocess.check_output(f"tortoize {path}".split())
+        output = subprocess.check_output(["tortoize", str(path)])
         result = json.loads(output)
-    except Exception as e:
+    except (subprocess.CalledProcessError, json.JSONDecodeError, OSError) as e:
         logger.error(f"Failed to process {path}: {e}")
         return pd.DataFrame(), pd.DataFrame()