From 46d939eb67c67eb010b9075749558cde1997542d Mon Sep 17 00:00:00 2001 From: "Marcus D. Collins" Date: Fri, 13 Mar 2026 15:19:25 -0700 Subject: [PATCH 1/4] feat(grid search args): rename --use-tweedie argument to --step-scaler-type which defaults to noisespace --- run_grid_search.py | 8 +++++++- src/sampleworks/utils/guidance_script_arguments.py | 10 ++++++---- src/sampleworks/utils/guidance_script_utils.py | 14 +++++++++++--- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/run_grid_search.py b/run_grid_search.py index 10aa04e7..2fa6a318 100755 --- a/run_grid_search.py +++ b/run_grid_search.py @@ -483,7 +483,13 @@ def parse_args() -> argparse.Namespace: "--partial-diffusion-step", type=int, default=0, help="Partial diffusion step" ) parser.add_argument("--loss-order", type=int, default=2, help="L1 (1) or L2 (2) loss") - parser.add_argument("--use-tweedie", action="store_true", help="Use Tweedie (pure guidance)") + parser.add_argument( + "--step-scaler-type", + type=str, + default="noisespace", + choices=["dataspace", "noisespace", "none"], + help="Type of step scaler to use (pure guidance only)", + ) parser.add_argument( "--gradient-normalization", action="store_true", diff --git a/src/sampleworks/utils/guidance_script_arguments.py b/src/sampleworks/utils/guidance_script_arguments.py index ed91df40..509c64ae 100644 --- a/src/sampleworks/utils/guidance_script_arguments.py +++ b/src/sampleworks/utils/guidance_script_arguments.py @@ -185,7 +185,7 @@ def populate_config_for_guidance_type(self, job: JobConfig, args: argparse.Names self.ensemble_size = job.ensemble_size else: self.step_size = job.gradient_weight - self.use_tweedie = args.use_tweedie + self.step_scaler_type = args.step_scaler_type self.ensemble_size = job.ensemble_size @@ -246,9 +246,11 @@ def add_generic_args(parser: argparse.ArgumentParser | GuidanceConfig): def add_pure_guidance_args(parser: argparse.ArgumentParser | GuidanceConfig): parser.add_argument("--step-size", type=float, default=0.1, help="Gradient step") parser.add_argument( - "--use-tweedie", - action="store_true", - help="Use Tweedie's formula for gradient computation (enables augmentation/alignment)", + "--step-scaler-type", + type=str, + default="noisespace", + choices=["dataspace", "noisespace", "none"], + help="Type of step scaler to use: dataspace (DataSpaceDPSScaler), noisespace (NoiseSpaceDPSScaler), or none (NoScalingScaler)", ) diff --git a/src/sampleworks/utils/guidance_script_utils.py b/src/sampleworks/utils/guidance_script_utils.py index deea463e..14146b43 100644 --- a/src/sampleworks/utils/guidance_script_utils.py +++ b/src/sampleworks/utils/guidance_script_utils.py @@ -27,6 +27,7 @@ from sampleworks.core.scalers.pure_guidance import PureGuidance from sampleworks.core.scalers.step_scalers import ( DataSpaceDPSScaler, + NoScalingScaler, NoiseSpaceDPSScaler, ) from sampleworks.utils.guidance_constants import ( @@ -435,17 +436,24 @@ def _run_guidance( ) # Create step scaler for gradient-based guidance - use_tweedie = getattr(args, "use_tweedie", False) - if use_tweedie: + step_scaler_type = getattr(args, "step_scaler_type", "noisespace") + if step_scaler_type == "dataspace": step_scaler = DataSpaceDPSScaler( step_size=args.step_size, gradient_normalization=args.gradient_normalization, ) - else: + elif step_scaler_type == "noisespace": step_scaler = NoiseSpaceDPSScaler( step_size=args.step_size, gradient_normalization=args.gradient_normalization, ) + elif step_scaler_type == "none": + step_scaler = NoScalingScaler( + step_size=args.step_size, + gradient_normalization=args.gradient_normalization, + ) + else: + raise ValueError(f"Invalid step_scaler_type: {step_scaler_type}") # TODO: this should be a config option num_steps = 200 From 2c00d8603149682c200b6a2d97cf2ef888e2842c Mon Sep 17 00:00:00 2001 From: "Marcus D. Collins" Date: Fri, 13 Mar 2026 15:58:15 -0700 Subject: [PATCH 2/4] feat(grid search args): rename --models to --model and propagate the change; resolves https://github.com/diff-use/sampleworks/issues/150 --- run_grid_search.py | 156 ++++++++++-------- .../utils/guidance_script_utils.py | 5 +- 2 files changed, 87 insertions(+), 74 deletions(-) diff --git a/run_grid_search.py b/run_grid_search.py index 2fa6a318..38007c7f 100755 --- a/run_grid_search.py +++ b/run_grid_search.py @@ -25,7 +25,7 @@ @dataclass class GridSearchConfig: - models: list[str] + model: str scalers: list[str] ensemble_sizes: list[int] gradient_weights: list[float] @@ -244,9 +244,6 @@ def main(args: argparse.Namespace): log_args(args, gpus) - if len(args.models.split()) > 1: - # this is designed to run one type of model per script, # TODO to allow multiple models - raise ValueError("Multiple --models selected, this is not compatible with the new script!") if len(args.methods.split(",")) > 1: # this is designed to run one type of model per script, # TODO to allow multiple models raise ValueError("Multiple --methods selected, this is not compatible with the new script!") @@ -258,7 +255,7 @@ def main(args: argparse.Namespace): return config = GridSearchConfig( - models=args.models.split(), + model=args.model, scalers=args.scalers.split(), ensemble_sizes=[int(x) for x in args.ensemble_sizes.split()], gradient_weights=[float(x) for x in args.gradient_weights.split()], @@ -284,7 +281,7 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: jobs = [] proteins = ProteinInput.from_csv(Path(args.proteins)) - models = args.models.split() + model = args.model scalers = args.scalers.split() ensemble_sizes = [int(x) for x in args.ensemble_sizes.split()] gradient_weights = [float(x) for x in args.gradient_weights.split()] @@ -293,54 +290,26 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: for protein in proteins: structure = protein.structure - density = str(protein.density) # in case patch for Path in qfit.volume doesn't work + density = str(protein.density) resolution = protein.resolution protein_name = protein.name - for model in models: - model_methods = methods if model == StructurePredictor.BOLTZ_2 else [None] - - for method in model_methods: - method_suffix = f"_{method.replace(' ', '_')}" if method else "" - - for scaler in scalers: - if scaler == GuidanceType.FK_STEERING: - for ens in ensemble_sizes: - for gw in gradient_weights: - for gd in gd_steps_list: - output_dir = os.path.join( - args.output_dir, - protein_name, - f"{model}{method_suffix}", - scaler, - f"ens{ens}_gw{gw}_gd{gd}", - ) - log_path = os.path.join(output_dir, "run.log") - jobs.append( - JobConfig( - protein=protein_name, - structure_path=structure, - density_path=density, - resolution=resolution, - model=model, - scaler=scaler, - ensemble_size=ens, - gradient_weight=gw, - gd_steps=gd, - method=method, - output_dir=output_dir, - log_path=log_path, - ) - ) - else: - for ens in ensemble_sizes: - for gw in gradient_weights: + model_methods = methods if model == StructurePredictor.BOLTZ_2 else [] + + for method in model_methods: + method_suffix = f"_{method.replace(' ', '_')}" if method else "" + + for scaler in scalers: + if scaler == GuidanceType.FK_STEERING: + for ens in ensemble_sizes: + for gw in gradient_weights: + for gd in gd_steps_list: output_dir = os.path.join( args.output_dir, protein_name, f"{model}{method_suffix}", scaler, - f"ens{ens}_gw{gw}", + f"ens{ens}_gw{gw}_gd{gd}", ) log_path = os.path.join(output_dir, "run.log") jobs.append( @@ -353,12 +322,39 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: scaler=scaler, ensemble_size=ens, gradient_weight=gw, - gd_steps=1, + gd_steps=gd, method=method, output_dir=output_dir, log_path=log_path, ) ) + else: + for ens in ensemble_sizes: + for gw in gradient_weights: + output_dir = os.path.join( + args.output_dir, + protein_name, + f"{model}{method_suffix}", + scaler, + f"ens{ens}_gw{gw}", + ) + log_path = os.path.join(output_dir, "run.log") + jobs.append( + JobConfig( + protein=protein_name, + structure_path=structure, + density_path=density, + resolution=resolution, + model=model, + scaler=scaler, + ensemble_size=ens, + gradient_weight=gw, + gd_steps=1, + method=method, + output_dir=output_dir, + log_path=log_path, + ) + ) return jobs @@ -431,16 +427,35 @@ def save_results( def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( - description="Run grid search across models, scalers, and parameters." + description="Run grid search across scalers, and parameters for a single " + "protein structure predictor model." ) - + # Experiment level arguments parser.add_argument( "--proteins", required=True, - help="CSV file with columns: structure,density,resolution,name", + help="CSV file with columns: structure,density,resolution,name" + ) + + # Model arguments + parser.add_argument( + "--model", + default="boltz2", + choices=["boltz2", "protenix", "rf3"], + help="The protein structure predictor model to use" + ) + parser.add_argument( + "--model-checkpoint", + default="", + help="Override the default checkpoint path for the selected model" + ) + parser.add_argument( + "--methods", + default="X-RAY DIFFRACTION", + help="Comma-separated methods for Boltz2", ) - parser.add_argument("--models", default="boltz2 protenix", help="Space-separated models") + # Trajectory scaling arguments parser.add_argument( "--scalers", default="pure_guidance fk_steering", help="Space-separated scalers" ) @@ -450,28 +465,22 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--gradient-weights", default="0.01 0.1 0.2", - help="Space-separated gradient weights", + help="Space-separated gradient weights/step sizes", + ) + parser.add_argument( + "--partial-diffusion-step", type=int, default=0, help="Partial diffusion step" ) parser.add_argument( "--num-gd-steps", default="20", help="Space-separated GD steps (FK steering only)", ) - parser.add_argument("--output-dir", default="./grid_search_results", help="Output directory") - parser.add_argument( - "--model-checkpoint", - default="", - help="Override the default checkpoint path for the selected model", + "--num-particles", type=int, default=3, help="FK steering: num particles" ) parser.add_argument( - "--methods", - default="X-RAY DIFFRACTION", - help="Comma-separated methods for Boltz2", + "--fk-lambda", type=float, default=0.5, help="FK steering: lambda" ) - - parser.add_argument("--num-particles", type=int, default=3, help="FK steering: num particles") - parser.add_argument("--fk-lambda", type=float, default=0.5, help="FK steering: lambda") parser.add_argument( "--fk-resampling-interval", type=int, @@ -479,10 +488,7 @@ def parse_args() -> argparse.Namespace: help="FK steering: resampling interval", ) - parser.add_argument( - "--partial-diffusion-step", type=int, default=0, help="Partial diffusion step" - ) - parser.add_argument("--loss-order", type=int, default=2, help="L1 (1) or L2 (2) loss") + # Step Scaler arguments parser.add_argument( "--step-scaler-type", type=str, @@ -498,13 +504,23 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--augmentation", action="store_true", help="Enable augmentation") parser.add_argument("--align-to-input", action="store_true", help="Align to input structure") + # Reward/Loss function arguments + parser.add_argument("--loss-order", type=int, default=2, help="L1 (1) or L2 (2) loss") + + # Output arguments + parser.add_argument("--output-dir", default="./grid_search_results", help="Output directory") + + # Arguments for choosing what to run and what hardware to use. parser.add_argument( "--max-parallel", default="auto", help="Max parallel jobs (default: auto = number of GPUs)", ) - parser.add_argument("--dry-run", action="store_true", help="Print commands without executing") - + parser.add_argument( + "--dry-run", + action="store_true", + help="Print commands without executing", + ) parser.add_argument( "--force-all", action="store_true", @@ -527,7 +543,7 @@ def parse_args() -> argparse.Namespace: def log_args(args: argparse.Namespace, gpus: list[str]): log.info("=" * 50) log.info("Starting grid search") - log.info(f"Models: {args.models}") + log.info(f"Model: {args.model}") log.info(f"Scalers: {args.scalers}") log.info(f"Ensemble sizes: {args.ensemble_sizes}") log.info(f"Gradient weights: {args.gradient_weights}") diff --git a/src/sampleworks/utils/guidance_script_utils.py b/src/sampleworks/utils/guidance_script_utils.py index 14146b43..bbefb327 100644 --- a/src/sampleworks/utils/guidance_script_utils.py +++ b/src/sampleworks/utils/guidance_script_utils.py @@ -448,10 +448,7 @@ def _run_guidance( gradient_normalization=args.gradient_normalization, ) elif step_scaler_type == "none": - step_scaler = NoScalingScaler( - step_size=args.step_size, - gradient_normalization=args.gradient_normalization, - ) + step_scaler = NoScalingScaler() else: raise ValueError(f"Invalid step_scaler_type: {step_scaler_type}") From d2f1f6ede20c8460ebac9d66e9bde8c1c9f3ef48 Mon Sep 17 00:00:00 2001 From: "Marcus D. Collins" Date: Fri, 13 Mar 2026 16:09:37 -0700 Subject: [PATCH 3/4] feat(grid search args): rename --methods to --method and propagate the change, resolves https://github.com/diff-use/sampleworks/issues/151 --- run_grid_search.py | 92 +++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 50 deletions(-) diff --git a/run_grid_search.py b/run_grid_search.py index 38007c7f..61d26466 100755 --- a/run_grid_search.py +++ b/run_grid_search.py @@ -30,7 +30,7 @@ class GridSearchConfig: ensemble_sizes: list[int] gradient_weights: list[float] gd_steps: list[int] - methods: list[str] + method: str proteins_file: str output_dir: str @@ -244,10 +244,6 @@ def main(args: argparse.Namespace): log_args(args, gpus) - if len(args.methods.split(",")) > 1: - # this is designed to run one type of model per script, # TODO to allow multiple models - raise ValueError("Multiple --methods selected, this is not compatible with the new script!") - filtered_jobs, job_statuses = generate_and_filter_jobs(args) if len(filtered_jobs) == 0: @@ -260,7 +256,7 @@ def main(args: argparse.Namespace): ensemble_sizes=[int(x) for x in args.ensemble_sizes.split()], gradient_weights=[float(x) for x in args.gradient_weights.split()], gd_steps=[int(x) for x in args.num_gd_steps.split()], - methods=[m.strip() for m in args.methods.split(",")], + method=args.method, proteins_file=args.proteins, output_dir=args.output_dir, ) @@ -286,7 +282,6 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: ensemble_sizes = [int(x) for x in args.ensemble_sizes.split()] gradient_weights = [float(x) for x in args.gradient_weights.split()] gd_steps_list = [int(x) for x in args.num_gd_steps.split()] - methods = [m.strip() for m in args.methods.split(",")] for protein in proteins: structure = protein.structure @@ -294,49 +289,18 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: resolution = protein.resolution protein_name = protein.name - model_methods = methods if model == StructurePredictor.BOLTZ_2 else [] - - for method in model_methods: - method_suffix = f"_{method.replace(' ', '_')}" if method else "" - - for scaler in scalers: - if scaler == GuidanceType.FK_STEERING: - for ens in ensemble_sizes: - for gw in gradient_weights: - for gd in gd_steps_list: - output_dir = os.path.join( - args.output_dir, - protein_name, - f"{model}{method_suffix}", - scaler, - f"ens{ens}_gw{gw}_gd{gd}", - ) - log_path = os.path.join(output_dir, "run.log") - jobs.append( - JobConfig( - protein=protein_name, - structure_path=structure, - density_path=density, - resolution=resolution, - model=model, - scaler=scaler, - ensemble_size=ens, - gradient_weight=gw, - gd_steps=gd, - method=method, - output_dir=output_dir, - log_path=log_path, - ) - ) - else: - for ens in ensemble_sizes: - for gw in gradient_weights: + method_suffix = f"_{args.method.replace(' ', '_')}" if args.method else "" + for scaler in scalers: + if scaler == GuidanceType.FK_STEERING: + for ens in ensemble_sizes: + for gw in gradient_weights: + for gd in gd_steps_list: output_dir = os.path.join( args.output_dir, protein_name, f"{model}{method_suffix}", scaler, - f"ens{ens}_gw{gw}", + f"ens{ens}_gw{gw}_gd{gd}", ) log_path = os.path.join(output_dir, "run.log") jobs.append( @@ -349,12 +313,39 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]: scaler=scaler, ensemble_size=ens, gradient_weight=gw, - gd_steps=1, - method=method, + gd_steps=gd, + method=args.method, output_dir=output_dir, log_path=log_path, ) ) + else: + for ens in ensemble_sizes: + for gw in gradient_weights: + output_dir = os.path.join( + args.output_dir, + protein_name, + f"{model}{method_suffix}", + scaler, + f"ens{ens}_gw{gw}", + ) + log_path = os.path.join(output_dir, "run.log") + jobs.append( + JobConfig( + protein=protein_name, + structure_path=structure, + density_path=density, + resolution=resolution, + model=model, + scaler=scaler, + ensemble_size=ens, + gradient_weight=gw, + gd_steps=1, + method=args.method, + output_dir=output_dir, + log_path=log_path, + ) + ) return jobs @@ -450,9 +441,10 @@ def parse_args() -> argparse.Namespace: help="Override the default checkpoint path for the selected model" ) parser.add_argument( - "--methods", + "--method", default="X-RAY DIFFRACTION", - help="Comma-separated methods for Boltz2", + choices=["X-RAY DIFFRACTION", "MD"], + help="Method for Boltz2 ('X-RAY DIFFRACTION', 'MD')", ) # Trajectory scaling arguments @@ -548,7 +540,7 @@ def log_args(args: argparse.Namespace, gpus: list[str]): log.info(f"Ensemble sizes: {args.ensemble_sizes}") log.info(f"Gradient weights: {args.gradient_weights}") log.info(f"GD steps: {args.num_gd_steps}") - log.info(f"Boltz2 methods: {args.methods}") + log.info(f"Boltz2 method: {args.method}") log.info(f"Output directory: {args.output_dir}") log.info(f"GPUs: {gpus}") log.info(f"Dry run: {args.dry_run}") From 48f3490c414d65e65e7f00d99f4e94f08eecd0fa Mon Sep 17 00:00:00 2001 From: "Marcus D. Collins" Date: Tue, 17 Mar 2026 11:46:52 -0700 Subject: [PATCH 4/4] fix:add back boltz1 option to run_grid_search.py, correct arguments in run_all_models.sh --- run_all_models.sh | 12 ++++++------ run_grid_search.py | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/run_all_models.sh b/run_all_models.sh index c958a7cf..76e9f6a1 100755 --- a/run_all_models.sh +++ b/run_all_models.sh @@ -58,8 +58,8 @@ docker run $DOCKER_OPTS \ diffuseproject/sampleworks:latest \ -e boltz run_grid_search.py \ --proteins "/data/inputs/proteins.csv" \ - --models boltz2 \ - --methods "X-RAY DIFFRACTION" \ + --model boltz2 \ + --method "X-RAY DIFFRACTION" \ --scalers pure_guidance \ --partial-diffusion-step 120 \ --ensemble-sizes "8" \ @@ -80,8 +80,8 @@ docker run $DOCKER_OPTS \ diffuseproject/sampleworks:latest \ -e boltz run_grid_search.py \ --proteins "/data/inputs/proteins.csv" \ - --models boltz2 \ - --methods "MD" \ + --model boltz2 \ + --method "MD" \ --scalers pure_guidance \ --partial-diffusion-step 120 \ --ensemble-sizes "8" \ @@ -102,7 +102,7 @@ docker run $DOCKER_OPTS \ diffuseproject/sampleworks:latest \ -e rf3 run_grid_search.py \ --proteins "/data/inputs/proteins.csv" \ - --models rf3 \ + --model rf3 \ --partial-diffusion-step 120 \ --scalers pure_guidance \ --ensemble-sizes "8" \ @@ -123,7 +123,7 @@ docker run $DOCKER_OPTS \ diffuseproject/sampleworks:latest \ -e protenix run_grid_search.py \ --proteins "/data/inputs/proteins.csv" \ - --models protenix \ + --model protenix \ --scalers pure_guidance \ --partial-diffusion-step 120 \ --ensemble-sizes "8" \ diff --git a/run_grid_search.py b/run_grid_search.py index 61d26466..8aaafa78 100755 --- a/run_grid_search.py +++ b/run_grid_search.py @@ -432,7 +432,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--model", default="boltz2", - choices=["boltz2", "protenix", "rf3"], + choices=["boltz1", "boltz2", "protenix", "rf3"], help="The protein structure predictor model to use" ) parser.add_argument( @@ -536,11 +536,12 @@ def log_args(args: argparse.Namespace, gpus: list[str]): log.info("=" * 50) log.info("Starting grid search") log.info(f"Model: {args.model}") + if args.model == "boltz2": + log.info(f"Boltz2 method: {args.method}") log.info(f"Scalers: {args.scalers}") log.info(f"Ensemble sizes: {args.ensemble_sizes}") log.info(f"Gradient weights: {args.gradient_weights}") log.info(f"GD steps: {args.num_gd_steps}") - log.info(f"Boltz2 method: {args.method}") log.info(f"Output directory: {args.output_dir}") log.info(f"GPUs: {gpus}") log.info(f"Dry run: {args.dry_run}")