From 861e0822b970eae6eccd1778d63a9307226843d9 Mon Sep 17 00:00:00 2001 From: sfilges Date: Tue, 3 Mar 2026 19:13:17 +0100 Subject: [PATCH 1/8] Fixed config file location during global install --- CHANGELOG.md | 6 + src/plexus/aligner/align.py | 93 +++++++++++----- src/plexus/config.py | 18 +-- src/plexus/data/alignment_parameters.json | 7 ++ src/plexus/data/designer_default_config.json | 105 ++++++++++++++++++ src/plexus/data/designer_lenient_config.json | 105 ++++++++++++++++++ src/plexus/data/nn_model/match.json | 18 +++ src/plexus/data/nn_model/single_mismatch.json | 50 +++++++++ src/plexus/utils/root_dir.py | 3 - src/plexus/version.py | 2 +- tests/test_aligner.py | 20 ++-- 11 files changed, 379 insertions(+), 48 deletions(-) create mode 100644 src/plexus/data/alignment_parameters.json create mode 100644 src/plexus/data/designer_default_config.json create mode 100644 src/plexus/data/designer_lenient_config.json create mode 100644 src/plexus/data/nn_model/match.json create mode 100644 src/plexus/data/nn_model/single_mismatch.json delete mode 100644 src/plexus/utils/root_dir.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e10a55f..66babdd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.2] - 2026-03-03 + +### Fixed + +- **Package data files not found in global installs**: Config presets and alignment parameter files were not included in the wheel because they lived outside the Python package at the project root (`config/`). Moved all data files (`designer_default_config.json`, `designer_lenient_config.json`, `alignment_parameters.json`, `nn_model/`) into `src/plexus/data/` and switched `config.py` and `aligner/align.py` from `ROOT_DIR` path concatenation to `importlib.resources.files()`. 
Removed the now-unused `utils/root_dir.py`. `uv tool install` and `pip install` now work correctly without an editable install. + ## [1.0.1] - 2026-03-03 ### Changed diff --git a/src/plexus/aligner/align.py b/src/plexus/aligner/align.py index e3f11d2..bf5de47 100644 --- a/src/plexus/aligner/align.py +++ b/src/plexus/aligner/align.py @@ -21,12 +21,12 @@ import json from dataclasses import dataclass, field +from importlib.resources import files from itertools import product +from pathlib import Path from loguru import logger -from plexus.utils.root_dir import ROOT_DIR - # ================================================================================ # Define an alignment between two primers # ================================================================================ @@ -91,8 +91,9 @@ def __init__(self, param_path: str | None = None) -> None: # Load parameters if param_path is None: - param_path = f"{ROOT_DIR}/config/alignment_parameters.json" - self.load_parameters(param_path) + self._load_parameters_from_package() + else: + self._load_parameters_from_file(param_path) def set_primers( self, primer1: str, primer2: str, primer1_name: str, primer2_name: str @@ -104,12 +105,46 @@ def set_primers( self.primer2_name = primer2_name self.score = None - def load_parameters(self, param_path: str) -> None: + def _load_parameters_from_package(self) -> None: + """Load alignment parameters from the bundled plexus.data package.""" + cache_key = "__package__" + if cache_key in PrimerDimerPredictor._param_cache: + logger.debug("Using cached alignment parameters from package data") + self.nn_scores, self.end_length, self.end_bonus = ( + PrimerDimerPredictor._param_cache[cache_key] + ) + return + + logger.info("Loading alignment parameters from package data") + data_pkg = files("plexus.data") + + params = json.loads(data_pkg.joinpath("alignment_parameters.json").read_text()) + + match_dt: dict[str, float] = json.loads( + data_pkg.joinpath(params["match_scores"]).read_text() + ) 
+ single_mismatch_dt: dict[str, float] = json.loads( + data_pkg.joinpath(params["single_mismatch_scores"]).read_text() + ) + + self.nn_scores = _build_nn_score_dt( + match_dt, single_mismatch_dt, params["double_mismatch_score"] + ) + self.end_length = params["end_length"] + self.end_bonus = params["end_bonus"] + + PrimerDimerPredictor._param_cache[cache_key] = ( + self.nn_scores, + self.end_length, + self.end_bonus, + ) + + def _load_parameters_from_file(self, param_path: str) -> None: """ - Load parameters necessary for Primer Dimer algorithm, - and set as attributes. Results are cached by path so that - repeated instantiations within the same process only read - the JSON files once. + Load parameters from a user-specified file path. + + Results are cached by path so that repeated instantiations + within the same process only read the JSON files once. Parameters ---------- @@ -125,18 +160,16 @@ def load_parameters(self, param_path: str) -> None: logger.info(f"Loading alignment parameters from: {param_path}") - # Load parameter JSON with open(param_path) as f: params = json.load(f) - # Load nearest neighbour model, these should all be paths + param_dir = str(Path(param_path).parent) self.nn_scores = create_nn_score_dt( - match_json=f"{ROOT_DIR}/{params['match_scores']}", - single_mismatch_json=f"{ROOT_DIR}/{params['single_mismatch_scores']}", + match_json=f"{param_dir}/{params['match_scores']}", + single_mismatch_json=f"{param_dir}/{params['single_mismatch_scores']}", double_mismatch_score=params["double_mismatch_score"], ) - # Load penalties self.end_length = params["end_length"] self.end_bonus = params["end_bonus"] @@ -366,6 +399,23 @@ def get_primer_alignment(self) -> PrimerAlignment: ) +def _build_nn_score_dt( + match_dt: dict[str, float], + single_mismatch_dt: dict[str, float], + double_mismatch_score: float = 0.2, +) -> dict[str, float]: + """Build the nearest-neighbour scoring dict from pre-loaded dicts.""" + nts = ["A", "T", "C", "G"] + nn_score_dt: 
dict[str, float] = { + "".join(watson) + "/" + "".join(crick): double_mismatch_score + for watson in product(nts, repeat=2) + for crick in product(nts, repeat=2) + } + nn_score_dt.update(match_dt) + nn_score_dt.update(single_mismatch_dt) + return nn_score_dt + + def create_nn_score_dt( match_json: str, single_mismatch_json: str, double_mismatch_score: float = 0.2 ) -> dict[str, float]: @@ -386,22 +436,9 @@ def create_nn_score_dt( dict Dictionary mapping dinucleotide pairs to their scores """ - # Load match and single mismatch .jsons with open(match_json) as f: match_dt: dict[str, float] = json.load(f) with open(single_mismatch_json) as f: single_mismatch_dt: dict[str, float] = json.load(f) - # Set all as double mismatches; then update - nts = ["A", "T", "C", "G"] - nn_score_dt: dict[str, float] = { - "".join(watson) + "/" + "".join(crick): double_mismatch_score - for watson in product(nts, repeat=2) - for crick in product(nts, repeat=2) - } - - # Update - nn_score_dt.update(match_dt) - nn_score_dt.update(single_mismatch_dt) - - return nn_score_dt + return _build_nn_score_dt(match_dt, single_mismatch_dt, double_mismatch_score) diff --git a/src/plexus/config.py b/src/plexus/config.py index d117038..abf1674 100644 --- a/src/plexus/config.py +++ b/src/plexus/config.py @@ -10,14 +10,13 @@ from __future__ import annotations import json +from importlib.resources import files from pathlib import Path from typing import Any, Literal from loguru import logger from pydantic import BaseModel, Field, model_validator -from plexus.utils.root_dir import ROOT_DIR - class SingleplexDesignParameters(BaseModel): """Parameters for individual primer design.""" @@ -467,14 +466,17 @@ def from_preset( ValueError If the preset name is not recognized. 
""" - if preset == "default": - config_path = Path(ROOT_DIR) / "config" / "designer_default_config.json" - elif preset == "lenient": - config_path = Path(ROOT_DIR) / "config" / "designer_lenient_config.json" - else: + preset_files = { + "default": "designer_default_config.json", + "lenient": "designer_lenient_config.json", + } + if preset not in preset_files: raise ValueError(f"Unknown preset: {preset}. Use 'default' or 'lenient'.") - return cls.from_json_file(config_path) + data = json.loads( + files("plexus.data").joinpath(preset_files[preset]).read_text() + ) + return cls.model_validate(data) def to_dict(self) -> dict[str, Any]: """ diff --git a/src/plexus/data/alignment_parameters.json b/src/plexus/data/alignment_parameters.json new file mode 100644 index 0000000..99e26a3 --- /dev/null +++ b/src/plexus/data/alignment_parameters.json @@ -0,0 +1,7 @@ +{ + "end_length": 4, + "end_bonus": -0.5, + "match_scores": "nn_model/match.json", + "single_mismatch_scores": "nn_model/single_mismatch.json", + "double_mismatch_score": 0.2 +} diff --git a/src/plexus/data/designer_default_config.json b/src/plexus/data/designer_default_config.json new file mode 100644 index 0000000..9fb5a25 --- /dev/null +++ b/src/plexus/data/designer_default_config.json @@ -0,0 +1,105 @@ +{ + "singleplex_design_parameters" : { + "PRIMER_NUM_RETURN": 10, + "PRIMER_OPT_TM": 60.0, + "PRIMER_MIN_TM": 57.0, + "PRIMER_MAX_TM": 63.0, + "PRIMER_OPT_SIZE": 22, + "primer_min_length": 15, + "primer_max_length": 30, + "PRIMER_OPT_BOUND": 98.0, + "PRIMER_MIN_BOUND": 30.0, + "PRIMER_MAX_BOUND": 120.0, + "junction_padding_bases": 3, + "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", + "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", + "primer_length_penalty": 1.0, + "primer_complexity_penalty": 1.0, + "amplicon_length_penalty": 1.0, + "PRIMER_OPT_GC_PERCENT": 50.0, + "primer_min_gc": 30, + "primer_max_gc": 70, + "primer_gc_clamp": 1, + "primer_max_poly_x": 5, + 
"primer_max_poly_gc": 3, + "primer_max_n": 0, + "PRIMER_MAX_SELF_ANY_TH": 45.0, + "PRIMER_MAX_SELF_END_TH": 35.0, + "PRIMER_MAX_HAIRPIN_TH": 24.0, + "PRIMER_MAX_END_STABILITY": 4.5, + "PRIMER_MAX_TEMPLATE_MISPRIMING_TH": 35.0, + "PRIMER_WT_SIZE_LT": 1.0, + "PRIMER_WT_SIZE_GT": 1.0, + "PRIMER_WT_TM_GT": 1.0, + "PRIMER_WT_TM_LT": 1.0, + "PRIMER_WT_BOUND_GT": 1.0, + "PRIMER_WT_BOUND_LT": 1.0, + "PRIMER_WT_GC_PERCENT_GT": 0.0, + "PRIMER_WT_GC_PERCENT_LT": 0.0, + "PRIMER_WT_SELF_ANY_TH": 1.0, + "PRIMER_WT_SELF_END_TH": 1.0, + "PRIMER_WT_HAIRPIN_TH": 1.0, + "PRIMER_WT_END_STABILITY": 1.0 + }, + "primer_pair_parameters" : { + "PRIMER_PAIR_MAX_DIFF_TM": 3.0, + "PRIMER_PRODUCT_OPT_SIZE": 60, + "PRIMER_PRODUCT_MIN_INSERT_SIZE": 20, + "PRIMER_PRODUCT_MAX_INSERT_SIZE": 60, + "PRIMER_PRODUCT_MAX_SIZE": 120, + "PRIMER_PAIR_WT_PR_PENALTY": 1.0, + "PRIMER_PAIR_WT_DIFF_TM": 0.0, + "PRIMER_PAIR_WT_PRODUCT_SIZE_LT": 0.5, + "PRIMER_PAIR_WT_PRODUCT_SIZE_GT": 2.0 + }, + "pcr_conditions" : { + "annealing_temperature": 60, + "primer_concentration": 50, + "dntp_concentration": 0.8, + "dna_concentration": 50, + "mv_concentration": 50, + "dv_concentration": 1.5, + "dmso_concentration": 0.0, + "dmso_fact": 0.6, + "formamide_concentration": 0.0 + }, + "snp_check_parameters": { + "af_threshold": 0.01, + "snp_penalty_weight": 10.0, + "snp_3prime_window": 5, + "snp_3prime_multiplier": 3.0, + "snp_strict": false, + "snp_af_weight": 1.0 + }, + "blast_parameters": { + "length_threshold": 15, + "evalue_threshold": 10.0, + "max_mismatches": 3, + "three_prime_tolerance": 3, + "blast_evalue": 30000.0, + "blast_word_size": 7, + "blast_reward": 1, + "blast_penalty": -1, + "blast_max_hsps": 100, + "blast_dust": "yes", + "max_amplicon_size": 2000, + "ontarget_tolerance": 5 + }, + "multiplex_picker_parameters": { + "initial_solutions": 100, + "top_solutions_to_keep": 4, + "target_plexity": 24, + "minimum_plexity": 10, + "maximum_plexity": 50, + "plexity_wt_lt": 1.0, + "plexity_wt_gt": 1.0, + "force_plexity": 
false, + "allow_split_panel": false, + "max_splits": 2, + "wt_pair_penalty": 1.0, + "wt_off_target": 5.0, + "wt_cross_dimer": 1.0, + "wt_pair_dimer": 1.0, + "wt_snp_penalty": 3.0 + } +} diff --git a/src/plexus/data/designer_lenient_config.json b/src/plexus/data/designer_lenient_config.json new file mode 100644 index 0000000..037e371 --- /dev/null +++ b/src/plexus/data/designer_lenient_config.json @@ -0,0 +1,105 @@ +{ + "singleplex_design_parameters" : { + "PRIMER_NUM_RETURN": 10, + "PRIMER_OPT_TM": 60.0, + "PRIMER_MIN_TM": 55.0, + "PRIMER_MAX_TM": 66.0, + "PRIMER_OPT_SIZE": 22, + "primer_min_length": 18, + "primer_max_length": 28, + "PRIMER_OPT_BOUND": 97.0, + "PRIMER_MIN_BOUND": -10.0, + "PRIMER_MAX_BOUND": 110.0, + "junction_padding_bases": 3, + "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", + "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", + "primer_length_penalty": 1.0, + "primer_complexity_penalty": 1.0, + "amplicon_length_penalty": 1.0, + "PRIMER_OPT_GC_PERCENT": 50.0, + "primer_min_gc": 30, + "primer_max_gc": 70, + "primer_gc_clamp": 0, + "primer_max_poly_x": 4, + "primer_max_poly_gc": 3, + "primer_max_n": 0, + "PRIMER_MAX_SELF_ANY_TH": 45.0, + "PRIMER_MAX_SELF_END_TH": 35.0, + "PRIMER_MAX_HAIRPIN_TH": 24.0, + "PRIMER_MAX_END_STABILITY": 4.5, + "PRIMER_MAX_TEMPLATE_MISPRIMING_TH": 35.0, + "PRIMER_WT_SIZE_LT": 1.0, + "PRIMER_WT_SIZE_GT": 1.0, + "PRIMER_WT_TM_GT": 1.0, + "PRIMER_WT_TM_LT": 1.0, + "PRIMER_WT_BOUND_GT": 0.0, + "PRIMER_WT_BOUND_LT": 0.0, + "PRIMER_WT_GC_PERCENT_GT": 0.0, + "PRIMER_WT_GC_PERCENT_LT": 0.0, + "PRIMER_WT_SELF_ANY_TH": 0.0, + "PRIMER_WT_SELF_END_TH": 0.0, + "PRIMER_WT_HAIRPIN_TH": 0.0, + "PRIMER_WT_END_STABILITY": 0.0 + }, + "primer_pair_parameters" : { + "PRIMER_PAIR_MAX_DIFF_TM": 5.0, + "PRIMER_PRODUCT_OPT_SIZE": 60, + "PRIMER_PRODUCT_MIN_INSERT_SIZE": 20, + "PRIMER_PRODUCT_MAX_INSERT_SIZE": 60, + "PRIMER_PRODUCT_MAX_SIZE": 100, + "PRIMER_PAIR_WT_PR_PENALTY": 1.0, + 
"PRIMER_PAIR_WT_DIFF_TM": 0.0, + "PRIMER_PAIR_WT_PRODUCT_SIZE_LT": 0.5, + "PRIMER_PAIR_WT_PRODUCT_SIZE_GT": 2.0 + }, + "pcr_conditions" : { + "annealing_temperature": 60, + "primer_concentration": 50, + "dntp_concentration": 0.6, + "dna_concentration": 50, + "mv_concentration": 50, + "dv_concentration": 1.5, + "dmso_concentration": 0.0, + "dmso_fact": 0.6, + "formamide_concentration": 0.8 + }, + "snp_check_parameters": { + "af_threshold": 0.01, + "snp_penalty_weight": 10.0, + "snp_3prime_window": 5, + "snp_3prime_multiplier": 3.0, + "snp_strict": false, + "snp_af_weight": 0.5 + }, + "blast_parameters": { + "length_threshold": 15, + "evalue_threshold": 10.0, + "max_mismatches": 3, + "three_prime_tolerance": 3, + "blast_evalue": 30000.0, + "blast_word_size": 7, + "blast_reward": 1, + "blast_penalty": -1, + "blast_max_hsps": 100, + "blast_dust": "yes", + "max_amplicon_size": 2000, + "ontarget_tolerance": 5 + }, + "multiplex_picker_parameters": { + "initial_solutions": 100, + "top_solutions_to_keep": 4, + "target_plexity": 20, + "minimum_plexity": 10, + "maximum_plexity": 50, + "plexity_wt_lt": 1.0, + "plexity_wt_gt": 1.0, + "force_plexity": false, + "allow_split_panel": false, + "max_splits": 2, + "wt_pair_penalty": 1.0, + "wt_off_target": 5.0, + "wt_cross_dimer": 1.0, + "wt_pair_dimer": 0.5, + "wt_snp_penalty": 1.0 + } +} diff --git a/src/plexus/data/nn_model/match.json b/src/plexus/data/nn_model/match.json new file mode 100644 index 0000000..149a993 --- /dev/null +++ b/src/plexus/data/nn_model/match.json @@ -0,0 +1,18 @@ +{ + "AT/TA": -0.88, + "TA/AT": -0.60, + "AA/TT": -1.02, + "TT/AA": -1.02, + "AC/TG": -1.46, + "GT/CA": -1.46, + "CA/GT": -1.46, + "TG/AC": -1.46, + "TC/AG": -1.32, + "GA/CT": -1.32, + "AG/TC": -1.29, + "CT/GA": -1.29, + "CG/GC": -2.17, + "GC/CG": -2.24, + "GG/CC": -1.83, + "CC/GG": -1.83 +} diff --git a/src/plexus/data/nn_model/single_mismatch.json b/src/plexus/data/nn_model/single_mismatch.json new file mode 100644 index 0000000..ccba475 --- 
/dev/null +++ b/src/plexus/data/nn_model/single_mismatch.json @@ -0,0 +1,50 @@ +{ + "AG/TT": 0.71, + "GT/CG": -0.59, + "CA/GC": 0.75, + "TC/AA": 1.33, + "GC/CT": 0.62, + "AG/TA": 0.02, + "TA/AG": 0.42, + "TA/AA": 0.69, + "AG/TG": -0.13, + "CT/GT": -0.12, + "AT/TG": 0.07, + "TG/AT": 0.43, + "CC/GA": 0.79, + "AC/TT": 0.64, + "GT/CC": 0.98, + "CA/GG": 0.03, + "TG/AA": 0.74, + "AC/TC": 1.33, + "CG/GG": -0.11, + "GT/CT": 0.45, + "CG/GT": -0.47, + "TT/AG": 0.34, + "GA/CC": 0.81, + "AT/TC": 0.73, + "TC/AT": 0.97, + "CG/GA": 0.11, + "AA/TA": 0.61, + "CC/GC": 0.70, + "GG/CG": -1.11, + "TT/AT": 0.68, + "CT/GG": -0.32, + "AA/TC": 0.88, + "GC/CA": 0.47, + "CC/GT": 0.62, + "TT/AC": 0.75, + "GA/CG": -0.25, + "CA/GA": 0.43, + "GC/CC": 0.79, + "TG/AG": 0.44, + "GG/CT": 0.08, + "AC/TA": 0.77, + "TA/AC": 0.92, + "CT/GC": 0.40, + "AA/TG": 0.14, + "GG/CA": -0.52, + "GA/CA": 0.17, + "TC/AC": 1.05, + "AT/TT": 0.69 +} diff --git a/src/plexus/utils/root_dir.py b/src/plexus/utils/root_dir.py deleted file mode 100644 index 38213e5..0000000 --- a/src/plexus/utils/root_dir.py +++ /dev/null @@ -1,3 +0,0 @@ -from pathlib import Path - -ROOT_DIR = Path(__file__).absolute().parent.parent.parent.parent diff --git a/src/plexus/version.py b/src/plexus/version.py index 5c4105c..7863915 100644 --- a/src/plexus/version.py +++ b/src/plexus/version.py @@ -1 +1 @@ -__version__ = "1.0.1" +__version__ = "1.0.2" diff --git a/tests/test_aligner.py b/tests/test_aligner.py index 51ff7d1..1329b0f 100644 --- a/tests/test_aligner.py +++ b/tests/test_aligner.py @@ -2,6 +2,8 @@ from __future__ import annotations +from importlib.resources import as_file, files + import pytest from plexus.aligner.align import ( @@ -9,14 +11,12 @@ PrimerDimerPredictor, create_nn_score_dt, ) -from plexus.utils.root_dir import ROOT_DIR # --------------------------------------------------------------------------- # Shared paths (resolved once at module level) # --------------------------------------------------------------------------- 
-_MATCH_JSON = f"{ROOT_DIR}/config/nn_model/match.json" -_SINGLE_MM_JSON = f"{ROOT_DIR}/config/nn_model/single_mismatch.json" +_DATA_PKG = files("plexus.data") _DOUBLE_MM_SCORE = 0.2 # All 16 Watson-Crick dinucleotide pairs present in match.json @@ -50,11 +50,15 @@ class TestCreateNnScoreDict: @pytest.fixture(scope="class") def nn_scores(self) -> dict[str, float]: - return create_nn_score_dt( - match_json=_MATCH_JSON, - single_mismatch_json=_SINGLE_MM_JSON, - double_mismatch_score=_DOUBLE_MM_SCORE, - ) + with ( + as_file(_DATA_PKG / "nn_model" / "match.json") as match_path, + as_file(_DATA_PKG / "nn_model" / "single_mismatch.json") as mm_path, + ): + return create_nn_score_dt( + match_json=str(match_path), + single_mismatch_json=str(mm_path), + double_mismatch_score=_DOUBLE_MM_SCORE, + ) def test_all_16_match_pairs_present(self, nn_scores): """All 16 Watson-Crick dinucleotide pairs appear as keys.""" From 5a7b4231dbbe7030e120e8554948bf0470ee32a9 Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 08:26:05 +0100 Subject: [PATCH 2/8] Retain all tied least affected pairs for off-targets and SNPs --- CHANGELOG.md | 4 ++++ src/plexus/blast/specificity.py | 15 +++++++++------ src/plexus/snpcheck/checker.py | 12 +++++++----- tests/test_blast_specificity.py | 19 ++++++++++++++++++- tests/test_snpcheck.py | 20 +++++++++++++++++++- 5 files changed, 57 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66babdd..99b9af3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [1.0.2] - 2026-03-03 +### Changed + +- **SNP and off-target filters now retain all tied least-affected pairs** (`snpcheck/checker.py`, `blast/specificity.py`): When all primer pairs for a junction overlap SNPs or have off-target products, the filters now keep every pair tied at the minimum count instead of arbitrarily picking one. 
This lets the downstream selector evaluate tied candidates on other properties (Tm, GC%, pair penalty, etc.). + ### Fixed - **Package data files not found in global installs**: Config presets and alignment parameter files were not included in the wheel because they lived outside the Python package at the project root (`config/`). Moved all data files (`designer_default_config.json`, `designer_lenient_config.json`, `alignment_parameters.json`, `nn_model/`) into `src/plexus/data/` and switched `config.py` and `aligner/align.py` from `ROOT_DIR` path concatenation to `importlib.resources.files()`. Removed the now-unused `utils/root_dir.py`. `uv tool install` and `pip install` now work correctly without an editable install. diff --git a/src/plexus/blast/specificity.py b/src/plexus/blast/specificity.py index ccf4f2b..2d8af82 100644 --- a/src/plexus/blast/specificity.py +++ b/src/plexus/blast/specificity.py @@ -212,15 +212,18 @@ def filter_offtarget_pairs(panel: MultiplexPanel) -> tuple[int, list[str]]: f"with off-target products, {len(clean)} clean pair(s) remain" ) else: - # All pairs have off-targets — keep the least affected one - best = min(junction.primer_pairs, key=lambda p: len(p.off_target_products)) - removed = len(junction.primer_pairs) - 1 - junction.primer_pairs = [best] + # All pairs have off-targets — keep all with the fewest + min_ot = min(len(p.off_target_products) for p in junction.primer_pairs) + best_pairs = [ + p for p in junction.primer_pairs if len(p.off_target_products) == min_ot + ] + removed = len(junction.primer_pairs) - len(best_pairs) + junction.primer_pairs = best_pairs fallback_junctions.append(junction.name) logger.warning( f"Junction {junction.name}: all pairs have off-target products; " - f"keeping pair {best.pair_id} with fewest " - f"off-targets={len(best.off_target_products)}" + f"keeping {len(best_pairs)} pair(s) with fewest " + f"off-targets={min_ot}" ) total_removed += removed diff --git a/src/plexus/snpcheck/checker.py 
b/src/plexus/snpcheck/checker.py index 2bd5b98..6ad7499 100644 --- a/src/plexus/snpcheck/checker.py +++ b/src/plexus/snpcheck/checker.py @@ -274,14 +274,16 @@ def filter_snp_pairs(panel: MultiplexPanel) -> tuple[int, list[str]]: f"overlapping SNPs, {len(clean)} clean pair(s) remain" ) else: - # All pairs have SNPs — keep the least affected one - best = min(junction.primer_pairs, key=lambda p: p.snp_count) - removed = len(junction.primer_pairs) - 1 - junction.primer_pairs = [best] + # All pairs have SNPs — keep all with the lowest snp_count + min_snps = min(p.snp_count for p in junction.primer_pairs) + best_pairs = [p for p in junction.primer_pairs if p.snp_count == min_snps] + removed = len(junction.primer_pairs) - len(best_pairs) + junction.primer_pairs = best_pairs fallback_junctions.append(junction.name) logger.warning( f"Junction {junction.name}: all pairs overlap SNPs; " - f"keeping pair {best.pair_id} with lowest snp_count={best.snp_count}" + f"keeping {len(best_pairs)} pair(s) with lowest " + f"snp_count={min_snps}" ) total_removed += removed diff --git a/tests/test_blast_specificity.py b/tests/test_blast_specificity.py index 07b230e..76c74ea 100644 --- a/tests/test_blast_specificity.py +++ b/tests/test_blast_specificity.py @@ -513,6 +513,23 @@ def test_all_dirty_fallback_keeps_least_affected(self): assert fallbacks == ["J1"] assert panel.junctions[0].primer_pairs == [least] + def test_all_dirty_fallback_keeps_all_tied(self): + """When all pairs have off-targets, ALL with fewest are kept.""" + tied_a = self._make_pair("P1", off_targets=1) + worst = self._make_pair("P2", off_targets=5) + tied_b = self._make_pair("P3", off_targets=1) + + panel = MagicMock(spec=MultiplexPanel) + panel.junctions = [ + self._make_junction("J1", [tied_a, worst, tied_b]), + ] + + removed, fallbacks = filter_offtarget_pairs(panel) + + assert removed == 1 + assert fallbacks == ["J1"] + assert panel.junctions[0].primer_pairs == [tied_a, tied_b] + def 
test_empty_junction_no_crash(self): """A junction with no primer pairs is skipped gracefully.""" panel = MagicMock(spec=MultiplexPanel) @@ -541,7 +558,7 @@ def test_multiple_junctions(self): removed, fallbacks = filter_offtarget_pairs(panel) - assert removed == 2 # 1 from J1, 1 from J2 + assert removed == 2 # 1 from J1, 1 from J2 (no ties in J2) assert fallbacks == ["J2"] assert panel.junctions[0].primer_pairs == [j1_clean] assert panel.junctions[1].primer_pairs == [j2_least] diff --git a/tests/test_snpcheck.py b/tests/test_snpcheck.py index f8efc06..08c8198 100644 --- a/tests/test_snpcheck.py +++ b/tests/test_snpcheck.py @@ -688,6 +688,24 @@ def test_keeps_least_affected_when_all_dirty(self): assert len(panel.junctions[0].primer_pairs) == 1 assert panel.junctions[0].primer_pairs[0].pair_id == "pair_b" + def test_keeps_all_tied_least_affected_when_all_dirty(self): + """When all pairs have SNPs, keep ALL with the lowest snp_count.""" + pair_a = _make_pair(pair_id="pair_a", snp_count=1) + pair_b = _make_pair(pair_id="pair_b", snp_count=3) + pair_c = _make_pair(pair_id="pair_c", snp_count=1) + junction = _make_junction() + junction.name = "J_TIED" + junction.primer_pairs = [pair_a, pair_b, pair_c] + panel = _make_panel([junction]) + + removed, fallback = filter_snp_pairs(panel) + + assert removed == 1 + assert fallback == ["J_TIED"] + assert len(panel.junctions[0].primer_pairs) == 2 + ids = {p.pair_id for p in panel.junctions[0].primer_pairs} + assert ids == {"pair_a", "pair_c"} + def test_no_pairs_skipped(self): """Junction with no primer pairs -> no error, zero removed.""" junction = _make_junction() @@ -709,7 +727,7 @@ def test_returns_removal_count(self): _make_pair(pair_id="j1_dirty1", snp_count=1), _make_pair(pair_id="j1_dirty2", snp_count=3), ] - # Junction 2: all dirty (3 pairs) -> 2 removed (keep best) + # Junction 2: all dirty (3 pairs, no ties) -> 2 removed (keep best) j2 = _make_junction() j2.name = "J2" j2.primer_pairs = [ From 
e142d81cb79bf6883c6f9dcc63d8045368122aa0 Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 08:44:20 +0100 Subject: [PATCH 3/8] Updated config --- config/designer_default_config.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/designer_default_config.json b/config/designer_default_config.json index 9fb5a25..0c2cbf7 100644 --- a/config/designer_default_config.json +++ b/config/designer_default_config.json @@ -8,8 +8,8 @@ "primer_min_length": 15, "primer_max_length": 30, "PRIMER_OPT_BOUND": 98.0, - "PRIMER_MIN_BOUND": 30.0, - "PRIMER_MAX_BOUND": 120.0, + "PRIMER_MIN_BOUND": 25.0, + "PRIMER_MAX_BOUND": 110.0, "junction_padding_bases": 3, "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", From 1ce9350baeac20a1f63f9bc8dadecea821aabd6e Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 09:21:14 +0100 Subject: [PATCH 4/8] Improved CLI UI experience --- CHANGELOG.md | 7 +- data/design_regions.csv | 5 - data/junctions.csv | 24 +- src/plexus/cli.py | 1 + src/plexus/designer/design.py | 21 +- src/plexus/logging.py | 37 +- src/plexus/orchestrator.py | 45 +- src/plexus/pipeline.py | 766 +++++++++++++++++++-------------- src/plexus/snpcheck/checker.py | 6 + tests/test_design.py | 2 +- 10 files changed, 548 insertions(+), 366 deletions(-) delete mode 100644 data/design_regions.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 99b9af3..06c44e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,17 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [1.0.2] - 2026-03-03 +## [1.0.2] - 04-03-2026 ### Changed +- **Rich progress bars replace log output during pipeline runs**: The CLI now shows clean Rich progress bars on stderr (step-level + per-junction detail for primer design and SNP check) instead of a wall of log messages. All detailed logs still go to the file. Warnings and errors are printed above the progress bar. Multi-panel parallel mode shows a panel-level progress bar. Progress bars are only active when stderr is a TTY; non-interactive runs behave as before. - **SNP and off-target filters now retain all tied least-affected pairs** (`snpcheck/checker.py`, `blast/specificity.py`): When all primer pairs for a junction overlap SNPs or have off-target products, the filters now keep every pair tied at the minimum count instead of arbitrarily picking one. This lets the downstream selector evaluate tied candidates on other properties (Tm, GC%, pair penalty, etc.). ### Fixed - **Package data files not found in global installs**: Config presets and alignment parameter files were not included in the wheel because they lived outside the Python package at the project root (`config/`). Moved all data files (`designer_default_config.json`, `designer_lenient_config.json`, `alignment_parameters.json`, `nn_model/`) into `src/plexus/data/` and switched `config.py` and `aligner/align.py` from `ROOT_DIR` path concatenation to `importlib.resources.files()`. Removed the now-unused `utils/root_dir.py`. `uv tool install` and `pip install` now work correctly without an editable install. -## [1.0.1] - 2026-03-03 +## [1.0.1] - 03-03-2026 ### Changed @@ -36,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **3'-end tolerance for BLAST annotation** (`annotator.py`, `config.py`, `specificity.py`, `pipeline.py`): New `three_prime_tolerance` parameter (default 3) relaxes the `from_3prime` check from `qend == qlen` to `qlen - qend <= tolerance`. 
BLAST's local alignment clips terminal mismatches, causing hits like the DCAF12L1 reverse primer (19/21bp aligned, 2bp clipped at 3' end) to be wrongly discarded as "not 3'-anchored" even though Primer-BLAST detects them via semi-global alignment. - Tests for `find_max_poly_gc`, `check_kmer` poly-GC integration, BLAST evalue/reward/penalty/word_size parameter forwarding, and specificity check threading. -## [1.0.0] - 2026-03-03 +## [1.0.0] - 03-03-2026 First stable release. All v1.0 roadmap items complete — see `docs/ROADMAP.md` for the full list. Includes correctness fixes (BLAST annotation, coordinate handling, off-target filtering), diff --git a/data/design_regions.csv b/data/design_regions.csv deleted file mode 100644 index 5c1ab15..0000000 --- a/data/design_regions.csv +++ /dev/null @@ -1,5 +0,0 @@ -Name,Chrom,Five_Prime_Coordinate,Three_Prime_Coordinate -EGFR_T790M,chr7,55181378,55181378 -KRAS_G12D,chr12,25245350,25245350 -KRAS_G13R,chr12,25245348,25245349 -BRAF_V600E,chr7,140753336,140753336 diff --git a/data/junctions.csv b/data/junctions.csv index 58c7ae2..5c1ab15 100644 --- a/data/junctions.csv +++ b/data/junctions.csv @@ -1,21 +1,5 @@ Name,Chrom,Five_Prime_Coordinate,Three_Prime_Coordinate -HOXA2_p.V274F,chr7,27101037,27101037 -BRAF_p.L485W ,chr7,140778054,140778054 -CLTCL1_p.R354H,chr22,19234615,19234615 -TTN_p.E8395K ,chr2,178715052,178715052 -FEM1B_,chr2,177216892,177216892 -PDCL3_p.E49D,chr2,100568944,100568944 -DCDC1_p.T1379fs*2,chr11,30905134,30905134 -KIF22_p.Q88K,chr16,29798664,29798664 -MAMDC4_p.G1071E,chr9,136859904,136859904 -WLS_p.I360N ,chr1,68144579,68144579 -RPA1_p.R31H,chr17,1843927,1843927 -ARL6_p.T181I,chr3,97791959,97791959 -NOLC1_p.I687V,chr10,102162228,102162228 -GOLGA2_p.R322W,chr9,128262652,128262652 -CRTC1_p.P334L ,chr19,18765518,18765518 -FEM1B_c.249-5T>C,chr15,68289602,68289602 -UNK_p.R77fs*103,chr17,75809883,75809883 -CLTCL1_p.R354H,chr22,19234615,19234615 -MBP_p.G56R,chr18,77017242,77017242 
-ZNF729_p.C1134Y,chr19,22316818,22316818 +EGFR_T790M,chr7,55181378,55181378 +KRAS_G12D,chr12,25245350,25245350 +KRAS_G13R,chr12,25245348,25245349 +BRAF_V600E,chr7,140753336,140753336 diff --git a/src/plexus/cli.py b/src/plexus/cli.py index f13ea66..c5821a9 100644 --- a/src/plexus/cli.py +++ b/src/plexus/cli.py @@ -346,6 +346,7 @@ def run( debug=debug, fasta_sha256=fasta_sha256, snp_vcf_sha256=snp_vcf_sha256, + show_progress=True, ) if isinstance(result, MultiPanelResult): diff --git a/src/plexus/designer/design.py b/src/plexus/designer/design.py index 0248243..459d22d 100644 --- a/src/plexus/designer/design.py +++ b/src/plexus/designer/design.py @@ -2,6 +2,8 @@ # Primer design module — "plexus" design algorithm # ================================================================================ +from __future__ import annotations + import warnings from loguru import logger @@ -20,19 +22,24 @@ # ================================================================================ -def design_primers(panel: MultiplexPanel, method: str = "plexus") -> MultiplexPanel: +def design_primers( + panel: MultiplexPanel, + method: str = "plexus", + on_junction_done: callable | None = None, +) -> MultiplexPanel: """ Wrapper function to call the design algorithm. Args: panel: An instantiated MultiplexPanel object created with panel_factory. method: Design algorithm to use; defaults to "plexus". + on_junction_done: Optional callback invoked after each junction is processed. Returns: A MultiplexPanel object with primer designs. 
""" if method == "plexus": - return design_multiplex_primers(panel) + return design_multiplex_primers(panel, on_junction_done=on_junction_done) raise ValueError(f"Unknown design method: {method}") @@ -41,12 +48,16 @@ def design_primers(panel: MultiplexPanel, method: str = "plexus") -> MultiplexPa # ================================================================================ -def design_multiplex_primers(panel: MultiplexPanel) -> MultiplexPanel: +def design_multiplex_primers( + panel: MultiplexPanel, + on_junction_done: callable | None = None, +) -> MultiplexPanel: """ A function that picks individual primers left and right of the provided junctions. Args: panel: A MultiplexPanel object with loaded junctions + on_junction_done: Optional callback invoked after each junction is processed. Returns: A MultiplexPanel object containing the left and right primer designs for each junction. @@ -206,7 +217,9 @@ def design_multiplex_primers(panel: MultiplexPanel) -> MultiplexPanel: ) junction.primer_pairs = [] junction._design_error = str(e) - continue + finally: + if on_junction_done: + on_junction_done() # Separate failed junctions (no primer pairs) failed = [jn for jn in panel.junctions if not jn.primer_pairs] diff --git a/src/plexus/logging.py b/src/plexus/logging.py index 331e92a..4b09d56 100644 --- a/src/plexus/logging.py +++ b/src/plexus/logging.py @@ -7,10 +7,27 @@ from loguru import logger + +class _ConsoleFilter: + """Togglable filter for the stderr log handler. + + When disabled, INFO messages are suppressed on the console while + Rich progress bars are active. File logging is unaffected. 
+ """ + + def __init__(self): + self.enabled = True + + def __call__(self, record): + return self.enabled + + +_console_filter = _ConsoleFilter() + # Remove default handler logger.remove() -# Add console handler with colored output +# Add console handler with colored output (filtered via _console_filter) logger.add( sys.stderr, format=( @@ -19,9 +36,20 @@ ), level="INFO", colorize=True, + filter=_console_filter, ) +def suppress_console_logging(): + """Disable the stderr log handler (used while progress bars are active).""" + _console_filter.enabled = False + + +def restore_console_logging(): + """Re-enable the stderr log handler.""" + _console_filter.enabled = True + + def configure_file_logging(log_dir: str = ".", debug: bool = False) -> str: """ Configure file logging with a timestamped log file. @@ -50,4 +78,9 @@ def configure_file_logging(log_dir: str = ".", debug: bool = False) -> str: # Re-export logger for convenient imports -__all__ = ["logger", "configure_file_logging"] +__all__ = [ + "logger", + "configure_file_logging", + "suppress_console_logging", + "restore_console_logging", +] diff --git a/src/plexus/orchestrator.py b/src/plexus/orchestrator.py index 8f5bd5d..d079674 100644 --- a/src/plexus/orchestrator.py +++ b/src/plexus/orchestrator.py @@ -12,12 +12,21 @@ import json import shutil +import sys from concurrent.futures import ProcessPoolExecutor, as_completed from pathlib import Path from typing import Any import pandas as pd from loguru import logger +from rich.progress import ( + BarColumn, + MofNCompleteColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, +) from plexus.pipeline import MultiPanelResult, PipelineResult, run_pipeline @@ -125,6 +134,7 @@ def run_multi_panel( output_dir: str | Path = "./output", parallel: bool = False, max_workers: int | None = None, + show_progress: bool = False, **pipeline_kwargs: Any, ) -> MultiPanelResult | PipelineResult: """ @@ -171,6 +181,7 @@ def run_multi_panel( input_file=input_file, 
fasta_file=fasta_file, output_dir=output_dir, + show_progress=show_progress, **pipeline_kwargs, ) @@ -192,11 +203,17 @@ def run_multi_panel( if parallel: workers = max_workers or len(panels) logger.info(f"Running {len(panels)} panels in parallel (workers={workers})") + + # Panel-level progress bar for parallel mode (no per-step bars + # because subprocesses would fight over stderr). + _use_panel_bar = show_progress and sys.stderr.isatty() + with ProcessPoolExecutor(max_workers=workers) as executor: futures = {} for panel_id, csv_path in panel_csvs.items(): kw = dict(pipeline_kwargs) kw["panel_name"] = panel_id + # Do NOT forward show_progress to child processes future = executor.submit( _run_single_panel, panel_id=panel_id, @@ -207,11 +224,28 @@ def run_multi_panel( ) futures[future] = panel_id - for future in as_completed(futures): - pid = futures[future] - _, result = future.result() - results[pid] = result - logger.info(f"Panel '{pid}' completed.") + if _use_panel_bar: + progress = Progress( + SpinnerColumn(), + TextColumn("[bold blue]{task.description}"), + BarColumn(), + MofNCompleteColumn(), + TimeElapsedColumn(), + ) + task = progress.add_task("Running panels", total=len(panels)) + with progress: + for future in as_completed(futures): + pid = futures[future] + _, result = future.result() + results[pid] = result + logger.info(f"Panel '{pid}' completed.") + progress.advance(task) + else: + for future in as_completed(futures): + pid = futures[future] + _, result = future.result() + results[pid] = result + logger.info(f"Panel '{pid}' completed.") else: logger.info(f"Running {len(panels)} panels sequentially.") for panel_id, csv_path in panel_csvs.items(): @@ -222,6 +256,7 @@ def run_multi_panel( panel_csv=csv_path, fasta_file=fasta_file, output_dir=output_dir, + show_progress=show_progress, **kw, ) results[panel_id] = result diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py index eb8cbb5..dd73c40 100644 --- a/src/plexus/pipeline.py +++ 
b/src/plexus/pipeline.py @@ -14,15 +14,29 @@ from __future__ import annotations +import sys +from contextlib import contextmanager from dataclasses import dataclass, field from pathlib import Path from loguru import logger +from rich.progress import ( + BarColumn, + MofNCompleteColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, +) from plexus.config import DesignerConfig, load_config from plexus.designer.design import design_primers from plexus.designer.multiplexpanel import MultiplexPanel, panel_factory -from plexus.logging import configure_file_logging +from plexus.logging import ( + configure_file_logging, + restore_console_logging, + suppress_console_logging, +) from plexus.utils.env import ( check_disk_space, get_missing_tools, @@ -195,6 +209,83 @@ def _resolve_source_vcf( return get_registered_snp_vcf(genome) +_STEP_LABELS = [ + "Creating panel", + "Designing primers", + "Saving candidates", + "SNP check", + "BLAST specificity check", + "Multiplex optimization", + "Saving results", +] + + +@contextmanager +def _progress_context(enabled: bool): + """Yield a (Progress | None, advance_step, sub_task_ctx) tuple. + + When *enabled* is False the progress object is None and the helpers are + no-ops, so the caller doesn't need conditionals everywhere. 
+ """ + if not enabled or not sys.stderr.isatty(): + # No-op helpers + def _noop_advance(label=None): + pass + + @contextmanager + def _noop_sub(label, total=None): + yield lambda: None + + yield None, _noop_advance, _noop_sub + return + + suppress_console_logging() + + progress = Progress( + SpinnerColumn(), + TextColumn("[bold blue]{task.description}"), + BarColumn(), + MofNCompleteColumn(), + TimeElapsedColumn(), + transient=False, + ) + + # Temporary WARNING+ sink that prints through the progress console + def _warning_sink(message): + progress.console.print(message, end="") + + warning_handler_id = logger.add(_warning_sink, level="WARNING", format="{message}") + + overall = progress.add_task("Starting…", total=len(_STEP_LABELS)) + _step_idx = 0 + + def advance_step(label=None): + nonlocal _step_idx + _step_idx += 1 + desc = ( + f"[{_step_idx}/{len(_STEP_LABELS)}] {label}" + if label + else f"Step {_step_idx}/{len(_STEP_LABELS)}" + ) + progress.update(overall, advance=1, description=desc) + + @contextmanager + def sub_task(label, total=None): + """Context manager for a per-junction sub-task bar.""" + tid = progress.add_task(label, total=total) + try: + yield lambda: progress.advance(tid) + finally: + progress.remove_task(tid) + + try: + with progress: + yield progress, advance_step, sub_task + finally: + logger.remove(warning_handler_id) + restore_console_logging() + + def run_pipeline( input_file: str | Path, fasta_file: str | Path, @@ -216,6 +307,7 @@ def run_pipeline( debug: bool = False, fasta_sha256: str | None = None, snp_vcf_sha256: str | None = None, + show_progress: bool = False, ) -> PipelineResult: """ Run the complete multiplex primer design pipeline. @@ -245,6 +337,9 @@ def run_pipeline( selector : str Multiplex selector algorithm. One of "Greedy", "Random", "BruteForce", "SimulatedAnnealing", or "DFS" (default: "Greedy"). 
+ show_progress : bool + If True and stderr is a TTY, show Rich progress bars instead of + log output on the console (default: False). Returns ------- @@ -373,370 +468,389 @@ def run_pipeline( _exc: BaseException | None = None try: - # Enable file logging to output directory - log_file = configure_file_logging(str(output_dir), debug=debug) - logger.info(f"Log file: {log_file}") - logger.info(f"Provenance written to {provenance_path}") - - # ========================================================================= - # Step 1: Create panel and load junctions - # ========================================================================= - logger.info(f"Creating panel '{panel_name}' with {genome} reference...") - - panel = panel_factory( - name=panel_name, - genome=genome, - design_input_file=str(input_file), - fasta_file=str(fasta_file), - preset=preset, - config_file=str(config_path) if config_path else None, - padding=padding, - ) - # Update panel config to use our loaded config - panel.config = config - logger.info(f"Loaded {len(panel.junctions)} junctions") - - # Initialize result now that we have a valid panel - result = PipelineResult( - panel=panel, - output_dir=output_dir, - config=config, - steps_completed=["panel_created"], - ) + with _progress_context(show_progress) as (_progress, advance_step, sub_task): + # Enable file logging to output directory + log_file = configure_file_logging(str(output_dir), debug=debug) + logger.info(f"Log file: {log_file}") + logger.info(f"Provenance written to {provenance_path}") + + # ========================================================================= + # Step 1: Create panel and load junctions + # ========================================================================= + advance_step("Creating panel") + logger.info(f"Creating panel '{panel_name}' with {genome} reference...") + + panel = panel_factory( + name=panel_name, + genome=genome, + design_input_file=str(input_file), + fasta_file=str(fasta_file), + preset=preset, + 
config_file=str(config_path) if config_path else None, + padding=padding, + ) + # Update panel config to use our loaded config + panel.config = config + logger.info(f"Loaded {len(panel.junctions)} junctions") + + # Initialize result now that we have a valid panel + result = PipelineResult( + panel=panel, + output_dir=output_dir, + config=config, + steps_completed=["panel_created"], + ) - # ========================================================================= - # Step 2: Design primers - # ========================================================================= - logger.info(f"Designing primers using '{design_method}' method...") + # ========================================================================= + # Step 2: Design primers + # ========================================================================= + advance_step("Designing primers") + logger.info(f"Designing primers using '{design_method}' method...") - try: - panel = design_primers(panel, method=design_method) - result.steps_completed.append("primers_designed") - - # Capture any junctions that failed during design - if hasattr(panel, "failed_junctions") and panel.failed_junctions: - result.failed_junctions = panel.failed_junctions - for fj in panel.failed_junctions: - err_msg = getattr(fj, "_design_error", "no valid primer pairs") - result.errors.append( - f"Junction '{fj.name}' failed primer design: {err_msg}" + try: + n_junctions = len(panel.junctions) + with sub_task("Designing primers", total=n_junctions) as tick: + panel = design_primers( + panel, method=design_method, on_junction_done=tick ) + result.steps_completed.append("primers_designed") - if not panel.junctions: - logger.error("All junctions failed primer design. 
Cannot continue.") - result.errors.append("All junctions failed primer design.") - return result + # Capture any junctions that failed during design + if hasattr(panel, "failed_junctions") and panel.failed_junctions: + result.failed_junctions = panel.failed_junctions + for fj in panel.failed_junctions: + err_msg = getattr(fj, "_design_error", "no valid primer pairs") + result.errors.append( + f"Junction '{fj.name}' failed primer design: {err_msg}" + ) - total_pairs = sum( - len(j.primer_pairs) for j in panel.junctions if j.primer_pairs - ) - logger.info( - f"Designed {total_pairs} primer pairs across {len(panel.junctions)} junctions" - ) - except Exception as e: - logger.error(f"Primer design failed: {e}") - result.errors.append(f"Primer design failed: {e}") - raise + if not panel.junctions: + logger.error("All junctions failed primer design. Cannot continue.") + result.errors.append("All junctions failed primer design.") + return result - # ========================================================================= - # Step 3: Save intermediate results - # ========================================================================= - logger.info("Saving candidate primer pairs...") + total_pairs = sum( + len(j.primer_pairs) for j in panel.junctions if j.primer_pairs + ) + logger.info( + f"Designed {total_pairs} primer pairs across {len(panel.junctions)} junctions" + ) + except Exception as e: + logger.error(f"Primer design failed: {e}") + result.errors.append(f"Primer design failed: {e}") + raise - try: - pairs_file = output_dir / "candidate_pairs.csv" - panel.save_candidate_pairs_to_csv(str(pairs_file)) - result.steps_completed.append("candidates_saved") - logger.info(f"Saved candidate pairs to {pairs_file}") - except Exception as e: - logger.warning(f"Could not save candidate pairs: {e}") - result.errors.append(f"Save candidates failed: {e}") - - # ========================================================================= - # Step 3.5: SNP overlap check (optional) - # 
========================================================================= - snp_config = config.snp_check_parameters - _run_snpcheck = not skip_snpcheck - - if _run_snpcheck: - af_thresh = ( - snp_af_threshold - if snp_af_threshold is not None - else snp_config.af_threshold - ) + # ========================================================================= + # Step 3: Save intermediate results + # ========================================================================= + advance_step("Saving candidates") + logger.info("Saving candidate primer pairs...") try: - from plexus.snpcheck.snp_data import get_snp_vcf - - resolved_vcf = get_snp_vcf( - panel=panel, - output_dir=output_dir, - user_vcf=snp_vcf, - padding=padding, - genome=genome, + pairs_file = output_dir / "candidate_pairs.csv" + panel.save_candidate_pairs_to_csv(str(pairs_file)) + result.steps_completed.append("candidates_saved") + logger.info(f"Saved candidate pairs to {pairs_file}") + except Exception as e: + logger.warning(f"Could not save candidate pairs: {e}") + result.errors.append(f"Save candidates failed: {e}") + + # ========================================================================= + # Step 3.5: SNP overlap check (optional) + # ========================================================================= + snp_config = config.snp_check_parameters + _run_snpcheck = not skip_snpcheck + + if _run_snpcheck: + advance_step("SNP check") + af_thresh = ( + snp_af_threshold + if snp_af_threshold is not None + else snp_config.af_threshold ) - from plexus.snpcheck.checker import run_snp_check + try: + from plexus.snpcheck.snp_data import get_snp_vcf + + resolved_vcf = get_snp_vcf( + panel=panel, + output_dir=output_dir, + user_vcf=snp_vcf, + padding=padding, + genome=genome, + ) + + from plexus.snpcheck.checker import run_snp_check + + n_junctions_snp = len(panel.junctions) + with sub_task("Checking SNPs", total=n_junctions_snp) as tick: + run_snp_check( + panel=panel, + vcf_path=str(resolved_vcf), + 
af_threshold=af_thresh, + snp_penalty_weight=snp_config.snp_penalty_weight, + snp_3prime_window=snp_config.snp_3prime_window, + snp_3prime_multiplier=snp_config.snp_3prime_multiplier, + snp_af_weight=snp_config.snp_af_weight, + on_junction_done=tick, + ) + result.steps_completed.append("snp_checked") + + if snp_strict or snp_config.snp_strict: + from plexus.snpcheck.checker import filter_snp_pairs - run_snp_check( - panel=panel, - vcf_path=str(resolved_vcf), - af_threshold=af_thresh, - snp_penalty_weight=snp_config.snp_penalty_weight, - snp_3prime_window=snp_config.snp_3prime_window, - snp_3prime_multiplier=snp_config.snp_3prime_multiplier, - snp_af_weight=snp_config.snp_af_weight, + n_removed, fallback_junctions = filter_snp_pairs(panel) + logger.info( + f"SNP strict mode: removed {n_removed} primer pairs overlapping SNPs" + ) + for name in fallback_junctions: + result.errors.append( + f"SNP strict: '{name}' — no SNP-free pairs found; least-affected pair kept" + ) + result.steps_completed.append("snp_strict_filtered") + except Exception as e: + logger.error(f"SNP check failed: {e}") + result.errors.append(f"SNP check failed: {e}") + else: + advance_step("Skipped SNP check") + logger.info("Skipping SNP check") + result.steps_completed.append("snp_check_skipped") + + # ========================================================================= + # Step 4: Run BLAST specificity check (optional) + # ========================================================================= + if run_blast: + advance_step("BLAST specificity check") + logger.info( + f"Running BLAST specificity check (num_threads={blast_num_threads})..." 
) - result.steps_completed.append("snp_checked") - if snp_strict or snp_config.snp_strict: - from plexus.snpcheck.checker import filter_snp_pairs + try: + from plexus.blast.specificity import ( + filter_offtarget_pairs, + run_specificity_check, + ) - n_removed, fallback_junctions = filter_snp_pairs(panel) - logger.info( - f"SNP strict mode: removed {n_removed} primer pairs overlapping SNPs" + blast_dir = output_dir / "blast" + blast_config = config.blast_parameters + run_specificity_check( + panel, + str(blast_dir), + str(fasta_file), + num_threads=blast_num_threads, + length_threshold=blast_config.length_threshold, + evalue_threshold=blast_config.evalue_threshold, + max_mismatches=blast_config.max_mismatches, + three_prime_tolerance=blast_config.three_prime_tolerance, + max_amplicon_size=blast_config.max_amplicon_size, + ontarget_tolerance=blast_config.ontarget_tolerance, + blast_evalue=blast_config.blast_evalue, + blast_word_size=blast_config.blast_word_size, + blast_reward=blast_config.blast_reward, + blast_penalty=blast_config.blast_penalty, + blast_max_hsps=blast_config.blast_max_hsps, + blast_dust=blast_config.blast_dust, ) + result.steps_completed.append("specificity_checked") + logger.info("Specificity check complete") + + n_removed, fallback_junctions = filter_offtarget_pairs(panel) + if n_removed > 0: + logger.info( + f"Off-target filter: removed {n_removed} primer pair(s) " + "with off-target products" + ) for name in fallback_junctions: result.errors.append( - f"SNP strict: '{name}' — no SNP-free pairs found; least-affected pair kept" + f"Off-target filter: '{name}' — no clean pairs; " + "least-affected pair kept" ) - result.steps_completed.append("snp_strict_filtered") - except Exception as e: - logger.error(f"SNP check failed: {e}") - result.errors.append(f"SNP check failed: {e}") - else: - logger.info("Skipping SNP check") - result.steps_completed.append("snp_check_skipped") - - # 
========================================================================= - # Step 4: Run BLAST specificity check (optional) - # ========================================================================= - if run_blast: - logger.info( - f"Running BLAST specificity check (num_threads={blast_num_threads})..." - ) + result.steps_completed.append("offtarget_filtered") + except ImportError as e: + logger.warning(f"BLAST module not available: {e}") + result.errors.append(f"BLAST not available: {e}") + except Exception as e: + logger.error(f"Specificity check failed: {e}") + result.errors.append(f"Specificity check failed: {e}") + else: + advance_step("Skipped BLAST") + logger.info("Skipping BLAST specificity check") + result.steps_completed.append("specificity_skipped") + + # ========================================================================= + # Step 5: Multiplex optimization + # ========================================================================= + advance_step("Multiplex optimization") + logger.info("Running multiplex optimization...") try: - from plexus.blast.specificity import ( - filter_offtarget_pairs, - run_specificity_check, - ) + from plexus.selector.cost import MultiplexCostFunction + from plexus.selector.selectors import selector_collection - blast_dir = output_dir / "blast" - blast_config = config.blast_parameters - run_specificity_check( - panel, - str(blast_dir), - str(fasta_file), - num_threads=blast_num_threads, - length_threshold=blast_config.length_threshold, - evalue_threshold=blast_config.evalue_threshold, - max_mismatches=blast_config.max_mismatches, - three_prime_tolerance=blast_config.three_prime_tolerance, - max_amplicon_size=blast_config.max_amplicon_size, - ontarget_tolerance=blast_config.ontarget_tolerance, - blast_evalue=blast_config.blast_evalue, - blast_word_size=blast_config.blast_word_size, - blast_reward=blast_config.blast_reward, - blast_penalty=blast_config.blast_penalty, - blast_max_hsps=blast_config.blast_max_hsps, - 
blast_dust=blast_config.blast_dust, - ) - result.steps_completed.append("specificity_checked") - logger.info("Specificity check complete") + selector_df = panel.build_selector_dataframe() + pair_lookup = panel.build_pair_lookup() - n_removed, fallback_junctions = filter_offtarget_pairs(panel) - if n_removed > 0: - logger.info( - f"Off-target filter: removed {n_removed} primer pair(s) " - "with off-target products" + if selector_df.empty: + logger.warning( + "No primer pairs available for multiplex optimization." + ) + elif selector not in selector_collection: + raise ValueError( + f"Unknown selector '{selector}'. " + f"Available: {', '.join(selector_collection)}" ) - for name in fallback_junctions: - result.errors.append( - f"Off-target filter: '{name}' — no clean pairs; " - "least-affected pair kept" + else: + cost_fn = MultiplexCostFunction( + pair_lookup, config.multiplex_picker_parameters ) - result.steps_completed.append("offtarget_filtered") - except ImportError as e: - logger.warning(f"BLAST module not available: {e}") - result.errors.append(f"BLAST not available: {e}") - except Exception as e: - logger.error(f"Specificity check failed: {e}") - result.errors.append(f"Specificity check failed: {e}") - else: - logger.info("Skipping BLAST specificity check") - result.steps_completed.append("specificity_skipped") - - # ========================================================================= - # Step 5: Multiplex optimization - # ========================================================================= - logger.info("Running multiplex optimization...") - - try: - from plexus.selector.cost import MultiplexCostFunction - from plexus.selector.selectors import selector_collection - - selector_df = panel.build_selector_dataframe() - pair_lookup = panel.build_pair_lookup() - - if selector_df.empty: - logger.warning("No primer pairs available for multiplex optimization.") - elif selector not in selector_collection: - raise ValueError( - f"Unknown selector '{selector}'. 
" - f"Available: {', '.join(selector_collection)}" - ) - else: - cost_fn = MultiplexCostFunction( - pair_lookup, config.multiplex_picker_parameters - ) - # Warn if stochastic selector is used without a seed - stochastic_selectors = ["Greedy", "Random", "SimulatedAnnealing"] - if ( - selector in stochastic_selectors - and config.multiplex_picker_parameters.selector_seed is None - ): - if op_mode == "compliance": - logger.warning( - f"Compliance mode: stochastic selector '{selector}' used without a seed. " - "This run cannot be identically reproduced." + # Warn if stochastic selector is used without a seed + stochastic_selectors = ["Greedy", "Random", "SimulatedAnnealing"] + if ( + selector in stochastic_selectors + and config.multiplex_picker_parameters.selector_seed is None + ): + if op_mode == "compliance": + logger.warning( + f"Compliance mode: stochastic selector '{selector}' used without a seed. " + "This run cannot be identically reproduced." + ) + else: + logger.info( + f"Stochastic selector '{selector}' used without a seed. " + "Results will vary between runs." 
+ ) + + selector_cls = selector_collection[selector] + selector_obj = selector_cls( + selector_df, + cost_fn, + seed=config.multiplex_picker_parameters.selector_seed, + ) + logger.info(f"Using '{selector}' selector algorithm.") + if selector in ("Greedy", "Random"): + solutions = selector_obj.run( + N=config.multiplex_picker_parameters.initial_solutions ) else: + solutions = selector_obj.run() + + # Sort by cost and keep top solutions + solutions.sort(key=lambda m: m.cost) + top_n = config.multiplex_picker_parameters.top_solutions_to_keep + result.multiplex_solutions = solutions[:top_n] + + # Auto-apply the best solution + if solutions: + best = solutions[0] + selected = [] + for pair_id in best.primer_pairs: + pair = pair_lookup.get(pair_id) + if pair: + pair.selected = True + selected.append(pair) + result.selected_pairs = selected logger.info( - f"Stochastic selector '{selector}' used without a seed. " - "Results will vary between runs." + f"Selected {len(selected)} primer pairs (best cost: {best.cost:.2f})" ) - selector_cls = selector_collection[selector] - selector_obj = selector_cls( - selector_df, - cost_fn, - seed=config.multiplex_picker_parameters.selector_seed, - ) - logger.info(f"Using '{selector}' selector algorithm.") - if selector in ("Greedy", "Random"): - solutions = selector_obj.run( - N=config.multiplex_picker_parameters.initial_solutions - ) - else: - solutions = selector_obj.run() - - # Sort by cost and keep top solutions - solutions.sort(key=lambda m: m.cost) - top_n = config.multiplex_picker_parameters.top_solutions_to_keep - result.multiplex_solutions = solutions[:top_n] - - # Auto-apply the best solution - if solutions: - best = solutions[0] - selected = [] - for pair_id in best.primer_pairs: - pair = pair_lookup.get(pair_id) - if pair: - pair.selected = True - selected.append(pair) - result.selected_pairs = selected - logger.info( - f"Selected {len(selected)} primer pairs (best cost: {best.cost:.2f})" - ) - - 
result.steps_completed.append("multiplex_optimized") - except Exception as e: - logger.error(f"Multiplex optimization failed: {e}") - result.errors.append(f"Multiplex optimization failed: {e}") + result.steps_completed.append("multiplex_optimized") + except Exception as e: + logger.error(f"Multiplex optimization failed: {e}") + result.errors.append(f"Multiplex optimization failed: {e}") - # ========================================================================= - # Step 6: Save final results - # ========================================================================= - logger.info("Saving final results...") + # ========================================================================= + # Step 6: Save final results + # ========================================================================= + advance_step("Saving results") + logger.info("Saving final results...") - try: - # Selected multiplex (best solution) - if result.selected_pairs: - panel.save_selected_multiplex_csv( - str(output_dir / "selected_multiplex.csv"), - result.selected_pairs, - ) + try: + # Selected multiplex (best solution) + if result.selected_pairs: + panel.save_selected_multiplex_csv( + str(output_dir / "selected_multiplex.csv"), + result.selected_pairs, + ) - # Top N panel solutions - if result.multiplex_solutions: - panel.save_top_panels_csv( - str(output_dir / "top_panels.csv"), - result.multiplex_solutions, - ) + # Top N panel solutions + if result.multiplex_solutions: + panel.save_top_panels_csv( + str(output_dir / "top_panels.csv"), + result.multiplex_solutions, + ) - # Off-target details for selected pairs - if result.selected_pairs: - panel.save_off_targets_csv( - str(output_dir / "off_targets.csv"), - result.selected_pairs, - ) + # Off-target details for selected pairs + if result.selected_pairs: + panel.save_off_targets_csv( + str(output_dir / "off_targets.csv"), + result.selected_pairs, + ) - # Panel QC report (REPT-01) - if result.selected_pairs: - try: - from plexus.reporting.qc 
import generate_panel_qc + # Panel QC report (REPT-01) + if result.selected_pairs: + try: + from plexus.reporting.qc import generate_panel_qc + + qc_data = generate_panel_qc(panel.junctions) + qc_path = output_dir / "panel_qc.json" + with qc_path.open("w") as f: + _json.dump(qc_data, f, indent=2) + logger.info(f"Wrote panel QC report to {qc_path.name}") + except Exception as e: + logger.warning(f"Could not write panel QC report: {e}") + result.errors.append(f"Panel QC report failed: {e}") + + # Failed junctions report + if result.failed_junctions: + import pandas as pd + + rows = [ + { + "Junction": fj.name, + "Chrom": fj.chrom, + "Start": fj.start, + "End": fj.end, + "Error": getattr(fj, "_design_error", "unknown"), + } + for fj in result.failed_junctions + ] + pd.DataFrame(rows).to_csv( + output_dir / "failed_junctions.csv", index=False + ) + logger.info( + f"Wrote {len(rows)} failed junction(s) to failed_junctions.csv" + ) - qc_data = generate_panel_qc(panel.junctions) - qc_path = output_dir / "panel_qc.json" - with qc_path.open("w") as f: - _json.dump(qc_data, f, indent=2) - logger.info(f"Wrote panel QC report to {qc_path.name}") - except Exception as e: - logger.warning(f"Could not write panel QC report: {e}") - result.errors.append(f"Panel QC report failed: {e}") - - # Failed junctions report - if result.failed_junctions: - import pandas as pd - - rows = [ - { - "Junction": fj.name, - "Chrom": fj.chrom, - "Start": fj.start, - "End": fj.end, - "Error": getattr(fj, "_design_error", "unknown"), - } - for fj in result.failed_junctions - ] - pd.DataFrame(rows).to_csv( - output_dir / "failed_junctions.csv", index=False - ) + result.steps_completed.append("final_results_saved") + except Exception as e: + logger.warning(f"Could not save final results: {e}") + result.errors.append(f"Save final results failed: {e}") + + # ========================================================================= + # Summary + # 
========================================================================= + logger.info("=" * 60) + logger.info("Pipeline Summary") + logger.info("=" * 60) + logger.info(f"Panel: {panel.panel_name}") + logger.info(f"Junctions: {len(panel.junctions)}") + logger.info(f"Total candidate primer pairs: {result.num_primer_pairs}") + logger.info(f"Selected primer pairs: {len(result.selected_pairs)}") + if result.multiplex_solutions: logger.info( - f"Wrote {len(rows)} failed junction(s) to failed_junctions.csv" + f"Top {len(result.multiplex_solutions)} multiplex solutions " + f"(best cost: {result.multiplex_solutions[0].cost:.2f})" ) - - result.steps_completed.append("final_results_saved") - except Exception as e: - logger.warning(f"Could not save final results: {e}") - result.errors.append(f"Save final results failed: {e}") - - # ========================================================================= - # Summary - # ========================================================================= - logger.info("=" * 60) - logger.info("Pipeline Summary") - logger.info("=" * 60) - logger.info(f"Panel: {panel.panel_name}") - logger.info(f"Junctions: {len(panel.junctions)}") - logger.info(f"Total candidate primer pairs: {result.num_primer_pairs}") - logger.info(f"Selected primer pairs: {len(result.selected_pairs)}") - if result.multiplex_solutions: - logger.info( - f"Top {len(result.multiplex_solutions)} multiplex solutions " - f"(best cost: {result.multiplex_solutions[0].cost:.2f})" - ) - logger.info(f"Steps completed: {', '.join(result.steps_completed)}") - if result.errors: - logger.warning(f"Errors encountered: {len(result.errors)}") - for err in result.errors: - logger.warning(f" - {err}") - logger.info(f"Output directory: {output_dir}") - logger.info("=" * 60) - - return result + logger.info(f"Steps completed: {', '.join(result.steps_completed)}") + if result.errors: + logger.warning(f"Errors encountered: {len(result.errors)}") + for err in result.errors: + logger.warning(f" - 
{err}") + logger.info(f"Output directory: {output_dir}") + logger.info("=" * 60) + + return result except BaseException as exc: _exc = exc diff --git a/src/plexus/snpcheck/checker.py b/src/plexus/snpcheck/checker.py index 6ad7499..25870bf 100644 --- a/src/plexus/snpcheck/checker.py +++ b/src/plexus/snpcheck/checker.py @@ -133,6 +133,7 @@ def run_snp_check( snp_3prime_window: int = 5, snp_3prime_multiplier: float = 3.0, snp_af_weight: float = 0.0, + on_junction_done: callable | None = None, ) -> None: """Check all primer pairs in the panel for SNP overlaps using a local VCF. @@ -160,6 +161,8 @@ def run_snp_check( Exponent for AF-based penalty scaling, normalised to af_threshold. 0.0 = no AF scaling (default, backwards compatible). 1.0 = linear scaling. 0.5 = sqrt scaling. + on_junction_done : callable | None + Optional callback invoked after each junction is processed. """ import pysam @@ -229,6 +232,9 @@ def run_snp_check( f"({n_pairs - n_snp_pairs} clean pair(s) available)" ) + if on_junction_done: + on_junction_done() + logger.info( f"SNP check complete: {total_snps} SNPs found across {primers_with_snps} primers" ) diff --git a/tests/test_design.py b/tests/test_design.py index 473e28d..d08627d 100644 --- a/tests/test_design.py +++ b/tests/test_design.py @@ -117,7 +117,7 @@ def test_plexus_method_delegates(self, minimal_panel): with patch("plexus.designer.design.design_multiplex_primers") as mock_dmp: mock_dmp.return_value = minimal_panel design_primers(minimal_panel, method="plexus") - mock_dmp.assert_called_once_with(minimal_panel) + mock_dmp.assert_called_once_with(minimal_panel, on_junction_done=None) def test_unknown_method_raises_value_error(self, minimal_panel): with pytest.raises(ValueError, match="Unknown design method"): From d317b7607c2b2a0afa763925c6484a7b93fd0b15 Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 09:46:36 +0100 Subject: [PATCH 5/8] Fixed bug where swapped orientation off-targets were found but not checked --- CHANGELOG.md | 
1 + src/plexus/blast/specificity.py | 1 + tests/test_blast_specificity.py | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06c44e9..31c0260 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **Swapped-orientation off-targets missed** (`blast/specificity.py`): When a forward primer binds the minus strand and the reverse primer binds the plus strand at an off-target locus, the `AmpliconFinder` stores the amplicon under `(reverse_id, forward_id)`. The mapping step only checked `(forward_id, reverse_id)`, silently missing these swapped-orientation off-targets. Now checks both key orders. - **Package data files not found in global installs**: Config presets and alignment parameter files were not included in the wheel because they lived outside the Python package at the project root (`config/`). Moved all data files (`designer_default_config.json`, `designer_lenient_config.json`, `alignment_parameters.json`, `nn_model/`) into `src/plexus/data/` and switched `config.py` and `aligner/align.py` from `ROOT_DIR` path concatenation to `importlib.resources.files()`. Removed the now-unused `utils/root_dir.py`. `uv tool install` and `pip install` now work correctly without an editable install. 
## [1.0.1] - 03-03-2026 diff --git a/src/plexus/blast/specificity.py b/src/plexus/blast/specificity.py index 2d8af82..7a88d17 100644 --- a/src/plexus/blast/specificity.py +++ b/src/plexus/blast/specificity.py @@ -139,6 +139,7 @@ def run_specificity_check( n_checked += 1 potential_products = amplicon_map.get((f_id, r_id), []) + potential_products += amplicon_map.get((r_id, f_id), []) off_targets = [] on_targets = [] diff --git a/tests/test_blast_specificity.py b/tests/test_blast_specificity.py index 76c74ea..536be95 100644 --- a/tests/test_blast_specificity.py +++ b/tests/test_blast_specificity.py @@ -229,6 +229,53 @@ def test_run_specificity_check_no_hits(mock_panel, tmp_path): assert pair.on_target_detected != True # noqa: E712 — mock hasn't been set to True +def test_run_specificity_check_swapped_orientation_off_target(mock_panel, tmp_path): + """Off-target where primers bind in swapped strand orientation is detected. + + When the forward primer hits the minus strand and the reverse primer hits + the plus strand at an off-target locus, the AmpliconFinder stores the + amplicon under (R_id, F_id). The mapping step must also check (r_id, f_id) + to catch these swapped-orientation off-targets. + """ + with ( + patch("plexus.blast.specificity.BlastRunner") as MockRunner, + patch("plexus.blast.specificity.BlastResultsAnnotator") as MockAnnotator, + patch("plexus.blast.specificity.AmpliconFinder") as MockFinder, + patch("os.makedirs"), + ): + runner_instance = MockRunner.return_value + runner_instance.get_dataframe.return_value = pd.DataFrame({"dummy": [1]}) + + annotator_instance = MockAnnotator.return_value + annotator_instance.get_predicted_bound.return_value = pd.DataFrame( + {"dummy_bound": [1]} + ) + + # Swapped orientation: reverse primer on plus strand (F_primer), + # forward primer on minus strand (R_primer). 
+ finder_instance = MockFinder.return_value + finder_instance.amplicon_df = pd.DataFrame( + [ + { + "chrom": "chr12", + "F_primer": "P1_R", # reverse primer hit plus strand + "R_primer": "P1_F", # forward primer hit minus strand + "product_bp": 64, + "F_start": 52731859, + "R_start": 52731922, + } + ] + ) + + run_specificity_check(mock_panel, str(tmp_path), "fake_genome.fa") + + pair = mock_panel.junctions[0].primer_pairs[0] + assert pair.specificity_checked is True + assert len(pair.off_target_products) == 1 + assert pair.off_target_products[0]["F_start"] == 52731859 + assert pair.on_target_detected is False + + def test_run_specificity_check_on_target_detected(mock_panel, tmp_path): """When the amplicon is at the correct coordinates, on_target_detected is True.""" with ( From 603ed9b2fb72637955febc5fd67c6885f6c0fbd9 Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 13:17:04 +0100 Subject: [PATCH 6/8] Further UI improvements --- src/plexus/orchestrator.py | 126 ++++++++++++++++++++++++++++++++----- src/plexus/pipeline.py | 22 ++++++- 2 files changed, 130 insertions(+), 18 deletions(-) diff --git a/src/plexus/orchestrator.py b/src/plexus/orchestrator.py index d079674..818f1d0 100644 --- a/src/plexus/orchestrator.py +++ b/src/plexus/orchestrator.py @@ -13,21 +13,23 @@ import json import shutil import sys +import threading +import time from concurrent.futures import ProcessPoolExecutor, as_completed +from multiprocessing import Manager from pathlib import Path from typing import Any import pandas as pd from loguru import logger from rich.progress import ( - BarColumn, - MofNCompleteColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn, ) +from plexus.logging import restore_console_logging, suppress_console_logging from plexus.pipeline import MultiPanelResult, PipelineResult, run_pipeline DEFAULT_PANEL_ID = "default" @@ -100,6 +102,7 @@ def _run_single_panel( panel_csv: Path, fasta_file: Path, output_dir: Path, + _status_dict=None, 
**pipeline_kwargs: Any, ) -> tuple[str, PipelineResult]: """ @@ -108,6 +111,12 @@ def _run_single_panel( This is a top-level function (not a method or lambda) so it can be pickled by ProcessPoolExecutor. + Parameters + ---------- + _status_dict : dict-like or None + Shared multiprocessing dict for reporting step progress back to + the parent process. Keyed by panel_id. + Returns ------- tuple[str, PipelineResult] @@ -116,6 +125,20 @@ def _run_single_panel( panel_output_dir = output_dir / panel_id panel_name = pipeline_kwargs.pop("panel_name", panel_id) + # Suppress child-process console logging early (before any logger calls) + # when the parent is showing a progress bar. + if pipeline_kwargs.get("quiet"): + suppress_console_logging() + + # Build a step_callback that writes to the shared dict + step_callback = None + if _status_dict is not None: + + def step_callback(label): + _status_dict[panel_id] = label + + _status_dict[panel_id] = "Starting…" + logger.info(f"Starting pipeline for panel '{panel_id}'...") result = run_pipeline( @@ -123,8 +146,13 @@ def _run_single_panel( fasta_file=fasta_file, output_dir=panel_output_dir, panel_name=panel_name, + step_callback=step_callback, **pipeline_kwargs, ) + + if _status_dict is not None: + _status_dict[panel_id] = "Done" + return panel_id, result @@ -199,27 +227,38 @@ def run_multi_panel( results: dict[str, PipelineResult] = {} + # Panel-level progress bar for parallel mode (no per-step bars + # because subprocesses would fight over stderr). + _use_panel_bar = parallel and show_progress and sys.stderr.isatty() + + # Suppress parent console logging early — before any parallel INFO + # messages — so only the progress bar and explicit console prints appear. 
+ if _use_panel_bar: + suppress_console_logging() + try: if parallel: workers = max_workers or len(panels) logger.info(f"Running {len(panels)} panels in parallel (workers={workers})") - # Panel-level progress bar for parallel mode (no per-step bars - # because subprocesses would fight over stderr). - _use_panel_bar = show_progress and sys.stderr.isatty() + # Shared dict for child processes to report current step + mgr = Manager() if _use_panel_bar else None + status_dict = mgr.dict() if mgr else None with ProcessPoolExecutor(max_workers=workers) as executor: futures = {} for panel_id, csv_path in panel_csvs.items(): kw = dict(pipeline_kwargs) kw["panel_name"] = panel_id - # Do NOT forward show_progress to child processes + if _use_panel_bar: + kw["quiet"] = True future = executor.submit( _run_single_panel, panel_id=panel_id, panel_csv=csv_path, fasta_file=fasta_file, output_dir=output_dir, + _status_dict=status_dict, **kw, ) futures[future] = panel_id @@ -227,25 +266,75 @@ def run_multi_panel( if _use_panel_bar: progress = Progress( SpinnerColumn(), - TextColumn("[bold blue]{task.description}"), - BarColumn(), - MofNCompleteColumn(), + TextColumn("{task.description}"), TimeElapsedColumn(), ) - task = progress.add_task("Running panels", total=len(panels)) - with progress: - for future in as_completed(futures): - pid = futures[future] - _, result = future.result() - results[pid] = result - logger.info(f"Panel '{pid}' completed.") - progress.advance(task) + + # Route WARNING+ messages through the progress console + def _warning_sink(message): + progress.console.print(message, end="") + + warning_handler_id = logger.add( + _warning_sink, level="WARNING", format="{message}" + ) + + # One task row per panel + panel_tasks = {} + for pid in panels: + panel_tasks[pid] = progress.add_task( + f"[bold]{pid}[/bold]: waiting…", total=None + ) + + # Background thread polls shared dict to update descriptions + _stop_poll = threading.Event() + + def _poll_status(): + while not 
_stop_poll.is_set(): + for pid, tid in panel_tasks.items(): + step = status_dict.get(pid) + if step and not progress.tasks[tid].finished: + progress.update( + tid, + description=f"[bold]{pid}[/bold]: {step}", + ) + time.sleep(0.3) + + poll_thread = threading.Thread(target=_poll_status, daemon=True) + + try: + with progress: + poll_thread.start() + for future in as_completed(futures): + pid = futures[future] + _, result = future.result() + results[pid] = result + status = ( + "[green]done[/green]" + if result.success + else "[yellow]done (warnings)[/yellow]" + ) + progress.update( + panel_tasks[pid], + description=f"[bold]{pid}[/bold]: {status}", + total=1, + completed=1, + ) + logger.info(f"Panel '{pid}' completed.") + finally: + _stop_poll.set() + poll_thread.join(timeout=1) + logger.remove(warning_handler_id) + restore_console_logging() else: for future in as_completed(futures): pid = futures[future] _, result = future.result() results[pid] = result logger.info(f"Panel '{pid}' completed.") + + # Clean up manager + if mgr: + mgr.shutdown() else: logger.info(f"Running {len(panels)} panels sequentially.") for panel_id, csv_path in panel_csvs.items(): @@ -261,6 +350,9 @@ def run_multi_panel( ) results[panel_id] = result finally: + # Ensure console logging is restored if we suppressed it + if _use_panel_bar: + restore_console_logging() # Clean up temp CSVs shutil.rmtree(tmp_dir, ignore_errors=True) diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py index dd73c40..4f91158 100644 --- a/src/plexus/pipeline.py +++ b/src/plexus/pipeline.py @@ -308,6 +308,8 @@ def run_pipeline( fasta_sha256: str | None = None, snp_vcf_sha256: str | None = None, show_progress: bool = False, + quiet: bool = False, + step_callback: callable | None = None, ) -> PipelineResult: """ Run the complete multiplex primer design pipeline. 
@@ -340,6 +342,14 @@ def run_pipeline( show_progress : bool If True and stderr is a TTY, show Rich progress bars instead of log output on the console (default: False). + quiet : bool + If True, suppress all console (stderr) log output. File logging + is unaffected. Used for child processes in parallel multi-panel + mode where the parent shows a panel-level progress bar. + step_callback : callable | None + Optional callback invoked with the step label string whenever a + new pipeline step starts. Used by the orchestrator to update + per-panel status in the progress display. Returns ------- @@ -364,6 +374,10 @@ def run_pipeline( if not fasta_file.exists(): raise FileNotFoundError(f"FASTA file not found: {fasta_file}") + # Quiet mode: suppress console logging entirely (used by parallel child processes) + if quiet: + suppress_console_logging() + # --- Compliance environment guard --- from plexus.resources import get_operational_mode from plexus.utils.env import ComplianceError, validate_environment @@ -468,7 +482,13 @@ def run_pipeline( _exc: BaseException | None = None try: - with _progress_context(show_progress) as (_progress, advance_step, sub_task): + with _progress_context(show_progress) as (_progress, _advance_step, sub_task): + # Wrap advance_step to also fire the external step_callback + def advance_step(label=None): + _advance_step(label) + if step_callback and label: + step_callback(label) + # Enable file logging to output directory log_file = configure_file_logging(str(output_dir), debug=debug) logger.info(f"Log file: {log_file}") From 9a7a30401de08efef5a8ccb10c9b002e6616ba6e Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 13:57:37 +0100 Subject: [PATCH 7/8] Normalized cross-dimer penalty in multiplex cost function --- CHANGELOG.md | 5 + README.md | 2 +- config/alignment_parameters.json | 7 -- config/designer_default_config.json | 105 ------------------- config/designer_lenient_config.json | 105 ------------------- config/nn_model/match.json | 
18 ---- config/nn_model/single_mismatch.json | 50 --------- docs/IMPLEMENTATION.md | 4 +- docs/USER_GUIDE.md | 2 +- docs/getting_started.ipynb | 56 +--------- config/environment.yml => environment.yml | 0 src/plexus/cli.py | 54 ++++++---- src/plexus/data/designer_default_config.json | 4 +- src/plexus/pipeline.py | 26 +++-- src/plexus/selector/cost.py | 7 +- tests/test_cli.py | 45 +++++++- tests/test_cost.py | 45 ++++++++ 17 files changed, 158 insertions(+), 377 deletions(-) delete mode 100644 config/alignment_parameters.json delete mode 100644 config/designer_default_config.json delete mode 100644 config/designer_lenient_config.json delete mode 100644 config/nn_model/match.json delete mode 100644 config/nn_model/single_mismatch.json rename config/environment.yml => environment.yml (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31c0260..99e1173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Rich progress bars replace log output during pipeline runs**: The CLI now shows clean Rich progress bars on stderr (step-level + per-junction detail for primer design and SNP check) instead of a wall of log messages. All detailed logs still go to the file. Warnings and errors are printed above the progress bar. Multi-panel parallel mode shows a panel-level progress bar. Progress bars are only active when stderr is a TTY; non-interactive runs behave as before. - **SNP and off-target filters now retain all tied least-affected pairs** (`snpcheck/checker.py`, `blast/specificity.py`): When all primer pairs for a junction overlap SNPs or have off-target products, the filters now keep every pair tied at the minimum count instead of arbitrarily picking one. This lets the downstream selector evaluate tied candidates on other properties (Tm, GC%, pair penalty, etc.). 
+- **Normalise cross-dimer penalty in multiplex cost function** (`selector/cost.py`): The cross-dimer penalty was a raw sum over all C(2n, 2) pairwise primer interactions, scaling quadratically with multiplex size. This caused it to dominate the cost function at higher plexities, effectively drowning out off-target and SNP penalties during selection. The penalty is now divided by the number of interactions, making it a per-interaction average. Weights are now directly comparable regardless of multiplex size. + +- **Separate warnings from errors in pipeline output** (`pipeline.py`, `cli.py`): Off-target and SNP fallback messages (where all pairs had issues but the least-affected were kept) were incorrectly reported as errors, causing the CLI to display "Some panels had errors" for panels that completed successfully. These are now reported as warnings. Errors are reserved for actual failures (e.g. design exceptions, BLAST unavailable). The CLI now shows a distinct warnings section below the success summary. +- **Update fallback message wording**: "least-affected pair kept" now reads "all least-affected pairs kept" to reflect the v1.0.2 change that retains all tied pairs. +- **Remove stale `config/` directory**: Config presets and alignment parameters were moved to `src/plexus/data/` in v1.0.2 but the old `config/` directory was left behind. Removed it and moved `environment.yml` to the repo root. Updated references in `README.md`, `docs/USER_GUIDE.md`, `docs/IMPLEMENTATION.md`, and `docs/getting_started.ipynb`. ### Fixed diff --git a/README.md b/README.md index 068fd87..d1a1b59 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ You can also set up the environment using Conda: ```bash git clone https://github.com/sfilges/plexus cd plexus -conda env create -f config/environment.yml +conda env create -f environment.yml conda activate plexus-run pip install -e . 
``` diff --git a/config/alignment_parameters.json b/config/alignment_parameters.json deleted file mode 100644 index f44ce7a..0000000 --- a/config/alignment_parameters.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "end_length": 4, - "end_bonus": -0.5, - "match_scores": "config/nn_model/match.json", - "single_mismatch_scores": "config/nn_model/single_mismatch.json", - "double_mismatch_score": 0.2 -} diff --git a/config/designer_default_config.json b/config/designer_default_config.json deleted file mode 100644 index 0c2cbf7..0000000 --- a/config/designer_default_config.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "singleplex_design_parameters" : { - "PRIMER_NUM_RETURN": 10, - "PRIMER_OPT_TM": 60.0, - "PRIMER_MIN_TM": 57.0, - "PRIMER_MAX_TM": 63.0, - "PRIMER_OPT_SIZE": 22, - "primer_min_length": 15, - "primer_max_length": 30, - "PRIMER_OPT_BOUND": 98.0, - "PRIMER_MIN_BOUND": 25.0, - "PRIMER_MAX_BOUND": 110.0, - "junction_padding_bases": 3, - "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", - "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", - "primer_length_penalty": 1.0, - "primer_complexity_penalty": 1.0, - "amplicon_length_penalty": 1.0, - "PRIMER_OPT_GC_PERCENT": 50.0, - "primer_min_gc": 30, - "primer_max_gc": 70, - "primer_gc_clamp": 1, - "primer_max_poly_x": 5, - "primer_max_poly_gc": 3, - "primer_max_n": 0, - "PRIMER_MAX_SELF_ANY_TH": 45.0, - "PRIMER_MAX_SELF_END_TH": 35.0, - "PRIMER_MAX_HAIRPIN_TH": 24.0, - "PRIMER_MAX_END_STABILITY": 4.5, - "PRIMER_MAX_TEMPLATE_MISPRIMING_TH": 35.0, - "PRIMER_WT_SIZE_LT": 1.0, - "PRIMER_WT_SIZE_GT": 1.0, - "PRIMER_WT_TM_GT": 1.0, - "PRIMER_WT_TM_LT": 1.0, - "PRIMER_WT_BOUND_GT": 1.0, - "PRIMER_WT_BOUND_LT": 1.0, - "PRIMER_WT_GC_PERCENT_GT": 0.0, - "PRIMER_WT_GC_PERCENT_LT": 0.0, - "PRIMER_WT_SELF_ANY_TH": 1.0, - "PRIMER_WT_SELF_END_TH": 1.0, - "PRIMER_WT_HAIRPIN_TH": 1.0, - "PRIMER_WT_END_STABILITY": 1.0 - }, - "primer_pair_parameters" : { - "PRIMER_PAIR_MAX_DIFF_TM": 3.0, - 
"PRIMER_PRODUCT_OPT_SIZE": 60, - "PRIMER_PRODUCT_MIN_INSERT_SIZE": 20, - "PRIMER_PRODUCT_MAX_INSERT_SIZE": 60, - "PRIMER_PRODUCT_MAX_SIZE": 120, - "PRIMER_PAIR_WT_PR_PENALTY": 1.0, - "PRIMER_PAIR_WT_DIFF_TM": 0.0, - "PRIMER_PAIR_WT_PRODUCT_SIZE_LT": 0.5, - "PRIMER_PAIR_WT_PRODUCT_SIZE_GT": 2.0 - }, - "pcr_conditions" : { - "annealing_temperature": 60, - "primer_concentration": 50, - "dntp_concentration": 0.8, - "dna_concentration": 50, - "mv_concentration": 50, - "dv_concentration": 1.5, - "dmso_concentration": 0.0, - "dmso_fact": 0.6, - "formamide_concentration": 0.0 - }, - "snp_check_parameters": { - "af_threshold": 0.01, - "snp_penalty_weight": 10.0, - "snp_3prime_window": 5, - "snp_3prime_multiplier": 3.0, - "snp_strict": false, - "snp_af_weight": 1.0 - }, - "blast_parameters": { - "length_threshold": 15, - "evalue_threshold": 10.0, - "max_mismatches": 3, - "three_prime_tolerance": 3, - "blast_evalue": 30000.0, - "blast_word_size": 7, - "blast_reward": 1, - "blast_penalty": -1, - "blast_max_hsps": 100, - "blast_dust": "yes", - "max_amplicon_size": 2000, - "ontarget_tolerance": 5 - }, - "multiplex_picker_parameters": { - "initial_solutions": 100, - "top_solutions_to_keep": 4, - "target_plexity": 24, - "minimum_plexity": 10, - "maximum_plexity": 50, - "plexity_wt_lt": 1.0, - "plexity_wt_gt": 1.0, - "force_plexity": false, - "allow_split_panel": false, - "max_splits": 2, - "wt_pair_penalty": 1.0, - "wt_off_target": 5.0, - "wt_cross_dimer": 1.0, - "wt_pair_dimer": 1.0, - "wt_snp_penalty": 3.0 - } -} diff --git a/config/designer_lenient_config.json b/config/designer_lenient_config.json deleted file mode 100644 index 037e371..0000000 --- a/config/designer_lenient_config.json +++ /dev/null @@ -1,105 +0,0 @@ -{ - "singleplex_design_parameters" : { - "PRIMER_NUM_RETURN": 10, - "PRIMER_OPT_TM": 60.0, - "PRIMER_MIN_TM": 55.0, - "PRIMER_MAX_TM": 66.0, - "PRIMER_OPT_SIZE": 22, - "primer_min_length": 18, - "primer_max_length": 28, - "PRIMER_OPT_BOUND": 97.0, - 
"PRIMER_MIN_BOUND": -10.0, - "PRIMER_MAX_BOUND": 110.0, - "junction_padding_bases": 3, - "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", - "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", - "primer_length_penalty": 1.0, - "primer_complexity_penalty": 1.0, - "amplicon_length_penalty": 1.0, - "PRIMER_OPT_GC_PERCENT": 50.0, - "primer_min_gc": 30, - "primer_max_gc": 70, - "primer_gc_clamp": 0, - "primer_max_poly_x": 4, - "primer_max_poly_gc": 3, - "primer_max_n": 0, - "PRIMER_MAX_SELF_ANY_TH": 45.0, - "PRIMER_MAX_SELF_END_TH": 35.0, - "PRIMER_MAX_HAIRPIN_TH": 24.0, - "PRIMER_MAX_END_STABILITY": 4.5, - "PRIMER_MAX_TEMPLATE_MISPRIMING_TH": 35.0, - "PRIMER_WT_SIZE_LT": 1.0, - "PRIMER_WT_SIZE_GT": 1.0, - "PRIMER_WT_TM_GT": 1.0, - "PRIMER_WT_TM_LT": 1.0, - "PRIMER_WT_BOUND_GT": 0.0, - "PRIMER_WT_BOUND_LT": 0.0, - "PRIMER_WT_GC_PERCENT_GT": 0.0, - "PRIMER_WT_GC_PERCENT_LT": 0.0, - "PRIMER_WT_SELF_ANY_TH": 0.0, - "PRIMER_WT_SELF_END_TH": 0.0, - "PRIMER_WT_HAIRPIN_TH": 0.0, - "PRIMER_WT_END_STABILITY": 0.0 - }, - "primer_pair_parameters" : { - "PRIMER_PAIR_MAX_DIFF_TM": 5.0, - "PRIMER_PRODUCT_OPT_SIZE": 60, - "PRIMER_PRODUCT_MIN_INSERT_SIZE": 20, - "PRIMER_PRODUCT_MAX_INSERT_SIZE": 60, - "PRIMER_PRODUCT_MAX_SIZE": 100, - "PRIMER_PAIR_WT_PR_PENALTY": 1.0, - "PRIMER_PAIR_WT_DIFF_TM": 0.0, - "PRIMER_PAIR_WT_PRODUCT_SIZE_LT": 0.5, - "PRIMER_PAIR_WT_PRODUCT_SIZE_GT": 2.0 - }, - "pcr_conditions" : { - "annealing_temperature": 60, - "primer_concentration": 50, - "dntp_concentration": 0.6, - "dna_concentration": 50, - "mv_concentration": 50, - "dv_concentration": 1.5, - "dmso_concentration": 0.0, - "dmso_fact": 0.6, - "formamide_concentration": 0.8 - }, - "snp_check_parameters": { - "af_threshold": 0.01, - "snp_penalty_weight": 10.0, - "snp_3prime_window": 5, - "snp_3prime_multiplier": 3.0, - "snp_strict": false, - "snp_af_weight": 0.5 - }, - "blast_parameters": { - "length_threshold": 15, - "evalue_threshold": 10.0, - "max_mismatches": 
3, - "three_prime_tolerance": 3, - "blast_evalue": 30000.0, - "blast_word_size": 7, - "blast_reward": 1, - "blast_penalty": -1, - "blast_max_hsps": 100, - "blast_dust": "yes", - "max_amplicon_size": 2000, - "ontarget_tolerance": 5 - }, - "multiplex_picker_parameters": { - "initial_solutions": 100, - "top_solutions_to_keep": 4, - "target_plexity": 20, - "minimum_plexity": 10, - "maximum_plexity": 50, - "plexity_wt_lt": 1.0, - "plexity_wt_gt": 1.0, - "force_plexity": false, - "allow_split_panel": false, - "max_splits": 2, - "wt_pair_penalty": 1.0, - "wt_off_target": 5.0, - "wt_cross_dimer": 1.0, - "wt_pair_dimer": 0.5, - "wt_snp_penalty": 1.0 - } -} diff --git a/config/nn_model/match.json b/config/nn_model/match.json deleted file mode 100644 index 149a993..0000000 --- a/config/nn_model/match.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "AT/TA": -0.88, - "TA/AT": -0.60, - "AA/TT": -1.02, - "TT/AA": -1.02, - "AC/TG": -1.46, - "GT/CA": -1.46, - "CA/GT": -1.46, - "TG/AC": -1.46, - "TC/AG": -1.32, - "GA/CT": -1.32, - "AG/TC": -1.29, - "CT/GA": -1.29, - "CG/GC": -2.17, - "GC/CG": -2.24, - "GG/CC": -1.83, - "CC/GG": -1.83 -} diff --git a/config/nn_model/single_mismatch.json b/config/nn_model/single_mismatch.json deleted file mode 100644 index ccba475..0000000 --- a/config/nn_model/single_mismatch.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "AG/TT": 0.71, - "GT/CG": -0.59, - "CA/GC": 0.75, - "TC/AA": 1.33, - "GC/CT": 0.62, - "AG/TA": 0.02, - "TA/AG": 0.42, - "TA/AA": 0.69, - "AG/TG": -0.13, - "CT/GT": -0.12, - "AT/TG": 0.07, - "TG/AT": 0.43, - "CC/GA": 0.79, - "AC/TT": 0.64, - "GT/CC": 0.98, - "CA/GG": 0.03, - "TG/AA": 0.74, - "AC/TC": 1.33, - "CG/GG": -0.11, - "GT/CT": 0.45, - "CG/GT": -0.47, - "TT/AG": 0.34, - "GA/CC": 0.81, - "AT/TC": 0.73, - "TC/AT": 0.97, - "CG/GA": 0.11, - "AA/TA": 0.61, - "CC/GC": 0.70, - "GG/CG": -1.11, - "TT/AT": 0.68, - "CT/GG": -0.32, - "AA/TC": 0.88, - "GC/CA": 0.47, - "CC/GT": 0.62, - "TT/AC": 0.75, - "GA/CG": -0.25, - "CA/GA": 0.43, - "GC/CC": 0.79, - 
"TG/AG": 0.44, - "GG/CT": 0.08, - "AC/TA": 0.77, - "TA/AC": 0.92, - "CT/GC": 0.40, - "AA/TG": 0.14, - "GG/CA": -0.52, - "GA/CA": 0.17, - "TC/AC": 1.05, - "AT/TT": 0.69 -} diff --git a/docs/IMPLEMENTATION.md b/docs/IMPLEMENTATION.md index cc06988..5c78069 100644 --- a/docs/IMPLEMENTATION.md +++ b/docs/IMPLEMENTATION.md @@ -235,8 +235,8 @@ Configuration is managed via Pydantic models in `src/plexus/config.py`. The top- Two built-in presets are bundled as JSON: -* `default` (`config/designer_default_config.json`) — conservative thermodynamic thresholds -* `lenient` (`config/designer_lenient_config.json`) — relaxed thresholds for difficult junctions +* `default` (`src/plexus/data/designer_default_config.json`) — conservative thermodynamic thresholds +* `lenient` (`src/plexus/data/designer_lenient_config.json`) — relaxed thresholds for difficult junctions Users can supply a custom JSON with `--config` / `-c`. Generate a template with `plexus template`. diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index 9974d71..6059139 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -108,7 +108,7 @@ uv pip install -e . ```bash # Create conda environment -conda env create -f config/environment.yml +conda env create -f environment.yml conda activate plexus-run # Install plexus diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 61688ef..267d68e 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -328,61 +328,7 @@ "cell_type": "markdown", "id": "c1d2e3f4", "metadata": {}, - "source": [ - "## 6. 
Next steps\n", - "\n", - "### CLI usage\n", - "\n", - "Everything in this notebook can also be done from the command line:\n", - "\n", - "```bash\n", - "plexus run \\\n", - " --input data/junctions.csv \\\n", - " --fasta /path/to/hg38.fa \\\n", - " --output results/ \\\n", - " --name my_panel\n", - "```\n", - "\n", - "Run `plexus --help` for all options.\n", - "\n", - "### Tuning the design parameters\n", - "\n", - "Pass a `config_file` (JSON) to `run_pipeline()` or `--config` on the CLI to override any parameter. \n", - "A minimal example to widen the Tm window:\n", - "\n", - "```json\n", - "{\n", - " \"singleplex_design_parameters\": {\n", - " \"PRIMER_MIN_TM\": 55.0,\n", - " \"PRIMER_MAX_TM\": 66.0\n", - " }\n", - "}\n", - "```\n", - "\n", - "See `config/designer_default_config.json` for all available parameters.\n", - "\n", - "### SNP checking\n", - "\n", - "Filter primers that overlap common germline variants — useful for liquid biopsy panels:\n", - "\n", - "```bash\n", - "plexus init # downloads a bundled gnomAD VCF subset\n", - "plexus run -i junctions.csv -f hg38.fa --snp-strict\n", - "```\n", - "\n", - "### Multi-patient / multi-panel inputs\n", - "\n", - "Add a `Panel` column to your CSV to design independent panels for multiple patients in one run:\n", - "\n", - "```bash\n", - "plexus run -i cohort.csv -f hg38.fa --parallel\n", - "```\n", - "\n", - "### Docker / clinical deployment\n", - "\n", - "For containerised or regulated environments, plexus ships a compliance mode — \n", - "see the [README](../README.md#compliance-mode-and-container-deployment) for the Docker workflow." - ] + "source": "## 6. 
Next steps\n\n### CLI usage\n\nEverything in this notebook can also be done from the command line:\n\n```bash\nplexus run \\\n --input data/junctions.csv \\\n --fasta /path/to/hg38.fa \\\n --output results/ \\\n --name my_panel\n```\n\nRun `plexus --help` for all options.\n\n### Tuning the design parameters\n\nPass a `config_file` (JSON) to `run_pipeline()` or `--config` on the CLI to override any parameter. \nA minimal example to widen the Tm window:\n\n```json\n{\n \"singleplex_design_parameters\": {\n \"PRIMER_MIN_TM\": 55.0,\n \"PRIMER_MAX_TM\": 66.0\n }\n}\n```\n\nSee `src/plexus/data/designer_default_config.json` for all available parameters.\n\n### SNP checking\n\nFilter primers that overlap common germline variants — useful for liquid biopsy panels:\n\n```bash\nplexus init # downloads a bundled gnomAD VCF subset\nplexus run -i junctions.csv -f hg38.fa --snp-strict\n```\n\n### Multi-patient / multi-panel inputs\n\nAdd a `Panel` column to your CSV to design independent panels for multiple patients in one run:\n\n```bash\nplexus run -i cohort.csv -f hg38.fa --parallel\n```\n\n### Docker / clinical deployment\n\nFor containerised or regulated environments, plexus ships a compliance mode — \nsee the [README](../README.md#compliance-mode-and-container-deployment) for the Docker workflow." 
} ], "metadata": { diff --git a/config/environment.yml b/environment.yml similarity index 100% rename from config/environment.yml rename to environment.yml diff --git a/src/plexus/cli.py b/src/plexus/cli.py index c5821a9..339b477 100644 --- a/src/plexus/cli.py +++ b/src/plexus/cli.py @@ -350,42 +350,54 @@ def run( ) if isinstance(result, MultiPanelResult): + console.print() if result.success: - console.print() console.print( "[bold green]All panels completed successfully![/bold green]" ) - console.print(f" Panels: {len(result.panel_ids)}") - for pid in result.panel_ids: - pr = result.panel_results[pid] - console.print( - f" {pid}: {pr.num_junctions} junctions, " - f"{len(pr.selected_pairs)} selected pairs" - ) - console.print(f" Output: {result.output_dir}") else: - console.print() - console.print("[bold yellow]Some panels had errors:[/bold yellow]") + console.print("[bold red]Some panels had errors:[/bold red]") for pid in result.failed_panels: pr = result.panel_results[pid] for error in pr.errors: - console.print(f" [yellow]• {pid}: {error}[/yellow]") + console.print(f" [red]• {pid}: {error}[/red]") + + console.print(f" Panels: {len(result.panel_ids)}") + for pid in result.panel_ids: + pr = result.panel_results[pid] + console.print( + f" {pid}: {pr.num_junctions} junctions, " + f"{len(pr.selected_pairs)} selected pairs" + ) + console.print(f" Output: {result.output_dir}") + + if result.warned_panels: + console.print() + console.print("[bold yellow]Some panels had warnings:[/bold yellow]") + for pid in result.warned_panels: + pr = result.panel_results[pid] + for warning in pr.warnings: + console.print(f" [yellow]• {pid}: {warning}[/yellow]") else: + console.print() if result.success: - console.print() console.print( "[bold green]Pipeline completed successfully![/bold green]" ) - console.print(f" Junctions: {result.num_junctions}") - console.print(f" Primer pairs: {result.num_primer_pairs}") - console.print(f" Output: {result.output_dir}") else: - console.print() - 
console.print( - "[bold yellow]Pipeline completed with warnings:[/bold yellow]" - ) + console.print("[bold red]Pipeline completed with errors:[/bold red]") for error in result.errors: - console.print(f" [yellow]• {error}[/yellow]") + console.print(f" [red]• {error}[/red]") + + console.print(f" Junctions: {result.num_junctions}") + console.print(f" Primer pairs: {result.num_primer_pairs}") + console.print(f" Output: {result.output_dir}") + + if result.warnings: + console.print() + console.print("[bold yellow]Warnings:[/bold yellow]") + for warning in result.warnings: + console.print(f" [yellow]• {warning}[/yellow]") except FileNotFoundError as e: console.print(f"[bold red]Error: {e}[/bold red]") diff --git a/src/plexus/data/designer_default_config.json b/src/plexus/data/designer_default_config.json index 9fb5a25..36d8ff7 100644 --- a/src/plexus/data/designer_default_config.json +++ b/src/plexus/data/designer_default_config.json @@ -8,8 +8,8 @@ "primer_min_length": 15, "primer_max_length": 30, "PRIMER_OPT_BOUND": 98.0, - "PRIMER_MIN_BOUND": 30.0, - "PRIMER_MAX_BOUND": 120.0, + "PRIMER_MIN_BOUND": -10.0, + "PRIMER_MAX_BOUND": 110.0, "junction_padding_bases": 3, "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC", "reverse_tail": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT", diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py index 4f91158..5430fce 100644 --- a/src/plexus/pipeline.py +++ b/src/plexus/pipeline.py @@ -55,6 +55,7 @@ class PipelineResult: config: DesignerConfig steps_completed: list[str] = field(default_factory=list) errors: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) multiplex_solutions: list = field(default_factory=list) selected_pairs: list = field(default_factory=list) failed_junctions: list = field(default_factory=list) @@ -93,6 +94,11 @@ def failed_panels(self) -> list[str]: """Panel IDs that had errors.""" return [pid for pid, r in self.panel_results.items() if not 
r.success] + @property + def warned_panels(self) -> list[str]: + """Panel IDs that had warnings.""" + return [pid for pid, r in self.panel_results.items() if r.warnings] + @property def total_junctions(self) -> int: """Total junctions across all panels.""" @@ -119,6 +125,7 @@ def summary_dict(self) -> dict: "selected_pairs": len(r.selected_pairs), "success": r.success, "errors": r.errors, + "warnings": r.warnings, } for pid, r in self.panel_results.items() }, @@ -624,8 +631,9 @@ def advance_step(label=None): f"SNP strict mode: removed {n_removed} primer pairs overlapping SNPs" ) for name in fallback_junctions: - result.errors.append( - f"SNP strict: '{name}' — no SNP-free pairs found; least-affected pair kept" + result.warnings.append( + f"SNP strict: '{name}' — no SNP-free pairs found; " + "all least-affected pairs kept" ) result.steps_completed.append("snp_strict_filtered") except Exception as e: @@ -681,9 +689,9 @@ def advance_step(label=None): "with off-target products" ) for name in fallback_junctions: - result.errors.append( + result.warnings.append( f"Off-target filter: '{name}' — no clean pairs; " - "least-affected pair kept" + "all least-affected pairs kept" ) result.steps_completed.append("offtarget_filtered") except ImportError as e: @@ -863,10 +871,14 @@ def advance_step(label=None): f"(best cost: {result.multiplex_solutions[0].cost:.2f})" ) logger.info(f"Steps completed: {', '.join(result.steps_completed)}") + if result.warnings: + logger.warning(f"Warnings: {len(result.warnings)}") + for warn in result.warnings: + logger.warning(f" - {warn}") if result.errors: - logger.warning(f"Errors encountered: {len(result.errors)}") + logger.error(f"Errors encountered: {len(result.errors)}") for err in result.errors: - logger.warning(f" - {err}") + logger.error(f" - {err}") logger.info(f"Output directory: {output_dir}") logger.info("=" * 60) @@ -895,6 +907,8 @@ def advance_step(label=None): _final_prov["status"] = "completed" _final_prov["errors"] = result.errors 
if result else [] + _final_prov["warnings"] = result.warnings if result else [] + _final_prov["completed_at"] = _completed_at _final_prov["steps_completed"] = result.steps_completed if result else [] diff --git a/src/plexus/selector/cost.py b/src/plexus/selector/cost.py index c1145ad..167e411 100644 --- a/src/plexus/selector/cost.py +++ b/src/plexus/selector/cost.py @@ -61,9 +61,12 @@ def calc_cost(self, primer_pair_ids: list[str]) -> float: cost += self.wt_off_target * len(pair.off_target_products) cost += self.wt_pair_dimer * max(0, -(pair.dimer_score or 0.0)) - # 2. All-pairwise cross-dimer interactions + # 2. All-pairwise cross-dimer interactions (normalised per interaction) if self.wt_cross_dimer > 0: - cost += self.wt_cross_dimer * self._calc_cross_dimer_penalty(pairs) + raw_dimer = self._calc_cross_dimer_penalty(pairs) + n_primers = len(pairs) * 2 + n_interactions = n_primers * (n_primers - 1) / 2 + cost += self.wt_cross_dimer * (raw_dimer / max(n_interactions, 1)) return cost diff --git a/tests/test_cli.py b/tests/test_cli.py index 85dd00c..cddc27c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -184,7 +184,7 @@ def test_run_success(self, mock_run_pipeline): @patch("plexus.orchestrator.run_pipeline") def test_run_with_warnings(self, mock_run_pipeline): - """Test run that completes with warnings.""" + """Test run that completes with warnings (not errors).""" mock_panel = MagicMock() mock_panel.junctions = [MagicMock()] mock_panel.junctions[0].primer_pairs = [] @@ -194,7 +194,9 @@ def test_run_with_warnings(self, mock_run_pipeline): output_dir=Path("/tmp/output"), config=MagicMock(), steps_completed=["panel_created", "primers_designed"], - errors=["Save candidates failed: test error"], + warnings=[ + "Off-target filter: 'KRAS' — no clean pairs; all least-affected pairs kept" + ], ) mock_run_pipeline.return_value = mock_result @@ -213,7 +215,46 @@ def test_run_with_warnings(self, mock_run_pipeline): ) assert result.exit_code == 0 + # Should show 
success (no errors) plus warnings section + assert "successfully" in result.output.lower() assert "warnings" in result.output.lower() + assert "Off-target filter" in result.output + finally: + Path(csv_path).unlink() + Path(fasta_path).unlink() + + @patch("plexus.orchestrator.run_pipeline") + def test_run_with_errors(self, mock_run_pipeline): + """Test run that completes with errors.""" + mock_panel = MagicMock() + mock_panel.junctions = [MagicMock()] + mock_panel.junctions[0].primer_pairs = [] + + mock_result = PipelineResult( + panel=mock_panel, + output_dir=Path("/tmp/output"), + config=MagicMock(), + steps_completed=["panel_created", "primers_designed"], + errors=["Save candidates failed: test error"], + ) + mock_run_pipeline.return_value = mock_result + + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as csv_f: + csv_path = csv_f.name + csv_f.write(b"Name,Chrom,Position\n") + + with tempfile.NamedTemporaryFile(suffix=".fa", delete=False) as fa_f: + fasta_path = fa_f.name + fa_f.write(b">chr1\nACGT\n") + + try: + result = runner.invoke( + app, + ["run", "--input", csv_path, "--fasta", fasta_path], + ) + + assert result.exit_code == 0 + assert "errors" in result.output.lower() assert "Save candidates failed" in result.output finally: Path(csv_path).unlink() diff --git a/tests/test_cost.py b/tests/test_cost.py index 8f2bfb6..8d4405c 100644 --- a/tests/test_cost.py +++ b/tests/test_cost.py @@ -44,6 +44,51 @@ def _make_config(**kwargs): return MultiplexPickerParameters(**defaults) +class TestCrossDimerNormalisation: + def test_cross_dimer_normalised_by_interaction_count(self): + """Cross-dimer penalty is divided by C(2n, 2) so it's a per-interaction average.""" + # 2 pairs → 4 primers → C(4,2) = 6 interactions + p1 = _make_pair(pair_penalty=0.0, pair_id="p1") + # Use a distinct sequence for p2 so dimer scores are computed + p2_fwd = _make_primer("fwd2", "forward", seq="GCGCGCGCGCGCGCGCGCGC") + p2_rev = _make_primer("rev2", "reverse", 
seq="ATATATATATATATATATATAT") + p2 = PrimerPair( + forward=p2_fwd, + reverse=p2_rev, + insert_size=20, + amplicon_sequence="A" * 60, + amplicon_length=60, + pair_penalty=0.0, + pair_id="p2", + ) + p2.snp_penalty = 0.0 + + config = _make_config( + wt_pair_penalty=0.0, + wt_cross_dimer=1.0, + ) + cf = MultiplexCostFunction({"p1": p1, "p2": p2}, config) + + # Get the raw dimer penalty for comparison + raw = cf._calc_cross_dimer_penalty([p1, p2]) + cost = cf.calc_cost(["p1", "p2"]) + + # 4 primers → 6 interactions; cost should be raw / 6 + n_interactions = 6 + assert cost == pytest.approx(raw / n_interactions) + + def test_single_pair_normalises_to_one_interaction(self): + """With 1 pair (2 primers), there's C(2,2)=1 interaction.""" + p1 = _make_pair(pair_penalty=0.0, pair_id="p1") + config = _make_config(wt_pair_penalty=0.0, wt_cross_dimer=1.0) + cf = MultiplexCostFunction({"p1": p1}, config) + + raw = cf._calc_cross_dimer_penalty([p1]) + cost = cf.calc_cost(["p1"]) + # 2 primers → C(2,2) = 1 interaction; cost = raw / 1 + assert cost == pytest.approx(raw / 1) + + class TestSnpPenaltyCostTerm: def test_snp_penalty_used_independently(self): """wt_snp_penalty scales pair.snp_penalty as a separate cost term.""" From 7238981676bdb542ec433ab7e0cada1630ef1974 Mon Sep 17 00:00:00 2001 From: sfilges Date: Wed, 4 Mar 2026 14:09:16 +0100 Subject: [PATCH 8/8] Increased cross-dimer penalty weight --- src/plexus/data/designer_default_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plexus/data/designer_default_config.json b/src/plexus/data/designer_default_config.json index 36d8ff7..7ebc898 100644 --- a/src/plexus/data/designer_default_config.json +++ b/src/plexus/data/designer_default_config.json @@ -98,7 +98,7 @@ "max_splits": 2, "wt_pair_penalty": 1.0, "wt_off_target": 5.0, - "wt_cross_dimer": 1.0, + "wt_cross_dimer": 20.0, "wt_pair_dimer": 1.0, "wt_snp_penalty": 3.0 }