From 8c108dbab4772969e3ba304d58f57510c2183321 Mon Sep 17 00:00:00 2001
From: sfilges <stefan.filges@pm.me>
Date: Tue, 3 Mar 2026 15:19:08 +0100
Subject: [PATCH 1/3] Further tuning of BLAST parameters

---
 CHANGELOG.md                        | 18 +++++++++
 config/designer_default_config.json | 11 +++--
 config/designer_lenient_config.json |  9 ++++-
 src/plexus/blast/blast_runner.py    | 15 ++++++-
 src/plexus/blast/specificity.py     | 19 ++++++++-
 src/plexus/config.py                | 45 +++++++++++++++++++--
 src/plexus/designer/design.py       |  2 +
 src/plexus/pipeline.py              |  4 ++
 src/plexus/utils/utils.py           | 50 ++++++++++++++++++++++-
 src/plexus/version.py               |  2 +-
 tests/test_blast_runner.py          | 45 +++++++++++++++++++++
 tests/test_blast_specificity.py     | 11 +++++
 tests/test_config.py                |  6 ++-
 tests/test_utils.py                 | 62 ++++++++++++++++++++++++++++-
 14 files changed, 283 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7714c56..378a0d0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.1] - 2026-03-03
+
+### Changed
+
+- **BLAST E-value raised to Primer-BLAST default** (`config.py`): `blast_evalue` 1000 → 30000 to match the Primer-BLAST default and ensure short, gapped primer alignments are not silently dropped.
+- **BLAST word size now explicit** (`config.py`, `specificity.py`, `pipeline.py`): New `blast_word_size` field (default 7) passed to `blastn -word_size`, matching the Primer-BLAST / blastn-short default. Previously relied on the implicit blastn-short default; now configurable.
+- **Tightened poly-X default** (`config.py`): `primer_max_poly_x` default lowered from 5 to 4.
+- **Increased BLAST `max_mismatches` default** (`config.py`): 2 → 3 so that 3-mismatch paralog alignments (e.g. DCAF12L1) are classified as "predicted bound".
+- **BLAST mismatch penalty** (`config.py`): New `blast_penalty` default changed from the blastn-short default of -3 to -1. With reward=1/penalty=-3, a 21bp primer with 3 mismatches scores only 9 (E ≈ 190,000) and is silently dropped by BLAST. With penalty=-1, the same alignment scores 15 (E ≈ 70), making paralog off-targets like DCAF12L1 detectable.
+- Updated both `designer_default_config.json` and `designer_lenient_config.json` to match the new defaults.
+
+### Added
+
+- **G/C-specific poly-run filter** (`utils.py`): New `find_max_poly_gc()` function rejects primers with homopolymer runs of G or C exceeding `primer_max_poly_gc` (default 3). This catches primers like the DCAF12L2 `GGGGCCGGATGTTCTGCTG` forward primer that passed the old poly-X filter. Wired through `check_kmer()`, `generate_kmers()`, `filter_kmers()`, and `design.py`.
+- **BLAST `-evalue` parameter** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_evalue` field (default 30000.0) passed to `blastn -evalue`, ensuring weak but real off-target alignments are not silently dropped before the annotator can evaluate them.
+- **BLAST `-reward` / `-penalty` parameters** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_reward` (default 1) and `blast_penalty` (default -1) fields override the blastn-short scoring defaults, improving sensitivity for mismatched paralog alignments.
+- Tests for `find_max_poly_gc`, `check_kmer` poly-GC integration, BLAST evalue/reward/penalty/word_size parameter forwarding, and specificity check threading.
+
 ## [1.0.0] - 2026-03-03
 
 First stable release. All v1.0 roadmap items complete — see `docs/ROADMAP.md` for the full list.
diff --git a/config/designer_default_config.json b/config/designer_default_config.json
index a0e9dda..95e5dba 100644
--- a/config/designer_default_config.json
+++ b/config/designer_default_config.json
@@ -8,7 +8,7 @@
         "primer_min_length": 15,
         "primer_max_length": 30,
         "PRIMER_OPT_BOUND": 98.0,
-        "PRIMER_MIN_BOUND": -10.0,
+        "PRIMER_MIN_BOUND": 35.0,
         "PRIMER_MAX_BOUND": 120.0,
         "junction_padding_bases": 3,
         "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC",
@@ -20,7 +20,8 @@
         "primer_min_gc": 30,
         "primer_max_gc": 70,
         "primer_gc_clamp": 1,
-        "primer_max_poly_x": 5,
+        "primer_max_poly_x": 4,
+        "primer_max_poly_gc": 3,
         "primer_max_n": 0,
         "PRIMER_MAX_SELF_ANY_TH": 45.0,
         "PRIMER_MAX_SELF_END_TH": 35.0,
@@ -73,7 +74,11 @@
     "blast_parameters": {
         "length_threshold": 15,
         "evalue_threshold": 10.0,
-        "max_mismatches": 2,
+        "max_mismatches": 3,
+        "blast_evalue": 30000.0,
+        "blast_word_size": 7,
+        "blast_reward": 1,
+        "blast_penalty": -1,
         "max_amplicon_size": 2000,
         "ontarget_tolerance": 5
     },
diff --git a/config/designer_lenient_config.json b/config/designer_lenient_config.json
index 759a34c..352875d 100644
--- a/config/designer_lenient_config.json
+++ b/config/designer_lenient_config.json
@@ -20,7 +20,8 @@
         "primer_min_gc": 30,
         "primer_max_gc": 70,
         "primer_gc_clamp": 0,
-        "primer_max_poly_x": 5,
+        "primer_max_poly_x": 4,
+        "primer_max_poly_gc": 3,
         "primer_max_n": 0,
         "PRIMER_MAX_SELF_ANY_TH": 45.0,
         "PRIMER_MAX_SELF_END_TH": 35.0,
@@ -73,7 +74,11 @@
     "blast_parameters": {
         "length_threshold": 15,
         "evalue_threshold": 10.0,
-        "max_mismatches": 2,
+        "max_mismatches": 3,
+        "blast_evalue": 30000.0,
+        "blast_word_size": 7,
+        "blast_reward": 1,
+        "blast_penalty": -1,
         "max_amplicon_size": 2000,
         "ontarget_tolerance": 5
     },
diff --git a/src/plexus/blast/blast_runner.py b/src/plexus/blast/blast_runner.py
index cf9bb9f..4433c7a 100644
--- a/src/plexus/blast/blast_runner.py
+++ b/src/plexus/blast/blast_runner.py
@@ -91,7 +91,14 @@ def create_database(self):
         return self
 
     def run(
-        self, output_archive, word_size=None, task="blastn-short", num_threads: int = 1
+        self,
+        output_archive,
+        word_size=None,
+        task="blastn-short",
+        num_threads: int = 1,
+        evalue: float | None = None,
+        reward: int | None = None,
+        penalty: int | None = None,
     ):
         """
         Run blast, writing a BLAST archive to `output_archive`.
@@ -120,6 +127,12 @@ def run(
         ]
         if word_size is not None:
             cmd.extend(["-word_size", str(word_size)])
+        if evalue is not None:
+            cmd.extend(["-evalue", str(evalue)])
+        if reward is not None:
+            cmd.extend(["-reward", str(reward)])
+        if penalty is not None:
+            cmd.extend(["-penalty", str(penalty)])
         if num_threads > 1:
             cmd.extend(["-num_threads", str(num_threads)])
 
diff --git a/src/plexus/blast/specificity.py b/src/plexus/blast/specificity.py
index bc1a8b0..4db1f74 100644
--- a/src/plexus/blast/specificity.py
+++ b/src/plexus/blast/specificity.py
@@ -16,9 +16,13 @@ def run_specificity_check(
     *,
     length_threshold: int = 15,
     evalue_threshold: float = 10.0,
-    max_mismatches: int = 2,
+    max_mismatches: int = 3,
     max_amplicon_size: int = 2000,
     ontarget_tolerance: int = 5,
+    blast_evalue: float = 30000.0,
+    blast_word_size: int = 7,
+    blast_reward: int = 1,
+    blast_penalty: int = -1,
 ):
     """
     Run BLAST on all candidate primers in the panel to check for specificity
@@ -36,6 +40,10 @@ def run_specificity_check(
         max_mismatches: Maximum mismatches in a 3'-anchored alignment.
         max_amplicon_size: Maximum amplicon size (bp) to consider.
         ontarget_tolerance: Coordinate tolerance (bp) for on-target classification.
+        blast_evalue: E-value passed to blastn via -evalue to control search sensitivity.
+        blast_word_size: Word size passed to blastn via -word_size.
+        blast_reward: Match reward passed to blastn via -reward.
+        blast_penalty: Mismatch penalty passed to blastn via -penalty.
     """
     logger.info("Starting specificity check (BLAST)...")
     os.makedirs(work_dir, exist_ok=True)
@@ -59,7 +67,14 @@ def run_specificity_check(
     runner = BlastRunner(input_fasta, genome_fasta)
     runner.create_database()
     try:
-        runner.run(output_archive=blast_archive, num_threads=num_threads)
+        runner.run(
+            output_archive=blast_archive,
+            num_threads=num_threads,
+            evalue=blast_evalue,
+            word_size=blast_word_size,
+            reward=blast_reward,
+            penalty=blast_penalty,
+        )
         runner.reformat_output_as_table(blast_table)
     finally:
         if os.path.exists(blast_archive):
diff --git a/src/plexus/config.py b/src/plexus/config.py
index 6af183c..68a26ab 100644
--- a/src/plexus/config.py
+++ b/src/plexus/config.py
@@ -70,7 +70,8 @@ class SingleplexDesignParameters(BaseModel):
     )
 
     # Sequence composition constraints
-    primer_max_poly_x: int = Field(default=5, ge=1, le=10)
+    primer_max_poly_x: int = Field(default=4, ge=1, le=10)
+    primer_max_poly_gc: int = Field(default=3, ge=1, le=10)
     primer_max_n: int = Field(default=0, ge=0, le=5)
 
     # Thermodynamic thresholds (°C)
@@ -272,15 +273,53 @@ class BlastParameters(BaseModel):
         ),
     )
     max_mismatches: int = Field(
-        default=2,
+        default=3,
         ge=0,
         le=5,
         description=(
             "Maximum mismatches allowed in a 3'-anchored alignment for it to be "
-            "classified as 'predicted bound'. A primer with 1-2 mismatches in a "
+            "classified as 'predicted bound'. A primer with 1-3 mismatches in a "
             "15+ bp 3' stretch will still extend in PCR."
         ),
     )
+    blast_evalue: float = Field(
+        default=30000.0,
+        gt=0.0,
+        description=(
+            "E-value passed to blastn via -evalue. Matches the Primer-BLAST "
+            "default (30000) to ensure short, gapped primer alignments are "
+            "not silently dropped by BLAST before plexus can evaluate them."
+        ),
+    )
+    blast_word_size: int = Field(
+        default=7,
+        ge=4,
+        le=28,
+        description=(
+            "Word size passed to blastn via -word_size. Matches the "
+            "Primer-BLAST / blastn-short default of 7, which is appropriate "
+            "for primer-length queries (<30 bp)."
+        ),
+    )
+    blast_reward: int = Field(
+        default=1,
+        ge=1,
+        le=5,
+        description="Match reward passed to blastn via -reward.",
+    )
+    blast_penalty: int = Field(
+        default=-1,
+        ge=-5,
+        le=-1,
+        description=(
+            "Mismatch penalty passed to blastn via -penalty. "
+            "The blastn-short default (-3) causes 3-mismatch paralog alignments "
+            "to score too low (E > 100,000) for BLAST to report them. "
+            "A gentler penalty (-1) keeps these alignments reportable "
+            "(E ~ 70 for a 21bp primer with 3 mismatches) while the annotator's "
+            "3'-anchored filters still reject true noise."
+        ),
+    )
     max_amplicon_size: int = Field(
         default=2000,
         ge=100,
diff --git a/src/plexus/designer/design.py b/src/plexus/designer/design.py
index 34ccfb6..0248243 100644
--- a/src/plexus/designer/design.py
+++ b/src/plexus/designer/design.py
@@ -64,6 +64,7 @@ def design_multiplex_primers(panel: MultiplexPanel) -> MultiplexPanel:
     min_primer_length = singleplex.primer_min_length
     max_primer_length = singleplex.primer_max_length
     max_poly_X = singleplex.primer_max_poly_x
+    max_poly_gc = singleplex.primer_max_poly_gc
     max_N = singleplex.primer_max_n
     min_gc = singleplex.primer_min_gc
     max_gc = singleplex.primer_max_gc
@@ -135,6 +136,7 @@ def design_multiplex_primers(panel: MultiplexPanel) -> MultiplexPanel:
                     min_gc=min_gc,
                     max_gc=max_gc,
                     gc_clamp=gc_clamp,
+                    max_poly_gc=max_poly_gc,
                 )
                 if orientation == "forward":
                     left_kmers = kmers
diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py
index e63412e..59ddc59 100644
--- a/src/plexus/pipeline.py
+++ b/src/plexus/pipeline.py
@@ -534,6 +534,10 @@ def run_pipeline(
                     max_mismatches=blast_config.max_mismatches,
                     max_amplicon_size=blast_config.max_amplicon_size,
                     ontarget_tolerance=blast_config.ontarget_tolerance,
+                    blast_evalue=blast_config.blast_evalue,
+                    blast_word_size=blast_config.blast_word_size,
+                    blast_reward=blast_config.blast_reward,
+                    blast_penalty=blast_config.blast_penalty,
                 )
                 result.steps_completed.append("specificity_checked")
                 logger.info("Specificity check complete")
diff --git a/src/plexus/utils/utils.py b/src/plexus/utils/utils.py
index f9ec903..a80c4c1 100644
--- a/src/plexus/utils/utils.py
+++ b/src/plexus/utils/utils.py
@@ -210,6 +210,43 @@ def reverse_complement(dna: str) -> str:
 # ===============================================
 
 
+def find_max_poly_gc(kmer: str, n: int) -> bool:
+    """Check if a DNA sequence has a homopolymer run of G or C exceeding *n*.
+
+    Same logic as :func:`find_max_poly_X` (counts consecutive *identical*
+    bases) but only triggers when the repeated base is G or C.
+
+    Parameters
+    ----------
+    kmer : str
+        The DNA sequence to check.
+    n : int
+        Maximum allowed consecutive identical G/C bases.
+
+    Returns
+    -------
+    bool
+        True if a run of G or C exceeding *n* is found, False otherwise.
+    """
+    if len(kmer) <= n:
+        return False
+
+    kmer = kmer.upper()
+    current_base = kmer[0]
+    count = 1
+
+    for i in range(1, len(kmer)):
+        if kmer[i] == current_base:
+            count += 1
+            if count > n and current_base in "GC":
+                return True
+        else:
+            current_base = kmer[i]
+            count = 1
+
+    return False
+
+
 def generate_kmers(
     target_name: str,
     target_sequence: str,
@@ -222,6 +259,7 @@ def generate_kmers(
     min_gc: int = 30,
     max_gc: int = 70,
     gc_clamp: int = 0,
+    max_poly_gc: int = 3,
 ) -> list:
     """
     Generate k-mers as candidate primers.
@@ -237,6 +275,7 @@ def generate_kmers(
         max_N - Max times N bases can occur anywhere in the kmer
         min_gc - Minimum GC content of kmer
         max_gc - Maximum GC content of kmer
+        max_poly_gc - Max consecutive G or C bases allowed
     """
     from plexus.designer.primer import Primer
 
@@ -279,6 +318,7 @@ def generate_kmers(
                 min_gc=min_gc,
                 max_gc=max_gc,
                 gc_clamp=gc_clamp,
+                max_poly_gc=max_poly_gc,
             ):
                 kmer_filtered_counter += 1
 
@@ -331,23 +371,25 @@ def check_kmer(
     min_gc: int = 30,
     max_gc: int = 70,
     gc_clamp: int = 0,
+    max_poly_gc: int = 3,
 ) -> bool:
     """
     Filter kmers (putative primers) based on GC-content, number of 'N' bases,
-    and number of repeated bases (polyX).
+    number of repeated bases (polyX), and consecutive G/C runs.
     """
     kmer_gc = gc_content(kmer)
     passed = (
         min_gc <= kmer_gc <= max_gc  # Should be True
         and not check_N_in_kmers(kmer, max_N)  # Should be False
         and not find_max_poly_X(kmer, max_poly_X)  # Should be False
+        and not find_max_poly_gc(kmer, max_poly_gc)  # Should be False
     )
     if passed and gc_clamp > 0 and not check_gc_clamp(kmer):
         return False
     return passed
 
 
-def filter_kmers(kmers, max_poly_X=4, max_N=0):
+def filter_kmers(kmers, max_poly_X=4, max_N=0, max_poly_gc=3):
     """
     Filter kmers (putative primers).
     """
@@ -366,6 +408,10 @@ def filter_kmers(kmers, max_poly_X=4, max_N=0):
         if find_max_poly_X(kmer, max_poly_X):
             continue
 
+        # Exclude kmers with too many consecutive G/C bases.
+        if find_max_poly_gc(kmer, max_poly_gc):
+            continue
+
         filtered_kmers.append(kmer)
 
     return filtered_kmers
diff --git a/src/plexus/version.py b/src/plexus/version.py
index 5becc17..5c4105c 100644
--- a/src/plexus/version.py
+++ b/src/plexus/version.py
@@ -1 +1 @@
-__version__ = "1.0.0"
+__version__ = "1.0.1"
diff --git a/tests/test_blast_runner.py b/tests/test_blast_runner.py
index 6cd3755..cc77ac6 100644
--- a/tests/test_blast_runner.py
+++ b/tests/test_blast_runner.py
@@ -151,6 +151,51 @@ def test_run_omits_num_threads_when_1(runner, tmp_path):
         assert "-num_threads" not in cmd
 
 
+def test_run_includes_evalue_when_provided(runner, tmp_path):
+    """evalue parameter is included in the blastn command when set."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, evalue=1000.0)
+        cmd = mock_run.call_args[0][0]
+        assert "-evalue" in cmd
+        assert "1000.0" in cmd
+
+
+def test_run_omits_evalue_when_none(runner, tmp_path):
+    """evalue parameter is omitted from the blastn command when None."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, evalue=None)
+        cmd = mock_run.call_args[0][0]
+        assert "-evalue" not in cmd
+
+
+def test_run_includes_reward_penalty_when_provided(runner, tmp_path):
+    """reward and penalty parameters are included in the blastn command."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, reward=1, penalty=-1)
+        cmd = mock_run.call_args[0][0]
+        assert "-reward" in cmd
+        assert "1" in cmd
+        assert "-penalty" in cmd
+        assert "-1" in cmd
+
+
+def test_run_omits_reward_penalty_when_none(runner, tmp_path):
+    """reward and penalty are omitted from the blastn command when None."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, reward=None, penalty=None)
+        cmd = mock_run.call_args[0][0]
+        assert "-reward" not in cmd
+        assert "-penalty" not in cmd
+
+
 def test_get_dataframe(runner, tmp_path):
     output_table = tmp_path / "output.txt"
     runner.output_table = str(output_table)
diff --git a/tests/test_blast_specificity.py b/tests/test_blast_specificity.py
index 55ccfe4..87ef6f7 100644
--- a/tests/test_blast_specificity.py
+++ b/tests/test_blast_specificity.py
@@ -178,6 +178,10 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
             evalue_threshold=5.0,
             max_mismatches=1,
             max_amplicon_size=5000,
+            blast_evalue=500.0,
+            blast_word_size=11,
+            blast_reward=2,
+            blast_penalty=-3,
         )
 
         # Verify annotator received custom thresholds
@@ -188,6 +192,13 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
         # Verify finder received custom max amplicon size
         finder_instance.find_amplicons.assert_called_once_with(max_size_bp=5000)
 
+        # Verify blast parameters were forwarded to runner.run()
+        _, run_kwargs = runner_instance.run.call_args
+        assert run_kwargs.get("evalue") == 500.0
+        assert run_kwargs.get("word_size") == 11
+        assert run_kwargs.get("reward") == 2
+        assert run_kwargs.get("penalty") == -3
+
 
 def test_run_specificity_check_no_hits(mock_panel, tmp_path):
     with (
diff --git a/tests/test_config.py b/tests/test_config.py
index b080d21..4d6c5b6 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -191,7 +191,11 @@ def test_default_values(self):
         params = BlastParameters()
         assert params.length_threshold == 15
         assert params.evalue_threshold == 10.0
-        assert params.max_mismatches == 2
+        assert params.max_mismatches == 3
+        assert params.blast_evalue == 30000.0
+        assert params.blast_word_size == 7
+        assert params.blast_reward == 1
+        assert params.blast_penalty == -1
         assert params.max_amplicon_size == 2000
         assert params.ontarget_tolerance == 5
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 425b77e..4e83614 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -2,7 +2,7 @@
 # Tests for utility functions (GC clamp, check_kmer, etc.)
 # ================================================================================
 
-from plexus.utils.utils import check_gc_clamp, check_kmer
+from plexus.utils.utils import check_gc_clamp, check_kmer, find_max_poly_gc
 
 
 class TestCheckGcClamp:
@@ -62,3 +62,63 @@ def test_gc_clamp_enabled_accepts_good_3prime(self):
         """gc_clamp=1 accepts primers with 1-3 G/C at 3' end."""
         seq = "GCGCGATATAATATC"  # last 5 = ATATC, GC=1
         assert check_kmer(seq, gc_clamp=1) is True
+
+
+class TestFindMaxPolyGc:
+    """Tests for the G/C-specific poly-run filter."""
+
+    def test_four_g_rejected(self):
+        """GGGG (4 > 3) should be rejected."""
+        assert find_max_poly_gc("ATGGGGAT", 3) is True
+
+    def test_four_c_rejected(self):
+        """CCCC (4 > 3) should be rejected."""
+        assert find_max_poly_gc("ATCCCCAT", 3) is True
+
+    def test_three_g_passes(self):
+        """GGG (3 <= 3) should pass."""
+        assert find_max_poly_gc("ATGGGATC", 3) is False
+
+    def test_three_c_passes(self):
+        """CCC (3 <= 3) should pass."""
+        assert find_max_poly_gc("ATCCCATG", 3) is False
+
+    def test_four_a_passes(self):
+        """AAAA is not G/C, should pass."""
+        assert find_max_poly_gc("GCAAAAGC", 3) is False
+
+    def test_four_t_passes(self):
+        """TTTT is not G/C, should pass."""
+        assert find_max_poly_gc("GCTTTTGC", 3) is False
+
+    def test_mixed_gc_passes(self):
+        """GGCC has G-run=2 and C-run=2, both <= 3."""
+        assert find_max_poly_gc("ATGGCCAT", 3) is False
+
+    def test_short_sequence_passes(self):
+        """Sequence shorter than threshold always passes."""
+        assert find_max_poly_gc("GGG", 3) is False
+
+    def test_lowercase_handled(self):
+        """Lowercase G/C should be detected."""
+        assert find_max_poly_gc("atggggatc", 3) is True
+
+
+class TestCheckKmerPolyGc:
+    """Test max_poly_gc integration in check_kmer."""
+
+    def test_gggg_rejected_by_default(self):
+        """A primer with GGGG is rejected by the default max_poly_gc=3."""
+        # GGGGCCGGATGTTCTGCTG — the real DCAF12L2 primer
+        seq = "GGGGCCGGATGTTCTGCTG"
+        assert check_kmer(seq, max_poly_X=5, max_poly_gc=3) is False
+
+    def test_ggg_passes(self):
+        """A primer with GGG passes max_poly_gc=3."""
+        seq = "GGGACCGGATGTTCTGCTG"
+        assert check_kmer(seq, max_poly_X=5, max_poly_gc=3) is True
+
+    def test_poly_gc_can_be_relaxed(self):
+        """Setting max_poly_gc=4 allows GGGG."""
+        seq = "GGGGCCGGATGTTCTGCTG"
+        assert check_kmer(seq, max_poly_X=5, max_poly_gc=4) is True

From df2e5df168f4aa45aa490a380601436183df17ee Mon Sep 17 00:00:00 2001
From: sfilges <stefan.filges@pm.me>
Date: Tue, 3 Mar 2026 16:17:22 +0100
Subject: [PATCH 2/3] Added additional blast parameters for better performance

---
 CHANGELOG.md                        |  1 +
 config/designer_default_config.json |  6 +++--
 config/designer_lenient_config.json |  2 ++
 src/plexus/blast/blast_runner.py    |  6 +++++
 src/plexus/blast/specificity.py     |  4 +++
 src/plexus/config.py                | 20 ++++++++++++++
 src/plexus/pipeline.py              |  2 ++
 tests/test_blast_runner.py          | 42 +++++++++++++++++++++++++++++
 tests/test_blast_specificity.py     |  4 +++
 tests/test_config.py                |  2 ++
 10 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 378a0d0..2828667 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **G/C-specific poly-run filter** (`utils.py`): New `find_max_poly_gc()` function rejects primers with homopolymer runs of G or C exceeding `primer_max_poly_gc` (default 3). This catches primers like the DCAF12L2 `GGGGCCGGATGTTCTGCTG` forward primer that passed the old poly-X filter. Wired through `check_kmer()`, `generate_kmers()`, `filter_kmers()`, and `design.py`.
 - **BLAST `-evalue` parameter** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_evalue` field (default 30000.0) passed to `blastn -evalue`, ensuring weak but real off-target alignments are not silently dropped before the annotator can evaluate them.
 - **BLAST `-reward` / `-penalty` parameters** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_reward` (default 1) and `blast_penalty` (default -1) fields override the blastn-short scoring defaults, improving sensitivity for mismatched paralog alignments.
+- **BLAST `-max_hsps` and `-dust` flags** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_max_hsps` (default 100) limits HSPs per query-subject pair, and `blast_dust` (default `"yes"`) enables low-complexity masking. Together these cut BLAST runtime significantly after the evalue/penalty sensitivity increase, without affecting off-target detection (the annotator already discards low-quality HSPs).
 - Tests for `find_max_poly_gc`, `check_kmer` poly-GC integration, BLAST evalue/reward/penalty/word_size parameter forwarding, and specificity check threading.
 
 ## [1.0.0] - 2026-03-03
diff --git a/config/designer_default_config.json b/config/designer_default_config.json
index 95e5dba..1b4ae7c 100644
--- a/config/designer_default_config.json
+++ b/config/designer_default_config.json
@@ -8,7 +8,7 @@
         "primer_min_length": 15,
         "primer_max_length": 30,
         "PRIMER_OPT_BOUND": 98.0,
-        "PRIMER_MIN_BOUND": 35.0,
+        "PRIMER_MIN_BOUND": 30.0,
         "PRIMER_MAX_BOUND": 120.0,
         "junction_padding_bases": 3,
         "forward_tail": "GGACACTCTTTCCCTACACGACGCTCTTCCGATCTAAAAAAAAAAAAAAAAAAAATGGGAAAGAGTGTCC",
@@ -20,7 +20,7 @@
         "primer_min_gc": 30,
         "primer_max_gc": 70,
         "primer_gc_clamp": 1,
-        "primer_max_poly_x": 4,
+        "primer_max_poly_x": 5,
         "primer_max_poly_gc": 3,
         "primer_max_n": 0,
         "PRIMER_MAX_SELF_ANY_TH": 45.0,
@@ -79,6 +79,8 @@
         "blast_word_size": 7,
         "blast_reward": 1,
         "blast_penalty": -1,
+        "blast_max_hsps": 100,
+        "blast_dust": "yes",
         "max_amplicon_size": 2000,
         "ontarget_tolerance": 5
     },
diff --git a/config/designer_lenient_config.json b/config/designer_lenient_config.json
index 352875d..02d7b2e 100644
--- a/config/designer_lenient_config.json
+++ b/config/designer_lenient_config.json
@@ -79,6 +79,8 @@
         "blast_word_size": 7,
         "blast_reward": 1,
         "blast_penalty": -1,
+        "blast_max_hsps": 100,
+        "blast_dust": "yes",
         "max_amplicon_size": 2000,
         "ontarget_tolerance": 5
     },
diff --git a/src/plexus/blast/blast_runner.py b/src/plexus/blast/blast_runner.py
index 4433c7a..34641cc 100644
--- a/src/plexus/blast/blast_runner.py
+++ b/src/plexus/blast/blast_runner.py
@@ -99,6 +99,8 @@ def run(
         evalue: float | None = None,
         reward: int | None = None,
         penalty: int | None = None,
+        max_hsps: int | None = None,
+        dust: str | None = None,
     ):
         """
         Run blast, writing a BLAST archive to `output_archive`.
@@ -133,6 +135,10 @@ def run(
             cmd.extend(["-reward", str(reward)])
         if penalty is not None:
             cmd.extend(["-penalty", str(penalty)])
+        if max_hsps is not None:
+            cmd.extend(["-max_hsps", str(max_hsps)])
+        if dust is not None:
+            cmd.extend(["-dust", dust])
         if num_threads > 1:
             cmd.extend(["-num_threads", str(num_threads)])
 
diff --git a/src/plexus/blast/specificity.py b/src/plexus/blast/specificity.py
index 4db1f74..295bfda 100644
--- a/src/plexus/blast/specificity.py
+++ b/src/plexus/blast/specificity.py
@@ -23,6 +23,8 @@ def run_specificity_check(
     blast_word_size: int = 7,
     blast_reward: int = 1,
     blast_penalty: int = -1,
+    blast_max_hsps: int = 100,
+    blast_dust: str = "yes",
 ):
     """
     Run BLAST on all candidate primers in the panel to check for specificity
@@ -74,6 +76,8 @@ def run_specificity_check(
             word_size=blast_word_size,
             reward=blast_reward,
             penalty=blast_penalty,
+            max_hsps=blast_max_hsps,
+            dust=blast_dust,
         )
         runner.reformat_output_as_table(blast_table)
     finally:
diff --git a/src/plexus/config.py b/src/plexus/config.py
index 68a26ab..04bbf2f 100644
--- a/src/plexus/config.py
+++ b/src/plexus/config.py
@@ -320,6 +320,26 @@ class BlastParameters(BaseModel):
             "3'-anchored filters still reject true noise."
         ),
     )
+    blast_max_hsps: int = Field(
+        default=100,
+        ge=1,
+        le=10000,
+        description=(
+            "Maximum HSPs per query-subject pair passed to blastn via -max_hsps. "
+            "Each chromosome is one subject; 100 is generous for real binding sites "
+            "and avoids wasting time on thousands of low-quality HSPs."
+        ),
+    )
+    blast_dust: str = Field(
+        default="yes",
+        pattern=r"^(yes|no|\d+ \d+ \d+)$",
+        description=(
+            "Low-complexity filter passed to blastn via -dust. "
+            "blastn-short defaults to 'no'; enabling it ('yes') avoids extending "
+            "seeds in Alu/LINE regions, improving runtime without affecting "
+            "off-target detection."
+        ),
+    )
     max_amplicon_size: int = Field(
         default=2000,
         ge=100,
diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py
index 59ddc59..25ab685 100644
--- a/src/plexus/pipeline.py
+++ b/src/plexus/pipeline.py
@@ -538,6 +538,8 @@ def run_pipeline(
                     blast_word_size=blast_config.blast_word_size,
                     blast_reward=blast_config.blast_reward,
                     blast_penalty=blast_config.blast_penalty,
+                    blast_max_hsps=blast_config.blast_max_hsps,
+                    blast_dust=blast_config.blast_dust,
                 )
                 result.steps_completed.append("specificity_checked")
                 logger.info("Specificity check complete")
diff --git a/tests/test_blast_runner.py b/tests/test_blast_runner.py
index cc77ac6..0d84bbd 100644
--- a/tests/test_blast_runner.py
+++ b/tests/test_blast_runner.py
@@ -196,6 +196,48 @@ def test_run_omits_reward_penalty_when_none(runner, tmp_path):
         assert "-penalty" not in cmd
 
 
+def test_run_includes_max_hsps_when_provided(runner, tmp_path):
+    """max_hsps parameter is included in the blastn command when set."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, max_hsps=100)
+        cmd = mock_run.call_args[0][0]
+        assert "-max_hsps" in cmd
+        assert "100" in cmd
+
+
+def test_run_omits_max_hsps_when_none(runner, tmp_path):
+    """max_hsps parameter is omitted from the blastn command when None."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, max_hsps=None)
+        cmd = mock_run.call_args[0][0]
+        assert "-max_hsps" not in cmd
+
+
+def test_run_includes_dust_when_provided(runner, tmp_path):
+    """dust parameter is included in the blastn command when set."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, dust="yes")
+        cmd = mock_run.call_args[0][0]
+        assert "-dust" in cmd
+        assert "yes" in cmd
+
+
+def test_run_omits_dust_when_none(runner, tmp_path):
+    """dust parameter is omitted from the blastn command when None."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+    with patch("subprocess.run") as mock_run:
+        runner.run(output_archive, dust=None)
+        cmd = mock_run.call_args[0][0]
+        assert "-dust" not in cmd
+
+
 def test_get_dataframe(runner, tmp_path):
     output_table = tmp_path / "output.txt"
     runner.output_table = str(output_table)
diff --git a/tests/test_blast_specificity.py b/tests/test_blast_specificity.py
index 87ef6f7..c3cf862 100644
--- a/tests/test_blast_specificity.py
+++ b/tests/test_blast_specificity.py
@@ -182,6 +182,8 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
             blast_word_size=11,
             blast_reward=2,
             blast_penalty=-3,
+            blast_max_hsps=50,
+            blast_dust="no",
         )
 
         # Verify annotator received custom thresholds
@@ -198,6 +200,8 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
         assert run_kwargs.get("word_size") == 11
         assert run_kwargs.get("reward") == 2
         assert run_kwargs.get("penalty") == -3
+        assert run_kwargs.get("max_hsps") == 50
+        assert run_kwargs.get("dust") == "no"
 
 
 def test_run_specificity_check_no_hits(mock_panel, tmp_path):
diff --git a/tests/test_config.py b/tests/test_config.py
index 4d6c5b6..f5d1949 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -196,6 +196,8 @@ def test_default_values(self):
         assert params.blast_word_size == 7
         assert params.blast_reward == 1
         assert params.blast_penalty == -1
+        assert params.blast_max_hsps == 100
+        assert params.blast_dust == "yes"
         assert params.max_amplicon_size == 2000
         assert params.ontarget_tolerance == 5
 

From a2b9c7fad72048bb45c31b18ecdb4f2c3198b3b0 Mon Sep 17 00:00:00 2001
From: sfilges <stefan.filges@pm.me>
Date: Tue, 3 Mar 2026 17:02:50 +0100
Subject: [PATCH 3/3] Further tuning to the BLAST off-target detection

---
 CHANGELOG.md                        |  2 +
 config/designer_default_config.json |  1 +
 config/designer_lenient_config.json |  1 +
 docs/ROADMAP.md                     |  1 +
 src/plexus/blast/annotator.py       | 21 ++++--
 src/plexus/blast/blast_runner.py    | 44 +++++++++----
 src/plexus/blast/specificity.py     | 32 ++++------
 src/plexus/config.py                | 13 ++++
 src/plexus/pipeline.py              |  1 +
 tests/test_blast_annotator.py       | 48 ++++++++++++++
 tests/test_blast_runner.py          | 99 +++++++++++++++++++++--------
 tests/test_blast_specificity.py     | 31 +++++----
 12 files changed, 220 insertions(+), 74 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2828667..e10a55f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **BLAST `-evalue` parameter** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_evalue` field (default 30000.0) passed to `blastn -evalue`, ensuring weak but real off-target alignments are not silently dropped before the annotator can evaluate them.
 - **BLAST `-reward` / `-penalty` parameters** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_reward` (default 1) and `blast_penalty` (default -1) fields override the blastn-short scoring defaults, improving sensitivity for mismatched paralog alignments.
 - **BLAST `-max_hsps` and `-dust` flags** (`config.py`, `blast_runner.py`, `specificity.py`, `pipeline.py`): New `blast_max_hsps` (default 100) limits HSPs per query-subject pair, and `blast_dust` (default `"yes"`) enables low-complexity masking. Together these cut BLAST runtime significantly after the evalue/penalty sensitivity increase, without affecting off-target detection (the annotator already discards low-quality HSPs).
+- **Direct tabular BLAST output** (`blast_runner.py`, `specificity.py`): `BlastRunner.run()` now accepts `output_table=` to write tabular results (`-outfmt 6`) directly, eliminating the intermediate archive (`-outfmt 11`) write/read and the `blast_formatter` subprocess. The specificity check uses this new path by default, removing the `try/finally` archive cleanup. The legacy `output_archive=` parameter is deprecated but still functional.
+- **3'-end tolerance for BLAST annotation** (`annotator.py`, `config.py`, `specificity.py`, `pipeline.py`): New `three_prime_tolerance` parameter (default 3) relaxes the `from_3prime` check from `qend == qlen` to `qlen - qend <= tolerance`. BLAST's local alignment clips terminal mismatches, causing hits like the DCAF12L1 reverse primer (19/21bp aligned, 2bp clipped at 3' end) to be wrongly discarded as "not 3'-anchored" even though Primer-BLAST detects them via semi-global alignment.
 - Tests for `find_max_poly_gc`, `check_kmer` poly-GC integration, BLAST evalue/reward/penalty/word_size parameter forwarding, and specificity check threading.
 
 ## [1.0.0] - 2026-03-03
diff --git a/config/designer_default_config.json b/config/designer_default_config.json
index 1b4ae7c..9fb5a25 100644
--- a/config/designer_default_config.json
+++ b/config/designer_default_config.json
@@ -75,6 +75,7 @@
         "length_threshold": 15,
         "evalue_threshold": 10.0,
         "max_mismatches": 3,
+        "three_prime_tolerance": 3,
         "blast_evalue": 30000.0,
         "blast_word_size": 7,
         "blast_reward": 1,
diff --git a/config/designer_lenient_config.json b/config/designer_lenient_config.json
index 02d7b2e..037e371 100644
--- a/config/designer_lenient_config.json
+++ b/config/designer_lenient_config.json
@@ -75,6 +75,7 @@
         "length_threshold": 15,
         "evalue_threshold": 10.0,
         "max_mismatches": 3,
+        "three_prime_tolerance": 3,
         "blast_evalue": 30000.0,
         "blast_word_size": 7,
         "blast_reward": 1,
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
index 77df852..148ac86 100644
--- a/docs/ROADMAP.md
+++ b/docs/ROADMAP.md
@@ -639,6 +639,7 @@ Add a `max_candidates_per_junction` parameter to `MultiplexPickerParameters` (de
 **File: `src/plexus/resources.py`**
 
 Add presets for:
+
 - `hg19` / `GRCh37` (legacy clinical datasets)
 - `mm39` (mouse, common model organism)
 - `GRCh38_no_alt` (analysis-set FASTA without alt contigs, preferred for clinical pipelines)
diff --git a/src/plexus/blast/annotator.py b/src/plexus/blast/annotator.py
index ed83742..562872f 100644
--- a/src/plexus/blast/annotator.py
+++ b/src/plexus/blast/annotator.py
@@ -20,14 +20,27 @@ def __init__(self, blast_df, target_map=None):
         self.target_map = target_map or {}
 
     def build_annotation_dict(
-        self, length_threshold=12, evalue_threshold=4, max_mismatches=2
+        self,
+        length_threshold=12,
+        evalue_threshold=4,
+        max_mismatches=2,
+        three_prime_tolerance=0,
     ):
         """
-        Build a dictionary specifying conditions for annotation
-
+        Build a dictionary specifying conditions for annotation.
+
+        Parameters
+        ----------
+        three_prime_tolerance : int
+            Maximum number of unaligned bases at the 3' end of the primer
+            for a hit to still be considered "3'-anchored".  BLAST's local
+            alignment clips terminal mismatches, so a primer with a mismatch
+            at or near the 3' end will have ``qend < qlen``.  A tolerance
+            of 2-3 catches these cases without accepting 5'-only hits.
         """
         self.annotations = {
-            "from_3prime": lambda row: row["qend"] == row["qlen"],
+            "from_3prime": lambda row: (row["qlen"] - row["qend"])
+            <= three_prime_tolerance,
             "length_pass_3prime": lambda row: (
                 row["length"] >= length_threshold
                 and row["mismatch"] <= max_mismatches
diff --git a/src/plexus/blast/blast_runner.py b/src/plexus/blast/blast_runner.py
index 34641cc..ccbba9b 100644
--- a/src/plexus/blast/blast_runner.py
+++ b/src/plexus/blast/blast_runner.py
@@ -1,4 +1,5 @@
 import os
+import warnings
 
 import pandas as pd
 from loguru import logger
@@ -30,8 +31,7 @@ def __init__(self, input_fasta, reference_fasta):
         """
         Interface for running BLAST:
         - Creates BLAST database
-        - Runs BLAST in archive mode
-        - Reformats to a table
+        - Runs BLAST (direct tabular output or legacy archive mode)
         - Converts table to a pandas data frame
 
         params
@@ -92,7 +92,9 @@ def create_database(self):
 
     def run(
         self,
-        output_archive,
+        output_archive=None,
+        *,
+        output_table=None,
         word_size=None,
         task="blastn-short",
         num_threads: int = 1,
@@ -103,16 +105,34 @@ def run(
         dust: str | None = None,
     ):
         """
-        Run blast, writing a BLAST archive to `output_archive`.
+        Run blastn and write results.
 
         Uses ``-task blastn-short`` by default, which is tuned for
         primer-length queries (<30 bp) with word_size=7, reward 1,
         penalty −3, and gap costs 5/2.
 
-        Note that we run with output format 11 ``-outfmt 11`` to
-        produce the archive; from this format you can convert to
-        all other formats.
+        Parameters
+        ----------
+        output_table : str
+            Path for direct tabular output (``-outfmt 6``).  Preferred.
+        output_archive : str
+            *Deprecated.*  Path for a BLAST archive (``-outfmt 11``).
+            Use ``output_table`` instead to skip the archive/reformat step.
         """
+        if output_table is not None:
+            outfmt = f"6 {self.BLAST_COLS}"
+            out_path = output_table
+        elif output_archive is not None:
+            warnings.warn(
+                "output_archive is deprecated; use output_table for direct "
+                "tabular output and avoid the archive/reformat step.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            outfmt = "11"
+            out_path = output_archive
+        else:
+            raise ValueError("Either output_table or output_archive must be provided.")
 
         cmd = [
             "blastn",
@@ -123,9 +143,9 @@ def run(
             "-task",
             task,
             "-outfmt",
-            "11",
+            outfmt,
             "-out",
-            output_archive,
+            out_path,
         ]
         if word_size is not None:
             cmd.extend(["-word_size", str(word_size)])
@@ -144,8 +164,10 @@ def run(
 
         run_command(cmd, check=True, retries=2)
 
-        # Save as instance variable, for reformattings
-        self.output_archive = output_archive
+        if output_table is not None:
+            self.output_table = output_table
+        else:
+            self.output_archive = output_archive
 
         return self
 
diff --git a/src/plexus/blast/specificity.py b/src/plexus/blast/specificity.py
index 295bfda..ccf4f2b 100644
--- a/src/plexus/blast/specificity.py
+++ b/src/plexus/blast/specificity.py
@@ -17,6 +17,7 @@ def run_specificity_check(
     length_threshold: int = 15,
     evalue_threshold: float = 10.0,
     max_mismatches: int = 3,
+    three_prime_tolerance: int = 3,
     max_amplicon_size: int = 2000,
     ontarget_tolerance: int = 5,
     blast_evalue: float = 30000.0,
@@ -40,6 +41,8 @@ def run_specificity_check(
         length_threshold: Minimum 3'-anchored alignment length (bp) to predict binding.
         evalue_threshold: E-value cutoff for predicted binding.
         max_mismatches: Maximum mismatches in a 3'-anchored alignment.
+        three_prime_tolerance: Max unaligned bases at the 3' end for a hit to be
+            considered 3'-anchored.  Compensates for BLAST clipping terminal mismatches.
         max_amplicon_size: Maximum amplicon size (bp) to consider.
         ontarget_tolerance: Coordinate tolerance (bp) for on-target classification.
         blast_evalue: E-value passed to blastn via -evalue to control search sensitivity.
@@ -63,28 +66,20 @@ def run_specificity_check(
     panel.save_candidate_primers_to_fasta(input_fasta)
 
     # 3. Run BLAST
-    blast_archive = os.path.join(work_dir, "blast_archive")
     blast_table = os.path.join(work_dir, "blast_table.txt")
 
     runner = BlastRunner(input_fasta, genome_fasta)
     runner.create_database()
-    try:
-        runner.run(
-            output_archive=blast_archive,
-            num_threads=num_threads,
-            evalue=blast_evalue,
-            word_size=blast_word_size,
-            reward=blast_reward,
-            penalty=blast_penalty,
-            max_hsps=blast_max_hsps,
-            dust=blast_dust,
-        )
-        runner.reformat_output_as_table(blast_table)
-    finally:
-        if os.path.exists(blast_archive):
-            os.remove(blast_archive)
-            logger.debug(f"Removed BLAST archive: {blast_archive}")
-
+    runner.run(
+        output_table=blast_table,
+        num_threads=num_threads,
+        evalue=blast_evalue,
+        word_size=blast_word_size,
+        reward=blast_reward,
+        penalty=blast_penalty,
+        max_hsps=blast_max_hsps,
+        dust=blast_dust,
+    )
     blast_df = runner.get_dataframe()
 
     if blast_df.empty:
@@ -98,6 +93,7 @@ def run_specificity_check(
         length_threshold=length_threshold,
         evalue_threshold=evalue_threshold,
         max_mismatches=max_mismatches,
+        three_prime_tolerance=three_prime_tolerance,
     )
     annotator.add_annotations()
 
diff --git a/src/plexus/config.py b/src/plexus/config.py
index 04bbf2f..d117038 100644
--- a/src/plexus/config.py
+++ b/src/plexus/config.py
@@ -282,6 +282,19 @@ class BlastParameters(BaseModel):
             "15+ bp 3' stretch will still extend in PCR."
         ),
     )
+    three_prime_tolerance: int = Field(
+        default=3,
+        ge=0,
+        le=5,
+        description=(
+            "Maximum number of unaligned bases at the 3' end of the primer for "
+            "a BLAST hit to still be considered '3'-anchored'. BLAST's local "
+            "alignment clips terminal mismatches, so a primer with mismatches "
+            "at or near the 3' end will have qend < qlen. A tolerance of 3 "
+            "catches these real binding events (e.g. DCAF12L1 paralogs) that "
+            "Primer-BLAST detects via its semi-global alignment."
+        ),
+    )
     blast_evalue: float = Field(
         default=30000.0,
         gt=0.0,
diff --git a/src/plexus/pipeline.py b/src/plexus/pipeline.py
index 25ab685..eb8cbb5 100644
--- a/src/plexus/pipeline.py
+++ b/src/plexus/pipeline.py
@@ -532,6 +532,7 @@ def run_pipeline(
                     length_threshold=blast_config.length_threshold,
                     evalue_threshold=blast_config.evalue_threshold,
                     max_mismatches=blast_config.max_mismatches,
+                    three_prime_tolerance=blast_config.three_prime_tolerance,
                     max_amplicon_size=blast_config.max_amplicon_size,
                     ontarget_tolerance=blast_config.ontarget_tolerance,
                     blast_evalue=blast_config.blast_evalue,
diff --git a/tests/test_blast_annotator.py b/tests/test_blast_annotator.py
index e32aac6..f0c1a2f 100644
--- a/tests/test_blast_annotator.py
+++ b/tests/test_blast_annotator.py
@@ -154,3 +154,51 @@ def test_length_pass_rejects_high_mismatch():
     # 2 mismatches: from_3prime=True and length_pass_3prime=True
     assert bool(df.iloc[1]["from_3prime"]) is True
     assert bool(df.iloc[1]["length_pass_3prime"]) is True
+
+
+def test_three_prime_tolerance_catches_clipped_terminal_mismatches():
+    """
+    BLAST's local alignment clips terminal mismatches, so a primer with a
+    mismatch at or near the 3' end will have qend < qlen.  With
+    three_prime_tolerance > 0, these hits should be classified as from_3prime.
+
+    This is the DCAF12L1 scenario: a 21bp reverse primer aligns 19bp
+    (qend=19, qlen=21) because BLAST clips 2 terminal mismatched bases.
+    """
+    cols = "qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore sstrand qlen".split()
+    data = [
+        # Exact 3' match: qend == qlen
+        ["SEQ_0", "chr1", 100.0, 21, 0, 0, 1, 21, 100, 120, 0.4, 35.0, "minus", 21],
+        # 2bp clipped at 3' end: qend=19, qlen=21 (the DCAF12L1 hit)
+        ["SEQ_0", "chr1", 89.5, 19, 2, 0, 1, 19, 200, 218, 298.0, 25.4, "minus", 21],
+        # 5bp clipped at 3' end: qend=16, qlen=21 — too far from 3' end
+        ["SEQ_0", "chr1", 87.5, 16, 2, 0, 1, 16, 300, 315, 900.0, 22.0, "minus", 21],
+    ]
+    df = pd.DataFrame(data, columns=cols)
+
+    # tolerance=0 (old behaviour): only exact 3' match passes
+    annotator0 = BlastResultsAnnotator(df.copy())
+    annotator0.build_annotation_dict(
+        length_threshold=15,
+        evalue_threshold=10.0,
+        max_mismatches=3,
+        three_prime_tolerance=0,
+    )
+    annotator0.add_annotations()
+    assert bool(annotator0.blast_df.iloc[0]["from_3prime"]) is True
+    assert bool(annotator0.blast_df.iloc[1]["from_3prime"]) is False  # missed!
+    assert bool(annotator0.blast_df.iloc[2]["from_3prime"]) is False
+
+    # tolerance=3: the 2bp-clipped hit now passes
+    annotator3 = BlastResultsAnnotator(df.copy())
+    annotator3.build_annotation_dict(
+        length_threshold=15,
+        evalue_threshold=10.0,
+        max_mismatches=3,
+        three_prime_tolerance=3,
+    )
+    annotator3.add_annotations()
+    assert bool(annotator3.blast_df.iloc[0]["from_3prime"]) is True
+    assert bool(annotator3.blast_df.iloc[1]["from_3prime"]) is True  # now caught
+    assert bool(annotator3.blast_df.iloc[1]["predicted_bound"]) is True
+    assert bool(annotator3.blast_df.iloc[2]["from_3prime"]) is False  # still too far
diff --git a/tests/test_blast_runner.py b/tests/test_blast_runner.py
index 0d84bbd..7fdc00d 100644
--- a/tests/test_blast_runner.py
+++ b/tests/test_blast_runner.py
@@ -1,3 +1,4 @@
+import warnings
 from unittest.mock import patch
 
 import pandas as pd
@@ -62,12 +63,12 @@ def isfile_v4(path):
 
 
 def test_run_command_defaults(runner, tmp_path):
-    """Default run uses -task blastn-short and omits -word_size."""
-    output_archive = tmp_path / "output.asn"
+    """Default run with output_table uses -task blastn-short, outfmt 6, and omits -word_size."""
+    output_table = tmp_path / "output.txt"
     runner.db_path = "/fake/db"
 
     with patch("subprocess.run") as mock_run:
-        runner.run(str(output_archive))
+        runner.run(output_table=str(output_table))
 
         args, kwargs = mock_run.call_args
         command = args[0]
@@ -80,17 +81,18 @@ def test_run_command_defaults(runner, tmp_path):
         assert "blastn-short" in command
         assert "-word_size" not in command
         assert "-outfmt" in command
+        assert any("6 qseqid sseqid" in arg for arg in command)
         assert "-out" in command
-        assert str(output_archive) in command
+        assert str(output_table) in command
 
 
 def test_run_command_explicit_word_size(runner, tmp_path):
     """Explicit word_size is included in the command."""
-    output_archive = tmp_path / "output.asn"
+    output_table = tmp_path / "output.txt"
     runner.db_path = "/fake/db"
 
     with patch("subprocess.run") as mock_run:
-        runner.run(str(output_archive), word_size=11)
+        runner.run(output_table=str(output_table), word_size=11)
 
         args, kwargs = mock_run.call_args
         command = args[0]
@@ -102,11 +104,11 @@ def test_run_command_explicit_word_size(runner, tmp_path):
 
 def test_run_command_custom_task(runner, tmp_path):
     """Custom task parameter is passed through."""
-    output_archive = tmp_path / "output.asn"
+    output_table = tmp_path / "output.txt"
     runner.db_path = "/fake/db"
 
     with patch("subprocess.run") as mock_run:
-        runner.run(str(output_archive), task="blastn")
+        runner.run(output_table=str(output_table), task="blastn")
 
         args, kwargs = mock_run.call_args
         command = args[0]
@@ -133,30 +135,30 @@ def test_reformat_output_command(runner, tmp_path):
 
 
 def test_run_emits_num_threads_when_gt_1(runner, tmp_path):
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, num_threads=4)
+        runner.run(output_table=output_table, num_threads=4)
         cmd = mock_run.call_args[0][0]
         assert "-num_threads" in cmd
         assert "4" in cmd
 
 
 def test_run_omits_num_threads_when_1(runner, tmp_path):
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, num_threads=1)
+        runner.run(output_table=output_table, num_threads=1)
         cmd = mock_run.call_args[0][0]
         assert "-num_threads" not in cmd
 
 
 def test_run_includes_evalue_when_provided(runner, tmp_path):
     """evalue parameter is included in the blastn command when set."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, evalue=1000.0)
+        runner.run(output_table=output_table, evalue=1000.0)
         cmd = mock_run.call_args[0][0]
         assert "-evalue" in cmd
         assert "1000.0" in cmd
@@ -164,20 +166,20 @@ def test_run_includes_evalue_when_provided(runner, tmp_path):
 
 def test_run_omits_evalue_when_none(runner, tmp_path):
     """evalue parameter is omitted from the blastn command when None."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, evalue=None)
+        runner.run(output_table=output_table, evalue=None)
         cmd = mock_run.call_args[0][0]
         assert "-evalue" not in cmd
 
 
 def test_run_includes_reward_penalty_when_provided(runner, tmp_path):
     """reward and penalty parameters are included in the blastn command."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, reward=1, penalty=-1)
+        runner.run(output_table=output_table, reward=1, penalty=-1)
         cmd = mock_run.call_args[0][0]
         assert "-reward" in cmd
         assert "1" in cmd
@@ -187,10 +189,10 @@ def test_run_includes_reward_penalty_when_provided(runner, tmp_path):
 
 def test_run_omits_reward_penalty_when_none(runner, tmp_path):
     """reward and penalty are omitted from the blastn command when None."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, reward=None, penalty=None)
+        runner.run(output_table=output_table, reward=None, penalty=None)
         cmd = mock_run.call_args[0][0]
         assert "-reward" not in cmd
         assert "-penalty" not in cmd
@@ -198,10 +200,10 @@ def test_run_omits_reward_penalty_when_none(runner, tmp_path):
 
 def test_run_includes_max_hsps_when_provided(runner, tmp_path):
     """max_hsps parameter is included in the blastn command when set."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, max_hsps=100)
+        runner.run(output_table=output_table, max_hsps=100)
         cmd = mock_run.call_args[0][0]
         assert "-max_hsps" in cmd
         assert "100" in cmd
@@ -209,20 +211,20 @@ def test_run_includes_max_hsps_when_provided(runner, tmp_path):
 
 def test_run_omits_max_hsps_when_none(runner, tmp_path):
     """max_hsps parameter is omitted from the blastn command when None."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, max_hsps=None)
+        runner.run(output_table=output_table, max_hsps=None)
         cmd = mock_run.call_args[0][0]
         assert "-max_hsps" not in cmd
 
 
 def test_run_includes_dust_when_provided(runner, tmp_path):
     """dust parameter is included in the blastn command when set."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, dust="yes")
+        runner.run(output_table=output_table, dust="yes")
         cmd = mock_run.call_args[0][0]
         assert "-dust" in cmd
         assert "yes" in cmd
@@ -230,10 +232,10 @@ def test_run_includes_dust_when_provided(runner, tmp_path):
 
 def test_run_omits_dust_when_none(runner, tmp_path):
     """dust parameter is omitted from the blastn command when None."""
-    output_archive = str(tmp_path / "out.asn")
+    output_table = str(tmp_path / "out.txt")
     runner.db_path = "/fake/db"
     with patch("subprocess.run") as mock_run:
-        runner.run(output_archive, dust=None)
+        runner.run(output_table=output_table, dust=None)
         cmd = mock_run.call_args[0][0]
         assert "-dust" not in cmd
 
@@ -251,3 +253,44 @@ def test_get_dataframe(runner, tmp_path):
     assert isinstance(df, pd.DataFrame)
     assert len(df) == 1
     assert df.iloc[0]["qseqid"] == "Q1"
+
+
+def test_run_direct_tabular_output(runner, tmp_path):
+    """output_table= produces outfmt 6 and sets self.output_table."""
+    output_table = str(tmp_path / "table.txt")
+    runner.db_path = "/fake/db"
+
+    with patch("subprocess.run"):
+        runner.run(output_table=output_table)
+
+    assert runner.output_table == output_table
+    assert not hasattr(runner, "output_archive")
+
+
+def test_run_legacy_archive_emits_deprecation_warning(runner, tmp_path):
+    """output_archive= still works but emits DeprecationWarning."""
+    output_archive = str(tmp_path / "out.asn")
+    runner.db_path = "/fake/db"
+
+    with patch("subprocess.run") as mock_run:
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            runner.run(output_archive)
+
+            assert len(w) == 1
+            assert issubclass(w[0].category, DeprecationWarning)
+            assert "output_archive is deprecated" in str(w[0].message)
+
+        cmd = mock_run.call_args[0][0]
+        assert "-outfmt" in cmd
+        idx = cmd.index("-outfmt")
+        assert cmd[idx + 1] == "11"
+    assert runner.output_archive == output_archive
+
+
+def test_run_raises_when_no_output_specified(runner):
+    """ValueError when neither output_table nor output_archive is given."""
+    runner.db_path = "/fake/db"
+
+    with pytest.raises(ValueError, match="Either output_table or output_archive"):
+        runner.run()
diff --git a/tests/test_blast_specificity.py b/tests/test_blast_specificity.py
index c3cf862..07b230e 100644
--- a/tests/test_blast_specificity.py
+++ b/tests/test_blast_specificity.py
@@ -43,22 +43,14 @@ def mock_panel():
     return panel
 
 
-def test_blast_archive_removed_after_run(mock_panel, tmp_path):
-    """The BLAST archive file is cleaned up after a successful specificity check."""
-    blast_archive = tmp_path / "blast_archive"
-
+def test_direct_tabular_no_archive(mock_panel, tmp_path):
+    """Specificity check uses output_table directly, no archive file is created."""
     with (
         patch("plexus.blast.specificity.BlastRunner") as MockRunner,
         patch("plexus.blast.specificity.BlastResultsAnnotator") as MockAnnotator,
         patch("plexus.blast.specificity.AmpliconFinder") as MockFinder,
     ):
         runner_instance = MockRunner.return_value
-
-        def fake_run(output_archive, **kwargs):
-            # Simulate BLAST creating the archive file
-            open(output_archive, "w").close()
-
-        runner_instance.run.side_effect = fake_run
         runner_instance.get_dataframe.return_value = pd.DataFrame({"dummy": [1]})
 
         annotator_instance = MockAnnotator.return_value
@@ -71,7 +63,14 @@ def fake_run(output_archive, **kwargs):
 
         run_specificity_check(mock_panel, str(tmp_path), "fake_genome.fa")
 
-        assert not blast_archive.exists(), "BLAST archive should be removed after run"
+        # Verify output_table= was used (not output_archive)
+        _, run_kwargs = runner_instance.run.call_args
+        assert "output_table" in run_kwargs
+        assert run_kwargs["output_table"].endswith("blast_table.txt")
+        # output_archive should NOT appear in the call
+        assert run_kwargs.get("output_archive") is None
+        # reformat_output_as_table should never be called
+        runner_instance.reformat_output_as_table.assert_not_called()
 
 
 def test_run_specificity_check_integration(mock_panel, tmp_path):
@@ -149,6 +148,7 @@ def test_run_specificity_check_forwards_num_threads(mock_panel, tmp_path):
 
         _, kwargs = runner_instance.run.call_args
         assert kwargs.get("num_threads") == 6
+        assert "output_table" in kwargs
 
 
 def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
@@ -186,9 +186,13 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
             blast_dust="no",
         )
 
-        # Verify annotator received custom thresholds
+        # Verify annotator received custom thresholds (three_prime_tolerance
+        # uses default since not overridden in this call)
         annotator_instance.build_annotation_dict.assert_called_once_with(
-            length_threshold=20, evalue_threshold=5.0, max_mismatches=1
+            length_threshold=20,
+            evalue_threshold=5.0,
+            max_mismatches=1,
+            three_prime_tolerance=3,
         )
 
         # Verify finder received custom max amplicon size
@@ -202,6 +206,7 @@ def test_run_specificity_check_forwards_blast_parameters(mock_panel, tmp_path):
         assert run_kwargs.get("penalty") == -3
         assert run_kwargs.get("max_hsps") == 50
         assert run_kwargs.get("dust") == "no"
+        assert "output_table" in run_kwargs
 
 
 def test_run_specificity_check_no_hits(mock_panel, tmp_path):