From c7011bd61f9a75f29a821c51afacc905a36659ca Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 21:49:06 -0700 Subject: [PATCH 1/6] fix typo --- workflow/leviosam2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/leviosam2.py b/workflow/leviosam2.py index f3e371f..3bb015e 100644 --- a/workflow/leviosam2.py +++ b/workflow/leviosam2.py @@ -166,7 +166,7 @@ def parse_args() -> argparse.Namespace: "--lift_bed_defer_target", type=str, help=( - "[lift] Path to a BED (target cooridnates)" + "[lift] Path to a BED (target coordinates) " "where reads in the regions are always " "deferred" ), From ee141205ffe246635e162069c3f216f80f3a1d71 Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 21:51:29 -0700 Subject: [PATCH 2/6] main cpp doc improvements --- src/leviosam.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/leviosam.cpp b/src/leviosam.cpp index 48c9002..5ed51dd 100644 --- a/src/leviosam.cpp +++ b/src/leviosam.cpp @@ -486,6 +486,8 @@ void print_lift_help_msg() { "to be lifted. \n"; std::cerr << " " "Leave empty or set to \"-\" to read from stdin.\n"; + std::cerr << " -p string Prefix of the output files. [stdout]\n"; + std::cerr << " -O string Format of the output file: sam, bam, or cram. [bam]\n"; std::cerr << " -t INT Number of threads used.\n" " " "If -t is not set, the value would be the sum of\n" @@ -509,11 +511,21 @@ void print_lift_help_msg() { "Add MD and NM to output alignment records (requires -f)\n"; std::cerr << " -f path " "Path to the FASTA file of the target reference. \n"; + std::cerr << " -F path " + "Path to the FAI (FASTA index) file of the target reference. \n"; std::cerr << " -x path Re-alignment preset. [] \n"; std::cerr << " -G INT " "Maximum allowed gap size (in base pairs) between chain " "intervals for an alignment to be considered liftable. A " "value of 0 requires perfect chain interval continuity. [0]\n"; + std::cerr << " -g INT " + "Haplotype (0 or 1) for variant-aware (VCF) mode. \n" + " " + "NOTE: Do not confuse with workflow script's `-g` (gap size).\n"; + std::cerr << " -s string " + "Sample name for variant-aware (VCF) mode.\n"; + std::cerr << " -v path " + "Path to the VCF file for variant-aware mode.\n"; std::cerr << " -T INT " "Chunk size for each thread. [256] \n" " " // align @@ -522,12 +534,14 @@ void print_lift_help_msg() { "Setting a larger -T uses slightly more " "memory but might benefit thread scaling.\n"; std::cerr << "\n"; - std::cerr << " Commit/defer rule options:\n"; + std::cerr << " Commit/defer/suppress rule options:\n"; std::cerr << " -S string<:int/float> Key-value pair of " "a split rule. We allow appending multiple `-S` options.\n"; std::cerr << " Options: " - "mapq:, aln_score:, isize:, hdist:, " - "clipped_frac:. lifted. [none]\n"; + "lifted, mapq:, aln_score:, isize:, hdist:, " + "clipped_frac: [none]\n"; + std::cerr << " * lifted " + "Defer alignments that are unlifted (unmapped) after liftover.\n"; std::cerr << " * mapq INT " "Min MAPQ (pre-liftover) accepted for a committed read.\n"; std::cerr << " * aln_score INT " @@ -549,10 +563,10 @@ void print_lift_help_msg() { std::cerr << " Example: `-S mapq:20 -S aln_score:20` commits " "MQ>=20 and AS>=20 alignments.\n"; std::cerr << " -r string Path to a BED file (source " - "coordinates). Reads overlap with the regions are always " + "coordinates). Reads overlapping with the regions are always " "committed. [none]\n"; std::cerr << " -D string Path to a BED file (dest coordinates). " - "Reads overlap with the regions are always deferred. [none]\n"; + "Reads overlapping with the regions are always deferred. [none]\n"; std::cerr << " -B float Threshold for BED record intersection. " "[0]\n" " If <= 0: consider any overlap (>0 bp)\n" From 815d3dc97890b2802a3e60d349d935e4972e7bc5 Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 21:52:14 -0700 Subject: [PATCH 3/6] bash workflow doc update --- workflow/leviosam2.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/leviosam2.sh b/workflow/leviosam2.sh index 7adf157..41db068 100644 --- a/workflow/leviosam2.sh +++ b/workflow/leviosam2.sh @@ -75,7 +75,7 @@ function print_usage_and_exit { echo " -w path Path to the input FASTQ (read 1) []" echo " -W path Path to the input FASTQ (read 2, optional) []" echo " LevioSAM2-lift:" - echo " -g INT Number of gaps allowed during leviosam2-lift [0]" + echo " -g INT Max chain gap size allowed (in base pairs) during leviosam2-lift [0]" echo " -x path Path to the levioSAM2 re-alignment config YAML []" echo " Commit/Defer/Suppress rules:" echo " -A INT Alignment score cutoff for the defer rule []" From 485ced09934338bffc83baef48e92150439d05f8 Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 21:56:08 -0700 Subject: [PATCH 4/6] update workflow readme --- workflow/README.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/workflow/README.md b/workflow/README.md index 8bd0449..d716df7 100644 --- a/workflow/README.md +++ b/workflow/README.md @@ -1,5 +1,14 @@ # LevioSAM2 workflow +> [!WARNING] +> **CLI Flag Differences between Workflows and the C++ Binary:** +> The flag names in these workflow scripts (`leviosam2.py` / `leviosam2.sh`) differ from those of the underlying C++ binary (`leviosam2 lift`). If you run the C++ binary directly, verify the option mappings: +> * **Gap size threshold:** Use `-g` in Python/Bash scripts, but **`-G`** in the C++ binary. (The C++ binary's `-g` is reserved for VCF haplotype). +> * **Sequence / Aligner / Sample flags:** `-s` specifies sequence type in Python, source index path in Bash, and VCF sample name in the C++ binary. +> * **Commit / Suppress annotations:** Use `--lift_bed_commit_source` (Python) or `-R` (Bash), but **`-r`** in the C++ binary. +> * **Defer rules (`-m` and `-p`):** In Bash, `-m` is the template size cutoff and `-p` is the clipped fraction cutoff. In the C++ binary, `-m` enables MD/NM tags, and `-p` is the **output prefix**. +> Check `leviosam2 lift -h` for details. + ## Dependencies All dependent software is included in our Docker/Singularity container. @@ -30,7 +39,7 @@ python leviosam2.py \ --lift_bed_defer_target defer_annotations.bed # optional ``` -With the bash worklow (older workflow to be deprecated): +Alternative with the Bash workflow (not recommended): ```shell bash leviosam2.sh \ @@ -62,7 +71,7 @@ python leviosam2.py \ --lift_bed_defer_target defer_annotations.bed # optional ``` -With the bash worklow (older workflow to be deprecated): +Alternative with the Bash workflow (not recommended): ```shell bash leviosam2.sh \ @@ -102,7 +111,7 @@ python leviosam2.py \ --lift_bed_commit_source suppress_annotations.bed # optional ``` -With the bash worklow (older workflow to be deprecated): +Alternative with the Bash workflow (not recommended): ```shell bash leviosam2.sh \ @@ -132,7 +141,7 @@ python leviosam2.py \ --lift_bed_commit_source suppress_annotations.bed # optional ``` -With the bash worklow (older workflow to be deprecated): +Alternative with the Bash workflow (not recommended): ```shell bash leviosam2.sh \ From 5856e2d620a7d40ba8468e695d99f25199f97260 Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 21:57:45 -0700 Subject: [PATCH 5/6] update main readme --- README.md | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ed5a0e5..5442c41 100644 --- a/README.md +++ b/README.md @@ -83,12 +83,31 @@ leviosam2 lift -C source_to_target.clft -a aligned_to_source.bam -p lifted_from_ The levioSAM2 workflow includes lift-over using the `leviosam2-lift` kernel and a selective re-mapping strategy. This approach can improve accuracy. -Example: +We highly recommend using the **Python workflow (`leviosam2.py`)** as it is the newer, standard interface. The older Bash workflow (`leviosam2.sh`) is supported but not recommended. + +**Primary Example (Python script):** ```shell -# You may skip the indexing step if you've already run it +# Index the chain file first (if not already done) leviosam2 index -c source_to_target.chain -p source_to_target -F target.fai -sh leviosam2.sh \ + +# Run the Python workflow script +python workflow/leviosam2.py \ + -i aligned_to_source.bam \ + -o aligned_to_source-lifted \ + -C source_to_target.clft \ + -f target.fna \ + -fi bt2/target \ + -a bowtie2 \ + -s ilmn_pe \ + -t 16 \ + --use_preset +``` + +**Alternative Example (Bash script - not recommended):** + +```shell +sh workflow/leviosam2.sh \ -a bowtie2 -A -10 -q 10 -H 5 \ -i aligned_to_source.bam \ -o aligned_to_source-lifted \ @@ -98,7 +117,7 @@ sh leviosam2.sh \ -t 16 ``` -See [this README](https://github.com/milkschen/leviosam2/blob/main/workflow/README.md) to learn more about running the full levioSAM2 workflow. +See [workflow README](https://github.com/milkschen/leviosam2/blob/main/workflow/README.md) to learn more about running the full levioSAM2 workflow and how the parameter options differ between scripts and the binary. ## Publication From 1d96fd808def669830cbdca15eec375f9626a30f Mon Sep 17 00:00:00 2001 From: milkschen Date: Sun, 31 May 2026 22:03:00 -0700 Subject: [PATCH 6/6] remove cram from -O doc --- src/leviosam.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/leviosam.cpp b/src/leviosam.cpp index 5ed51dd..72a6a28 100644 --- a/src/leviosam.cpp +++ b/src/leviosam.cpp @@ -487,7 +487,7 @@ void print_lift_help_msg() { std::cerr << " " "Leave empty or set to \"-\" to read from stdin.\n"; std::cerr << " -p string Prefix of the output files. [stdout]\n"; - std::cerr << " -O string Format of the output file: sam, bam, or cram. [bam]\n"; + std::cerr << " -O string Format of the output file: sam or bam. [sam]\n"; std::cerr << " -t INT Number of threads used.\n" " " "If -t is not set, the value would be the sum of\n"