diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index a9c8b5c..34e6d15 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -9,6 +9,7 @@ on: - 'main' paths: - '.github/workflows/pytest.yml' + - 'pyproject.toml' - 'chaintools_bio/**' - 'testdata/**' diff --git a/README.md b/README.md index 55abc26..7efbe9e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# chaintools: utilities for the genomic chain format +# chaintools_bio: utilities for the genomic chain format This toolkit provides utilities to process whole-genome maps in the ([chain format](https://genome.ucsc.edu/goldenPath/help/chain.html)). @@ -21,7 +21,7 @@ Lift-over software such as [UCSC LiftOver](https://genome.ucsc.edu/cgi-bin/hgLif ## Install ```shell -git clone git@github.com:milkschen/chaintools.git +git clone git@github.com:milkschen/chaintools_bio.git # Option 1: pip pip install -e . # Option 2: uv @@ -29,31 +29,25 @@ uv pip install -e . ``` - Python 3.8+ -- Dependencies: [intervaltree](https://github.com/chaimleib/intervaltree), [pandas](https://pandas.pydata.org), and [pysam](https://pysam.readthedocs.io/en/latest/). See [INSTALL.md](INSTALL.md) for instructions. +- See [INSTALL.md](INSTALL.md) for dependencies and installation instructions. 
## Usage -### Set up $PYTHONPATH - -```shell -export PYTHONPATH=$(pwd)/:${PYTHONPATH} -``` - ### Annotate Annotate a chain file: - Specify the contig and start/end positions of each segment -- Calculate the identity of each segment (optional) +- Calculate the sequence identity of each segment (optional) - Write liftable regions to a pair of BED files (one for target and one for query) (optional) ```shell # Annotate contig and positions -chaintools annotate -c <in.chain> -o <out.chain> +chaintools_bio annotate -c <in.chain> -o <out.chain> # Add identity -chaintools annotate -c <in.chain> -o <out.chain> -fs <source.fa> -ft <target.fa> +chaintools_bio annotate -c <in.chain> -o <out.chain> -fs <source.fa> -ft <target.fa> # Also write liftable regions to BED files -chaintools annotate -c <in.chain> -o <out.chain> -fs <source.fa> -ft <target.fa> -b <bed_prefix> +chaintools_bio annotate -c <in.chain> -o <out.chain> -fs <source.fa> -ft <target.fa> -b <bed_prefix> ``` ### Convert to BED @@ -62,9 +56,9 @@ Convert a chain file to the BED format using either target or query coordinates ```shell # Report using the target coordinates -chaintools to_bed -c <in.chain> -o <out.bed> --coord target +chaintools_bio to_bed -c <in.chain> -o <out.bed> --coord target # Report using the query coordinates -chaintools to_bed -c <in.chain> -o <out.bed> --coord query +chaintools_bio to_bed -c <in.chain> -o <out.bed> --coord query ``` ### Convert to PAF @@ -77,7 +71,7 @@ If both `target.fa` and `query.fa` are provided, this script checks the referenc Otherwise, it uses `[MID]+` and `[X]+` at chain break points. A breakpoint is a gap wrt both target and query, e.g., `149 341 2894`. ```shell -chaintools to_paf -c <in.chain> -o <out.paf> [-t <target.fa> -q <query.fa>] +chaintools_bio to_paf -c <in.chain> -o <out.paf> [-t <target.fa> -q <query.fa>] ``` ### Convert to SAM @@ -87,7 +81,7 @@ using the target fasta file for the genome _from_ which the chain lifts, and the query fasta file for the genome _to_ which the chain lifts. ```shell -chaintools to_sam -c <in.chain> -t <target.fa> -q <query.fa> -o <out.sam> +chaintools_bio to_sam -c <in.chain> -t <target.fa> -q <query.fa> -o <out.sam> ``` Note: For a chain file used to convert from a target genome's coordinates to a query @@ -101,7 +95,7 @@ using the target fasta file for the genome _from_ which the chain lifts, and the query fasta file for the genome _to_ which the chain lifts.
```shell -chaintools to_vcf -c <in.chain> -t <target.fa> -q <query.fa> -o <out.vcf> +chaintools_bio to_vcf -c <in.chain> -t <target.fa> -q <query.fa> -o <out.vcf> ``` ### Filter @@ -112,9 +106,9 @@ The overlap filter makes sure no chains overlap wrt either target or query refer ```shell # Filter by chain size -chaintools chain_filter -c <in.chain> -o <out.chain> -s <size> +chaintools_bio chain_filter -c <in.chain> -o <out.chain> -s <size> # Filter by both chain size and overlap status -chaintools chain_filter -c <in.chain> -o <out.chain> -u -oc <overlapped.chain> -s <size> +chaintools_bio chain_filter -c <in.chain> -o <out.chain> -u -oc <overlapped.chain> -s <size> ``` ### Invert @@ -122,7 +116,7 @@ chaintools chain_filter -c <in.chain> -o <out.chain> -u -oc <overlapped.chain> -s <size> ```shell -chaintools invert -c <in.chain> -o <out.chain> +chaintools_bio invert -c <in.chain> -o <out.chain> ``` ### Split @@ -130,7 +124,7 @@ chaintools invert -c <in.chain> -o <out.chain> Split a chain at large gaps or breakpoints. A breakpoint is a gap wrt both target and query, e.g., `149 341 2894`. ```shell -chaintools split -c <in.chain> -o <out.chain> [--min_gap <int> --min_bp <int>] +chaintools_bio split -c <in.chain> -o <out.chain> [--min_gap <int> --min_bp <int>] ``` ### Stats @@ -138,5 +132,5 @@ chaintools split -c <in.chain> -o <out.chain> [--min_gap <int> --min_bp <int>] Calculate summary statistics of a chain file ```shell -chaintools stats -c <in.chain> -o <out.tsv> +chaintools_bio stats -c <in.chain> -o <out.tsv> ``` diff --git a/pyproject.toml b/pyproject.toml index 4a940ab..38dac3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,11 +24,8 @@ dependencies = [ "typer>=0.9.0", ] -[tool.hatch.build.targets.wheel] -packages = ["chaintools/chaintools_bio"] - [project.scripts] chaintools_bio = "chaintools_bio.cli:main" [project.urls] -"Homepage" = "https://github.com/milkschen/chaintools" +"Homepage" = "https://github.com/milkschen/chaintools_bio"