diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6d8841..b74f9d8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,13 +14,29 @@ permissions: jobs: test: + name: Test (${{ matrix.python-version }}, pandas ${{ matrix.pandas-label }}) runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python-version: ['3.9', '3.12'] + include: + - python-version: '3.10' + pandas-label: '2.2.3' + pandas-spec: 'pandas==2.2.3' + - python-version: '3.12' + pandas-label: '2.2.3' + pandas-spec: 'pandas==2.2.3' + - python-version: '3.12' + pandas-label: '3.x' + pandas-spec: 'pandas>=3,<4' + - python-version: '3.14' + pandas-label: '3.x' + pandas-spec: 'pandas>=3,<4' steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 @@ -30,6 +46,10 @@ jobs: - name: Upgrade pip, setuptools, and packaging run: | python -m pip install --upgrade pip setuptools packaging + + - name: Pin pandas ${{ matrix.pandas-label }} + run: | + python -m pip install --upgrade "${{ matrix.pandas-spec }}" - name: Cache src directory uses: actions/cache@v4 @@ -62,11 +82,13 @@ jobs: python3 test.py -t GRCh37 - name: Build and push Docker image - if: github.ref == 'refs/heads/master' && github.event_name == 'push' && matrix.python-version == '3.12' + if: github.ref == 'refs/heads/master' && github.event_name == 'push' && matrix.python-version == '3.12' && matrix.pandas-label == '3.x' run: | echo "Starting Docker deployment to GHCR for sigprofilersuite..." 
- VERSION_TAG=$(grep "VERSION = " setup.py | cut -d'"' -f2) + python -m pip install --upgrade setuptools_scm + VERSION_TAG=$(python -m setuptools_scm) + VERSION_TAG=${VERSION_TAG//+/-} # Get the repository name and convert it to lowercase REPO_NAME=$(basename ${{ github.repository }} | tr '[:upper:]' '[:lower:]') diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..51df595 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,92 @@ +name: Release + +on: + push: + tags: + - "v*" + release: + types: + - published + workflow_dispatch: + +jobs: + build: + name: Build distributions + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install build tooling + run: | + python -m pip install --upgrade pip build twine + + - name: Build distributions + run: | + python -m build + + - name: Check distributions + run: | + python -m twine check dist/* + + - name: Verify package version + run: | + python -m pip install dist/*.whl + python -c "import SigProfilerMatrixGenerator; print(SigProfilerMatrixGenerator.__version__)" + + - name: Upload distributions + uses: actions/upload-artifact@v7 + with: + name: python-package-distributions + path: dist/ + if-no-files-found: error + + publish-testpypi: + name: Publish to TestPyPI + needs: build + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/SigProfilerMatrixGenerator + permissions: + id-token: write + steps: + - name: Download distributions + uses: actions/download-artifact@v5 + with: + name: python-package-distributions + path: dist/ + + - name: Publish package distributions to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: 
https://test.pypi.org/legacy/ + + publish-pypi: + name: Publish to PyPI + needs: build + if: github.event_name == 'release' && github.event.action == 'published' + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/SigProfilerMatrixGenerator + permissions: + id-token: write + steps: + - name: Download distributions + uses: actions/download-artifact@v5 + with: + name: python-package-distributions + path: dist/ + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 1ae1b19..019a6b2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ # MkDocs build output /site/ -SigProfilerMatrixGenerator/version.py +SigProfilerMatrixGenerator/_version.py SigProfilerMatrixGenerator/references/chromosomes/tsb/ SigProfilerMatrixGenerator/references/vcf_files/*_bench/input/ SigProfilerMatrixGenerator/references/vcf_files/*_bench/output/ diff --git a/RELEASE_ADMIN.md b/RELEASE_ADMIN.md new file mode 100644 index 0000000..df9456b --- /dev/null +++ b/RELEASE_ADMIN.md @@ -0,0 +1,145 @@ +# SigProfilerMatrixGenerator Release Administration + +This repository publishes Python distributions through GitHub Actions and PyPI +Trusted Publishing. The release process intentionally has two publish steps: + +1. Push a version tag such as `v1.3.7` to publish to TestPyPI. +2. Publish a GitHub Release for that same tag to publish to PyPI. + +Merging into `master` does not publish a Python package by itself. + +## Required GitHub Setup + +Create these GitHub environments: + +```text +testpypi +pypi +``` + +The environment names must exactly match `.github/workflows/release.yml`. +The `pypi` environment should require reviewer approval before deployment. 
+ +## Required TestPyPI Trusted Publisher + +Configure this from TestPyPI: + +```text +PyPI Project Name: SigProfilerMatrixGenerator +Owner: SigProfilerSuite +Repository name: SigProfilerMatrixGenerator +Workflow name: release.yml +Environment name: testpypi +``` + +If the project does not exist on TestPyPI yet, use the pending trusted +publisher flow to create it on first publish. + +## Required PyPI Trusted Publisher + +Configure this from the existing PyPI project: + +```text +PyPI Project Name: SigProfilerMatrixGenerator +Owner: SigProfilerSuite +Repository name: SigProfilerMatrixGenerator +Workflow name: release.yml +Environment name: pypi +``` + +For an existing PyPI project, configure this from: + +```text +PyPI -> Your projects -> SigProfilerMatrixGenerator -> Manage -> Publishing +``` + +If `Manage` is disabled, ask a current project owner to add your PyPI account +as an owner or maintainer with publishing permissions. + +## Versioning + +Package versions are derived from Git tags by `setuptools_scm`. + +Release tags must start with `v`, for example: + +```text +v1.3.7 +``` + +The package version for that tag becomes: + +```text +1.3.7 +``` + +Commits after the latest tag produce development versions of the next release; for example, two commits after `v1.3.6` with uncommitted local changes: + +```text +1.3.7.dev2+gabcdef0.d20260524 +``` + +The generated file `SigProfilerMatrixGenerator/_version.py` is ignored and +must not be committed. + +## Release Flow + +First merge the release-ready branch into `master` through the repository's +normal review process. + +Then create and push the tag: + +```bash +git checkout master +git pull origin master +git tag -a v1.3.7 -m "v1.3.7" +git push origin v1.3.7 +``` + +The pushed tag triggers `.github/workflows/release.yml` and publishes to +TestPyPI. + +After TestPyPI is verified, publish a GitHub Release for the same tag. That +release event triggers the PyPI publish job. 
+ +## Verification + +After the tag push, verify the GitHub Actions run: + +- build job passed +- `twine check` passed +- package version printed correctly +- TestPyPI publish job passed + +Optional TestPyPI install check: + +```bash +python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple SigProfilerMatrixGenerator==1.3.7 +python -c "import SigProfilerMatrixGenerator; print(SigProfilerMatrixGenerator.__version__)" +``` + +After publishing the GitHub Release, verify the PyPI job and package page: + +```text +https://pypi.org/project/SigProfilerMatrixGenerator/ +``` + +Optional PyPI install check: + +```bash +python -m pip install SigProfilerMatrixGenerator==1.3.7 +python -c "import SigProfilerMatrixGenerator; print(SigProfilerMatrixGenerator.__version__)" +``` + +## Troubleshooting + +If the package version is `0+unknown`, check that: + +- the release workflow checkout uses `fetch-depth: 0` +- the tag exists locally and remotely +- the tag starts with `v` + +If Trusted Publishing fails, check that: + +- the TestPyPI/PyPI trusted publisher values exactly match this repository +- the GitHub environment name is `testpypi` or `pypi` +- the publish job has `id-token: write` diff --git a/SigProfilerMatrixGenerator/scripts/SVMatrixGenerator.py b/SigProfilerMatrixGenerator/scripts/SVMatrixGenerator.py index 2bcb7c7..c2a44bf 100644 --- a/SigProfilerMatrixGenerator/scripts/SVMatrixGenerator.py +++ b/SigProfilerMatrixGenerator/scripts/SVMatrixGenerator.py @@ -920,9 +920,9 @@ def annotateBedpe(sv_bedpe): check_exist_list = sample_bps["id"][sample_bps["is_clustered"]] sample_bps["is_clustered"][ - np.in1d(sample_bps["id"].values, check_exist_list.values) + np.isin(sample_bps["id"].values, check_exist_list.values) ] = True - sv_bedpe["is_clustered"] = np.in1d( + sv_bedpe["is_clustered"] = np.isin( sv_bedpe["id"], sample_bps["id"][sample_bps["is_clustered"]] ) sv_bedpe = processBEDPE(sv_bedpe) diff --git 
a/SigProfilerMatrixGenerator/scripts/vcfToBedpe.py b/SigProfilerMatrixGenerator/scripts/vcfToBedpe.py index 19485a3..1f3d9e4 100644 --- a/SigProfilerMatrixGenerator/scripts/vcfToBedpe.py +++ b/SigProfilerMatrixGenerator/scripts/vcfToBedpe.py @@ -8,6 +8,48 @@ # -------------------------------------- # define functions +def _infer_bnd_strands(alt): + """Infer BRASS-style breakend strands from VCF BND ALT notation.""" + if "[" in alt: + bracket = "[" + elif "]" in alt: + bracket = "]" + else: + return ".", "." + + bracket_is_after_local_sequence = alt.find(bracket) > 0 + if bracket == "]" and bracket_is_after_local_sequence: + return "+", "+" + if bracket == "[" and bracket_is_after_local_sequence: + return "-", "-" + if bracket == "]": + return "-", "+" + return "+", "-" + + +def _classify_sv(svtype, chrom1, chrom2, strand1=".", strand2="."): + mapping = { + "DEL": "deletion", + "INS": "insertion", + "DUP": "tandem-duplication", + "CPX": "unknown", + "INV": "inversion", + "CNV": "unknown", + "CTX": "translocation", + } + if svtype != "BND": + return mapping.get(svtype, "unknown") + if str(chrom1) != str(chrom2): + return "translocation" + if strand1 == "+" and strand2 == "+": + return "deletion" + if strand1 == "-" and strand2 == "-": + return "tandem-duplication" + if (strand1 == "+" and strand2 == "-") or (strand1 == "-" and strand2 == "+"): + return "inversion" + return "unknown" + + class Vcf(object): def __init__(self): self.file_format = "VCFv4.2" @@ -280,6 +322,7 @@ def vcfToBedpe(vcf_path, output_path): vcf = Vcf() in_header = True sample_list = [] + processed_bnd_ids = set() for line in vcf_file: if in_header: @@ -324,6 +367,11 @@ def vcfToBedpe(vcf_path, output_path): else: if "SECONDARY" in var.info: continue + if var.var_id in processed_bnd_ids: + continue + processed_bnd_ids.add(var.var_id) + if "MATEID" in var.info: + processed_bnd_ids.update(var.info["MATEID"].split(",")) sep = "[" if sep not in var.alt: sep = "]" @@ -331,6 +379,8 @@ def 
vcfToBedpe(vcf_path, output_path): if len(r.findall(var.alt)) > 0: chrom2, b2 = r.findall(var.alt)[0].split(":") b2 = int(b2) + else: + b2 = b1 if "EVENT" in var.info: name = var.info["EVENT"] @@ -341,6 +391,8 @@ def vcfToBedpe(vcf_path, output_path): strands = var.info["STRANDS"] o1 = strands[0] o2 = strands[1] + elif var.info["SVTYPE"] == "BND": + o1, o2 = _infer_bnd_strands(var.alt) span = [0, 0] if "CIPOS" in var.info: @@ -390,22 +442,30 @@ def vcfToBedpe(vcf_path, output_path): l = list(df.columns) - cols = l[0:6] + [l[10]] + cols = l[0:6] + [l[8], l[9], l[10]] df = df[cols] - df.columns = ["chrom1", "start1", "end1", "chrom2", "start2", "end2", "svclass"] + df.columns = [ + "chrom1", + "start1", + "end1", + "chrom2", + "start2", + "end2", + "strand1", + "strand2", + "svtype", + ] # deletion, translocation, tandem-duplication, or inversion - mapping = { - "DEL": "deletion", - "BND": "unknown", - "INS": "insertion", - "DUP": "tandem-duplication", - "CPX": "unknown", - "INV": "inversion", - "CNV": "unknown", - "CTX": "translocation", - } - df["svclass"] = df["svclass"].map(mapping) + df["svclass"] = [ + _classify_sv(svtype, chrom1, chrom2, strand1, strand2) + for svtype, chrom1, chrom2, strand1, strand2 in zip( + df["svtype"], df["chrom1"], df["chrom2"], df["strand1"], df["strand2"] + ) + ] + df = df[ + ["chrom1", "start1", "end1", "chrom2", "start2", "end2", "svclass"] + ] df2 = df[df["svclass"] != "unknown"] # classified confidently unclassified = df[df["svclass"] == "unknown"] # not classified confidentlyprint dropped = int(df.shape[0] - df2.shape[0]) diff --git a/SigProfilerMatrixGenerator/version.py b/SigProfilerMatrixGenerator/version.py new file mode 100644 index 0000000..445eb73 --- /dev/null +++ b/SigProfilerMatrixGenerator/version.py @@ -0,0 +1,21 @@ +try: + from ._version import version as __version__ +except Exception: + try: + from importlib.metadata import PackageNotFoundError + from importlib.metadata import version as _metadata_version + except 
Exception: # pragma: no cover + PackageNotFoundError = Exception # type: ignore + + def _metadata_version(_name: str) -> str: # type: ignore + raise PackageNotFoundError() + + try: + __version__ = _metadata_version("SigProfilerMatrixGenerator") + except PackageNotFoundError: + __version__ = "0+unknown" + + +short_version = __version__ +version = __version__ +Update = "" diff --git a/pyproject.toml b/pyproject.toml index aa9739a..5ad4ff7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,9 @@ [build-system] -requires = ["setuptools>=61", "wheel", "build"] +requires = ["setuptools>=69", "setuptools_scm[toml]>=8", "wheel"] build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +tag_regex = "^v(?P<version>.+)$" +version_scheme = "guess-next-dev" +local_scheme = "node-and-date" +fallback_version = "0+unknown" diff --git a/setup.py b/setup.py index 099a391..507b22d 100644 --- a/setup.py +++ b/setup.py @@ -1,63 +1,39 @@ -import os -import shutil +from pathlib import Path -from setuptools import setup +from setuptools import find_namespace_packages, setup -VERSION = "1.3.6" -# remove the dist folder first if exists -if os.path.exists("dist"): - shutil.rmtree("dist") +LONG_DESCRIPTION = Path("README.md").read_text(encoding="utf-8") +INSTALL_REQUIRES = [ + "matplotlib>=2.2.2", + "sigProfilerPlotting>=1.4.1", + "statsmodels>=0.9.0", + "numpy>=2.0.0", + "pandas>=2.0.0", + "scipy>=1.12.0", +] -def readme(): - this_directory = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(this_directory, "README.md"), encoding="latin-1") as f: - long_description = f.read() - return long_description +VERSION_TEMPLATE = "version = '{version}'\n" -def write_version_py(filename="SigProfilerMatrixGenerator/version.py"): - # Copied from numpy setup.py - cnt = """ -# THIS FILE IS GENERATED FROM SIGPROFILEMATRIXGENERATOR SETUP.PY -short_version = '%(version)s' -version = '%(version)s' -Update = 'v1.3.6: Add automated Docker build and publish pipeline' - - """ - fh = 
open(filename, "w") - fh.write( - cnt - % { - "version": VERSION, - } - ) - fh.close() - - -write_version_py() - setup( name="SigProfilerMatrixGenerator", - version=VERSION, + use_scm_version={ + "write_to": "SigProfilerMatrixGenerator/_version.py", + "write_to_template": VERSION_TEMPLATE, + "fallback_version": "0+unknown", + }, description="SigProfiler matrix generator tool", - long_description=readme(), + long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", url="https://github.com/SigProfilerSuite/SigProfilerMatrixGenerator.git", author="Erik Bergstrom", author_email="ebergstr@eng.ucsd.edu", license="UCSD", - packages=["SigProfilerMatrixGenerator"], + packages=find_namespace_packages(include=["SigProfilerMatrixGenerator*"]), python_requires=">=3.9", - install_requires=[ - "matplotlib>=2.2.2", - "sigProfilerPlotting>=1.4.1", - "statsmodels>=0.9.0", - "numpy>=2.0.0", - "pandas>=2.0.0", - "scipy>=1.12.0", - ], + install_requires=INSTALL_REQUIRES, entry_points={ "console_scripts": [ "SigProfilerMatrixGenerator=SigProfilerMatrixGenerator.scripts.SigProfilerMatrixGenerator_CLI:main_function", diff --git a/tests/scripts/test_ref_install.py b/tests/scripts/test_ref_install.py index 886c26d..979c608 100644 --- a/tests/scripts/test_ref_install.py +++ b/tests/scripts/test_ref_install.py @@ -1,6 +1,5 @@ import pathlib -import pkg_resources import pytest import SigProfilerMatrixGenerator @@ -10,10 +9,7 @@ class TestReferenceDir: @pytest.fixture def package_reference_dir(self): - result = pathlib.Path( - pkg_resources.resource_filename(SigProfilerMatrixGenerator.__name__, "") - ) - return result + return pathlib.Path(SigProfilerMatrixGenerator.__file__).parent.resolve() @pytest.fixture def default_fasta_dir(self, package_reference_dir): @@ -27,7 +23,6 @@ def default_tsb_dir(self, package_reference_dir): def test_path_no_secondary_chromosome_install_dir(self, package_reference_dir): refdir = ref_install.reference_dir() - # using deprecated 
pkg_resources for compatibility with python 3.8 observed = refdir.path assert package_reference_dir == observed # check path is absolute diff --git a/tests/scripts/test_sv_vcf_conversion.py b/tests/scripts/test_sv_vcf_conversion.py new file mode 100644 index 0000000..a7fc6db --- /dev/null +++ b/tests/scripts/test_sv_vcf_conversion.py @@ -0,0 +1,38 @@ +import importlib.util +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +VCF_TO_BEDPE_PATH = ( + REPO_ROOT / "SigProfilerMatrixGenerator" / "scripts" / "vcfToBedpe.py" +) +SPEC = importlib.util.spec_from_file_location("vcfToBedpe", VCF_TO_BEDPE_PATH) +vcf_to_bedpe = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(vcf_to_bedpe) + + +def test_purple_gridss_bnd_vcf_classifies_events(tmp_path): + vcf_path = ( + REPO_ROOT + / "SigProfilerMatrixGenerator" + / "references" + / "SV" + / "example_input" + / "VCF" + / "COLO829v003T.purple.sv.vcf" + ) + + bedpe, unclassified = vcf_to_bedpe.vcfToBedpe(str(vcf_path), str(tmp_path)) + + assert len(bedpe) == 109 + assert len(unclassified) == 38 + assert set(bedpe["svclass"]).issubset( + {"deletion", "translocation", "tandem-duplication", "inversion"} + ) + assert bedpe["sample"].unique().tolist() == ["COLO829v003T"] + assert bedpe["svclass"].value_counts().to_dict() == { + "tandem-duplication": 53, + "inversion": 28, + "translocation": 22, + "deletion": 6, + }