From e5c34ec6910ebb34511d0040684677012547624d Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 10:41:01 -0400 Subject: [PATCH 1/8] Move all setup to pyproject.toml and add uv instructions --- README.md | 12 ++++++---- pyproject.toml | 55 ++++++++++++++++++++++++++++++++++++++++++++- requirements.txt | 19 ---------------- setup.py | 52 ------------------------------------------ tests/pre_commit.sh | 2 +- 5 files changed, 63 insertions(+), 77 deletions(-) delete mode 100644 requirements.txt delete mode 100755 setup.py diff --git a/README.md b/README.md index 6fc280d..43b68ad 100644 --- a/README.md +++ b/README.md @@ -595,15 +595,19 @@ The following instructions are for the project maintainers only. For development, check out the `dev` branch (latest, but less tested than `main`). -To install from a clone of this repository, use: -`pip install -e .` - ## Setting up an environment for development +### Using uv + +1. Create an environment: `uv venv --python 3.10 .venv` +2. Install seqscore and development dependencies: `uv pip install -e ".[dev]"` + +### Using conda + 1. Create an environment: `conda create -yn seqscore python=3.10` 2. Activate the environment: `conda activate seqscore` 3. Install seqscore: `pip install -e .` -4. Install development dependencies: `pip install -r requirements.txt` +4. 
Install development dependencies: `pip install -e ".[dev]"` # Contributors diff --git a/pyproject.toml b/pyproject.toml index 212b567..23866eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,57 @@ +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "seqscore" +dynamic = ["version"] +description = "SeqScore: Scoring for named entity recognition and other sequence labeling tasks" +readme = "README.md" +license = {text = "MIT"} +authors = [ + {name = "Constantine Lignos", email = "lignos@brandeis.edu"}, +] +requires-python = ">=3.10" +dependencies = [ + "attrs>=19.2.0", + "click", + "tabulate", +] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] + +[project.urls] +Homepage = "https://github.com/bltlab/seqscore" + +[project.scripts] +seqscore = "seqscore.scripts.seqscore:cli" + +[project.optional-dependencies] +dev = [ + "types-tabulate", + "pytest==9.0.3", + "pytest-cov>=7.1.0", + "mypy==2.1.0", + "ruff==0.15.15", +] + +[tool.setuptools.dynamic] +version = {attr = "seqscore.__version__"} + +[tool.setuptools.packages.find] +include = ["seqscore", "seqscore.*"] + +[tool.setuptools.package-data] +seqscore = ["py.typed"] + [tool.mypy] python_version = "3.10" strict_optional = false @@ -6,7 +60,6 @@ disallow_untyped_calls = true [[tool.mypy.overrides]] module = [ - "setuptools", "click.*", ] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8ac2e1b..0000000 --- a/requirements.txt +++ /dev/null @@ -1,19 +0,0 @@ -# This file only contains dependencies needed for development. 
-# setup.py contains the actual package dependencies, and the package -# should be installed before these requirements. - -# Type annotations for tabulate -types-tabulate - -# For testing -pytest==9.0.3 -pytest-cov>=7.1.0 - -# For development -mypy==2.1.0 -ruff==0.15.15 - -# Documentation build -# Disabled for now since we don't need them -# sphinx -# sphinx-rtd-theme diff --git a/setup.py b/setup.py deleted file mode 100755 index fbb7f4a..0000000 --- a/setup.py +++ /dev/null @@ -1,52 +0,0 @@ -#! /usr/bin/env python - -from os import path - -from setuptools import find_packages, setup - -from seqscore import __version__ - - -def setup_package() -> None: - root = path.abspath(path.dirname(__file__)) - with open(path.join(root, "README.md"), encoding="utf-8") as f: - long_description = f.read() - - setup( - name="seqscore", - version=__version__, - packages=find_packages(include=("seqscore", "seqscore.*")), - # Package type information - package_data={"seqscore": ["py.typed"]}, - python_requires=">=3.10", - license="MIT", - description="SeqScore: Scoring for named entity recognition and other sequence labeling tasks", - long_description=long_description, - install_requires=[ - "attrs>=19.2.0", - "click", - "tabulate", - ], - entry_points=""" - [console_scripts] - seqscore=seqscore.scripts.seqscore:cli - """, - classifiers=[ - "Development Status :: 4 - Beta", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3.14", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - ], - url="https://github.com/bltlab/seqscore", - long_description_content_type="text/markdown", - author="Constantine Lignos", - author_email="lignos@brandeis.edu", - ) - - -if __name__ == "__main__": - setup_package() diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh index 
411b6f3..3b76d84 100755 --- a/tests/pre_commit.sh +++ b/tests/pre_commit.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euxo pipefail -files=(seqscore/ tests/ *.py) +files=(seqscore/ tests/) ruff check --fix "${files[@]}" ruff check --select I --fix "${files[@]}" # Organize imports ruff format "${files[@]}" From d0b029b83c4cf6a42b3e66106e7f802064930c49 Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 10:50:14 -0400 Subject: [PATCH 2/8] Add flowmark for markdown autoformatting --- pyproject.toml | 1 + tests/pre_commit.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 23866eb..a6256ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dev = [ "pytest-cov>=7.1.0", "mypy==2.1.0", "ruff==0.15.15", + "flowmark", ] [tool.setuptools.dynamic] diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh index 3b76d84..1967c0a 100755 --- a/tests/pre_commit.sh +++ b/tests/pre_commit.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash set -euxo pipefail +flowmark -i --nobackup *.md files=(seqscore/ tests/) ruff check --fix "${files[@]}" ruff check --select I --fix "${files[@]}" # Organize imports From 4a8c64c6cc38fefc584992460363c325c3d25eb1 Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 10:50:29 -0400 Subject: [PATCH 3/8] Autoformat README --- README.md | 212 ++++++++++++++++++++------------------------ tests/pre_commit.sh | 2 +- 2 files changed, 95 insertions(+), 119 deletions(-) diff --git a/README.md b/README.md index 43b68ad..afb832d 100644 --- a/README.md +++ b/README.md @@ -5,24 +5,22 @@ [![image](https://img.shields.io/pypi/l/seqscore.svg)](https://pypi.python.org/pypi/seqscore) [![image](https://img.shields.io/pypi/pyversions/seqscore.svg)](https://pypi.python.org/pypi/seqscore) -SeqScore provides scoring for named entity recognition and other -chunking tasks evaluated over sequence labels. 
+SeqScore provides scoring for named entity recognition and other chunking tasks +evaluated over sequence labels. -SeqScore is maintained by the BLT Lab at Brandeis University. Please -open an issue if you find incorrect behavior or features you would like -to see added. Due to the risk of introducing regressions or incorrect -scoring behavior, *we generally do not accept pull requests*. Please do not -open a pull request unless you are asked to do so by a maintainer in an -issue. +SeqScore is maintained by the BLT Lab at Brandeis University. Please open an issue if +you find incorrect behavior or features you would like to see added. Due to the risk of +introducing regressions or incorrect scoring behavior, *we generally do not accept pull +requests*. Please do not open a pull request unless you are asked to do so by a +maintainer in an issue. ## Installation -To install the latest official release of SeqScore, run: `pip install seqscore`. -This will install the package and add the command `seqscore` in your Python -environment. +To install the latest official release of SeqScore, run: `pip install seqscore`. This +will install the package and add the command `seqscore` in your Python environment. -SeqScore requires Python 3.10 or higher. It is tested on Python 3.10, 3.11, 3.12, -3.13, and 3.14. +SeqScore requires Python 3.10 or higher. It is tested on Python 3.10, 3.11, 3.12, 3.13, +and 3.14. ## License @@ -78,7 +76,6 @@ Other papers related to SeqScore include: * [Toward More Meaningful Resources for Lower-resourced Languages](https://aclanthology.org/2022.findings-acl.44/) * [CoNLL#: Fine-grained Error Analysis and a Corrected Test Set for CoNLL-03 English](https://aclanthology.org/2024.lrec-main.330/) - # Usage ## Overview @@ -108,10 +105,9 @@ Commands: ## Scoring -The most common application of SeqScore is scoring CoNLL-format NER -predictions. 
Let's assume you have two files, one containing the -correct labels (annotation) and the other containing the predictions -(system output). +The most common application of SeqScore is scoring CoNLL-format NER predictions. Let's +assume you have two files, one containing the correct labels (annotation) and the other +containing the predictions (system output). The correct labels are in the file [samples/reference.bio](samples/reference.bio): @@ -132,7 +128,6 @@ Philadelphia I-LOC , O Pennsylvania B-LOC . O - ``` The predictions are in the file [samples/predicted.bio](samples/predicted.bio): @@ -154,7 +149,6 @@ Philadelphia B-LOC , O Pennsylvania B-LOC . O - ``` To score the predictions, run: @@ -171,27 +165,23 @@ To score the predictions, run: A few things to note: * The reference file must be specified with the `--reference` flag. -* The chunk encoding (BIO, BIOES, etc.) must be specified using the - `--labels` flag. -* Both files need to use the same chunk encoding. If you have - files that use different chunk encodings, use the `convert` command. -* You can get output in different formats using the `--score-format` - flag. Using `--score-format delim` will produce tab-delimited - output. In the delimited format, you can specify the `--full-precision` - flag to output higher numerical precision. -* In the default (pretty) output format, numbers are rounded "half up" - at two decimal places. In other words, 57.124 will round to 57.12, - and 57.125 will round to 57.13. This is different than the "half even" - rounding used by `conlleval` and other libraries that rely on `printf` - behavior for rounding. Half up rounding is used as it is more likely to - match the rounding a user would perform if shown three decimal places. - If you request `conlleval` output format, the same rounding used by +* The chunk encoding (BIO, BIOES, etc.) must be specified using the `--labels` flag. +* Both files need to use the same chunk encoding. 
If you have files that use different + chunk encodings, use the `convert` command. +* You can get output in different formats using the `--score-format` flag. Using + `--score-format delim` will produce tab-delimited output. In the delimited format, you + can specify the `--full-precision` flag to output higher numerical precision. +* In the default (pretty) output format, numbers are rounded "half up" at two decimal + places. In other words, 57.124 will round to 57.12, and 57.125 will round to 57.13. + This is different than the "half even" rounding used by `conlleval` and other + libraries that rely on `printf` behavior for rounding. Half up rounding is used as it + is more likely to match the rounding a user would perform if shown three decimal + places. If you request `conlleval` output format, the same rounding used by `conlleval` will be used. -The above scoring command will work for files that do not have any -invalid transitions, that is, those that perfectly follow what the -encoding allows. However, consider this BIO-encoded file, -[samples/invalid.bio](samples/invalid.bio): +The above scoring command will work for files that do not have any invalid transitions, +that is, those that perfectly follow what the encoding allows. However, consider this +BIO-encoded file, [samples/invalid.bio](samples/invalid.bio): ``` This O @@ -210,11 +200,10 @@ Philadelphia I-LOC , O Pennsylvania B-LOC . O - ``` -Note that the token `University` has the label `I-ORG`, but there is -no preceding `B-ORG`. If we score it as before with +Note that the token `University` has the label `I-ORG`, but there is no preceding +`B-ORG`. 
If we score it as before with `seqscore score --labels BIO --reference samples/reference.bio samples/invalid.bio`, scoring will fail: @@ -223,10 +212,9 @@ seqscore.encoding.EncodingError: Stopping due to validation errors in invalid.bi Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7 ``` -To score output with invalid transitions, we need to specify a repair -method which can correct them. We can tell SeqScore to use the same -approach that conlleval uses (which we refer to as "begin" repair in our -paper): +To score output with invalid transitions, we need to specify a repair method which can +correct them. We can tell SeqScore to use the same approach that conlleval uses (which +we refer to as "begin" repair in our paper): `seqscore score --labels BIO --repair-method conlleval --reference samples/reference.bio samples/invalid.bio`: ``` @@ -242,8 +230,8 @@ New: ('B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'B-LOC', 'O') | ORG | 100.00 | 100.00 | 100.00 | 1 | 1 | 1 | ``` -You can use the `-q` flag to suppress the logging of all of the repairs -applied. For example, running the command +You can use the `-q` flag to suppress the logging of all of the repairs applied. For +example, running the command `seqscore score -q --labels BIO --repair-method conlleval --reference samples/reference.bio samples/invalid.bio` will hide the repairs: @@ -255,13 +243,12 @@ will hide the repairs: | ORG | 100.00 | 100.00 | 100.00 | 1 | 1 | 1 | ``` -You may want to also explore the `discard` repair, which can -produce higher scores for output from models without a CRF/constrained -decoding as they are more likely to produce invalid transitions. +You may want to also explore the `discard` repair, which can produce higher scores for +output from models without a CRF/constrained decoding as they are more likely to produce +invalid transitions. 
-SeqScore can also display all errors (false positives and false negatives) -encountered in scoring using the `--error-counts` flag. For example, running the -command +SeqScore can also display all errors (false positives and false negatives) encountered +in scoring using the `--error-counts` flag. For example, running the command `seqscore score --labels BIO --error-counts --reference samples/reference.bio samples/predicted.bio` will produce the following output: @@ -273,10 +260,10 @@ will produce the following output: | 1 | FN | LOC | West Philadelphia | ``` -The output shows that the system produced two false positives and missed one -mention in the reference (false negative). The most frequent errors appear at -the top. The `--error-counts` flag can be combined with `--score-format delim` -to write a delimited table that can be read as a spreadsheet. +The output shows that the system produced two false positives and missed one mention in +the reference (false negative). The most frequent errors appear at the top. The +`--error-counts` flag can be combined with `--score-format delim` to write a delimited +table that can be read as a spreadsheet. ## Validation @@ -290,7 +277,7 @@ No errors found in 0 tokens, 2 sequences, and 1 documents in reference.bio For the example of the [samples/invalid.bio](samples/invalid.bio), we can run `seqscore validate --labels BIO samples/invalid.bio`: - ``` +``` Encountered 1 errors in 1 tokens, 2 sequences, and 1 documents in invalid.bio Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7 ``` @@ -299,8 +286,8 @@ Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7 We can convert a file from one chunk encoding to another. 
For example, `seqscore convert --input-labels BIO --output-labels BIOES samples/reference.bio samples/reference.bioes` -will read [samples/reference.bio](samples/reference.bio) in BIO -encoding and write the BIOES-converted file to [samples/reference.bioes](samples/reference.bioes): +will read [samples/reference.bio](samples/reference.bio) in BIO encoding and write the +BIOES-converted file to [samples/reference.bioes](samples/reference.bioes): ``` This O @@ -319,7 +306,6 @@ Philadelphia E-LOC , O Pennsylvania S-LOC . O - ``` We can get a list of available chunk encodings by running `seqscore convert --help`: @@ -341,12 +327,11 @@ Options: ## Repair -We can also apply repair methods to a file, creating an output file -with only valid transitions. For example, we can run +We can also apply repair methods to a file, creating an output file with only valid +transitions. For example, we can run `seqscore repair --labels BIO --repair-method conlleval samples/invalid.bio samples/invalid_repair_conlleval.bio`, which will apply the conlleval repair method to the -[samples/invalid.bio](samples/invalid.bio) and write the repaired -labels to +[samples/invalid.bio](samples/invalid.bio) and write the repaired labels to [samples/invalid_repair_conlleval.bio](samples/invalid_repair_conlleval.bio): ``` @@ -366,12 +351,12 @@ Philadelphia I-LOC , O Pennsylvania B-LOC . O - ``` If we want to apply the discard repair method, we can run `seqscore repair --labels BIO --repair-method discard samples/invalid.bio samples/invalid_repair_discard.bio` -and the output will be written to [samples/invalid_repair_discard.bio](samples/invalid_repair_discard.bio): +and the output will be written to +[samples/invalid_repair_discard.bio](samples/invalid_repair_discard.bio): ``` This O @@ -390,18 +375,16 @@ Philadelphia I-LOC , O Pennsylvania B-LOC . O - ``` -Repairing the file before performing other operations is available in the -`count` and `summarize` subcommands. 
+Repairing the file before performing other operations is available in the `count` and +`summarize` subcommands. ## Summarize -The `summarize` subcommand can produce counts of the types of chunks -in the input file. For example, if we run -`seqscore summarize --labels BIO samples/reference.bio` -we get the following output: +The `summarize` subcommand can produce counts of the types of chunks in the input file. +For example, if we run `seqscore summarize --labels BIO samples/reference.bio` we get +the following output: ``` File 'samples/reference.bio' contains 1 document(s) with the following mentions: @@ -411,14 +394,13 @@ File 'samples/reference.bio' contains 1 document(s) with the following mentions: | ORG | 1 | ``` -If the quiet (`-q`) flag is provided, the first line giving the filename -and document count is not printed. +If the quiet (`-q`) flag is provided, the first line giving the filename and document +count is not printed. ## Count -The `count` subcommand can produce the counts of chunks in the input -file. Unlike `summarize`, it counts chunk-type pairs, not just types. -For example, if we run +The `count` subcommand can produce the counts of chunks in the input file. Unlike +`summarize`, it counts chunk-type pairs, not just types. For example, if we run `seqscore count --labels BIO samples/reference.bio --output-file counts.csv`, tab-delimited counts would be written to `counts.csv` as follows: @@ -433,18 +415,18 @@ standard output. However, you may encounter Unicode issues if your terminal is n configured properly. You can use the `--output-delim` argument to change the delimiter used in the counts. -The default delimiter of tab is strongly recommended, as there is no escaping or -quoting of the names in the output. +The default delimiter of tab is strongly recommended, as there is no escaping or quoting +of the names in the output. ## Process -The `process` subcommand can remove entity types from a file or map them to -other types. 
Removing types can be performed by specifying one of `--keep-types` -or `--remove-types`. +The `process` subcommand can remove entity types from a file or map them to other types. +Removing types can be performed by specifying one of `--keep-types` or `--remove-types`. For example, if we wanted to keep only the ORG type, we could run: `seqscore process --labels BIO --keep-types ORG samples/reference.bio samples/keep_ORG.bio`, -and the following output will be written to [samples/keep_ORG.bio](samples/keep_ORG.bio): +and the following output will be written to +[samples/keep_ORG.bio](samples/keep_ORG.bio): ``` This O @@ -468,11 +450,12 @@ Pennsylvania O You can also keep multiple types by specifying a comma-separated list of types: `--keep-types LOC,ORG`. -Instead of specifying which types to keep, we can also specify which types to -remove using `--remove-types`. For example, if we wanted to remove only the -ORG type, we could run: +Instead of specifying which types to keep, we can also specify which types to remove +using `--remove-types`. For example, if we wanted to remove only the ORG type, we could +run: `seqscore process --labels BIO --remove-types ORG samples/reference.bio samples/remove_ORG.bio`, -and the following output will be written to [samples/remove_ORG.bio](samples/remove_ORG.bio): +and the following output will be written to +[samples/remove_ORG.bio](samples/remove_ORG.bio): ``` This O @@ -496,10 +479,9 @@ Pennsylvania B-LOC As with keep, you can specify multiple tags to remove, for example `--remove-types LOC,ORG`. -The `--type-map` argument allows you to specify a JSON file that specifies a -mapping between types and other types. Suppose you want to collapse several -types into a more generic NAME type. In that case, the type map would be -specified as follows: +The `--type-map` argument allows you to specify a JSON file that specifies a mapping +between types and other types. 
Suppose you want to collapse several types into a more +generic NAME type. In that case, the type map would be specified as follows: ``` { @@ -507,9 +489,9 @@ specified as follows: } ``` -The type map must be a JSON dictionary. The keys are the types to be mapped to, -while the value for each key is a list of types to be mapped from. Note that -the value must always be a list, even if it would only contain one element. +The type map must be a JSON dictionary. The keys are the types to be mapped to, while +the value for each key is a list of types to be mapped from. Note that the value must +always be a list, even if it would only contain one element. We can apply the above type map to a file using the following command: `seqscore process --labels BIO --type-map samples/type_map_NAME.json samples/reference.bio samples/all_NAME.bio`, @@ -534,9 +516,8 @@ Pennsylvania B-NAME . O ``` -When `--type-map` is specified at the same time as `--keep-types` or -`--remove-types`, the type mapping is applied **before** the keep/remove -filtering is applied. +When `--type-map` is specified at the same time as `--keep-types` or `--remove-types`, +the type mapping is applied **before** the keep/remove filtering is applied. ## Text extraction @@ -555,14 +536,12 @@ University of Pennsylvania is in West Philadelphia , Pennsylvania . Each sentence is written on one line with space-delimited tokens. - # FAQ ## Why can't I score output files that are in the format `conlleval` expects? -SeqScore intentionally does not support the "merged" -format used by `conlleval` where each line contains a token, correct -tag, and predicted tag: +SeqScore intentionally does not support the "merged" format used by `conlleval` where +each line contains a token, correct tag, and predicted tag: ``` University B-ORG B-ORG @@ -577,23 +556,21 @@ Pennsylvania B-LOC B-LOC . 
O O ``` -We do not support this format because we have found that creating -predictions in this format is a common source of errors in scoring -pipelines. +We do not support this format because we have found that creating predictions in this +format is a common source of errors in scoring pipelines. ## When do I need to specify the `--labels` argument? -The `--labels` argument must be specified for commands where knowing the label -encoding is essential to getting correct answers. These commands are `validate`, -`repair`, and `score`. For all other commands, `--labels BIO` is assumed by -default but can be overridden. +The `--labels` argument must be specified for commands where knowing the label encoding +is essential to getting correct answers. These commands are `validate`, `repair`, and +`score`. For all other commands, `--labels BIO` is assumed by default but can be +overridden. # Development The following instructions are for the project maintainers only. -For development, check out the `dev` branch (latest, but less tested -than `main`). +For development, check out the `dev` branch (latest, but less tested than `main`). ## Setting up an environment for development @@ -611,8 +588,7 @@ than `main`). # Contributors -SeqScore was developed by the BLT Lab at Brandeis University under the -direction of PI and lead developer Constantine Lignos. Chester -Palen-Michel, Nolan Holley, and Claire Wang contributed to its -development. Gordon Dou, Maya Kruse, and Andrew Rueda gave feedback -on its features and assisted in README writing. +SeqScore was developed by the BLT Lab at Brandeis University under the direction of PI +and lead developer Constantine Lignos. Chester Palen-Michel, Nolan Holley, and Claire +Wang contributed to its development. Gordon Dou, Maya Kruse, and Andrew Rueda gave +feedback on its features and assisted in README writing. 
diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh index 1967c0a..abd3ec1 100755 --- a/tests/pre_commit.sh +++ b/tests/pre_commit.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euxo pipefail -flowmark -i --nobackup *.md +flowmark -i --nobackup ./*.md files=(seqscore/ tests/) ruff check --fix "${files[@]}" ruff check --select I --fix "${files[@]}" # Organize imports From 614a9dfefbd53a579884214c9ed289b3067697a2 Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 10:58:06 -0400 Subject: [PATCH 4/8] Enable build on dev* branches --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ebf8a1e..d380885 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,11 +4,11 @@ on: push: branches: - main - - dev + - dev* pull_request: branches: - main - - dev + - dev* jobs: build: From 01af545d500d0e9803aa85f8d4531ca841c1bcdf Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 11:02:03 -0400 Subject: [PATCH 5/8] Update build to use pyproject.toml --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d380885..0bc5ba2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,7 +43,7 @@ jobs: - name: Install quality check dependencies run: | - pip install -r requirements.txt + pip install ".[dev]" - name: Run quality checks run: | From fc3ed150775ddcb1d320329a8357378ade33b6db Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 11:06:10 -0400 Subject: [PATCH 6/8] Pin version of pytest-cov --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a6256ca..94f89c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ seqscore = "seqscore.scripts.seqscore:cli" dev = [ "types-tabulate", 
"pytest==9.0.3", - "pytest-cov>=7.1.0", + "pytest-cov==7.1.0", "mypy==2.1.0", "ruff==0.15.15", "flowmark", From 35dbd792b9340802469358371793ce15984d04fc Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 11:08:44 -0400 Subject: [PATCH 7/8] Update check.sh for removal of setup.py --- tests/check.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/check.sh b/tests/check.sh index 1f05a83..7fb6ba5 100755 --- a/tests/check.sh +++ b/tests/check.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash set -euxo pipefail -files=(seqscore/ tests/ setup.py) +files=(seqscore/ tests/) ruff check "${files[@]}" mypy "${files[@]}" From 23d1335a4a52db91020a74c4c5d12d32b4d34f49 Mon Sep 17 00:00:00 2001 From: Constantine Lignos Date: Thu, 4 Jun 2026 11:22:49 -0400 Subject: [PATCH 8/8] Add release script --- README.md | 10 ++++++++ pyproject.toml | 2 ++ scripts/release.sh | 57 ++++++++++++++++++++++++++++++++++++++++++++++ tests/check.sh | 1 + 4 files changed, 70 insertions(+) create mode 100755 scripts/release.sh diff --git a/README.md b/README.md index afb832d..9b1320c 100644 --- a/README.md +++ b/README.md @@ -586,6 +586,16 @@ For development, check out the `dev` branch (latest, but less tested than `main` 3. Install seqscore: `pip install -e .` 4. Install development dependencies: `pip install -e ".[dev]"` +## Release + +The release script is located at `scripts/release.sh` and can only be used by project +maintainers. To make a release: + +1. Make sure `__version__` is up to date in `seqscore/__init__.py`. +2. Make sure you are on the main branch with no uncommitted changes. +3. Run `scripts/release.sh`. If anything goes wrong between tagging and releasing, you + will have to delete the tag on GitHub and try again. 
+
 # Contributors
 
 SeqScore was developed by the BLT Lab at Brandeis University under the direction of PI
diff --git a/pyproject.toml b/pyproject.toml
index 94f89c1..38cb8e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,8 @@ dev = [
     "mypy==2.1.0",
     "ruff==0.15.15",
     "flowmark",
+    "build",
+    "twine",
 ]
 
 [tool.setuptools.dynamic]
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100755
index 0000000..13fa608
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Builds the package, tags and pushes the release, then uploads to PyPI.
+# Should only be run by project maintainers, from the repository root.
+set -euo pipefail
+
+VENV=".venv/bin"
+
+# Run the quality checks in tests/check.sh before touching any git state
+bash tests/check.sh
+
+# Must be on main with a clean working tree
+current_branch=$(git rev-parse --abbrev-ref HEAD)
+if [[ "$current_branch" != "main" ]]; then
+    echo "Error: must be on main branch (currently on '$current_branch')"
+    exit 1
+fi
+
+if ! git diff --quiet || ! git diff --cached --quiet; then
+    echo "Error: working tree is not clean"
+    exit 1
+fi
+
+# Read version from package
+version=$("$VENV/python" -c "import seqscore; print(seqscore.__version__)")
+tag="v$version"
+
+# Abort if tag already exists (check refs/tags so a same-named branch or path cannot match)
+if git rev-parse -q --verify "refs/tags/$tag" >/dev/null 2>&1; then
+    echo "Error: tag $tag already exists. Update __version__ in seqscore/__init__.py."
+    exit 1
+fi
+
+echo "Releasing $tag"
+
+# Build into a fresh dist/ so the later upload only contains this release
+rm -rf dist/
+"$VENV/python" -m build
+
+# Tag and push
+git tag "$tag"
+git push origin "$tag"
+
+# Prompt to verify the pushed tag before uploading
+echo ""
+echo "Tag $tag pushed. Check the release on GitHub before uploading to PyPI:"
+echo " https://github.com/bltlab/seqscore/releases/tag/$tag"
+echo ""
+read -r -p "Upload to PyPI? [y/N] " confirm
+if [[ "$confirm" != [yY] ]]; then  # pattern match; avoids the Bash 4-only ${confirm,,}
+    echo "Aborted. Tag $tag is already pushed; delete it locally and on GitHub before re-running."
+    exit 1
+fi
+
+# Upload to PyPI
+"$VENV/twine" upload dist/*
+
+echo "Done: $tag released and pushed"
diff --git a/tests/check.sh b/tests/check.sh
index 7fb6ba5..e544ad2 100755
--- a/tests/check.sh
+++ b/tests/check.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
+flowmark --check ./*.md
 files=(seqscore/ tests/)
 ruff check "${files[@]}"
 mypy "${files[@]}"