From e5c34ec6910ebb34511d0040684677012547624d Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 10:41:01 -0400
Subject: [PATCH 1/8] Move all setup to pyproject.toml and add uv instructions

---
 README.md           | 12 ++++++----
 pyproject.toml      | 55 ++++++++++++++++++++++++++++++++++++++++++++-
 requirements.txt    | 19 ----------------
 setup.py            | 52 ------------------------------------------
 tests/pre_commit.sh |  2 +-
 5 files changed, 63 insertions(+), 77 deletions(-)
 delete mode 100644 requirements.txt
 delete mode 100755 setup.py

diff --git a/README.md b/README.md
index 6fc280d..43b68ad 100644
--- a/README.md
+++ b/README.md
@@ -595,15 +595,19 @@ The following instructions are for the project maintainers only.
 For development, check out the `dev` branch (latest, but less tested
 than `main`).
 
-To install from a clone of this repository, use:
-`pip install -e .`
-
 ## Setting up an environment for development
 
+### Using uv
+
+1. Create an environment: `uv venv --python 3.10 .venv`
+2. Install seqscore and development dependencies: `uv pip install -e ".[dev]"`
+
+### Using conda
+
 1. Create an environment: `conda create -yn seqscore python=3.10`
 2. Activate the environment: `conda activate seqscore`
 3. Install seqscore: `pip install -e .`
-4. Install development dependencies: `pip install -r requirements.txt`
+4. Install development dependencies: `pip install -e ".[dev]"`
 
 # Contributors
 
diff --git a/pyproject.toml b/pyproject.toml
index 212b567..23866eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,57 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "seqscore"
+dynamic = ["version"]
+description = "SeqScore: Scoring for named entity recognition and other sequence labeling tasks"
+readme = "README.md"
+license = {text = "MIT"}
+authors = [
+    {name = "Constantine Lignos", email = "lignos@brandeis.edu"},
+]
+requires-python = ">=3.10"
+dependencies = [
+    "attrs>=19.2.0",
+    "click",
+    "tabulate",
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+
+[project.urls]
+Homepage = "https://github.com/bltlab/seqscore"
+
+[project.scripts]
+seqscore = "seqscore.scripts.seqscore:cli"
+
+[project.optional-dependencies]
+dev = [
+    "types-tabulate",
+    "pytest==9.0.3",
+    "pytest-cov>=7.1.0",
+    "mypy==2.1.0",
+    "ruff==0.15.15",
+]
+
+[tool.setuptools.dynamic]
+version = {attr = "seqscore.__version__"}
+
+[tool.setuptools.packages.find]
+include = ["seqscore", "seqscore.*"]
+
+[tool.setuptools.package-data]
+seqscore = ["py.typed"]
+
 [tool.mypy]
 python_version = "3.10"
 strict_optional = false
@@ -6,7 +60,6 @@ disallow_untyped_calls = true
 
 [[tool.mypy.overrides]]
 module = [
-    "setuptools",
     "click.*",
 ]
 ignore_missing_imports = true
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 8ac2e1b..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# This file only contains dependencies needed for development.
-# setup.py contains the actual package dependencies, and the package
-# should be installed before these requirements.
-
-# Type annotations for tabulate
-types-tabulate
-
-# For testing
-pytest==9.0.3
-pytest-cov>=7.1.0
-
-# For development
-mypy==2.1.0
-ruff==0.15.15
-
-# Documentation build
-# Disabled for now since we don't need them
-# sphinx
-# sphinx-rtd-theme
diff --git a/setup.py b/setup.py
deleted file mode 100755
index fbb7f4a..0000000
--- a/setup.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#! /usr/bin/env python
-
-from os import path
-
-from setuptools import find_packages, setup
-
-from seqscore import __version__
-
-
-def setup_package() -> None:
-    root = path.abspath(path.dirname(__file__))
-    with open(path.join(root, "README.md"), encoding="utf-8") as f:
-        long_description = f.read()
-
-    setup(
-        name="seqscore",
-        version=__version__,
-        packages=find_packages(include=("seqscore", "seqscore.*")),
-        # Package type information
-        package_data={"seqscore": ["py.typed"]},
-        python_requires=">=3.10",
-        license="MIT",
-        description="SeqScore: Scoring for named entity recognition and other sequence labeling tasks",
-        long_description=long_description,
-        install_requires=[
-            "attrs>=19.2.0",
-            "click",
-            "tabulate",
-        ],
-        entry_points="""
-            [console_scripts]
-            seqscore=seqscore.scripts.seqscore:cli
-        """,
-        classifiers=[
-            "Development Status :: 4 - Beta",
-            "License :: OSI Approved :: MIT License",
-            "Programming Language :: Python :: 3.10",
-            "Programming Language :: Python :: 3.11",
-            "Programming Language :: Python :: 3.12",
-            "Programming Language :: Python :: 3.13",
-            "Programming Language :: Python :: 3.14",
-            "Topic :: Scientific/Engineering :: Artificial Intelligence",
-        ],
-        url="https://github.com/bltlab/seqscore",
-        long_description_content_type="text/markdown",
-        author="Constantine Lignos",
-        author_email="lignos@brandeis.edu",
-    )
-
-
-if __name__ == "__main__":
-    setup_package()
diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh
index 411b6f3..3b76d84 100755
--- a/tests/pre_commit.sh
+++ b/tests/pre_commit.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
-files=(seqscore/ tests/ *.py)
+files=(seqscore/ tests/)
 ruff check --fix "${files[@]}"
 ruff check --select I --fix "${files[@]}"  # Organize imports
 ruff format "${files[@]}"

From d0b029b83c4cf6a42b3e66106e7f802064930c49 Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 10:50:14 -0400
Subject: [PATCH 2/8] Add flowmark for markdown autoformatting

---
 pyproject.toml      | 1 +
 tests/pre_commit.sh | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 23866eb..a6256ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ dev = [
     "pytest-cov>=7.1.0",
     "mypy==2.1.0",
     "ruff==0.15.15",
+    "flowmark",
 ]
 
 [tool.setuptools.dynamic]
diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh
index 3b76d84..1967c0a 100755
--- a/tests/pre_commit.sh
+++ b/tests/pre_commit.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
+flowmark -i --nobackup *.md
 files=(seqscore/ tests/)
 ruff check --fix "${files[@]}"
 ruff check --select I --fix "${files[@]}"  # Organize imports

From 4a8c64c6cc38fefc584992460363c325c3d25eb1 Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 10:50:29 -0400
Subject: [PATCH 3/8] Autoformat README

---
 README.md           | 212 ++++++++++++++++++++------------------------
 tests/pre_commit.sh |   2 +-
 2 files changed, 95 insertions(+), 119 deletions(-)

diff --git a/README.md b/README.md
index 43b68ad..afb832d 100644
--- a/README.md
+++ b/README.md
@@ -5,24 +5,22 @@
 [![image](https://img.shields.io/pypi/l/seqscore.svg)](https://pypi.python.org/pypi/seqscore)
 [![image](https://img.shields.io/pypi/pyversions/seqscore.svg)](https://pypi.python.org/pypi/seqscore)
 
-SeqScore provides scoring for named entity recognition and other
-chunking tasks evaluated over sequence labels.
+SeqScore provides scoring for named entity recognition and other chunking tasks
+evaluated over sequence labels.
 
-SeqScore is maintained by the BLT Lab at Brandeis University. Please
-open an issue if you find incorrect behavior or features you would like
-to see added. Due to the risk of introducing regressions or incorrect
-scoring behavior, *we generally do not accept pull requests*. Please do not
-open a pull request unless you are asked to do so by a maintainer in an
-issue.
+SeqScore is maintained by the BLT Lab at Brandeis University. Please open an issue if
+you find incorrect behavior or features you would like to see added. Due to the risk of
+introducing regressions or incorrect scoring behavior, *we generally do not accept pull
+requests*. Please do not open a pull request unless you are asked to do so by a
+maintainer in an issue.
 
 ## Installation
 
-To install the latest official release of SeqScore, run: `pip install seqscore`.
-This will install the package and add the command `seqscore` in your Python
-environment.
+To install the latest official release of SeqScore, run: `pip install seqscore`. This
+will install the package and add the command `seqscore` in your Python environment.
 
-SeqScore requires Python 3.10 or higher. It is tested on Python 3.10, 3.11, 3.12,
-3.13, and 3.14.
+SeqScore requires Python 3.10 or higher. It is tested on Python 3.10, 3.11, 3.12, 3.13,
+and 3.14.
 
 ## License
 
@@ -78,7 +76,6 @@ Other papers related to SeqScore include:
 * [Toward More Meaningful Resources for Lower-resourced Languages](https://aclanthology.org/2022.findings-acl.44/)
 * [CoNLL#: Fine-grained Error Analysis and a Corrected Test Set for CoNLL-03 English](https://aclanthology.org/2024.lrec-main.330/)
 
-
 # Usage
 
 ## Overview
@@ -108,10 +105,9 @@ Commands:
 
 ## Scoring
 
-The most common application of SeqScore is scoring CoNLL-format NER
-predictions. Let's assume you have two files, one containing the
-correct labels (annotation) and the other containing the predictions
-(system output).
+The most common application of SeqScore is scoring CoNLL-format NER predictions. Let's
+assume you have two files, one containing the correct labels (annotation) and the other
+containing the predictions (system output).
 
 The correct labels are in the file [samples/reference.bio](samples/reference.bio):
 
@@ -132,7 +128,6 @@ Philadelphia I-LOC
 , O
 Pennsylvania B-LOC
 . O
-
 ```
 
 The predictions are in the file [samples/predicted.bio](samples/predicted.bio):
@@ -154,7 +149,6 @@ Philadelphia B-LOC
 , O
 Pennsylvania B-LOC
 . O
-
 ```
 
 To score the predictions, run:
@@ -171,27 +165,23 @@ To score the predictions, run:
 A few things to note:
 
 * The reference file must be specified with the `--reference` flag.
-* The chunk encoding (BIO, BIOES, etc.) must be specified using the
-  `--labels` flag.
-* Both files need to use the same chunk encoding. If you have
-  files that use different chunk encodings, use the `convert` command.
-* You can get output in different formats using the `--score-format`
-  flag. Using `--score-format delim` will produce tab-delimited
-  output. In the delimited format, you can specify the `--full-precision`
-  flag to output higher numerical precision.
-* In the default (pretty) output format, numbers are rounded "half up"
-  at two decimal places. In other words, 57.124 will round to 57.12,
-  and 57.125 will round to 57.13. This is different than the "half even"
-  rounding used by `conlleval` and other libraries that rely on `printf`
-  behavior for rounding. Half up rounding is used as it is more likely to
-  match the rounding a user would perform if shown three decimal places.
-  If you request `conlleval` output format, the same rounding used by
+* The chunk encoding (BIO, BIOES, etc.) must be specified using the `--labels` flag.
+* Both files need to use the same chunk encoding. If you have files that use different
+  chunk encodings, use the `convert` command.
+* You can get output in different formats using the `--score-format` flag. Using
+  `--score-format delim` will produce tab-delimited output. In the delimited format, you
+  can specify the `--full-precision` flag to output higher numerical precision.
+* In the default (pretty) output format, numbers are rounded "half up" at two decimal
+  places. In other words, 57.124 will round to 57.12, and 57.125 will round to 57.13.
+  This is different than the "half even" rounding used by `conlleval` and other
+  libraries that rely on `printf` behavior for rounding. Half up rounding is used as it
+  is more likely to match the rounding a user would perform if shown three decimal
+  places. If you request `conlleval` output format, the same rounding used by
   `conlleval` will be used.
 
-The above scoring command will work for files that do not have any
-invalid transitions, that is, those that perfectly follow what the
-encoding allows. However, consider this BIO-encoded file,
-[samples/invalid.bio](samples/invalid.bio):
+The above scoring command will work for files that do not have any invalid transitions,
+that is, those that perfectly follow what the encoding allows. However, consider this
+BIO-encoded file, [samples/invalid.bio](samples/invalid.bio):
 
 ```
 This O
@@ -210,11 +200,10 @@ Philadelphia I-LOC
 , O
 Pennsylvania B-LOC
 . O
-
 ```
 
-Note that the token `University` has the label `I-ORG`, but there is
-no preceding `B-ORG`. If we score it as before with
+Note that the token `University` has the label `I-ORG`, but there is no preceding
+`B-ORG`. If we score it as before with
 `seqscore score --labels BIO --reference samples/reference.bio samples/invalid.bio`,
 scoring will fail:
 
@@ -223,10 +212,9 @@ seqscore.encoding.EncodingError: Stopping due to validation errors in invalid.bi
 Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7
 ```
 
-To score output with invalid transitions, we need to specify a repair
-method which can correct them. We can tell SeqScore to use the same
-approach that conlleval uses (which we refer to as "begin" repair in our
-paper):
+To score output with invalid transitions, we need to specify a repair method which can
+correct them. We can tell SeqScore to use the same approach that conlleval uses (which
+we refer to as "begin" repair in our paper):
 `seqscore score --labels BIO --repair-method conlleval --reference samples/reference.bio samples/invalid.bio`:
 
 ```
@@ -242,8 +230,8 @@ New: ('B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'B-LOC', 'O')
 | ORG    |      100.00 |   100.00 | 100.00 |           1 |           1 |         1 |
 ```
 
-You can use the `-q` flag to suppress the logging of all of the repairs
-applied. For example, running the command
+You can use the `-q` flag to suppress the logging of all of the repairs applied. For
+example, running the command
 `seqscore score -q --labels BIO --repair-method conlleval --reference samples/reference.bio samples/invalid.bio`
 will hide the repairs:
 
@@ -255,13 +243,12 @@ will hide the repairs:
 | ORG    |      100.00 |   100.00 | 100.00 |           1 |           1 |         1 |
 ```
 
-You may want to also explore the `discard` repair, which can
-produce higher scores for output from models without a CRF/constrained
-decoding as they are more likely to produce invalid transitions.
+You may want to also explore the `discard` repair, which can produce higher scores for
+output from models without a CRF/constrained decoding as they are more likely to produce
+invalid transitions.
 
-SeqScore can also display all errors (false positives and false negatives)
-encountered in scoring using the `--error-counts` flag. For example, running the
-command
+SeqScore can also display all errors (false positives and false negatives) encountered
+in scoring using the `--error-counts` flag. For example, running the command
 `seqscore score --labels BIO --error-counts --reference samples/reference.bio samples/predicted.bio`
 will produce the following output:
 
@@ -273,10 +260,10 @@ will produce the following output:
 |       1 | FN      | LOC    | West Philadelphia |
 ```
 
-The output shows that the system produced two false positives and missed one
-mention in the reference (false negative). The most frequent errors appear at
-the top. The `--error-counts` flag can be combined with `--score-format delim`
-to write a delimited table that can be read as a spreadsheet.
+The output shows that the system produced two false positives and missed one mention in
+the reference (false negative). The most frequent errors appear at the top. The
+`--error-counts` flag can be combined with `--score-format delim` to write a delimited
+table that can be read as a spreadsheet.
 
 ## Validation
 
@@ -290,7 +277,7 @@ No errors found in 0 tokens, 2 sequences, and 1 documents in reference.bio
 For the example of the [samples/invalid.bio](samples/invalid.bio), we can run
 `seqscore validate --labels BIO samples/invalid.bio`:
 
- ```
+```
 Encountered 1 errors in 1 tokens, 2 sequences, and 1 documents in invalid.bio
 Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7
 ```
@@ -299,8 +286,8 @@ Invalid transition 'O' -> 'I-ORG' for token 'University' on line 7
 
 We can convert a file from one chunk encoding to another. For example,
 `seqscore convert --input-labels BIO --output-labels BIOES samples/reference.bio samples/reference.bioes`
-will read [samples/reference.bio](samples/reference.bio) in BIO
-encoding and write the BIOES-converted file to [samples/reference.bioes](samples/reference.bioes):
+will read [samples/reference.bio](samples/reference.bio) in BIO encoding and write the
+BIOES-converted file to [samples/reference.bioes](samples/reference.bioes):
 
 ```
 This O
@@ -319,7 +306,6 @@ Philadelphia E-LOC
 , O
 Pennsylvania S-LOC
 . O
-
 ```
 
 We can get a list of available chunk encodings by running `seqscore convert --help`:
@@ -341,12 +327,11 @@ Options:
 
 ## Repair
 
-We can also apply repair methods to a file, creating an output file
-with only valid transitions. For example, we can run
+We can also apply repair methods to a file, creating an output file with only valid
+transitions. For example, we can run
 `seqscore repair --labels BIO --repair-method conlleval samples/invalid.bio samples/invalid_repair_conlleval.bio`,
 which will apply the conlleval repair method to the
-[samples/invalid.bio](samples/invalid.bio) and write the repaired
-labels to
+[samples/invalid.bio](samples/invalid.bio) and write the repaired labels to
 [samples/invalid_repair_conlleval.bio](samples/invalid_repair_conlleval.bio):
 
 ```
@@ -366,12 +351,12 @@ Philadelphia I-LOC
 , O
 Pennsylvania B-LOC
 . O
-
 ```
 
 If we want to apply the discard repair method, we can run
 `seqscore repair --labels BIO --repair-method discard samples/invalid.bio samples/invalid_repair_discard.bio`
-and the output will be written to [samples/invalid_repair_discard.bio](samples/invalid_repair_discard.bio):
+and the output will be written to
+[samples/invalid_repair_discard.bio](samples/invalid_repair_discard.bio):
 
 ```
 This O
@@ -390,18 +375,16 @@ Philadelphia I-LOC
 , O
 Pennsylvania B-LOC
 . O
-
 ```
 
-Repairing the file before performing other operations is available in the
-`count` and `summarize` subcommands.
+Repairing the file before performing other operations is available in the `count` and
+`summarize` subcommands.
 
 ## Summarize
 
-The `summarize` subcommand can produce counts of the types of chunks
-in the input file. For example, if we run
-`seqscore summarize --labels BIO samples/reference.bio`
-we get the following output:
+The `summarize` subcommand can produce counts of the types of chunks in the input file.
+For example, if we run `seqscore summarize --labels BIO samples/reference.bio` we get
+the following output:
 
 ```
 File 'samples/reference.bio' contains 1 document(s) with the following mentions:
@@ -411,14 +394,13 @@ File 'samples/reference.bio' contains 1 document(s) with the following mentions:
 | ORG           |       1 |
 ```
 
-If the quiet (`-q`) flag is provided, the first line giving the filename
-and document count is not printed.
+If the quiet (`-q`) flag is provided, the first line giving the filename and document
+count is not printed.
 
 ## Count
 
-The `count` subcommand can produce the counts of chunks in the input
-file. Unlike `summarize`, it counts chunk-type pairs, not just types.
-For example, if we run
+The `count` subcommand can produce the counts of chunks in the input file. Unlike
+`summarize`, it counts chunk-type pairs, not just types. For example, if we run
 `seqscore count --labels BIO samples/reference.bio --output-file counts.csv`,
 tab-delimited counts would be written to `counts.csv` as follows:
 
@@ -433,18 +415,18 @@ standard output. However, you may encounter Unicode issues if your terminal is n
 configured properly.
 
 You can use the `--output-delim` argument to change the delimiter used in the counts.
-The default delimiter of tab is strongly recommended, as there is no escaping or
-quoting of the names in the output.
+The default delimiter of tab is strongly recommended, as there is no escaping or quoting
+of the names in the output.
 
 ## Process
 
-The `process` subcommand can remove entity types from a file or map them to
-other types. Removing types can be performed by specifying one of `--keep-types`
-or `--remove-types`.
+The `process` subcommand can remove entity types from a file or map them to other types.
+Removing types can be performed by specifying one of `--keep-types` or `--remove-types`.
 
 For example, if we wanted to keep only the ORG type, we could run:
 `seqscore process --labels BIO --keep-types ORG samples/reference.bio samples/keep_ORG.bio`,
-and the following output will be written to [samples/keep_ORG.bio](samples/keep_ORG.bio):
+and the following output will be written to
+[samples/keep_ORG.bio](samples/keep_ORG.bio):
 
 ```
 This O
@@ -468,11 +450,12 @@ Pennsylvania O
 You can also keep multiple types by specifying a comma-separated list of types:
 `--keep-types LOC,ORG`.
 
-Instead of specifying which types to keep, we can also specify which types to
-remove using `--remove-types`. For example, if we wanted to remove only the
-ORG type, we could run:
+Instead of specifying which types to keep, we can also specify which types to remove
+using `--remove-types`. For example, if we wanted to remove only the ORG type, we could
+run:
 `seqscore process --labels BIO --remove-types ORG samples/reference.bio samples/remove_ORG.bio`,
-and the following output will be written to [samples/remove_ORG.bio](samples/remove_ORG.bio):
+and the following output will be written to
+[samples/remove_ORG.bio](samples/remove_ORG.bio):
 
 ```
 This O
@@ -496,10 +479,9 @@ Pennsylvania B-LOC
 As with keep, you can specify multiple tags to remove, for example
 `--remove-types LOC,ORG`.
 
-The `--type-map` argument allows you to specify a JSON file that specifies a
-mapping between types and other types. Suppose you want to collapse several
-types into a more generic NAME type. In that case, the type map would be
-specified as follows:
+The `--type-map` argument allows you to specify a JSON file that specifies a mapping
+between types and other types. Suppose you want to collapse several types into a more
+generic NAME type. In that case, the type map would be specified as follows:
 
 ```
 {
@@ -507,9 +489,9 @@ specified as follows:
 }
 ```
 
-The type map must be a JSON dictionary. The keys are the types to be mapped to,
-while the value for each key is a list of types to be mapped from. Note that
-the value must always be a list, even if it would only contain one element.
+The type map must be a JSON dictionary. The keys are the types to be mapped to, while
+the value for each key is a list of types to be mapped from. Note that the value must
+always be a list, even if it would only contain one element.
 
 We can apply the above type map to a file using the following command:
 `seqscore process --labels BIO --type-map samples/type_map_NAME.json samples/reference.bio samples/all_NAME.bio`,
@@ -534,9 +516,8 @@ Pennsylvania B-NAME
 . O
 ```
 
-When `--type-map` is specified at the same time as `--keep-types` or
-`--remove-types`, the type mapping is applied **before** the keep/remove
-filtering is applied.
+When `--type-map` is specified at the same time as `--keep-types` or `--remove-types`,
+the type mapping is applied **before** the keep/remove filtering is applied.
 
 ## Text extraction
 
@@ -555,14 +536,12 @@ University of Pennsylvania is in West Philadelphia , Pennsylvania .
 
 Each sentence is written on one line with space-delimited tokens.
 
-
 # FAQ
 
 ## Why can't I score output files that are in the format `conlleval` expects?
 
-SeqScore intentionally does not support the "merged"
-format used by `conlleval` where each line contains a token, correct
-tag, and predicted tag:
+SeqScore intentionally does not support the "merged" format used by `conlleval` where
+each line contains a token, correct tag, and predicted tag:
 
 ```
 University B-ORG B-ORG
@@ -577,23 +556,21 @@ Pennsylvania B-LOC B-LOC
 . O O
 ```
 
-We do not support this format because we have found that creating
-predictions in this format is a common source of errors in scoring
-pipelines.
+We do not support this format because we have found that creating predictions in this
+format is a common source of errors in scoring pipelines.
 
 ## When do I need to specify the `--labels` argument?
 
-The `--labels` argument must be specified for commands where knowing the label
-encoding is essential to getting correct answers. These commands are `validate`,
-`repair`, and `score`. For all other commands, `--labels BIO` is assumed by
-default but can be overridden.
+The `--labels` argument must be specified for commands where knowing the label encoding
+is essential to getting correct answers. These commands are `validate`, `repair`, and
+`score`. For all other commands, `--labels BIO` is assumed by default but can be
+overridden.
 
 # Development
 
 The following instructions are for the project maintainers only.
 
-For development, check out the `dev` branch (latest, but less tested
-than `main`).
+For development, check out the `dev` branch (latest, but less tested than `main`).
 
 ## Setting up an environment for development
 
@@ -611,8 +588,7 @@ than `main`).
 
 # Contributors
 
-SeqScore was developed by the BLT Lab at Brandeis University under the
-direction of PI and lead developer Constantine Lignos. Chester
-Palen-Michel, Nolan Holley, and Claire Wang contributed to its
-development.  Gordon Dou, Maya Kruse, and Andrew Rueda gave feedback
-on its features and assisted in README writing.
+SeqScore was developed by the BLT Lab at Brandeis University under the direction of PI
+and lead developer Constantine Lignos. Chester Palen-Michel, Nolan Holley, and Claire
+Wang contributed to its development. Gordon Dou, Maya Kruse, and Andrew Rueda gave
+feedback on its features and assisted in README writing.
diff --git a/tests/pre_commit.sh b/tests/pre_commit.sh
index 1967c0a..abd3ec1 100755
--- a/tests/pre_commit.sh
+++ b/tests/pre_commit.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
-flowmark -i --nobackup *.md
+flowmark -i --nobackup ./*.md
 files=(seqscore/ tests/)
 ruff check --fix "${files[@]}"
 ruff check --select I --fix "${files[@]}"  # Organize imports

From 614a9dfefbd53a579884214c9ed289b3067697a2 Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 10:58:06 -0400
Subject: [PATCH 4/8] Enable build on dev* branches

---
 .github/workflows/main.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index ebf8a1e..d380885 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,11 +4,11 @@ on:
   push:
     branches:
     - main
-    - dev
+    - dev*
   pull_request:
     branches:
     - main
-    - dev
+    - dev*
 
 jobs:
   build:

From 01af545d500d0e9803aa85f8d4531ca841c1bcdf Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 11:02:03 -0400
Subject: [PATCH 5/8] Update build to use pyproject.toml

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d380885..0bc5ba2 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -43,7 +43,7 @@ jobs:
 
       - name: Install quality check dependencies
         run: |
-          pip install -r requirements.txt
+          pip install ".[dev]"
 
       - name: Run quality checks
         run: |

From fc3ed150775ddcb1d320329a8357378ade33b6db Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 11:06:10 -0400
Subject: [PATCH 6/8] Pin version of pytest-cov

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a6256ca..94f89c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,7 @@ seqscore = "seqscore.scripts.seqscore:cli"
 dev = [
     "types-tabulate",
     "pytest==9.0.3",
-    "pytest-cov>=7.1.0",
+    "pytest-cov==7.1.0",
     "mypy==2.1.0",
     "ruff==0.15.15",
     "flowmark",

From 35dbd792b9340802469358371793ce15984d04fc Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 11:08:44 -0400
Subject: [PATCH 7/8] Update check.sh for removal of setup.py

---
 tests/check.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/check.sh b/tests/check.sh
index 1f05a83..7fb6ba5 100755
--- a/tests/check.sh
+++ b/tests/check.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
-files=(seqscore/ tests/ setup.py)
+files=(seqscore/ tests/)
 ruff check "${files[@]}"
 mypy "${files[@]}"

From 23d1335a4a52db91020a74c4c5d12d32b4d34f49 Mon Sep 17 00:00:00 2001
From: Constantine Lignos <lignos@brandeis.edu>
Date: Thu, 4 Jun 2026 11:22:49 -0400
Subject: [PATCH 8/8] Add release script

---
 README.md          | 10 ++++++++
 pyproject.toml     |  2 ++
 scripts/release.sh | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/check.sh     |  1 +
 4 files changed, 70 insertions(+)
 create mode 100755 scripts/release.sh

diff --git a/README.md b/README.md
index afb832d..9b1320c 100644
--- a/README.md
+++ b/README.md
@@ -586,6 +586,16 @@ For development, check out the `dev` branch (latest, but less tested than `main`
 3. Install seqscore: `pip install -e .`
 4. Install development dependencies: `pip install -e ".[dev]"`
 
+## Release
+
+The release script is located at `scripts/release.sh` and can only be used by project
+maintainers. To make a release:
+
+1. Make sure `__version__` is up to date in `seqscore/__init__.py`.
+2. Make sure you are on the main branch with no uncommitted changes.
+3. Run `scripts/release.sh`. If anything goes wrong between tagging and releasing, you
+   will have to delete the tag both locally and on GitHub and try again.
+
 # Contributors
 
 SeqScore was developed by the BLT Lab at Brandeis University under the direction of PI
diff --git a/pyproject.toml b/pyproject.toml
index 94f89c1..38cb8e9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,8 @@ dev = [
     "mypy==2.1.0",
     "ruff==0.15.15",
     "flowmark",
+    "build",
+    "twine",
 ]
 
 [tool.setuptools.dynamic]
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100755
index 0000000..13fa608
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Builds the package, tags the release, and (after confirmation) uploads it to PyPI.
+# Should only be run by project maintainers.
+set -euo pipefail
+
+VENV=".venv/bin"  # Python, build, and twine are run from the project-local venv
+
+# Run the formatting, lint, and type checks before doing anything else
+bash tests/check.sh
+
+# Must be on main with no staged or unstaged changes (untracked files are not checked)
+current_branch=$(git rev-parse --abbrev-ref HEAD)
+if [[ "$current_branch" != "main" ]]; then
+    echo "Error: must be on main branch (currently on '$current_branch')"
+    exit 1
+fi
+
+if ! git diff --quiet || ! git diff --cached --quiet; then
+    echo "Error: working tree is not clean"
+    exit 1
+fi
+
+# Read version from package
+version=$("$VENV/python" -c "import seqscore; print(seqscore.__version__)")
+tag="v$version"
+
+# Abort if the tag already exists locally (check refs/tags so a branch name cannot match)
+if git rev-parse --quiet --verify "refs/tags/$tag" >/dev/null 2>&1; then
+    echo "Error: tag $tag already exists. Update __version__ in seqscore/__init__.py."
+    exit 1
+fi
+
+echo "Releasing $tag"
+
+# Build into a fresh dist/ so that only this version's artifacts get uploaded
+rm -rf dist/
+"$VENV/python" -m build
+
+# Tag and push
+git tag "$tag"
+git push origin "$tag"
+
+# Prompt to verify tag before uploading; only "y" or "Y" confirms (works on bash 3.2+)
+echo ""
+echo "Tag $tag pushed. Check the release on GitHub before uploading to PyPI:"
+echo "  https://github.com/bltlab/seqscore/releases/tag/$tag"
+echo ""
+read -r -p "Upload to PyPI? [y/N] " confirm
+if [[ "$confirm" != [yY] ]]; then
+    echo "Aborted. Tag $tag is already pushed; delete it to re-run, or upload with: $VENV/twine upload dist/*"
+    exit 1
+fi
+
+# Upload to PyPI
+"$VENV/twine" upload dist/*
+
+echo "Done: $tag released and pushed"
diff --git a/tests/check.sh b/tests/check.sh
index 7fb6ba5..e544ad2 100755
--- a/tests/check.sh
+++ b/tests/check.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 set -euxo pipefail
 
+flowmark --check ./*.md
 files=(seqscore/ tests/)
 ruff check "${files[@]}"
 mypy "${files[@]}"