From 2ae28ba7f431cc9545afb3288bbc5f1c45405a65 Mon Sep 17 00:00:00 2001
From: Jean Mertz <git@jeanmertz.com>
Date: Mon, 25 May 2026 14:10:51 +0200
Subject: [PATCH] chore(comfort): Add semantic line-break Rust/Markdown
 formatter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`comfort` is a new contrib crate that reformats Rust doc comments (`///`
and `//!`) and Markdown files using semantic line breaks — one sentence
per line — with an optional `--max-width` safety net for long sentences.

The pipeline runs in three layers: `extract` finds doc-comment blocks
via `ra-ap-rustc_lexer`, `reflow_markdown` parses each block's body with
comrak and walks the AST to locate reflowable paragraphs, and
`reflow_paragraph` segments those paragraphs into sentences using a
UAX #29 splitter with abbreviation-aware merging.

Markdown structure (headings, code blocks, tables, block quotes, lists,
footnotes, hard line breaks) is preserved verbatim; only prose paragraph
contents are reflowed. Container blocks (block quotes, list items, task
items, alerts, footnote definitions) each contribute the correct
per-line continuation prefix so reflow output round-trips cleanly.

Two optional passes compose on top of the always-on sembr engine:

- `--format-markdown`: canonicalizes markdown structure via comrak's
  `format_commonmark` (normalizes list markers to `-`, prefers fenced
  code blocks, aligns table columns for visual display width including
  CJK wide characters) before reflowing.
- `--reference-links`: converts inline `[text](url)` links to
  reference-style `[text]` with definitions consolidated and sorted
  alphabetically at the bottom of each body.

Both transformations are idempotent individually and when composed.

The tool ships two binaries from the same entry point: `comfort` (direct
invocation, defaults to stdin/stdout) and `cargo-comfort` (cargo
subcommand, defaults to `--workspace`). Common flags: `--check`,
`--list-changed`, `--workspace`, `-p`/`--package`, `--exclude`,
`--language`, `--stdin-filename`, `--max-width`.

A new `fmt-comments-ci` justfile target runs `comfort --check
--workspace` in CI, added to the `ci` recipe alongside the existing
`fmt-ci` step.

Signed-off-by: Jean Mertz <git@jeanmertz.com>
---
 .config/supply-chain/audits.toml              |   26 +
 .config/supply-chain/config.toml              |    4 -
 .config/supply-chain/imports.lock             |   49 +-
 Cargo.lock                                    |   68 +-
 Cargo.toml                                    |    6 +-
 crates/contrib/comfort/Cargo.toml             |   36 +
 .../contrib/comfort/src/bin/cargo-comfort.rs  |    3 +
 crates/contrib/comfort/src/bin/comfort.rs     |    3 +
 crates/contrib/comfort/src/cli.rs             |  159 ++
 crates/contrib/comfort/src/cli_tests.rs       |   69 +
 crates/contrib/comfort/src/extract.rs         |  228 +++
 crates/contrib/comfort/src/extract_tests.rs   |  199 ++
 crates/contrib/comfort/src/format.rs          | 1706 +++++++++++++++++
 crates/contrib/comfort/src/format_tests.rs    |  595 ++++++
 crates/contrib/comfort/src/lib.rs             |  126 ++
 crates/contrib/comfort/src/lib_tests.rs       | 1545 +++++++++++++++
 crates/contrib/comfort/src/run.rs             |  188 ++
 crates/contrib/comfort/src/sentence.rs        |  302 +++
 crates/contrib/comfort/src/sentence_tests.rs  |  267 +++
 crates/contrib/comfort/src/walk.rs            |  129 ++
 crates/contrib/comfort/src/walk_tests.rs      |  102 +
 deny.toml                                     |    3 +-
 justfile                                      |   10 +-
 23 files changed, 5796 insertions(+), 27 deletions(-)
 create mode 100644 crates/contrib/comfort/Cargo.toml
 create mode 100644 crates/contrib/comfort/src/bin/cargo-comfort.rs
 create mode 100644 crates/contrib/comfort/src/bin/comfort.rs
 create mode 100644 crates/contrib/comfort/src/cli.rs
 create mode 100644 crates/contrib/comfort/src/cli_tests.rs
 create mode 100644 crates/contrib/comfort/src/extract.rs
 create mode 100644 crates/contrib/comfort/src/extract_tests.rs
 create mode 100644 crates/contrib/comfort/src/format.rs
 create mode 100644 crates/contrib/comfort/src/format_tests.rs
 create mode 100644 crates/contrib/comfort/src/lib.rs
 create mode 100644 crates/contrib/comfort/src/lib_tests.rs
 create mode 100644 crates/contrib/comfort/src/run.rs
 create mode 100644 crates/contrib/comfort/src/sentence.rs
 create mode 100644 crates/contrib/comfort/src/sentence_tests.rs
 create mode 100644 crates/contrib/comfort/src/walk.rs
 create mode 100644 crates/contrib/comfort/src/walk_tests.rs

diff --git a/.config/supply-chain/audits.toml b/.config/supply-chain/audits.toml
index 5dd03139..42ce71aa 100644
--- a/.config/supply-chain/audits.toml
+++ b/.config/supply-chain/audits.toml
@@ -21,6 +21,11 @@ who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
 delta = "0.49.0 -> 0.50.0"
 
+[[audits.comrak]]
+who = "Jean Mertz <git@jeanmertz.com>"
+criteria = "safe-to-deploy"
+delta = "0.50.0 -> 0.52.0"
+
 [[audits.datetime_literal]]
 who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
@@ -41,6 +46,11 @@ who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
 delta = "0.16.2 -> 0.17.0"
 
+[[audits.finl_unicode]]
+who = "Jean Mertz <git@jeanmertz.com>"
+criteria = "safe-to-deploy"
+version = "1.4.0"
+
 [[audits.futf]]
 who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
@@ -56,6 +66,11 @@ who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
 version = "0.36.1"
 
+[[audits.imara-diff]]
+who = "Jean Mertz <git@jeanmertz.com>"
+criteria = "safe-to-deploy"
+version = "0.2.0"
+
 [[audits.infer]]
 who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
@@ -111,6 +126,11 @@ who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
 delta = "0.38.4 -> 0.39.2"
 
+[[audits.ra-ap-rustc_lexer]]
+who = "Jean Mertz <git@jeanmertz.com>"
+criteria = "safe-to-deploy"
+version = "0.167.0"
+
 [[audits.rand_xorshift]]
 who = "Jean Mertz <git@jeanmertz.com>"
 criteria = "safe-to-deploy"
@@ -1005,6 +1025,12 @@ user-id = 3618 # David Tolnay (dtolnay)
 start = "2021-10-02"
 end = "2027-02-13"
 
+[[trusted.unicode-properties]]
+criteria = "safe-to-deploy"
+user-id = 1139 # Manish Goregaokar (Manishearth)
+start = "2023-07-27"
+end = "2027-05-21"
+
 [[trusted.unsafe-libyaml]]
 criteria = "safe-to-deploy"
 user-id = 3618 # David Tolnay (dtolnay)
diff --git a/.config/supply-chain/config.toml b/.config/supply-chain/config.toml
index 4330acb4..84402393 100644
--- a/.config/supply-chain/config.toml
+++ b/.config/supply-chain/config.toml
@@ -683,10 +683,6 @@ criteria = "safe-to-deploy"
 version = "1.18.0"
 criteria = "safe-to-deploy"
 
-[[exemptions.unicode_categories]]
-version = "0.1.1"
-criteria = "safe-to-deploy"
-
 [[exemptions.untrusted]]
 version = "0.9.0"
 criteria = "safe-to-deploy"
diff --git a/.config/supply-chain/imports.lock b/.config/supply-chain/imports.lock
index c55c4688..f5f18ebd 100644
--- a/.config/supply-chain/imports.lock
+++ b/.config/supply-chain/imports.lock
@@ -399,8 +399,8 @@ user-login = "Amanieu"
 user-name = "Amanieu d'Antras"
 
 [[publisher.memchr]]
-version = "2.7.5"
-when = "2025-06-11"
+version = "2.8.0"
+when = "2026-02-06"
 user-id = 189
 user-login = "BurntSushi"
 user-name = "Andrew Gallant"
@@ -791,8 +791,8 @@ user-login = "BurntSushi"
 user-name = "Andrew Gallant"
 
 [[publisher.unicode-ident]]
-version = "1.0.19"
-when = "2025-09-10"
+version = "1.0.24"
+when = "2026-02-16"
 user-id = 3618
 user-login = "dtolnay"
 user-name = "David Tolnay"
@@ -804,6 +804,13 @@ user-id = 1139
 user-login = "Manishearth"
 user-name = "Manish Goregaokar"
 
+[[publisher.unicode-properties]]
+version = "0.1.4"
+when = "2025-10-30"
+user-id = 1139
+user-login = "Manishearth"
+user-name = "Manish Goregaokar"
+
 [[publisher.unicode-segmentation]]
 version = "1.12.0"
 when = "2024-09-13"
@@ -2752,7 +2759,7 @@ who = "Manish Goregaokar <manishsmail@gmail.com>"
 criteria = "safe-to-deploy"
 user-id = 1139 # Manish Goregaokar (Manishearth)
 start = "2019-11-06"
-end = "2026-02-01"
+end = "2027-04-23"
 notes = "All code written or reviewed by Manish"
 aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml"
 
@@ -2761,7 +2768,7 @@ who = "Manish Goregaokar <manishsmail@gmail.com>"
 criteria = "safe-to-deploy"
 user-id = 1139 # Manish Goregaokar (Manishearth)
 start = "2019-05-15"
-end = "2026-02-01"
+end = "2027-04-23"
 notes = "All code written or reviewed by Manish"
 aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml"
 
@@ -3560,6 +3567,36 @@ criteria = "safe-to-deploy"
 delta = "0.13.1 -> 0.13.2"
 aggregated-from = "https://raw.githubusercontent.com/mozilla/cargo-vet/main/supply-chain/audits.toml"
 
+[[audits.mozilla.audits.textwrap]]
+who = "Jan-Erik Rediger <jrediger@mozilla.com>"
+criteria = "safe-to-deploy"
+version = "0.15.0"
+aggregated-from = "https://raw.githubusercontent.com/mozilla/glean/main/supply-chain/audits.toml"
+
+[[audits.mozilla.audits.textwrap]]
+who = "Mike Hommey <mh+mozilla@glandium.org>"
+criteria = "safe-to-deploy"
+delta = "0.15.0 -> 0.15.2"
+aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml"
+
+[[audits.mozilla.audits.textwrap]]
+who = "Mike Hommey <mh+mozilla@glandium.org>"
+criteria = "safe-to-deploy"
+delta = "0.15.2 -> 0.16.0"
+aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml"
+
+[[audits.mozilla.audits.textwrap]]
+who = "Jan-Erik Rediger <jrediger@mozilla.com>"
+criteria = "safe-to-deploy"
+delta = "0.16.0 -> 0.16.1"
+aggregated-from = "https://hg.mozilla.org/mozilla-central/raw-file/tip/supply-chain/audits.toml"
+
+[[audits.mozilla.audits.textwrap]]
+who = "Nika Layzell <nika@thelayzells.com>"
+criteria = "safe-to-deploy"
+delta = "0.16.1 -> 0.16.2"
+aggregated-from = "https://raw.githubusercontent.com/mozilla/cargo-vet/main/supply-chain/audits.toml"
+
 [[audits.mozilla.audits.thiserror]]
 who = "Jan-Erik Rediger <jrediger@mozilla.com>"
 criteria = "safe-to-deploy"
diff --git a/Cargo.lock b/Cargo.lock
index 8d50c667..f18844d5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -565,6 +565,25 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 
+[[package]]
+name = "comfort"
+version = "0.1.0"
+dependencies = [
+ "cargo_metadata",
+ "clap",
+ "comrak",
+ "ignore",
+ "indoc",
+ "pretty_assertions",
+ "ra-ap-rustc_lexer",
+ "regex",
+ "similar",
+ "textwrap",
+ "thiserror 2.0.18",
+ "unicode-segmentation",
+ "unicode-width",
+]
+
 [[package]]
 name = "comfy-table"
 version = "7.2.1"
@@ -580,19 +599,19 @@ dependencies = [
 
 [[package]]
 name = "comrak"
-version = "0.50.0"
+version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "321d20bf105b6871a49da44c5fbb93e90a7cd6178ea5a9fe6cbc1e6d4504bc5e"
+checksum = "aac0b255932a9cd52fbfd664b67957f9f2e095ae4711cb0e41b4e291edef94c2"
 dependencies = [
  "caseless",
  "entities",
+ "finl_unicode",
  "jetscii",
  "phf",
  "phf_codegen",
  "rustc-hash 2.1.1",
  "smallvec",
  "typed-arena",
- "unicode_categories",
 ]
 
 [[package]]
@@ -1196,6 +1215,12 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
 
+[[package]]
+name = "finl_unicode"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9844ddc3a6e533d62bba727eb6c28b5d360921d5175e9ff0f1e621a5c590a4d5"
+
 [[package]]
 name = "flate2"
 version = "1.1.2"
@@ -2701,9 +2726,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.7.5"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
 [[package]]
 name = "memmap2"
@@ -3232,6 +3257,17 @@ version = "5.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
 
+[[package]]
+name = "ra-ap-rustc_lexer"
+version = "0.167.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ff5a3b958382dbdfb5bd325ad10643da18f83b3894485908b5d20b37abc0a"
+dependencies = [
+ "memchr",
+ "unicode-ident",
+ "unicode-properties",
+]
+
 [[package]]
 name = "rand"
 version = "0.9.2"
@@ -4322,6 +4358,12 @@ version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"
 
+[[package]]
+name = "textwrap"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -4768,9 +4810,9 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.19"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 
 [[package]]
 name = "unicode-normalization"
@@ -4781,6 +4823,12 @@ dependencies = [
  "tinyvec",
 ]
 
+[[package]]
+name = "unicode-properties"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
+
 [[package]]
 name = "unicode-segmentation"
 version = "1.12.0"
@@ -4793,12 +4841,6 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
 
-[[package]]
-name = "unicode_categories"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
-
 [[package]]
 name = "unsafe-libyaml"
 version = "0.2.11"
diff --git a/Cargo.toml b/Cargo.toml
index 7636edf2..2be32280 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,7 +54,7 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd
 clap = { version = "4", default-features = false }
 clean-path = { version = "0.2", default-features = false }
 comfy-table = { version = "7", default-features = false }
-comrak = { version = "0.50", default-features = false }
+comrak = { version = "0.52", default-features = false }
 convert_case = { version = "0.11", default-features = false }
 crossbeam-channel = { version = "0.5", default-features = false }
 crossterm = { version = "0.29", default-features = false }
@@ -98,7 +98,9 @@ proc-macro2 = { version = "1", default-features = false }
 proptest = { version = "1", default-features = false }
 quick-xml = { version = "0.39", default-features = false }
 quote = { version = "1", default-features = false }
+ra-ap-rustc_lexer = { version = "0.167", default-features = false }
 rayon = { version = "1", default-features = false }
+regex = { version = "1", default-features = false }
 relative-path = { version = "2", default-features = false }
 reqwest = { version = "0.12", default-features = false }
 reqwest-eventsource = { version = "0.6", default-features = false }
@@ -125,6 +127,7 @@ strip-ansi-escapes = { version = "0.2", default-features = false }
 syn = { version = "2", default-features = false }
 syntect = { version = "5.3", default-features = false }
 test-log = { version = "0.2", default-features = false, features = ["trace"] }
+textwrap = { version = "0.16", default-features = false }
 thiserror = { version = "2", default-features = false }
 timeago = { version = "0.6", default-features = false }
 tokio = { version = "1", default-features = false, features = ["full"] }
@@ -136,6 +139,7 @@ tracing = { version = "0.1", default-features = false }
 tracing-subscriber = { version = "0.3", default-features = false }
 two-face = { version = "0.5", default-features = false }
 typetag = { version = "0.2", default-features = false }
+unicode-segmentation = { version = "1", default-features = false }
 unicode-width = { version = "0.2", default-features = false }
 url = { version = "2", default-features = false }
 which = { version = "8", default-features = false }
diff --git a/crates/contrib/comfort/Cargo.toml b/crates/contrib/comfort/Cargo.toml
new file mode 100644
index 00000000..7f1d4302
--- /dev/null
+++ b/crates/contrib/comfort/Cargo.toml
@@ -0,0 +1,36 @@
+[package]
+description = "Semantic line-break formatter for Rust doc comments."
+name = "comfort"
+
+authors.workspace = true
+documentation.workspace = true
+edition.workspace = true
+homepage.workspace = true
+license-file.workspace = true
+publish.workspace = true
+readme.workspace = true
+repository.workspace = true
+version.workspace = true
+
+[dependencies]
+cargo_metadata = { workspace = true }
+clap = { workspace = true, features = ["std", "derive", "help", "usage", "error-context"] }
+comrak = { workspace = true }
+ignore = { workspace = true }
+ra-ap-rustc_lexer = { workspace = true }
+regex = { workspace = true, features = ["std", "perf", "unicode-perl"] }
+similar = { workspace = true, features = ["text"] }
+textwrap = { workspace = true }
+thiserror = { workspace = true }
+unicode-segmentation = { workspace = true }
+unicode-width = { workspace = true }
+
+[dev-dependencies]
+indoc = { workspace = true }
+pretty_assertions = { workspace = true, features = ["std"] }
+
+[lints]
+workspace = true
+
+[lib]
+doctest = false
diff --git a/crates/contrib/comfort/src/bin/cargo-comfort.rs b/crates/contrib/comfort/src/bin/cargo-comfort.rs
new file mode 100644
index 00000000..78186415
--- /dev/null
+++ b/crates/contrib/comfort/src/bin/cargo-comfort.rs
@@ -0,0 +1,3 @@
+fn main() -> std::process::ExitCode {
+    comfort::cli_main()
+}
diff --git a/crates/contrib/comfort/src/bin/comfort.rs b/crates/contrib/comfort/src/bin/comfort.rs
new file mode 100644
index 00000000..78186415
--- /dev/null
+++ b/crates/contrib/comfort/src/bin/comfort.rs
@@ -0,0 +1,3 @@
+fn main() -> std::process::ExitCode {
+    comfort::cli_main()
+}
diff --git a/crates/contrib/comfort/src/cli.rs b/crates/contrib/comfort/src/cli.rs
new file mode 100644
index 00000000..e8896564
--- /dev/null
+++ b/crates/contrib/comfort/src/cli.rs
@@ -0,0 +1,159 @@
+//! CLI argument definitions.
+//!
+//! Two binaries call the same `cli_main` entry point: `comfort` (direct) and
+//! `cargo-comfort` (a cargo subcommand).
+//! The binary entry detects which one it was invoked as, strips the leading
+//! `comfort` argv inserted by cargo, and adjusts defaults — direct invocation
+//! defaults to stdin/stdout, cargo invocation defaults to `--workspace`.
+
+use std::path::PathBuf;
+
+use clap::Parser;
+
+use crate::DEFAULT_MAX_WIDTH;
+
+/// Source language to format.
+/// With [`Auto`], per-file detection (extension or `--stdin-filename`)
+/// determines the format and workspace/directory walks include both Rust and
+/// Markdown files.
+/// With an explicit language, every selected file is formatted as that language
+/// and walks filter to its extensions only.
+///
+/// [`Auto`]: Language::Auto
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, clap::ValueEnum)]
+#[clap(rename_all = "kebab-case")]
+pub enum Language {
+    /// Detect per file: `.rs` → Rust, `.md`/`.markdown` → Markdown,
+    /// everything else → Rust (the stdin default and the dominant use case).
+    #[default]
+    Auto,
+    /// Force Rust mode regardless of extension.
+    Rust,
+    /// Force Markdown mode regardless of extension.
+    Markdown,
+}
+
+impl Language {
+    /// Resolve the effective format for a given file path.
+    /// `None` for `path` means the caller has no filename hint (e.g. stdin
+    /// without `--stdin-filename`), in which case `Auto` defaults to Rust.
+    #[must_use]
+    pub fn resolve(self, path: Option<&std::path::Path>) -> Format {
+        match self {
+            Self::Rust => Format::Rust,
+            Self::Markdown => Format::Markdown,
+            Self::Auto => match path.and_then(|p| p.extension()).and_then(|e| e.to_str()) {
+                Some("md" | "markdown") => Format::Markdown,
+                _ => Format::Rust,
+            },
+        }
+    }
+}
+
+/// Resolved per-file format used by [`run`] to dispatch to the correct
+/// pipeline.
+///
+/// [`run`]: crate::run::run
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Format {
+    Rust,
+    Markdown,
+}
+
+#[cfg(test)]
+#[path = "cli_tests.rs"]
+mod tests;
+
+/// How the binary was invoked.
+/// Determines whether the empty-args default is stdin (direct) or `--workspace`
+/// (cargo subcommand).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Invocation {
+    Direct,
+    Cargo,
+}
+
+#[derive(Debug, Parser)]
+#[command(
+    name = "comfort",
+    about = "Format Rust doc comments with semantic line breaks.",
+    long_about = "Reflows outer (`///`) and inner (`//!`) doc-comment blocks using semantic line \
+                  breaks (one sentence per line), with an optional `--max-width` safety net. \
+                  Inline `//` comments and `/** */` block-style doc comments are left untouched.",
+    version
+)]
+pub struct Cli {
+    /// Files or directories to format.
+    /// Directories are walked recursively; `.gitignore` is honored.
+    /// If no paths are given, the tool reads from stdin and writes to stdout
+    /// (direct invocation) or walks the whole workspace (cargo subcommand).
+    ///
+    /// Mutually exclusive with `--workspace`, `--package`, and `--exclude`.
+    #[arg(conflicts_with_all = ["workspace", "packages", "exclude"])]
+    pub paths: Vec<PathBuf>,
+
+    /// Format every Rust and Markdown file under the current cargo workspace.
+    /// Default for `cargo comfort`; explicit for `comfort`.
+    #[arg(long)]
+    pub workspace: bool,
+
+    /// Limit the workspace walk to the named package(s).
+    /// Repeat the flag for multiple packages.
+    /// Implies workspace mode.
+    #[arg(short = 'p', long = "package", value_name = "SPEC")]
+    pub packages: Vec<String>,
+
+    /// Exclude the named package(s) from the workspace walk.
+    /// Repeat the flag for multiple packages.
+    /// Implies workspace mode.
+    #[arg(long = "exclude", value_name = "SPEC")]
+    pub exclude: Vec<String>,
+
+    /// Check whether files would change; print a diff and exit non-zero if any
+    /// do.
+    /// Never writes to disk.
+    #[arg(long)]
+    pub check: bool,
+
+    /// Print the path of each changed file to stdout, one per line.
+    /// In write mode, lists files that were reformatted; in `--check` mode,
+    /// lists files that would be reformatted (and suppresses the diff).
+    #[arg(long)]
+    pub list_changed: bool,
+
+    /// Force a specific source language.
+    /// With `auto` (default), detect from each file's extension and let
+    /// workspace/directory walks pick up both Rust and Markdown.
+    /// With `rust` or `markdown`, every selected file is formatted in that mode
+    /// and walks filter to its extensions only.
+    #[arg(long, value_enum, default_value_t = Language::Auto)]
+    pub language: Language,
+
+    /// Also canonicalize the markdown structure of each formatted body: align
+    /// tables, normalise list markers, prefer fenced over indented code blocks,
+    /// etc. Off by default — in default mode, only paragraph prose gets
+    /// reflowed and everything else is preserved byte-for-byte.
+    #[arg(long)]
+    pub format_markdown: bool,
+
+    /// Convert inline markdown links to reference-style links and move all
+    /// reference definitions to the bottom of the body.
+    /// Adaptive: shortcut form `[text]` where possible, full form
+    /// `[text][label]` for collisions.
+    /// Independent of `--format-markdown` — enable either, both, or neither.
+    #[arg(long)]
+    pub reference_links: bool,
+
+    /// Maximum line width for reflow.
+    /// Long sentences wrap at word boundaries within sembr blocks.
+    /// `0` disables width wrapping.
+    #[arg(long, default_value_t = DEFAULT_MAX_WIDTH)]
+    pub max_width: usize,
+
+    /// The original filename for content piped via stdin.
+    /// In `--language auto` (default), the extension drives format detection —
+    /// e.g. `--stdin-filename notes.md` switches to Markdown mode.
+    /// Also improves diagnostic messages; defaults to `<stdin>`.
+    #[arg(long, value_name = "PATH")]
+    pub stdin_filename: Option<PathBuf>,
+}
diff --git a/crates/contrib/comfort/src/cli_tests.rs b/crates/contrib/comfort/src/cli_tests.rs
new file mode 100644
index 00000000..cf551126
--- /dev/null
+++ b/crates/contrib/comfort/src/cli_tests.rs
@@ -0,0 +1,69 @@
+//! Tests for the language resolution rules: which `Format` we end up with given
+//! the `--language` flag and an optional filename hint.
+
+use std::path::Path;
+
+use pretty_assertions::assert_eq;
+
+use super::{Format, Language};
+
+#[test]
+fn auto_with_rust_extension_resolves_to_rust() {
+    assert_eq!(
+        Language::Auto.resolve(Some(Path::new("foo.rs"))),
+        Format::Rust
+    );
+}
+
+#[test]
+fn auto_with_markdown_extension_resolves_to_markdown() {
+    assert_eq!(
+        Language::Auto.resolve(Some(Path::new("foo.md"))),
+        Format::Markdown
+    );
+    assert_eq!(
+        Language::Auto.resolve(Some(Path::new("foo.markdown"))),
+        Format::Markdown
+    );
+}
+
+#[test]
+fn auto_with_unknown_extension_falls_back_to_rust() {
+    assert_eq!(
+        Language::Auto.resolve(Some(Path::new("foo.txt"))),
+        Format::Rust
+    );
+}
+
+#[test]
+fn auto_with_no_filename_hint_falls_back_to_rust() {
+    // Stdin without `--stdin-filename`: no extension to detect from.
+    assert_eq!(Language::Auto.resolve(None), Format::Rust);
+}
+
+#[test]
+fn explicit_rust_overrides_markdown_extension() {
+    // The pushed-back case: user has rust code in a `.md` file (slides,
+    // generated stub, whatever) and forces rust mode.
+    assert_eq!(
+        Language::Rust.resolve(Some(Path::new("slides.md"))),
+        Format::Rust
+    );
+}
+
+#[test]
+fn explicit_markdown_overrides_rust_extension() {
+    // Inverse of the above: rare but symmetric.
+    assert_eq!(
+        Language::Markdown.resolve(Some(Path::new("notes.rs"))),
+        Format::Markdown
+    );
+}
+
+#[test]
+fn explicit_language_wins_over_missing_hint() {
+    // Stdin with `--language markdown` and no `--stdin-filename`: still
+    // markdown.
+    assert_eq!(Language::Markdown.resolve(None), Format::Markdown);
+    assert_eq!(Language::Rust.resolve(None), Format::Rust);
+}
diff --git a/crates/contrib/comfort/src/extract.rs b/crates/contrib/comfort/src/extract.rs
new file mode 100644
index 00000000..f094eb37
--- /dev/null
+++ b/crates/contrib/comfort/src/extract.rs
@@ -0,0 +1,228 @@
+//! Doc-comment block extraction from Rust source.
+//!
+//! A *block* is a maximal run of consecutive line doc-comments — either outer
+//! (`///`) or inner (`//!`) — sharing the same indentation and separated only
+//! by a single newline.
+//! Blank lines inside the block (i.e.
+//! `///\n` with no body content) are part of the block; a truly blank source
+//! line ends it.
+
+use std::ops::Range;
+
+use ra_ap_rustc_lexer::{DocStyle, FrontmatterAllowed, TokenKind, tokenize};
+
+/// A contiguous run of `///` or `//!` lines in the source.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Block {
+    /// Byte range covering the block in the original source, starting at the
+    /// indent of the first line and ending just past the last comment line's
+    /// last character (not including its trailing newline).
+    pub range: Range<usize>,
+
+    /// Whitespace prefix shared by every line of the block.
+    pub indent: String,
+
+    /// Outer (`///`) or inner (`//!`).
+    pub style: DocStyle,
+
+    /// Markdown body, one entry per source line, with the prefix and at most
+    /// one separator space stripped.
+    /// Empty entries represent blank doc-comment lines (a `///` with nothing
+    /// after it).
+    pub lines: Vec<String>,
+}
+
+impl Block {
+    /// The marker for this block's style: `///` or `//!`.
+    #[must_use]
+    pub fn marker(&self) -> &'static str {
+        match self.style {
+            DocStyle::Outer => "///",
+            DocStyle::Inner => "//!",
+        }
+    }
+
+    /// Source-column overhead of this block's per-line prefix: indent, marker,
+    /// and separator space.
+    /// Used to compute how much of a global `max_width` budget is left for body
+    /// content.
+    /// Counted in bytes; for space-indented source this matches the rendered
+    /// column count, while each tab in the indent is counted as one column.
+    #[must_use]
+    pub fn prefix_width(&self) -> usize {
+        self.indent.len() + self.marker().len() + 1
+    }
+
+    /// Reassemble the block as Rust source from a freshly formatted markdown
+    /// body.
+    /// Lines are split on `\n`; non-empty lines get ` {indent}{marker}  `,
+    /// empty lines get `{indent}{marker}` with no trailing space.
+    #[must_use]
+    pub fn reassemble(&self, formatted_body: &str) -> String {
+        let marker = self.marker();
+        let mut out = String::with_capacity(formatted_body.len() + self.indent.len() * 4);
+
+        for (i, line) in formatted_body.split('\n').enumerate() {
+            if i > 0 {
+                out.push('\n');
+            }
+            out.push_str(&self.indent);
+            out.push_str(marker);
+            if !line.is_empty() {
+                out.push(' ');
+                out.push_str(line);
+            }
+        }
+
+        out
+    }
+}
+
+/// Find all outer/inner doc-comment blocks in `source`, in source order.
+///
+/// Only line doc-comments are recognised.
+/// Block doc-comments (`/** */`, `/*! */`) and regular `//` comments are
+/// ignored.
+/// Doc-comments that don't start at the line's first non-whitespace character
+/// (e.g. a `///` trailing some code) are also skipped.
+#[must_use]
+pub fn find_blocks(source: &str) -> Vec<Block> {
+    let bytes = source.as_bytes();
+    let mut blocks: Vec<Block> = Vec::new();
+    let mut offset: usize = 0;
+
+    // Pending block we're still extending across consecutive lines.
+    let mut pending: Option<PendingBlock> = None;
+
+    for token in tokenize(source, FrontmatterAllowed::Yes) {
+        let token_start = offset;
+        let token_end = offset + token.len as usize;
+        offset = token_end;
+
+        match token.kind {
+            TokenKind::LineComment {
+                doc_style: Some(style),
+            } => {
+                // Confirm the comment starts at the beginning of a logical line
+                // (only whitespace between the previous '\n' and this token).
+                let line_start = line_start_of(bytes, token_start);
+                let leading = &source[line_start..token_start];
+                if !leading.chars().all(|c| c == ' ' || c == '\t') {
+                    // Trailing comment after code; flush any pending block.
+                    if let Some(prev) = pending.take() {
+                        blocks.push(prev.into_block());
+                    }
+                    continue;
+                }
+
+                let body = extract_body(&source[token_start..token_end], style);
+
+                match pending.as_mut() {
+                    Some(prev)
+                        if prev.style == style
+                            && prev.indent == leading
+                            && prev.next_line_start == line_start =>
+                    {
+                        prev.lines.push(body);
+                        prev.end = token_end;
+                    }
+                    _ => {
+                        if let Some(prev) = pending.take() {
+                            blocks.push(prev.into_block());
+                        }
+                        pending = Some(PendingBlock {
+                            start: line_start,
+                            end: token_end,
+                            indent: leading.to_owned(),
+                            style,
+                            lines: vec![body],
+                            next_line_start: line_start,
+                        });
+                    }
+                }
+            }
+            TokenKind::Whitespace => {
+                // The block extends across a single `\n`. Two or more newlines
+                // (a truly blank source line) break the block.
+                let Some(prev) = pending.as_mut() else {
+                    continue;
+                };
+                let ws = &source[token_start..token_end];
+                let mut newline_idx = None;
+                let mut newlines = 0_usize;
+                for (i, b) in ws.bytes().enumerate() {
+                    if b == b'\n' {
+                        newlines += 1;
+                        newline_idx = Some(i);
+                    }
+                }
+                if newlines == 1 {
+                    // Record where the next line starts so we can confirm
+                    // the next doc comment sits on the very next source line.
+                    let idx = newline_idx.unwrap_or(0);
+                    prev.next_line_start = token_start + idx + 1;
+                } else if let Some(prev) = pending.take() {
+                    blocks.push(prev.into_block());
+                }
+            }
+            _ => {
+                if let Some(prev) = pending.take() {
+                    blocks.push(prev.into_block());
+                }
+            }
+        }
+    }
+
+    if let Some(prev) = pending {
+        blocks.push(prev.into_block());
+    }
+
+    blocks
+}
+
+struct PendingBlock {
+    start: usize,
+    end: usize,
+    indent: String,
+    style: DocStyle,
+    lines: Vec<String>,
+    // Byte position where the next line begins, used to confirm that the
+    // following doc-comment (if any) is the first content on its line.
+    next_line_start: usize,
+}
+
+impl PendingBlock {
+    fn into_block(self) -> Block {
+        Block {
+            range: self.start..self.end,
+            indent: self.indent,
+            style: self.style,
+            lines: self.lines,
+        }
+    }
+}
+
+/// Strip the `///` / `//!` marker and an optional single separator space.
+fn extract_body(raw: &str, style: DocStyle) -> String {
+    let marker = match style {
+        DocStyle::Outer => "///",
+        DocStyle::Inner => "//!",
+    };
+    let rest = raw.strip_prefix(marker).unwrap_or(raw);
+    // Strip at most one separator space; preserve additional indentation so
+    // markdown code blocks indented by 4+ spaces survive the round-trip.
+    rest.strip_prefix(' ').unwrap_or(rest).to_owned()
+}
+
+/// Walk backwards from `pos` to find the byte index just past the previous `\n`
+/// (or 0 if there is none).
+fn line_start_of(bytes: &[u8], pos: usize) -> usize {
+    bytes[..pos]
+        .iter()
+        .rposition(|b| *b == b'\n')
+        .map_or(0, |i| i + 1)
+}
+
+#[cfg(test)]
+#[path = "extract_tests.rs"]
+mod tests;
diff --git a/crates/contrib/comfort/src/extract_tests.rs b/crates/contrib/comfort/src/extract_tests.rs
new file mode 100644
index 00000000..7c22a8fe
--- /dev/null
+++ b/crates/contrib/comfort/src/extract_tests.rs
@@ -0,0 +1,199 @@
+use indoc::indoc;
+use pretty_assertions::assert_eq;
+use ra_ap_rustc_lexer::DocStyle;
+
+use super::{Block, find_blocks};
+
+#[test]
+fn finds_single_outer_block() {
+    let source = "/// One line.\nfn f() {}\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    // The range spans `/// One line.` (13 bytes) and stops before the newline.
+    assert_eq!(&source[found[0].range.clone()], "/// One line.");
+    assert_eq!(found[0].style, DocStyle::Outer);
+    assert_eq!(found[0].indent, "");
+    assert_eq!(found[0].lines, vec!["One line."]);
+}
+
+#[test]
+fn finds_inner_doc_block() {
+    let source = "//! Module docs.\n//! Second line.\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["Module docs.", "Second line."]);
+    assert_eq!(found[0].style, DocStyle::Inner);
+}
+
+#[test]
+fn groups_consecutive_outer_lines_into_one_block() {
+    let source = indoc! {"
+        /// First.
+        /// Second.
+        /// Third.
+        fn f() {}
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["First.", "Second.", "Third."]);
+}
+
+#[test]
+fn preserves_empty_doc_lines_within_block() {
+    let source = indoc! {"
+        /// First paragraph.
+        ///
+        /// Second paragraph.
+        fn f() {}
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec![
+        "First paragraph.",
+        "",
+        "Second paragraph."
+    ]);
+}
+
+#[test]
+fn separates_blocks_across_blank_source_line() {
+    let source = indoc! {"
+        /// First block.
+
+        /// Second block.
+        fn f() {}
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 2);
+    assert_eq!(found[1].lines, vec!["Second block."]);
+    assert_eq!(found[0].lines, vec!["First block."]);
+}
+
+#[test]
+fn separates_blocks_across_intervening_code() {
+    let source = indoc! {"
+        /// First.
+        fn f() {}
+        /// Second.
+        fn g() {}
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 2);
+    assert_eq!(found[1].lines, vec!["Second."]);
+    assert_eq!(found[0].lines, vec!["First."]);
+}
+
+#[test]
+fn separates_outer_from_inner_block() {
+    // Adjacent `//!` and `///` lines still form two blocks: style never mixes.
+    let source = indoc! {"
+        //! Module doc.
+        /// Item doc.
+        fn f() {}
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 2);
+    assert_eq!(found[1].style, DocStyle::Outer);
+    assert_eq!(found[0].style, DocStyle::Inner);
+}
+
+#[test]
+fn captures_indentation() {
+    let source = indoc! {"
+        mod m {
+            /// Indented doc.
+            /// Second line.
+            fn f() {}
+        }
+    "};
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["Indented doc.", "Second line."]);
+    assert_eq!(found[0].indent, "    ");
+}
+
+#[test]
+fn skips_trailing_doc_after_code_on_same_line() {
+    // A `///` counts as a doc comment only when it is the first thing on its
+    // line. After code on the same line the lexer still reports a
+    // doc-comment token, but it is misplaced and must be ignored.
+    let source = "let x = 5; /// not really a doc\n/// real doc\nfn f() {}\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["real doc"]);
+}
+
+#[test]
+fn ignores_triple_slash_inside_string_literals() {
+    let source = "fn f() { let s = \"/// not a doc\"; }\n/// real doc\nfn g() {}\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["real doc"]);
+}
+
+#[test]
+fn ignores_block_doc_comments() {
+    let source = "/** outer block doc */\n/*! inner block doc */\n/// real doc\nfn f() {}\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["real doc"]);
+}
+
+#[test]
+fn preserves_extra_leading_whitespace_for_markdown_code_blocks() {
+    // Exactly one separator space is removed after the marker, so
+    // `///     foo` keeps four leading spaces in the body (`    foo`) and is
+    // still a markdown indented code block.
+    let source = "/// para\n///     code_block_line\n";
+    let found = find_blocks(source);
+    assert_eq!(found.len(), 1);
+    assert_eq!(found[0].lines, vec!["para", "    code_block_line"]);
+}
+
+#[test]
+fn block_marker_returns_outer_or_inner() {
+    // This test only varies `style`, so the remaining fields are filled
+    // with empty placeholders.
+    let with_style = |style| Block {
+        range: 0..0,
+        indent: String::new(),
+        style,
+        lines: vec![],
+    };
+    let outer = with_style(DocStyle::Outer);
+    let inner = with_style(DocStyle::Inner);
+
+    // One marker per doc style.
+    assert_eq!(outer.marker(), "///");
+    assert_eq!(inner.marker(), "//!");
+}
+
+#[test]
+fn reassemble_uses_indent_and_marker() {
+    let indented = Block {
+        range: 0..0,
+        indent: String::from("    "),
+        style: DocStyle::Outer,
+        lines: Vec::new(),
+    };
+
+    // Every output line carries indent + marker; the blank line gets the
+    // marker without a trailing separator space.
+    let expected = "    /// First line.\n    ///\n    /// Second paragraph.";
+    let reassembled = indented.reassemble("First line.\n\nSecond paragraph.");
+    assert_eq!(reassembled, expected);
+}
+
+#[test]
+fn reassemble_does_not_add_trailing_space_on_empty_lines() {
+    let unindented = Block {
+        range: 0..0,
+        indent: String::new(),
+        style: DocStyle::Outer,
+        lines: Vec::new(),
+    };
+    let reassembled = unindented.reassemble("a\n\nb");
+    assert_eq!(reassembled, "/// a\n///\n/// b");
+    // The empty line must be a bare `///`, never `/// ` with a trailing space.
+    assert!(!reassembled.contains("/// \n"));
+}
diff --git a/crates/contrib/comfort/src/format.rs b/crates/contrib/comfort/src/format.rs
new file mode 100644
index 00000000..c482da9c
--- /dev/null
+++ b/crates/contrib/comfort/src/format.rs
@@ -0,0 +1,1706 @@
+//! Pure source-string-in, source-string-out pipeline.
+//!
+//! The pipeline runs in three layers:
+//!
+//! 1. [`format_source`] finds `///` / `//!` doc-comment blocks via
+//!    [`find_blocks`] and splices their reformatted bodies back into the source
+//!    byte-for-byte.
+//! 2. [`reflow_markdown`] parses each block's body with comrak, walks the AST
+//!    recursively, and hands each reflowable paragraph's text to
+//!    [`reflow_paragraph`].
+//!    Leaf blocks that aren't paragraphs — reference link definitions, code
+//!    blocks, headings, tables, HTML blocks, thematic breaks — are preserved
+//!    verbatim, as are paragraphs that contain a hard line break (`  \n ` or
+//!    `\\\n`).
+//! 3. [`reflow_paragraph`] splits the paragraph into sentences with the
+//!    [`sentence`] module and width-wraps each sentence via `textwrap`, keeping
+//!    atomic tokens (URLs, paths, identifiers) intact even when they exceed
+//!    `max_width`.
+//!
+//! Containers we descend into: [`BlockQuote`], [`List`], [`Item`],
+//! [`TaskItem`], [`Alert`], [`MultilineBlockQuote`], [`FootnoteDefinition`],
+//! [`BlockDirective`].
+//! Each contributes a per-line continuation prefix that gets applied to every
+//! line after the first (the first line's prefix is already in the source,
+//! outside the Paragraph's sourcepos range).
+//!
+//! [`Alert`]: NodeValue::Alert
+//! [`BlockDirective`]: NodeValue::BlockDirective
+//! [`BlockQuote`]: NodeValue::BlockQuote
+//! [`FootnoteDefinition`]: NodeValue::FootnoteDefinition
+//! [`Item`]: NodeValue::Item
+//! [`List`]: NodeValue::List
+//! [`MultilineBlockQuote`]: NodeValue::MultilineBlockQuote
+//! [`TaskItem`]: NodeValue::TaskItem
+//! [`sentence`]: crate::sentence
+
+use std::{collections::HashMap, ops::Range, sync::Arc};
+
+use comrak::{
+    Arena, Options, ResolvedReference,
+    nodes::{AstNode, NodeValue, TableAlignment},
+    options::{BrokenLinkCallback, BrokenLinkReference, Extension, ListStyleType, Parse, Render},
+};
+use textwrap::WordSplitter;
+use unicode_width::UnicodeWidthStr;
+
+use crate::{extract::find_blocks, sentence::split_sentences};
+
+/// Selects which optional transformations the markdown pipeline layers on top
+/// of the always-on sembr reflow.
+/// Consumed by [`format_markdown_with`] and [`format_rust_source_with`].
+#[derive(Debug, Clone, Default)]
+pub struct FormatOptions {
+    /// Maximum line width passed to the sembr engine; `0` means no width wrap.
+    pub max_width: usize,
+    /// `--format-markdown`: canonicalize markdown structure (tables, list
+    /// markers, fences, etc.) via comrak's formatter plus our table aligner.
+    pub canonical: bool,
+    /// `--reference-links`: convert inline links to reference style and
+    /// consolidate definitions at the bottom of each body.
+    pub reference_links: bool,
+}
+
+/// Reformat every `///` and `//!` block in `source` and return the new text.
+///
+/// `max_width` is the ceiling on *source* line width.
+/// For each block the width given to the reflow step is `max_width` minus the
+/// block's prefix overhead (leading indent, `///` / `//!` marker, separator
+/// space), so the limit holds however deeply the doc comment is nested.
+///
+/// When no block needs reflow the result is a fresh `String` equal to
+/// `source`.
+#[must_use]
+pub fn format_source(source: &str, max_width: usize) -> String {
+    let opts = FormatOptions {
+        max_width,
+        ..FormatOptions::default()
+    };
+    format_rust_source_with(source, &opts)
+}
+
+/// Variant of [`format_source`] that additionally canonicalizes the markdown
+/// inside each `///` / `//!` block before reflowing it; see
+/// [`format_markdown_canonical`] for what canonicalisation entails.
+/// Only the *body* of each block is rewritten: the doc-comment scaffolding
+/// (`///` prefix, indentation, surrounding code) stays byte-preserved.
+#[must_use]
+pub fn format_source_canonical(source: &str, max_width: usize) -> String {
+    let opts = FormatOptions {
+        max_width,
+        canonical: true,
+        ..FormatOptions::default()
+    };
+    format_rust_source_with(source, &opts)
+}
+
+/// Option-aware Rust-source entry point.
+/// The body of every `///` / `//!` block is run through
+/// [`format_markdown_with`] using the caller's options, except that
+/// `max_width` is replaced by the width left after the block's prefix
+/// overhead.
+#[must_use]
+pub fn format_rust_source_with(source: &str, opts: &FormatOptions) -> String {
+    let process_block = |body: &str, effective_width: usize| -> String {
+        let mut block_opts = opts.clone();
+        block_opts.max_width = effective_width;
+        format_markdown_with(body, &block_opts)
+    };
+    format_source_impl(source, opts.max_width, process_block)
+}
+
+/// Option-aware markdown entry point.
+/// Runs the optional canonical and reference-link passes (in that order) and
+/// then the always-on sembr reflow.
+///
+/// The output ends with *exactly* as many newlines as the input did.
+/// Callers that map newlines back to source structure depend on this — e.g.
+/// nvim's conform.nvim `injected` formatter, which extracts the markdown body
+/// of Rust doc comments, runs comfort on it, and re-inserts the result.
+/// Collapsing `\n\n` to `\n` would silently drop the trailing empty `///` line
+/// on every save.
+#[must_use]
+pub fn format_markdown_with(body: &str, opts: &FormatOptions) -> String {
+    if body.is_empty() {
+        return String::new();
+    }
+
+    // Length of the `\n` run that ends the input; re-applied verbatim below.
+    let newline_run = body.len() - body.trim_end_matches('\n').len();
+
+    let structured = if opts.canonical {
+        // Comrak's `format_commonmark` always writes links in inline form
+        // (`[text](url)`) and drops the user's reference definitions while
+        // doing so. Reference-form links are therefore replaced by sentinels
+        // and their definitions stashed out-of-band before the canonical
+        // pass; both are put back once it has run.
+        let protection = protect_reference_form_links(body);
+        let rendered = match canonicalize_markdown(&protection.protected_text) {
+            Some(markdown) => align_tables(&markdown),
+            // Formatter failure: continue with the protected input as-is.
+            None => protection.protected_text.clone(),
+        };
+        restore_protected_reference_links(&rendered, &protection)
+    } else {
+        body.to_owned()
+    };
+    // Inline → reference-style conversion is opt-in as well.
+    let linked = if opts.reference_links {
+        extract_reference_links(&structured)
+    } else {
+        structured
+    };
+    let reflowed = reflow_markdown(&linked, opts.max_width);
+
+    // `canonicalize_markdown` and `extract_reference_links` only track whether
+    // a trailing newline exists and collapse several into one, so strip what
+    // is there and append the run counted from the input.
+    let content = reflowed.trim_end_matches('\n');
+    let mut out = String::with_capacity(content.len() + newline_run);
+    out.push_str(content);
+    // Re-append the original run, one `\n` at a time.
+    out.extend(std::iter::repeat('\n').take(newline_run));
+    out
+}
+
+/// Shared driver for the `///`-block pipeline.
+/// `process_body` is supplied by the caller and is what differs between
+/// default mode, `--format-markdown`, and `--reference-links`.
+fn format_source_impl<F>(source: &str, max_width: usize, process_body: F) -> String
+where
+    F: Fn(&str, usize) -> String,
+{
+    let blocks = find_blocks(source);
+    if blocks.is_empty() {
+        return source.to_owned();
+    }
+
+    let mut rebuilt = String::with_capacity(source.len());
+    // Byte offset in `source` up to which text has already been emitted.
+    let mut copied_to = 0;
+
+    for block in blocks {
+        // Everything between the previous block and this one is copied as-is.
+        rebuilt.push_str(&source[copied_to..block.range.start]);
+
+        // The per-line prefix comes out of the user's budget. A prefix at
+        // least as wide as `max_width` saturates to 0 (no width wrap)
+        // instead of wrapping every word onto its own line: the constraint
+        // cannot be met there, so the block degrades to pure sembr.
+        let effective_width = match max_width {
+            0 => 0,
+            budget => budget.saturating_sub(block.prefix_width()),
+        };
+        let reflowed = process_body(&block.lines.join("\n"), effective_width);
+        rebuilt.push_str(&block.reassemble(&reflowed));
+
+        copied_to = block.range.end;
+    }
+
+    rebuilt.push_str(&source[copied_to..]);
+    rebuilt
+}
+
+/// Canonicalize the markdown structure of `body` (align tables, normalise list
+/// markers, prefer fenced code blocks, etc.), then reflow its paragraphs with
+/// semantic line breaks.
+///
+/// [`comrak::format_commonmark`] performs the canonicalisation using our render
+/// options (`canonical_render_options` explains the choices); width handling
+/// stays with the downstream sembr pipeline.
+///
+/// The input's trailing-newline convention is kept: doc-comment block bodies
+/// (no trailing newline) round-trip without one, while markdown files (usually
+/// newline-terminated) keep theirs.
+#[must_use]
+pub fn format_markdown_canonical(body: &str, max_width: usize) -> String {
+    let opts = FormatOptions {
+        max_width,
+        canonical: true,
+        ..FormatOptions::default()
+    };
+    format_markdown_with(body, &opts)
+}
+
+/// Render `body` through comrak's `format_commonmark` and return the canonical
+/// markdown, ending with a newline only if the input did.
+/// Yields `None` when the formatter reports an error; callers are expected to
+/// fall back to their input then.
+fn canonicalize_markdown(body: &str) -> Option<String> {
+    let arena = Arena::new();
+    let parse_options = comrak_options();
+    let root = comrak::parse_document(&arena, body, &parse_options);
+
+    let render_options = canonical_render_options();
+    let mut rendered = String::new();
+    comrak::format_commonmark(root, &render_options, &mut rendered).ok()?;
+
+    // Comrak's formatter appends a trailing newline unconditionally. Bring
+    // the result in line with the input's convention so callers (block
+    // reassembly for `///` blocks, file writes for `.md` files) always see
+    // the same shape.
+    let wants_newline = body.ends_with('\n');
+    if wants_newline && !rendered.ends_with('\n') {
+        rendered.push('\n');
+    } else if !wants_newline && rendered.ends_with('\n') {
+        let kept = rendered.trim_end_matches('\n').len();
+        rendered.truncate(kept);
+    }
+
+    Some(rendered)
+}
+
+/// Re-parse `text`, locate its markdown tables, and rewrite each with columns
+/// padded for visual alignment.
+/// Alignment markers in the separator row are taken from the AST's
+/// [`TableAlignment`] (the colon pattern in the source), not re-scanned.
+///
+/// Tables are the [`NodeValue::Table`] nodes; each cell's content is the
+/// sourcepos slice of its [`NodeValue::TableCell`], so inline markdown
+/// (`**bold**`, `` `code` ``) and escapes (`\|`) survive verbatim.
+fn align_tables(text: &str) -> String {
+    if text.is_empty() {
+        return String::new();
+    }
+
+    let arena = Arena::new();
+    let options = comrak_options();
+    let root = comrak::parse_document(&arena, text, &options);
+    let line_starts = line_start_offsets(text);
+
+    let mut tables: Vec<Replacement> = Vec::new();
+    collect_table_replacements(root, text, &line_starts, &mut tables);
+    if tables.is_empty() {
+        return text.to_owned();
+    }
+
+    // Splice left to right: copy the untouched gap before each table, then
+    // the aligned rendering in place of the original table bytes.
+    tables.sort_by_key(|table| table.range.start);
+    let mut aligned = String::with_capacity(text.len());
+    let mut copied_to = 0;
+    for table in tables {
+        aligned.push_str(&text[copied_to..table.range.start]);
+        aligned.push_str(&table.text);
+        copied_to = table.range.end;
+    }
+    aligned.push_str(&text[copied_to..]);
+    aligned
+}
+
+/// Depth-first search of the AST that queues one [`Replacement`] per table.
+fn collect_table_replacements<'a>(
+    node: &'a AstNode<'a>,
+    text: &str,
+    line_starts: &[usize],
+    out: &mut Vec<Replacement>,
+) {
+    let data = node.data();
+    let NodeValue::Table(table_meta) = &data.value else {
+        for child in node.children() {
+            collect_table_replacements(child, text, line_starts, out);
+        }
+        return;
+    };
+    // A table is never searched for nested tables, replaced or not.
+    let Some(range) = sourcepos_to_byte_range(line_starts, text.len(), &data.sourcepos) else {
+        return;
+    };
+    let Some(mut aligned) = render_aligned_table(node, &table_meta.alignments, text, line_starts)
+    else {
+        return;
+    };
+    // The replacement ends with `\n` exactly when the replaced slice does.
+    let source_ends_with_newline = text[range.clone()].ends_with('\n');
+    if source_ends_with_newline && !aligned.ends_with('\n') {
+        aligned.push('\n');
+    } else if !source_ends_with_newline && aligned.ends_with('\n') {
+        aligned.truncate(aligned.trim_end_matches('\n').len());
+    }
+    out.push(Replacement {
+        range,
+        text: aligned,
+    });
+}
+
+/// Build the aligned markdown text for a single table node.
+/// Returns `None` when the table has no columns or no rows, or when a cell's
+/// sourcepos cannot be mapped to a byte range; the caller then leaves the
+/// source unchanged. Short rows are padded with empty cells when emitted.
+fn render_aligned_table<'a>(
+    table: &'a AstNode<'a>,
+    alignments: &[TableAlignment],
+    text: &str,
+    line_starts: &[usize],
+) -> Option<String> {
+    let num_cols = alignments.len();
+    if num_cols == 0 {
+        return None;
+    }
+
+    // Walk rows → cells, slicing each cell's source bytes via its sourcepos.
+    let mut rows: Vec<Vec<String>> = Vec::new();
+    for row_node in table.children() {
+        if !matches!(row_node.data().value, NodeValue::TableRow(_)) {
+            continue;
+        }
+        let mut cells: Vec<String> = Vec::new();
+        for cell_node in row_node.children() {
+            if !matches!(cell_node.data().value, NodeValue::TableCell) {
+                continue;
+            }
+            let cell_range =
+                sourcepos_to_byte_range(line_starts, text.len(), &cell_node.data().sourcepos)?;
+            // Trim the cell's source slice. Comrak's cell sourcepos usually
+            // covers the content between the `|` delimiters with any leading
+            // and trailing spaces, but trimming defensively handles both
+            // shapes.
+            let raw = text[cell_range].trim();
+            cells.push(raw.to_owned());
+        }
+        rows.push(cells);
+    }
+
+    if rows.is_empty() {
+        return None;
+    }
+
+    // Column widths: max display width per column, with a floor of 3 so
+    // the separator row's alignment markers (`:-:`, `---`) always fit.
+    // `UnicodeWidthStr::width` gives terminal-cell width — wide chars (CJK)
+    // count as 2, zero-width chars as 0, which matches what a human eye
+    // sees when scanning a column.
+    let mut col_widths = vec![3_usize; num_cols];
+    for row in &rows {
+        for (col, cell) in row.iter().enumerate() {
+            if col < num_cols {
+                col_widths[col] = col_widths[col].max(UnicodeWidthStr::width(cell.as_str()));
+            }
+        }
+    }
+
+    // Emit. GFM tables: row 0 is the header; the separator row follows
+    // (synthesised from `alignments`); remaining rows are data rows.
+    let mut out = String::new();
+    for (row_idx, row) in rows.iter().enumerate() {
+        emit_data_row(&mut out, row, &col_widths, alignments, num_cols);
+        if row_idx == 0 {
+            emit_separator_row(&mut out, &col_widths, alignments, num_cols);
+        }
+    }
+
+    Some(out)
+}
+
+/// Append one `| cell | cell |` line; columns missing from `row` render empty.
+fn emit_data_row(
+    out: &mut String,
+    row: &[String],
+    col_widths: &[usize],
+    alignments: &[TableAlignment],
+    num_cols: usize,
+) {
+    out.push('|');
+    for col in 0..num_cols {
+        let cell = row.get(col).map(String::as_str).unwrap_or_default();
+        out.push(' ');
+        out.push_str(&pad_cell(cell, col_widths[col], alignments[col]));
+        out.push_str(" |");
+    }
+    out.push('\n');
+}
+
+/// Append the header separator line: per column a run of dashes sized from
+/// `col_widths`, with a colon on each side the column is aligned to.
+fn emit_separator_row(
+    out: &mut String,
+    col_widths: &[usize],
+    alignments: &[TableAlignment],
+    num_cols: usize,
+) {
+    out.push('|');
+    for col in 0..num_cols {
+        let width = col_widths[col];
+        // Each colon takes the place of one dash.
+        let (lead, dashes, tail) = match alignments[col] {
+            TableAlignment::Left => (":", width.saturating_sub(1), ""),
+            TableAlignment::Right => ("", width.saturating_sub(1), ":"),
+            TableAlignment::Center => (":", width.saturating_sub(2), ":"),
+            TableAlignment::None => ("", width, ""),
+        };
+        out.push_str(&format!(" {lead}{}{tail} |", "-".repeat(dashes)));
+    }
+    out.push('\n');
+}
+
+/// Pad `content` with spaces to `width` display columns: right alignment pads
+/// on the left, centering splits the padding (odd column on the right), and
+/// left/unspecified alignment pads on the right.
+/// Content already at least `width` wide comes back unpadded.
+fn pad_cell(content: &str, width: usize, alignment: TableAlignment) -> String {
+    // Display columns still to fill; zero when the content is wide enough.
+    let missing = width.saturating_sub(UnicodeWidthStr::width(content));
+    // How many of those spaces go before and after the content.
+    let (before, after) = match alignment {
+        TableAlignment::Right => (missing, 0),
+        TableAlignment::Center => (missing / 2, missing - missing / 2),
+        TableAlignment::Left | TableAlignment::None => (0, missing),
+    };
+    format!("{}{content}{}", " ".repeat(before), " ".repeat(after))
+}
+
+// ---------------------------------------------------------------------------
+// `--reference-links`: convert inline links to reference style and
+// consolidate definitions at the bottom of the body.
+// ---------------------------------------------------------------------------
+
+/// Convert inline markdown links to reference-style links and move all
+/// reference definitions to the bottom of `text`.
+///
+/// Adaptive label strategy:
+///
+/// - Shortcut form `[text]` when the link's text can serve as a unique label.
+/// - Full form `[text][label]` when text collides with an already-used label
+///   for a different URL (label gets a `-N` suffix).
+///
+/// Pre-existing scattered reference definitions are also moved to the bottom
+/// and sorted alphabetically.
+/// The result ends with a newline when `text` did; empty input yields an
+/// empty string.
+fn extract_reference_links(text: &str) -> String {
+    if text.is_empty() {
+        return String::new();
+    }
+    let had_trailing_newline = text.ends_with('\n');
+
+    // Lift every existing `[label]: url "title"` definition out of the text;
+    // what remains is the prose, and the definitions come back as a list.
+    let (prose, existing_defs) = extract_existing_reference_definitions(text);
+
+    // Existing definitions seed the label map first, so inline links that
+    // point at an already-defined target reuse that definition.
+    let mut label_map = LabelMap::default();
+    let mut all_defs: Vec<LinkDef> = Vec::new();
+    for def in existing_defs {
+        label_map.register(&def);
+        all_defs.push(def);
+    }
+
+    // Find inline `Link` nodes in the AST and queue one replacement per
+    // link; definitions created along the way are appended to `all_defs`.
+    let arena = Arena::new();
+    let options = comrak_options();
+    let root = comrak::parse_document(&arena, &prose, &options);
+    let line_starts = line_start_offsets(&prose);
+
+    let mut replacements: Vec<Replacement> = Vec::new();
+    collect_inline_link_replacements(
+        root,
+        &prose,
+        &line_starts,
+        &mut label_map,
+        &mut all_defs,
+        &mut replacements,
+    );
+
+    // Splice the queued replacements into the prose, left to right.
+    let converted = if replacements.is_empty() {
+        prose
+    } else {
+        replacements.sort_by_key(|r| r.range.start);
+        let mut spliced = String::with_capacity(prose.len());
+        let mut copied_to = 0;
+        for replacement in &replacements {
+            spliced.push_str(&prose[copied_to..replacement.range.start]);
+            spliced.push_str(&replacement.text);
+            copied_to = replacement.range.end;
+        }
+        spliced.push_str(&prose[copied_to..]);
+        spliced
+    };
+
+    // All definitions go to the bottom of the body, sorted alphabetically
+    // by label.
+    let mut result = if all_defs.is_empty() {
+        converted
+    } else {
+        all_defs.sort_by(|a, b| a.label.cmp(&b.label));
+        let rendered_defs: Vec<String> = all_defs.iter().map(LinkDef::render).collect();
+        let body = converted.trim_end();
+        // A blank line separates prose from definitions; with no prose left
+        // the definitions start the output directly.
+        let separator = if body.is_empty() { "" } else { "\n\n" };
+        let joined = format!("{body}{separator}{}", rendered_defs.join("\n"));
+        // No trailing `\n` here; the adjustment below adds one back when
+        // the input had one.
+        joined.trim_end_matches('\n').to_owned()
+    };
+
+    // Restore the input's trailing newline if it got lost along the way.
+    if had_trailing_newline && !result.ends_with('\n') {
+        result.push('\n');
+    }
+    result
+}
+
+/// A single CommonMark reference-link definition; `title` is empty when there
+/// is none, otherwise the unescaped title text (as comrak hands it to us).
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct LinkDef {
+    label: String,
+    url: String,
+    title: String,
+}
+
+impl LinkDef {
+    /// Render as a definition line (no trailing newline) with an escaped,
+    /// double-quoted title; empty or whitespace-bearing URLs are wrapped in `<…>`.
+    fn render(&self) -> String {
+        let bracketed = self.url.starts_with('<') && self.url.ends_with('>');
+        let fragile = self.url.is_empty() || self.url.contains(char::is_whitespace);
+        let (open, close) = if fragile && !bracketed { ("<", ">") } else { ("", "") };
+        if self.title.is_empty() {
+            return format!("[{}]: {open}{}{close}", self.label, self.url);
+        }
+        let escaped = self.title.replace('\\', r"\\").replace('"', r#"\""#);
+        format!("[{}]: {open}{}{close} \"{escaped}\"", self.label, self.url)
+    }
+}
+
+/// Normalize a reference label per CommonMark §4.7: case-fold, trim, and
+/// collapse each internal whitespace run to one space; labels match when their
+/// normalized forms are equal. `str::to_lowercase` stands in for full Unicode
+/// case folding — good enough in practice and needs no extra dependency.
+fn normalize_label(label: &str) -> String {
+    let lowered = label.to_lowercase();
+    let mut normalized = String::with_capacity(lowered.len());
+    for word in lowered.split_whitespace() {
+        if !normalized.is_empty() {
+            normalized.push(' ');
+        }
+        normalized.push_str(word);
+    }
+    normalized
+}
+
+/// Label registry that tracks bidirectional label ↔ (url, title) mapping.
+/// Used to decide whether a new inline link can reuse an existing definition
+/// (shortcut form, full form, or a fresh definition).
+///
+/// `by_label` is keyed by the *normalized* label (CommonMark §4.7 —
+/// case-insensitive, whitespace-folded), so an existing `[Foo]: /old` collides
+/// with an inline `[foo](/new)` as the renderer would: without that, we'd emit
+/// two definitions sharing one canonical label and the renderer would resolve
+/// the converted shortcut to whichever came first.
+///
+/// `by_url_title` keys on the literal `(url, title)` tuple so two links
+/// pointing at the same URL with different titles get distinct definitions —
+/// otherwise the title metadata of one would be silently dropped.
+/// Its values are the *original-cased* labels, so full-form references write
+/// `[text][Foo]` (the casing the definition is stored under) rather than the
+/// normalized form.
+#[derive(Debug, Default)]
+struct LabelMap {
+    by_label: HashMap<String, (String, String)>,
+    by_url_title: HashMap<(String, String), String>,
+}
+
+impl LabelMap {
+    /// Register a definition.
+    /// Whichever definition first claims a normalized label, or a
+    /// `(url, title)` pair, keeps it; later registrations leave it untouched.
+    fn register(&mut self, def: &LinkDef) {
+        let target = (def.url.clone(), def.title.clone());
+        self.by_label
+            .entry(normalize_label(&def.label))
+            .or_insert_with(|| target.clone());
+        self.by_url_title
+            .entry(target)
+            .or_insert_with(|| def.label.clone());
+    }
+
+    /// Resolve an inline `[text](url "title")` link to its reference-form
+    /// replacement and, if a new definition was needed, append it to `defs`.
+    fn resolve_inline(
+        &mut self,
+        text: &str,
+        url: &str,
+        title: &str,
+        defs: &mut Vec<LinkDef>,
+    ) -> String {
+        // Shortcut form when the label is exactly the link text, full form
+        // (`[text][label]`) otherwise.
+        let reference_to = |label: &str| {
+            if label == text {
+                format!("[{text}]")
+            } else {
+                format!("[{text}][{label}]")
+            }
+        };
+
+        // Reuse the canonical label when this (url, title) already has one.
+        let target = (url.to_owned(), title.to_owned());
+        if let Some(known_label) = self.by_url_title.get(&target) {
+            return reference_to(known_label);
+        }
+
+        // New (url, title): the link text becomes the label if its
+        // normalized form is still free, otherwise `-2`, `-3`, … is tried
+        // until one is. Checks go through `normalize_label` so `[foo]: /new`
+        // is never emitted next to an existing `[Foo]: /old`.
+        let mut label = text.to_owned();
+        let mut suffix = 2_usize;
+        while self.by_label.contains_key(&normalize_label(&label)) {
+            label = format!("{text}-{suffix}");
+            suffix += 1;
+        }
+
+        // Record the new pairing in both directions before handing the
+        // definition over to the caller's list.
+        self.by_label.insert(normalize_label(&label), target.clone());
+        self.by_url_title.insert(target, label.clone());
+        let replacement = reference_to(&label);
+        defs.push(LinkDef {
+            label,
+            url: url.to_owned(),
+            title: title.to_owned(),
+        });
+        replacement
+    }
+}
+
+/// Walk the AST for inline `Link` nodes.
+/// For each, queue a [`Replacement`] of its source bytes with the
+/// reference-form output.
+/// Anchor links (`#fragment`), images, autolinks, pre-existing reference-form
+/// links, and links whose text cannot serve as a label are left alone.
+fn collect_inline_link_replacements<'a>(
+    node: &'a AstNode<'a>,
+    text: &str,
+    line_starts: &[usize],
+    label_map: &mut LabelMap,
+    defs: &mut Vec<LinkDef>,
+    out: &mut Vec<Replacement>,
+) {
+    let data = node.data();
+    match &data.value {
+        NodeValue::Link(link) => {
+            // Anchor-only URLs (`#foo`) are skipped. So are links whose text
+            // cannot serve as a reference label (CommonMark labels need a
+            // non-blank character and no brackets): `[](url)`, `[a [b] c](url)`.
+            if !link.url.starts_with('#')
+                && let Some(range) =
+                    sourcepos_to_byte_range(line_starts, text.len(), &data.sourcepos)
+                && let Some(link_text) = parse_inline_link_text(&text[range.clone()])
+                && !link_text.trim().is_empty()
+                && !link_text.contains(['[', ']'])
+            {
+                let replacement =
+                    label_map.resolve_inline(&link_text, &link.url, &link.title, defs);
+                out.push(Replacement {
+                    range,
+                    text: replacement,
+                });
+            }
+            // Link children are inlines already covered by the replacement.
+            return;
+        }
+        // Per design: images stay inline as `![alt](url)`. Don't recurse.
+        NodeValue::Image(_) => return,
+        _ => {}
+    }
+    for child in node.children() {
+        collect_inline_link_replacements(child, text, line_starts, label_map, defs, out);
+    }
+}
+
+/// Extract the link text from the source of an inline-form link
+/// `[text](url)`: the raw bytes between the opening `[` and its matching `](`.
+/// Reference-form links (`[text][label]`, `[label][]`, `[label]`) and
+/// autolinks yield `None`.
+///
+/// A backslash hides the byte that follows it from bracket matching.
+fn parse_inline_link_text(slice: &str) -> Option<String> {
+    if !slice.starts_with('[') {
+        return None;
+    }
+    // Nesting depth of `[`; the link text ends where it returns to zero.
+    let mut open_brackets = 0_i32;
+    let mut escaped = false;
+    for (pos, byte) in slice.bytes().enumerate() {
+        if escaped {
+            // The byte right after a backslash is never a delimiter.
+            escaped = false;
+            continue;
+        }
+        match byte {
+            b'\\' => escaped = true,
+            b'[' => open_brackets += 1,
+            b']' => {
+                open_brackets -= 1;
+                if open_brackets != 0 {
+                    continue;
+                }
+                // Only `(` straight after the matched `]` makes this an
+                // inline link; `[`, whitespace, or end-of-slice means a
+                // reference form (or something invalid).
+                let is_inline = slice.as_bytes().get(pos + 1) == Some(&b'(');
+                // The text sits between the `[` at index 0 and this `]`.
+                return is_inline.then(|| slice[1..pos].to_owned());
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Find pre-existing reference definitions in `text` (lines of the form
+/// `[label]: url "title"`) at the document level, returning the text with
+/// those lines removed and the extracted definitions as [`LinkDef`]s.
+///
+/// Lines inside code blocks, HTML blocks, and paragraphs are skipped,
+/// identified via comrak's AST so we don't false-match content that just
+/// happens to look like a definition.
+fn extract_existing_reference_definitions(text: &str) -> (String, Vec<LinkDef>) {
+    let arena = Arena::new();
+    let options = comrak_options();
+    let root = comrak::parse_document(&arena, text, &options);
+    let line_starts = line_start_offsets(text);
+
+    let mut excluded: Vec<Range<usize>> = Vec::new();
+    collect_excluded_ranges_for_refdefs(root, text, &line_starts, &mut excluded);
+
+    let mut content_lines: Vec<&str> = Vec::new();
+    let mut defs: Vec<LinkDef> = Vec::new();
+    let mut byte_pos = 0_usize;
+
+    for line in text.split('\n') {
+        let line_start = byte_pos;
+        let line_end = byte_pos + line.len();
+        let in_excluded = excluded
+            .iter()
+            .any(|r| line_start >= r.start && line_end <= r.end);
+        if !in_excluded && let Some(def) = parse_reference_definition_line(line) {
+            defs.push(def);
+        } else {
+            content_lines.push(line);
+        }
+        // Advance past line and its trailing `\n` (if any).
+        byte_pos = line_end + 1;
+    }
+
+    (content_lines.join("\n"), defs)
+}
+
+/// Walk the AST for block ranges where a `[label]: url` shape must NOT be
+/// extracted as a reference definition.
+///
+/// - [`CodeBlock`] / [`HtmlBlock`] / `FrontMatter`: the line is literal content.
+/// - [`Paragraph`]: a definition cannot interrupt a paragraph (CommonMark),
+///   so a matching line inside a paragraph's sourcepos is visible prose.
+///
+/// [`CodeBlock`]: NodeValue::CodeBlock
+/// [`HtmlBlock`]: NodeValue::HtmlBlock
+/// [`Paragraph`]: NodeValue::Paragraph
+fn collect_excluded_ranges_for_refdefs<'a>(
+    node: &'a AstNode<'a>,
+    text: &str,
+    line_starts: &[usize],
+    out: &mut Vec<Range<usize>>,
+) {
+    let data = node.data();
+    if matches!(
+        data.value,
+        NodeValue::CodeBlock(_)
+            | NodeValue::HtmlBlock(_)
+            | NodeValue::FrontMatter(_)
+            | NodeValue::Paragraph
+    ) && let Some(range) = sourcepos_to_byte_range(line_starts, text.len(), &data.sourcepos)
+    {
+        out.push(range);
+        return; // Nothing below these nodes can hold a definition.
+    }
+    for child in node.children() {
+        collect_excluded_ranges_for_refdefs(child, text, line_starts, out);
+    }
+}
+
+/// Parse a single line as a CommonMark-ish reference definition `[label]: url
+/// "title"`.
+/// Title is optional and may be enclosed in `"..."`, `'...'`, or `(...)`.
+/// Backslash escapes inside the title are unescaped (CommonMark semantics) so
+/// the stored value matches how comrak gives us inline-link titles.
+/// Returns `None` for lines that don't match the reference-definition shape.
+///
+/// Multi-line titles (where the title sits on the line after the URL) are
+/// **not** supported here; this matches the rest of the pipeline, which only
+/// extracts same-line definitions.
+fn parse_reference_definition_line(line: &str) -> Option<LinkDef> {
+    let trimmed = line.trim_start();
+    let indent = line.len() - trimmed.len();
+    // CommonMark allows up to 3 spaces of indentation.
+    if indent > 3 || !trimmed.starts_with('[') {
+        return None;
+    }
+
+    // Find the matching `]`, allowing nested `[...]` inside the label.
+    let bytes = trimmed.as_bytes();
+    let mut depth = 0_i32;
+    let mut close = None;
+    let mut i = 0_usize;
+    while i < bytes.len() {
+        match bytes[i] {
+            b'\\' => {
+                i += 2;
+                continue;
+            }
+            b'[' => depth += 1,
+            b']' => {
+                depth -= 1;
+                if depth == 0 {
+                    close = Some(i);
+                    break;
+                }
+            }
+            _ => {}
+        }
+        i += 1;
+    }
+    let close = close?;
+    let label = &trimmed[1..close];
+    if label.is_empty() {
+        return None;
+    }
+    // Footnote definitions (`[^label]: ...`) are handled by the
+    // footnotes extension, not as regular reference definitions. If we
+    // extracted them here, the protection round-trip would strip them
+    // before the canonical pass and re-emit them at the bottom — by
+    // which time comrak has parsed `[^label]` in prose as an undefined
+    // reference and escaped it as `\[^label\]`.
+    if label.starts_with('^') {
+        return None;
+    }
+
+    let after = &trimmed[close + 1..];
+    let after = after.strip_prefix(':')?.trim_start();
+    if after.is_empty() {
+        return None;
+    }
+
+    // Split URL from optional title. The URL is either `<...>` or the first
+    // run of non-whitespace bytes; the title (if any) follows after
+    // whitespace.
+    let (url, rest) = if let Some(after_lt) = after.strip_prefix('<') {
+        let end = after_lt.find('>')?;
+        (after_lt[..end].to_owned(), &after_lt[end + 1..])
+    } else {
+        let end = after.find(char::is_whitespace).unwrap_or(after.len());
+        (after[..end].to_owned(), &after[end..])
+    };
+    if url.is_empty() {
+        return None;
+    }
+
+    let title = match rest.trim() {
+        "" => String::new(),
+        // CommonMark requires whitespace between the destination and the
+        // title, so `<url>"title"` is not a definition. Trailing text that
+        // is not a well-formed title is rejected as well — it would
+        // otherwise round-trip lossily.
+        quoted if rest.starts_with(char::is_whitespace) => parse_quoted_title(quoted)?,
+        _ => return None,
+    };
+
+    Some(LinkDef {
+        label: label.to_owned(),
+        url,
+        title,
+    })
+}
+
+/// Parse a CommonMark reference-definition title: `"..."`, `'...'`, or `(...)`.
+/// Backslash-escaped ASCII punctuation is unescaped; other backslashes stay.
+/// Returns `None` for a malformed title: a missing or escaped closing
+/// delimiter, or an unescaped delimiter inside the body.
+fn parse_quoted_title(s: &str) -> Option<String> {
+    let bytes = s.as_bytes();
+    let (open, close) = match bytes.first()? {
+        b'"' => ('"', '"'),
+        b'\'' => ('\'', '\''),
+        b'(' => ('(', ')'),
+        _ => return None,
+    };
+    // The closing delimiter must be the last byte (`"..."trailing` is malformed).
+    if bytes.len() < 2 || !s.ends_with(close) {
+        return None;
+    }
+    let inner = &s[1..s.len() - 1];
+    let mut unescaped = String::with_capacity(inner.len());
+    let mut chars = inner.chars();
+    while let Some(c) = chars.next() {
+        match c {
+            // Unescaped delimiter in the body: `"foo"bar"`, `(a)(b)`.
+            _ if c == open || c == close => return None,
+            '\\' => {
+                // A trailing `\` escapes the closing delimiter: unterminated.
+                let next = chars.next()?;
+                if !next.is_ascii_punctuation() {
+                    unescaped.push('\\');
+                }
+                unescaped.push(next);
+            }
+            _ => unescaped.push(c),
+        }
+    }
+    Some(unescaped)
+}
+
+// ---------------------------------------------------------------------------
+// Reference-form link protection across the canonical pass.
+//
+// Comrak's `format_commonmark` always emits links inline (`[text](url)`)
+// regardless of how they appeared in the source. It also drops orphaned
+// reference definitions once all references have been inlined. To preserve
+// the user's choice of reference form (and their label names), we wrap the
+// canonical pass with two helpers:
+//
+// 1. `protect_reference_form_links`: substitute citations with alphanumeric
+//    sentinels and stash definitions out-of-band.
+// 2. `restore_protected_reference_links`: replace sentinels with original
+//    citation bytes and re-append definitions at the end of the body.
+//
+// The sentinels are bare alphanumeric strings, which comrak treats as plain
+// text and emits verbatim through its parse + serialize cycle.
+// ---------------------------------------------------------------------------
+
+/// State carried from `protect_reference_form_links` to its restore step.
+struct LinkProtection {
+    /// Sentinel-substituted text, with definition lines removed, that is fed
+    /// to the canonical pass.
+    protected_text: String,
+    /// For each citation: (sentinel string, original source bytes).
+    citations: Vec<(String, String)>,
+    /// Original reference-definition lines, in source order, to re-append
+    /// after the canonical pass.
+    definitions: Vec<String>,
+}
+
+fn protect_reference_form_links(text: &str) -> LinkProtection {
+    if text.is_empty() {
+        return LinkProtection {
+            protected_text: String::new(),
+            citations: Vec::new(),
+            definitions: Vec::new(),
+        };
+    }
+
+    let arena = Arena::new();
+    let options = comrak_options_with_intra_doc_links();
+    let root = comrak::parse_document(&arena, text, &options);
+    let line_starts = line_start_offsets(text);
+
+    // Collect citation source ranges (reference-form links only — inline
+    // links and autolinks are left alone).
+    let mut citation_ranges: Vec<Range<usize>> = Vec::new();
+    collect_reference_form_link_ranges(root, text, &line_starts, &mut citation_ranges);
+
+    // Collect reference definition line ranges, excluding code blocks,
+    // HTML blocks, and paragraphs (where `[label]: url` is content, not a def).
+    let mut excluded: Vec<Range<usize>> = Vec::new();
+    collect_excluded_ranges_for_refdefs(root, text, &line_starts, &mut excluded);
+
+    let mut definitions: Vec<String> = Vec::new();
+    let mut definition_ranges: Vec<Range<usize>> = Vec::new();
+    let mut byte_pos = 0_usize;
+    for line in text.split('\n') {
+        let line_start = byte_pos;
+        let line_end = byte_pos + line.len();
+        let in_excluded = excluded
+            .iter()
+            .any(|r| line_start >= r.start && line_end <= r.end);
+        if !in_excluded && parse_reference_definition_line(line).is_some() {
+            // Include the trailing newline (if any) so the line and its
+            // separator are both removed cleanly.
+            let range_end = if line_end < text.len() {
+                line_end + 1
+            } else {
+                line_end
+            };
+            definition_ranges.push(line_start..range_end);
+            definitions.push(line.to_owned());
+        }
+        byte_pos = line_end + 1;
+    }
+
+    // Build the sentinel-substituted text.
+    let mut substitutions: Vec<(Range<usize>, String)> = Vec::new();
+    let mut citations: Vec<(String, String)> = Vec::new();
+    for range in citation_ranges {
+        let sentinel = format!("XCMFRTLR{:04}X", citations.len());
+        let original = text[range.clone()].to_owned();
+        substitutions.push((range, sentinel.clone()));
+        citations.push((sentinel, original));
+    }
+    for range in definition_ranges {
+        substitutions.push((range, String::new()));
+    }
+    substitutions.sort_by_key(|(r, _)| r.start);
+
+    let mut protected_text = String::with_capacity(text.len());
+    let mut cursor = 0_usize;
+    for (range, replacement) in substitutions {
+        protected_text.push_str(&text[cursor..range.start]);
+        protected_text.push_str(&replacement);
+        cursor = range.end;
+    }
+    protected_text.push_str(&text[cursor..]);
+
+    LinkProtection {
+        protected_text,
+        citations,
+        definitions,
+    }
+}
+
+/// Undo [`protect_reference_form_links`] on the canonical pass's output:
+/// swap each sentinel back for its original citation and re-append the
+/// stashed definitions, keeping `canonical`'s trailing-newline state.
+fn restore_protected_reference_links(canonical: &str, protection: &LinkProtection) -> String {
+    // Put the original citation source back in place of each sentinel.
+    let mut text = protection
+        .citations
+        .iter()
+        .fold(canonical.to_owned(), |acc, (sentinel, original)| {
+            acc.replace(sentinel, original)
+        });
+
+    // Definitions go back at the bottom, after a blank line. When
+    // `--reference-links` is enabled too, the later
+    // `extract_reference_links` pass re-sorts and consolidates them.
+    if !protection.definitions.is_empty() {
+        let body = text.trim_end();
+        let separator = if body.is_empty() { "" } else { "\n\n" };
+        let block = protection.definitions.join("\n");
+        text = format!("{body}{separator}{block}");
+        text.truncate(text.trim_end_matches('\n').len());
+    }
+
+    // Mirror the input's trailing-newline state: strip trailing newlines
+    // when `canonical` had none, otherwise make sure the text ends with
+    // one.
+    if !canonical.ends_with('\n') {
+        text.truncate(text.trim_end_matches('\n').len());
+    } else if !text.ends_with('\n') {
+        text.push('\n');
+    }
+    text
+}
+
+/// Collect the byte ranges of every [`NodeValue::Link`] whose source slice is
+/// in reference form (`[text][label]`, `[label][]`, or shortcut `[label]`).
+/// Inline links and autolinks are not collected; images are skipped together
+/// with everything inside them.
+fn collect_reference_form_link_ranges<'a>(
+    node: &'a AstNode<'a>,
+    text: &str,
+    line_starts: &[usize],
+    out: &mut Vec<Range<usize>>,
+) {
+    let data = node.data();
+    // Images are skipped wholesale. Reference-form images would also be
+    // inlined by comrak, but extending protection to them is a separate
+    // concern — the present bug is link-only.
+    if matches!(data.value, NodeValue::Image(_)) {
+        return;
+    }
+    if !matches!(data.value, NodeValue::Link(_)) {
+        // Any other node: keep searching below it.
+        for child in node.children() {
+            collect_reference_form_link_ranges(child, text, line_starts, out);
+        }
+        return;
+    }
+    // A link: record it when its source is reference-form. Its children
+    // are never visited.
+    let range = sourcepos_to_byte_range(line_starts, text.len(), &data.sourcepos);
+    if let Some(range) = range.filter(|r| is_reference_form_link(&text[r.clone()])) {
+        out.push(range);
+    }
+}
+
+/// Tell whether `slice` is the source of a reference-form link.
+/// Inline links (`](` follows the link text) and autolinks (slice starts with
+/// `<`) give `false`, as does a slice whose opening `[` is never closed.
+fn is_reference_form_link(slice: &str) -> bool {
+    // Autolink `<url>` or some other non-bracket-prefixed link.
+    if !slice.starts_with('[') {
+        return false;
+    }
+    let bytes = slice.as_bytes();
+    let mut open_brackets = 0_i32;
+    let mut escaped = false;
+    for (pos, &byte) in bytes.iter().enumerate() {
+        if escaped {
+            // The byte right after a backslash is never a delimiter.
+            escaped = false;
+            continue;
+        }
+        match byte {
+            b'\\' => escaped = true,
+            b'[' => open_brackets += 1,
+            b']' => {
+                open_brackets -= 1;
+                // At the matched `]`: `(` right after it means inline form;
+                // anything else (`[`, EOL, whitespace) is reference form.
+                if open_brackets == 0 {
+                    return bytes.get(pos + 1) != Some(&b'(');
+                }
+            }
+            _ => {}
+        }
+    }
+    false
+}
+
+/// Options for the canonical-markdown pass: [`comrak_options`] plus our
+/// render tweaks.
+///
+/// `width = usize::MAX` is deliberate.
+/// Counter-intuitively, `width = 0` makes comrak's formatter *preserve source
+/// soft breaks within paragraphs*, which can leave digit-period sequences
+/// (`404.`) and similar characters at the start of a continuation line.
+/// comrak then defensively escapes them (`404\.`) so that re-parsing the
+/// canonical output yields the same AST, and the user sees cosmetic noise.
+///
+/// With `width = usize::MAX` comrak collapses soft breaks: each paragraph is
+/// emitted as one logical line, so those characters sit mid-line where no
+/// escape is needed. The downstream sembr pass handles width-wrapping and
+/// replaces the lost soft breaks with sentence-per-line layout.
+///
+/// The other choices match `jp_md`'s existing conventions.
+fn canonical_render_options() -> Options<'static> {
+    let render = Render {
+        width: usize::MAX,
+        list_style: ListStyleType::Dash,
+        prefer_fenced: true,
+        ..Default::default()
+    };
+    Options {
+        render,
+        ..comrak_options()
+    }
+}
+
+/// Replace every paragraph that [`collect_paragraphs`] selects — top-level or
+/// nested in a supported container — with its reflowed version.
+/// All other bytes of the body are left as-is.
+#[must_use]
+pub fn reflow_markdown(body: &str, max_width: usize) -> String {
+    if body.is_empty() {
+        return String::new();
+    }
+
+    let arena = Arena::new();
+    let options = comrak_options();
+    let root = comrak::parse_document(&arena, body, &options);
+
+    let line_starts = line_start_offsets(body);
+    let mut replacements: Vec<Replacement> = Vec::new();
+    let mut ancestors: Vec<&AstNode<'_>> = Vec::new();
+    collect_paragraphs(
+        root,
+        &mut ancestors,
+        &mut replacements,
+        body,
+        &line_starts,
+        max_width,
+    );
+
+    if replacements.is_empty() {
+        return body.to_owned();
+    }
+
+    // Comrak doesn't guarantee AST order matches source order: footnote
+    // definitions in particular get reordered (the definition appears in
+    // the AST after the paragraph that references it, regardless of where
+    // it lived in the source). Sort by source byte offset before splicing
+    // so the cursor walks the body in monotonic order.
+    replacements.sort_by_key(|r| r.range.start);
+
+    let mut out = String::with_capacity(body.len());
+    let mut cursor = 0;
+    for r in replacements {
+        out.push_str(&body[cursor..r.range.start]);
+        out.push_str(&r.text);
+        cursor = r.range.end;
+    }
+    out.push_str(&body[cursor..]);
+    out
+}
+
+/// Resolver that turns unresolved shortcut/collapsed references (`[X]` or
+/// `[X][]`) into dummy `Link` AST nodes. The intended targets are Rust
+/// intra-doc links (``[`foo`]``, `[crate::Foo]`, etc.).
+/// Without this, comrak's parser treats unresolved references as plain text
+/// with literal `[` and `]`, which the formatter then defensively escapes as
+/// `\[X\]`. By forcing such labels to be `Link` nodes,
+/// [`protect_reference_form_links`] can sentinelise their source bytes and
+/// bypass comrak's escape logic entirely.
+///
+/// **Critically narrow.** The callback must *not* match task-list markers
+/// (`[ ]`, `[x]`, `[X]`) or footnote references (`[^name]`):
+/// `broken_link_callback` fires before the `tasklist` / `footnotes`
+/// extensions get to recognise them, so a too-eager callback eats task items
+/// and footnotes silently.
+/// Returning `None` for those patterns lets the extensions handle them.
+///
+/// The dummy URL is empty; the value never reaches output because protection
+/// substitutes the source bytes back verbatim.
+struct ResolveIntraDocLinks;
+
+impl BrokenLinkCallback for ResolveIntraDocLinks {
+    fn resolve(&self, link: BrokenLinkReference<'_>) -> Option<ResolvedReference> {
+        let label = link.normalized.trim();
+        // Leave these to their extensions by declining to resolve:
+        // - footnote references (`[^name]`) belong to `footnotes`;
+        // - task-list markers belong to `tasklist`: `[ ]` normalises to
+        //   empty, `[x]` / `[X]` to a single character.
+        let is_footnote = label.starts_with('^');
+        let is_task_marker = label.is_empty() || label.eq_ignore_ascii_case("x");
+        if is_footnote || is_task_marker {
+            return None;
+        }
+        // Everything else becomes a link with a dummy (empty) target.
+        Some(ResolvedReference {
+            url: String::new(),
+            title: String::new(),
+        })
+    }
+}
+
+/// The comrak parse options shared by the whole pipeline.
+/// Kept in one place so the re-parse for block-quote-nested paragraphs (see
+/// [`collect_inline_atomic_ranges_from_text`]) runs with the exact same
+/// extension set.
+///
+/// Note: these are the *plain* parse options, without the intra-doc
+/// broken-link callback.
+/// That callback would interfere with the tasklist and footnotes extensions
+/// (see [`ResolveIntraDocLinks`]).
+/// Reach for [`comrak_options_with_intra_doc_links`] only where its effect is
+/// genuinely needed — currently only [`protect_reference_form_links`].
+fn comrak_options() -> Options<'static> {
+    let extension = Extension {
+        table: true,
+        tasklist: true,
+        alerts: true,
+        multiline_block_quotes: true,
+        footnotes: true,
+        block_directive: true,
+        // YAML frontmatter detection (`---` at the top of a document).
+        // Markdown files need it; doc comments are unaffected in practice,
+        // since frontmatter only triggers when the first non-empty line is
+        // the delimiter, which almost never happens inside a `///` block.
+        front_matter_delimiter: Some(String::from("---")),
+        ..Default::default()
+    };
+    Options {
+        extension,
+        ..Default::default()
+    }
+}
+
+/// [`comrak_options`] plus the [`ResolveIntraDocLinks`] callback, so unresolved
+/// intra-doc shortcut/collapsed references become `Link` nodes in the AST.
+/// Only [`protect_reference_form_links`] uses it, to sentinelise those references.
+fn comrak_options_with_intra_doc_links() -> Options<'static> {
+    let parse = Parse {
+        broken_link_callback: Some(Arc::new(ResolveIntraDocLinks)),
+        ..Default::default()
+    };
+    Options {
+        parse,
+        ..comrak_options()
+    }
+}
+
+/// Recursively walk the AST collecting paragraphs to reflow.
+///
+/// Descends into the container types matched explicitly below.
+/// Other containers (e.g.
+/// `DescriptionList`) and leaf blocks (code blocks, headings, tables, HTML
+/// blocks) are skipped, so their content survives verbatim.
+///
+/// Paragraphs that contain a [`LineBreak`] inline child — i.e. an explicit
+/// markdown hard break — are also left untouched.
+/// `collapse_whitespace` in the sembr step would otherwise silently eat the
+/// hard-break marker, changing how rustdoc renders the paragraph.
+/// The same coarse-grained rule we apply to code blocks and tables: when reflow
+/// would lose information, opt out of reflow for the whole element.
+///
+/// [`LineBreak`]: NodeValue::LineBreak
+fn collect_paragraphs<'a>(
+    node: &'a AstNode<'a>,
+    ancestors: &mut Vec<&'a AstNode<'a>>,
+    out: &mut Vec<Replacement>,
+    body: &str,
+    line_starts: &[usize],
+    max_width: usize,
+) {
+    let data = node.data();
+    match &data.value {
+        NodeValue::Paragraph => {
+            // Hard breaks (`  \n` or `\\\n`) mean the user deliberately
+            // chose where lines break; reflowing would silently destroy
+            // that intent. Leave the paragraph verbatim and skip ahead.
+            if has_hard_line_break(node) {
+                return;
+            }
+            let Some(range) = sourcepos_to_byte_range(line_starts, body.len(), &data.sourcepos)
+            else {
+                return;
+            };
+            let prefix = continuation_prefix_from_ancestors(ancestors);
+            // `max_width == 0` is passed through untouched (presumably "no
+            // limit" — confirm in `reflow_paragraph`), so a non-zero budget
+            // must not collapse to 0 when the prefix eats all of it.
+            let paragraph_max = match max_width {
+                0 => 0,
+                width => width.saturating_sub(prefix.len()).max(1),
+            };
+            // The paragraph's source bytes include the `>` continuation
+            // markers on continuation lines (block quotes only — list-item
+            // continuation is plain whitespace that `collapse_whitespace`
+            // already eats). Strip them before sembr.
+            let bq_depth = block_quote_depth(ancestors);
+            let cleaned = strip_block_quote_markers(&body[range.clone()], bq_depth);
+            // Atomic-range protection from the inline AST. The outer AST's
+            // sourcepos values are in *body* coordinates, which align with
+            // `cleaned` only when no stripping happened (block-quote depth
+            // zero). For nested-in-blockquote paragraphs, the cleaner
+            // approach is to re-parse `cleaned` as a standalone markdown
+            // fragment and read inline sourcepos from that AST — those
+            // values are in cleaned coordinates by construction.
+            let atomic_ranges = if bq_depth == 0 {
+                collect_inline_atomic_ranges(node, range.start, line_starts, body.len())
+            } else {
+                collect_inline_atomic_ranges_from_text(&cleaned)
+            };
+            let raw = reflow_paragraph(&cleaned, &atomic_ranges, paragraph_max);
+            let text = if prefix.is_empty() {
+                raw
+            } else {
+                raw.replace('\n', &format!("\n{prefix}"))
+            };
+            out.push(Replacement { range, text });
+            // Paragraph children are inlines: no further recursion needed.
+        }
+        NodeValue::Document
+        | NodeValue::BlockQuote
+        | NodeValue::List(_)
+        | NodeValue::Item(_)
+        | NodeValue::TaskItem(_)
+        | NodeValue::Alert(_)
+        | NodeValue::MultilineBlockQuote(_)
+        | NodeValue::FootnoteDefinition(_)
+        | NodeValue::BlockDirective(_) => {
+            ancestors.push(node);
+            for child in node.children() {
+                collect_paragraphs(child, ancestors, out, body, line_starts, max_width);
+            }
+            ancestors.pop();
+        }
+        _ => {
+            // Unsupported container (e.g. DescriptionList) or non-reflowable
+            // leaf block: not descending keeps everything inside verbatim.
+        }
+    }
+}
+
+/// Assemble the per-line continuation prefix for a paragraph from the chain
+/// of ancestor nodes that encloses it, outermost first.
+/// Every supported container appends its fragment; any other ancestor adds
+/// nothing.
+///
+/// The fragments are concatenated in ancestor order, so the result can be
+/// put in front of every continuation line of the reflowed paragraph.
+fn continuation_prefix_from_ancestors(ancestors: &[&AstNode<'_>]) -> String {
+    // Four columns: a footnote definition's continuation indent, and the
+    // width of a task item's `[X] ` checkbox.
+    const FOUR_SPACES: &str = "    ";
+    let mut prefix = String::new();
+    for (depth, ancestor) in ancestors.iter().enumerate() {
+        match &ancestor.data().value {
+            // Alert (GFM `> [!NOTE]`) shares BlockQuote's per-line `>`
+            // prefix. MultilineBlockQuote (`>>>`) has its delimiters on
+            // their own lines and unprefixed content inside, so it falls
+            // through to the wildcard arm and contributes nothing.
+            NodeValue::BlockQuote | NodeValue::Alert(_) => prefix.push_str("> "),
+            // `padding` is the marker width including the trailing space,
+            // per comrak's NodeList documentation.
+            NodeValue::Item(list) => prefix.push_str(&" ".repeat(list.padding)),
+            // Footnote definition: continuation indent is fixed at 4 spaces
+            // per CommonMark's footnotes extension.
+            NodeValue::FootnoteDefinition(_) => prefix.push_str(FOUR_SPACES),
+            NodeValue::TaskItem(_) => {
+                // TaskItem has no padding of its own. Inherit the parent
+                // List's padding (marker width) and add 4 for `[X] `.
+                if let Some(up) = depth.checked_sub(1)
+                    && let NodeValue::List(list) = &ancestors[up].data().value
+                {
+                    prefix.push_str(&" ".repeat(list.padding));
+                }
+                prefix.push_str(FOUR_SPACES);
+            }
+            _ => {}
+        }
+    }
+    prefix
+}
+
+/// Report whether a [`LineBreak`] (a markdown hard break) occurs anywhere in
+/// the paragraph's subtree.
+///
+/// Hard breaks can sit inside inline containers (emphasis, link text,
+/// etc.), so checking only the direct children would miss some.
+///
+/// [`LineBreak`]: NodeValue::LineBreak
+fn has_hard_line_break<'a>(paragraph: &'a AstNode<'a>) -> bool {
+    for node in paragraph.descendants() {
+        if matches!(node.data().value, NodeValue::LineBreak) {
+            return true;
+        }
+    }
+    false
+}
+
+/// Number of ancestors that put a `>` marker on every line (regular block
+/// quotes and GFM alerts), i.e. how many layers of `>` have to be stripped
+/// from continuation lines before sembr.
+fn block_quote_depth(ancestors: &[&AstNode<'_>]) -> usize {
+    ancestors.iter().fold(0, |depth, a| match a.data().value {
+        NodeValue::BlockQuote | NodeValue::Alert(_) => depth + 1,
+        _ => depth,
+    })
+}
+
+/// Strip up to `depth` layers of leading `>` block-quote markers from every
+/// line after the first.
+/// Line 0 is left alone (its prefix is outside the paragraph's sourcepos
+/// range already).
+///
+/// Both `> ` and bare `>` markers are accepted, as is whitespace in front of
+/// each marker (CommonMark allows up to 3 spaces of indent).
+/// A line with fewer markers than `depth` is stripped only as far as its
+/// markers go.
+fn strip_block_quote_markers(text: &str, depth: usize) -> String {
+    if depth == 0 {
+        return text.to_owned();
+    }
+    let mut lines = text.split('\n');
+    let mut out = String::with_capacity(text.len());
+    // Line 0 is copied untouched; `split` always yields it.
+    out.push_str(lines.next().unwrap_or_default());
+    for line in lines {
+        let mut rest = line;
+        for _ in 0..depth {
+            // Indentation before a marker goes, marker or not.
+            rest = rest.trim_start();
+            let Some(after) = rest.strip_prefix('>') else {
+                break;
+            };
+            // A single space after `>` belongs to the marker.
+            rest = after.strip_prefix(' ').unwrap_or(after);
+        }
+        out.push('\n');
+        out.push_str(rest);
+    }
+    out
+}
+
+/// Gather the byte ranges of every inline element under a [`Paragraph`] that
+/// sentence segmentation must not split.
+/// Ranges are relative to `paragraph_start`, which is subtracted from each
+/// position resolved against the original body.
+/// Atomic elements are the emphasis variants (`Emph`, `Strong`,
+/// `Strikethrough`), inline code (`Code`), links and images, raw HTML, math
+/// spans, footnote references, and wikilinks.
+/// A matching node is recorded whole and not descended into.
+///
+/// [`Paragraph`]: NodeValue::Paragraph
+fn collect_inline_atomic_ranges<'a>(
+    paragraph: &'a AstNode<'a>,
+    paragraph_start: usize,
+    line_starts: &[usize],
+    body_len: usize,
+) -> Vec<Range<usize>> {
+    let mut ranges = Vec::new();
+    paragraph.children().for_each(|inline| {
+        walk_inline_for_atomic_ranges(inline, paragraph_start, line_starts, body_len, &mut ranges);
+    });
+    ranges
+}
+
+/// Parse `text` on its own as markdown and gather the inline atomic ranges of
+/// every paragraph in it.
+/// Needed for paragraphs inside block quotes: `strip_block_quote_markers`
+/// removes the per-line `>` prefixes, after which the outer AST's sourcepos
+/// values (body coordinates) no longer line up with the text being reflowed.
+/// A fresh parse of the stripped text yields sourcepos values that are in
+/// `text` coordinates by construction.
+///
+/// Cost: one additional comrak parse per block-quote-nested paragraph, which is
+/// acceptable because block quotes are rare in doc comments and markdown
+/// files.
+fn collect_inline_atomic_ranges_from_text(text: &str) -> Vec<Range<usize>> {
+    let line_starts = line_start_offsets(text);
+    let mut ranges = Vec::new();
+
+    let arena = Arena::new();
+    let document = comrak::parse_document(&arena, text, &comrak_options());
+    walk_paragraphs_for_atomic_ranges(document, &line_starts, text.len(), &mut ranges);
+
+    ranges
+}
+
+/// Recurse through the re-parsed AST and gather inline atomic ranges from each
+/// paragraph found.
+/// Every non-paragraph node is descended into, so a paragraph nested in a list
+/// item or alert inside the stripped block-quote content (e.g. `> - foo. bar.`)
+/// is not missed.
+/// A paragraph's inline children are walked with a paragraph start of `0`, so
+/// the resulting ranges are offsets into the re-parsed text; paragraphs are not
+/// searched for further paragraphs.
+fn walk_paragraphs_for_atomic_ranges<'a>(
+    node: &'a AstNode<'a>,
+    line_starts: &[usize],
+    text_len: usize,
+    out: &mut Vec<Range<usize>>,
+) {
+    let is_paragraph = matches!(node.data().value, NodeValue::Paragraph);
+    for child in node.children() {
+        if is_paragraph {
+            walk_inline_for_atomic_ranges(child, 0, line_starts, text_len, out);
+        } else {
+            walk_paragraphs_for_atomic_ranges(child, line_starts, text_len, out);
+        }
+    }
+}
+
+/// Record `node`'s byte range (relative to `paragraph_start`) when it is an
+/// atomic inline element; otherwise search its children for atomic elements.
+fn walk_inline_for_atomic_ranges<'a>(
+    node: &'a AstNode<'a>,
+    paragraph_start: usize,
+    line_starts: &[usize],
+    body_len: usize,
+    out: &mut Vec<Range<usize>>,
+) {
+    let data = node.data();
+    let is_atomic = matches!(
+        data.value,
+        NodeValue::Emph
+            | NodeValue::Strong
+            | NodeValue::Strikethrough
+            | NodeValue::Code(_)
+            | NodeValue::Link(_)
+            | NodeValue::Image(_)
+            | NodeValue::HtmlInline(_)
+            | NodeValue::Math(_)
+            | NodeValue::FootnoteReference(_)
+            | NodeValue::WikiLink(_)
+    );
+    if !is_atomic {
+        for child in node.children() {
+            walk_inline_for_atomic_ranges(child, paragraph_start, line_starts, body_len, out);
+        }
+        return;
+    }
+    // The span covers any nested inlines too, so there is no descent here.
+    if let Some(span) = sourcepos_to_byte_range(line_starts, body_len, &data.sourcepos)
+        && let Some(start) = span.start.checked_sub(paragraph_start)
+        && let Some(end) = span.end.checked_sub(paragraph_start)
+    {
+        out.push(start..end);
+    }
+}
+
+/// Reflow one prose paragraph into semantic line breaks: each sentence on its
+/// own line, joined with `\n`.
+///
+/// `atomic_ranges` is forwarded unchanged to the sentence splitter.
+/// `max_width == 0` turns width wrapping off.
+/// Otherwise every sentence is additionally wrapped to `max_width`; words are
+/// never broken or hyphenated, so a token longer than the width (URL, path,
+/// identifier) stays whole.
+#[must_use]
+pub fn reflow_paragraph(
+    paragraph: &str,
+    atomic_ranges: &[Range<usize>],
+    max_width: usize,
+) -> String {
+    let sentences = split_sentences(paragraph, atomic_ranges);
+    // An empty list joins to an empty string, so "nothing to do" and
+    // "width wrapping disabled" share one exit.
+    if sentences.is_empty() || max_width == 0 {
+        return sentences.join("\n");
+    }
+
+    let wrap_options = textwrap::Options::new(max_width)
+        .break_words(false)
+        .word_splitter(WordSplitter::NoHyphenation);
+
+    let mut wrapped = Vec::with_capacity(sentences.len());
+    for sentence in &sentences {
+        wrapped.push(textwrap::fill(sentence, &wrap_options));
+    }
+    wrapped.join("\n")
+}
+
+struct Replacement {
+    range: Range<usize>, // NOTE(review): presumably the byte span being replaced — confirm
+    text: String, // NOTE(review): presumably the text substituted for `range` — confirm
+}
+
+/// Translate a comrak [`Sourcepos`] (1-based line, 1-based byte column,
+/// end-inclusive) into a half-open byte range.
+///
+/// `line_starts` holds the byte offset at which each line begins, indexed by
+/// zero-based line number.
+///
+/// Yields `None` when a line number is `0` or has no entry in `line_starts`,
+/// when an offset addition overflows, when the range would be inverted, or
+/// when it ends past `body_len`.
+/// This is a defensive guard against sourcepos drift that has not been
+/// observed but should not be assumed absent.
+///
+/// [`Sourcepos`]: comrak::nodes::Sourcepos
+fn sourcepos_to_byte_range(
+    line_starts: &[usize],
+    body_len: usize,
+    sp: &comrak::nodes::Sourcepos,
+) -> Option<Range<usize>> {
+    let line_offset =
+        |line: usize| line.checked_sub(1).and_then(|idx| line_starts.get(idx).copied());
+    // A start column of 0 is treated like column 1.
+    let start = line_offset(sp.start.line)?.checked_add(sp.start.column.saturating_sub(1))?;
+    // The inclusive end column doubles as the exclusive end offset.
+    let end = line_offset(sp.end.line)?.checked_add(sp.end.column)?;
+    (start <= end && end <= body_len).then_some(start..end)
+}
+
+/// Byte offsets at which each line of `s` begins.
+///
+/// The first entry is always `0`.
+/// Every `\n` adds one more entry: the offset of the byte right after it.
+/// A trailing newline therefore produces a final entry equal to `s.len()`.
+fn line_start_offsets(s: &str) -> Vec<usize> {
+    // Line 0 starts at offset 0; each later line starts right after the
+    // newline that ends the previous one.
+    let after_newlines = s.match_indices('\n').map(|(newline_at, _)| newline_at + 1);
+    std::iter::once(0_usize).chain(after_newlines).collect()
+}
+
+#[cfg(test)]
+#[path = "format_tests.rs"]
+mod tests;
diff --git a/crates/contrib/comfort/src/format_tests.rs b/crates/contrib/comfort/src/format_tests.rs
new file mode 100644
index 00000000..0c67fa5a
--- /dev/null
+++ b/crates/contrib/comfort/src/format_tests.rs
@@ -0,0 +1,595 @@
+//! Tests for the format pipeline.
+//! The engine trait is gone, so these tests exercise the real sentence splitter
+//! directly.
+//! Output is deterministic and idempotent, so we assert on exact strings where
+//! it's useful and on invariants otherwise.
+
+use indoc::indoc;
+use pretty_assertions::assert_eq;
+
+use super::{format_source, reflow_markdown, reflow_paragraph};
+
+// ---------------------------------------------------------------------------
+// reflow_paragraph: sentence splitting + width wrapping
+// ---------------------------------------------------------------------------
+
+#[test]
+fn paragraph_splits_two_sentences_onto_their_own_lines() {
+    let reflowed = reflow_paragraph("Hello world. This is a test.", &[], 0);
+    assert_eq!(reflowed, ["Hello world.", "This is a test."].join("\n"));
+}
+
+#[test]
+fn paragraph_single_sentence_returns_single_line() {
+    let sentence = "Just one sentence.";
+    assert_eq!(reflow_paragraph(sentence, &[], 0), sentence);
+}
+
+#[test]
+fn paragraph_width_wraps_at_word_boundaries() {
+    let wrapped = reflow_paragraph("alpha beta gamma delta epsilon zeta.", &[], 12);
+    for wrapped_line in wrapped.lines() {
+        assert!(wrapped_line.len() <= 12, "line exceeded max_width: {wrapped_line:?}");
+    }
+    assert!(wrapped.lines().count() > 1);
+}
+
+#[test]
+fn paragraph_does_not_break_long_unbreakable_tokens() {
+    // With `max_width` 10 the URL cannot fit on any line; it must overflow
+    // whole instead of being split mid-token.
+    let url = "https://example.com/path/to/very/long/resource";
+    let sentence = format!("Visit {url} for details.");
+    let wrapped = reflow_paragraph(&sentence, &[], 10);
+
+    // Some output line still carries the complete URL.
+    let url_intact = wrapped.lines().any(|line| line.contains(url));
+
+    assert!(url_intact, "URL was broken: {wrapped:?}");
+}
+
+#[test]
+fn paragraph_idempotent_under_repeated_reflow() {
+    let prose = "First sentence here. Second sentence too. Third for good measure.";
+    let first_pass = reflow_paragraph(prose, &[], 30);
+    let second_pass = reflow_paragraph(&first_pass, &[], 30);
+    assert_eq!(first_pass, second_pass);
+}
+
+#[test]
+fn paragraph_empty_input_returns_empty() {
+    assert_eq!(reflow_paragraph("", &[], 0), String::new());
+    assert_eq!(reflow_paragraph("   ", &[], 0), String::new());
+}
+
+// ---------------------------------------------------------------------------
+// reflow_markdown: comrak-driven block awareness
+// ---------------------------------------------------------------------------
+
+#[test]
+fn reference_link_definitions_are_preserved_verbatim() {
+    let defs = indoc! {"
+        [`format`]: super::format
+        [`extract`]: super::extract
+        [`engine`]: super::engine
+    "};
+    assert_eq!(reflow_markdown(defs, 0), defs);
+    // Turning width wrapping on must not touch the definitions either.
+    assert_eq!(reflow_markdown(defs, 80), defs);
+}
+
+#[test]
+fn paragraph_with_trailing_ref_link_defs_reflows_only_the_paragraph() {
+    let input = indoc! {"
+        First. Second.
+
+        [foo]: bar
+        [baz]: qux
+    "};
+    let want = indoc! {"
+        First.
+        Second.
+
+        [foo]: bar
+        [baz]: qux
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn block_quote_two_sentences_split_to_two_lines() {
+    let input = indoc! {"
+        > First sentence. Second sentence.
+    "};
+    let want = indoc! {"
+        > First sentence.
+        > Second sentence.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn block_quote_multi_line_paragraph_is_reflowed_as_one_logical_para() {
+    // Continuation-line `>` markers have to be removed before sembr runs;
+    // left in, they end up inside sentence text and get doubled on output.
+    let input = indoc! {"
+        > First sentence here. This second sentence
+        > continues onto another line.
+    "};
+    let want = indoc! {"
+        > First sentence here.
+        > This second sentence continues onto another line.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn block_quote_single_sentence_stays_on_one_line() {
+    // One sentence in, one line out.
+    // The quoted line must come back byte-for-byte.
+    let quote = "> A B C D E F.\n";
+    assert_eq!(reflow_markdown(quote, 0), quote);
+}
+
+#[test]
+fn nested_block_quote_uses_compound_prefix() {
+    let input = indoc! {"
+        > > First. Second.
+    "};
+    let want = indoc! {"
+        > > First.
+        > > Second.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn alert_body_reflows_like_block_quote() {
+    // GFM admonition (`> [!NOTE]`): the `[!NOTE]` header line is left in
+    // place and the body paragraph is sembr'd with a `> ` continuation.
+    let input = indoc! {"
+        > [!NOTE]
+        > First sentence. Second sentence.
+    "};
+    let want = indoc! {"
+        > [!NOTE]
+        > First sentence.
+        > Second sentence.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn alert_multi_line_body_strips_continuation_markers() {
+    // Same rule as for plain block quotes: the `>` on the body's
+    // continuation lines has to be stripped before sembr.
+    let input = indoc! {"
+        > [!WARNING]
+        > First sentence here.
+        > Second sentence here.
+    "};
+    let want = indoc! {"
+        > [!WARNING]
+        > First sentence here.
+        > Second sentence here.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn footnote_definition_continuation_uses_four_space_indent() {
+    // The footnotes extension fixes the continuation indent at 4 spaces,
+    // whatever the label width. A reference is included because comrak keeps
+    // a footnote definition in the AST only when something actually refers
+    // to it.
+    let input = indoc! {"
+        See[^note] for details.
+
+        [^note]: First sentence. Second sentence.
+    "};
+    let want = indoc! {"
+        See[^note] for details.
+
+        [^note]: First sentence.
+            Second sentence.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn footnote_definition_long_label_still_four_spaces() {
+    // The continuation indent stays at the spec's 4 spaces; it is *not*
+    // widened to line up with a longer label.
+    let input = indoc! {"
+        Like[^very-long-label] this.
+
+        [^very-long-label]: First. Second.
+    "};
+    let want = indoc! {"
+        Like[^very-long-label] this.
+
+        [^very-long-label]: First.
+            Second.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn orphan_footnote_definitions_are_preserved_verbatim() {
+    // Comrak silently leaves unreferenced footnote definitions out of the
+    // AST. What is not in the AST cannot be reflowed, and since no node
+    // triggers a replacement the source bytes pass through intact.
+    let orphan = indoc! {"
+        [^orphan]: Some text. More text.
+    "};
+    assert_eq!(reflow_markdown(orphan, 0), orphan);
+}
+
+#[test]
+fn block_directive_reflows_interior_without_per_line_prefix() {
+    // `:::name` block directive: as with a multiline block quote, the
+    // delimiters get their own lines and the content carries no line prefix.
+    let input = indoc! {"
+        :::warning
+        First sentence. Second sentence.
+        :::
+    "};
+    let want = indoc! {"
+        :::warning
+        First sentence.
+        Second sentence.
+        :::
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn multiline_block_quote_reflows_interior_without_per_line_prefix() {
+    // `>>>` block quote: the delimiters sit alone on their lines and the
+    // content between them has no prefix.
+    let input = indoc! {"
+        >>>
+        First sentence. Second sentence.
+        >>>
+    "};
+    let want = indoc! {"
+        >>>
+        First sentence.
+        Second sentence.
+        >>>
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn fenced_code_blocks_are_preserved_verbatim() {
+    let input = indoc! {"
+        Some prose.
+
+        ```rust
+        let x = 1;
+        let y = 2;
+        ```
+
+        More prose.
+    "};
+    let reflowed = reflow_markdown(input, 0);
+    assert!(reflowed.contains("```rust\nlet x = 1;\nlet y = 2;\n```"));
+}
+
+#[test]
+fn list_items_each_reflow_independently() {
+    let input = indoc! {"
+        - First item. With two sentences.
+        - Second item. Also two.
+    "};
+    let want = indoc! {"
+        - First item.
+          With two sentences.
+        - Second item.
+          Also two.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn ordered_list_item_uses_three_space_continuation() {
+    let input = indoc! {"
+        1. First step. With detail.
+        2. Second step.
+    "};
+    let want = indoc! {"
+        1. First step.
+           With detail.
+        2. Second step.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn list_item_continuation_indent_matches_marker_width() {
+    // The marker `100. ` is 5 characters wide, so continuation lines are
+    // expected to be indented by 5 spaces.
+    let input = indoc! {"
+        100. A very long item with several sentences. Like this one.
+    "};
+    let want = indoc! {"
+        100. A very long item with several sentences.
+             Like this one.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn task_item_continuation_aligns_with_text_after_checkbox() {
+    // `- [ ] ` spans 6 characters: 2 for the bullet marker plus 4 for
+    // `[X] `. Continuation text therefore starts at column 7.
+    let input = indoc! {"
+        - [ ] First task. With more detail.
+    "};
+    let want = indoc! {"
+        - [ ] First task.
+              With more detail.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn checked_task_item_aligns_the_same_as_unchecked() {
+    let input = indoc! {"
+        - [x] Done thing. Some explanation.
+    "};
+    let want = indoc! {"
+        - [x] Done thing.
+              Some explanation.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn task_items_in_a_list_each_get_aligned_continuation() {
+    let input = indoc! {"
+        - [ ] First. With more.
+        - [x] Second. With more.
+    "};
+    let want = indoc! {"
+        - [ ] First.
+              With more.
+        - [x] Second.
+              With more.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn list_item_in_block_quote_uses_compound_prefix() {
+    let input = indoc! {"
+        > - First. Second.
+    "};
+    let want = indoc! {"
+        > - First.
+        >   Second.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn gfm_pipe_tables_are_preserved_verbatim() {
+    // Table parsing depends on the `table` extension being enabled. With it
+    // off, comrak sees each row as a soft-broken paragraph and sembr would
+    // go on to split rows in the middle.
+    let input = indoc! {"
+        Some prose.
+
+        | head | row |
+        | ---- | --- |
+        | a    | b   |
+        | c    | d   |
+
+        More prose. Two sentences.
+    "};
+    let want = indoc! {"
+        Some prose.
+
+        | head | row |
+        | ---- | --- |
+        | a    | b   |
+        | c    | d   |
+
+        More prose.
+        Two sentences.
+    "};
+    assert_eq!(reflow_markdown(input, 0), want);
+    // A second pass with width wrapping on must change nothing further.
+    let first_pass = reflow_markdown(input, 80);
+    let second_pass = reflow_markdown(&first_pass, 80);
+    assert_eq!(first_pass, second_pass);
+}
+
+#[test]
+fn paragraph_with_backslash_hard_break_is_preserved_verbatim() {
+    // GFM hard break written as `\` at the end of a line. The paragraph is
+    // left alone even though sembr would otherwise split its sentences.
+    let with_break = concat!("Foo. Bar.\\", "\n", "Baz.\n");
+    assert_eq!(reflow_markdown(with_break, 0), with_break);
+}
+
+#[test]
+fn paragraph_with_trailing_spaces_hard_break_is_preserved_verbatim() {
+    // GFM hard break written as two spaces before `\n`. `collapse_whitespace`
+    // would quietly swallow that marker, hence the paragraph skips reflow.
+    let with_break = concat!("Foo. Bar.  ", "\n", "Baz.\n");
+    assert_eq!(reflow_markdown(with_break, 0), with_break);
+}
+
+#[test]
+fn hard_break_nested_inside_emphasis_is_preserved() {
+    // Regression: `has_hard_line_break` once looked only at the paragraph's
+    // direct children, which hid a hard break sitting under an `Emph` (or
+    // any other inline container). The emphasis span then counted as
+    // atomic and `fold_line_breaks` turned the hard break into a space.
+    let with_break = concat!("*first.  ", "\n", "second*\n");
+    assert_eq!(reflow_markdown(with_break, 80), with_break);
+}
+
+#[test]
+fn hard_break_nested_inside_link_text_is_preserved() {
+    // Same class of problem as the emphasis case: a hard break beneath a
+    // Link node slips past a direct-children check.
+    let with_break = concat!("[first.  ", "\n", "second](https://example.com)\n");
+    assert_eq!(reflow_markdown(with_break, 80), with_break);
+}
+
+#[test]
+fn backslash_hard_break_nested_inside_emphasis_is_preserved() {
+    // The backslash spelling of a hard break, nested in emphasis, is covered too.
+    let with_break = concat!("*first.\\", "\n", "second*\n");
+    assert_eq!(reflow_markdown(with_break, 80), with_break);
+}
+
+#[test]
+fn hard_break_only_skips_its_own_paragraph() {
+    // Only the paragraph holding the hard break is kept verbatim; its siblings reflow.
+    let input = concat!(
+        "First sentence. Second sentence.\n",
+        "\n",
+        "Has break.",
+        "  \n",
+        "Stays put.\n",
+        "\n",
+        "Third sentence. Fourth sentence.\n",
+    );
+    let want = concat!(
+        "First sentence.\nSecond sentence.\n",
+        "\n",
+        "Has break.",
+        "  \n",
+        "Stays put.\n",
+        "\n",
+        "Third sentence.\nFourth sentence.\n",
+    );
+    assert_eq!(reflow_markdown(input, 0), want);
+}
+
+#[test]
+fn atx_headings_are_preserved_verbatim() {
+    let input = indoc! {"
+        # A Heading
+
+        Some prose.
+    "};
+    let reflowed = reflow_markdown(input, 0);
+    assert!(reflowed.contains("# A Heading"));
+    assert!(reflowed.contains("Some prose."));
+}
+
+#[test]
+fn body_with_no_top_level_paragraphs_is_unchanged() {
+    let only_link_def = "[`x`]: y\n";
+    assert_eq!(reflow_markdown(only_link_def, 0), only_link_def);
+}
+
+#[test]
+fn empty_body_returns_empty() {
+    assert_eq!(reflow_markdown("", 0), String::new());
+}
+
+// ---------------------------------------------------------------------------
+// format_source: full pipeline including extract + reassemble
+// ---------------------------------------------------------------------------
+
+#[test]
+fn empty_source_returns_empty() {
+    assert_eq!(format_source("", 0), String::new());
+}
+
+#[test]
+fn source_without_doc_comments_is_unchanged() {
+    let source = indoc! {"
+        fn main() {
+            let x = 1; // not a doc comment
+            println!(\"{x}\");
+        }
+    "};
+    assert_eq!(format_source(source, 0), source);
+}
+
+#[test]
+fn multiple_blocks_are_all_reformatted() {
+    let source = indoc! {"
+        /// First. Second.
+        fn one() {}
+
+        /// Third. Fourth.
+        fn two() {}
+    "};
+    let want = indoc! {"
+        /// First.
+        /// Second.
+        fn one() {}
+
+        /// Third.
+        /// Fourth.
+        fn two() {}
+    "};
+    assert_eq!(format_source(source, 0), want);
+}
+
+#[test]
+fn reassembly_uses_block_indent() {
+    let source = indoc! {"
+        mod m {
+            /// Hello. World.
+            fn f() {}
+        }
+    "};
+    let want = indoc! {"
+        mod m {
+            /// Hello.
+            /// World.
+            fn f() {}
+        }
+    "};
+    assert_eq!(format_source(source, 0), want);
+}
+
+#[test]
+fn trailing_newline_is_preserved() {
+    // The input ends in `\n`, and so must the formatted output.
+    let formatted = format_source("/// foo bar baz.\nfn f() {}\n", 0);
+    assert!(formatted.ends_with('\n'));
+}
+
+#[test]
+fn surrounding_code_is_preserved_verbatim() {
+    let source = indoc! {"
+        use std::io;
+
+        /// Greet. Politely.
+        pub fn greet() {
+            // inline comment with weird chars: !@#$%
+            let s = \"contains /// inside string\";
+            println!(\"{s}\");
+        }
+    "};
+    let formatted = format_source(source, 0);
+    assert!(formatted.contains("use std::io;"));
+    assert!(formatted.contains("// inline comment with weird chars: !@#$%"));
+    assert!(formatted.contains("\"contains /// inside string\""));
+    assert!(formatted.contains("/// Greet."));
+    assert!(formatted.contains("/// Politely."));
+}
+
+#[test]
+fn format_source_reflows_paragraphs_but_preserves_ref_link_defs() {
+    let source = indoc! {"
+        //! Some prose. Sentence two.
+        //!
+        //! [`x`]: y
+        //! [`z`]: w
+        fn f() {}
+    "};
+    let formatted = format_source(source, 100);
+    assert!(formatted.contains("//! Some prose.\n//! Sentence two."));
+    assert!(formatted.contains("//! [`x`]: y\n//! [`z`]: w"));
+}
diff --git a/crates/contrib/comfort/src/lib.rs b/crates/contrib/comfort/src/lib.rs
new file mode 100644
index 00000000..b5b098d0
--- /dev/null
+++ b/crates/contrib/comfort/src/lib.rs
@@ -0,0 +1,126 @@
+//! Semantic line-break formatter for Rust doc comments and Markdown files.
+//!
+//! `comfort` walks Rust source files, locates outer (`///`) and inner (`//!`)
+//! doc-comment blocks, and reflows each block's prose paragraphs with semantic
+//! line breaks (one sentence per line) plus an optional `max_width` safety net.
+//!
+//! Non-doc code, inline `//` comments, and `/** */` block-style doc comments
+//! are left untouched.
+//! Markdown structure inside doc comments — reference link definitions, block
+//! quotes, lists, code blocks, headings, tables — is preserved verbatim; only
+//! paragraph contents are reflowed.
+
+pub mod cli;
+pub mod extract;
+pub mod format;
+pub mod run;
+pub mod sentence;
+pub mod walk;
+
+#[cfg(test)]
+#[path = "lib_tests.rs"]
+mod tests;
+
+use std::{
+    ffi::OsString,
+    path::{Path, PathBuf},
+    process::ExitCode,
+};
+
+use clap::Parser;
+
+use crate::cli::{Cli, Invocation};
+
+/// Default maximum line width for wrapped doc-comment content.
+pub const DEFAULT_MAX_WIDTH: usize = 80;
+
+/// Entry point shared by both binaries.
+/// `comfort` and `cargo-comfort` each call this function, and the invocation
+/// mode is worked out from `argv[0]` at runtime.
+///
+/// The workspace lints deny `eprintln!`; it is allowed on this function so that
+/// fatal-error reporting lives in a single place.
+#[allow(clippy::print_stderr)]
+#[must_use]
+pub fn cli_main() -> ExitCode {
+    let (invocation, args) = parse_invocation(std::env::args_os().collect());
+    let cli = Cli::parse_from(args);
+
+    let Err(failure) = run::run(&cli, invocation) else {
+        return ExitCode::SUCCESS;
+    };
+    // A failed `--check` exits with 1 and prints nothing here; every other
+    // error is reported on stderr and exits with 2.
+    if matches!(failure, Error::CheckFailed(_)) {
+        return ExitCode::from(1);
+    }
+    eprintln!("comfort: {failure}");
+    ExitCode::from(2)
+}
+
+/// Work out whether the process was started directly (`comfort`) or through
+/// cargo (`cargo-comfort`), judging by the file name of `argv[0]`.
+///
+/// Returns the detected [`Invocation`] together with the argument list to hand
+/// to clap.
+/// In the cargo case, cargo passes the subcommand name (`comfort`) as
+/// `args[1]`; that entry is removed when present.
+fn parse_invocation(mut raw: Vec<OsString>) -> (Invocation, Vec<OsString>) {
+    // Only the final path component of `argv[0]` matters. On Windows the binary
+    // name carries `.exe`, so both spellings are accepted.
+    let invoked_by_cargo = raw
+        .first()
+        .and_then(|arg0| Path::new(arg0).file_name())
+        .is_some_and(|bin| bin == "cargo-comfort" || bin == "cargo-comfort.exe");
+
+    if invoked_by_cargo {
+        // Cargo always passes the subcommand name as args[1]. Drop it if present.
+        if raw.get(1).is_some_and(|arg| arg == "comfort") {
+            raw.remove(1);
+        }
+        (Invocation::Cargo, raw)
+    } else {
+        (Invocation::Direct, raw)
+    }
+}
+
+/// Errors produced by the comfort library.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// I/O error with no file path attached.
+    #[error("io: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// Error reported by the `cargo_metadata` crate.
+    #[error("cargo metadata: {0}")]
+    CargoMetadata(#[from] cargo_metadata::Error),
+
+    /// Error reported by the `ignore` crate.
+    #[error("walk: {0}")]
+    Walk(#[from] ignore::Error),
+
+    /// Failed to read a source file; `path` names the file, which matters when
+    /// many files are walked at once.
+    #[error("failed to read {path}: {source}")]
+    ReadFile {
+        path: PathBuf,
+        #[source]
+        source: std::io::Error,
+    },
+
+    /// Failed to write a reformatted file back to disk.
+    #[error("failed to write {path}: {source}")]
+    WriteFile {
+        path: PathBuf,
+        #[source]
+        source: std::io::Error,
+    },
+
+    /// A `-p`/`--package` or `--exclude` name matches no workspace package.
+    #[error("unknown package: {0}")]
+    UnknownPackage(String),
+
+    /// `--check` found files that would be reformatted; carries their count.
+    #[error("{0} file(s) would be reformatted")]
+    CheckFailed(usize),
+}
diff --git a/crates/contrib/comfort/src/lib_tests.rs b/crates/contrib/comfort/src/lib_tests.rs
new file mode 100644
index 00000000..a923a6a4
--- /dev/null
+++ b/crates/contrib/comfort/src/lib_tests.rs
@@ -0,0 +1,1545 @@
+//! End-to-end tests through the full pipeline (extract + markdown parsing +
+//! sentence splitting + width wrapping).
+//! These tests assert on invariants the user-visible contract makes —
+//! surrounding code preserved, idempotence, markdown blocks unmolested.
+
+use std::{io, path::PathBuf};
+
+use indoc::indoc;
+use pretty_assertions::assert_eq;
+use unicode_width::UnicodeWidthStr;
+
+use crate::{
+    DEFAULT_MAX_WIDTH, Error,
+    format::{
+        FormatOptions, format_markdown_canonical, format_markdown_with, format_rust_source_with,
+        format_source, format_source_canonical, reflow_markdown,
+    },
+};
+
+#[test]
+fn formatting_is_idempotent() {
+    let raw = indoc! {"
+        /// First sentence here. Second sentence on the same source line, which
+        /// should be split by sembr into two separate output lines.
+        pub fn f() {}
+    "};
+    let base = format_source(raw, DEFAULT_MAX_WIDTH);
+    let again = format_source(&base, DEFAULT_MAX_WIDTH);
+    assert_eq!(base, again, "format_source must be idempotent");
+}
+
+#[test]
+fn surrounding_code_unchanged() {
+    let raw = indoc! {"
+        use std::io;
+
+        /// Two sentences here. The splitter will split them.
+        pub fn greet() -> io::Result<()> {
+            // inline // not a doc
+            let s = \"contains /// inside string\";
+            println!(\"{s}\");
+            Ok(())
+        }
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert!(got.contains("use std::io;"));
+    assert!(got.contains("    // inline // not a doc"));
+    assert!(got.contains("\"contains /// inside string\""));
+    assert!(got.contains("    println!(\"{s}\");"));
+    assert!(got.contains("    Ok(())"));
+}
+
+#[test]
+fn fenced_code_block_inside_doc_comment_survives() {
+    let src = indoc! {"
+        /// Example.
+        ///
+        /// ```rust
+        /// let x = 1;
+        /// let y = 2;
+        /// ```
+        ///
+        /// More prose.
+        pub fn f() {}
+    "};
+    let out = format_source(src, DEFAULT_MAX_WIDTH);
+    assert!(out.contains("/// ```rust"));
+    assert!(out.contains("/// let x = 1;"));
+    assert!(out.contains("/// let y = 2;"));
+    assert!(out.contains("/// ```\n")); // closing fence, not the ```rust opener
+    let twice = format_source(&out, DEFAULT_MAX_WIDTH);
+    assert_eq!(out, twice);
+}
+
+#[test]
+fn inner_module_docs_are_handled() {
+    let raw = indoc! {"
+        //! This module does a thing. It does several things, actually.
+
+        pub fn f() {}
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert!(got.contains("//! This module does a thing."));
+    assert!(got.contains("//! It does several things, actually."));
+}
+
+#[test]
+fn max_width_accounts_for_indent_and_prefix() {
+    // From a user report: inside a 4-space-indented `//!` block, max_width=10
+    // leaves `10 - 4 - 4 = 2` columns for content. Words wider than that are
+    // kept whole (NoHyphenation, break_words=false), one per output line.
+    let raw = indoc! {"
+        mod m {
+            //! foo bar
+        }
+    "};
+    let want = indoc! {"
+        mod m {
+            //! foo
+            //! bar
+        }
+    "};
+    assert_eq!(format_source(raw, 10), want);
+}
+
+#[test]
+fn long_urls_are_not_broken_under_tight_max_width() {
+    // Companion regression: with a max_width tight enough that wrapping
+    // would like to split the URL, the URL still has to come out whole.
+    let raw = indoc! {"
+        /// See https://example.com/path/to/very/long/resource for details.
+        pub fn f() {}
+    "};
+    let out = format_source(raw, 20);
+    // Whatever line it lands on, the URL must appear unbroken.
+    assert!(
+        out.contains("https://example.com/path/to/very/long/resource"),
+        "URL was broken: {out}"
+    );
+}
+
+#[test]
+fn max_width_zero_disables_width_wrapping() {
+    let raw = indoc! {"
+        /// One very long sentence with many words that would otherwise wrap.
+        pub fn f() {}
+    "};
+    let got = format_source(raw, 0);
+    assert!(got.contains("/// One very long sentence with many words that would otherwise wrap."));
+}
+
+#[test]
+fn max_width_smaller_than_prefix_degrades_to_pure_sembr() {
+    let raw = indoc! {"
+        mod outer {
+            mod inner {
+                /// First sentence. Second sentence.
+                pub fn f() {}
+            }
+        }
+    "};
+    let got = format_source(raw, 4);
+    assert!(got.contains("/// First sentence."));
+    assert!(got.contains("/// Second sentence."));
+}
+
+#[test]
+fn soft_line_breaks_in_paragraph_source_are_collapsed() {
+    // Regression: when one paragraph is laid out over several `///` source
+    // lines, it has to be reflowed as a single logical paragraph rather
+    // than treating each source line as its own line-broken sentence.
+    let raw = indoc! {"
+        /// If `forced_tool` is provided, that tool is included even when its
+        /// `enable()` check returns `false`. This prevents a mismatch between
+        /// `tool_choice` and the declared tools list.
+        pub fn f() {}
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+
+    // Each sentence ends up on its own contiguous run of lines. The bug being
+    // guarded against left stray breaks such as `This\n` with `prevents` on
+    // the following line; neither pattern below may appear.
+    assert!(!got.contains("`false`.\n/// This\n/// prevents"));
+    assert!(!got.contains("that\n/// tool"));
+
+    // Idempotence: a second pass must leave the output untouched.
+    let again = format_source(&got, DEFAULT_MAX_WIDTH);
+    assert_eq!(got, again);
+}
+
+#[test]
+fn reference_link_definitions_survive_end_to_end() {
+    let raw = indoc! {"
+        //! Module docs.
+        //!
+        //! [`format`]: super::format
+        //! [`extract`]: super::extract
+        //! [`engine`]: super::engine
+
+        pub fn f() {}
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert!(got.contains("//! [`format`]: super::format"));
+    assert!(got.contains("//! [`extract`]: super::extract"));
+    assert!(got.contains("//! [`engine`]: super::engine"));
+    assert!(!got.contains("super::format ["));
+}
+
+#[test]
+fn block_quote_round_trips_when_already_sembr() {
+    // Each `> ` line already holds exactly one sentence, so reflow must
+    // return the input unchanged.
+    let raw = indoc! {"
+        /// > This is a note.
+        /// > It spans two lines.
+        pub fn f() {}
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert_eq!(got, raw);
+}
+
+#[test]
+fn block_quote_reflows_two_sentences_end_to_end() {
+    let raw = indoc! {"
+        /// > Two sentences on one line. Like this.
+        pub fn f() {}
+    "};
+    let want = indoc! {"
+        /// > Two sentences on one line.
+        /// > Like this.
+        pub fn f() {}
+    "};
+    assert_eq!(format_source(raw, DEFAULT_MAX_WIDTH), want);
+}
+
+#[test]
+fn list_items_reflow_with_marker_aligned_continuation_end_to_end() {
+    let src = indoc! {"
+        /// - First item with two sentences. Like so.
+        /// - 100. Outer item. Continues.
+        pub fn f() {}
+    "};
+    // Bulleted list: 2-space continuation. Only the first item is asserted
+    // on; how the second item's leading `100.` parses (literal prose vs. a
+    // nested ordered list — NOTE(review): confirm) is not checked here.
+    let out = format_source(src, DEFAULT_MAX_WIDTH);
+    assert!(out.contains("/// - First item with two sentences.\n///   Like so."));
+}
+
+#[test]
+fn list_item_inside_block_quote_uses_compound_prefix_end_to_end() {
+    let raw = indoc! {"
+        /// > - First. Second.
+        pub fn f() {}
+    "};
+    let want = indoc! {"
+        /// > - First.
+        /// >   Second.
+        pub fn f() {}
+    "};
+    assert_eq!(format_source(raw, DEFAULT_MAX_WIDTH), want);
+}
+
+#[test]
+fn list_item_idempotent_end_to_end() {
+    let raw = indoc! {"
+        /// - First item.
+        ///   With continuation.
+        /// - Second item.
+        pub fn f() {}
+    "};
+    let base = format_source(raw, DEFAULT_MAX_WIDTH);
+    let again = format_source(&base, DEFAULT_MAX_WIDTH);
+    assert_eq!(base, again);
+}
+
+#[test]
+fn gfm_pipe_table_in_doc_comment_survives_end_to_end() {
+    let raw = indoc! {"
+        /// Examples.
+        ///
+        /// | name | meaning |
+        /// | ---- | ------- |
+        /// | foo  | a thing |
+        /// | bar  | another |
+        ///
+        /// See above.
+        pub fn f() {}
+    "};
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert!(got.contains("/// | name | meaning |"));
+    assert!(got.contains("/// | ---- | ------- |"));
+    assert!(got.contains("/// | foo  | a thing |"));
+    assert!(got.contains("/// | bar  | another |"));
+    let again = format_source(&got, DEFAULT_MAX_WIDTH);
+    assert_eq!(got, again);
+}
+
+#[test]
+fn backslash_hard_break_in_doc_comment_survives_end_to_end() {
+    // Hard breaks used address-block style: every backslash line ending is
+    // meant to render as a forced `<br>` in rustdoc.
+    let raw = concat!(
+        "/// Example output:\\\n",
+        "/// 123 Main St\\\n",
+        "/// Springfield\n",
+        "pub fn f() {}\n",
+    );
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert_eq!(got, raw);
+}
+
+#[test]
+fn trailing_spaces_hard_break_in_doc_comment_survives_end_to_end() {
+    // Same guarantee for the two-trailing-spaces hard-break syntax. Its
+    // marker is invisible in plain text, which makes it the variant that
+    // is easiest to drop by accident.
+    let raw = concat!(
+        "/// Note: this works.",
+        "  \n",
+        "/// More info below.\n",
+        "pub fn f() {}\n",
+    );
+    let got = format_source(raw, DEFAULT_MAX_WIDTH);
+    assert_eq!(got, raw);
+}
+
+#[test]
+fn markdown_paragraph_is_reflowed_end_to_end() {
+    // The input is a plain markdown document, as for `comfort foo.md`: there
+    // are no `///` prefixes and the entire text is markdown.
+    let doc = indoc! {"
+        # Title
+
+        First sentence here. Second sentence on the same source line.
+
+        > A blockquote. With two sentences.
+
+        - Item one. With detail.
+        - Item two.
+
+        [^note]: A footnote. With two sentences.
+
+        See[^note] for details.
+    "};
+    let got = reflow_markdown(doc, DEFAULT_MAX_WIDTH);
+
+    // Top-level paragraph: one sentence per line.
+    assert!(got.contains("First sentence here.\nSecond sentence on the same source line."));
+    // Blockquote: continuation line repeats the `> ` prefix.
+    assert!(got.contains("> A blockquote.\n> With two sentences."));
+    // List item: continuation line is indented by 2 spaces.
+    assert!(got.contains("- Item one.\n  With detail."));
+    // Footnote definition: continuation line is indented by 4 spaces.
+    assert!(got.contains("[^note]: A footnote.\n    With two sentences."));
+    // The heading is still present.
+    assert!(got.contains("# Title"));
+}
+
+#[test]
+fn markdown_frontmatter_is_preserved_verbatim() {
+    // A YAML frontmatter block at the top of the file must be left alone.
+    // Otherwise a line like `title: Foo` would look like a one-line
+    // paragraph and be fed through the sentence splitter as content.
+    let doc = indoc! {"
+        ---
+        title: Foo
+        date: 2024-01-01
+        tags:
+          - one
+          - two
+        ---
+
+        # Heading
+
+        A paragraph. With two sentences.
+    "};
+    let got = reflow_markdown(doc, DEFAULT_MAX_WIDTH);
+    // The frontmatter lines come through exactly as written.
+    assert!(got.contains("---\ntitle: Foo\ndate: 2024-01-01"));
+    assert!(got.contains("  - one\n  - two\n---"));
+    // The paragraph after it is still reflowed.
+    assert!(got.contains("A paragraph.\nWith two sentences."));
+}
+
+#[test]
+fn list_item_with_bold_lead_in_keeps_bold_intact() {
+    // Regression: when a list item's first sentence ended inside a
+    // `**...**` span, the split happened at the period and pushed the
+    // closing `**` onto the following line.
+    let doc = indoc! {"
+        - **What every rerank call records.** Provider ID, model name.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("**What every rerank call records.**"),
+        "bold span was broken: {out}"
+    );
+    // No output line may begin (after indentation) with a stray `**`.
+    assert!(
+        !out.lines().any(|l| l.trim_start().starts_with("**")),
+        "closing `**` got stranded on its own line: {out}"
+    );
+}
+
+#[test]
+fn italic_span_with_period_keeps_emphasis_intact() {
+    // Asterisk italics: the period inside `*foo.*` is not a split point.
+    let doc = indoc! {"
+        *Foo.* Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("*Foo.* Body sentence here."),
+        "italic span broken: {out}"
+    );
+}
+
+#[test]
+fn underscore_italic_with_period_keeps_emphasis_intact() {
+    // Underscore-delimited italic. A regex fallback would also match inside
+    // `snake_case` identifiers; the AST applies the proper emphasis rules.
+    let doc = indoc! {"
+        _Foo._ Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("_Foo._ Body sentence here."),
+        "underscore italic broken: {out}"
+    );
+}
+
+#[test]
+fn underscore_bold_with_period_keeps_emphasis_intact() {
+    let doc = indoc! {"
+        __Title.__ Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("__Title.__ Body sentence here."),
+        "underscore bold broken: {out}"
+    );
+}
+
+#[test]
+fn triple_asterisk_bold_italic_with_period_keeps_emphasis_intact() {
+    // In CommonMark `***foo***` parses as Strong inside Emph (or the other
+    // way round); in both cases the outer span's AST range covers it all.
+    let doc = indoc! {"
+        ***Title.*** Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("***Title.*** Body sentence here."),
+        "triple-asterisk bold-italic broken: {out}"
+    );
+}
+
+#[test]
+fn italic_inside_block_quote_keeps_emphasis_intact() {
+    // Stripping the block-quote markers shifts byte offsets, but re-parsing
+    // the cleaned text yields inline sourcepos in the matching coordinate
+    // system. So underscore italics (which a regex fallback can't catch
+    // without false-matching `snake_case`) also survive inside blockquotes.
+    let doc = indoc! {"
+        > _Foo._ Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("> _Foo._ Body sentence here."),
+        "underscore italic broken inside blockquote: {out}"
+    );
+}
+
+#[test]
+fn snake_case_inside_block_quote_is_not_protected() {
+    // Counterpart check: a snake_case identifier inside a blockquote must
+    // not be mistaken for an italic span.
+    let doc = indoc! {"
+        > See foo_bar_baz. Next sentence.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("> See foo_bar_baz.\n> Next sentence."),
+        "snake_case got mangled inside blockquote: {out}"
+    );
+}
+
+#[test]
+fn nested_block_quote_emphasis_survives() {
+    // Two levels of `>` are stripped before the re-parse; emphasis survives.
+    let doc = indoc! {"
+        > > _Foo._ Body sentence here.
+    "};
+    let out = reflow_markdown(doc, 80);
+    assert!(
+        out.contains("> > _Foo._ Body sentence here."),
+        "emphasis broken inside nested blockquote: {out}"
+    );
+}
+
+#[test]
+fn emphasis_spanning_two_source_lines_does_not_over_indent_continuation() {
+    // Regression: an italic span crossing a source line boundary inside a
+    // list item used to indent the continuation line by four spaces rather
+    // than two. The `\n  ` embedded in the italic span reached textwrap
+    // intact, and the container-prefix step then applied the indent a
+    // second time.
+    let text = indoc! {"
+        - Lead in here. *Italics span across
+          two source lines*, then more body sentence here.
+    "};
+    let out = reflow_markdown(text, 80);
+    for line in out.lines() {
+        // Allowed: column 0 (the list-marker line) or exactly two spaces of
+        // continuation indent. A four-space indent is the bug.
+        assert!(
+            !line.starts_with("    "),
+            "line over-indented (4 spaces): {line:?}"
+        );
+    }
+    // The italic span must also have been folded into one logical
+    // sentence, with no `\n  ` left inside it.
+    assert!(
+        !out.contains("*Italics span across\n"),
+        "italic span retained its source-level newline: {out}"
+    );
+}
+
+#[test]
+fn inline_code_spanning_two_source_lines_does_not_over_indent_continuation() {
+    // The same over-indent check, this time for an inline code span (it
+    // reproduces the `tracing::warn!(...)` case from the original report).
+    let text = indoc! {"
+        - Emit `tracing::warn!(\"foo bar baz qux quux corge
+          grault garply\")` for the legacy field on each launch.
+    "};
+    let out = reflow_markdown(text, 80);
+    for line in out.lines() {
+        assert!(
+            !line.starts_with("    "),
+            "line over-indented (4 spaces): {line:?}"
+        );
+    }
+    // The code span must end up on one line: the source-level `\n  `
+    // inside it may not survive.
+    assert!(
+        !out.contains("corge\n"),
+        "inline code retained its source-level newline: {out}"
+    );
+}
+
+#[test]
+fn snake_case_identifier_is_not_treated_as_underscore_italic() {
+    // A regex such as `_[^_]+_` would wrongly match `_bar_` in `foo_bar_baz`.
+    // The AST approach follows CommonMark, where underscore emphasis needs
+    // word-boundary markers, so identifiers are left alone.
+    let doc = indoc! {"
+        See foo_bar_baz. Next sentence.
+    "};
+    let out = reflow_markdown(doc, 80);
+    // The identifier is kept literally, and the period that follows it
+    // still produces a sembr split.
+    assert!(
+        out.contains("See foo_bar_baz.\nNext sentence."),
+        "snake_case got mangled: {out}"
+    );
+}
+
+#[test]
+fn markdown_is_idempotent_end_to_end() {
+    let doc = indoc! {"
+        # Title
+
+        Some prose. Two sentences worth.
+
+        - Item. Continued.
+    "};
+    let base = reflow_markdown(doc, DEFAULT_MAX_WIDTH);
+    let again = reflow_markdown(&base, DEFAULT_MAX_WIDTH);
+    assert_eq!(base, again);
+}
+
+// ---------------------------------------------------------------------------
+// `--format-markdown` (canonical) mode
+// ---------------------------------------------------------------------------
+
+#[test]
+fn canonical_default_off_preserves_alternate_list_marker_byte_for_byte() {
+    // When `--format-markdown` is not given, `*` bullets remain `*`, even
+    // if the markdown content is otherwise reflowable: default mode is
+    // byte-preserving outside paragraphs.
+    let text = indoc! {"
+        * First item.
+        * Second item.
+    "};
+    let out = reflow_markdown(text, 80);
+    assert!(
+        out.contains("* First item."),
+        "default mode rewrote the bullet marker: {out}"
+    );
+}
+
+#[test]
+fn canonical_mode_normalizes_list_markers_to_dash() {
+    // In canonical mode comrak's formatter applies our `Dash` preference,
+    // so `*` bullets come out as `-`.
+    let text = indoc! {"
+        * First item.
+        * Second item.
+    "};
+    let out = format_markdown_canonical(text, 80);
+    assert!(
+        out.contains("- First item."),
+        "canonical mode didn't normalize bullet to dash: {out}"
+    );
+    assert!(
+        !out.contains("* First item."),
+        "original `*` marker leaked through: {out}"
+    );
+}
+
+#[test]
+fn canonical_mode_aligns_table_columns() {
+    // The source table is misaligned; canonical mode is expected to pad the
+    // data cells so each column is as wide as its widest cell.
+    let text = indoc! {"
+        | A | B |
+        |---|---|
+        | short | very long content |
+        | x | y |
+    "};
+    let got = format_markdown_canonical(text, 80);
+    // The `|` separators must sit at the same offsets in every row.
+    let table_lines: Vec<&str> = got
+        .lines()
+        .filter(|r| r.trim_start().starts_with('|'))
+        .collect();
+    assert!(
+        table_lines.len() >= 4,
+        "expected header + separator + 2 data rows, got {} lines",
+        table_lines.len()
+    );
+    let pipe_positions: Vec<Vec<usize>> = table_lines
+        .iter()
+        .map(|r| {
+            r.char_indices()
+                .filter(|(_, c)| *c == '|')
+                .map(|(i, _)| i)
+                .collect()
+        })
+        .collect();
+    let basis = &pipe_positions[0];
+    for (i, positions) in pipe_positions.iter().enumerate() {
+        assert_eq!(
+            positions, basis,
+            "row {i} pipe positions don't align with header: {table_lines:#?}"
+        );
+    }
+}
+
+#[test]
+fn canonical_mode_aligns_with_explicit_alignment_markers() {
+    // The separator row's colon pattern carries through after alignment.
+    let body = indoc! {"
+        | left | center | right |
+        | :--- | :---: | ---: |
+        | a | b | c |
+    "};
+    let out = format_markdown_canonical(body, 80);
+    // Left-aligned column keeps leading `:`, right-aligned trailing `:`,
+    // center has both. Match whole cells: `:----:` alone contains the others.
+    assert!(
+        out.contains("| :--- |") && out.contains("| :----: |") && out.contains("| ----: |"),
+        "alignment markers lost or mis-shaped: {out}"
+    );
+}
+
+#[test]
+fn canonical_mode_aligns_table_with_wide_characters() {
+    // CJK wide characters occupy 2 cells each per `UnicodeWidthStr`, so the
+    // table has to line up by display width rather than by codepoint count.
+    let text = indoc! {"
+        | en | jp |
+        |---|---|
+        | hi | こんにちは |
+        | x | y |
+    "};
+    let got = format_markdown_canonical(text, 80);
+    let table_lines: Vec<&str> = got
+        .lines()
+        .filter(|r| r.trim_start().starts_with('|'))
+        .collect();
+    // Comparing pipe positions as BYTE offsets would not work here, because
+    // the multi-byte CJK row shifts them. The real property is visual: the
+    // second cell is padded to one display width in every row. As a first
+    // approximation, count the `|` characters — each row must have exactly
+    // 3 of them (start, between cols, end).
+    for line in &table_lines {
+        let pipe_count = line.chars().filter(|c| *c == '|').count();
+        assert_eq!(pipe_count, 3, "row has unexpected pipe count: {line:?}");
+    }
+    // Then check the padding itself: the right-edge `|` of every row,
+    // including the CJK one, must land on the same DISPLAY column.
+    let display_col_of_last_pipe = |line: &str| -> usize {
+        let last_pipe_byte = line.rfind('|').unwrap();
+        UnicodeWidthStr::width(&line[..last_pipe_byte])
+    };
+    let header_col = display_col_of_last_pipe(table_lines[0]);
+    for line in &table_lines[1..] {
+        assert_eq!(
+            display_col_of_last_pipe(line),
+            header_col,
+            "row's right edge isn't aligned: header={:?} other={:?}",
+            table_lines[0],
+            line
+        );
+    }
+}
+
+#[test]
+fn canonical_mode_table_alignment_is_idempotent() {
+    // Starting from an aligned table, a second pass must match the first.
+    let text = indoc! {"
+        | A     | B                 |
+        | ----- | ----------------- |
+        | short | very long content |
+        | x     | y                 |
+    "};
+    let base = format_markdown_canonical(text, 80);
+    let again = format_markdown_canonical(&base, 80);
+    assert_eq!(base, again);
+}
+
+#[test]
+fn canonical_mode_still_does_sembr_on_paragraphs() {
+    // Canonicalisation does not replace sembr: paragraphs are still split.
+    let text = indoc! {"
+        First sentence. Second sentence on the same line.
+    "};
+    let out = format_markdown_canonical(text, 80);
+    assert!(
+        out.contains("First sentence.\nSecond sentence on the same line."),
+        "sembr didn't run after canonicalisation: {out}"
+    );
+}
+
+#[test]
+fn canonical_mode_is_idempotent_end_to_end() {
+    let text = indoc! {"
+        # Heading
+
+        First sentence here. Second sentence here.
+
+        * Item one. With more.
+        * Item two.
+
+        | A | B |
+        |---|---|
+        | x | y |
+    "};
+    let base = format_markdown_canonical(text, 80);
+    let again = format_markdown_canonical(&base, 80);
+    assert_eq!(base, again, "canonical mode must be idempotent");
+}
+
+#[test]
+fn canonical_mode_on_rust_source_normalizes_inside_doc_comments() {
+    // `format_source_canonical` is the entry point for Rust source; it
+    // applies canonical mode to each `///` block.
+    let raw = indoc! {"
+        /// First sentence here. Second sentence here.
+        ///
+        /// * Item one.
+        /// * Item two.
+        pub fn f() {}
+    "};
+    let out = format_source_canonical(raw, 80);
+    assert!(
+        out.contains("/// - Item one."),
+        "list markers not normalised inside doc comment: {out}"
+    );
+    assert!(
+        out.contains("/// First sentence here.\n/// Second sentence here."),
+        "sembr didn't run inside doc comment: {out}"
+    );
+    // The code around the doc comment is still there, unchanged.
+    assert!(out.contains("pub fn f() {}"));
+}
+
+#[test]
+fn canonical_mode_preserves_doc_comment_scaffolding() {
+    // Canonical mode does not alter the scaffolding: the `///` prefix and
+    // the indentation are taken from the original source.
+    let raw = indoc! {"
+        mod m {
+            /// Inner doc. Two sentences.
+            pub fn f() {}
+        }
+    "};
+    let got = format_source_canonical(raw, 80);
+    assert!(got.contains("    /// Inner doc."));
+    assert!(got.contains("    /// Two sentences."));
+}
+
+#[test]
+fn canonical_mode_preserves_hard_line_breaks() {
+    // A hard break (two trailing spaces or `\\\n`) is not the same thing as
+    // a soft break: it renders as `<br>`, not as a space. The canonical
+    // pipeline has to keep hard breaks across the width=MAX change. comrak
+    // rewrites the two-trailing-spaces form to the backslash form, which
+    // means the same and is arguably easier to read in source.
+    let text = "First line.  \nSecond line, hard-broken from first.\n";
+    let out = format_markdown_canonical(text, 80);
+    assert!(
+        out.contains("First line.\\\n") || out.contains("First line.  \n"),
+        "hard break lost: {out:?}"
+    );
+}
+
+#[test]
+fn default_mode_preserves_hard_line_breaks_verbatim() {
+    // With --format-markdown off there is no round-trip through comrak's
+    // formatter, so a hard break must come out byte-for-byte as written.
+    let text = "First line.  \nSecond line, hard-broken from first.\n";
+    let out = reflow_markdown(text, 80);
+    assert!(
+        out.contains("First line.  \n"),
+        "two-trailing-spaces hard break form not preserved verbatim: {out:?}"
+    );
+}
+
+#[test]
+fn canonical_mode_does_not_escape_digit_period_in_continuation_lines() {
+    // Regression: under `render.width = 0`, comrak's `format_commonmark`
+    // keeps source soft breaks and defensively escapes any `N.` sequence
+    // (e.g. `404\.`) that starts a continuation line, since on re-parse it
+    // could be read as an ordered-list marker. Setting
+    // `render.width = usize::MAX` fixes this: soft breaks are collapsed, so
+    // digit-period sequences land mid-line.
+    let text = indoc! {"
+        Each model is loaded at startup; requests for unloaded models return HTTP
+        404.
+        Apply sigmoid normalization next.
+    "};
+    let out = format_markdown_canonical(text, 80);
+    assert!(
+        out.contains("404."),
+        "output missing literal `404.`: {out:?}"
+    );
+    assert!(
+        !out.contains(r"404\."),
+        "output has defensive escape `404\\.`: {out:?}"
+    );
+}
+
+#[test]
+fn canonical_mode_preserves_rust_intra_doc_shortcut_references() {
+    // Regression: `[`format_source`]` and similar shortcut references
+    // (no `[label]: url` definition in the body) used to be escaped as
+    // `\[`format_source`\]` by comrak's defensive escape logic, because
+    // the parser treated them as plain bracketed text. The fix uses a
+    // narrow `broken_link_callback` in `protect_reference_form_links`
+    // that resolves intra-doc-like labels to `Link` nodes, so the
+    // protection step can sentinelise their source bytes.
+    let body = indoc! {"
+        1. [`format_source`] finds `///` blocks via
+           [`find_blocks`] and splices bodies back.
+        2. [`reflow_markdown`] parses each block's body.
+    "};
+    let out = format_markdown_canonical(body, 80);
+    for needle in [
+        "[`format_source`]",
+        "[`find_blocks`]",
+        "[`reflow_markdown`]",
+    ] {
+        assert!(
+            out.contains(needle),
+            "intra-doc reference {needle:?} missing from output: {out:?}"
+        );
+    }
+    assert!(
+        !out.contains(r"\["),
+        "defensive bracket escape leaked into output: {out:?}"
+    );
+}
+
+#[test]
+fn intra_doc_callback_does_not_break_task_items() {
+    // If the `broken_link_callback` returned `Some` for `[ ]`, it would
+    // swallow task markers. Its narrow filter (empty / `x` / `X` labels)
+    // returns `None`, so the tasklist extension still sees them.
+    let text = indoc! {"
+        - [ ] First task. With more detail.
+    "};
+    let out = reflow_markdown(text, 0);
+    assert!(
+        out.contains("- [ ] First task."),
+        "task marker lost: {out:?}"
+    );
+    // A 6-space continuation indent (2 for list padding + 4 for the task
+    // item) shows that the parser still recognised the task item.
+    assert!(
+        out.contains("\n      With more detail."),
+        "task item continuation indent wrong: {out:?}"
+    );
+}
+
+#[test]
+fn intra_doc_callback_does_not_break_footnotes() {
+    // If the `broken_link_callback` returned `Some` for `[^note]`, it would
+    // swallow footnote references. Its narrow filter (`^...` labels)
+    // returns `None`, so the footnotes extension still sees them.
+    let text = indoc! {"
+        See[^note] for details.
+
+        [^note]: First sentence. Second sentence.
+    "};
+    let out = format_markdown_canonical(text, 0);
+    // In prose the reference must remain `[^note]`. The defensive form
+    // `\[^note\]` would show that the parser did not recognise it as a
+    // footnote reference.
+    assert!(
+        out.contains("See[^note] for details."),
+        "footnote reference got escaped: {out:?}"
+    );
+    assert!(
+        !out.contains(r"\[^note\]"),
+        "defensive escape leaked into footnote reference: {out:?}"
+    );
+    // The definition must survive the canonical pass. comrak may reshape
+    // it (e.g. label on its own line), but the content has to remain.
+    assert!(
+        out.contains("[^note]:"),
+        "footnote definition disappeared: {out:?}"
+    );
+    assert!(
+        out.contains("First sentence.") && out.contains("Second sentence."),
+        "footnote definition content lost: {out:?}"
+    );
+}
+
+#[test]
+fn markdown_pipeline_preserves_exact_trailing_newline_count() {
+    // Regression: conform.nvim's `injected` formatter pulls the markdown
+    // body out of Rust doc comments and runs comfort on it as the markdown
+    // formatter. A body that ends in `\n\n` stands for a trailing empty
+    // `///` line in the source; collapsing `\n\n` to `\n` would silently
+    // drop that empty `///` on every save.
+    let text_two_newlines = "Some prose.\n\n[link]: https://example.com\n\n";
+    let out = format_markdown_canonical(text_two_newlines, 80);
+    assert!(
+        out.ends_with("\n\n"),
+        "trailing newline count not preserved: {out:?}"
+    );
+
+    let text_three_newlines = "Some prose.\n\n\n";
+    let out = format_markdown_canonical(text_three_newlines, 80);
+    assert!(
+        out.ends_with("\n\n\n"),
+        "trailing newline count not preserved: {out:?}"
+    );
+}
+
+#[test]
+fn canonical_mode_preserves_trailing_newline_of_input() {
+    // A trailing newline (markdown files) is kept; an input without one
+    // (doc-comment bodies) must not gain one.
+    let terminated = "Some prose.\n";
+    let out = format_markdown_canonical(terminated, 80);
+    assert!(out.ends_with('\n'), "trailing newline dropped: {out:?}");
+
+    let unterminated = "Some prose.";
+    let out = format_markdown_canonical(unterminated, 80);
+    assert!(!out.ends_with('\n'), "trailing newline added: {out:?}");
+}
+
+// ---------------------------------------------------------------------------
+// Reference-form link protection across canonical (`--format-markdown`)
+// ---------------------------------------------------------------------------
+
+#[test]
+fn canonical_preserves_full_form_reference_link() {
+    // Regression: left alone, comrak's `format_commonmark` rewrites
+    // `[text][label]` as inline `[text](url)` and drops the definition.
+    let input = indoc! {"
+        See [`foo`][foo-impl] for more.
+
+        [foo-impl]: ../../crates/foo.rs
+    "};
+    let out = format_markdown_canonical(input, 80);
+    assert!(
+        out.contains("[`foo`][foo-impl]"),
+        "full-form reference link was inlined: {out}"
+    );
+    assert!(
+        out.contains("[foo-impl]: ../../crates/foo.rs"),
+        "reference definition was dropped: {out}"
+    );
+}
+
+#[test]
+fn canonical_preserves_shortcut_form_reference_link() {
+    let input = indoc! {"
+        See [foo] for more.
+
+        [foo]: https://example.com
+    "};
+    let out = format_markdown_canonical(input, 80);
+    assert!(
+        out.contains("See [foo] for more."),
+        "shortcut form not preserved: {out}"
+    );
+    assert!(
+        out.contains("[foo]: https://example.com"),
+        "definition dropped: {out}"
+    );
+}
+
+#[test]
+fn canonical_still_inlines_actual_inline_links() {
+    // Sanity guard against the reference-link protection over-firing: a
+    // link the user wrote inline is not protected and must come out of the
+    // canonical pass still inline.
+    let input = indoc! {"
+        See [docs](https://example.com) for more.
+    "};
+    let out = format_markdown_canonical(input, 80);
+    assert!(
+        out.contains("[docs](https://example.com)"),
+        "inline link got converted to reference: {out}"
+    );
+}
+
+#[test]
+fn canonical_handles_mixed_inline_and_reference_links() {
+    let input = indoc! {"
+        See [docs](https://example.com) and [`foo`][foo-impl] for more.
+
+        [foo-impl]: ../../crates/foo.rs
+    "};
+    let out = format_markdown_canonical(input, 80);
+    assert!(
+        out.contains("[docs](https://example.com)"),
+        "inline link mangled: {out}"
+    );
+    assert!(
+        out.contains("[`foo`][foo-impl]"),
+        "reference-form link inlined: {out}"
+    );
+    assert!(
+        out.contains("[foo-impl]: ../../crates/foo.rs"),
+        "definition dropped: {out}"
+    );
+}
+
+#[test]
+fn canonical_preserves_user_chosen_labels_with_reference_links_flag() {
+    // The bug this pins: running `--format-markdown` together with
+    // `--reference-links` used to destroy the user's own short labels.
+    let input = indoc! {"
+        See [`verify_file_checksum`][verify-impl] for the impl.
+
+        [verify-impl]: ../../crates/jp_mcp/src/client.rs
+    "};
+    let both_flags = FormatOptions {
+        reference_links: true,
+        canonical: true,
+        max_width: 80,
+    };
+    let out = format_markdown_with(input, &both_flags);
+    assert!(
+        out.contains("[verify-impl]: ../../crates/jp_mcp/src/client.rs"),
+        "user's `verify-impl` label was rewritten: {out}"
+    );
+    assert!(
+        out.contains("[`verify_file_checksum`][verify-impl]"),
+        "reference form not preserved: {out}"
+    );
+}
+
+#[test]
+fn canonical_protection_only_affects_resolved_reference_links() {
+    // A bare `[brackets]` span with no matching definition is not a
+    // reference-form link: comrak doesn't parse it as a Link node, so the
+    // protection has nothing to stash. The `\[...\]` escaping seen in the
+    // output is comrak's own doing (`format_commonmark` disambiguates
+    // literal brackets), not ours. What this test pins is the negative:
+    // no sentinel was spuriously substituted for the bracketed text.
+    let input = indoc! {"
+        Use [square brackets] in prose freely.
+    "};
+    let out = format_markdown_canonical(input, 80);
+    // Sentinel markers start with `XCMFRTLR`; none may reach the output.
+    assert!(
+        !out.contains("XCMFRTLR"),
+        "sentinel leaked into output: {out}"
+    );
+    // Escaped or not, the words inside the brackets are still there.
+    assert!(
+        out.contains("square brackets"),
+        "prose content disappeared: {out}"
+    );
+}
+
+#[test]
+fn canonical_protection_ignores_definitions_inside_code_fences() {
+    let input = indoc! {"
+        Real link: [foo].
+
+        ```
+        [example]: not-a-real-def
+        ```
+
+        [foo]: https://example.com
+    "};
+    let out = format_markdown_canonical(input, 80);
+    // The definition-shaped line must still sit before the closing fence.
+    let closing_fence = out.rfind("```").unwrap();
+    let fake_def = out.find("[example]: not-a-real-def").unwrap();
+    assert!(
+        closing_fence > fake_def,
+        "fake def was extracted out of the fence: {out}"
+    );
+    // The genuine definition outside the fence is kept.
+    assert!(
+        out.contains("[foo]: https://example.com"),
+        "real definition dropped: {out}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// `--reference-links` (reference-link extraction) mode
+// ---------------------------------------------------------------------------
+
+fn ref_opts(max_width: usize) -> FormatOptions {
+    FormatOptions {
+        reference_links: true,
+        canonical: false,
+        max_width,
+    }
+}
+
+#[test]
+fn reference_links_default_off_preserves_inline_links() {
+    // Plain reflow (no `--reference-links`) must leave inline links alone.
+    let input = indoc! {"
+        See [docs](https://example.com) for more.
+    "};
+    let out = reflow_markdown(input, 80);
+    assert!(
+        out.contains("[docs](https://example.com)"),
+        "default mode rewrote the inline link: {out}"
+    );
+}
+
+#[test]
+fn reference_links_converts_inline_to_shortcut_form() {
+    let input = indoc! {"
+        See [docs](https://example.com) for more.
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("See [docs] for more."),
+        "inline link not converted to shortcut form: {out}"
+    );
+    assert!(
+        out.contains("[docs]: https://example.com"),
+        "reference definition not appended: {out}"
+    );
+}
+
+#[test]
+fn reference_links_dedupes_same_url() {
+    // Same URL, different text: the first link becomes `[docs]`, the second
+    // `[more docs][docs]`, sharing one definition. The sentence is pinned
+    // whole: a bare `[docs]` probe would also match the definition line.
+    let body = indoc! {"
+        See [docs](https://example.com) and [more docs](https://example.com).
+    "};
+    let out = format_markdown_with(body, &ref_opts(80));
+    assert!(
+        out.contains("See [docs] and [more docs][docs]."),
+        "same-URL collision not handled with full-form fallback: {out}"
+    );
+    assert_eq!(
+        out.matches("[docs]: https://example.com").count(),
+        1,
+        "shared URL got more than one definition: {out}"
+    );
+}
+
+#[test]
+fn reference_links_disambiguates_same_text_different_url() {
+    // Identical link text, two URLs: the later link takes a suffixed label.
+    let input = indoc! {"
+        See [docs](https://example.com) and [docs](https://other.com).
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("[docs]: https://example.com"),
+        "first definition missing: {out}"
+    );
+    assert!(
+        out.contains("[docs-2]: https://other.com"),
+        "disambiguated definition missing: {out}"
+    );
+    assert!(
+        out.contains("[docs][docs-2]"),
+        "second link not in full form: {out}"
+    );
+}
+
+#[test]
+fn reference_links_skips_anchor_links() {
+    let input = indoc! {"
+        See [section](#foo) for more.
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("[section](#foo)"),
+        "anchor link should not be converted: {out}"
+    );
+}
+
+#[test]
+fn reference_links_skips_image_links() {
+    let input = indoc! {"
+        See ![diagram](https://example.com/d.png) below.
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("![diagram](https://example.com/d.png)"),
+        "image link should not be converted: {out}"
+    );
+}
+
+#[test]
+fn reference_links_aggregates_pre_existing_definitions() {
+    // A definition that already exists in the input is consolidated with
+    // the newly generated ones, and the combined list is sorted by label.
+    let input = indoc! {"
+        See [zebra] and [alpha](https://alpha.example).
+
+        [zebra]: https://zebra.example
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    // Only the relative order of the two definitions is asserted here.
+    let alpha_def = out.find("[alpha]: https://alpha.example").unwrap();
+    let zebra_def = out.find("[zebra]: https://zebra.example").unwrap();
+    assert!(
+        zebra_def > alpha_def,
+        "definitions not sorted alphabetically: {out}"
+    );
+}
+
+#[test]
+fn reference_links_preserves_inline_code_with_link_syntax() {
+    // A code span that merely shows `[link](url)` syntax is not a link.
+    let input = indoc! {"
+        Use the syntax `[text](url)` to write links.
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("`[text](url)`"),
+        "inline code with link syntax got mangled: {out}"
+    );
+    assert!(
+        !out.contains("[text]: url"),
+        "link inside inline code spuriously generated a definition: {out}"
+    );
+}
+
+#[test]
+fn reference_links_is_idempotent() {
+    let input = indoc! {"
+        See [docs](https://example.com) and [Rust](https://rust-lang.org).
+    "};
+    let first = format_markdown_with(input, &ref_opts(80));
+    let second = format_markdown_with(&first, &ref_opts(80));
+    assert_eq!(first, second, "reference-link mode must be idempotent");
+}
+
+#[test]
+fn reference_links_works_with_rust_doc_comments() {
+    // The user's example that originally motivated this mode.
+    let source = indoc! {"
+        /// Source language to format.
+        /// With [`Auto`](Language::Auto), per-file detection (extension or
+        /// `--stdin-filename`) determines the format.
+        pub fn f() {}
+    "};
+    let out = format_rust_source_with(source, &ref_opts(80));
+    assert!(
+        out.contains("/// With [`Auto`],"),
+        "link not converted in doc comment: {out}"
+    );
+    assert!(
+        out.contains("/// [`Auto`]: Language::Auto"),
+        "reference definition not at bottom of doc comment: {out}"
+    );
+    assert!(out.contains("pub fn f() {}"));
+}
+
+#[test]
+fn reference_links_composes_with_canonical_mode() {
+    // With both flags on, the canonical pass normalises structure first and
+    // reference-link extraction follows; the output must show both effects.
+    let input = indoc! {"
+        * See [docs](https://example.com).
+        * Another [item](https://other.com).
+    "};
+    let both_flags = FormatOptions {
+        reference_links: true,
+        canonical: true,
+        max_width: 80,
+    };
+    let out = format_markdown_with(input, &both_flags);
+    // Canonical pass: the `*` list marker became `-`.
+    assert!(
+        out.contains("- See [docs]"),
+        "canonical pass didn't normalise list marker: {out}"
+    );
+    // Reference pass: one definition per converted link.
+    assert!(
+        out.contains("[docs]: https://example.com") && out.contains("[item]: https://other.com"),
+        "reference-link pass didn't run: {out}"
+    );
+}
+
+#[test]
+fn reference_links_preserves_inline_link_title() {
+    // Regression: the title in `[docs](url "Title")` was silently dropped,
+    // leaving just `[docs]` plus a titleless `[docs]: url` definition.
+    let input = indoc! {r#"
+        See [docs](https://example.com "Docs Title") for more.
+    "#};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("See [docs] for more."),
+        "inline link not converted to shortcut form: {out}"
+    );
+    assert!(
+        out.contains(r#"[docs]: https://example.com "Docs Title""#),
+        "reference definition lost its title: {out}"
+    );
+}
+
+#[test]
+fn reference_links_disambiguates_same_url_with_different_titles() {
+    // Two links pointing at the same URL but carrying different titles
+    // must get distinct definitions — otherwise the title of one is
+    // silently dropped during dedup.
+    let body = indoc! {r#"
+        See [primary](https://example.com "Primary view") and
+        [alternate](https://example.com "Alternate view").
+    "#};
+    let out = format_markdown_with(body, &ref_opts(80));
+    assert!(
+        out.contains(r#"[primary]: https://example.com "Primary view""#),
+        "first definition missing or titleless: {out}"
+    );
+    assert!(
+        out.contains(r#"[alternate]: https://example.com "Alternate view""#),
+        "second definition missing or titleless: {out}"
+    );
+    // Shortcut form at both sites; the text after `]` rules out `][` and `]:`.
+    assert!(
+        out.contains("[primary] and") && out.contains("[alternate]."),
+        "link sites didn't pick up their reference forms: {out}"
+    );
+}
+
+#[test]
+fn reference_links_dedupes_same_url_same_title() {
+    // Identical URL and identical title: one definition serves both link
+    // sites, and the second site uses full form so that its own link text
+    // survives.
+    let input = indoc! {r#"
+        See [docs](https://example.com "Docs") and
+        [more docs](https://example.com "Docs").
+    "#};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert_eq!(
+        out.matches(r#"[docs]: https://example.com "Docs""#).count(),
+        1,
+        "shared (url, title) got more than one definition: {out}"
+    );
+    assert!(
+        out.contains("[more docs][docs]"),
+        "second link not in full-form referring back to the first: {out}"
+    );
+}
+
+#[test]
+fn reference_links_preserves_existing_definition_with_title() {
+    // A `[foo]: url "title"` definition already present in the input has
+    // to reach the output with its title still attached.
+    let input = indoc! {r#"
+        See [foo] for more.
+
+        [foo]: https://example.com "Foo title"
+    "#};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains(r#"[foo]: https://example.com "Foo title""#),
+        "existing definition lost its title: {out}"
+    );
+}
+
+#[test]
+fn reference_links_with_titles_is_idempotent() {
+    let input = indoc! {r#"
+        See [docs](https://example.com "D") and [other](https://other.com "O").
+    "#};
+    let first = format_markdown_with(input, &ref_opts(80));
+    let second = format_markdown_with(&first, &ref_opts(80));
+    assert_eq!(
+        first, second,
+        "reference-link mode with titles must be idempotent"
+    );
+}
+
+#[test]
+fn reference_links_handles_case_insensitive_label_collisions() {
+    // Regression: CommonMark matches reference labels case-insensitively
+    // (§4.7), so an existing `[Foo]: /old` occupies the label that an
+    // inline `[foo](/new)` would otherwise take, even though the raw
+    // strings differ in case. Missing that collision would emit two
+    // definitions under one canonical label, and the renderer would
+    // resolve the converted shortcut to whichever definition came first.
+    let input = indoc! {"
+        See [Foo] and [foo](/new).
+
+        [Foo]: /old
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("[Foo]: /old"),
+        "existing definition lost: {out}"
+    );
+    assert!(
+        out.contains("[foo-2]: /new"),
+        "disambiguated definition for new URL missing: {out}"
+    );
+    assert!(
+        out.contains("[foo][foo-2]"),
+        "new link doesn't reference the disambiguated label: {out}"
+    );
+}
+
+#[test]
+fn reference_links_handles_whitespace_normalized_label_collisions() {
+    // Label matching in CommonMark §4.7 also collapses internal whitespace,
+    // which makes `[foo bar]` and `[Foo   Bar]` one and the same label.
+    let input = indoc! {"
+        See [Foo   Bar] and [foo bar](/new).
+
+        [Foo   Bar]: /old
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    // A raw-string lookup would consider `foo bar` free; since the new URL
+    // differs from `/old`, the converted link needs a suffixed label.
+    assert!(
+        out.contains("[foo bar-2]: /new"),
+        "whitespace-collision not disambiguated: {out}"
+    );
+}
+
+#[test]
+fn reference_links_does_not_extract_def_that_interrupts_paragraph() {
+    // Regression: a reference definition cannot interrupt a paragraph in
+    // CommonMark, so `Foo\n[bar]: /baz` is a single paragraph whose second
+    // line is visible prose rather than a definition. The line-shape
+    // extractor used to pull that line out and re-emit it below, which
+    // silently changed the rendered content.
+    let input = "Foo\n[bar]: /baz\n";
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        !out.contains("\n\n[bar]: /baz"),
+        "in-paragraph ref-def shape was extracted to a separate block: {out:?}"
+    );
+    assert!(
+        out.contains("[bar]: /baz"),
+        "the [bar]: /baz text disappeared from the output: {out:?}"
+    );
+}
+
+#[test]
+fn reference_links_still_extracts_legitimately_separated_definitions() {
+    // Canary for the paragraph-interruption rule: a definition set apart
+    // from the prose by a blank line is a real definition and must still
+    // come through. The protection rule must be narrow enough that it does
+    // not swallow this case as well.
+    let input = indoc! {"
+        Some prose.
+
+        [foo]: /bar
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    assert!(
+        out.contains("[foo]: /bar"),
+        "legitimate ref-def lost: {out}"
+    );
+}
+
+#[test]
+fn reference_links_skips_definitions_inside_fenced_code() {
+    // Inside a fenced code block a `[label]: url` line is literal example
+    // text, so it must NOT be picked up as a reference definition.
+    let input = indoc! {"
+        Real link: [docs](https://example.com).
+
+        ```
+        [example]: https://not-a-real-def.com
+        ```
+    "};
+    let out = format_markdown_with(input, &ref_opts(80));
+    // The definition-shaped line is still present in the output...
+    assert!(
+        out.contains("[example]: https://not-a-real-def.com"),
+        "fake definition inside fence got extracted: {out}"
+    );
+    // ...and it sits before the closing fence rather than at the bottom.
+    let fake_def = out.find("[example]: https://not-a-real-def.com").unwrap();
+    let closing_fence = out.rfind("```").unwrap();
+    assert!(
+        closing_fence > fake_def,
+        "fake definition extracted out of fence: {out}"
+    );
+}
+
+#[test]
+fn no_doc_comments_means_byte_identical_output() {
+    let plain = indoc! {"
+        fn main() {
+            // ordinary comment
+            let x = 42;
+            println!(\"{x}\");
+        }
+    "};
+    assert_eq!(format_source(plain, DEFAULT_MAX_WIDTH), plain);
+}
+
+#[test]
+fn read_file_error_carries_the_path() {
+    let msg = Error::ReadFile {
+        path: PathBuf::from("/tmp/nope.rs"),
+        source: io::Error::new(io::ErrorKind::PermissionDenied, "denied"),
+    }
+    .to_string();
+    assert!(msg.contains("/tmp/nope.rs"), "missing path: {msg}");
+    assert!(msg.contains("denied"), "missing source: {msg}");
+}
+
+#[test]
+fn write_file_error_carries_the_path() {
+    let msg = Error::WriteFile {
+        path: PathBuf::from("/tmp/nope.rs"),
+        source: io::Error::new(io::ErrorKind::PermissionDenied, "denied"),
+    }
+    .to_string();
+    assert!(msg.contains("/tmp/nope.rs"), "missing path: {msg}");
+    assert!(msg.contains("denied"), "missing source: {msg}");
+}
diff --git a/crates/contrib/comfort/src/run.rs b/crates/contrib/comfort/src/run.rs
new file mode 100644
index 00000000..1950b390
--- /dev/null
+++ b/crates/contrib/comfort/src/run.rs
@@ -0,0 +1,188 @@
+//! Orchestration layer: parses CLI intent, walks filesystem, dispatches to the
+//! pure format pipeline, handles `--check` diffing and exit codes.
+//!
+//! This is the imperative shell.
+//! The functional core lives in [`format`] and [`extract`].
+//!
+//! [`extract`]: super::extract
+//! [`format`]: super::format
+
+use std::{
+    io::{self, IsTerminal, Read, Write},
+    path::{Path, PathBuf},
+};
+
+use similar::{ChangeTag, TextDiff};
+
+use crate::{
+    Error,
+    cli::{Cli, Format, Invocation, Language},
+    format::{FormatOptions, format_markdown_with, format_rust_source_with},
+    walk::{expand_path, workspace_files},
+};
+
+/// Entry point for both binaries: picks the input source, then formats it.
+/// Returns an [`Error`] for I/O failures; returns [`Error::CheckFailed`] when
+/// `--check` finds drift.
+pub fn run(cli: &Cli, invocation: Invocation) -> Result<(), Error> {
+    // Formatting knobs are taken straight from the parsed CLI flags.
+    let opts = FormatOptions {
+        max_width: cli.max_width,
+        canonical: cli.format_markdown,
+        reference_links: cli.reference_links,
+    };
+
+    // Source selection, first match wins:
+    //   1. Workspace mode: explicit `--workspace`, or `-p`/`--exclude`
+    //      restricting which packages to walk.
+    //   2. Explicit paths: process exactly those paths.
+    //   3. No paths + cargo invocation: workspace (all packages).
+    //   4. No paths + direct invocation: stdin/stdout.
+    let restricted = !cli.packages.is_empty() || !cli.exclude.is_empty();
+    let files = if cli.workspace || restricted {
+        Some(workspace_files(&cli.packages, &cli.exclude, cli.language)?)
+    } else if !cli.paths.is_empty() {
+        let mut expanded = Vec::new();
+        for path in &cli.paths {
+            expanded.extend(expand_path(path, cli.language)?);
+        }
+        Some(expanded)
+    } else if invocation == Invocation::Cargo {
+        Some(workspace_files(&[], &[], cli.language)?)
+    } else {
+        None
+    };
+    if let Some(files) = files {
+        return run_files(files, cli.language, cli.check, cli.list_changed, &opts);
+    }
+
+    // Direct invocation without paths: stdin → stdout (or stdin → check-diff).
+    // An interactive terminal has nothing piped in, so print a hint instead.
+    if io::stdin().is_terminal() {
+        let mut stderr = io::stderr().lock();
+        writeln!(
+            stderr,
+            "comfort: no input. Pass paths, use --workspace, or pipe source on stdin."
+        )?;
+        return Ok(());
+    }
+
+    let filename = cli.stdin_filename.as_deref();
+    run_stdin(cli.language, cli.check, cli.list_changed, filename, &opts)
+}
+
+/// Formats stdin and writes the result to stdout.
+/// With `--check`, reports drift (diff or label) and fails instead of writing.
+fn run_stdin(
+    language: Language,
+    check: bool,
+    list_changed: bool,
+    stdin_filename: Option<&Path>,
+    opts: &FormatOptions,
+) -> Result<(), Error> {
+    let mut input = String::new();
+    io::stdin().read_to_string(&mut input)?;
+
+    let label = stdin_filename.unwrap_or(Path::new("<stdin>"));
+    let formatted = format_for(&input, language.resolve(stdin_filename), opts);
+    let drifted = formatted != input;
+
+    match (check, drifted) {
+        // Check mode, already formatted: nothing to report.
+        (true, false) => return Ok(()),
+        // Check mode with drift: name the input or show the diff, then fail.
+        (true, true) => {
+            if list_changed {
+                writeln!(io::stdout().lock(), "{}", label.display())?;
+            } else {
+                print_diff(label, &input, &formatted)?;
+            }
+            return Err(Error::CheckFailed(1));
+        }
+        // Write mode + `--list-changed`: the label goes to stderr so it
+        // cannot corrupt the formatted payload written to stdout below.
+        (false, true) if list_changed => {
+            writeln!(io::stderr().lock(), "{}", label.display())?;
+        }
+        (false, _) => {}
+    }
+
+    io::stdout().lock().write_all(formatted.as_bytes())?;
+    Ok(())
+}
+
+/// Formats each file in turn: rewrites it in place, or with `check` only
+/// reports it (path under `list_changed`, otherwise a unified diff).
+fn run_files(
+    files: Vec<PathBuf>,
+    language: Language,
+    check: bool,
+    list_changed: bool,
+    opts: &FormatOptions,
+) -> Result<(), Error> {
+    let mut drifted = 0_usize;
+    let mut stdout = io::stdout().lock();
+
+    for path in files {
+        let original = match std::fs::read_to_string(&path) {
+            Ok(text) => text,
+            Err(source) => return Err(Error::ReadFile { path, source }),
+        };
+        let formatted = format_for(&original, language.resolve(Some(&path)), opts);
+        if formatted == original {
+            continue;
+        }
+        drifted += 1;
+        // `--list-changed` wins over the diff; plain write mode prints nothing.
+        match (list_changed, check) {
+            (true, _) => writeln!(stdout, "{}", path.display())?,
+            (false, true) => print_diff(&path, &original, &formatted)?,
+            (false, false) => {}
+        }
+        if !check {
+            std::fs::write(&path, formatted)
+                .map_err(|source| Error::WriteFile { path, source })?;
+        }
+    }
+
+    if check && drifted > 0 {
+        Err(Error::CheckFailed(drifted))
+    } else {
+        Ok(())
+    }
+}
+
+/// Runs `source` through the pipeline matching the resolved `format`.
+/// Both optional transformations (`--format-markdown` for structural
+/// canonicalisation, `--reference-links` for link extraction) compose
+/// orthogonally on top of the always-on sembr reflow.
+fn format_for(source: &str, format: Format, opts: &FormatOptions) -> String {
+    match format {
+        Format::Rust => format_rust_source_with(source, opts),
+        Format::Markdown => format_markdown_with(source, opts),
+    }
+}
+
+/// Writes a unified diff between `old` and `new` to stdout, headed by `label`.
+fn print_diff(label: &Path, old: &str, new: &str) -> Result<(), io::Error> {
+    let diff = TextDiff::from_lines(old, new);
+    let mut out = io::stdout().lock();
+    writeln!(out, "--- {}", label.display())?;
+    writeln!(out, "+++ {} (formatted)", label.display())?;
+
+    for hunk in diff.unified_diff().iter_hunks() {
+        writeln!(out, "{}", hunk.header())?;
+        for change in hunk.iter_changes() {
+            let line = change.value();
+            let sigil = match change.tag() {
+                ChangeTag::Equal => ' ',
+                ChangeTag::Insert => '+',
+                ChangeTag::Delete => '-',
+            };
+            // A final line without its own newline still ends the diff row.
+            let terminator = if line.ends_with('\n') { "" } else { "\n" };
+            write!(out, "{sigil}{line}{terminator}")?;
+        }
+    }
+    Ok(())
+}
diff --git a/crates/contrib/comfort/src/sentence.rs b/crates/contrib/comfort/src/sentence.rs
new file mode 100644
index 00000000..464701ab
--- /dev/null
+++ b/crates/contrib/comfort/src/sentence.rs
@@ -0,0 +1,302 @@
+//! Sentence segmentation with abbreviation-aware merging.
+//!
+//! Adapted from snapper-fmt (<https://github.com/TurtleTech-ehf/snapper>),
+//! MIT-licensed, Copyright (c) 2026 Rohit Goswami.
+//!
+//! Reduced to the English-only subset comfort actually needs and inlined to
+//! avoid the upstream dependency.
+//! Logic is otherwise unchanged: protect inline tokens (URLs, code spans,
+//! links) with placeholders, run UAX \#29 sentence segmentation, then merge
+//! false splits caused by abbreviations and quoted punctuation.
+
+use std::{ops::Range, sync::LazyLock};
+
+use regex::Regex;
+use unicode_segmentation::UnicodeSegmentation;
+
+/// English abbreviations whose trailing period must not be treated as a
+/// sentence boundary.
+/// Kept short; a trailing `// next: …` comment names the group that follows it.
+static EN_ABBREVIATIONS: &[&str] = &[
+    // Titles
+    "Mr", "Mrs", "Ms", "Dr", "Prof", "Sr", "Jr", "St", "Rev", "Gen", "Gov", "Sgt", "Cpl", "Pvt",
+    "Capt", "Lt", "Col", "Maj", "Cmdr", "Adm", // next: academic / scientific
+    "Fig", "Figs", "Eq", "Eqs", "Ref", "Refs", "Tab", "Sec", "Ch", "Vol", "No", "Nos", "Ed", "Eds",
+    "Trans", "Dept", "Thm", "Lem", "Prop", "Def", "Cor", "Rem", "Ex", // next: Latin
+    "al", "approx", "ca", "cf", "etc", "et", "ibid", "viz", // next: common
+    "vs", "misc", "est", "govt", "dept", "univ", "inc", "corp", "ltd", "Ave", "Blvd", "Rd", "Jan",
+    "Feb", "Mar", "Apr", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Mon", "Tue", "Wed",
+    "Thu", "Fri", "Sat", "Sun", "pp", "pg", "pt", "pts", // next: single letters (initials)
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
+    "T", "U", "V", "W", "X", "Y", "Z",
+];
+
+/// Dotted abbreviations with an interior period (`e.g.`, `i.e.`, `a.m.`, `p.m.`,
+/// `v.s.`), stored without the final period that the matcher strips first.
+static EN_MULTI_ABBREVS: &[&str] = &["e.g", "i.e", "a.m", "p.m", "v.s"];
+
+/// Inline tokens that must stay whole: each match becomes a placeholder before
+/// segmentation and is restored after. Alternatives are tried in listed order.
+static INLINE_TOKEN_RE: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(
+        &[
+            r"\[\[[^\]]*\]\]",                  // Org links: [[url]]
+            r"\[\[[^\]]*\]\[[^\]]*\]\]",        // Org links with description
+            r"\[[^\]]+\]\([^)]+\)",             // Markdown inline links
+            r"!\[[^\]]*\]\([^)]+\)",            // Markdown images
+            r"\$[^$]+\$",                       // Inline math
+            r"\\([a-zA-Z]+)\{[^}]*\}",          // LaTeX commands
+            r"~[^~]+~",                         // Org inline code
+            r"=[^=]+=",                         // Org verbatim
+            r"`[^`]+`",                         // Markdown inline code
+            r"\*\*[^*]+\*\*",                   // Markdown bold: **text**
+            r"~~[^~]+~~",                       // Markdown strikethrough: ~~text~~
+            r#"https?://\S+[^.\s!?,;:)\]'""]"#, // URLs (don't swallow trailing punct)
+            r"file:\S+",                        // `file:` prefix plus any non-space run
+        ]
+        .join("|"),
+    )
+    .expect("valid inline-token regex")
+});
+
+/// `.`, `!` or `?` followed by closing quotes/parens/brackets at a segment's end.
+/// Used to detect false splits like `He said "wow!"` + `and left.`.
+static QUOTED_PUNCT_END_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r#"[.!?]["')\]]+\s*$"#).expect("valid quoted-punct regex"));
+
+/// Regex matching a single-token abbreviation at the very end of the input,
+/// either at its start or right after whitespace, a quote, backtick, `(` or `[`.
+/// Meant for text whose trailing period has already been stripped.
+static ABBREV_RE: LazyLock<Regex> = LazyLock::new(|| {
+    let alts = EN_ABBREVIATIONS.join("|");
+    let pattern = format!(r#"(?:^|[\s"'`(\[])(?:{alts})$"#);
+    Regex::new(&pattern).expect("valid abbreviation regex")
+});
+
+/// Regex matching a dotted abbreviation (`e.g`, `i.e`, …) at the end of the input.
+/// Accepts the same leading delimiters as `ABBREV_RE`, so `(e.g. Foo)` stays whole.
+static MULTI_ABBREV_RE: LazyLock<Regex> = LazyLock::new(|| {
+    let alts: Vec<String> = EN_MULTI_ABBREVS.iter().map(|a| regex::escape(a)).collect();
+    let pattern = format!(r#"(?:^|[\s"'`(\[])(?:{})$"#, alts.join("|"));
+    Regex::new(&pattern).expect("valid multi-abbreviation regex")
+});
+
+/// Split a prose paragraph into individual sentences, respecting common
+/// abbreviations and inline-token boundaries.
+///
+/// `atomic_ranges` are byte ranges in `text` that must be treated as
+/// indivisible by sentence segmentation: typically markdown inline spans
+/// (`Emph`, `Strong`, `Strikethrough`, `Code`, `Link`, etc.) whose byte extents
+/// come from the AST walker in [`format`].
+/// Pass `&[]` for the standalone path; in that case only `INLINE_TOKEN_RE`
+/// regex protection applies.
+///
+/// Ranges that overlap with earlier ones (or with regex matches in the same
+/// position) are dropped; the first match wins.
+///
+/// [`format`]: crate::format
+#[must_use]
+pub fn split_sentences(text: &str, atomic_ranges: &[Range<usize>]) -> Vec<String> {
+    // Trim and adjust caller-provided ranges to the trimmed slice. Atomic
+    // ranges typically arrive aligned to `text` exactly (the AST walker
+    // computes them from sourcepos relative to the paragraph's start),
+    // but the trim is defensive.
+    let leading = text.len() - text.trim_start().len();
+    let trimmed = text.trim();
+    if trimmed.is_empty() {
+        return Vec::new();
+    }
+
+    // Gather every protected span: caller-provided atomic ranges first,
+    // then regex matches for the patterns we can't reliably get from the
+    // AST (bare URLs, file:// links, org-mode tokens, etc.). Dropped if
+    // out-of-bounds or not at char boundaries.
+    let mut protected: Vec<Range<usize>> = Vec::new();
+    for r in atomic_ranges {
+        let Some(start) = r.start.checked_sub(leading) else {
+            continue;
+        };
+        let Some(end) = r.end.checked_sub(leading) else {
+            continue;
+        };
+        if start < end
+            && end <= trimmed.len()
+            && trimmed.is_char_boundary(start)
+            && trimmed.is_char_boundary(end)
+        {
+            protected.push(start..end);
+        }
+    }
+    for m in INLINE_TOKEN_RE.find_iter(trimmed) {
+        protected.push(m.start()..m.end());
+    }
+    // Sort by start; drop ranges that overlap with an earlier one
+    // (earlier always wins).
+    protected.sort_by_key(|r| r.start);
+    let mut non_overlapping: Vec<Range<usize>> = Vec::new();
+    let mut max_end = 0;
+    for r in protected {
+        if r.start >= max_end {
+            max_end = r.end;
+            non_overlapping.push(r);
+        }
+    }
+
+    // Substitute placeholders in a single forward pass. Placeholders use
+    // NUL to avoid colliding with any normal text content.
+    //
+    // The atomic content goes through `fold_line_breaks` first: a span
+    // whose source crosses a line boundary (e.g. an italic that wraps
+    // across two markdown lines with a continuation indent) would
+    // otherwise leak the embedded `\n  ` into the placeholder. textwrap
+    // treats `\n` as a forced break, and the downstream container
+    // prefix step would then add its own continuation indent on top of
+    // the preserved source indent — producing visibly over-indented
+    // output. Folding line breaks to a single space matches CommonMark's
+    // rendering rule for inline spans.
+    let mut placeholders: Vec<String> = Vec::new();
+    let mut substituted = String::with_capacity(trimmed.len());
+    let mut cursor = 0;
+    for r in &non_overlapping {
+        substituted.push_str(&trimmed[cursor..r.start]);
+        let original = fold_line_breaks(&trimmed[r.clone()]);
+        let idx = placeholders.len();
+        substituted.push_str(&format!("\x00PH{idx}\x00"));
+        placeholders.push(original);
+        cursor = r.end;
+    }
+    substituted.push_str(&trimmed[cursor..]);
+
+    // Collapse runs of whitespace (newlines, tabs, multiple spaces) into a
+    // single space: markdown renders soft breaks as spaces, and a leaked
+    // `\n` would confuse textwrap. Safe after placeholder substitution
+    // because placeholders (`\x00PH<n>\x00`) contain no whitespace.
+    let normalized = collapse_whitespace(&substituted);
+
+    // `split_sentence_bounds`, not `unicode_sentences`: the latter silently
+    // drops segments without alphanumerics (e.g. a trailing emoji).
+    let raw_segments: Vec<&str> = normalized.split_sentence_bounds().collect();
+    if raw_segments.is_empty() {
+        return vec![trimmed.to_owned()];
+    }
+
+    let merged = merge_abbreviation_splits(&raw_segments);
+    let merged = merge_quoted_punct_splits(merged);
+
+    merged
+        .into_iter()
+        .map(|s| restore_placeholders(s.trim(), &placeholders))
+        .filter(|s| !s.is_empty())
+        .collect()
+}
+
+/// Replace each line break (LF, CR, or CRLF) plus the whitespace that follows
+/// it with a single space.
+/// Multi-space runs that don't include a newline are left alone (matching
+/// CommonMark's preservation of literal spaces in inline code).
+///
+/// Used to fold the contents of atomic spans (emphasis, inline code, links)
+/// that happen to cross a source-line boundary before they're stored as
+/// placeholders; without this, textwrap would later treat the embedded ` \n  `
+/// as a forced break and the container-prefix step would double up the
+/// continuation indent.
+fn fold_line_breaks(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    let mut chars = s.chars().peekable();
+    while let Some(c) = chars.next() {
+        if c == '\n' || c == '\r' {
+            out.push(' ');
+            // Also swallow further CR/LF so a CRLF pair folds to one space, not two.
+            while chars.peek().is_some_and(|n| matches!(*n, ' ' | '\t' | '\r' | '\n')) {
+                chars.next();
+            }
+        } else {
+            out.push(c);
+        }
+    }
+    out
+}
+
+/// Collapse every run of Unicode whitespace into a single ASCII space and drop
+/// leading/trailing whitespace altogether.
+/// Used to normalise markdown paragraph content (soft line breaks, indent on
+/// continuation lines, accidental double spaces) before sentence segmentation.
+fn collapse_whitespace(s: &str) -> String {
+    // `split_whitespace` uses the same Unicode `White_Space` definition as
+    // `char::is_whitespace` and never yields empty pieces, so re-joining the
+    // words with single spaces collapses and trims in one go.
+    let mut words = s.split_whitespace();
+    let mut out = String::with_capacity(s.len());
+    if let Some(first) = words.next() {
+        out.push_str(first);
+    }
+    for word in words {
+        out.push(' ');
+        out.push_str(word);
+    }
+    out
+}
+
+/// Put the protected spans back: each `\x00PH<i>\x00` marker found in `s` is
+/// replaced by `placeholders[i]`, working through the indices in order.
+fn restore_placeholders(s: &str, placeholders: &[String]) -> String {
+    let numbered = placeholders.iter().enumerate();
+    numbered.fold(s.to_owned(), |text, (idx, original)| {
+        text.replace(&format!("\x00PH{idx}\x00"), original)
+    })
+}
+
+/// Re-join consecutive segments when the text accumulated so far ends in a
+/// known abbreviation; UAX \#29 doesn't know about these and false-splits.
+/// Segments are concatenated as-is, so their own spacing is preserved.
+fn merge_abbreviation_splits(segments: &[&str]) -> Vec<String> {
+    let mut merged: Vec<String> = Vec::with_capacity(segments.len());
+    for piece in segments.iter().copied() {
+        // The check looks at the accumulated entry, not the raw previous
+        // segment, so chains such as `Prof. Dr. Smith` keep extending it.
+        let tail = merged.last_mut().filter(|prev| is_abbreviation_ending(prev));
+        match tail {
+            Some(prev) => prev.push_str(piece),
+            None => merged.push(piece.to_owned()),
+        }
+    }
+    merged
+}
+
+/// Re-join when a segment ends with sentence punctuation inside closing
+/// quotes/parens AND the next segment starts with a lowercase letter, meaning
+/// the apparent break is actually mid-sentence.
+/// E.g.
+/// `He said "wow!" and left.` is one sentence, not two.
+fn merge_quoted_punct_splits(segments: Vec<String>) -> Vec<String> {
+    let mut merged: Vec<String> = Vec::with_capacity(segments.len());
+    for piece in segments {
+        // A lowercase first letter signals a continuation of the previous
+        // sentence rather than the start of a new one.
+        let continues = piece.trim_start().starts_with(char::is_lowercase);
+
+        // Only glue onto an entry that really ends in quoted/parenthesised
+        // punctuation; `None` here also covers the very first segment.
+        let glue_target = merged
+            .last_mut()
+            .filter(|prev| continues && QUOTED_PUNCT_END_RE.is_match(prev.trim_end()));
+        match glue_target {
+            Some(prev) => prev.push_str(&piece),
+            None => merged.push(piece),
+        }
+    }
+    merged
+}
+
+/// True when `s`, ignoring trailing whitespace, ends in a period directly
+/// preceded by a known single-token or dotted abbreviation.
+fn is_abbreviation_ending(s: &str) -> bool {
+    let Some(stem) = s.trim_end().strip_suffix('.') else {
+        return false;
+    };
+    ABBREV_RE.is_match(stem) || MULTI_ABBREV_RE.is_match(stem)
+}
+
+#[cfg(test)]
+#[path = "sentence_tests.rs"]
+mod tests;
diff --git a/crates/contrib/comfort/src/sentence_tests.rs b/crates/contrib/comfort/src/sentence_tests.rs
new file mode 100644
index 00000000..73cc4bf6
--- /dev/null
+++ b/crates/contrib/comfort/src/sentence_tests.rs
@@ -0,0 +1,267 @@
+//! Test suite ported from snapper-fmt's `sentence/unicode.rs`, MIT-licensed,
+//! Copyright (c) 2026 Rohit Goswami.
+//! Verifies that comfort's inlined English splitter behaves identically to the
+//! upstream English configuration.
+
+use pretty_assertions::assert_eq;
+
+use super::split_sentences;
+
+fn split(text: &str) -> Vec<String> {
+    split_sentences(text, &[])
+}
+
+fn split_with_atomic(text: &str, atomic_ranges: &[std::ops::Range<usize>]) -> Vec<String> {
+    split_sentences(text, atomic_ranges)
+}
+
+#[test]
+fn simple_sentences() {
+    assert_eq!(
+        split("Hello world. This is a test. Another sentence here."),
+        vec!["Hello world.", "This is a test.", "Another sentence here."]
+    );
+}
+
+#[test]
+fn abbreviation_dr() {
+    assert_eq!(split("Dr. Smith went home. He was tired."), vec![
+        "Dr. Smith went home.",
+        "He was tired."
+    ]);
+}
+
+#[test]
+fn abbreviation_eg() {
+    assert_eq!(
+        split("Use a formatter, e.g. snapper. It works well."),
+        vec!["Use a formatter, e.g. snapper.", "It works well."]
+    );
+}
+
+#[test]
+fn abbreviation_fig() {
+    assert_eq!(
+        split("See Fig. 3 for details. The results are clear."),
+        vec!["See Fig. 3 for details.", "The results are clear."]
+    );
+}
+
+#[test]
+fn empty_input() {
+    assert_eq!(split(""), Vec::<String>::new());
+}
+
+#[test]
+fn single_sentence() {
+    assert_eq!(split("Just one sentence."), vec!["Just one sentence."]);
+}
+
+#[test]
+fn question_and_exclamation() {
+    assert_eq!(split("Is this working? Yes! It is."), vec![
+        "Is this working?",
+        "Yes!",
+        "It is."
+    ]);
+}
+
+#[test]
+fn no_trailing_period() {
+    assert_eq!(split("First sentence. Second without period"), vec![
+        "First sentence.",
+        "Second without period"
+    ]);
+}
+
+#[test]
+fn inline_org_link_preserved() {
+    assert_eq!(
+        split("See [[https://example.com][Ex. Site]] for details. Then continue."),
+        vec![
+            "See [[https://example.com][Ex. Site]] for details.",
+            "Then continue."
+        ]
+    );
+}
+
+#[test]
+fn inline_math_preserved() {
+    assert_eq!(split("The value $x = 3.14$ matters. Next sentence."), vec![
+        "The value $x = 3.14$ matters.",
+        "Next sentence."
+    ]);
+}
+
+#[test]
+fn inline_markdown_link_preserved() {
+    assert_eq!(
+        split("Visit [Example Inc.](https://example.com) now. Then read more."),
+        vec![
+            "Visit [Example Inc.](https://example.com) now.",
+            "Then read more."
+        ]
+    );
+}
+
+#[test]
+fn bold_span_with_period_does_not_split_mid_span() {
+    // Regression: `**Heading.** Body.` used to split at the period inside
+    // the bold span, stranding `**` on the next line.
+    assert_eq!(split("**Heading.** Body sentence here."), vec![
+        "**Heading.** Body sentence here."
+    ]);
+}
+
+#[test]
+fn bold_span_with_internal_period_then_real_sentence_break() {
+    // The period inside the bold span doesn't break, but the period
+    // outside it still does.
+    assert_eq!(split("**Title.** First sentence. Second sentence."), vec![
+        "**Title.** First sentence.",
+        "Second sentence.",
+    ]);
+}
+
+#[test]
+fn atomic_range_protects_explicit_span() {
+    // The caller (format.rs) marks the bold span as atomic via byte range.
+    // The splitter must not break inside it, even though it contains a
+    // sentence-terminator period.
+    let text = "**Heading.** Body sentence here.";
+    let bold = 0..text.find("** B").unwrap() + 2; // covers `**Heading.**`
+    assert_eq!(split_with_atomic(text, &[bold]), vec![
+        "**Heading.** Body sentence here."
+    ]);
+}
+
+#[test]
+fn atomic_range_does_not_swallow_following_sentence_break() {
+    let text = "**Title.** First. Second.";
+    let bold = 0..text.find("** F").unwrap() + 2;
+    assert_eq!(split_with_atomic(text, &[bold]), vec![
+        "**Title.** First.",
+        "Second.",
+    ]);
+}
+
+#[test]
+fn atomic_range_overlapping_a_regex_match_dedupes_to_first() {
+    // `**Heading.**` is matched by both the caller's AST atomic range AND
+    // the bold-regex fallback. The caller's range wins; the regex match
+    // gets dropped as overlapping.
+    let text = "**Heading.** Body.";
+    let bold = 0..12; // `**Heading.**`
+    let out = split_with_atomic(text, &[bold]);
+    assert_eq!(out, vec!["**Heading.** Body."]);
+}
+
+#[test]
+#[allow(
+    clippy::reversed_empty_ranges,
+    reason = "testing malformed input on purpose"
+)]
+fn atomic_range_out_of_bounds_is_ignored() {
+    // Defensive: malformed ranges shouldn't panic.
+    let text = "Hello world.";
+    let bogus = vec![100..200, 5..3];
+    let out = split_with_atomic(text, &bogus);
+    assert_eq!(out, vec!["Hello world."]);
+}
+
+#[test]
+fn strikethrough_with_period_is_preserved() {
+    assert_eq!(split("~~obsolete.~~ Still here."), vec![
+        "~~obsolete.~~ Still here."
+    ]);
+}
+
+#[test]
+fn inline_code_preserved() {
+    assert_eq!(split("Use `std.io.Read` for input. Then process."), vec![
+        "Use `std.io.Read` for input.",
+        "Then process."
+    ]);
+}
+
+#[test]
+fn quoted_exclamation_no_false_split() {
+    assert_eq!(split(r#"He said "wow!" and left. She agreed."#), vec![
+        r#"He said "wow!" and left."#,
+        "She agreed."
+    ]);
+}
+
+#[test]
+fn paren_exclamation_no_false_split() {
+    assert_eq!(
+        split("He replied (with emphasis!) loudly. She agreed."),
+        vec!["He replied (with emphasis!) loudly.", "She agreed."]
+    );
+}
+
+#[test]
+fn paren_question_no_false_split() {
+    assert_eq!(
+        split("The answer (really?) surprised them. Next sentence."),
+        vec!["The answer (really?) surprised them.", "Next sentence."]
+    );
+}
+
+#[test]
+fn url_trailing_period_not_swallowed() {
+    assert_eq!(
+        split("Visit https://example.com/path. Then read more."),
+        vec!["Visit https://example.com/path.", "Then read more."]
+    );
+}
+
+#[test]
+fn url_with_query_trailing_period() {
+    assert_eq!(
+        split("See https://example.com/path?q=1&r=2. Next sentence."),
+        vec!["See https://example.com/path?q=1&r=2.", "Next sentence."]
+    );
+}
+
+#[test]
+fn ellipsis_splits() {
+    assert_eq!(split("Sentence one... Sentence two."), vec![
+        "Sentence one...",
+        "Sentence two."
+    ]);
+}
+
+#[test]
+fn soft_line_breaks_are_collapsed_to_spaces() {
+    // The text comes in with embedded newlines (markdown soft breaks).
+    // Each output sentence must be one logical line — no `\n` leakage.
+    let out = split("If foo, that\ntool is included. This\nprevents a problem.");
+    assert_eq!(out, vec![
+        "If foo, that tool is included.",
+        "This prevents a problem.",
+    ]);
+}
+
+#[test]
+fn runs_of_whitespace_collapse_to_one_space() {
+    let out = split("First  sentence.    Second\n\nsentence.");
+    assert_eq!(out, vec!["First sentence.", "Second sentence."]);
+}
+
+#[test]
+fn inline_code_internal_whitespace_is_preserved_through_normalisation() {
+    // The two spaces inside the backticks survive the whitespace collapse
+    // because the inline code span is placeholdered first.
+    let out = split("Use `foo  bar` for this.");
+    assert_eq!(out, vec!["Use `foo  bar` for this."]);
+}
+
+#[test]
+fn quoted_period_end_of_sentence() {
+    // "done." followed by uppercase Start is a real sentence boundary.
+    assert_eq!(split(r#"End of quote: "done." Start again."#), vec![
+        r#"End of quote: "done.""#,
+        "Start again."
+    ]);
+}
diff --git a/crates/contrib/comfort/src/walk.rs b/crates/contrib/comfort/src/walk.rs
new file mode 100644
index 00000000..f7676ba2
--- /dev/null
+++ b/crates/contrib/comfort/src/walk.rs
@@ -0,0 +1,129 @@
+//! File discovery for workspace and path-based invocations.
+
+use std::path::{Path, PathBuf};
+
+use cargo_metadata::{MetadataCommand, Package};
+use ignore::WalkBuilder;
+
+use crate::{Error, cli::Language};
+
+/// Collect the files to process inside the current cargo workspace, honoring
+/// `.gitignore` and friends and keeping only files that match `language`.
+/// Paths come back in walker order.
+///
+/// `include` and `exclude` narrow the workspace by package name.
+/// With both empty, the walk starts at the workspace root.
+/// With `include` non-empty, only the named packages are walked.
+/// `exclude` always removes packages from the resulting set.
+/// An unknown name in either list produces [`Error::UnknownPackage`].
+pub fn workspace_files(
+    include: &[String],
+    exclude: &[String],
+    language: Language,
+) -> Result<Vec<PathBuf>, Error> {
+    let metadata = MetadataCommand::new().no_deps().exec()?;
+
+    let unfiltered = include.is_empty() && exclude.is_empty();
+    if unfiltered {
+        return walk_files(metadata.workspace_root.as_std_path(), language);
+    }
+
+    let members = metadata.workspace_packages();
+    let known: Vec<&str> = members.iter().map(|pkg| pkg.name.as_str()).collect();
+    validate_package_names(&known, include)?;
+    validate_package_names(&known, exclude)?;
+
+    // A manifest path without a parent directory has nothing to walk; skip it.
+    let roots = select_packages(&members, include, exclude)
+        .into_iter()
+        .filter_map(|pkg| pkg.manifest_path.parent());
+
+    let mut collected = Vec::new();
+    for root in roots {
+        collected.extend(walk_files(root.as_std_path(), language)?);
+    }
+    Ok(collected)
+}
+
+/// Resolve one user-supplied path into the files to process.
+/// A directory is walked, respecting `.gitignore`, and filtered by `language`.
+/// Anything else is returned as-is (even if its extension doesn't match
+/// `language`) — the caller asked for it by name.
+/// Returns [`Error::Walk`] for walker errors (unreadable directory, symlink
+/// loop, etc.) so a `--check --workspace` run can't silently exit 0 without
+/// having inspected every file it was supposed to cover.
+pub fn expand_path(input: &Path, language: Language) -> Result<Vec<PathBuf>, Error> {
+    // Not a directory (a file, or a path that doesn't exist): hand it back untouched.
+    if !input.is_dir() {
+        return Ok(vec![input.to_path_buf()]);
+    }
+    walk_files(input, language)
+}
+
+/// Walk `root` with the `ignore` crate's standard filters enabled and collect
+/// every regular file whose extension matches `language`.
+/// The first walker error aborts the walk and is returned to the caller.
+fn walk_files(root: &Path, language: Language) -> Result<Vec<PathBuf>, Error> {
+    let walker = WalkBuilder::new(root).standard_filters(true).build();
+    let mut found = Vec::new();
+    for item in walker {
+        let item = item?;
+        let is_file = item.file_type().is_some_and(|kind| kind.is_file());
+        if is_file && matches_language(item.path(), language) {
+            found.push(item.into_path());
+        }
+    }
+    Ok(found)
+}
+
+/// True if a discovered file's extension falls inside the set selected by the
+/// given language.
+/// With [`Language::Auto`], both Rust (`rs`) and Markdown (`md`, `markdown`)
+/// extensions are included; with an explicit language, only that one's.
+/// Extensions are compared case-sensitively; files without one never match.
+fn matches_language(path: &Path, language: Language) -> bool {
+    let ext = path.extension().and_then(|e| e.to_str());
+    let is_rust = ext == Some("rs");
+    let is_markdown = matches!(ext, Some("md" | "markdown"));
+    let wants_rust = matches!(language, Language::Auto | Language::Rust);
+    let wants_markdown = matches!(language, Language::Auto | Language::Markdown);
+    (is_rust && wants_rust) || (is_markdown && wants_markdown)
+}
+
+/// Apply the include/exclude filters to a list of workspace packages.
+/// Packages that pass keep their relative order; the per-name decision is
+/// delegated to `should_include`.
+fn select_packages<'a>(
+    packages: &'a [&'a Package],
+    include: &[String],
+    exclude: &[String],
+) -> Vec<&'a Package> {
+    // `copied` turns the `&&Package` items into plain `&Package` before filtering.
+    let wanted = |pkg: &&Package| should_include(pkg.name.as_str(), include, exclude);
+    packages.iter().copied().filter(wanted).collect()
+}
+
+/// Decide whether the package called `name` survives the user's
+/// `-p`/`--exclude` selection: it must not be excluded, and must be listed in
+/// `include` unless `include` is empty. Pure, so it can be tested without
+/// constructing `cargo_metadata` types.
+fn should_include(name: &str, include: &[String], exclude: &[String]) -> bool {
+    let listed_in = |names: &[String]| names.iter().any(|n| n == name);
+    let wanted = include.is_empty() || listed_in(include);
+    wanted && !listed_in(exclude)
+}
+
+/// Check that every entry of `names` appears in `available`.
+/// The first name that doesn't is returned as [`Error::UnknownPackage`] —
+/// fail-fast on typos.
+/// An empty `names` list always passes.
+fn validate_package_names(available: &[&str], names: &[String]) -> Result<(), Error> {
+    let unknown = names.iter().find(|name| !available.contains(&name.as_str()));
+    match unknown {
+        Some(name) => Err(Error::UnknownPackage(name.clone())),
+        None => Ok(()),
+    }
+}
+
+#[cfg(test)]
+#[path = "walk_tests.rs"]
+mod tests;
diff --git a/crates/contrib/comfort/src/walk_tests.rs b/crates/contrib/comfort/src/walk_tests.rs
new file mode 100644
index 00000000..dcb6d64f
--- /dev/null
+++ b/crates/contrib/comfort/src/walk_tests.rs
@@ -0,0 +1,102 @@
+//! Tests for the pure parts of workspace walking: the include/exclude predicate
+//! and the unknown-package validation.
+//! The actual file walking and `cargo_metadata` invocation are covered by
+//! end-to-end integration tests via the binary, not unit-tested here.
+
+use std::path::Path;
+
+use pretty_assertions::assert_eq;
+
+use super::{matches_language, should_include, validate_package_names};
+use crate::{Error, cli::Language};
+
+fn names(vs: &[&str]) -> Vec<String> {
+    vs.iter().map(|s| (*s).to_owned()).collect()
+}
+
+#[test]
+fn empty_include_empty_exclude_keeps_every_package() {
+    assert!(should_include("foo", &[], &[]));
+    assert!(should_include("bar", &[], &[]));
+}
+
+#[test]
+fn explicit_include_restricts_to_listed_packages() {
+    let include = names(&["foo", "bar"]);
+    assert!(should_include("foo", &include, &[]));
+    assert!(should_include("bar", &include, &[]));
+    assert!(!should_include("baz", &include, &[]));
+}
+
+#[test]
+fn exclude_alone_keeps_unlisted_packages() {
+    let exclude = names(&["baz"]);
+    assert!(should_include("foo", &[], &exclude));
+    assert!(!should_include("baz", &[], &exclude));
+}
+
+#[test]
+fn exclude_takes_precedence_over_include() {
+    // A package both included AND excluded is excluded. This matches
+    // `cargo check --workspace --exclude foo`-style semantics.
+    let include = names(&["foo", "bar"]);
+    let exclude = names(&["foo"]);
+    assert!(!should_include("foo", &include, &exclude));
+    assert!(should_include("bar", &include, &exclude));
+}
+
+#[test]
+fn validate_succeeds_when_every_name_matches() {
+    let available = ["foo", "bar", "baz"];
+    let names = names(&["bar", "baz"]);
+    assert!(validate_package_names(&available, &names).is_ok());
+}
+
+#[test]
+fn validate_succeeds_on_empty_input() {
+    let available = ["foo"];
+    assert!(validate_package_names(&available, &[]).is_ok());
+}
+
+#[test]
+fn language_auto_accepts_both_rust_and_markdown() {
+    assert!(matches_language(Path::new("foo.rs"), Language::Auto));
+    assert!(matches_language(Path::new("foo.md"), Language::Auto));
+    assert!(matches_language(Path::new("foo.markdown"), Language::Auto));
+    assert!(!matches_language(Path::new("foo.txt"), Language::Auto));
+    assert!(!matches_language(Path::new("Cargo.toml"), Language::Auto));
+}
+
+#[test]
+fn language_rust_filters_to_rs_only() {
+    assert!(matches_language(Path::new("foo.rs"), Language::Rust));
+    assert!(!matches_language(Path::new("foo.md"), Language::Rust));
+    assert!(!matches_language(Path::new("foo.markdown"), Language::Rust));
+}
+
+#[test]
+fn language_markdown_filters_to_md_and_markdown() {
+    assert!(matches_language(Path::new("foo.md"), Language::Markdown));
+    assert!(matches_language(
+        Path::new("foo.markdown"),
+        Language::Markdown
+    ));
+    assert!(!matches_language(Path::new("foo.rs"), Language::Markdown));
+}
+
+#[test]
+fn language_filter_skips_files_without_extension() {
+    assert!(!matches_language(Path::new("README"), Language::Auto));
+    assert!(!matches_language(Path::new("Makefile"), Language::Rust));
+    assert!(!matches_language(Path::new("LICENSE"), Language::Markdown));
+}
+
+#[test]
+fn validate_returns_first_unknown_name() {
+    let available = ["foo", "bar"];
+    let lookup = names(&["bar", "ghost"]);
+    match validate_package_names(&available, &lookup) {
+        Err(Error::UnknownPackage(name)) => assert_eq!(name, "ghost"),
+        other => panic!("expected UnknownPackage, got {other:?}"),
+    }
+}
diff --git a/deny.toml b/deny.toml
index 23ae1ce9..16d24209 100644
--- a/deny.toml
+++ b/deny.toml
@@ -15,6 +15,7 @@ allow = [
     "MIT",
     "MPL-2.0",
     "Unicode-3.0",
+    "Unicode-DFS-2016",
     "Zlib",
 ]
 
@@ -33,6 +34,4 @@ allow-git = [
     "https://github.com/JeanMertz/inquire?branch=submit-on-valid-parse",
     "https://github.com/JeanMertz/openai-responses-rs",
     "https://github.com/JeanMertz/saphyr?branch=jean/fix-valid-literal-block-scalar-check",
-    "https://github.com/JeanMertz/schematic?branch=merged",
-    "https://github.com/zkat/miette",
 ]
diff --git a/justfile b/justfile
index 32e19204..79a9c02e 100644
--- a/justfile
+++ b/justfile
@@ -1631,7 +1631,7 @@ plugin-build-local: _install-jp (plugin-build "")
 
 # Run all ci tasks.
 [group('ci')]
-ci: lint-ci fmt-ci test-ci docs-ci coverage-ci deny-ci insta-ci shear-ci vet-ci
+ci: lint-ci fmt-ci fmt-comments-ci test-ci docs-ci coverage-ci deny-ci insta-ci shear-ci vet-ci
 
 # Lint the code on CI.
 [group('ci')]
@@ -1643,6 +1643,11 @@ lint-ci: (_rustup_component "clippy") _install_ci_matchers
 fmt-ci: (_rustup_component "rustfmt") _install_ci_matchers
     cargo fmt --all --check
 
+# Check Rust doc-comment formatting on CI.
+[group('ci')]
+fmt-comments-ci: _install-comfort _install_ci_matchers
+    comfort --check --workspace
+
 # Test the code on CI.
 [group('ci')]
 test-ci: (_install "cargo-nextest@" + nextest_version) _install_ci_matchers
@@ -1706,6 +1711,9 @@ vet-ci: (_install "cargo-vet@" + vet_version)
 @_install-jp *args:
     cargo install {{quiet_flag}} --locked --path crates/jp_cli {{args}}
 
+@_install-comfort *args:
+    cargo install {{quiet_flag}} --locked --path crates/contrib/comfort {{args}}
+
 @_install-binstall:
     command -v cargo-binstall >/dev/null 2>&1 || { \
         curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | BINSTALL_VERSION={{binstall_version}} sh; \