From 31b66868669284c6bd89c1ad0864074e08c64001 Mon Sep 17 00:00:00 2001 From: Mark Scherer Date: Sat, 20 Jun 2026 13:50:09 +0200 Subject: [PATCH 1/3] Add ListTableExtension (list-as-table for block-content cells) Renders ::: list-table divs as real HTML tables, with the table authored as a nested list so cells can hold full block content (paragraphs, lists, code) that the native pipe-table syntax cannot express. Outer list items are rows, inner list items are cells. The caption, header-rows, and header-cols are read from the div's preceding attribute line. Single-paragraph cells collapse to inline content; multi-block cells keep their wrappers. Ragged rows pad with empty cells. Only list-table divs whose sole block child is the table list are claimed; everything else (and any malformed row) defers to the default div so no content is dropped. Builds on the marker-line nested-list block-absorption fix (PR #251). --- docs/extensions/index.md | 87 +++++ src/Extension/ListTableExtension.php | 314 ++++++++++++++++++ .../Extension/ListTableExtensionTest.php | 312 +++++++++++++++++ 3 files changed, 713 insertions(+) create mode 100644 src/Extension/ListTableExtension.php create mode 100644 tests/TestCase/Extension/ListTableExtensionTest.php diff --git a/docs/extensions/index.md b/docs/extensions/index.md index 9df909ef..5f04a02d 100644 --- a/docs/extensions/index.md +++ b/docs/extensions/index.md @@ -17,6 +17,7 @@ Extensions provide a clean way to bundle related customizations together. 
Each e | [HeadingPermalinksExtension](#headingpermalinksextension) | Adds clickable anchor links to headings | | [InlineFootnotesExtension](#inlinefootnotesextension) | Converts `[content]{.fn}` spans to inline footnotes | | [LineBlockDivExtension](#lineblockdivextension) | Adds a fenced `::: |` line block (verse/addresses) without prefixing every line | +| [ListTableExtension](#listtableextension) | Renders `::: list-table` divs as real HTML tables whose cells can hold block content | | [MentionsExtension](#mentionsextension) | Converts `@username` patterns to profile links | | [MermaidExtension](#mermaidextension) | Transforms mermaid code blocks into diagrams | | [SemanticSpanExtension](#semanticspanextension) | Converts span attributes to semantic HTML elements (`<kbd>`, `<dfn>`, `<abbr>`) | @@ -664,6 +665,92 @@ The pipe is consumed as the marker, so the output is a `line-block` div, never a This follows the approach discussed in [djot issue #29](https://github.com/jgm/djot/issues/29). A leading `|` on every line (Pandoc-style line blocks) can be confused with pipe tables and is awkward to edit; an English keyword div class (`::: verse`) was undesirable. A language-neutral `|` marker on the div opener sidesteps both concerns. +## ListTableExtension + +Renders `::: list-table` blocks as real HTML `<table>` markup, with the table +authored as a nested list. Because each cell is a list item, cells can hold full +block content (paragraphs, lists, code blocks) that the native pipe-table syntax +cannot express. + +```php +use Djot\Extension\ListTableExtension; + +$converter->addExtension(new ListTableExtension()); +``` + +Each outer list item is a row; each inner list item is a cell. + +::: info Attributes go on a preceding line +djot has no `::: type "title"` parse, so the caption and header controls are +read from the div's attribute block, which sits on the line **above** the +`:::` opener: `{caption="Quarterly results" header-rows=1}`. 
+::: + +**Input:** + +```djot +{caption="Quarterly results" header-rows=1} +::: list-table +- - Region + - Notes +- - EMEA + - Strong quarter. + + Drivers: + + - new logos + - renewals +::: +``` + +**Output:** + +```html +
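<table> +  <caption>Quarterly results</caption> +  <thead><tr><th>Region</th><th>Notes</th></tr></thead> +  <tbody> +    <tr><td>EMEA</td><td><p>Strong quarter.</p> +<p>Drivers:</p> +<ul> +<li> +new logos +</li> +<li> +renewals +</li> +</ul></td></tr> +  </tbody> +</table>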
+``` + +The `caption` attribute becomes the `<caption>` (omitted when absent). Two +attributes control header promotion (both default `0`): + +- `header-rows=N` promotes the first `N` rows to `<thead>` with `<th>` cells. +- `header-cols=N` promotes the first `N` cells of **every** row to row-header +  `<th>`. + +A cell whose only content is a single plain paragraph collapses to inline +content (`<td>text</td>`), exactly like a tight list item; a cell with multiple +blocks keeps its `<p>`/`<ul>` wrappers (as in the `Strong quarter.` cell above). +This is the core benefit over pipe tables: rich, multi-block cells. + +Ragged rows (rows with differing cell counts) are padded with empty `<td></td>` to +the widest row, so no content is ever silently dropped. Inline markup inside a +cell renders normally. Block attributes on the opener carry onto the `<table>` +tag in source order (safe-mode filtering still applies); the structural +`caption`, `header-rows`, `header-cols`, and the auto `list-table` class are +consumed by the extension and not emitted. + +This builds on the marker-line nested-list block-absorption fix +([#251](https://github.com/php-collective/djot-php/pull/251)) that lets a cell +hold full block content. HTML output only. + +Without the extension the same block degrades gracefully to the default +`<div class="list-table">
    ` holding the literal nested list, so source is never +lost. + ## MentionsExtension Converts `@username` patterns into user profile links. diff --git a/src/Extension/ListTableExtension.php b/src/Extension/ListTableExtension.php new file mode 100644 index 00000000..12f931c9 --- /dev/null +++ b/src/Extension/ListTableExtension.php @@ -0,0 +1,314 @@ +` markup, with the + * table authored as a nested list so that cells can hold full block content + * (paragraphs, lists, code, …) that the native pipe-table syntax cannot. + * + * A `list-table` div is authored as an outer list where each outer item is a + * row and each inner item is a cell: + * + * ``` + * {caption="Quarterly results" header-rows=1} + * ::: list-table + * - - Region + * - Notes + * - - EMEA + * - Strong quarter. + * + * Drivers: + * + * - new logos + * - renewals + * ::: + * ``` + * + * The caption, `header-rows` and `header-cols` are read from the div's + * attributes, which sit on the PRECEDING attribute line (djot has no + * `::: type "title"` parse - a quoted title would land in the class name). + * + * `caption="..."` emits a `
    `/``); multi-block cells keep their block wrappers. + * + * Only `::: list-table` divs whose sole block child is the table list are + * claimed; every other div defers to the core renderer. When this extension is + * not registered the block degrades to the default `
    ` + * holding the literal nested list. + * + * Only applies to HTML output. + * + * Example: + * ```php + * $converter = new DjotConverter(); + * $converter->addExtension(new ListTableExtension()); + * ``` + */ +class ListTableExtension implements ExtensionInterface +{ + /** + * The div class this extension claims. + * + * @var string + */ + public const KIND = 'list-table'; + + public function register(DjotConverter $converter): void + { + // Only applies to HTML output - other renderers render the div normally. + $renderer = $converter->getRenderer(); + if (!$renderer instanceof HtmlRenderer) { + return; + } + + $converter->on('render.div', function (RenderEvent $event) use ($renderer): void { + $node = $event->getNode(); + if (!$node instanceof Div) { + return; + } + + // Only claim `::: list-table` blocks; everything else defers to the + // core div renderer (and any other extension that wants it). + if (!$node->hasClass(self::KIND)) { + return; + } + + $html = $this->renderListTable($node, $renderer); + if ($html === null) { + // No usable outer list found; defer to the default div renderer + // so content is never silently dropped. + return; + } + + $event->setHtml($html); + }); + } + + /** + * Render the `
    `; `header-rows=N` promotes the first N + * rows to `
    `, and `header-cols=N` promotes the first N cells of + * every row to row-header ``. Single-paragraph cells collapse to inline + * content (`text
    ` for a `list-table` div, or null to defer. + */ + protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string + { + // Claim the div only when its sole block child is the table list. If it + // holds extra siblings (a stray paragraph before/after the list, etc.) + // defer to the default div renderer so that content is never silently + // dropped - the block then degrades to the literal nested-list div. + $children = $node->getChildren(); + if (count($children) !== 1 || !$children[0] instanceof ListBlock) { + return null; + } + $outerList = $children[0]; + + // Each outer list item is a row; its cells are the items of the inner + // ListBlock children, in document order. djot-php yields exactly one + // inner list per row, but the flatten-all-inner-lists path stays for + // robustness. + $rows = []; + foreach ($outerList->getChildren() as $rowItem) { + if (!$rowItem instanceof ListItem) { + continue; + } + + $cells = $this->extractCells($rowItem); + + // A row without an inner cell list (e.g. `- Row label` with direct + // content rather than nested `- - cell` items) yields no cells. Such + // a structure is not a clean table; defer to the default div so the + // row's content is never silently dropped into an empty ``. + if ($cells === []) { + return null; + } + + $rows[] = $cells; + } + + if ($rows === []) { + return null; + } + + $headerRows = max(0, (int)($node->getAttribute('header-rows') ?? '0')); + $headerCols = max(0, (int)($node->getAttribute('header-cols') ?? '0')); + + // Ragged rows: pad short rows with empty cells to the widest row so no + // content is dropped and the grid stays rectangular. 
+ $columnCount = 0; + foreach ($rows as $cells) { + $columnCount = max($columnCount, count($cells)); + } + + $lines = []; + + $caption = $node->getAttribute('caption'); + if ($caption !== null && trim($caption) !== '') { + $lines[] = ' '; + } + + $renderRow = function (array $cells, bool $isHeaderRow) use ($renderer, $headerCols, $columnCount): string { + $html = ''; + for ($i = 0; $i < $columnCount; $i++) { + $isHeaderCell = $isHeaderRow || $i < $headerCols; + $tag = $isHeaderCell ? 'th' : 'td'; + $cell = $cells[$i] ?? null; + $content = $cell !== null ? $this->renderCell($cell, $renderer) : ''; + $html .= '<' . $tag . '>' . $content . ''; + } + + return '' . $html . ''; + }; + + $headRows = array_slice($rows, 0, $headerRows); + $bodyRows = array_slice($rows, $headerRows); + + if ($headRows !== []) { + $thead = ''; + foreach ($headRows as $cells) { + $thead .= $renderRow($cells, true); + } + $lines[] = ' ' . $thead . ''; + } + + if ($bodyRows !== []) { + $tbody = ''; + foreach ($bodyRows as $cells) { + $tbody .= ' ' . $renderRow($cells, false) . "\n"; + } + $lines[] = " \n" . rtrim($tbody, "\n") . "\n "; + } + + $attrs = $this->renderTableAttributes($node, $renderer); + + return '\n" . implode("\n", $lines) . "\n
    ' . $this->escapeHtml($caption) . '
    \n"; + } + + /** + * Extract the cells of a row. + * + * A row like `- - A` / ` - B` parses to the outer item holding ONE inner + * ListBlock whose items are the cells. The flatten-all-inner-lists loop + * keeps multiple inner lists working too. Any non-list block sibling (e.g. + * a trailing paragraph the parser left outside the inner list) is appended + * to the most recently opened cell so multi-block content is never dropped. + * + * @return array<\Djot\Node\Block\ListItem> + */ + protected function extractCells(ListItem $rowItem): array + { + $cells = []; + foreach ($rowItem->getChildren() as $child) { + if ($child instanceof ListBlock) { + foreach ($child->getChildren() as $cellItem) { + if ($cellItem instanceof ListItem) { + $cells[] = $cellItem; + } + } + + continue; + } + + // A stray block following the inner list belongs to the last cell. + if ($cells !== []) { + $cells[count($cells) - 1]->appendChild($child); + } + } + + return $cells; + } + + /** + * Render a single cell's content. + * + * A cell whose only child is an attribute-free paragraph collapses to its + * inline content (no `

    ` wrapper), matching tight list-item/table-cell + * rendering. Otherwise the block children render normally and keep their + * wrappers. + */ + protected function renderCell(ListItem $cell, HtmlRenderer $renderer): string + { + $children = $cell->getChildren(); + + if (count($children) === 1 && $children[0] instanceof Paragraph && $children[0]->getAttributes() === []) { + $html = rtrim($renderer->renderNodeFragment($children[0]), "\n"); + + // Strip the single

    wrapper to inline the content. + if (preg_match('/^

    (.*)<\/p>$/s', $html, $m) === 1) { + return $m[1]; + } + + return $html; + } + + $html = ''; + foreach ($children as $child) { + $html .= $renderer->renderNodeFragment($child); + } + + return rtrim($html, "\n"); + } + + /** + * Build the `` tag attributes. + * + * Drops the structural attributes consumed by this extension (`caption`, + * `header-rows`, `header-cols`) and the auto `list-table` class (the + * `
    ` tag is itself the styling hook); preserves any sibling classes + * and other attributes in source order. Applies the same safe-mode + * filtering the core renderer does. + */ + protected function renderTableAttributes(Div $node, HtmlRenderer $renderer): string + { + $attrs = $node->getAttributes(); + unset($attrs['caption'], $attrs['header-rows'], $attrs['header-cols']); + + $safeMode = $renderer->getSafeMode(); + if ($safeMode !== null) { + $attrs = $safeMode->filterAttributes($attrs); + } + + if (isset($attrs['class'])) { + $classes = array_values(array_filter( + preg_split('/\s+/', trim($attrs['class'])) ?: [], + static fn (string $class): bool => $class !== '' && $class !== self::KIND, + )); + + if ($classes === []) { + unset($attrs['class']); + } else { + $attrs['class'] = implode(' ', $classes); + } + } + + $html = ''; + foreach ($attrs as $key => $value) { + $html .= ' ' . $this->escapeHtml((string)$key) . '="' . $renderer->escapeAttribute((string)$value) . '"'; + } + + return $html; + } + + /** + * Escape text for HTML content (caption / attribute names). + * + * Matches the core renderer's `escape()`: escapes only `<`, `>`, `&` + * (ENT_NOQUOTES, djot keeps quotes literal) and converts the escaped-space + * placeholder to ` `. 
+ */ + protected function escapeHtml(string $text): string + { + $escaped = htmlspecialchars($text, ENT_NOQUOTES | ENT_HTML5, 'UTF-8'); + + return str_replace("\u{E000}", ' ', $escaped); + } +} diff --git a/tests/TestCase/Extension/ListTableExtensionTest.php b/tests/TestCase/Extension/ListTableExtensionTest.php new file mode 100644 index 00000000..ca17e09d --- /dev/null +++ b/tests/TestCase/Extension/ListTableExtensionTest.php @@ -0,0 +1,312 @@ +addExtension(new ListTableExtension()); + + return trim($converter->convert($djot)); + } + + public function testBasicTwoColumnWithHeaderRowAndCaption(): void + { + $djot = implode("\n", [ + '{caption="Quarterly results" header-rows=1}', + '::: list-table', + '- - Region', + ' - Notes', + '- - EMEA', + ' - Strong quarter.', + ':::', + ]); + + $expected = implode("\n", [ + '
    ', + ' ', + ' ', + ' ', + ' ', + ' ', + '
    Quarterly results
    RegionNotes
    EMEAStrong quarter.
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testMultiBlockCellStaysWrappedWhileSingleParagraphCollapses(): void + { + $djot = implode("\n", [ + '::: list-table', + '- - EMEA', + ' - Strong quarter.', + '', + ' Drivers:', + '', + ' - new logos', + ' - renewals', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + '
    EMEA

    Strong quarter.

    ', + '

    Drivers:

    ', + '
      ', + '
    • ', + 'new logos', + '
    • ', + '
    • ', + 'renewals', + '
    • ', + '
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testHeaderCols(): void + { + $djot = implode("\n", [ + '{header-cols=1}', + '::: list-table', + '- - Region', + ' - Revenue', + '- - EMEA', + ' - 1.2M', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    RegionRevenue
    EMEA1.2M
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testHeaderRowsAndHeaderColsCombine(): void + { + $djot = implode("\n", [ + '{header-rows=1 header-cols=1}', + '::: list-table', + '- - Metric', + ' - Q1', + ' - Q2', + '- - EMEA', + ' - 1.0', + ' - 1.2', + ':::', + ]); + + // The whole header row and the first column are all . + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    MetricQ1Q2
    EMEA1.01.2
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testRaggedRowsArePadded(): void + { + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ' - C', + '- - D', + ' - E', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    ABC
    DE
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testNoCaption(): void + { + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + '
    AB
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testInlineMarkupInCell(): void + { + $djot = implode("\n", [ + '::: list-table', + '- - Use `flat` markup', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + '
    Use flat markup
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testExtensionOffRendersDefaultDiv(): void + { + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ':::', + ]); + + $converter = new DjotConverter(); + $html = trim($converter->convert($djot)); + + $expected = implode("\n", [ + '

    ', + '
      ', + '
    • ', + '
        ', + '
      • ', + 'A', + '
      • ', + '
      • ', + 'B', + '
      • ', + '
      ', + '
    • ', + '
    ', + '
    ', + ]); + $this->assertSame($expected, $html); + } + + public function testOtherDivsAreNotClaimed(): void + { + $djot = implode("\n", [ + '::: note', + 'Hello.', + ':::', + ]); + + $html = $this->render($djot); + + $this->assertStringContainsString('

    Hello.

    ', $html); + $this->assertStringNotContainsString('', + '

    Just a paragraph, no list.

    ', + '', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testStraySiblingContentDefersToDefaultAndIsNotDropped(): void + { + $djot = implode("\n", [ + '::: list-table', + 'Intro paragraph.', + '', + '- - A', + ' - B', + '', + 'Trailing paragraph.', + ':::', + ]); + + $html = $this->render($djot); + + // The div is not claimed (extra siblings around the list); it degrades + // to the default nested-list div so no content is lost. + $this->assertStringStartsWith('
    ', $html); + $this->assertStringContainsString('

    Intro paragraph.

    ', $html); + $this->assertStringContainsString('

    Trailing paragraph.

    ', $html); + $this->assertStringContainsString('
  • ', $html); + $this->assertStringContainsString('A', $html); + $this->assertStringContainsString('B', $html); + $this->assertStringNotContainsString('render($djot); + + // A row authored with direct content (no inner cell list) means the + // structure is not a clean table; defer to the default div so the + // label is never dropped into an empty . + $this->assertStringStartsWith('
    ', $html); + $this->assertStringContainsString('Row label only', $html); + $this->assertStringNotContainsString('render($djot); + + $this->assertStringStartsWith('', $html); + $this->assertStringNotContainsString('list-table', $html); + } +} From 2be9af498f42cd1660d58df7aabab19d07000550 Mon Sep 17 00:00:00 2001 From: Mark Scherer Date: Sat, 20 Jun 2026 14:38:57 +0200 Subject: [PATCH 2/3] Add rowspan/colspan span markers to ListTableExtension A list-table cell whose sole inline content is a lone caret merges into the cell above (rowspan) and a lone less-than merges into the cell to the left (colspan), reusing djot-php's native pipe-table span markers and the same continuation semantics: colspan of three is two trailing markers, rowspan of N is N-1 markers in the rows below the origin. The marker detection runs after the inline parse, so an escaped marker or an attributed cell keeps its literal content and is never a span marker. The grid is resolved into effective columns that account for colspans and for rowspans reserved by earlier rows, padding ragged rows with empty cells; well-formed spans produce the same table markup the equivalent pipe table emits. No span markers leaves the previous behavior unchanged. Span resolution lives in a small SpanDescriptors value object so the rowspan/colspan mutations stay on one typed list. Docs: document the marker syntax, the Sales authoring example with its rendered HTML, and the note that it reuses the pipe-table span markers. 
--- docs/extensions/index.md | 53 +++ src/Extension/ListTableExtension.php | 293 +++++++++++++++- src/Extension/SpanDescriptors.php | 66 ++++ .../Extension/ListTableExtensionTest.php | 314 ++++++++++++++++++ 4 files changed, 708 insertions(+), 18 deletions(-) create mode 100644 src/Extension/SpanDescriptors.php diff --git a/docs/extensions/index.md b/docs/extensions/index.md index 5f04a02d..4ea91ec9 100644 --- a/docs/extensions/index.md +++ b/docs/extensions/index.md @@ -743,6 +743,59 @@ tag in source order (safe-mode filtering still applies); the structural `caption`, `header-rows`, `header-cols`, and the auto `list-table` class are consumed by the extension and not emitted. +### Spanning rows and columns + +Cells can span rows and columns using the **same `^` / `<` markers djot-php's +native pipe tables use**, with the same continuation semantics. A cell (list +item) whose sole inline content is: + +- a lone `^` merges into the cell **above** it (rowspan); +- a lone `<` merges into the cell to its **left** (colspan). + +Spans are written as continuations: a `colspan=3` is the origin cell followed by +two `<` cells; a `rowspan=N` is the origin cell followed by `N - 1` `^` cells in +the rows below it. To keep a literal `^` or `<` as cell text, escape it (`\^`, +`\<`) or attach an attribute - an attributed cell is never treated as a span +marker. A `^` with no cell above it, or a leading `<` with no cell to its left, +degrades to an empty cell rather than being dropped. + +**Input:** +```djot +{caption="Sales" header-rows=1} +::: list-table +- - Region + - Q1 + - Q2 +- - EMEA + - 10 + - 12 +- - ^ + - 14 + - 16 +- - Total + - < + - < +::: +``` + +**Output:** +```html +
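<table> +  <caption>Sales</caption> +  <thead><tr><th>Region</th><th>Q1</th><th>Q2</th></tr></thead> +  <tbody> +    <tr><td rowspan="2">EMEA</td><td>10</td><td>12</td></tr> +    <tr><td>14</td><td>16</td></tr> +    <tr><td colspan="3">Total</td></tr> +  </tbody> +</table>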
    +``` + +The `^` under `EMEA` gives its column `rowspan="2"`; the two `<` cells under +`Total` give it `colspan="3"`. The resulting span markup matches what the +equivalent pipe table produces - the extension reuses djot-php's pipe-table span +resolution semantics. + This builds on the marker-line nested-list block-absorption fix ([#251](https://github.com/php-collective/djot-php/pull/251)) that lets a cell hold full block content. HTML output only. diff --git a/src/Extension/ListTableExtension.php b/src/Extension/ListTableExtension.php index 12f931c9..ed53b88d 100644 --- a/src/Extension/ListTableExtension.php +++ b/src/Extension/ListTableExtension.php @@ -10,6 +10,7 @@ use Djot\Node\Block\ListBlock; use Djot\Node\Block\ListItem; use Djot\Node\Block\Paragraph; +use Djot\Node\Inline\Text; use Djot\Renderer\HtmlRenderer; /** @@ -44,6 +45,15 @@ * every row to row-header ``. Single-paragraph cells collapse to inline * content (`text`); multi-block cells keep their block wrappers. * + * Cells may span rows and columns using the same markers djot-php's native + * pipe tables use, with the same continuation semantics: a cell whose sole + * inline content is a lone `^` merges into the cell ABOVE (rowspan), and a + * lone `<` merges into the cell to the LEFT (colspan). `colspan=3` is written + * as two trailing `<` cells; `rowspan=N` as `N - 1` `^` cells in the rows + * below the origin. Escape the marker (`\^`, `\<`) or attach an attribute to + * keep it literal - an attributed cell is never treated as a span marker. The + * resulting `` matches the span markup the equivalent pipe table emits. + * * Only `::: list-table` divs whose sole block child is the table list are * claimed; every other div defers to the core renderer. When this extension is * not registered the block degrades to the default `
    ` @@ -142,12 +152,11 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string $headerRows = max(0, (int)($node->getAttribute('header-rows') ?? '0')); $headerCols = max(0, (int)($node->getAttribute('header-cols') ?? '0')); - // Ragged rows: pad short rows with empty cells to the widest row so no - // content is dropped and the grid stays rectangular. - $columnCount = 0; - foreach ($rows as $cells) { - $columnCount = max($columnCount, count($cells)); - } + // Resolve `^` (rowspan) / `<` (colspan) span markers into a placed grid, + // reusing the same continuation semantics as native pipe tables. Each + // placed entry carries the origin cell plus its resolved span and the + // effective column it starts at; marker cells are consumed (omitted). + [$grid] = $this->resolveSpans($rows); $lines = []; @@ -156,34 +165,40 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string $lines[] = '
    '; } - $renderRow = function (array $cells, bool $isHeaderRow) use ($renderer, $headerCols, $columnCount): string { + $renderRow = function (array $placedCells, bool $isHeaderRow) use ($renderer, $headerCols): string { $html = ''; - for ($i = 0; $i < $columnCount; $i++) { - $isHeaderCell = $isHeaderRow || $i < $headerCols; + foreach ($placedCells as $placed) { + $isHeaderCell = $isHeaderRow || $placed['col'] < $headerCols; $tag = $isHeaderCell ? 'th' : 'td'; - $cell = $cells[$i] ?? null; - $content = $cell !== null ? $this->renderCell($cell, $renderer) : ''; - $html .= '<' . $tag . '>' . $content . ''; + $spanAttrs = ''; + if ($placed['rowspan'] > 1) { + $spanAttrs .= ' rowspan="' . $placed['rowspan'] . '"'; + } + if ($placed['colspan'] > 1) { + $spanAttrs .= ' colspan="' . $placed['colspan'] . '"'; + } + $content = $this->renderCell($placed['cell'], $renderer); + $html .= '<' . $tag . $spanAttrs . '>' . $content . ''; } return '' . $html . ''; }; - $headRows = array_slice($rows, 0, $headerRows); - $bodyRows = array_slice($rows, $headerRows); + $headRows = array_slice($grid, 0, $headerRows); + $bodyRows = array_slice($grid, $headerRows); if ($headRows !== []) { $thead = ''; - foreach ($headRows as $cells) { - $thead .= $renderRow($cells, true); + foreach ($headRows as $placedCells) { + $thead .= $renderRow($placedCells, true); } $lines[] = ' ' . $thead . ''; } if ($bodyRows !== []) { $tbody = ''; - foreach ($bodyRows as $cells) { - $tbody .= ' ' . $renderRow($cells, false) . "\n"; + foreach ($bodyRows as $placedCells) { + $tbody .= ' ' . $renderRow($placedCells, false) . "\n"; } $lines[] = " \n" . rtrim($tbody, "\n") . "\n "; } @@ -193,6 +208,248 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string return '\n" . implode("\n", $lines) . "\n
    ' . $this->escapeHtml($caption) . '
    \n"; } + /** + * Resolve `^` / `<` span markers across a ragged grid of row cells. + * + * Mirrors the continuation semantics of native pipe tables (see + * `BlockParser` / `TableParser`): walking each row left to right, a `<` + * cell grows the cell to its left (colspan) and is omitted, and a `^` cell + * grows the cell directly above in the same effective column (rowspan) and + * is omitted. Effective columns account for colspans and for rowspans + * reserved by earlier rows, exactly like the pipe-table grid. Leading `<` + * with no cell to the left, and `^` with no origin above, degrade to an + * empty cell rather than being dropped (pipe-table parity). + * + * Returns `[$grid, $columnCount]` where `$grid` is a list of rows, each a + * list of placed cells `['cell' => ListItem, 'col' => int, 'rowspan' => + * int, 'colspan' => int]` in left-to-right order, and `$columnCount` is the + * effective width of the widest row. Short rows are padded with empty cells + * so the grid stays rectangular (no content dropped). + * + * @param array> $rows + * + * @return array{0: array>, 1: int} + */ + protected function resolveSpans(array $rows): array + { + // Flat list of origin descriptors. Each is referenced from the grid by + // its integer index, so the rowspan/colspan mutations below stay on a + // single typed list instead of a nested-array shape. + $descriptors = new SpanDescriptors(); + + // grid[row][col] = descriptor index that occupies this grid position, + // whether it originates here, spans in from the left (colspan), or spans + // in from above (rowspan). Every column of every row is occupied, so a + // `^` only ever consults the row immediately above it. + $grid = []; + // Per-row, left-to-right list of descriptor indices that ORIGINATE in + // that row (authored cells and ragged padding), used to walk rows for + // rendering. Rowspan/colspan continuations are NOT origins here. + $rowOrigins = []; + + // Running effective width. 
Earlier rows are padded up to this so a later + // `^` can attach to the empty cell directly above it, mirroring how the + // equivalent pipe table pads ragged rows with real empty cells. + $width = 0; + + foreach ($rows as $rowIndex => $cells) { + $rowOrigins[$rowIndex] = []; + $col = 0; + $lastOriginIndex = null; + + // Each origin grows at most once per row even when several `^` + // markers fall under the columns a single wide cell covers. + $extendedThisRow = []; + + foreach ($cells as $cell) { + $marker = $this->spanMarker($cell); + + if ($marker === '^' && isset($grid[$rowIndex - 1][$col])) { + // Rowspan: the descriptor directly above (which may itself be + // a colspan origin) extends down into this row. A marker maps + // 1:1 to a source column - it advances the cursor by one and + // does not skip - mirroring the native pipe table's per-column + // rowspan resolution. Grow the origin once, then reserve its + // WHOLE rectangle here so a real cell never lands inside it. + $originIndex = $grid[$rowIndex - 1][$col]; + if (!isset($extendedThisRow[$originIndex])) { + $descriptors->growRowspan($originIndex); + $extendedThisRow[$originIndex] = true; + + $origin = $descriptors->get($originIndex); + for ($c = $origin['col']; $c < $origin['col'] + $origin['colspan']; $c++) { + $grid[$rowIndex][$c] = $originIndex; + } + } + + $col++; + $lastOriginIndex = null; + + continue; + } + + // Real cells (and degraded markers) skip columns already reserved + // by rowspan rectangles - their own row's or earlier rows'. + // + // Note: for malformed input where a real cell would land inside a + // rowspan rectangle (a lone `^` under a colspan>1 cell, then more + // cells in the same row), the native pipe table drops that cell; + // here it is relocated to the next free column instead. 
We keep + // the content deliberately - the extension's guarantee is never to + // silently drop authored content - at the cost of one column of + // pipe-table divergence on this malformed shape. Well-formed spans + // (a `^` under every column a wide cell covers) match the pipe + // table exactly. + while (isset($grid[$rowIndex][$col])) { + $col++; + } + + if ($marker === '<' && $lastOriginIndex !== null) { + // Colspan: grow the cell to the left, claim this column for it. + $descriptors->growColspan($lastOriginIndex); + $grid[$rowIndex][$col] = $lastOriginIndex; + $col++; + + continue; + } + + // A normal cell, a leading `<` with no left neighbor, or a `^` + // with no cell above: the latter two degrade to an empty cell. + // A degraded marker is NOT a colspan target, so a run of leading + // `<` yields one empty cell each (pipe-table parity). + $isEmpty = $marker !== null; + $index = $descriptors->add($isEmpty ? $this->emptyCell() : $cell, $col); + $grid[$rowIndex][$col] = $index; + $rowOrigins[$rowIndex][] = $index; + $lastOriginIndex = $isEmpty ? null : $index; + $col++; + } + + $width = max($width, $col); + + // Pad this row up to the running width with empty origin cells so a + // later `^` always has a real cell directly above it to extend. + $this->padRow($descriptors, $grid, $rowOrigins, $rowIndex, $width); + } + + return $this->buildGrid($descriptors, $rowOrigins, $grid, $width); + } + + /** + * Pad a row up to the target width with empty origin cells. + * + * Fills any free columns (gaps left by ragged input or by spans that did not + * reach the running width) with fresh empty cells so every column of every + * processed row is occupied. This is what lets a later `^` attach to the + * cell directly above it, matching the pipe table's ragged-row padding. 
+ * + * @param \Djot\Extension\SpanDescriptors $descriptors Descriptor list that receives the new empty cells + * @param array<int, array<int, int>> $grid Row index => column => descriptor index; filled in place + * @param array<int, array<int, int>> $rowOrigins Row index => descriptor indices originating in that row; appended in place + * @param int $rowIndex Row to pad + * @param int $width Target column count + */ + protected function padRow(SpanDescriptors $descriptors, array &$grid, array &$rowOrigins, int $rowIndex, int $width): void + { + for ($col = 0; $col < $width; $col++) { + if (isset($grid[$rowIndex][$col])) { + continue; + } + + $index = $descriptors->add($this->emptyCell(), $col); + $grid[$rowIndex][$col] = $index; + $rowOrigins[$rowIndex][] = $index; + } + } + + /** + * Assemble the rectangular render grid from the resolved descriptors. + * + * Walks each row's originating cells in order and pads with trailing empty + * cells up to the widest effective column count, so ragged input still + * yields a rectangular table and no content is dropped, matching the + * no-span ragged behavior. + * + * @param \Djot\Extension\SpanDescriptors $descriptors + * @param array<int, array<int, int>> $rowOrigins + * @param array<int, array<int, int>> $grid + * @param int $columnCount + * + * @return array{0: array<int, array<int, array{cell: \Djot\Node\Block\ListItem, col: int, rowspan: int, colspan: int}>>, 1: int} + */ + protected function buildGrid(SpanDescriptors $descriptors, array $rowOrigins, array $grid, int $columnCount): array + { + $rendered = []; + foreach ($rowOrigins as $rowIndex => $indices) { + $cells = []; + foreach ($indices as $index) { + $cells[] = $descriptors->get($index); + } + + // Highest column this row already covers (origins + rowspans from + // above); pad the remaining gap with empty cells. + $covered = 0; + foreach (($grid[$rowIndex] ?? []) as $c => $_) { + $covered = max($covered, $c + 1); + } + + for ($c = $covered; $c < $columnCount; $c++) { + $cells[] = [ + 'cell' => $this->emptyCell(), + 'col' => $c, + 'rowspan' => 1, + 'colspan' => 1, + ]; + } + + $rendered[$rowIndex] = $cells; + } + + return [$rendered, $columnCount]; + } + + /** + * Create an empty placeholder cell (an empty list item with no content).
+ */ + protected function emptyCell(): ListItem + { + return new ListItem(); + } + + /** + * Detect a span marker cell. + * + * Returns `'^'` or `'<'` when the cell's sole inline content is exactly that + * marker - i.e. a single attribute-free paragraph whose only child is a Text + * node equal to the marker. Anything else (escaped `\^`/`\<` parses to an + * EscapedText node, an attribute wraps the text in a Span) is not a marker + * and returns null, so the literal `^`/`<` content is kept. + */ + protected function spanMarker(ListItem $cell): ?string + { + $children = $cell->getChildren(); + if (count($children) !== 1) { + return null; + } + + $paragraph = $children[0]; + if (!$paragraph instanceof Paragraph || $paragraph->getAttributes() !== []) { + return null; + } + + $inline = $paragraph->getChildren(); + if (count($inline) !== 1 || !$inline[0] instanceof Text) { + return null; + } + + $content = $inline[0]->getContent(); + if ($content === '^' || $content === '<') { + return $content; + } + + return null; + } + /** * Extract the cells of a row. * diff --git a/src/Extension/SpanDescriptors.php b/src/Extension/SpanDescriptors.php new file mode 100644 index 00000000..d5c64315 --- /dev/null +++ b/src/Extension/SpanDescriptors.php @@ -0,0 +1,66 @@ + + */ + protected array $descriptors = []; + + /** + * Add an origin cell at the given effective column and return its index. + */ + public function add(ListItem $cell, int $col): int + { + $index = count($this->descriptors); + $this->descriptors[$index] = [ + 'cell' => $cell, + 'col' => $col, + 'rowspan' => 1, + 'colspan' => 1, + ]; + + return $index; + } + + /** + * Grow the colspan of the descriptor at the given index by one. + */ + public function growColspan(int $index): void + { + $this->descriptors[$index]['colspan']++; + } + + /** + * Grow the rowspan of the descriptor at the given index by one. 
+ */ + public function growRowspan(int $index): void + { + $this->descriptors[$index]['rowspan']++; + } + + /** + * Get the descriptor at the given index. + * + * @return array{cell: \Djot\Node\Block\ListItem, col: int, rowspan: int, colspan: int} + */ + public function get(int $index): array + { + return $this->descriptors[$index]; + } +} diff --git a/tests/TestCase/Extension/ListTableExtensionTest.php b/tests/TestCase/Extension/ListTableExtensionTest.php index ca17e09d..c68ac722 100644 --- a/tests/TestCase/Extension/ListTableExtensionTest.php +++ b/tests/TestCase/Extension/ListTableExtensionTest.php @@ -294,6 +294,320 @@ public function testRowWithoutCellListDefersAndKeepsContent(): void $this->assertStringNotContainsString('', + ' ', + ' EMEA10', + ' 14', + ' ', + '', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testColspanWithSingleLessThan(): void + { + // X spans two columns: the cell to its right is a lone `<`. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ' - C', + '- - X', + ' - <', + ' - Z', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    ABC
    XZ
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testColspanWithTwoLessThan(): void + { + // Total spans all three columns: two trailing `<` cells. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ' - C', + '- - Total', + ' - <', + ' - <', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    ABC
    Total
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testSalesExampleWithRowspanAndColspan(): void + { + $djot = implode("\n", [ + '{caption="Sales" header-rows=1}', + '::: list-table', + '- - Region', + ' - Q1', + ' - Q2', + '- - EMEA', + ' - 10', + ' - 12', + '- - ^', + ' - 14', + ' - 16', + '- - Total', + ' - <', + ' - <', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + ' ', + '
    Sales
    RegionQ1Q2
    EMEA1012
    1416
    Total
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testCombinedRowspanAndColspan(): void + { + // C spans two rows; X spans two columns in the row that also rowspans C. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ' - C', + '- - X', + ' - <', + ' - ^', + '- - P', + ' - Q', + ' - R', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + ' ', + '
    ABC
    X
    PQR
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testEscapedMarkersAreKeptLiteralNotSpans(): void + { + // An escaped `\^` / `\<` (and an attributed marker) is literal content, + // never a span marker. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + '- - \\^', + ' - \\<', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    AB
    ^<
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testListTableSpanHtmlMatchesEquivalentPipeTable(): void + { + // The span resolution must produce the same rowspan/colspan markup the + // native pipe table emits for the equivalent input. + $listTable = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + ' - C', + '- - X', + ' - <', + ' - ^', + '- - P', + ' - Q', + ' - R', + ':::', + ]); + + $pipeTable = implode("\n", [ + '| A | B | C |', + '|---|---|---|', + '| X | < | ^ |', + '| P | Q | R |', + ]); + + $converter = new DjotConverter(); + $converter->addExtension(new ListTableExtension()); + $listHtml = trim($converter->convert($listTable)); + + $pipeConverter = new DjotConverter(); + $pipeHtml = trim($pipeConverter->convert($pipeTable)); + + // Normalize both to the bare cell markup (tag + span attributes + text), + // dropping whitespace, thead/tbody grouping and the th/td distinction + // that the list-table header conventions add on top. + $normalize = static function (string $html): string { + $html = preg_replace('/<\/?(table|thead|tbody|caption)[^>]*>/', '', $html) ?? $html; + $html = preg_replace('/\s+/', '', $html) ?? $html; + $html = str_replace([''], [''], $html); + + return $html; + }; + + $this->assertSame($normalize($pipeHtml), $normalize($listHtml)); + } + + public function testRowspanOverColspanReservesTheWholeRectangle(): void + { + // A colspan=2 cell that also spans two rows needs a `^` under each of + // its columns; the result matches the equivalent pipe table. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - <', + ' - C', + '- - ^', + ' - ^', + ' - D', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    AC
    D
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + + // Same span markup as the equivalent pipe table. + $pipe = implode("\n", [ + '| A | < | C |', + '|---|---|---|', + '| ^ | ^ | D |', + ]); + $pipeHtml = trim((new DjotConverter())->convert($pipe)); + $this->assertStringContainsString('A', $pipeHtml); + } + + public function testRowspanAttachesToCellDirectlyAboveNotAcrossRaggedGaps(): void + { + // The middle row is short; its second column is padded with an empty + // cell. A `^` in the next row must extend THAT padded cell, never jump + // up to `B` two rows above - matching the equivalent pipe table. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + '- - C', + '- - X', + ' - ^', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + ' ', + '
    AB
    C
    X
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testConsecutiveLeadingColspanMarkersEachBecomeEmptyCell(): void + { + // A run of leading `<` with no cell to the left yields one empty cell + // per marker (they never merge into each other), matching the pipe + // table's leading-marker fallback. + $djot = implode("\n", [ + '::: list-table', + '- - <', + ' - <', + ' - A', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + '
    A
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testNoSpanMarkersIsUnchanged(): void + { + // A table with no span markers must render exactly as before. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + '- - C', + ' - D', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    AB
    CD
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + public function testSiblingClassIsPreservedOnTable(): void { $djot = implode("\n", [ From b6ef1798888ea94a066dacf66d1e1f8158ba1973 Mon Sep 17 00:00:00 2001 From: Mark Scherer Date: Sat, 20 Jun 2026 14:59:07 +0200 Subject: [PATCH 3/3] Fix list-table edge cases: attributed cell, defer mutation, header rowspan Three edge-case defects in the list-table extension: - An attributed cell (e.g. -{.x} ^) was still treated as a span marker because the marker check only looked at the paragraph's attributes, not the cell list item's. The class and the literal ^ were dropped and the neighbor wrongly gained a rowspan. spanMarker() now returns null when the cell carries its own attributes, and the cell attributes are emitted onto the <td>/<th> with the same safe-mode filtering the core renderer applies. - A malformed list-table that defers to the default div renderer could duplicate user content: extractCells() appended a stray trailing block to the previous cell before the defer decision was made, leaving the mutation on the AST. Cells and pending appends are now collected without mutating; the appends are applied only once every row validates and the div is claimed, so a deferred render is byte-identical to the plain div. - A rowspan authored across the header/body boundary emitted a cell inside <thead> with a rowspan reaching into <tbody>, which browsers misrender. The rowspan is now clamped at the boundary: a ^ in the first body row whose origin lives in the header rows degrades to a fresh empty body cell, and the header cell keeps its rowspan within the header rows. Rowspans entirely within the header or entirely within the body are unaffected.
--- docs/extensions/index.md | 24 ++- src/Extension/ListTableExtension.php | 123 +++++++++++++--- src/Extension/SpanDescriptors.php | 18 ++- .../Extension/ListTableExtensionTest.php | 138 ++++++++++++++++++ 4 files changed, 274 insertions(+), 29 deletions(-) diff --git a/docs/extensions/index.md b/docs/extensions/index.md index 4ea91ec9..7c17a3ac 100644 --- a/docs/extensions/index.md +++ b/docs/extensions/index.md @@ -754,10 +754,26 @@ item) whose sole inline content is: Spans are written as continuations: a `colspan=3` is the origin cell followed by two `<` cells; a `rowspan=N` is the origin cell followed by `N - 1` `^` cells in -the rows below it. To keep a literal `^` or `<` as cell text, escape it (`\^`, -`\<`) or attach an attribute - an attributed cell is never treated as a span -marker. A `^` with no cell above it, or a leading `<` with no cell to its left, -degrades to an empty cell rather than being dropped. +the rows below it. + +To keep a literal `^` or `<` as cell text, **escape it** (`\^`, `\<`) or **give +the cell its own attribute** - an attributed cell is never treated as a span +marker. The attribute can sit on the cell's list item, e.g. `-{.note} ^` renders +as a literal `<td class="note">^</td>` (the `class` is preserved, the `^` is kept +verbatim), not a rowspan. + +A span marker with no cell to merge into renders as an **empty cell** rather than +being dropped: a `^` in the first row (no cell above), a `<` in the first column +(no cell to the left), or a `<` adjacent to a cell already pulled in by a rowspan +all become an empty `<td>`. A row whose every cell is a span marker emits an empty +`<tr>` - valid, if cosmetically odd. + +A `rowspan` is **clamped at the `<thead>`/`<tbody>` boundary**: a `^` in the first +body row whose origin lives in the header rows does not extend that header cell +down into the body (HTML cells cannot reliably span across row groups - browsers +misrender).
The `^` degrades to a fresh empty body cell instead, and the header +cell keeps its rowspan within the header rows. A rowspan that lives entirely +inside the header rows, or entirely inside the body, is unaffected. **Input:** ```djot diff --git a/src/Extension/ListTableExtension.php b/src/Extension/ListTableExtension.php index ed53b88d..cbc35302 100644 --- a/src/Extension/ListTableExtension.php +++ b/src/Extension/ListTableExtension.php @@ -126,29 +126,48 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string // ListBlock children, in document order. djot-php yields exactly one // inner list per row, but the flatten-all-inner-lists path stays for // robustness. + // + // Validate EVERY row before mutating the AST. `extractCells` collects any + // stray block that should move into the previous cell as a deferred + // append rather than applying it immediately - so a later row that forces + // a defer leaves the original tree untouched. Otherwise the default div + // renderer would render that stray block twice (once in its original + // position, once inside the cell it was appended to), duplicating the + // user's content for a malformed `list-table`. $rows = []; + $pendingAppends = []; foreach ($outerList->getChildren() as $rowItem) { if (!$rowItem instanceof ListItem) { continue; } - $cells = $this->extractCells($rowItem); + [$cells, $appends] = $this->extractCells($rowItem); // A row without an inner cell list (e.g. `- Row label` with direct // content rather than nested `- - cell` items) yields no cells. Such // a structure is not a clean table; defer to the default div so the - // row's content is never silently dropped into an empty ``. + // row's content is never silently dropped into an empty ``. We + // return BEFORE applying any pending append, leaving the AST intact. 
if ($cells === []) { return null; } $rows[] = $cells; + foreach ($appends as $append) { + $pendingAppends[] = $append; + } } if ($rows === []) { return null; } + // All rows are valid - we will claim the div. Now it is safe to move the + // stray blocks into their cells; the default renderer will never run. + foreach ($pendingAppends as [$cell, $block]) { + $cell->appendChild($block); + } + $headerRows = max(0, (int)($node->getAttribute('header-rows') ?? '0')); $headerCols = max(0, (int)($node->getAttribute('header-cols') ?? '0')); @@ -156,7 +175,9 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string // reusing the same continuation semantics as native pipe tables. Each // placed entry carries the origin cell plus its resolved span and the // effective column it starts at; marker cells are consumed (omitted). - [$grid] = $this->resolveSpans($rows); + // `headerRows` is passed so a rowspan never crosses the thead/tbody + // boundary (HTML cells cannot reliably span row groups). + [$grid] = $this->resolveSpans($rows, $headerRows); $lines = []; @@ -177,8 +198,9 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string if ($placed['colspan'] > 1) { $spanAttrs .= ' colspan="' . $placed['colspan'] . '"'; } + $cellAttrs = $this->renderCellAttributes($placed['cell'], $renderer); $content = $this->renderCell($placed['cell'], $renderer); - $html .= '<' . $tag . $spanAttrs . '>' . $content . ''; + $html .= '<' . $tag . $cellAttrs . $spanAttrs . '>' . $content . ''; } return '' . $html . ''; @@ -226,11 +248,18 @@ protected function renderListTable(Div $node, HtmlRenderer $renderer): ?string * effective width of the widest row. Short rows are padded with empty cells * so the grid stays rectangular (no content dropped). * + * A rowspan is clamped at the `$headerRows` boundary: a `^` in the first + * body row whose origin lives in the header rows does NOT extend that header + * cell into the body. 
HTML cells cannot reliably span across `` / + * `` (browsers misrender), so the `^` degrades to a fresh empty body + * cell instead and the header cell's rowspan stays within the header rows. + * * @param array> $rows + * @param int $headerRows * * @return array{0: array>, 1: int} */ - protected function resolveSpans(array $rows): array + protected function resolveSpans(array $rows, int $headerRows = 0): array { // Flat list of origin descriptors. Each is referenced from the grid by // its integer index, so the rowspan/colspan mutations below stay on a @@ -264,7 +293,19 @@ protected function resolveSpans(array $rows): array foreach ($cells as $cell) { $marker = $this->spanMarker($cell); - if ($marker === '^' && isset($grid[$rowIndex - 1][$col])) { + // Clamp a rowspan at the header/body boundary. A `^` in the first + // body row whose origin lives in the header rows would extend a + // `` cell down into ``; HTML cannot reliably span a + // cell across row groups (browsers misrender), so here the `^` + // is NOT a rowspan - it degrades to a fresh empty body cell, and + // the header cell keeps its rowspan within the header rows. + $crossesHeaderBoundary = false; + if ($marker === '^' && isset($grid[$rowIndex - 1][$col]) && $rowIndex >= $headerRows) { + $originAbove = $descriptors->get($grid[$rowIndex - 1][$col]); + $crossesHeaderBoundary = $originAbove['row'] < $headerRows; + } + + if ($marker === '^' && !$crossesHeaderBoundary && isset($grid[$rowIndex - 1][$col])) { // Rowspan: the descriptor directly above (which may itself be // a colspan origin) extends down into this row. A marker maps // 1:1 to a source column - it advances the cursor by one and @@ -313,12 +354,13 @@ protected function resolveSpans(array $rows): array continue; } - // A normal cell, a leading `<` with no left neighbor, or a `^` - // with no cell above: the latter two degrade to an empty cell. 
- // A degraded marker is NOT a colspan target, so a run of leading - // `<` yields one empty cell each (pipe-table parity). + // A normal cell, a leading `<` with no left neighbor, a `^` with + // no cell above, or a `^` clamped at the header/body boundary: + // the markers degrade to an empty cell. A degraded marker is NOT + // a colspan target, so a run of leading `<` yields one empty cell + // each (pipe-table parity). $isEmpty = $marker !== null; - $index = $descriptors->add($isEmpty ? $this->emptyCell() : $cell, $col); + $index = $descriptors->add($isEmpty ? $this->emptyCell() : $cell, $rowIndex, $col); $grid[$rowIndex][$col] = $index; $rowOrigins[$rowIndex][] = $index; $lastOriginIndex = $isEmpty ? null : $index; @@ -356,7 +398,7 @@ protected function padRow(SpanDescriptors $descriptors, array &$grid, array &$ro continue; } - $index = $descriptors->add($this->emptyCell(), $col); + $index = $descriptors->add($this->emptyCell(), $rowIndex, $col); $grid[$rowIndex][$col] = $index; $rowOrigins[$rowIndex][] = $index; } @@ -424,9 +466,21 @@ protected function emptyCell(): ListItem * node equal to the marker. Anything else (escaped `\^`/`\<` parses to an * EscapedText node, an attribute wraps the text in a Span) is not a marker * and returns null, so the literal `^`/`<` content is kept. + * + * A cell that carries its OWN attributes (authored `-{.x} ^`, where the + * attribute lands on the cell's list item, not its paragraph) is never a + * span marker either - the documented escape rule keeps the literal `^`/`<` + * content and the cell's attributes. */ protected function spanMarker(ListItem $cell): ?string { + // An attributed cell is literal content, never a span marker. The + // attribute sits on the list item itself (e.g. `-{.x} ^`), so the + // paragraph below may still look like a bare marker - check here first. 
+ if ($cell->getAttributes() !== []) { + return null; + } + $children = $cell->getChildren(); if (count($children) !== 1) { return null; @@ -456,14 +510,20 @@ protected function spanMarker(ListItem $cell): ?string * A row like `- - A` / ` - B` parses to the outer item holding ONE inner * ListBlock whose items are the cells. The flatten-all-inner-lists loop * keeps multiple inner lists working too. Any non-list block sibling (e.g. - * a trailing paragraph the parser left outside the inner list) is appended - * to the most recently opened cell so multi-block content is never dropped. + * a trailing paragraph the parser left outside the inner list) belongs to + * the most recently opened cell so multi-block content is never dropped. * - * @return array<\Djot\Node\Block\ListItem> + * This method does NOT mutate the AST: it returns the cells plus a list of + * pending `[cell, block]` appends. The caller applies them only once it has + * decided to claim the div, so a deferred render leaves the tree untouched + * (no duplicated content). See `renderListTable`. + * + * @return array{0: array<\Djot\Node\Block\ListItem>, 1: array} */ protected function extractCells(ListItem $rowItem): array { $cells = []; + $appends = []; foreach ($rowItem->getChildren() as $child) { if ($child instanceof ListBlock) { foreach ($child->getChildren() as $cellItem) { @@ -476,12 +536,13 @@ protected function extractCells(ListItem $rowItem): array } // A stray block following the inner list belongs to the last cell. + // Record it; the caller applies the move only if the div is claimed. if ($cells !== []) { - $cells[count($cells) - 1]->appendChild($child); + $appends[] = [$cells[count($cells) - 1], $child]; } } - return $cells; + return [$cells, $appends]; } /** @@ -555,6 +616,34 @@ protected function renderTableAttributes(Div $node, HtmlRenderer $renderer): str return $html; } + /** + * Build the per-cell attributes for a ``/``. 
+ * + * A cell authored with its own attributes (`-{.x} ^`, `-{#id} value`) emits + * them onto the cell tag, in source order, with the same safe-mode filtering + * the core renderer applies. The structural span attributes (`rowspan` / + * `colspan`) are added separately by the caller and are not part of this. + */ + protected function renderCellAttributes(ListItem $cell, HtmlRenderer $renderer): string + { + $attrs = $cell->getAttributes(); + if ($attrs === []) { + return ''; + } + + $safeMode = $renderer->getSafeMode(); + if ($safeMode !== null) { + $attrs = $safeMode->filterAttributes($attrs); + } + + $html = ''; + foreach ($attrs as $key => $value) { + $html .= ' ' . $this->escapeHtml((string)$key) . '="' . $renderer->escapeAttribute((string)$value) . '"'; + } + + return $html; + } + /** * Escape text for HTML content (caption / attribute names). * diff --git a/src/Extension/SpanDescriptors.php b/src/Extension/SpanDescriptors.php index d5c64315..f7cba718 100644 --- a/src/Extension/SpanDescriptors.php +++ b/src/Extension/SpanDescriptors.php @@ -10,26 +10,28 @@ * A flat, mutable list of placed table cells used while the * {@see \Djot\Extension\ListTableExtension} resolves `^` / `<` span markers. * - * Each descriptor records the origin cell (a list item), the effective column - * it starts at, and its resolved rowspan / colspan. Keeping them in one typed - * list - referenced from the grid by integer index - lets span markers grow an - * earlier cell's span without losing the descriptor's array shape. + * Each descriptor records the origin cell (a list item), the row and effective + * column it starts at, and its resolved rowspan / colspan. Keeping them in one + * typed list - referenced from the grid by integer index - lets span markers + * grow an earlier cell's span without losing the descriptor's array shape. The + * origin row lets the renderer clamp a rowspan at the header/body boundary. 
*/ class SpanDescriptors { /** - * @var array + * @var array */ protected array $descriptors = []; /** - * Add an origin cell at the given effective column and return its index. + * Add an origin cell at the given row and effective column; return its index. */ - public function add(ListItem $cell, int $col): int + public function add(ListItem $cell, int $row, int $col): int { $index = count($this->descriptors); $this->descriptors[$index] = [ 'cell' => $cell, + 'row' => $row, 'col' => $col, 'rowspan' => 1, 'colspan' => 1, @@ -57,7 +59,7 @@ public function growRowspan(int $index): void /** * Get the descriptor at the given index. * - * @return array{cell: \Djot\Node\Block\ListItem, col: int, rowspan: int, colspan: int} + * @return array{cell: \Djot\Node\Block\ListItem, row: int, col: int, rowspan: int, colspan: int} */ public function get(int $index): array { diff --git a/tests/TestCase/Extension/ListTableExtensionTest.php b/tests/TestCase/Extension/ListTableExtensionTest.php index c68ac722..e86f08a4 100644 --- a/tests/TestCase/Extension/ListTableExtensionTest.php +++ b/tests/TestCase/Extension/ListTableExtensionTest.php @@ -608,6 +608,144 @@ public function testNoSpanMarkersIsUnchanged(): void $this->assertSame($expected, $this->render($djot)); } + public function testAttributedCellIsNeverASpanMarkerAndKeepsItsAttribute(): void + { + // A cell authored `-{.x} ^` carries an attribute on its list item; that + // makes it literal content, never a rowspan marker. The neighbor above + // must NOT gain a rowspan, the literal `^` must stay, and the `.x` + // attribute must be preserved on the cell. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + '- -{.x} ^', + ' - C', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    AB
    ^C
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testMalformedDeferDoesNotDuplicateStrayContent(): void + { + // A row whose last cell has a stray block appended, followed by a later + // row that forces a defer (no inner cell list). The extension must NOT + // have mutated the AST before deferring; otherwise the default div + // renderer renders the stray block twice. The claimed-extension output + // must be byte-identical to the plain div the converter produces with + // no extension registered. + $djot = implode("\n", [ + '::: list-table', + '- - A', + ' - B', + '', + ' Stray.', + '- Row label only', + ':::', + ]); + + $withExtension = $this->render($djot); + + $plain = trim((new DjotConverter())->convert($djot)); + + $this->assertSame($plain, $withExtension); + // Sanity: the stray block appears exactly once, not duplicated. + $this->assertSame(1, substr_count($withExtension, '

    Stray.

    ')); + } + + public function testHeaderRowRowspanDoesNotCrossIntoBody(): void + { + // A header-row cell with a `^` below it would span from into + // ; HTML cannot reliably span a cell across row groups. The + // rowspan is clamped: the header cell stays a plain and the body + // row gets a fresh empty cell in that column. + $djot = implode("\n", [ + '{header-rows=1}', + '::: list-table', + '- - H', + ' - X', + '- - ^', + ' - Y', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    HX
    Y
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + + // The header cell must not carry a rowspan reaching into the body. + $this->assertStringNotContainsString('rowspan', $this->render($djot)); + } + + public function testRowspanWithinHeaderRowsIsKept(): void + { + // A rowspan that lives entirely inside the header rows is valid and + // must be preserved (it never crosses into ). + $djot = implode("\n", [ + '{header-rows=2}', + '::: list-table', + '- - H', + ' - X', + '- - ^', + ' - Y', + '- - B1', + ' - B2', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + '
    HX
    Y
    B1B2
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + + public function testRowspanWithinBodyIsKeptWithHeaderRows(): void + { + // A rowspan that lives entirely inside the body is unaffected by the + // header/body clamp. + $djot = implode("\n", [ + '{header-rows=1}', + '::: list-table', + '- - H1', + ' - H2', + '- - A', + ' - B', + '- - ^', + ' - C', + ':::', + ]); + + $expected = implode("\n", [ + '', + ' ', + ' ', + ' ', + ' ', + ' ', + '
    H1H2
    AB
    C
    ', + ]); + $this->assertSame($expected, $this->render($djot)); + } + public function testSiblingClassIsPreservedOnTable(): void { $djot = implode("\n", [