From 391805cbce04ca18ceebbfb863f3110d37c8161c Mon Sep 17 00:00:00 2001 From: Mark Scherer Date: Tue, 16 Jun 2026 13:54:32 +0200 Subject: [PATCH] Fix renderer content loss in non-HTML renderers (markdown/plain/ansi) Content vanished in the markdown / plain / ansi renderers (HTML was fine): - EscapedText (e.g. an escaped literal like a backslash-star) was dropped in all three: the node is not a Text subclass, so it fell through to renderChildren() and produced nothing. Added an explicit arm in each renderer. Markdown keeps the leading backslash so the literal stays literal when re-parsed; plain/ansi emit the raw content. - MarkdownRenderer had no Figure / Caption arms, so a captioned image glued the caption to the image; and no Abbreviation arm, so the title was lost. Added renderFigure (caption on its own block), renderCaption, and renderAbbreviation (inline abbr HTML, mirroring how sub/sup fall back to inline HTML). - A Div's title attribute (Djot attribute syntax title="...") was dropped in all three non-HTML renderers' renderDiv. Preserve it as a leading bold/plain line. Ported from carve-php commit 217b72f. 
--- src/Renderer/AnsiRenderer.php | 13 +++- src/Renderer/MarkdownRenderer.php | 67 ++++++++++++++++++- src/Renderer/PlainTextRenderer.php | 17 +++++ .../Renderer/RendererContentLossTest.php | 54 +++++++++++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 tests/TestCase/Renderer/RendererContentLossTest.php diff --git a/src/Renderer/AnsiRenderer.php b/src/Renderer/AnsiRenderer.php index 4c51b80c..811380e1 100644 --- a/src/Renderer/AnsiRenderer.php +++ b/src/Renderer/AnsiRenderer.php @@ -30,6 +30,7 @@ use Djot\Node\Inline\Code; use Djot\Node\Inline\Delete; use Djot\Node\Inline\Emphasis; +use Djot\Node\Inline\EscapedText; use Djot\Node\Inline\FootnoteRef; use Djot\Node\Inline\HardBreak; use Djot\Node\Inline\Highlight; @@ -403,6 +404,7 @@ protected function renderNode(Node $node): string $node instanceof LineBlock => $this->renderLineBlock($node), $node instanceof Footnote => $this->renderFootnote($node), $node instanceof Text => $node->getContent(), + $node instanceof EscapedText => $node->getContent(), $node instanceof Abbreviation => $this->renderAbbreviation($node), $node instanceof Emphasis => $this->renderEmphasis($node), $node instanceof Strong => $this->renderStrong($node), @@ -608,7 +610,16 @@ protected function renderThematicBreak(): string protected function renderDiv(Div $node): string { - return $this->renderChildren($node); + $body = $this->renderChildren($node); + // A Div's quoted title (e.g. an admonition title carried as the `title` + // attribute) is preserved as a leading bold line instead of being + // dropped. + $title = $node->getAttribute('title'); + if (is_string($title) && $title !== '') { + return $this->style($title, self::BOLD) . "\n\n" . 
$body; + } + + return $body; } protected function renderTable(Table $node): string diff --git a/src/Renderer/MarkdownRenderer.php b/src/Renderer/MarkdownRenderer.php index 8c7cd940..0514f801 100644 --- a/src/Renderer/MarkdownRenderer.php +++ b/src/Renderer/MarkdownRenderer.php @@ -6,12 +6,14 @@ use Djot\Event\RenderEvent; use Djot\Node\Block\BlockQuote; +use Djot\Node\Block\Caption; use Djot\Node\Block\CodeBlock; use Djot\Node\Block\Comment; use Djot\Node\Block\DefinitionDescription; use Djot\Node\Block\DefinitionList; use Djot\Node\Block\DefinitionTerm; use Djot\Node\Block\Div; +use Djot\Node\Block\Figure; use Djot\Node\Block\Footnote; use Djot\Node\Block\Heading; use Djot\Node\Block\LineBlock; @@ -24,9 +26,11 @@ use Djot\Node\Block\TableRow; use Djot\Node\Block\ThematicBreak; use Djot\Node\Document; +use Djot\Node\Inline\Abbreviation; use Djot\Node\Inline\Code; use Djot\Node\Inline\Delete; use Djot\Node\Inline\Emphasis; +use Djot\Node\Inline\EscapedText; use Djot\Node\Inline\FootnoteRef; use Djot\Node\Inline\HardBreak; use Djot\Node\Inline\Highlight; @@ -140,6 +144,14 @@ protected function renderNode(Node $node): string $node instanceof LineBlock => $this->renderLineBlock($node), $node instanceof Footnote => $this->renderFootnote($node), $node instanceof Text => $this->escapeText($node->getContent()), + // Keep the backslash so the literal stays literal when re-parsed as + // Markdown: a bare `.` from `\.` would turn `1\. x` back into an + // ordered list. EscapedText only ever holds escaped ASCII + // punctuation, all of which CommonMark allows a `\` before. + $node instanceof EscapedText => '\\' . 
$node->getContent(), + $node instanceof Figure => $this->renderFigure($node), + $node instanceof Caption => $this->renderCaption($node), + $node instanceof Abbreviation => $this->renderAbbreviation($node), $node instanceof Emphasis => $this->renderEmphasis($node), $node instanceof Strong => $this->renderStrong($node), $node instanceof Code => $this->renderCode($node), @@ -287,8 +299,17 @@ protected function renderDefinitionDescription(DefinitionDescription $node): str protected function renderDiv(Div $node): string { - // Divs don't exist in Markdown, just render content - return $this->renderChildren($node); + // Divs/admonitions don't exist in Markdown; render the content. A Div's + // quoted title (e.g. an admonition title carried as the `title` + // attribute) would otherwise be lost - preserve it as a leading bold + // line. + $body = $this->renderChildren($node); + $title = $node->getAttribute('title'); + if (is_string($title) && $title !== '') { + return '**' . $this->escapeText($title) . "**\n\n" . $body; + } + + return $body; } protected function renderTable(Table $node): string @@ -475,6 +496,48 @@ protected function renderRawInline(RawInline $node): string return ''; } + /** + * A figure renders its target then its caption as a separate block + * (Markdown has no figure element). A BLANK line before the caption is + * required, not just a newline: against a block-quote target a single + * newline would make the caption a lazy continuation of the quote and + * swallow it. + */ + protected function renderFigure(Figure $node): string + { + $output = ''; + foreach ($node->getChildren() as $child) { + if ($child instanceof Caption) { + $output = rtrim($output) . "\n\n" . $this->renderCaption($child); + } else { + $output .= $this->renderNode($child); + } + } + + return $output; + } + + protected function renderCaption(Caption $node): string + { + return trim($this->renderChildren($node)) . 
"\n\n"; + } + + /** + * Markdown has no abbreviation syntax; emit inline abbr HTML so the title + * is preserved (mirrors how subscript/superscript fall back to inline HTML). + */ + protected function renderAbbreviation(Abbreviation $node): string + { + // The whole element is raw inline HTML, so both the title (attribute) + // and the text (element content) need HTML escaping, NOT Markdown text + // escaping: a `"` in the title or a `<` in the text would otherwise + // break the tag / be misparsed as markup downstream. + $title = htmlspecialchars($node->getTitle(), ENT_QUOTES, 'UTF-8'); + $text = htmlspecialchars($this->renderChildren($node), ENT_QUOTES, 'UTF-8'); + + return '<abbr title="' . $title . '">' . $text . '</abbr>'; + } + protected function escapeText(string $text): string { // Escape special Markdown characters in text diff --git a/src/Renderer/PlainTextRenderer.php b/src/Renderer/PlainTextRenderer.php index 450cfb74..b5f150a2 100644 --- a/src/Renderer/PlainTextRenderer.php +++ b/src/Renderer/PlainTextRenderer.php @@ -11,6 +11,7 @@ use Djot\Node\Block\DefinitionDescription; use Djot\Node\Block\DefinitionList; use Djot\Node\Block\DefinitionTerm; +use Djot\Node\Block\Div; use Djot\Node\Block\Footnote; use Djot\Node\Block\Heading; use Djot\Node\Block\LineBlock; @@ -25,6 +26,7 @@ use Djot\Node\Document; use Djot\Node\Inline\Code; use Djot\Node\Inline\Delete; +use Djot\Node\Inline\EscapedText; use Djot\Node\Inline\FootnoteRef; use Djot\Node\Inline\HardBreak; use Djot\Node\Inline\Image; @@ -114,6 +116,7 @@ protected function renderNode(Node $node): string return match (true) { $node instanceof Document => $this->renderChildren($node), + $node instanceof Div => $this->renderDiv($node), $node instanceof Paragraph => $this->renderParagraph($node), $node instanceof Heading => $this->renderHeading($node), $node instanceof CodeBlock => $this->renderCodeBlock($node), @@ -132,6 +135,7 @@ protected function renderNode(Node $node): string $node instanceof LineBlock => $this->renderLineBlock($node), 
$node instanceof Footnote => $this->renderFootnote($node), $node instanceof Text => $node->getContent(), + $node instanceof EscapedText => $node->getContent(), $node instanceof Code => $node->getContent(), $node instanceof Math => $node->getContent(), $node instanceof Image => $node->getAlt(), @@ -161,6 +165,19 @@ protected function renderParagraph(Paragraph $node): string return $this->renderChildren($node) . "\n\n"; } + protected function renderDiv(Div $node): string + { + $body = $this->renderChildren($node); + // A Div's quoted title (e.g. an admonition title carried as the `title` + // attribute) is preserved as a leading line instead of being dropped. + $title = $node->getAttribute('title'); + if (is_string($title) && $title !== '') { + return $title . "\n\n" . $body; + } + + return $body; + } + protected function renderHeading(Heading $node): string { return $this->renderChildren($node) . "\n\n"; diff --git a/tests/TestCase/Renderer/RendererContentLossTest.php b/tests/TestCase/Renderer/RendererContentLossTest.php new file mode 100644 index 00000000..f2df27d2 --- /dev/null +++ b/tests/TestCase/Renderer/RendererContentLossTest.php @@ -0,0 +1,54 @@ +convert('a \*lit\* b'); + $this->assertStringContainsString('lit', $md); + $this->assertStringContainsString('*', $md); + + $plain = DjotConverter::plainText()->convert('a \*lit\* b'); + $this->assertStringContainsString('*lit*', $plain); + + $ansi = DjotConverter::ansi()->convert('a \*lit\* b'); + $this->assertStringContainsString('*lit*', $ansi); + } + + public function testAbbreviationTitlePreservedInMarkdown(): void + { + $md = DjotConverter::markdown()->convert("The HTML spec.\n\n*[HTML]: HyperText Markup Language"); + $this->assertStringContainsString('HyperText Markup Language', $md); + } + + public function testFigureCaptionNotGluedInMarkdown(): void + { + $md = DjotConverter::markdown()->convert("![a](i.png)\n^ Cap text"); + // caption sits on its own line, not glued to the image + 
$this->assertStringNotContainsString('i.png)Cap', $md); + $this->assertStringContainsString('Cap text', $md); + } + + public function testDivTitlePreservedInNonHtmlRenderers(): void + { + // A Div carries a `title` attribute via Djot's attribute syntax; the + // non-HTML renderers must surface it as a leading line, not drop it. + $src = "{title=\"Heads up\"}\n:::\nbody\n:::"; + $this->assertStringContainsString('Heads up', DjotConverter::markdown()->convert($src)); + $this->assertStringContainsString('Heads up', DjotConverter::plainText()->convert($src)); + $this->assertStringContainsString('Heads up', DjotConverter::ansi()->convert($src)); + } +}