From 14926c4cc991c4d9bc9e39e00d97f7d5830baf79 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 11:06:38 +0200 Subject: [PATCH] HTML API: Limit leading newline serialization to HTML elements --- .../html-api/class-wp-html-processor.php | 6 +- .../html-api/wpHtmlProcessor-serialize.php | 82 +++++++++++++++++++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..b0d3d53755da1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1461,8 +1461,8 @@ public function serialize_token(): string { /* * The HTML parser strips a leading newline immediately after the start - * tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend - * a leading newline to ensure the semantic HTML content is preserved. + * tag of HTML TEXTAREA, PRE, and LISTING elements. When serializing, + * prepend a leading newline to ensure the semantic HTML content is preserved. * * For example, `
<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content * has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>
` are _equivalent_. @@ -1481,7 +1481,7 @@ public function serialize_token(): string { * * @see https://html.spec.whatwg.org/multipage/parsing.html */ - if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) { + if ( $in_html && ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) ) { $html .= "\n"; } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e516addb6c314..fb11e414b8201 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -340,6 +340,88 @@ public function test_normalize_special_leading_newline_handling( string $input, $this->assertEqualHTML( $expected, $normalized_twice ); } + /** + * Ensures that the special leading newline rule applies only in the HTML namespace and that the normalized output is stable when normalized again. + * + * @ticket 64607 + * + * @dataProvider data_provider_special_leading_newline_namespace_serialization + * + * @param string $input HTML input containing a PRE, LISTING, or TEXTAREA element. + * @param string $expected Expected normalized output; normalizing it a second time must return the same string. + */ + public function test_special_leading_newline_rule_depends_on_namespace( string $input, string $expected ) { + $normalized = WP_HTML_Processor::normalize( $input ); + $this->assertSame( + $expected, + $normalized, + 'Should serialize special leading newlines according to the element namespace.' + ); + $this->assertSame( + $expected, + WP_HTML_Processor::normalize( $normalized ), + 'Normalizing already-normalized special leading newlines should not change them.' + ); + } + + /** + * Data provider for test_special_leading_newline_rule_depends_on_namespace(). 
+ * + * @return array[] Datasets keyed by description, each holding the input HTML followed by its expected normalized output. + */ + public static function data_provider_special_leading_newline_namespace_serialization() { + return array( + 'MathML TEXTAREA' => array( + '', + '', + ), + 'MathML TEXTAREA with leading newline' => array( + "", + "", + ), + 'SVG TEXTAREA' => array( + '', + '', + ), + 'SVG TEXTAREA with leading newline' => array( + "", + "", + ), + 'HTML TEXTAREA inside SVG HTML integration point' => array( + '', + "", + ), + 'HTML TEXTAREA with leading newline inside SVG HTML integration point' => array( + "", + "", + ), + 'HTML TEXTAREA inside MathML text integration point' => array( + '', + "", + ), + 'HTML TEXTAREA with leading newline inside MathML text integration point' => array( + "", + "", + ), + 'HTML TEXTAREA inside MathML HTML integration point' => array( + '', + "", + ), + 'HTML TEXTAREA with leading newline inside MathML HTML integration point' => array( + "", + "", + ), + 'HTML PRE after exiting SVG foreign content' => array( + '
X
', + "
\nX
", + ), + 'HTML LISTING after exiting MathML foreign content' => array( + 'X', + "\nX", + ), + ); + } + /** + * Ensures that fuzzer-discovered inputs do not emit native PHP errors. + *