From ccdb11186cbb10f1d253c58770cb9b2fbf4b0c97 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 11 Jun 2026 23:40:58 +0200 Subject: [PATCH] HTML API: Preserve XMP raw text serialization --- .../html-api/class-wp-html-processor.php | 1 + .../html-api/wpHtmlProcessor-serialize.php | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..b05da8a80e99d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1498,6 +1498,7 @@ public function serialize_token(): string { case 'SCRIPT': case 'STYLE': + case 'XMP': break; default: diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e516addb6c314..1aa367fe7283a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -134,6 +134,30 @@ public function test_style_contents_are_not_escaped() { ); } + /** + * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are. + * + * XMP contents are parsed as raw text: character references are never decoded. + * Escaping the contents would change the document, e.g. a "<" would be replaced + * by the literal text "&lt;" after serializing and re-parsing. + * + * @ticket 65372 + */ + public function test_xmp_contents_are_not_escaped() { + $normalized = WP_HTML_Processor::normalize( "<xmp>1 < 2 &amp; apples > or\x00anges</xmp>" ); + + $this->assertSame( + "<xmp>1 < 2 &amp; apples > or\u{FFFD}anges</xmp>", + $normalized, + 'Should have preserved text inside an XMP element, except for replacing NULL bytes.' + ); + $this->assertSame( + $normalized, + WP_HTML_Processor::normalize( $normalized ), + 'Normalizing already-normalized XMP should not escape the raw text again.' 
+ ); + } + public function test_unexpected_closing_tags_are_removed() { $this->assertSame( WP_HTML_Processor::normalize( 'onetwothree' ), @@ -281,6 +305,7 @@ public static function data_tokens_with_null_bytes() { 'Foreign content text' => array( "one\x00two", "one\u{FFFD}two" ), 'SCRIPT content' => array( "", "" ), 'STYLE content' => array( "", "" ), + 'XMP content' => array( "<xmp>a\x00b</xmp>", "<xmp>a\u{FFFD}b</xmp>" ), 'Comment text' => array( "", "" ), ); }