From 81b255d946e9011cd313339be74057ba587be415 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 11 Jun 2026 23:18:45 +0200 Subject: [PATCH 1/2] HTML API: Preserve raw text contents in serialize. The serializer was discarding the raw-text contents of IFRAME, NOEMBED, and NOFRAMES even though get_modifiable_text() already returns the browser-equivalent raw text for those elements. Let those elements follow the same raw emission path as SCRIPT and STYLE, preserving contents while retaining existing NUL and newline normalization. See #65372. --- .../html-api/class-wp-html-processor.php | 3 -- .../html-api/wpHtmlProcessor-serialize.php | 30 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..1c1499c298a33 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1493,9 +1493,6 @@ public function serialize_token(): string { case 'IFRAME': case 'NOEMBED': case 'NOFRAMES': - $text = ''; - break; - case 'SCRIPT': case 'STYLE': break; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e516addb6c314..7016cff756a4a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -134,6 +134,36 @@ public function test_style_contents_are_not_escaped() { ); } + /** + * Ensures that IFRAME, NOEMBED, and NOFRAMES contents are not escaped, as they are not parsed like text nodes are. + * + * @ticket 65372 + * + * @dataProvider data_raw_text_elements_with_unescaped_contents + * + * @param string $tag_name Tag name under test. + */ + public function test_iframe_noembed_noframes_contents_are_not_escaped( string $tag_name ) { + $this->assertSame( + WP_HTML_Processor::normalize( "<{$tag_name}>apples > or\x00anges < p &" ), + "<{$tag_name}>apples > or\u{FFFD}anges < p &", + "Should have preserved text inside an {$tag_name} element, except for replacing NULL bytes." + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_raw_text_elements_with_unescaped_contents() { + return array( + 'IFRAME' => array( 'iframe' ), + 'NOEMBED' => array( 'noembed' ), + 'NOFRAMES' => array( 'noframes' ), + ); + } + public function test_unexpected_closing_tags_are_removed() { $this->assertSame( WP_HTML_Processor::normalize( 'onetwothree' ), From 2f02318d2095ce4b6e2a974a61ec7e2fb3c165ff Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 12 Jun 2026 11:24:16 +0200 Subject: [PATCH 2/2] HTML API: Expand rawtext serialize coverage --- .../html-api/wpHtmlProcessor-serialize.php | 128 ++++++++++++++---- 1 file changed, 98 insertions(+), 30 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index 7016cff756a4a..053d8023d4929 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -134,36 +134,6 @@ public function test_style_contents_are_not_escaped() { ); } - /** - * Ensures that IFRAME, NOEMBED, and NOFRAMES contents are not escaped, as they are not parsed like text nodes are. - * - * @ticket 65372 - * - * @dataProvider data_raw_text_elements_with_unescaped_contents - * - * @param string $tag_name Tag name under test. - */ - public function test_iframe_noembed_noframes_contents_are_not_escaped( string $tag_name ) { - $this->assertSame( - WP_HTML_Processor::normalize( "<{$tag_name}>apples > or\x00anges < p &" ), - "<{$tag_name}>apples > or\u{FFFD}anges < p &", - "Should have preserved text inside an {$tag_name} element, except for replacing NULL bytes." - ); - } - - /** - * Data provider. - * - * @return array[] - */ - public static function data_raw_text_elements_with_unescaped_contents() { - return array( - 'IFRAME' => array( 'iframe' ), - 'NOEMBED' => array( 'noembed' ), - 'NOFRAMES' => array( 'noframes' ), - ); - } - public function test_unexpected_closing_tags_are_removed() { $this->assertSame( WP_HTML_Processor::normalize( 'onetwothree' ), @@ -311,10 +281,108 @@ public static function data_tokens_with_null_bytes() { 'Foreign content text' => array( "one\x00two", "one\u{FFFD}two" ), 'SCRIPT content' => array( "", "" ), 'STYLE content' => array( "", "" ), + 'IFRAME content' => array( "", "" ), + 'NOEMBED content' => array( "a\x00b", "a\u{FFFD}b" ), + 'NOFRAMES content' => array( "a\x00b", "a\u{FFFD}b" ), 'Comment text' => array( "", "" ), ); } + /** + * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are + * preserved when serializing. + * + * These elements contain raw text which is part of the parsed document. + * Dropping it would change the document's contents across a serialize and + * re-parse cycle. + * + * @ticket 65372 + * + * @dataProvider data_rawtext_elements_with_contents + * + * @param string $html Normalized HTML containing a rawtext element with contents. + */ + public function test_rawtext_element_contents_are_preserved_when_normalizing( string $html ) { + $this->assertSame( + $html, + WP_HTML_Processor::normalize( $html ), + 'Should have preserved the rawtext element contents.' + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_rawtext_elements_with_contents() { + return array( + 'IFRAME with following text' => array( 'y' ), + 'NOEMBED with following text' => array( 'xy' ), + 'NOFRAMES with following text' => array( '
xy
' ), + 'NOFRAMES before comment' => array( '
x
' ), + 'IFRAME with markup-like contents' => array( '' ), + 'NOEMBED with character reference' => array( '&amp;' ), + 'NOFRAMES with character reference' => array( '&lt;' ), + ); + } + + /** + * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are + * preserved when serializing full documents, including NOFRAMES elements + * in the HEAD or after a FRAMESET. + * + * @ticket 65372 + * + * @dataProvider data_full_documents_with_rawtext_elements + * + * @param string $html Input HTML document. + * @param string $expected Expected serialization of the full document. + */ + public function test_rawtext_element_contents_are_preserved_in_full_documents( string $html, string $expected ) { + $processor = WP_HTML_Processor::create_full_parser( $html ); + + $this->assertSame( + $expected, + $processor->serialize(), + 'Should have preserved the rawtext element contents.' + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_full_documents_with_rawtext_elements() { + return array( + 'IFRAME in BODY' => array( + 'y', + 'y', + ), + 'NOEMBED in BODY' => array( + 'ax', + 'ax', + ), + 'NOFRAMES in BODY' => array( + 'ax', + 'ax', + ), + 'NOFRAMES in HEAD' => array( + 'xz', + 'xz', + ), + 'NOFRAMES in FRAMESET' => array( + 'x', + 'x', + ), + 'IFRAME before a comment' => array( + '

', + '

', + ), + ); + } + /** * @ticket 62396 *