diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 35d91fad3129c..947a866551040 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -1347,6 +1347,8 @@ public function serialize(): ?string {
*
* @since 6.7.0
* @since 6.9.0 Converted from protected to public method.
+ * @since 7.1.0 Contents of IFRAME, NOEMBED, NOFRAMES, and XMP elements are
+ * serialized literally instead of being dropped or escaped.
*
* @return string Serialization of token, or empty string if no serialization exists.
*/
@@ -1490,16 +1492,38 @@ public function serialize_token(): string {
$text = $this->get_modifiable_text();
switch ( $tag_name ) {
+ /*
+ * The contents of these elements are emitted literally to preserve
+ * the document's contents, following the HTML serialization spec:
+ *
+ * > If the parent of current node is a style, script, xmp, iframe,
+ * > noembed, noframes, or plaintext element, or if the parent of
+ * > current node is a noscript element and scripting is enabled for
+ * > the node, then append the value of current node's data literally.
+ *
+ * This is safe because character references are never decoded in
+ * their contents. RAWTEXT contents (IFRAME, NOEMBED, NOFRAMES,
+ * STYLE, XMP) cannot contain their own closing tag, so the closer
+ * appended below cannot be matched early. SCRIPT data may contain
+ * escaped closers (e.g. within `<!-- <script>`), but re-parsing the
+ * identical bytes follows the same tokenization rules that produced
+ * this text, terminating at the appended closer all the same.
+ *
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
+ */
case 'IFRAME':
case 'NOEMBED':
case 'NOFRAMES':
- $text = '';
- break;
-
case 'SCRIPT':
case 'STYLE':
+ case 'XMP':
break;
+ /*
+ * The contents of TEXTAREA and TITLE are parsed as RCDATA, in which
+ * character references are decoded, so the decoded modifiable text
+ * must be re-escaped to preserve the document's contents.
+ */
default:
$text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
}
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
index e516addb6c314..a08d5bb0a6fa9 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
@@ -134,6 +134,118 @@ public function test_style_contents_are_not_escaped() {
);
}
+ /**
+ * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are.
+ *
+ * XMP contents are parsed as raw text: character references are never decoded.
+ * Escaping the contents would change the document, e.g. a "<" would be replaced
+ * by the literal text "&lt;" after serializing and re-parsing.
+ *
+ * @ticket 65372
+ */
+ public function test_xmp_contents_are_not_escaped() {
+ $this->assertSame(
+ "
1 < 2 & apples > or\u{FFFD}anges",
+ WP_HTML_Processor::normalize( "1 < 2 & apples > or\x00anges" ),
+ 'Should have preserved text inside an XMP element, except for replacing NULL bytes.'
+ );
+ }
+
+ /**
+ * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are
+ * preserved when serializing.
+ *
+ * These elements contain raw text which is part of the parsed document.
+ * Dropping it would change the document's contents across a serialize and
+ * re-parse cycle.
+ *
+ * @ticket 65372
+ *
+ * @dataProvider data_rawtext_elements_with_contents
+ *
+ * @param string $html Normalized HTML containing a rawtext element with contents.
+ */
+ public function test_rawtext_element_contents_are_preserved_when_normalizing( string $html ) {
+ $this->assertSame(
+ $html,
+ WP_HTML_Processor::normalize( $html ),
+ 'Should have preserved the rawtext element contents.'
+ );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array[]
+ */
+ public static function data_rawtext_elements_with_contents() {
+ return array(
+ 'IFRAME with following text' => array( 'y' ),
+ 'NOEMBED with following text' => array( 'xy' ),
+ 'NOFRAMES with following text' => array( 'xy' ),
+ 'NOFRAMES before comment' => array( 'x' ),
+ 'IFRAME with markup-like contents' => array( '' ),
+ 'NOEMBED with character reference' => array( '&' ),
+ 'IFRAME in foreign content' => array( '' ),
+ );
+ }
+
+ /**
+ * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are
+ * preserved when serializing full documents, including NOFRAMES elements
+ * in the HEAD or after a FRAMESET.
+ *
+ * @ticket 65372
+ *
+ * @dataProvider data_full_documents_with_rawtext_elements
+ *
+ * @param string $html Input HTML document.
+ * @param string $expected Expected serialization of the full document.
+ */
+ public function test_rawtext_element_contents_are_preserved_in_full_documents( string $html, string $expected ) {
+ $processor = WP_HTML_Processor::create_full_parser( $html );
+
+ $this->assertSame(
+ $expected,
+ $processor->serialize(),
+ 'Should have preserved the rawtext element contents.'
+ );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array[]
+ */
+ public static function data_full_documents_with_rawtext_elements() {
+ return array(
+ 'IFRAME in BODY' => array(
+ 'y',
+ 'y',
+ ),
+ 'NOEMBED in BODY' => array(
+ 'ax',
+ 'ax',
+ ),
+ 'NOFRAMES in BODY' => array(
+ 'ax',
+ 'ax',
+ ),
+ 'NOFRAMES in HEAD' => array(
+ 'xz',
+ 'xz',
+ ),
+ 'NOFRAMES in FRAMESET' => array(
+ '