From 026a141679aa35e46b205a624b8722bfcaedb3b0 Mon Sep 17 00:00:00 2001
From: Jon Surrell
Date: Thu, 11 Jun 2026 23:43:33 +0200
Subject: [PATCH] HTML API: Reconsume script data less-than tokens

---
NOTE(review): In this copy of the patch the HTML string literals in the test
data (data-provider entries and `yield` rows) appear to be empty or truncated,
and the leading context of the last hunk looks damaged. Restore them from the
original commit before applying. Indentation below was reconstructed and should
be confirmed against the target files.

NOTE(review): The new assertion now passes the expected value (`$html`) first
and the actual value (`WP_HTML_Processor::normalize( $html )`) second, matching
PHPUnit's `assertSame( $expected, $actual, $message )` signature so failure
output labels the two values correctly.

 .../html-api/class-wp-html-tag-processor.php  |  2 +-
 .../html-api/wpHtmlProcessor-serialize.php    | 35 +++++++++++++++++++
 .../tests/html-api/wpHtmlTagProcessor.php     |  2 ++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 77c1a471db5b1..af90a5ebcf18b 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1621,7 +1621,7 @@ private function skip_script_data(): bool {
 				( 'p' === $html[ $at + 4 ] || 'P' === $html[ $at + 4 ] ) &&
 				( 't' === $html[ $at + 5 ] || 'T' === $html[ $at + 5 ] )
 			) ) {
-				++$at;
+				// Reconsume the current byte; it may be another `<` starting the real closer.
 				continue;
 			}
 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
index e516addb6c314..8c4a6d247e5fc 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
@@ -121,6 +121,41 @@ public function test_script_contents_are_not_escaped() {
 		);
 	}
 
+	/**
+	 * Ensures that text less-than sequences before the closing tag in special
+	 * text elements are not misclassified as incomplete tokens.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_special_text_less_than_sequences_before_closer
+	 *
+	 * @param string $html HTML containing a special text element with text that looks like markup.
+	 */
+	public function test_special_text_less_than_sequences_before_closer_are_not_incomplete( string $html ) {
+		$this->assertSame(
+			$html,
+			WP_HTML_Processor::normalize( $html ),
+			'Should have preserved the element and its text content.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_special_text_less_than_sequences_before_closer() {
+		return array(
+			'Less-than sign before closer'           => array( '' ),
+			'End-tag opener text before closer'      => array( '' ),
+			'Less-than sign and space before close'  => array( '' ),
+			'Multiple less-than signs before close'  => array( '' ),
+			'Markup declaration opener before close' => array( '' ),
+			'Tag-like text before close'             => array( '' ),
+			'STYLE less-than sign before closer'     => array( '' ),
+		);
+	}
+
 	/**
 	 * Ensures that STYLE contents are not escaped, as they are not parsed like text nodes are.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 22ace3890f469..33eb2d0e851a6 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -2116,6 +2116,8 @@ public static function data_script_tag(): Generator {
 		yield 'Script tag with close' => array( "', true );
+		yield 'Script text less-than sign before close' => array( '', true );
+		yield 'Script text end-tag opener before close' => array( '', true );
 		yield 'Script data escaped' => array( '', true );
 		yield 'Script data double-escaped exit (comment)' => array( '', true );
 		yield 'Script data double-escaped exit (closed ">")' => array( '', true );