From d676e28d93f76f2db42edccb0bc37f467a96ce38 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 11:06:42 +0200 Subject: [PATCH 1/3] HTML API: Visit implied body at EOF after head --- .../html-api/class-wp-html-processor.php | 114 ++++++++++++++--- .../html-api/wpHtmlProcessor-serialize.php | 115 ++++++++++++++++++ 2 files changed, 211 insertions(+), 18 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..906909d29a0f1 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -228,6 +228,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $element_queue = array(); + /** + * Whether the end-of-file token has been processed through the insertion modes. + * + * @since 7.0.0 + * + * @var bool + */ + private $has_processed_eof = false; + /** * Stores the current breadcrumbs. * @@ -1036,12 +1045,23 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { } } - // Finish stepping when there are no more tokens in the document. + // Process EOF once in the insertion modes before finishing. + $is_eof = false; if ( WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state || WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state ) { - return false; + if ( self::PROCESS_NEXT_NODE === $node_to_process ) { + if ( $this->has_processed_eof || ! isset( $this->state->current_token ) ) { + return false; + } + + $this->has_processed_eof = true; + } elseif ( ! isset( $this->state->current_token ) ) { + return false; + } + + $is_eof = true; } $adjusted_current_node = $this->get_adjusted_current_node(); @@ -1049,7 +1069,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { $is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer; $token_name = $this->get_token_name(); - if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { + if ( self::REPROCESS_CURRENT_NODE !== $node_to_process && ! $is_eof ) { try { $bookmark_name = $this->bookmark_token(); } catch ( Exception $e ) { @@ -1088,6 +1108,33 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { ) ); + if ( $is_eof && ! $parse_in_current_insertion_mode ) { + if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) { + return $this->step_in_template(); + } + + return false; + } + + if ( $is_eof ) { + switch ( $this->state->insertion_mode ) { + case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: + case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: + case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: + break; + + default: + if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) { + return $this->step_in_template(); + } + + return false; + } + } + try { if ( ! $parse_in_current_insertion_mode ) { return $this->step_in_foreign_content(); @@ -3238,6 +3285,17 @@ private function step_in_body(): bool { return $this->step(); } + /* + * > An end-of-file token + */ + if ( null === $token_name ) { + if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) { + return $this->step_in_template(); + } + + return false; + } + if ( ! parent::is_tag_closer() ) { /* * > Any other start tag @@ -4412,6 +4470,23 @@ private function step_in_template(): bool { return $this->step( self::REPROCESS_CURRENT_NODE ); } + /* + * > An end-of-file token + */ + if ( null === $token_name ) { + if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { + // Stop parsing. + return false; + } + + // @todo Indicate a parse error once it's possible. + $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); + $this->state->active_formatting_elements->clear_up_to_last_marker(); + array_pop( $this->state->stack_of_template_insertion_modes ); + $this->reset_insertion_mode_appropriately(); + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + /* * > Any other start tag */ @@ -4430,20 +4505,7 @@ private function step_in_template(): bool { return $this->step(); } - /* - * > An end-of-file token - */ - if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { - // Stop parsing. - return false; - } - - // @todo Indicate a parse error once it's possible. - $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); - $this->state->active_formatting_elements->clear_up_to_last_marker(); - array_pop( $this->state->stack_of_template_insertion_modes ); - $this->reset_insertion_mode_appropriately(); - return $this->step( self::REPROCESS_CURRENT_NODE ); + return false; } /** @@ -5675,6 +5737,7 @@ public function seek( $bookmark_name ): bool { $this->state->current_token = null; $this->current_element = null; $this->element_queue = array(); + $this->has_processed_eof = false; /* * The absence of a context node indicates a full parse. @@ -6413,7 +6476,22 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to */ private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token { $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; - $name = $bookmark_name ?? $this->bookmark_token(); + if ( + null === $bookmark_name && + ( + WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state || + WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state + ) + ) { + if ( count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) { + $this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS; + throw new Exception( 'could not allocate bookmark' ); + } + + $name = (string) ++$this->bookmark_counter; + } else { + $name = $bookmark_name ?? $this->bookmark_token(); + } $this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 ); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e516addb6c314..f5199423738c1 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -300,6 +300,121 @@ public function test_full_document_serialize_includes_doctype( string $doctype_i ); } + /** + * Ensures full document serialization is idempotent when the body is implied after head content. + * + * @ticket 65372 + * + * @dataProvider data_provider_full_document_serialize_includes_implied_body_after_head_at_eof + * + * @param string $input Full document input ending after HEAD content with no explicit BODY. + * @param string $expected_serialized Expected serialization with the implied empty BODY element. + */ + public function test_full_document_serialize_includes_implied_body_after_head_at_eof( string $input, string $expected_serialized ) { + $processor = WP_HTML_Processor::create_full_parser( $input ); + $serialized = $processor->serialize(); + + $this->assertSame( + $expected_serialized, + $serialized, + 'Should have serialized the implied empty BODY element before HTML closes.' + ); + + $processor = WP_HTML_Processor::create_full_parser( $serialized ); + + $this->assertSame( + $serialized, + $processor->serialize(), + 'Should have produced idempotent full document serialization.' + ); + + $processor = WP_HTML_Processor::create_full_parser( $input ); + $tag_events = array(); + while ( $processor->next_token() ) { + if ( '#tag' !== $processor->get_token_type() ) { + continue; + } + + $tag_events[] = array( $processor->is_tag_closer() ? '-' : '+', $processor->get_tag() ); + } + + $this->assertSame( + array( + array( '+', 'BODY' ), + array( '-', 'BODY' ), + array( '-', 'HTML' ), + ), + array_slice( $tag_events, -3 ), + 'Should visit the implied empty BODY element before closing HTML.' + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_provider_full_document_serialize_includes_implied_body_after_head_at_eof() { + return array( + 'Closed HEAD at EOF' => array( + 'x', + 'x', + ), + 'Unclosed TEMPLATE in HEAD' => array( + '