diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 35d91fad3129c..34bf6e2076c01 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -228,6 +228,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $element_queue = array();
+ /**
+ * Whether step() has already processed the end-of-file token through the insertion modes (reset by seek()).
+ *
+ * @since 7.1.0
+ *
+ * @var bool
+ */
+ private $has_processed_eof = false;
+
/**
* Stores the current breadcrumbs.
*
@@ -399,7 +408,7 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
$this->state->stack_of_open_elements->set_push_handler(
function ( WP_HTML_Token $token ): void {
- $is_virtual = ! isset( $this->state->current_token ) || $this->is_tag_closer();
+ $is_virtual = $this->is_eof_token() || ! isset( $this->state->current_token ) || $this->is_tag_closer();
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
@@ -410,7 +419,7 @@ function ( WP_HTML_Token $token ): void {
$this->state->stack_of_open_elements->set_pop_handler(
function ( WP_HTML_Token $token ): void {
- $is_virtual = ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
+ $is_virtual = $this->is_eof_token() || ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
@@ -1036,12 +1045,23 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
}
}
- // Finish stepping when there are no more tokens in the document.
+ // Process EOF once in the insertion modes before finishing.
+ $is_eof = false;
if (
WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
) {
- return false;
+ if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+ if ( $this->has_processed_eof || ! isset( $this->state->current_token ) ) {
+ return false;
+ }
+
+ $this->has_processed_eof = true;
+ } elseif ( ! isset( $this->state->current_token ) ) {
+ return false;
+ }
+
+ $is_eof = true;
}
$adjusted_current_node = $this->get_adjusted_current_node();
@@ -1049,7 +1069,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
$is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer;
$token_name = $this->get_token_name();
- if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
+ if ( self::REPROCESS_CURRENT_NODE !== $node_to_process && ! $is_eof ) {
try {
$bookmark_name = $this->bookmark_token();
} catch ( Exception $e ) {
@@ -1088,6 +1108,33 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
)
);
+ if ( $is_eof && ! $parse_in_current_insertion_mode ) {
+ if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+ return $this->step_in_template();
+ }
+
+ return false;
+ }
+
+ if ( $is_eof ) {
+ switch ( $this->state->insertion_mode ) {
+ case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
+ case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
+ case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
+ case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
+ case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
+ case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
+ break;
+
+ default:
+ if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+ return $this->step_in_template();
+ }
+
+ return false;
+ }
+ }
+
try {
if ( ! $parse_in_current_insertion_mode ) {
return $this->step_in_foreign_content();
@@ -3238,6 +3285,17 @@ private function step_in_body(): bool {
return $this->step();
}
+ /*
+ * > An end-of-file token
+ */
+ if ( null === $token_name ) {
+ if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+ return $this->step_in_template();
+ }
+
+ return false;
+ }
+
if ( ! parent::is_tag_closer() ) {
/*
* > Any other start tag
@@ -4412,6 +4470,23 @@ private function step_in_template(): bool {
return $this->step( self::REPROCESS_CURRENT_NODE );
}
+ /*
+ * > An end-of-file token
+ */
+ if ( null === $token_name ) {
+ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+ // Stop parsing.
+ return false;
+ }
+
+ // @todo Indicate a parse error once it's possible.
+ $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
+ $this->state->active_formatting_elements->clear_up_to_last_marker();
+ array_pop( $this->state->stack_of_template_insertion_modes );
+ $this->reset_insertion_mode_appropriately();
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+ }
+
/*
* > Any other start tag
*/
@@ -4430,20 +4505,7 @@ private function step_in_template(): bool {
return $this->step();
}
- /*
- * > An end-of-file token
- */
- if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
- // Stop parsing.
- return false;
- }
-
- // @todo Indicate a parse error once it's possible.
- $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
- $this->state->active_formatting_elements->clear_up_to_last_marker();
- array_pop( $this->state->stack_of_template_insertion_modes );
- $this->reset_insertion_mode_appropriately();
- return $this->step( self::REPROCESS_CURRENT_NODE );
+ return false;
}
/**
@@ -5199,6 +5261,20 @@ private function step_in_foreign_content(): bool {
* Internal helpers
*/
+ /**
+ * Indicates if the Tag Processor has stopped at the end of its input, whether complete or incomplete.
+ *
+ * @since 7.1.0
+ *
+ * @return bool Whether the parser state is STATE_INCOMPLETE_INPUT or STATE_COMPLETE, treated here as the end-of-file token.
+ */
+ private function is_eof_token(): bool {
+ return (
+ WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+ );
+ }
+
/**
* Creates a new bookmark for the currently-matched token and returns the generated name.
*
@@ -5675,6 +5751,7 @@ public function seek( $bookmark_name ): bool {
$this->state->current_token = null;
$this->current_element = null;
$this->element_queue = array();
+ $this->has_processed_eof = false;
/*
* The absence of a context node indicates a full parse.
@@ -6413,7 +6490,22 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to
*/
private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
$here = $this->bookmarks[ $this->state->current_token->bookmark_name ];
- $name = $bookmark_name ?? $this->bookmark_token();
+ if (
+ null === $bookmark_name &&
+ (
+ WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+ )
+ ) {
+ if ( count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
+ $this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS;
+ throw new Exception( 'could not allocate bookmark' );
+ }
+
+ $name = (string) ++$this->bookmark_counter;
+ } else {
+ $name = $bookmark_name ?? $this->bookmark_token();
+ }
$this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 );
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
index e516addb6c314..d8f521c4f7181 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
@@ -300,6 +300,129 @@ public function test_full_document_serialize_includes_doctype( string $doctype_i
);
}
+ /**
+ * Ensures an implied BODY after head content is serialized, re-serializes identically, and is visited as tag tokens before HTML closes.
+ *
+ * @ticket 65372
+ *
+ * @dataProvider data_provider_full_document_serialize_includes_implied_body_after_head_at_eof
+ *
+ * @param string $input Full document input ending after HEAD content with no explicit BODY.
+ * @param string $expected_serialized Expected serialization with the implied empty BODY element.
+ */
+ public function test_full_document_serialize_includes_implied_body_after_head_at_eof( string $input, string $expected_serialized ) {
+ $processor = WP_HTML_Processor::create_full_parser( $input );
+ $serialized = $processor->serialize();
+
+ $this->assertSame(
+ $expected_serialized,
+ $serialized,
+ 'Should have serialized the implied empty BODY element before HTML closes.'
+ );
+
+ $processor = WP_HTML_Processor::create_full_parser( $serialized );
+
+ $this->assertSame(
+ $serialized,
+ $processor->serialize(),
+ 'Should have produced idempotent full document serialization.'
+ );
+
+ $processor = WP_HTML_Processor::create_full_parser( $input );
+ $tag_events = array();
+ while ( $processor->next_token() ) {
+ if ( '#tag' !== $processor->get_token_type() ) {
+ continue;
+ }
+
+ $tag_events[] = array( $processor->is_tag_closer() ? '-' : '+', $processor->get_tag() );
+ }
+
+ $this->assertSame(
+ array(
+ array( '+', 'BODY' ),
+ array( '-', 'BODY' ),
+ array( '-', 'HTML' ),
+ ),
+ array_slice( $tag_events, -3 ),
+ 'Should visit the implied empty BODY element before closing HTML.'
+ );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array[]
+ */
+ public static function data_provider_full_document_serialize_includes_implied_body_after_head_at_eof() {
+ return array(
+ 'Closed HEAD at EOF' => array(
+ '
x',
+ 'x',
+ ),
+ 'Unclosed TEMPLATE in HEAD' => array(
+ 'x',
+ 'x',
+ ),
+ 'Unclosed table in TEMPLATE in HEAD' => array(
+ 'x| x',
+ 'x',
+ ),
+ 'Ignored BODY in TEMPLATE at EOF' => array(
+ '',
+ '',
+ ),
+ 'Ignored BODY closer in NOSCRIPT' => array(
+ ' |