Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 112 additions & 20 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $element_queue = array();

/**
 * Whether the end-of-file token has been processed through the insertion modes.
 *
 * Set to true by step() the first time it is asked for the next node while the
 * parser state is complete or incomplete-input, so that later calls in that
 * state return false instead of handling end-of-file again. Reset to false by seek().
 *
 * @since 7.1.0
 *
 * @var bool
 */
private $has_processed_eof = false;

/**
* Stores the current breadcrumbs.
*
Expand Down Expand Up @@ -399,7 +408,7 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul

$this->state->stack_of_open_elements->set_push_handler(
function ( WP_HTML_Token $token ): void {
$is_virtual = ! isset( $this->state->current_token ) || $this->is_tag_closer();
$is_virtual = $this->is_eof_token() || ! isset( $this->state->current_token ) || $this->is_tag_closer();
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
Expand All @@ -410,7 +419,7 @@ function ( WP_HTML_Token $token ): void {

$this->state->stack_of_open_elements->set_pop_handler(
function ( WP_HTML_Token $token ): void {
$is_virtual = ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
$is_virtual = $this->is_eof_token() || ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
Expand Down Expand Up @@ -1036,20 +1045,31 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
}
}

// Finish stepping when there are no more tokens in the document.
// Process EOF once in the insertion modes before finishing.
$is_eof = false;
if (
WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
) {
return false;
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
if ( $this->has_processed_eof || ! isset( $this->state->current_token ) ) {
return false;
}

$this->has_processed_eof = true;
} elseif ( ! isset( $this->state->current_token ) ) {
return false;
}

$is_eof = true;
}

$adjusted_current_node = $this->get_adjusted_current_node();
$is_closer = $this->is_tag_closer();
$is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer;
$token_name = $this->get_token_name();

if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
if ( self::REPROCESS_CURRENT_NODE !== $node_to_process && ! $is_eof ) {
try {
$bookmark_name = $this->bookmark_token();
} catch ( Exception $e ) {
Expand Down Expand Up @@ -1088,6 +1108,33 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
)
);

if ( $is_eof && ! $parse_in_current_insertion_mode ) {
if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
return $this->step_in_template();
}

return false;
}

if ( $is_eof ) {
switch ( $this->state->insertion_mode ) {
case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
break;

default:
if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
return $this->step_in_template();
}

return false;
}
}

try {
if ( ! $parse_in_current_insertion_mode ) {
return $this->step_in_foreign_content();
Expand Down Expand Up @@ -3238,6 +3285,17 @@ private function step_in_body(): bool {
return $this->step();
}

/*
* > An end-of-file token
*/
if ( null === $token_name ) {
if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
return $this->step_in_template();
}

return false;
}

if ( ! parent::is_tag_closer() ) {
/*
* > Any other start tag
Expand Down Expand Up @@ -4412,6 +4470,23 @@ private function step_in_template(): bool {
return $this->step( self::REPROCESS_CURRENT_NODE );
}

/*
* > An end-of-file token
*/
if ( null === $token_name ) {
if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
// Stop parsing.
return false;
}

// @todo Indicate a parse error once it's possible.
$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
$this->state->active_formatting_elements->clear_up_to_last_marker();
array_pop( $this->state->stack_of_template_insertion_modes );
$this->reset_insertion_mode_appropriately();
return $this->step( self::REPROCESS_CURRENT_NODE );
}

/*
* > Any other start tag
*/
Expand All @@ -4430,20 +4505,7 @@ private function step_in_template(): bool {
return $this->step();
}

/*
* > An end-of-file token
*/
if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
// Stop parsing.
return false;
}

// @todo Indicate a parse error once it's possible.
$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
$this->state->active_formatting_elements->clear_up_to_last_marker();
array_pop( $this->state->stack_of_template_insertion_modes );
$this->reset_insertion_mode_appropriately();
return $this->step( self::REPROCESS_CURRENT_NODE );
return false;
}

/**
Expand Down Expand Up @@ -5199,6 +5261,20 @@ private function step_in_foreign_content(): bool {
* Internal helpers
*/

/**
 * Reports whether the parser is in a state that is treated as the end-of-file token.
 *
 * Both the "complete" and the "incomplete input" parser states count as end-of-file here.
 *
 * @since 7.1.0
 *
 * @return bool True when the parser state is one of the end-of-file states, false otherwise.
 */
private function is_eof_token(): bool {
	$eof_states = array(
		WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT,
		WP_HTML_Tag_Processor::STATE_COMPLETE,
	);

	// Strict comparison keeps this equivalent to chained `===` checks.
	return in_array( $this->parser_state, $eof_states, true );
}

/**
* Creates a new bookmark for the currently-matched token and returns the generated name.
*
Expand Down Expand Up @@ -5675,6 +5751,7 @@ public function seek( $bookmark_name ): bool {
$this->state->current_token = null;
$this->current_element = null;
$this->element_queue = array();
$this->has_processed_eof = false;

/*
* The absence of a context node indicates a full parse.
Expand Down Expand Up @@ -6413,7 +6490,22 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to
*/
private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
$here = $this->bookmarks[ $this->state->current_token->bookmark_name ];
$name = $bookmark_name ?? $this->bookmark_token();
if (
null === $bookmark_name &&
(
WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
)
) {
if ( count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
$this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS;
throw new Exception( 'could not allocate bookmark' );
}

$name = (string) ++$this->bookmark_counter;
} else {
$name = $bookmark_name ?? $this->bookmark_token();
}

$this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 );

Expand Down
123 changes: 123 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,129 @@ public function test_full_document_serialize_includes_doctype( string $doctype_i
);
}

/**
 * Verifies that a full document which ends before any BODY is seen still gets
 * an implied, empty BODY element, both when serializing and when walking tokens.
 *
 * Three things are checked for every dataset:
 *  1. The serialized output matches the expected markup.
 *  2. Re-parsing and re-serializing that output yields the same string.
 *  3. The last three tag tokens visited are BODY opener, BODY closer, HTML closer.
 *
 * @ticket 65372
 *
 * @dataProvider data_provider_full_document_serialize_includes_implied_body_after_head_at_eof
 *
 * @param string $input               Full document input ending after HEAD content with no explicit BODY.
 * @param string $expected_serialized Expected serialization with the implied empty BODY element.
 */
public function test_full_document_serialize_includes_implied_body_after_head_at_eof( string $input, string $expected_serialized ) {
	// First pass: serialize the raw input.
	$first_pass = WP_HTML_Processor::create_full_parser( $input )->serialize();

	$this->assertSame(
		$expected_serialized,
		$first_pass,
		'Should have serialized the implied empty BODY element before HTML closes.'
	);

	// Second pass: feeding the output back in must not change it.
	$reparsed = WP_HTML_Processor::create_full_parser( $first_pass );

	$this->assertSame(
		$first_pass,
		$reparsed->serialize(),
		'Should have produced idempotent full document serialization.'
	);

	// Third pass: record every tag token as a [ '+' | '-', TAG_NAME ] pair.
	$walker       = WP_HTML_Processor::create_full_parser( $input );
	$visited_tags = array();
	while ( $walker->next_token() ) {
		if ( '#tag' === $walker->get_token_type() ) {
			$direction      = $walker->is_tag_closer() ? '-' : '+';
			$visited_tags[] = array( $direction, $walker->get_tag() );
		}
	}

	$expected_tail = array(
		array( '+', 'BODY' ),
		array( '-', 'BODY' ),
		array( '-', 'HTML' ),
	);

	$this->assertSame(
		$expected_tail,
		array_slice( $visited_tags, -3 ),
		'Should visit the implied empty BODY element before closing HTML.'
	);
}

/**
 * Data provider.
 *
 * Each dataset is a pair: the full-document input, followed by the
 * serialization expected for it, which always ends in an empty BODY.
 *
 * @return array[] Datasets keyed by a human-readable description.
 */
public static function data_provider_full_document_serialize_includes_implied_body_after_head_at_eof() {
	$datasets = array();

	$datasets['Closed HEAD at EOF'] = array(
		'<!DOCTYPE html><html><head><title>x</title></head>',
		'<!DOCTYPE html><html><head><title>x</title></head><body></body></html>',
	);

	$datasets['Unclosed TEMPLATE in HEAD'] = array(
		'<!DOCTYPE html><html><head><template>x',
		'<!DOCTYPE html><html><head><template>x</template></head><body></body></html>',
	);

	$datasets['Unclosed table in TEMPLATE in HEAD'] = array(
		'<html><title>x</title><template><table><tr><td>x',
		'<html><head><title>x</title><template><table><tbody><tr><td>x</td></tr></tbody></table></template></head><body></body></html>',
	);

	$datasets['Ignored BODY in TEMPLATE at EOF'] = array(
		'<template><body>',
		'<html><head><template></template></head><body></body></html>',
	);

	$datasets['Ignored BODY closer in NOSCRIPT'] = array(
		'<noscript></body>',
		'<html><head><noscript></noscript></head><body></body></html>',
	);

	return $datasets;
}

/**
 * Verifies that normalizing a fragment which ends inside a table
 * produces output with every open table element closed.
 *
 * @ticket 65372
 *
 * @dataProvider data_provider_normalize_closes_tables_at_eof
 *
 * @param string $input    Fragment input ending in a table insertion mode.
 * @param string $expected Expected normalized fragment.
 */
public function test_normalize_closes_tables_at_eof( string $input, string $expected ) {
	$normalized = WP_HTML_Processor::normalize( $input );

	$this->assertSame(
		$expected,
		$normalized,
		'Should have closed open table elements at EOF.'
	);
}

/**
 * Data provider.
 *
 * Each dataset is a pair: a fragment left open inside a table,
 * followed by the normalized fragment expected for it.
 *
 * @return array[] Datasets keyed by the innermost element left open.
 */
public static function data_provider_normalize_closes_tables_at_eof() {
	$datasets = array();

	$datasets['Open TABLE'] = array(
		'<table>',
		'<table></table>',
	);

	$datasets['Open TBODY'] = array(
		'<table><tbody>',
		'<table><tbody></tbody></table>',
	);

	$datasets['Open TR'] = array(
		'<table><tr>',
		'<table><tbody><tr></tr></tbody></table>',
	);

	$datasets['Open TD'] = array(
		'<table><tr><td>x',
		'<table><tbody><tr><td>x</td></tr></tbody></table>',
	);

	return $datasets;
}

/**
* Data provider.
*
Expand Down
Loading