diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 0cd1f0fc45e07..4e9dfb5b928ff 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -588,7 +588,7 @@ public function remove_node( WP_HTML_Token $token ): bool { $position_from_start = $this->count() - $position_from_end - 1; array_splice( $this->stack, $position_from_start, 1 ); - $this->after_element_pop( $item ); + $this->after_element_pop( $item, 0 === $position_from_end ); return true; } @@ -731,9 +731,10 @@ public function after_element_push( WP_HTML_Token $item ): void { * * @since 6.4.0 * - * @param WP_HTML_Token $item Element that was removed from the stack of open elements. + * @param WP_HTML_Token $item Element that was removed from the stack of open elements. + * @param bool $invoke_pop_handler Whether to call the pop handler. */ - public function after_element_pop( WP_HTML_Token $item ): void { + public function after_element_pop( WP_HTML_Token $item, bool $invoke_pop_handler = true ): void { /* * When adding support for new elements, expand this switch to trap * cases where the precalculated value needs to change. @@ -767,7 +768,7 @@ public function after_element_pop( WP_HTML_Token $item ): void { break; } - if ( null !== $this->pop_handler ) { + if ( $invoke_pop_handler && null !== $this->pop_handler ) { call_user_func( $this->pop_handler, $item ); } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..f7c9fe285fdd9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -251,6 +251,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $current_element = null; + /** + * Elements removed from the stack of open elements without a normal pop event. 
+ * + * @since 7.1.0 + * + * @var array[] + */ + private $non_lifo_breadcrumb_removals = array(); + /** * Context node if created as a fragment parser. * @@ -814,6 +823,10 @@ private function next_visitable_token(): bool { * tokens works in the meantime and isn't obviously wrong. */ if ( empty( $this->element_queue ) ) { + if ( $this->queue_virtual_closer_after_non_lifo_removal() ) { + return $this->next_visitable_token(); + } + if ( $this->step() ) { return $this->next_visitable_token(); } @@ -823,6 +836,10 @@ private function next_visitable_token(): bool { } } + if ( $this->queue_virtual_closer_after_non_lifo_removal() ) { + return $this->next_visitable_token(); + } + // Process the next event on the queue. $this->current_element = array_shift( $this->element_queue ); if ( ! isset( $this->current_element ) ) { @@ -860,6 +877,68 @@ private function next_visitable_token(): bool { return true; } + /** + * Queues a virtual closer for a removed node once its subtree closes. + * + * Non-LIFO removals from the stack of open elements do not emit a normal + * pop event because those events blindly pop the current breadcrumb. The + * removed node remains an ancestor of the currently open subtree, but must + * be reported as a virtual closer before visiting the next token after + * that subtree closes. + * + * @since 7.1.0 + * + * @return bool Whether a virtual closer was queued. + */ + private function queue_virtual_closer_after_non_lifo_removal(): bool { + if ( empty( $this->non_lifo_breadcrumb_removals ) ) { + return false; + } + + $removed_node = end( $this->non_lifo_breadcrumb_removals ); + $removed_token = $removed_node['token']; + $breadcrumb_depth = $removed_node['breadcrumb_depth']; + + if ( + count( $this->breadcrumbs ) !== $breadcrumb_depth || + empty( $this->breadcrumbs ) || + end( $this->breadcrumbs ) !== $removed_token->node_name + ) { + return false; + } + + // At EOF, normal stack pops may be queued and processed after the stack is empty. 
+ $adjusted_current_node = $this->get_adjusted_current_node(); + + if ( isset( $adjusted_current_node ) && end( $this->breadcrumbs ) === $adjusted_current_node->node_name ) { + return false; + } + + /* + * The depth and node-name checks above cannot distinguish the removed + * element from a same-named element at the same depth; identity is + * recovered here. If a queued POP closes a different element with the + * same name, that element owns the current breadcrumb and the virtual + * closer must wait for it. + */ + $next_event = reset( $this->element_queue ); + if ( + false !== $next_event && + WP_HTML_Stack_Event::POP === $next_event->operation && + $next_event->token !== $removed_token && + $next_event->token->node_name === $removed_token->node_name + ) { + return false; + } + + array_pop( $this->non_lifo_breadcrumb_removals ); + array_unshift( + $this->element_queue, + new WP_HTML_Stack_Event( $removed_token, WP_HTML_Stack_Event::POP, 'virtual' ) + ); + return true; + } + /** * Indicates if the current tag token is a tag closer. * @@ -2848,7 +2927,28 @@ private function step_in_body(): bool { case 'A': $this->run_adoption_agency_algorithm(); $this->state->active_formatting_elements->remove_node( $item ); - $this->state->stack_of_open_elements->remove_node( $item ); + $is_current_node = $item === $this->state->stack_of_open_elements->current_node(); + + /* + * The removed node's breadcrumb sits at its position in the + * stack of open elements: one crumb for each open element at + * or below it. Fragment parsers carry an extra crumb for the + * context node, which never appears on the stack. + */ + $stack_position = 0; + foreach ( $this->state->stack_of_open_elements->walk_down() as $node ) { + ++$stack_position; + if ( $node === $item ) { + break; + } + } + + if ( $this->state->stack_of_open_elements->remove_node( $item ) && ! 
$is_current_node ) { + $this->non_lifo_breadcrumb_removals[] = array( + 'token' => $item, + 'breadcrumb_depth' => isset( $this->context_node ) ? $stack_position + 1 : $stack_position, + ); + } break 2; } } @@ -5675,6 +5775,7 @@ public function seek( $bookmark_name ): bool { $this->state->current_token = null; $this->current_element = null; $this->element_queue = array(); + $this->non_lifo_breadcrumb_removals = array(); /* * The absence of a context node indicates a full parse. diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 911fa8b910b37..2c18a10a94712 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -418,6 +418,459 @@ public function test_remains_stable_when_editing_attributes() { ); } + /** + * Ensures that HTML elements inside MathML text integration points retain + * the full path to their MathML parent. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::get_namespace + */ + public function test_reports_nested_anchor_breadcrumbs_inside_mathml_text_integration_point() { + $processor = WP_HTML_Processor::create_fragment( 'xy' ); + + $this->assertTrue( $processor->next_tag( 'A' ), 'Failed to find the outer A element.' ); + $this->assertTrue( $processor->next_tag( 'A' ), 'Failed to find the inner A element.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ), + $processor->get_breadcrumbs(), + 'The inner A element should remain nested inside the MathML MI element.' + ); + + $this->assertSame( + 'html', + $processor->get_namespace(), + 'The inner A element should be an HTML element inside the MathML text integration point.' + ); + + $this->assertTrue( $processor->next_token(), 'Failed to find the text following the inner A element.' 
); + + $this->assertSame( + '#text', + $processor->get_token_name(), + 'The inner A element should contain the following text node.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A', '#text' ), + $processor->get_breadcrumbs(), + 'The text after the inner A element should remain nested inside that A element.' + ); + + $this->assertSame( + 'y', + $processor->get_modifiable_text(), + 'The inner A element should contain the expected text.' + ); + } + + /** + * Ensures that an outer A element removed from the stack of open elements + * is removed from breadcrumbs after its existing child subtree closes. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::matches_breadcrumbs + */ + public function test_removes_outer_anchor_breadcrumb_after_mathml_text_integration_point_closes() { + $processor = WP_HTML_Processor::create_fragment( 'xyzt' ); + + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find the SPAN element after the MathML subtree.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'SPAN' ), + $processor->get_breadcrumbs(), + 'The SPAN element after the MathML subtree should not remain nested inside the removed outer A element.' + ); + + $this->assertFalse( + $processor->matches_breadcrumbs( array( 'A', 'SPAN' ) ), + 'The SPAN element should not match breadcrumbs inside the removed outer A element.' + ); + } + + /** + * Ensures that a removed outer A element's breadcrumb is not confused with + * a same-named foreign element between it and the integration point. + * + * Foreign A elements never participate in the active formatting elements, + * so the removed node is the outer HTML A element, not the foreign one. 
+ * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::matches_breadcrumbs + * + * @dataProvider data_intervening_foreign_anchor_html + * + * @param string $html HTML with a foreign A element between the removed outer A element and the integration point. + */ + public function test_removes_outer_anchor_breadcrumb_with_intervening_foreign_anchor( string $html ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find the SPAN element after the foreign subtree.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'SPAN' ), + $processor->get_breadcrumbs(), + 'The SPAN element after the foreign subtree should not remain nested inside the removed outer A element.' + ); + + $this->assertFalse( + $processor->matches_breadcrumbs( array( 'A', 'SPAN' ) ), + 'The SPAN element should not match breadcrumbs inside the removed outer A element.' + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_intervening_foreign_anchor_html() { + return array( + 'MathML A before text integration point' => array( 'xyzt' ), + 'SVG A before integration point' => array( 'xyzt' ), + ); + } + + /** + * Ensures that an HTML heading end tag inside a MathML text integration + * point is ignored, so following content stays inside the integration point. + * + * The `</h2>` is dispatched through the foreign-content rules, which walk up + * to the HTML-namespace `H2` and hand off to the "in body" heading end-tag + * steps. Those require the heading to be in scope, but a MathML text + * integration point (`MI`) is a scope boundary, so `H2` is not in scope and + * the end tag is dropped. The following `<x-0>` is therefore inserted into + * `MI` rather than becoming a sibling of `H2`. 
+ * + * This matches the HTML specification and browsers (verified against + * Chromium); PHP's `Dom\HTMLDocument` reparents `<x-0>` out of `MI`, which is + * a limitation of that parser, not of the HTML API. + * + * @see https://software.hixie.ch/utilities/js/live-dom-viewer/?%3Ch2%3E%3Cmath%3E%3Cmi%3Ea%3C%2Fh2%3E%3Cx-0%3Eb%3C%2Fx-0%3E + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + */ + public function test_heading_end_tag_in_mathml_text_integration_point_is_ignored() { + $processor = WP_HTML_Processor::create_fragment( '<h2><math><mi>a</h2><x-0>b</x-0>' ); + + $this->assertTrue( $processor->next_tag( 'X-0' ), 'Failed to find the X-0 element following the ignored heading end tag.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'H2', 'MATH', 'MI', 'X-0' ), + $processor->get_breadcrumbs(), + 'The X-0 element should remain inside the MathML MI text integration point because the end tag is not in scope and is ignored.' + ); + } + + /** + * Ensures that an outer A element removed from the stack of open elements + * remains visitable as a virtual closer after its existing child subtree closes. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::is_tag_closer + */ + public function test_visits_outer_anchor_virtual_closer_after_mathml_text_integration_point_closes() { + $processor = WP_HTML_Processor::create_fragment( 'xyz' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the outer A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ), + $processor->get_breadcrumbs(), + 'The inner A opener should remain nested inside the MathML MI element.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ), + $processor->get_breadcrumbs(), + 'The inner A closer should report its parent breadcrumbs.' 
+ ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the virtual outer A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' ); + + $this->assertSame( + array( 'HTML', 'BODY' ), + $processor->get_breadcrumbs(), + 'The virtual outer A closer should report its parent breadcrumbs.' + ); + } + + /** + * Ensures that the removed outer A element's virtual closer is visited + * before a new same-name opener immediately following the subtree. + * + * This is the one input where the adjusted-current-node guard and the + * same-name next-event lookahead in the virtual-closer queueing must + * cooperate: the new A opener shares the removed element's tag name, but + * the virtual closer must still fire first so the new element opens as a + * sibling, not a child. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::is_tag_closer + */ + public function test_visits_outer_anchor_virtual_closer_before_same_name_opener() { + $processor = WP_HTML_Processor::create_fragment( 'xyz' ); + + $visits = array(); + while ( $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ) ) { + $visits[] = array( + $processor->is_tag_closer() ? 'closer' : 'opener', + $processor->get_breadcrumbs(), + ); + } + + $this->assertSame( + array( + array( 'opener', array( 'HTML', 'BODY', 'A' ) ), + array( 'opener', array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ) ), + array( 'closer', array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ) ), + array( 'closer', array( 'HTML', 'BODY' ) ), + array( 'opener', array( 'HTML', 'BODY', 'A' ) ), + array( 'closer', array( 'HTML', 'BODY' ) ), + ), + $visits, + 'Expected the removed outer A virtual closer to be visited before the new same-name A opener.' 
+ ); + } + + /** + * Ensures that an outer A element removed from the stack of open elements + * remains visitable as a virtual closer when the fragment ends inside its + * existing child subtree. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::is_tag_closer + */ + public function test_visits_outer_anchor_virtual_closer_at_end_of_fragment() { + $processor = WP_HTML_Processor::create_fragment( 'xy' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the outer A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ), + $processor->get_breadcrumbs(), + 'The inner A opener should remain nested inside the MathML MI element.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ), + $processor->get_breadcrumbs(), + 'The inner A closer should report its parent breadcrumbs.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the virtual outer A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' 
); + + $this->assertSame( + array( 'HTML', 'BODY' ), + $processor->get_breadcrumbs(), + 'The virtual outer A closer should report its parent breadcrumbs.' + ); + } + + /** + * Ensures that an outer A element removed from the stack of open elements + * remains visitable as a virtual closer before full-parser EOF closers. + * + * @ticket 61576 + * + * @covers WP_HTML_Processor::get_breadcrumbs + * @covers WP_HTML_Processor::is_tag_closer + */ + public function test_visits_outer_anchor_virtual_closer_before_full_parser_eof_closers() { + $processor = WP_HTML_Processor::create_full_parser( 'xy' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the outer A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A opener.' + ); + + $this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ), + $processor->get_breadcrumbs(), + 'The inner A opener should remain nested inside the MathML MI element.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the inner A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' ); + + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ), + $processor->get_breadcrumbs(), + 'The inner A closer should report its parent breadcrumbs.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'A', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the virtual outer A closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' 
); + + $this->assertSame( + array( 'HTML', 'BODY' ), + $processor->get_breadcrumbs(), + 'The virtual outer A closer should report its parent breadcrumbs.' + ); + + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'BODY', + 'tag_closers' => 'visit', + ) + ), + 'Failed to find the full-parser BODY closer.' + ); + + $this->assertTrue( $processor->is_tag_closer(), 'The BODY token should be a closer.' ); + + $this->assertSame( + array( 'HTML' ), + $processor->get_breadcrumbs(), + 'The BODY closer should not consume the stale outer A breadcrumb.' + ); + } + /** * Ensures that the ability to set attributes isn't broken by the HTML Processor. *