From 17e6b42d7e43a2f7dd627f30ef9c65028793692a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 12 Jun 2026 16:46:03 +0200 Subject: [PATCH] HTML API: Preserve PRE/LISTING text replay after seek --- .../html-api/class-wp-html-tag-processor.php | 41 ++- .../wpHtmlProcessorModifiableText.php | 31 +++ .../html-api/wpHtmlTagProcessor-bookmark.php | 41 +++ .../wpHtmlTagProcessorModifiableText.php | 261 +++++++++++++++++- 4 files changed, 360 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 77c1a471db5b1..c6de19b057b43 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -754,6 +754,14 @@ class WP_HTML_Tag_Processor { */ protected $bookmarks = array(); + /** + * Tracks bookmarks set on text nodes whose leading linefeed is ignored. + * + * @since 6.9.0 + * @var bool[] + */ + private $bookmarks_with_skipped_newline = array(); + const ADD_CLASS = true; const REMOVE_CLASS = false; const SKIP_CLASS = null; @@ -1357,6 +1365,11 @@ public function set_bookmark( $name ): bool { } $this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length ); + if ( self::STATE_TEXT_NODE === $this->parser_state && $this->skip_newline_at === $this->token_starts_at ) { + $this->bookmarks_with_skipped_newline[ $name ] = true; + } else { + unset( $this->bookmarks_with_skipped_newline[ $name ] ); + } return true; } @@ -1377,6 +1390,7 @@ public function release_bookmark( $name ): bool { } unset( $this->bookmarks[ $name ] ); + unset( $this->bookmarks_with_skipped_newline[ $name ] ); return true; } @@ -2517,7 +2531,9 @@ private function apply_attributes_updates( int $shift_this_point ): int { return 0; } - $accumulated_shift_for_given_point = 0; + $accumulated_shift_for_given_point = 0; + $accumulated_shift_for_skip_newline = 0; + $skip_newline_at = $this->skip_newline_at; /* * Attribute updates can be enqueued in any order but updates @@ -2541,6 +2557,10 @@ private function apply_attributes_updates( int $shift_this_point ): int { $this->bytes_already_parsed += $shift; } + if ( null !== $skip_newline_at && $diff->start < $skip_newline_at ) { + $accumulated_shift_for_skip_newline += $shift; + } + // Accumulate shift of the given pointer within this function call. if ( $diff->start < $shift_this_point ) { $accumulated_shift_for_given_point += $shift; @@ -2551,6 +2571,10 @@ private function apply_attributes_updates( int $shift_this_point ): int { $bytes_already_copied = $diff->start + $diff->length; } + if ( null !== $skip_newline_at ) { + $this->skip_newline_at += $accumulated_shift_for_skip_newline; + } + $this->html = $output_buffer . substr( $this->html, $bytes_already_copied ); /* @@ -2583,8 +2607,11 @@ private function apply_attributes_updates( int $shift_this_point ): int { $delta = strlen( $diff->text ) - $diff->length; - if ( $bookmark->start >= $diff->start ) { + if ( $bookmark->start > $diff->start || ( $bookmark->start === $diff->start && 0 === $diff->length ) ) { $head_delta += $delta; + if ( $bookmark->start === $diff->start && 0 === $diff->length && '' !== $diff->text ) { + unset( $this->bookmarks_with_skipped_newline[ $bookmark_name ] ); + } } if ( $bookmark_end >= $diff_end ) { @@ -2657,6 +2684,9 @@ public function seek( $bookmark_name ): bool { // Point this tag processor before the sought tag opener and consume it. $this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start; + $this->skip_newline_at = isset( $this->bookmarks_with_skipped_newline[ $bookmark_name ] ) + ? $this->bytes_already_parsed + : null; $this->parser_state = self::STATE_READY; return $this->next_token(); } @@ -3636,13 +3666,6 @@ public function subdivide_text_appropriately(): bool { * that a token has modifiable text, and a token with modifiable text may * have an empty string (e.g. a comment with no contents). * - * Limitations: - * - * - This function will not strip the leading newline appropriately - * after seeking into a LISTING or PRE element. To ensure that the - * newline is treated properly, seek to the LISTING or PRE opening - * tag instead of to the first text node inside the element. - * * @since 6.5.0 * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately. * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlProcessorModifiableText.php index 5d093ae05dd07..c8bf1f6ef248a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorModifiableText.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorModifiableText.php @@ -9,6 +9,37 @@ * @coversDefaultClass WP_HTML_Processor */ class Tests_HtmlApi_WpHtmlProcessorModifiableText extends WP_UnitTestCase { + /** + * Ensures that bookmarked text remains seekable after updating the same text node. + */ + public function test_seeks_to_bookmarked_text_after_modifiable_text_update(): void { + $processor = WP_HTML_Processor::create_fragment( "
\nabc
" ); + + $this->assertTrue( $processor->next_token(), 'Should have found the PRE opener.' ); + $this->assertSame( 'PRE', $processor->get_token_name(), 'Should have found the PRE opener: check test setup.' ); + + while ( $processor->next_token() && 'abc' !== $processor->get_modifiable_text() ) { + continue; + } + + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the PRE text node: check test setup.' ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), 'Should have stripped the leading newline from the PRE text on first traversal.' ); + $this->assertTrue( $processor->set_bookmark( 'text' ), 'Should have bookmarked the PRE text node.' ); + + $this->assertTrue( $processor->set_modifiable_text( 'xyz' ), 'Should have updated the PRE text node.' ); + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertSame( 'PRE', $processor->get_token_name(), 'Should have advanced to the PRE closer: check test setup.' ); + $this->assertSame( "
\nxyz
", $processor->get_updated_html(), 'Should have updated the PRE text node.' ); + + $this->assertTrue( $processor->seek( 'text' ), 'Should have sought back to the updated PRE text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have sought back to the text node.' ); + $this->assertSame( + 'xyz', + $processor->get_modifiable_text(), + 'Should have replayed the updated PRE text node after seeking.' + ); + } + /** * TEXTAREA elements ignore the first newline in their content. * Setting the modifiable text with a leading newline (or carriage return variants) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php index 0e72f9d726835..e33ed195a63a5 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-bookmark.php @@ -410,6 +410,47 @@ public function test_updates_bookmark_for_deletions_before_both_sides() { ); } + /** + * Ensures that bookmarks stay attached to their original token when text is inserted at the bookmark start. + * + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::seek + */ + public function test_updates_bookmark_for_insertions_at_start() { + $processor = new class( '
' ) extends WP_HTML_Tag_Processor { + /** + * Inserts HTML at the start of the given bookmark. + * + * @param string $bookmark_name Bookmark name. + * @param string $html HTML to insert. + */ + public function insert_html_at_bookmark_start( string $bookmark_name, string $html ): void { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $this->bookmarks[ $bookmark_name ]->start, + 0, + $html + ); + } + }; + + $processor->next_tag( 'SPAN' ); + $processor->set_bookmark( 'target' ); + $processor->insert_html_at_bookmark_start( 'target', '

' ); + + $this->assertSame( + '

', + $processor->get_updated_html(), + 'Should have inserted the HTML at the bookmark start.' + ); + $this->assertTrue( $processor->seek( 'target' ), 'Should have sought back to the original bookmarked token.' ); + $this->assertSame( + 'SPAN', + $processor->get_token_name(), + 'Should have kept the bookmark attached to the original SPAN token after insertion.' + ); + } + /** * @ticket 56299 * diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php index f43d1fffaad0e..c5eb24996a802 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php @@ -39,6 +39,261 @@ public function test_get_modifiable_text_is_idempotent() { } } + /** + * Ensures that a bookmarked PRE or LISTING text node replays its leading-newline handling after seeking. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_get_modifiable_text_replays_leading_newline_after_seeking_to_text( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new WP_HTML_Tag_Processor( "<{$html_tag}>\n \n\n><{$html_tag}>" ); + + $this->assertTrue( $processor->next_token(), "Should have found the {$tag_name} opener." ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have found the {$tag_name} opener: check test setup." ); + + $this->assertTrue( $processor->next_token(), 'Should have found the first text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the first text node: check test setup.' ); + + $first = $processor->get_modifiable_text(); + $this->assertSame( " \n\n>", $first, "Should have stripped the leading newline from the {$tag_name} text on first traversal." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have advanced to the next {$tag_name} opener: check test setup." ); + + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the {$tag_name} text node." ); + $this->assertSame( + $first, + $processor->get_modifiable_text(), + "Should have replayed the leading-newline handling after seeking back to the {$tag_name} text node." + ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_token_names_ignoring_leading_newline() { + return array( + 'PRE' => array( 'PRE' ), + 'LISTING' => array( 'LISTING' ), + ); + } + + /** + * Ensures that bookmarked text remains seekable after updating the same text node. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_seeks_to_bookmarked_text_after_modifiable_text_update( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new WP_HTML_Tag_Processor( "<{$html_tag}>\nabc" ); + + $this->assertTrue( $processor->next_token(), "Should have found the {$tag_name} opener." ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have found the {$tag_name} opener: check test setup." ); + + $this->assertTrue( $processor->next_token(), 'Should have found the first text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the first text node: check test setup.' ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should have stripped the leading newline from the {$tag_name} text on first traversal." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $this->assertTrue( $processor->set_modifiable_text( 'xyz' ), "Should have updated the {$tag_name} text node." ); + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have advanced to the {$tag_name} closer: check test setup." ); + $this->assertSame( "<{$html_tag}>xyz", $processor->get_updated_html(), "Should have updated the {$tag_name} text node." ); + + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the updated {$tag_name} text node." ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have sought back to the text node.' ); + $this->assertSame( + 'xyz', + $processor->get_modifiable_text(), + "Should have replayed the updated {$tag_name} text node after seeking." + ); + } + + /** + * Ensures that a leading-newline text bookmark can be set after earlier updates are flushed. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_bookmarks_leading_newline_text_after_flushing_prior_update( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new WP_HTML_Tag_Processor( "
<{$html_tag}>\nabc" ); + + $this->assertTrue( $processor->next_tag( 'DIV' ), 'Should have found the DIV opener: check test setup.' ); + $processor->add_class( 'x' ); + + while ( $processor->next_token() && '#text' !== $processor->get_token_name() ) { + continue; + } + + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should have stripped the leading newline from the {$tag_name} text before flushing updates." ); + $this->assertSame( + "
<{$html_tag}>\nabc", + $processor->get_updated_html(), + 'Should have applied the prior DIV update.' + ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should still strip the leading newline from the {$tag_name} text after flushing updates." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the {$tag_name} text node." ); + $this->assertSame( + 'abc', + $processor->get_modifiable_text(), + "Should have replayed the leading-newline handling after seeking back to the {$tag_name} text node." + ); + } + + /** + * Ensures that skipped-newline state is shifted once when flushing multiple updates. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_bookmarks_leading_newline_text_after_flushing_prior_and_text_updates( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new WP_HTML_Tag_Processor( "<{$html_tag}>\nabc" ); + + $this->assertTrue( $processor->next_token(), "Should have found the {$tag_name} opener." ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have found the {$tag_name} opener: check test setup." ); + $processor->add_class( 'x' ); + + $this->assertTrue( $processor->next_token(), 'Should have found the first text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the first text node: check test setup.' ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should have stripped the leading newline from the {$tag_name} text before flushing updates." ); + $this->assertTrue( $processor->set_modifiable_text( "\nxy" ), "Should have updated the {$tag_name} text node." ); + + $this->assertSame( + "<{$html_tag} class=\"x\">\nxy", + $processor->get_updated_html(), + "Should have applied the {$tag_name} opener and text updates." + ); + $this->assertSame( 'xy', $processor->get_modifiable_text(), "Should still strip the leading newline from the updated {$tag_name} text after flushing updates." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the {$tag_name} text node." ); + $this->assertSame( + 'xy', + $processor->get_modifiable_text(), + "Should have replayed the leading-newline handling after seeking back to the updated {$tag_name} text node." + ); + } + + /** + * Ensures that inserted markup at the bookmarked text start cancels leading-newline handling. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_bookmarked_leading_newline_text_after_insertion_at_start( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new class( "<{$html_tag}>\nabc" ) extends WP_HTML_Tag_Processor { + /** + * Inserts HTML at the start of the given bookmark. + * + * @param string $bookmark_name Bookmark name. + * @param string $html HTML to insert. + */ + public function insert_html_at_bookmark_start( string $bookmark_name, string $html ): void { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $this->bookmarks[ $bookmark_name ]->start, + 0, + $html + ); + } + }; + + $this->assertTrue( $processor->next_token(), "Should have found the {$tag_name} opener." ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have found the {$tag_name} opener: check test setup." ); + + $this->assertTrue( $processor->next_token(), 'Should have found the first text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the first text node: check test setup.' ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should have stripped the leading newline from the {$tag_name} text on first traversal." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $processor->insert_html_at_bookmark_start( 'text', '' ); + $this->assertSame( + "<{$html_tag}>\nabc", + $processor->get_updated_html(), + "Should have inserted markup at the start of the {$tag_name} text node." + ); + + while ( $processor->next_token() && '#text' !== $processor->get_token_name() ) { + continue; + } + + $normal_scan_text = $processor->get_modifiable_text(); + $this->assertSame( "\nabc", $normal_scan_text, "Should not strip the newline after inserting markup before the {$tag_name} text." ); + + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the {$tag_name} text node." ); + $this->assertSame( + $normal_scan_text, + $processor->get_modifiable_text(), + "Should have replayed the same {$tag_name} text after seeking." + ); + } + + /** + * Ensures that a no-op update at the bookmarked text start preserves leading-newline handling. + * + * @dataProvider data_token_names_ignoring_leading_newline + * + * @param string $tag_name Tag name whose first text node ignores a leading newline. + */ + public function test_bookmarked_leading_newline_text_after_noop_at_start( string $tag_name ) { + $html_tag = strtolower( $tag_name ); + $processor = new class( "<{$html_tag}>\nabc" ) extends WP_HTML_Tag_Processor { + /** + * Enqueues a no-op update at the start of the given bookmark. + * + * @param string $bookmark_name Bookmark name. + */ + public function enqueue_noop_at_bookmark_start( string $bookmark_name ): void { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( + $this->bookmarks[ $bookmark_name ]->start, + 0, + '' + ); + } + }; + + $this->assertTrue( $processor->next_token(), "Should have found the {$tag_name} opener." ); + $this->assertSame( $tag_name, $processor->get_token_name(), "Should have found the {$tag_name} opener: check test setup." ); + + $this->assertTrue( $processor->next_token(), 'Should have found the first text node.' ); + $this->assertSame( '#text', $processor->get_token_name(), 'Should have found the first text node: check test setup.' ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should have stripped the leading newline from the {$tag_name} text on first traversal." ); + $this->assertTrue( $processor->set_bookmark( 'text' ), "Should have bookmarked the {$tag_name} text node." ); + + $processor->enqueue_noop_at_bookmark_start( 'text' ); + $this->assertSame( + "<{$html_tag}>\nabc", + $processor->get_updated_html(), + 'Should not have changed the HTML.' + ); + $this->assertSame( 'abc', $processor->get_modifiable_text(), "Should still strip the leading newline from the {$tag_name} text after the no-op update." ); + + $this->assertTrue( $processor->next_token(), 'Should have advanced away from the bookmarked text node.' ); + $this->assertTrue( $processor->seek( 'text' ), "Should have sought back to the {$tag_name} text node." ); + $this->assertSame( + 'abc', + $processor->get_modifiable_text(), + "Should have replayed the leading-newline handling after seeking back to the {$tag_name} text node." + ); + } + /** * Data provider. * @@ -324,14 +579,10 @@ public function test_get_modifiable_text_ignores_newlines_after_seeking() { ); $processor->seek( 'listing' ); - if ( "\ngone" === $processor->get_modifiable_text() ) { - $this->markTestSkipped( "There's no support currently for handling the leading newline after seeking." ); - } - $this->assertSame( 'gone', $processor->get_modifiable_text(), - 'Should have remembered to remote leading newline from LISTING element after seeking around it.' + 'Should have remembered to remove the leading newline from LISTING element after seeking around it.' ); $processor->seek( 'div' );