From 63ac0ac7300b271d8f186035c8a3f4996414750b Mon Sep 17 00:00:00 2001
From: Jon Surrell
Date: Fri, 12 Jun 2026 17:45:27 +0200
Subject: [PATCH 1/2] HTML API: Ignore leading newline after seeking to text
 following PRE.

The first newline after LISTING and PRE opening tags is ignored as an
authoring convenience. The Tag Processor tracks where such a newline
would be found and ignores it when reading modifiable text, but this
single tracked offset is overwritten upon scanning past any later
LISTING or PRE opening tag. Seeking directly to a bookmarked text node
restored the cursor without restoring that state, so the same token
could report different modifiable text on a repeat visit:

    $processor = new WP_HTML_Tag_Processor( "<pre>\nX<pre>" );
    $processor->next_token();                  // PRE.
    $processor->next_token();                  // #text.
    $processor->get_modifiable_text();         // 'X'
    $processor->set_bookmark( 'text' );
    $processor->next_token();                  // Second PRE clobbers state.
    $processor->seek( 'text' );
    $processor->get_modifiable_text();         // "\nX" - wrong!

Resolve this by recording, for each bookmark, whether its token
immediately follows a LISTING or PRE opening tag, and restoring the
ignored-newline state from the bookmark when seeking. Because the
recorded state is keyed to the bookmark, it shifts together with the
bookmark when applied document updates move token offsets.

This also prevents a stale tracked offset from colliding with an
unrelated token's post-update location, which could previously strip
a newline that should have been preserved.

Removes the corresponding documented limitation on
get_modifiable_text() and the skipped-test guard which covered it.

See #65372.
---
 .../html-api/class-wp-html-tag-processor.php  | 46 ++++++++--
 .../wpHtmlTagProcessorModifiableText.php      | 91 ++++++++++++++++++-
 2 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 501a623afb10b..96780871752cc 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -826,6 +826,23 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $skip_newline_at = null;
 
+	/**
+	 * Indicates which bookmarks point to a token which immediately follows
+	 * the opening tag of a LISTING or PRE element, where a leading newline
+	 * should be ignored when reading modifiable text.
+	 *
+	 * When seeking to a bookmark, this state must be restored because it
+	 * cannot be re-derived from the bookmarked location alone.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @see WP_HTML_Tag_Processor::$skip_newline_at
+	 * @see WP_HTML_Tag_Processor::seek()
+	 *
+	 * @var array
+	 */
+	private $bookmark_skips_newline = array();
+
 	/**
 	 * Constructor.
 	 *
@@ -1358,6 +1375,12 @@ public function set_bookmark( $name ): bool {
 
 		$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
 
+		if ( $this->token_starts_at === $this->skip_newline_at ) {
+			$this->bookmark_skips_newline[ $name ] = true;
+		} else {
+			unset( $this->bookmark_skips_newline[ $name ] );
+		}
+
 		return true;
 	}
 
@@ -1376,7 +1399,7 @@ public function release_bookmark( $name ): bool {
 			return false;
 		}
 
-		unset( $this->bookmarks[ $name ] );
+		unset( $this->bookmarks[ $name ], $this->bookmark_skips_newline[ $name ] );
 
 		return true;
 	}
@@ -2620,6 +2643,7 @@ public function has_bookmark( $bookmark_name ): bool {
 	 * maximum limit on the number of times seek() can be called.
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 Restores the ignored-newline state for tokens following LISTING and PRE opening tags.
 	 *
 	 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
 	 * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
@@ -2658,6 +2682,18 @@ public function seek( $bookmark_name ): bool {
 		// Point this tag processor before the sought tag opener and consume it.
 		$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
 		$this->parser_state         = self::STATE_READY;
+
+		/*
+		 * The leading newline after a LISTING or PRE opening tag is ignored
+		 * as an authoring convenience. This state is set when scanning past
+		 * one of these opening tags, but a later LISTING or PRE tag may have
+		 * overwritten it; it must be restored from the bookmark when seeking
+		 * to a token which immediately follows such an opening tag.
+		 */
+		$this->skip_newline_at = isset( $this->bookmark_skips_newline[ $bookmark_name ] )
+			? $this->bytes_already_parsed
+			: null;
+
 		return $this->next_token();
 	}
 
@@ -3642,15 +3678,9 @@ public function subdivide_text_appropriately(): bool {
 	 * that a token has modifiable text, and a token with modifiable text may
 	 * have an empty string (e.g. a comment with no contents).
 	 *
-	 * Limitations:
-	 *
-	 *  - This function will not strip the leading newline appropriately
-	 *    after seeking into a LISTING or PRE element. To ensure that the
-	 *    newline is treated properly, seek to the LISTING or PRE opening
-	 *    tag instead of to the first text node inside the element.
-	 *
 	 * @since 6.5.0
 	 * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately.
+	 * @since 7.1.0 Ignores the leading newline after LISTING and PRE opening tags even after seeking.
 	 *
 	 * @return string
 	 */
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
index f43d1fffaad0e..27c0c17ebb127 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
@@ -275,6 +275,8 @@ public function test_modifiable_text_reads_updates_after_setting() {
 	/**
 	 * Ensures that when ignoring a newline after LISTING and PRE tags, that this
 	 * happens appropriately after seeking.
+	 *
+	 * @ticket 65372
 	 */
 	public function test_get_modifiable_text_ignores_newlines_after_seeking() {
 		$processor = new WP_HTML_Tag_Processor(
@@ -324,14 +326,10 @@ public function test_get_modifiable_text_ignores_newlines_after_seeking() {
 		);
 
 		$processor->seek( 'listing' );
-		if ( "\ngone" === $processor->get_modifiable_text() ) {
-			$this->markTestSkipped( "There's no support currently for handling the leading newline after seeking." );
-		}
-
 		$this->assertSame(
 			'gone',
 			$processor->get_modifiable_text(),
-			'Should have remembered to remote leading newline from LISTING element after seeking around it.'
+			'Should have remembered to remove the leading newline from LISTING element after seeking around it.'
 		);
 
 		$processor->seek( 'div' );
@@ -342,6 +340,89 @@ public function test_get_modifiable_text_ignores_newlines_after_seeking() {
 		);
 	}
 
+	/**
+	 * Ensures that seeking directly to a text node immediately following a PRE
+	 * or LISTING opener continues to ignore its leading newline, even after
+	 * passing another PRE or LISTING tag before seeking.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers WP_HTML_Tag_Processor::seek
+	 * @covers WP_HTML_Tag_Processor::get_modifiable_text
+	 *
+	 * @dataProvider data_pre_and_listing_tags
+	 *
+	 * @param string $tag_name Tag name of the element which ignores a leading newline.
+	 */
+	public function test_get_modifiable_text_ignores_leading_newline_after_seeking_directly_to_text( string $tag_name ) {
+		$tag       = strtolower( $tag_name );
+		$processor = new WP_HTML_Tag_Processor( "<{$tag}>\n \n\n><{$tag}>" );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the first tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the first tag: check test setup.' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text node: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Failed to find the text node: check test setup.' );
+
+		$before_seeking = $processor->get_modifiable_text();
+		$this->assertSame( " \n\n>", $before_seeking, 'Should have ignored the leading newline on the first traversal.' );
+		$this->assertTrue( $processor->set_bookmark( 'text' ), 'Failed to set a bookmark on the text node: check test setup.' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the second tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the second tag: check test setup.' );
+
+		$this->assertTrue( $processor->seek( 'text' ), 'Failed to seek to the bookmarked text node.' );
+		$this->assertSame( $before_seeking, $processor->get_modifiable_text(), 'Should have ignored the leading newline after seeking back to the text node.' );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_pre_and_listing_tags() {
+		return array(
+			'PRE'     => array( 'PRE' ),
+			'LISTING' => array( 'LISTING' ),
+		);
+	}
+
+	/**
+	 * Ensures that seeking directly to a text node immediately following a PRE
+	 * or LISTING opener continues to ignore its leading newline when document
+	 * offsets have shifted from applying enqueued attribute updates.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers WP_HTML_Tag_Processor::seek
+	 * @covers WP_HTML_Tag_Processor::get_modifiable_text
+	 *
+	 * @dataProvider data_pre_and_listing_tags
+	 *
+	 * @param string $tag_name Tag name of the element which ignores a leading newline.
+	 */
+	public function test_get_modifiable_text_ignores_leading_newline_after_seeking_when_offsets_have_shifted( string $tag_name ) {
+		$tag       = strtolower( $tag_name );
+		$processor = new WP_HTML_Tag_Processor( "<{$tag} class=\"pad\">\nfoo<{$tag}>" );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the first tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the first tag: check test setup.' );
+		$processor->remove_attribute( 'class' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text node: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Failed to find the text node: check test setup.' );
+
+		$before_seeking = $processor->get_modifiable_text();
+		$this->assertSame( 'foo', $before_seeking, 'Should have ignored the leading newline on the first traversal.' );
+		$this->assertTrue( $processor->set_bookmark( 'text' ), 'Failed to set a bookmark on the text node: check test setup.' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the second tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the second tag: check test setup.' );
+
+		$this->assertTrue( $processor->seek( 'text' ), 'Failed to seek to the bookmarked text node.' );
+		$this->assertSame( $before_seeking, $processor->get_modifiable_text(), 'Should have ignored the leading newline after seeking back to the text node.' );
+	}
+
 	/**
 	 * Ensures that modifiable text updates are not applied where they aren't supported.
 	 *

From 613bb18a4b9f15ec8eae68be4e25c09d6366347e Mon Sep 17 00:00:00 2001
From: Jon Surrell 
Date: Fri, 12 Jun 2026 17:58:11 +0200
Subject: [PATCH 2/2] HTML API: Shift ignored-newline position when applying
 updates.

The position where an ignorable newline may follow a LISTING or PRE
opening tag was never adjusted when applying enqueued updates, even
though those updates shift the offsets of everything after them in
the document. Applying updates while paused on the text node following
one of these tags changed what get_modifiable_text() returned:

    $processor = new WP_HTML_Tag_Processor( "<pre class=\"pad\">\nfoo" );
    $processor->next_token();                  // PRE.
    $processor->remove_attribute( 'class' );
    $processor->next_token();                  // #text.
    $processor->get_modifiable_text();         // 'foo'
    $processor->get_updated_html();
    $processor->get_modifiable_text();         // "\nfoo" - wrong!

A stale position also poisoned the ignored-newline state recorded for
bookmarks set after applying updates, which could reintroduce the
inconsistency through seek().

Resolve this by accumulating, against the original document
coordinates, the shift from every update located before the ignored-
newline position, and applying the sum once after the updates are
written - the same pattern used for shifting a given pointer. Comparing
against original coordinates matters: adjusting the position inside the
update loop would compare earlier-shifted values against later updates'
unshifted offsets and apply the wrong total shift when multiple updates
are applied together.

Follow-up to [63ac0ac730].

See #65372.
---
 .../html-api/class-wp-html-tag-processor.php  |  17 ++-
 .../wpHtmlTagProcessorModifiableText.php      | 100 ++++++++++++++++++
 2 files changed, 115 insertions(+), 2 deletions(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 96780871752cc..7f7806735460d 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2530,6 +2530,7 @@ private function class_name_updates_to_attributes_updates(): void {
 	 * @since 6.2.0
 	 * @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
 	 * @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
+	 * @since 7.1.0 Accumulates shift for the ignored-newline position after LISTING and PRE opening tags.
 	 * @ignore
 	 *
 	 * @param int $shift_this_point Accumulate and return shift for this position.
@@ -2540,7 +2541,8 @@ private function apply_attributes_updates( int $shift_this_point ): int {
 			return 0;
 		}
 
-		$accumulated_shift_for_given_point = 0;
+		$accumulated_shift_for_given_point  = 0;
+		$accumulated_shift_for_skip_newline = 0;
 
 		/*
 		 * Attribute updates can be enqueued in any order but updates
@@ -2564,6 +2566,11 @@ private function apply_attributes_updates( int $shift_this_point ): int {
 				$this->bytes_already_parsed += $shift;
 			}
 
+			// Accumulate shift of the ignored-newline position within this function call.
+			if ( null !== $this->skip_newline_at && $diff->start < $this->skip_newline_at ) {
+				$accumulated_shift_for_skip_newline += $shift;
+			}
+
 			// Accumulate shift of the given pointer within this function call.
 			if ( $diff->start < $shift_this_point ) {
 				$accumulated_shift_for_given_point += $shift;
@@ -2576,6 +2583,11 @@ private function apply_attributes_updates( int $shift_this_point ): int {
 
 		$this->html = $output_buffer . substr( $this->html, $bytes_already_copied );
 
+		// Adjust the ignored-newline position by however much the updates moved it.
+		if ( null !== $this->skip_newline_at ) {
+			$this->skip_newline_at += $accumulated_shift_for_skip_newline;
+		}
+
 		/*
 		 * Adjust bookmark locations to account for how the text
 		 * replacements adjust offsets in the input document.
@@ -3680,7 +3692,8 @@ public function subdivide_text_appropriately(): bool {
 	 *
 	 * @since 6.5.0
 	 * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately.
-	 * @since 7.1.0 Ignores the leading newline after LISTING and PRE opening tags even after seeking.
+	 * @since 7.1.0 Ignores the leading newline after LISTING and PRE opening tags even after
+	 *              seeking or applying enqueued updates.
 	 *
 	 * @return string
 	 */
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
index 27c0c17ebb127..09aaa59e48828 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
@@ -387,6 +387,106 @@ public static function data_pre_and_listing_tags() {
 		);
 	}
 
+	/**
+	 * Ensures that reading the text node immediately following a PRE or
+	 * LISTING opener continues to ignore its leading newline after enqueued
+	 * updates have been applied and document offsets have shifted.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers WP_HTML_Tag_Processor::get_updated_html
+	 * @covers WP_HTML_Tag_Processor::get_modifiable_text
+	 *
+	 * @dataProvider data_pre_and_listing_tags
+	 *
+	 * @param string $tag_name Tag name of the element which ignores a leading newline.
+	 */
+	public function test_get_modifiable_text_ignores_leading_newline_after_applying_updates( string $tag_name ) {
+		$tag       = strtolower( $tag_name );
+		$processor = new WP_HTML_Tag_Processor( "<{$tag} class=\"pad\">\nfoo" );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the tag: check test setup.' );
+		$processor->remove_attribute( 'class' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text node: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Failed to find the text node: check test setup.' );
+
+		$before_applying = $processor->get_modifiable_text();
+		$this->assertSame( 'foo', $before_applying, 'Should have ignored the leading newline before applying updates.' );
+
+		$processor->get_updated_html();
+		$this->assertSame( $before_applying, $processor->get_modifiable_text(), 'Should have ignored the leading newline after applying updates.' );
+	}
+
+	/**
+	 * Ensures that reading the text node immediately following a PRE or
+	 * LISTING opener continues to ignore its leading newline after applying
+	 * multiple enqueued updates of different sizes in a single pass.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers WP_HTML_Tag_Processor::get_updated_html
+	 * @covers WP_HTML_Tag_Processor::get_modifiable_text
+	 *
+	 * @dataProvider data_pre_and_listing_tags
+	 *
+	 * @param string $tag_name Tag name of the element which ignores a leading newline.
+	 */
+	public function test_get_modifiable_text_ignores_leading_newline_after_applying_multiple_updates( string $tag_name ) {
+		$tag       = strtolower( $tag_name );
+		$processor = new WP_HTML_Tag_Processor( "<div class=\"pad\">x</div><{$tag} aaaaaaaaaaaaaaaaaaaaaaaa=\"x\" b=\"y\">\nfoo" );
+
+		$this->assertTrue( $processor->next_tag( 'DIV' ), 'Failed to find the DIV: check test setup.' );
+		$processor->remove_attribute( 'class' );
+
+		$this->assertTrue( $processor->next_tag( $tag_name ), 'Failed to find the tag: check test setup.' );
+		$processor->remove_attribute( 'aaaaaaaaaaaaaaaaaaaaaaaa' );
+		$processor->remove_attribute( 'b' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text node: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Failed to find the text node: check test setup.' );
+
+		$before_applying = $processor->get_modifiable_text();
+		$this->assertSame( 'foo', $before_applying, 'Should have ignored the leading newline before applying updates.' );
+
+		$processor->get_updated_html();
+		$this->assertSame( $before_applying, $processor->get_modifiable_text(), 'Should have ignored the leading newline after applying updates.' );
+	}
+
+	/**
+	 * Ensures that the text node immediately following a PRE or LISTING opener
+	 * continues to ignore its leading newline after applying an attribute
+	 * update on the opener together with a replacement of the text itself.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers WP_HTML_Tag_Processor::get_updated_html
+	 * @covers WP_HTML_Tag_Processor::get_modifiable_text
+	 *
+	 * @dataProvider data_pre_and_listing_tags
+	 *
+	 * @param string $tag_name Tag name of the element which ignores a leading newline.
+	 */
+	public function test_get_modifiable_text_ignores_leading_newline_after_growing_opener_and_replacing_text( string $tag_name ) {
+		$tag       = strtolower( $tag_name );
+		$processor = new WP_HTML_Tag_Processor( "<{$tag}>\nfoo" );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the tag: check test setup.' );
+		$this->assertSame( $tag_name, $processor->get_token_name(), 'Failed to find the tag: check test setup.' );
+		$processor->set_attribute( 'class', 'wide' );
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text node: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Failed to find the text node: check test setup.' );
+		$this->assertTrue( $processor->set_modifiable_text( "\nlonger" ), 'Failed to replace the modifiable text: check test setup.' );
+
+		$before_applying = $processor->get_modifiable_text();
+		$this->assertSame( 'longer', $before_applying, 'Should have ignored the leading newline before applying updates.' );
+
+		$processor->get_updated_html();
+		$this->assertSame( $before_applying, $processor->get_modifiable_text(), 'Should have ignored the leading newline after applying updates.' );
+	}
+
 	/**
 	 * Ensures that seeking directly to a text node immediately following a PRE
 	 * or LISTING opener continues to ignore its leading newline when document