Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 52 additions & 9 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,23 @@ class WP_HTML_Tag_Processor {
*/
private $skip_newline_at = null;

/**
* Indicates which bookmarks point to a token which immediately follows
* the opening tag of a LISTING or PRE element, where a leading newline
* should be ignored when reading modifiable text.
*
* Keys are bookmark names and every stored value is `true`; only the
* presence of a key carries meaning. An entry is recorded when a bookmark
* is set on a token starting at the ignored-newline position, and it is
* removed when that bookmark is released or is set again on a token which
* does not start at that position.
*
* When seeking to a bookmark, this state must be restored because it
* cannot be re-derived from the bookmarked location alone.
*
* @since 7.1.0
*
* @see WP_HTML_Tag_Processor::$skip_newline_at
* @see WP_HTML_Tag_Processor::set_bookmark()
* @see WP_HTML_Tag_Processor::release_bookmark()
* @see WP_HTML_Tag_Processor::seek()
*
* @var array<string, true>
*/
private $bookmark_skips_newline = array();

/**
* Constructor.
*
Expand Down Expand Up @@ -1358,6 +1375,12 @@ public function set_bookmark( $name ): bool {

$this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length );

if ( $this->token_starts_at === $this->skip_newline_at ) {
$this->bookmark_skips_newline[ $name ] = true;
} else {
unset( $this->bookmark_skips_newline[ $name ] );
}

return true;
}

Expand All @@ -1376,7 +1399,7 @@ public function release_bookmark( $name ): bool {
return false;
}

unset( $this->bookmarks[ $name ] );
unset( $this->bookmarks[ $name ], $this->bookmark_skips_newline[ $name ] );

return true;
}
Expand Down Expand Up @@ -2507,6 +2530,7 @@ private function class_name_updates_to_attributes_updates(): void {
* @since 6.2.0
* @since 6.2.1 Accumulates shift for internal cursor and passed pointer.
* @since 6.3.0 Invalidate any bookmarks whose targets are overwritten.
* @since 7.1.0 Accumulates shift for the ignored-newline position after LISTING and PRE opening tags.
* @ignore
*
* @param int $shift_this_point Accumulate and return shift for this position.
Expand All @@ -2517,7 +2541,8 @@ private function apply_attributes_updates( int $shift_this_point ): int {
return 0;
}

$accumulated_shift_for_given_point = 0;
$accumulated_shift_for_given_point = 0;
$accumulated_shift_for_skip_newline = 0;

/*
* Attribute updates can be enqueued in any order but updates
Expand All @@ -2541,6 +2566,11 @@ private function apply_attributes_updates( int $shift_this_point ): int {
$this->bytes_already_parsed += $shift;
}

// Accumulate shift of the ignored-newline position within this function call.
if ( null !== $this->skip_newline_at && $diff->start < $this->skip_newline_at ) {
$accumulated_shift_for_skip_newline += $shift;
}

// Accumulate shift of the given pointer within this function call.
if ( $diff->start < $shift_this_point ) {
$accumulated_shift_for_given_point += $shift;
Expand All @@ -2553,6 +2583,11 @@ private function apply_attributes_updates( int $shift_this_point ): int {

$this->html = $output_buffer . substr( $this->html, $bytes_already_copied );

// Adjust the ignored-newline position by however much the updates moved it.
if ( null !== $this->skip_newline_at ) {
$this->skip_newline_at += $accumulated_shift_for_skip_newline;
}

/*
* Adjust bookmark locations to account for how the text
* replacements adjust offsets in the input document.
Expand Down Expand Up @@ -2620,6 +2655,7 @@ public function has_bookmark( $bookmark_name ): bool {
* maximum limit on the number of times seek() can be called.
*
* @since 6.2.0
* @since 7.1.0 Restores the ignored-newline state for tokens following LISTING and PRE opening tags.
*
* @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
* @return bool Whether the internal cursor was successfully moved to the bookmark's location.
Expand Down Expand Up @@ -2658,6 +2694,18 @@ public function seek( $bookmark_name ): bool {
// Point this tag processor before the sought tag opener and consume it.
$this->bytes_already_parsed = $this->bookmarks[ $bookmark_name ]->start;
$this->parser_state = self::STATE_READY;

/*
* The leading newline after a LISTING or PRE opening tag is ignored
* as an authoring convenience. This state is set when scanning past
* one of these opening tags, but a later LISTING or PRE tag may have
* overwritten it; it must be restored from the bookmark when seeking
* to a token which immediately follows such an opening tag.
*/
$this->skip_newline_at = isset( $this->bookmark_skips_newline[ $bookmark_name ] )
? $this->bytes_already_parsed
: null;

return $this->next_token();
}

Expand Down Expand Up @@ -3642,15 +3690,10 @@ public function subdivide_text_appropriately(): bool {
* that a token has modifiable text, and a token with modifiable text may
* have an empty string (e.g. a comment with no contents).
*
* Limitations:
*
* - This function will not strip the leading newline appropriately
* after seeking into a LISTING or PRE element. To ensure that the
* newline is treated properly, seek to the LISTING or PRE opening
* tag instead of to the first text node inside the element.
*
* @since 6.5.0
* @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately.
* @since 7.1.0 Ignores the leading newline after LISTING and PRE opening tags even after
* seeking or applying enqueued updates.
*
* @return string
*/
Expand Down
191 changes: 186 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ public function test_modifiable_text_reads_updates_after_setting() {
/**
* Ensures that the leading newline after LISTING and PRE opening tags
* is still ignored appropriately after seeking.
*
* @ticket 65372
*/
public function test_get_modifiable_text_ignores_newlines_after_seeking() {
$processor = new WP_HTML_Tag_Processor(
Expand Down Expand Up @@ -324,14 +326,10 @@ public function test_get_modifiable_text_ignores_newlines_after_seeking() {
);

$processor->seek( 'listing' );
if ( "\ngone" === $processor->get_modifiable_text() ) {
$this->markTestSkipped( "There's no support currently for handling the leading newline after seeking." );
}

$this->assertSame(
'gone',
$processor->get_modifiable_text(),
'Should have remembered to remote leading newline from LISTING element after seeking around it.'
'Should have remembered to remove the leading newline from LISTING element after seeking around it.'
);

$processor->seek( 'div' );
Expand All @@ -342,6 +340,189 @@ public function test_get_modifiable_text_ignores_newlines_after_seeking() {
);
}

/**
 * Verifies that a bookmark placed on the text node directly after a PRE or
 * LISTING opening tag still yields text without its leading newline when
 * seeking back to it after the processor has advanced onto a second PRE or
 * LISTING opening tag.
 *
 * @ticket 65372
 *
 * @covers WP_HTML_Tag_Processor::seek
 * @covers WP_HTML_Tag_Processor::get_modifiable_text
 *
 * @dataProvider data_pre_and_listing_tags
 *
 * @param string $tag_name Tag name of the element which ignores a leading newline.
 */
public function test_get_modifiable_text_ignores_leading_newline_after_seeking_directly_to_text( string $tag_name ) {
	$opening_tag = '<' . strtolower( $tag_name ) . '>';
	$processor   = new WP_HTML_Tag_Processor( $opening_tag . "\n \n\n>" . $opening_tag );

	// Advance onto the first opening tag.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the first tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the first tag: check test setup.'
	);

	// Advance onto the text node which directly follows that opening tag.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertSame(
		'#text',
		$processor->get_token_name(),
		'Failed to find the text node: check test setup.'
	);

	// Capture the text as read while scanning forward, then bookmark the node.
	$text_on_first_pass = $processor->get_modifiable_text();
	$this->assertSame(
		" \n\n>",
		$text_on_first_pass,
		'Should have ignored the leading newline on the first traversal.'
	);
	$this->assertTrue(
		$processor->set_bookmark( 'text' ),
		'Failed to set a bookmark on the text node: check test setup.'
	);

	// Advance onto the second opening tag so the seek below must move backwards.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the second tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the second tag: check test setup.'
	);

	// The text read after seeking must match what was read on the first pass.
	$this->assertTrue(
		$processor->seek( 'text' ),
		'Failed to seek to the bookmarked text node.'
	);
	$this->assertSame(
		$text_on_first_pass,
		$processor->get_modifiable_text(),
		'Should have ignored the leading newline after seeking back to the text node.'
	);
}

/**
 * Data provider supplying the tag names of the elements under test,
 * PRE and LISTING, with each dataset named after its tag.
 *
 * @return array[] Datasets keyed by tag name, each holding that tag name as its only argument.
 */
public static function data_pre_and_listing_tags() {
	$datasets = array();

	foreach ( array( 'PRE', 'LISTING' ) as $tag_name ) {
		$datasets[ $tag_name ] = array( $tag_name );
	}

	return $datasets;
}

/**
 * Verifies that the text node directly after a PRE or LISTING opening tag
 * is still read without its leading newline once an enqueued attribute
 * removal on the opening tag has been applied via get_updated_html().
 *
 * @ticket 65372
 *
 * @covers WP_HTML_Tag_Processor::get_updated_html
 * @covers WP_HTML_Tag_Processor::get_modifiable_text
 *
 * @dataProvider data_pre_and_listing_tags
 *
 * @param string $tag_name Tag name of the element which ignores a leading newline.
 */
public function test_get_modifiable_text_ignores_leading_newline_after_applying_updates( string $tag_name ) {
	$html      = '<' . strtolower( $tag_name ) . ' class="pad">' . "\nfoo";
	$processor = new WP_HTML_Tag_Processor( $html );

	// Advance onto the opening tag and enqueue removal of its only attribute.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the tag: check test setup.'
	);
	$processor->remove_attribute( 'class' );

	// Advance onto the text node which directly follows the opening tag.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertSame(
		'#text',
		$processor->get_token_name(),
		'Failed to find the text node: check test setup.'
	);

	$text_before_flush = $processor->get_modifiable_text();
	$this->assertSame(
		'foo',
		$text_before_flush,
		'Should have ignored the leading newline before applying updates.'
	);

	// Request the updated HTML, then read the same text node again.
	$processor->get_updated_html();
	$this->assertSame(
		$text_before_flush,
		$processor->get_modifiable_text(),
		'Should have ignored the leading newline after applying updates.'
	);
}

/**
 * Verifies that the text node directly after a PRE or LISTING opening tag
 * is still read without its leading newline when several attribute removals
 * of differing lengths, on an earlier DIV and on the opening tag itself,
 * are applied together by one call to get_updated_html().
 *
 * @ticket 65372
 *
 * @covers WP_HTML_Tag_Processor::get_updated_html
 * @covers WP_HTML_Tag_Processor::get_modifiable_text
 *
 * @dataProvider data_pre_and_listing_tags
 *
 * @param string $tag_name Tag name of the element which ignores a leading newline.
 */
public function test_get_modifiable_text_ignores_leading_newline_after_applying_multiple_updates( string $tag_name ) {
	$lowercase_tag = strtolower( $tag_name );
	$html          = '<div class="aaaaaaaaaaaaaaaaaaaa">x</div>' . "<{$lowercase_tag} aaaaaaaaaaaaaaaaaaaaaaaa=\"x\" b=\"y\">\nfoo";
	$processor     = new WP_HTML_Tag_Processor( $html );

	// Enqueue an attribute removal on the DIV which precedes the element under test.
	$this->assertTrue(
		$processor->next_tag( 'DIV' ),
		'Failed to find the DIV: check test setup.'
	);
	$processor->remove_attribute( 'class' );

	// Enqueue two attribute removals of different lengths on the opening tag.
	$this->assertTrue(
		$processor->next_tag( $tag_name ),
		'Failed to find the tag: check test setup.'
	);
	$processor->remove_attribute( 'aaaaaaaaaaaaaaaaaaaaaaaa' );
	$processor->remove_attribute( 'b' );

	// Advance onto the text node which directly follows the opening tag.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertSame(
		'#text',
		$processor->get_token_name(),
		'Failed to find the text node: check test setup.'
	);

	$text_before_flush = $processor->get_modifiable_text();
	$this->assertSame(
		'foo',
		$text_before_flush,
		'Should have ignored the leading newline before applying updates.'
	);

	// Request the updated HTML, then read the same text node again.
	$processor->get_updated_html();
	$this->assertSame(
		$text_before_flush,
		$processor->get_modifiable_text(),
		'Should have ignored the leading newline after applying updates.'
	);
}

/**
 * Verifies that the text node directly after a PRE or LISTING opening tag
 * is still read without its leading newline when an attribute is added to
 * the opening tag and the text node itself is replaced before calling
 * get_updated_html().
 *
 * @ticket 65372
 *
 * @covers WP_HTML_Tag_Processor::get_updated_html
 * @covers WP_HTML_Tag_Processor::get_modifiable_text
 *
 * @dataProvider data_pre_and_listing_tags
 *
 * @param string $tag_name Tag name of the element which ignores a leading newline.
 */
public function test_get_modifiable_text_ignores_leading_newline_after_growing_opener_and_replacing_text( string $tag_name ) {
	$html      = '<' . strtolower( $tag_name ) . '>' . "\nfoo";
	$processor = new WP_HTML_Tag_Processor( $html );

	// Advance onto the opening tag and enqueue an attribute addition on it.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the tag: check test setup.'
	);
	$processor->set_attribute( 'class', 'wide' );

	// Advance onto the text node and replace it with text that also starts with a newline.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertSame(
		'#text',
		$processor->get_token_name(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertTrue(
		$processor->set_modifiable_text( "\nlonger" ),
		'Failed to replace the modifiable text: check test setup.'
	);

	$text_before_flush = $processor->get_modifiable_text();
	$this->assertSame(
		'longer',
		$text_before_flush,
		'Should have ignored the leading newline before applying updates.'
	);

	// Request the updated HTML, then read the same text node again.
	$processor->get_updated_html();
	$this->assertSame(
		$text_before_flush,
		$processor->get_modifiable_text(),
		'Should have ignored the leading newline after applying updates.'
	);
}

/**
 * Verifies that a bookmark placed on the text node directly after a PRE or
 * LISTING opening tag still yields text without its leading newline when
 * seeking back to it while an attribute removal is enqueued on that
 * opening tag.
 *
 * @ticket 65372
 *
 * @covers WP_HTML_Tag_Processor::seek
 * @covers WP_HTML_Tag_Processor::get_modifiable_text
 *
 * @dataProvider data_pre_and_listing_tags
 *
 * @param string $tag_name Tag name of the element which ignores a leading newline.
 */
public function test_get_modifiable_text_ignores_leading_newline_after_seeking_when_offsets_have_shifted( string $tag_name ) {
	$lowercase_tag = strtolower( $tag_name );
	$html          = '<' . $lowercase_tag . ' class="pad">' . "\nfoo" . '<' . $lowercase_tag . '>';
	$processor     = new WP_HTML_Tag_Processor( $html );

	// Advance onto the first opening tag and enqueue removal of its attribute.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the first tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the first tag: check test setup.'
	);
	$processor->remove_attribute( 'class' );

	// Advance onto the text node which directly follows that opening tag.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the text node: check test setup.'
	);
	$this->assertSame(
		'#text',
		$processor->get_token_name(),
		'Failed to find the text node: check test setup.'
	);

	// Capture the text as read while scanning forward, then bookmark the node.
	$text_on_first_pass = $processor->get_modifiable_text();
	$this->assertSame(
		'foo',
		$text_on_first_pass,
		'Should have ignored the leading newline on the first traversal.'
	);
	$this->assertTrue(
		$processor->set_bookmark( 'text' ),
		'Failed to set a bookmark on the text node: check test setup.'
	);

	// Advance onto the second opening tag so the seek below must move backwards.
	$this->assertTrue(
		$processor->next_token(),
		'Failed to find the second tag: check test setup.'
	);
	$this->assertSame(
		$tag_name,
		$processor->get_token_name(),
		'Failed to find the second tag: check test setup.'
	);

	// The text read after seeking must match what was read on the first pass.
	$this->assertTrue(
		$processor->seek( 'text' ),
		'Failed to seek to the bookmarked text node.'
	);
	$this->assertSame(
		$text_on_first_pass,
		$processor->get_modifiable_text(),
		'Should have ignored the leading newline after seeking back to the text node.'
	);
}

/**
* Ensures that modifiable text updates are not applied where they aren't supported.
*
Expand Down
Loading