Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/wp-includes/html-api/class-wp-html-decoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,14 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
* character reference table but the match doesn't end in `;`.
* It may be allowed if it's followed by something unambiguous.
*/
$follower_byte = $after_name < $length ? ord( $text[ $after_name ] ) : null;
$ambiguous_follower = (
$after_name < $length &&
$name_at < $length &&
null !== $follower_byte &&
(
ctype_alnum( $text[ $after_name ] ) ||
'=' === $text[ $after_name ]
( $follower_byte >= 0x30 && $follower_byte <= 0x39 ) ||
( $follower_byte >= 0x41 && $follower_byte <= 0x5A ) ||
( $follower_byte >= 0x61 && $follower_byte <= 0x7A ) ||
0x3D === $follower_byte
)
);

Expand Down
74 changes: 74 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlDecoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,80 @@
$this->assertSame( "&\x00b", $decoded, 'Should have decoded the text without changing it.' );
}

/**
 * Ensures semicolonless legacy references decode before non-ASCII UTF-8 bytes in attributes.
 *
 * `ctype_alnum()` is locale-dependent, so this test switches `LC_CTYPE` to the
 * first available candidate locale and only proceeds when that locale reports
 * the high-bit byte `0xC2` as alphanumeric. Otherwise the test is skipped.
 * The previous `LC_CTYPE` locale is restored on every exit path.
 */
public function test_semicolonless_legacy_reference_before_multibyte_attribute_follower() {
	// Passing '0' returns the current LC_CTYPE setting without changing it.
	$previous_locale = setlocale( LC_CTYPE, '0' );

	try {
		$affected_locale = setlocale( LC_CTYPE, 'C.UTF-8', 'en_US.UTF-8', 'de_DE.UTF-8', 'fr_FR.UTF-8' );

		/*
		 * markTestSkipped() aborts the test by throwing, so the `finally`
		 * block below still restores the locale when the test is skipped.
		 */
		if ( false === $affected_locale || ! ctype_alnum( "\xC2" ) ) {
			$this->markTestSkipped( 'Requires an LC_CTYPE locale where ctype_alnum() classifies high-bit bytes as alphanumeric.' );
		}

		// "\xC2\x80" is the UTF-8 encoding of U+0080; its lead byte follows the reference name.
		$raw_attribute = "&Aacute\xC2\x80";

		$this->assertSame(
			"\xC3\x81\xC2\x80",
			WP_HTML_Decoder::decode_attribute( $raw_attribute ),
			'Should have decoded the semicolonless legacy reference before a multibyte follower.'
		);

		$match_byte_length = null;
		$this->assertSame(
			"\xC3\x81",
			WP_HTML_Decoder::read_character_reference( 'attribute', $raw_attribute, 0, $match_byte_length ),
			'Should have matched the semicolonless legacy reference before a multibyte follower.'
		);
		$this->assertSame(
			strlen( '&Aacute' ),
			$match_byte_length,
			'Should have reported the byte length of the semicolonless legacy reference.'
		);
	} finally {
		if ( false !== $previous_locale ) {
			setlocale( LC_CTYPE, $previous_locale );
		}
	}
}

/**
 * Ensures semicolonless legacy references remain ambiguous before ASCII alnum or equals.
 *
 * Checks both entry points: `decode_attribute()` must return the input unchanged,
 * and `read_character_reference()` must return null without writing to the
 * by-reference match length.
 *
 * @dataProvider data_ambiguous_ascii_attribute_followers
 *
 * @param string $raw_attribute Raw attribute value with an ambiguous legacy reference follower.
 */
public function test_semicolonless_legacy_reference_before_ascii_attribute_follower_is_ambiguous( $raw_attribute ) {
	$this->assertSame(
		$raw_attribute,
		WP_HTML_Decoder::decode_attribute( $raw_attribute ),
		'Should not have decoded an ambiguous semicolonless legacy reference.'
	);

	// A sentinel value makes any write to the by-reference argument detectable.
	$match_byte_length = 'sentinel';
	$this->assertNull(
		WP_HTML_Decoder::read_character_reference( 'attribute', $raw_attribute, 0, $match_byte_length ),
		'Should not have matched an ambiguous semicolonless legacy reference.'
	);
	$this->assertSame(
		'sentinel',
		$match_byte_length,
		'Should not have modified the match byte length when no reference matched.'
	);
}

/**
 * Data provider.
 *
 * Each dataset is `&Aacute` without a trailing semicolon, immediately followed
 * by one ASCII digit, ASCII letter, or equals sign.
 *
 * @return array[] Datasets keyed by follower description, each holding one raw attribute value.
 */
public static function data_ambiguous_ascii_attribute_followers() {
	return array(
		'ASCII digit'           => array( '&Aacute0' ),
		'ASCII uppercase alpha' => array( '&AacuteA' ),
		'ASCII lowercase alpha' => array( '&Aacutea' ),
		'equals'                => array( '&Aacute=' ),
	);
}

/**
* Ensures proper detection of attribute prefixes ignoring ASCII case.
*
Expand Down
Loading