From f681caf17a07157377cb14d4e494fbfdd50d6b99 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 10 Jun 2026 22:45:03 +0200 Subject: [PATCH] Fix decoded HTML attribute prefix matching --- src/wp-includes/html-api/class-wp-html-decoder.php | 14 ++++++++++---- tests/phpunit/tests/html-api/wpHtmlDecoder.php | 5 +++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-decoder.php b/src/wp-includes/html-api/class-wp-html-decoder.php index d14009d3d9fb8..439ab83190d63 100644 --- a/src/wp-includes/html-api/class-wp-html-decoder.php +++ b/src/wp-includes/html-api/class-wp-html-decoder.php @@ -60,17 +60,23 @@ public static function attribute_starts_with( $haystack, $search_text, $case_sen continue; } - // If there is a character reference, then the decoded value must exactly match what follows in the search string. - if ( 0 !== substr_compare( $search_text, $next_chunk, $search_at, strlen( $next_chunk ), $loose_case ) ) { + /* + * If there is a character reference, then the decoded value must + * match what follows in the search string. The search string may + * end within a multi-code-point replacement, such as `&nvlt;` + * decoding to `<⃒`, and still be a prefix match. + */ + $match_length = min( strlen( $next_chunk ), $search_length - $search_at ); + if ( 0 !== substr_compare( $search_text, $next_chunk, $search_at, $match_length, $loose_case ) ) { return false; } // The character reference matched, so continue checking. 
$haystack_at += $token_length; - $search_at += strlen( $next_chunk ); + $search_at += $match_length; } - return true; + return $search_at === $search_length; } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlDecoder.php b/tests/phpunit/tests/html-api/wpHtmlDecoder.php index 97954f4eb3e30..bbbd94fc09d12 100644 --- a/tests/phpunit/tests/html-api/wpHtmlDecoder.php +++ b/tests/phpunit/tests/html-api/wpHtmlDecoder.php @@ -161,6 +161,11 @@ public static function data_attributes_with_prefix_and_case_sensitive_match() { array( 'http://wordpress.org', 'Http', 'ascii-case-insensitive', true ), array( 'http://wordpress.org', 'https', 'case-sensitive', false ), array( 'http://wordpress.org', 'https', 'ascii-case-insensitive', false ), + array( '', 'http', 'case-sensitive', false ), + array( 'jav', 'javascript:', 'case-sensitive', false ), + array( 'jav', 'javascript:', 'ascii-case-insensitive', false ), + array( '&nvlt;script', '<', 'case-sensitive', true ), + array( '&nvgt;script', '>', 'case-sensitive', true ), ); } }