From 05cfed923c3d43c9b8eda70625f004d28440ac5f Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:19:37 +0200
Subject: [PATCH 1/6] Fix WP_Token_Map array export key length

---
 src/wp-includes/class-wp-token-map.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php
index fc223b187f8c5..108f28475241a 100644
--- a/src/wp-includes/class-wp-token-map.php
+++ b/src/wp-includes/class-wp-token-map.php
@@ -662,7 +662,7 @@ public function to_array(): array {
 		}
 
 		foreach ( $this->large_words as $index => $group ) {
-			$prefix       = substr( $this->groups, $index * ( $this->key_length + 1 ), 2 );
+			$prefix       = substr( $this->groups, $index * ( $this->key_length + 1 ), $this->key_length );
 			$group_length = strlen( $group );
 			$at           = 0;
 			while ( $at < $group_length ) {

From d875beeb79a145a8ec52325a397e8ae0b6279ac6 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:30:41 +0200
Subject: [PATCH 2/6] Fix WP_Token_Map read_token bounds

---
 src/wp-includes/class-wp-token-map.php | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php
index 108f28475241a..da038556146fe 100644
--- a/src/wp-includes/class-wp-token-map.php
+++ b/src/wp-includes/class-wp-token-map.php
@@ -536,14 +536,16 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_
 		$text_length = strlen( $text );
 
 		// Search for a long word first, if the text is long enough, and if that fails, a short one.
-		if ( $text_length > $this->key_length ) {
+		if ( $text_length - $offset > $this->key_length ) {
 			/*
 			 * Keys cannot contain null bytes, which is taken care of for the full words,
 			 * but here it’s required to reject group keys with null bytes so that the
 			 * lookup doesn’t get off track when scanning the group string.
 			 */
 			if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) {
-				return null;
+				return strlen( $this->small_words ) > 0
+					? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity )
+					: null;
 			}
 
 			$group_key = substr( $text, $offset, $this->key_length );
@@ -596,6 +598,10 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke
 		$ignore_case  = 'ascii-case-insensitive' === $case_sensitivity;
 		$small_length = strlen( $this->small_words );
 		$search_text  = substr( $text, $offset, $this->key_length );
+		if ( '' === $search_text ) {
+			return null;
+		}
+
 		if ( $ignore_case ) {
 			$search_text = strtoupper( $search_text );
 		}
@@ -617,6 +623,11 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke
 					return $this->small_mappings[ $at / ( $this->key_length + 1 ) ];
 				}
 
+				if ( ! isset( $search_text[ $adjust ] ) ) {
+					$at += $this->key_length + 1;
+					continue 2;
+				}
+
 				if (
 					$search_text[ $adjust ] !== $this->small_words[ $at + $adjust ] &&
 					( ! $ignore_case || strtoupper( $this->small_words[ $at + $adjust ] !== $search_text[ $adjust ] ) )

From 2110e539bdf64e2ce7b5d4121d13a1885f35e161 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:31:53 +0200
Subject: [PATCH 3/6] Fix WP_Token_Map ASCII matching

---
 src/wp-includes/class-wp-token-map.php | 74 +++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 12 deletions(-)

diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php
index da038556146fe..3409d5519b13c 100644
--- a/src/wp-includes/class-wp-token-map.php
+++ b/src/wp-includes/class-wp-token-map.php
@@ -451,13 +451,20 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti
 				return false;
 			}
 
-			$term    = str_pad( $word, $this->key_length + 1, "\x00", STR_PAD_RIGHT );
-			$word_at = $ignore_case ? stripos( $this->small_words, $term ) : strpos( $this->small_words, $term );
-			if ( false === $word_at ) {
-				return false;
+			$term = str_pad( $word, $this->key_length + 1, "\x00", STR_PAD_RIGHT );
+			if ( ! $ignore_case ) {
+				return false !== strpos( $this->small_words, $term );
+			}
+
+			$small_length  = strlen( $this->small_words );
+			$record_length = $this->key_length + 1;
+			for ( $at = 0; $at < $small_length; $at += $record_length ) {
+				if ( self::matches_at( $this->small_words, $term, $at, $record_length, $ignore_case ) ) {
+					return true;
+				}
 			}
 
-			return true;
+			return false;
 		}
 
 		$group_key = substr( $word, 0, $this->key_length );
@@ -478,7 +485,7 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti
 			$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
 			$mapping_at     = $at;
 
-			if ( $token_length === $length && 0 === substr_compare( $group, $slug, $token_at, $token_length, $ignore_case ) ) {
+			if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) {
 				return true;
 			}
 
@@ -567,7 +574,7 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_
 				$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
 				$mapping_at     = $at;
 
-				if ( 0 === substr_compare( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) {
+				if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) {
 					$matched_token_byte_length = $this->key_length + $token_length;
 					return substr( $group, $mapping_at, $mapping_length );
 				}
@@ -603,15 +610,18 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke
 		}
 
 		if ( $ignore_case ) {
-			$search_text = strtoupper( $search_text );
+			$search_text = self::ascii_lowercase( $search_text );
 		}
 		$starting_char = $search_text[0];
 
 		$at = 0;
 		while ( $at < $small_length ) {
+			$stored_starting_char = $ignore_case
+				? self::ascii_lowercase( $this->small_words[ $at ] )
+				: $this->small_words[ $at ];
+
 			if (
-				$starting_char !== $this->small_words[ $at ] &&
-				( ! $ignore_case || strtoupper( $this->small_words[ $at ] ) !== $starting_char )
+				$starting_char !== $stored_starting_char
 			) {
 				$at += $this->key_length + 1;
 				continue;
@@ -628,9 +638,12 @@ private function read_small_token( string $text, int $offset = 0, &$matched_toke
 					continue 2;
 				}
 
+				$stored_char = $ignore_case
+					? self::ascii_lowercase( $this->small_words[ $at + $adjust ] )
+					: $this->small_words[ $at + $adjust ];
+
 				if (
-					$search_text[ $adjust ] !== $this->small_words[ $at + $adjust ] &&
-					( ! $ignore_case || strtoupper( $this->small_words[ $at + $adjust ] !== $search_text[ $adjust ] ) )
+					$search_text[ $adjust ] !== $stored_char
 				) {
 					$at += $this->key_length + 1;
 					continue 2;
@@ -840,4 +853,41 @@ private static function longest_first_then_alphabetical( string $a, string $b ):
 
 		return strcmp( $a, $b );
 	}
+
+	/**
+	 * Checks whether a substring matches at a given offset.
+	 *
+	 * @since 6.6.0
+	 *
+	 * @param string $haystack    String to search within.
+	 * @param string $needle      String to match.
+	 * @param int    $offset      Offset into the haystack.
+	 * @param int    $length      Number of bytes to compare; the needle must be exactly this long to match.
+	 * @param bool   $ignore_case Whether to fold ASCII case while matching.
+	 * @return bool Whether the substring matched.
+	 */
+	private static function matches_at( string $haystack, string $needle, int $offset, int $length, bool $ignore_case ): bool {
+		$candidate = substr( $haystack, $offset, $length );
+		if ( strlen( $candidate ) !== $length ) {
+			return false;
+		}
+
+		if ( ! $ignore_case ) {
+			return $candidate === $needle;
+		}
+
+		return self::ascii_lowercase( $candidate ) === self::ascii_lowercase( $needle );
+	}
+
+	/**
+	 * Lowercases ASCII bytes only.
+	 *
+	 * @since 6.6.0
+	 *
+	 * @param string $text Text to lowercase.
+	 * @return string Text with only ASCII uppercase bytes folded to lowercase.
+	 */
+	private static function ascii_lowercase( string $text ): string {
+		return strtr( $text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
+	}
 }

From fc0d5febcc328b93362ca597cf86863b376911b0 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:33:13 +0200
Subject: [PATCH 4/6] Handle WP_Token_Map folded group keys

---
 src/wp-includes/class-wp-token-map.php | 142 +++++++++++++++++++------
 1 file changed, 108 insertions(+), 34 deletions(-)

diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php
index 3409d5519b13c..6e38fd9c05774 100644
--- a/src/wp-includes/class-wp-token-map.php
+++ b/src/wp-includes/class-wp-token-map.php
@@ -467,29 +467,33 @@ public function contains( string $word, string $case_sensitivity = 'case-sensiti
 			return false;
 		}
 
-		$group_key = substr( $word, 0, $this->key_length );
-		$group_at  = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
-		if ( false === $group_at ) {
+		$group_key     = substr( $word, 0, $this->key_length );
+		$group_indexes = $this->find_group_indexes( $group_key, $ignore_case );
+		if ( empty( $group_indexes ) ) {
 			return false;
 		}
-		$group        = $this->large_words[ $group_at / ( $this->key_length + 1 ) ];
-		$group_length = strlen( $group );
-		$slug         = substr( $word, $this->key_length );
-		$length       = strlen( $slug );
-		$at           = 0;
 
-		while ( $at < $group_length ) {
-			$token_length   = unpack( 'C', $group[ $at++ ] )[1];
-			$token_at       = $at;
-			$at            += $token_length;
-			$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
-			$mapping_at     = $at;
+		$slug   = substr( $word, $this->key_length );
+		$length = strlen( $slug );
 
-			if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) {
-				return true;
-			}
+		foreach ( $group_indexes as $group_index ) {
+			$group        = $this->large_words[ $group_index ];
+			$group_length = strlen( $group );
+			$at           = 0;
+
+			while ( $at < $group_length ) {
+				$token_length   = unpack( 'C', $group[ $at++ ] )[1];
+				$token_at       = $at;
+				$at            += $token_length;
+				$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
+				$mapping_at     = $at;
+
+				if ( $token_length === $length && self::matches_at( $group, $slug, $token_at, $token_length, $ignore_case ) ) {
+					return true;
+				}
 
-			$at = $mapping_at + $mapping_length;
+				$at = $mapping_at + $mapping_length;
+			}
 		}
 
 		return false;
@@ -555,31 +559,67 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_
 					: null;
 			}
 
-			$group_key = substr( $text, $offset, $this->key_length );
-			$group_at  = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
-			if ( false === $group_at ) {
+			$group_key     = substr( $text, $offset, $this->key_length );
+			$group_indexes = $this->find_group_indexes( $group_key, $ignore_case );
+			if ( empty( $group_indexes ) ) {
 				// Perhaps a short word then.
 				return strlen( $this->small_words ) > 0
 					? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity )
 					: null;
 			}
 
-			$group        = $this->large_words[ $group_at / ( $this->key_length + 1 ) ];
-			$group_length = strlen( $group );
-			$at           = 0;
-			while ( $at < $group_length ) {
-				$token_length   = unpack( 'C', $group[ $at++ ] )[1];
-				$token          = substr( $group, $at, $token_length );
-				$at            += $token_length;
-				$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
-				$mapping_at     = $at;
+			if ( ! $ignore_case ) {
+				$group        = $this->large_words[ $group_indexes[0] ];
+				$group_length = strlen( $group );
+				$at           = 0;
+				while ( $at < $group_length ) {
+					$token_length   = unpack( 'C', $group[ $at++ ] )[1];
+					$token          = substr( $group, $at, $token_length );
+					$at            += $token_length;
+					$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
+					$mapping_at     = $at;
+
+					if ( 0 === substr_compare( $text, $token, $offset + $this->key_length, $token_length ) ) {
+						$matched_token_byte_length = $this->key_length + $token_length;
+						return substr( $group, $mapping_at, $mapping_length );
+					}
+
+					$at = $mapping_at + $mapping_length;
+				}
 
-				if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) {
-					$matched_token_byte_length = $this->key_length + $token_length;
-					return substr( $group, $mapping_at, $mapping_length );
+				return strlen( $this->small_words ) > 0
+					? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity )
+					: null;
+			}
+
+			$best_match_length = null;
+			$best_mapping      = null;
+			foreach ( $group_indexes as $group_index ) {
+				$group        = $this->large_words[ $group_index ];
+				$group_length = strlen( $group );
+				$at           = 0;
+				while ( $at < $group_length ) {
+					$token_length   = unpack( 'C', $group[ $at++ ] )[1];
+					$token          = substr( $group, $at, $token_length );
+					$at            += $token_length;
+					$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
+					$mapping_at     = $at;
+
+					if ( self::matches_at( $text, $token, $offset + $this->key_length, $token_length, $ignore_case ) ) {
+						$match_length = $this->key_length + $token_length;
+						if ( null === $best_match_length || $match_length > $best_match_length ) {
+							$best_match_length = $match_length;
+							$best_mapping      = substr( $group, $mapping_at, $mapping_length );
+						}
+					}
+
+					$at = $mapping_at + $mapping_length;
 				}
+			}
 
-				$at = $mapping_at + $mapping_length;
+			if ( null !== $best_match_length ) {
+				$matched_token_byte_length = $best_match_length;
+				return $best_mapping;
 			}
 		}
 
@@ -854,6 +894,40 @@ private static function longest_first_then_alphabetical( string $a, string $b ):
 		return strcmp( $a, $b );
 	}
 
+	/**
+	 * Finds group indexes that match a lookup key.
+	 *
+	 * @since 6.6.0
+	 *
+	 * @param string $group_key   Group key to find.
+	 * @param bool   $ignore_case Whether to fold ASCII case while searching.
+	 * @return int[] Matching group indexes; at most one for a case-sensitive search.
+	 */
+	private function find_group_indexes( string $group_key, bool $ignore_case ): array {
+		if ( ! $ignore_case ) {
+			$group_at = strpos( $this->groups, $group_key );
+
+			return false === $group_at
+				? array()
+				: array( $group_at / ( $this->key_length + 1 ) );
+		}
+
+		$group_indexes = array();
+		$record_length = $this->key_length + 1;
+		$groups_length = strlen( $this->groups );
+		$group_index   = 0;
+
+		for ( $at = 0; $at < $groups_length; $at += $record_length ) {
+			if ( self::matches_at( $this->groups, $group_key, $at, $this->key_length, $ignore_case ) ) {
+				$group_indexes[] = $group_index;
+			}
+
+			++$group_index;
+		}
+
+		return $group_indexes;
+	}
+
 	/**
 	 * Checks whether a substring matches at a given offset.
 	 *

From 505e46b56074c56754ff908718dfa8655b7a7e97 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:33:30 +0200
Subject: [PATCH 5/6] Escape WP_Token_Map precomputed source

---
 src/wp-includes/class-wp-token-map.php | 96 ++++++++++++++++++--------
 1 file changed, 68 insertions(+), 28 deletions(-)

diff --git a/src/wp-includes/class-wp-token-map.php b/src/wp-includes/class-wp-token-map.php
index 6e38fd9c05774..1818c3d700b11 100644
--- a/src/wp-includes/class-wp-token-map.php
+++ b/src/wp-includes/class-wp-token-map.php
@@ -785,7 +785,7 @@ public function precomputed_php_source_table( string $indent = "\t" ): string {
 		$output .= "{$i2}\"storage_version\" => \"{$class_version}\",\n";
 		$output .= "{$i2}\"key_length\" => {$this->key_length},\n";
 
-		$group_line = str_replace( "\x00", "\\x00", $this->groups );
+		$group_line = self::escape_precomputed_php_string( $this->groups );
 		$output    .= "{$i2}\"groups\" => \"{$group_line}\",\n";
 
 		$output .= "{$i2}\"large_words\" => array(\n";
@@ -798,7 +798,7 @@ public function precomputed_php_source_table( string $indent = "\t" ): string {
 			$group        = $this->large_words[ $index ];
 			$group_length = strlen( $group );
 			$comment_line = "{$i3}//";
-			$data_line    = "{$i3}\"";
+			$group_data   = '';
 			$at           = 0;
 			while ( $at < $group_length ) {
 				$token_length   = unpack( 'C', $group[ $at++ ] )[1];
@@ -808,32 +808,11 @@ public function precomputed_php_source_table( string $indent = "\t" ): string {
 				$mapping        = substr( $group, $at, $mapping_length );
 				$at            += $mapping_length;
 
-				$token_digits   = str_pad( dechex( $token_length ), 2, '0', STR_PAD_LEFT );
-				$mapping_digits = str_pad( dechex( $mapping_length ), 2, '0', STR_PAD_LEFT );
-
-				$mapping = preg_replace_callback(
-					"~[\\x00-\\x1f\\x22\\x5c]~",
-					static function ( $match_result ) {
-						switch ( $match_result[0] ) {
-							case '"':
-								return '\\"';
-
-							case '\\':
-								return '\\\\';
-
-							default:
-								$hex = dechex( ord( $match_result[0] ) );
-								return "\\x{$hex}";
-						}
-					},
-					$mapping
-				);
-
-				$comment_line .= " {$prefix}{$token}[{$mapping}]";
-				$data_line    .= "\\x{$token_digits}{$token}\\x{$mapping_digits}{$mapping}";
+				$group_data   .= pack( 'C', $token_length ) . $token . pack( 'C', $mapping_length ) . $mapping;
+				$comment_line .= ' ' . self::escape_precomputed_php_comment( "{$prefix}{$token}" ) . '[' . self::escape_precomputed_php_comment( $mapping ) . ']';
 			}
 			$comment_line .= ".\n";
-			$data_line    .= "\",\n";
+			$data_line     = "{$i3}\"" . self::escape_precomputed_php_string( $group_data ) . "\",\n";
 
 			$output .= $comment_line;
 			$output .= $data_line;
@@ -849,12 +828,12 @@ static function ( $match_result ) {
 			$at           += $this->key_length + 1;
 		}
 
-		$small_text = str_replace( "\x00", '\x00', implode( '', $small_words ) );
+		$small_text = self::escape_precomputed_php_string( implode( '', $small_words ) );
 		$output    .= "{$i2}\"small_words\" => \"{$small_text}\",\n";
 
 		$output .= "{$i2}\"small_mappings\" => array(\n";
 		foreach ( $this->small_mappings as $mapping ) {
-			$output .= "{$i3}\"{$mapping}\",\n";
+			$output .= "{$i3}\"" . self::escape_precomputed_php_string( $mapping ) . "\",\n";
 		}
 		$output .= "{$i2})\n";
 		$output .= "{$i1})\n";
@@ -964,4 +943,65 @@ private static function matches_at( string $haystack, string $needle, int $offse
 	private static function ascii_lowercase( string $text ): string {
 		return strtr( $text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
 	}
+
+	/**
+	 * Escapes text for use inside a double-quoted PHP string literal.
+	 *
+	 * @since 6.6.0
+	 *
+	 * @param string $text Text to escape.
+	 * @return string Literal body with `"`, `\`, and `$` backslash-escaped and control or non-ASCII bytes as `\xNN`.
+	 */
+	private static function escape_precomputed_php_string( string $text ): string {
+		$escaped = '';
+		$length  = strlen( $text );
+
+		for ( $i = 0; $i < $length; $i++ ) {
+			$byte = ord( $text[ $i ] );
+			switch ( $text[ $i ] ) {
+				case '"':
+					$escaped .= '\\"';
+					break;
+
+				case '\\':
+					$escaped .= '\\\\';
+					break;
+
+				case '$':
+					$escaped .= '\\$';
+					break;
+
+				default:
+					$escaped .= ( $byte < 0x20 || $byte >= 0x7f )
+						? sprintf( '\\x%02x', $byte )
+						: $text[ $i ];
+			}
+		}
+
+		return $escaped;
+	}
+
+	/**
+	 * Escapes text for use inside generated PHP comments.
+	 *
+	 * @since 6.6.0
+	 *
+	 * @param string $text Text to escape.
+	 * @return string Comment text with control bytes, non-ASCII bytes, `?`, and `\` written as `\xNN`.
+	 */
+	private static function escape_precomputed_php_comment( string $text ): string {
+		$escaped = '';
+		$length  = strlen( $text );
+
+		for ( $i = 0; $i < $length; $i++ ) {
+			$byte = ord( $text[ $i ] );
+			$char = $text[ $i ];
+
+			$escaped .= ( $byte < 0x20 || $byte >= 0x7f || '?' === $char || '\\' === $char )
+				? sprintf( '\\x%02x', $byte )
+				: $char;
+		}
+
+		return $escaped;
+	}
 }

From 3572d0d09ef3aab7bf6517ccb12ba1dfe6ffa614 Mon Sep 17 00:00:00 2001
From: Jon Surrell <sirreal@users.noreply.github.com>
Date: Wed, 10 Jun 2026 23:33:47 +0200
Subject: [PATCH 6/6] Add WP_Token_Map property tests

---
 .../wp-token-map/wpTokenMapProperties.php     | 876 ++++++++++++++++++
 1 file changed, 876 insertions(+)
 create mode 100644 tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php

diff --git a/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php b/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php
new file mode 100644
index 0000000000000..a2db89db04fce
--- /dev/null
+++ b/tests/phpunit/tests/wp-token-map/wpTokenMapProperties.php
@@ -0,0 +1,876 @@
+<?php
+/**
+ * Property tests covering WP_Token_Map functionality.
+ *
+ * @package WordPress
+ *
+ * @since 6.6.0
+ * @group html-api-token-map
+ * @group token-map
+ *
+ * @coversDefaultClass WP_Token_Map
+ */
+class Tests_WpTokenMapProperties extends WP_UnitTestCase {
+	/**
+	 * Ensure generated contains() probes agree with a naive reference lookup.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_generated_token_sets
+	 *
+	 * @param array $mappings   Generated token mappings.
+	 * @param int   $key_length Group key length for the generated map.
+	 * @param int   $seed       Seed used to generate the token set.
+	 */
+	public function test_contains_matches_reference_for_generated_token_sets( $mappings, $key_length, $seed ) {
+		$map = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $map );
+
+		foreach ( self::contains_probes( $mappings, $seed ) as $probe ) {
+			foreach ( self::case_sensitivities() as $case_sensitivity ) {
+				$this->assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, 'contains' );
+			}
+		}
+	}
+
+	/**
+	 * Ensure generated read_token() probes agree with a naive reference lookup.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_generated_token_sets
+	 *
+	 * @param array $mappings   Generated token mappings.
+	 * @param int   $key_length Group key length for the generated map.
+	 * @param int   $seed       Seed used to generate the token set.
+	 */
+	public function test_read_token_matches_reference_for_generated_documents( $mappings, $key_length, $seed ) {
+		$map = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $map );
+
+		foreach ( self::generated_documents( $mappings, $seed ) as $document_index => $document ) {
+			$document_length = strlen( $document );
+
+			for ( $offset = 0; $offset <= $document_length; $offset++ ) {
+				foreach ( self::case_sensitivities() as $case_sensitivity ) {
+					$this->assert_read_token_matches_reference(
+						$map,
+						$mappings,
+						$document,
+						$offset,
+						$key_length,
+						$seed,
+						$case_sensitivity,
+						"read_token document {$document_index}"
+					);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Ensure generated nested-prefix families match greedily.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_key_lengths
+	 *
+	 * @param int $key_length Group key length for the generated map.
+	 */
+	public function test_generated_nested_prefix_families_match_longest_token( $key_length ) {
+		$mappings = array();
+		$token    = '';
+		foreach ( array( 'a', 'b', 'c', 'D', ';', "\x80", 'e', 'f' ) as $chunk ) {
+			$token             .= $chunk;
+			$mappings[ $token ] = 'value-' . strlen( $token );
+		}
+
+		$map = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $map );
+
+		$document = "{$token} suffix";
+		$length   = null;
+		$this->assertSame( $mappings[ $token ], $map->read_token( $document, 0, $length ) );
+		$this->assertSame( strlen( $token ), $length );
+	}
+
+	/**
+	 * Ensure generated maps preserve behavior after to_array()/from_array().
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_generated_token_sets
+	 *
+	 * @param array $mappings   Generated token mappings.
+	 * @param int   $key_length Group key length for the generated map.
+	 * @param int   $seed       Seed used to generate the token set.
+	 */
+	public function test_generated_maps_round_trip_through_array_export( $mappings, $key_length, $seed ) {
+		$map = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $map );
+
+		$round_tripped = WP_Token_Map::from_array( $map->to_array(), $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $round_tripped );
+
+		$this->assert_map_behavior_matches_reference( $round_tripped, $mappings, $key_length, $seed, 'to_array round-trip' );
+	}
+
+	/**
+	 * Ensure generated maps preserve behavior after precomputed table export.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_generated_token_sets
+	 *
+	 * @param array $mappings   Generated token mappings.
+	 * @param int   $key_length Group key length for the generated map.
+	 * @param int   $seed       Seed used to generate the token set.
+	 */
+	public function test_generated_maps_round_trip_through_precomputed_source_table( $mappings, $key_length, $seed ) {
+		$map = WP_Token_Map::from_array( $mappings, $key_length );
+		$this->assertInstanceOf( WP_Token_Map::class, $map );
+
+		$source_table = $map->precomputed_php_source_table();
+		// phpcs:ignore Squiz.PHP.Eval.Discouraged -- This verifies generated source round-trips.
+		$round_tripped = eval( "return {$source_table};" );
+		$this->assertInstanceOf( WP_Token_Map::class, $round_tripped );
+
+		$this->assert_map_behavior_matches_reference( $round_tripped, $mappings, $key_length, $seed, 'precomputed table round-trip' );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive matching leaves non-ASCII bytes literal.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_ascii_case_insensitive_matching_keeps_non_ascii_bytes_literal() {
+		$mappings = array(
+			"alpha\xE9"     => 'latin-1-lower',
+			"bravo\xC3\xA9" => 'utf-8-lower',
+			"charlie\x80Z"  => 'raw-byte',
+		);
+		$map      = WP_Token_Map::from_array( $mappings, 2 );
+
+		$this->assertTrue( $map->contains( "ALPHA\xE9", 'ascii-case-insensitive' ) );
+		$this->assertFalse( $map->contains( "ALPHA\xC9", 'ascii-case-insensitive' ) );
+		$this->assertTrue( $map->contains( "BRAVO\xC3\xA9", 'ascii-case-insensitive' ) );
+		$this->assertFalse( $map->contains( "BRAVO\xC3\x89", 'ascii-case-insensitive' ) );
+
+		$length = null;
+		$this->assertSame( 'raw-byte', $map->read_token( "CHARLIE\x80z", 0, $length, 'ascii-case-insensitive' ) );
+		$this->assertSame( strlen( "charlie\x80Z" ), $length );
+
+		$length = null;
+		$this->assertNull( $map->read_token( "CHARLIE\x81z", 0, $length, 'ascii-case-insensitive' ) );
+		$this->assertNull( $length );
+	}
+
+	/**
+	 * Ensure array export preserves one-byte group keys.
+	 *
+	 * This is the minimized regression for generated key_length=1 maps.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_array_export_preserves_single_byte_group_keys() {
+		$mappings = array(
+			'a'  => 'short',
+			'ab' => 'long',
+			'ac' => 'sibling',
+		);
+		$map      = WP_Token_Map::from_array( $mappings, 1 );
+
+		$expected = $mappings;
+		$actual   = $map->to_array();
+		ksort( $expected );
+		ksort( $actual );
+
+		$this->assertSame( $expected, $actual );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive reads work for short tokens.
+	 *
+	 * This is the minimized regression for generated case-insensitive short
+	 * token probes.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_ascii_case_insensitive_reads_short_tokens() {
+		$map    = WP_Token_Map::from_array( array( 'ab' => 'short-token' ), 2 );
+		$length = null;
+
+		$this->assertSame( 'short-token', $map->read_token( 'AB', 0, $length, 'ascii-case-insensitive' ) );
+		$this->assertSame( 2, $length );
+	}
+
+	/**
+	 * Ensure ASCII-insensitive reads check every folded-equivalent group key.
+	 *
+	 * @ticket 60698
+	 *
+	 * @dataProvider data_ascii_case_insensitive_group_key_collisions
+	 *
+	 * @param array  $mappings   Token mappings with folded-equivalent group keys.
+	 * @param int    $key_length Group key length for the generated map.
+	 * @param string $probe      Probe text.
+	 * @param string $expected   Expected mapping.
+	 */
+	public function test_ascii_case_insensitive_reads_folded_group_key_collisions( $mappings, $key_length, $probe, $expected ) {
+		$map    = WP_Token_Map::from_array( $mappings, $key_length );
+		$length = null;
+
+		$this->assertTrue( $map->contains( $probe, 'ascii-case-insensitive' ) );
+		$this->assertSame( $expected, $map->read_token( $probe, 0, $length, 'ascii-case-insensitive' ) );
+		$this->assertSame( strlen( $probe ), $length );
+	}
+
+	/**
+	 * Ensure generated PHP source escapes tokens and mappings safely.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_precomputed_source_table_escapes_php_string_and_comment_bytes() {
+		$mappings = array(
+			'quote"token'       => 'quote"value',
+			'slash\\token'      => 'slash\\value',
+			'dollar$token'      => 'dollar$value',
+			"control\ntoken"    => "control\nvalue",
+			'close?>tag'        => 'close?>value',
+			"high\x80\xFFtoken" => "high\x80\xFFvalue",
+		);
+		$map      = WP_Token_Map::from_array( $mappings, 2 );
+
+		$source_table = $map->precomputed_php_source_table();
+		// phpcs:ignore Squiz.PHP.Eval.Discouraged -- This verifies generated source round-trips.
+		$round_tripped = eval( "return {$source_table};" );
+
+		$this->assertInstanceOf( WP_Token_Map::class, $round_tripped );
+		$this->assertSame( $map->to_array(), $round_tripped->to_array() );
+	}
+
+	/**
+	 * Ensure short-token reads do not consume missing bytes.
+	 *
+	 * @ticket 60698
+	 */
+	public function test_short_token_reads_ignore_text_shorter_than_token() {
+		$map    = WP_Token_Map::from_array( array( 'ab' => 'short-token' ), 2 );
+		$length = null;
+
+		$this->assertNull( $map->read_token( 'a', 0, $length ) );
+		$this->assertNull( $length );
+
+		$length = null;
+		$this->assertNull( $map->read_token( '', 0, $length ) );
+		$this->assertNull( $length );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return Generator Yields named data sets of generated mappings, key length, and seed.
+	 */
+	public static function data_generated_token_sets() {
+		$cases = array(
+			'seed 539231511 key_length 1' => array( 539231511, 1, 70 ),
+			'seed 539231512 key_length 2' => array( 539231512, 2, 90 ),
+			'seed 867530901 key_length 1' => array( 867530901, 1, 60 ),
+			'seed 867530902 key_length 2' => array( 867530902, 2, 80 ),
+		);
+
+		foreach ( $cases as $name => $case ) {
+			list( $seed, $key_length, $target_count ) = $case;
+			yield $name => array( self::generate_token_set( $seed, $key_length, $target_count ), $key_length, $seed );
+		}
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[].
+	 */
+	public static function data_key_lengths() {
+		return array(
+			'key length 1' => array( 1 ),
+			'key length 2' => array( 2 ),
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[].
+	 */
+	public static function data_ascii_case_insensitive_group_key_collisions() {
+		return array(
+			'key length 1' => array(
+				array(
+					'Ab' => 'upper-group',
+					'aa' => 'lower-group',
+				),
+				1,
+				'aa',
+				'lower-group',
+			),
+			'key length 2' => array(
+				array(
+					'Abc' => 'mixed-group-one',
+					'aBd' => 'mixed-group-two',
+				),
+				2,
+				'abd',
+				'mixed-group-two',
+			),
+		);
+	}
+
+	/**
+	 * Assert that contains() and read_token() on a token map agree with the reference implementation.
+	 *
+	 * @param WP_Token_Map $map        Token map under test.
+	 * @param array        $mappings   Generated token mappings.
+	 * @param int          $key_length Group key length for the generated map.
+	 * @param int          $seed       Seed used to generate the token set.
+	 * @param string       $label      Describes the map under test; prefixed to the operation name in failures.
+	 */
+	private function assert_map_behavior_matches_reference( $map, $mappings, $key_length, $seed, $label ) {
+		foreach ( self::contains_probes( $mappings, $seed ) as $probe ) {
+			foreach ( self::case_sensitivities() as $case_sensitivity ) {
+				$this->assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, "{$label} contains" );
+			}
+		}
+
+		foreach ( self::generated_documents( $mappings, $seed ) as $document_index => $document ) {
+			$document_length = strlen( $document );
+			for ( $offset = 0; $offset <= $document_length; $offset++ ) {
+				foreach ( self::case_sensitivities() as $case_sensitivity ) {
+					$this->assert_read_token_matches_reference(
+						$map,
+						$mappings,
+						$document,
+						$offset,
+						$key_length,
+						$seed,
+						$case_sensitivity,
+						"{$label} read_token document {$document_index}"
+					);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Assert contains() matches the reference, building the failure context only on a mismatch.
+	 *
+	 * @param WP_Token_Map $map              Token map under test.
+	 * @param array        $mappings         Generated token mappings.
+	 * @param string       $probe            Probe word.
+	 * @param int          $key_length       Group key length for the generated map.
+	 * @param int          $seed             Seed used to generate the token set.
+	 * @param string       $case_sensitivity Case sensitivity mode.
+	 * @param string       $operation        Operation being tested.
+	 */
+	private function assert_contains_matches_reference( $map, $mappings, $probe, $key_length, $seed, $case_sensitivity, $operation ) {
+		$expected = self::reference_contains( $mappings, $probe, $case_sensitivity );
+		$actual   = $map->contains( $probe, $case_sensitivity );
+
+		if ( $expected !== $actual ) {
+			$this->assertSame(
+				$expected,
+				$actual,
+				self::failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $probe )
+			);
+		}
+	}
+
+	/**
+	 * Assert read_token() returns the reference value and matched length, building context only on a mismatch.
+	 *
+	 * @param WP_Token_Map $map              Token map under test.
+	 * @param array        $mappings         Generated token mappings.
+	 * @param string       $document         Document to probe.
+	 * @param int          $offset           Offset at which to probe.
+	 * @param int          $key_length       Group key length for the generated map.
+	 * @param int          $seed             Seed used to generate the token set.
+	 * @param string       $case_sensitivity Case sensitivity mode.
+	 * @param string       $operation        Operation being tested.
+	 */
+	private function assert_read_token_matches_reference( $map, $mappings, $document, $offset, $key_length, $seed, $case_sensitivity, $operation ) {
+		$expected        = self::reference_read_token( $mappings, $document, $offset, $case_sensitivity );
+		$actual_length   = null;
+		$actual_response = $map->read_token( $document, $offset, $actual_length, $case_sensitivity );
+
+		if ( $expected['value'] !== $actual_response ) {
+			$this->assertSame(
+				$expected['value'],
+				$actual_response,
+				self::failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $document, $offset ) . '; response'
+			);
+		}
+
+		if ( $expected['length'] !== $actual_length ) {
+			$this->assertSame(
+				$expected['length'],
+				$actual_length,
+				self::failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $document, $offset ) . '; matched length'
+			);
+		}
+	}
+
+	/**
+	 * Return the case-sensitivity modes exercised against contains() and read_token().
+	 *
+	 * @return string[] Case-sensitivity modes.
+	 */
+	private static function case_sensitivities() {
+		return array( 'case-sensitive', 'ascii-case-insensitive' );
+	}
+
+	/**
+	 * Generate a deterministic token set from fixed edge cases plus seeded random tokens.
+	 *
+	 * NUL is excluded from generated tokens because the implementation treats
+	 * lookup words containing NUL as invalid. Probe words and documents include
+	 * NUL so failed lookups still exercise that byte.
+	 *
+	 * @param int $seed         Seed used to generate the token set.
+	 * @param int $key_length   Group key length for the generated map.
+	 * @param int $target_count Number of tokens to target; random filling gives up after 40 times this many attempts.
+	 * @return array Generated token mappings, keyed by token.
+	 */
+	private static function generate_token_set( $seed, $key_length, $target_count ) {
+		$state    = $seed;
+		$mappings = array();
+
+		self::add_token( $mappings, 'a', $seed );
+		self::add_token( $mappings, 'B', $seed );
+		if ( $key_length > 1 ) {
+			self::add_token( $mappings, 'c', $seed );
+		}
+		self::add_token( $mappings, str_repeat( 'k', $key_length ), $seed );
+		self::add_token( $mappings, str_repeat( 'L', 255 ), $seed );
+		self::add_token( $mappings, "hi\x80A;", $seed );
+		self::add_token( $mappings, "jo\xFFb;", $seed );
+		self::add_token( $mappings, "utf\xC3\xA9;", $seed );
+		self::add_token( $mappings, "euro\xE2\x82\xAC;", $seed );
+		if ( 1 === $key_length ) {
+			self::add_token( $mappings, 'Ab', $seed );
+			self::add_token( $mappings, 'aa', $seed );
+		} else {
+			self::add_token( $mappings, 'Abc', $seed );
+			self::add_token( $mappings, 'aBd', $seed );
+		}
+
+		$nested = '';
+		foreach ( array( 'p', 'r', 'e', 'F', 'i', 'x', ';', "\x80", 'z' ) as $chunk ) {
+			$nested .= $chunk;
+			self::add_token( $mappings, $nested, $seed );
+		}
+
+		$group_key = 1 === $key_length ? 'g' : 'gy';
+		for ( $i = 0; $i < 24; $i++ ) {
+			self::add_token( $mappings, $group_key . self::random_token_suffix( $state, 2 + ( $i % 7 ) ), $seed );
+		}
+
+		$attempts = 0;
+		while ( count( $mappings ) < $target_count && $attempts < $target_count * 40 ) {
+			self::add_token( $mappings, self::random_token( $state, $key_length, $attempts ), $seed );
+			++$attempts;
+		}
+
+		return $mappings;
+	}
+
+	/**
+	 * Register a token unless it is invalid or collides with an existing one when ASCII case is ignored.
+	 *
+	 * @param array  $mappings Generated token mappings, modified in place.
+	 * @param string $token    Candidate token.
+	 * @param int    $seed     Seed embedded in the generated value.
+	 */
+	private static function add_token( &$mappings, $token, $seed ) {
+		if ( strlen( $token ) >= WP_Token_Map::MAX_LENGTH || '' === $token || false !== strpos( $token, "\x00" ) ) {
+			return;
+		}
+
+		$folded_token = self::ascii_lowercase( $token );
+		foreach ( array_keys( $mappings ) as $known_token ) {
+			if ( self::ascii_lowercase( $known_token ) === $folded_token ) {
+				return;
+			}
+		}
+		$mappings[ $token ] = "value-{$seed}-" . count( $mappings );
+	}
+
+	/**
+	 * Generate a random token whose length falls below, at, or above the group key length.
+	 *
+	 * @param int $state      Pseudo-random generator state.
+	 * @param int $key_length Group key length for the generated map.
+	 * @param int $index      Token index; selects the leading byte, cycling through 'm' to 'v'.
+	 * @return string Generated token.
+	 */
+	private static function random_token( &$state, $key_length, $index ) {
+		$choice = self::random_int( $state, 0, 9 );
+		if ( $choice < 3 && $key_length > 1 ) {
+			$target_length = self::random_int( $state, 1, $key_length - 1 );
+		} elseif ( $choice < 6 ) {
+			$target_length = $key_length;
+		} elseif ( $choice < 9 ) {
+			$target_length = self::random_int( $state, $key_length + 1, 24 );
+		} else {
+			$target_length = self::random_int( $state, 48, 96 );
+		}
+
+		$token = chr( ord( 'm' ) + ( $index % 10 ) );
+		while ( strlen( $token ) < $target_length ) {
+			$token .= self::random_token_chunk( $state );
+		}
+
+		return substr( $token, 0, $target_length );
+	}
+
+	/**
+	 * Generate a random suffix of exactly the target byte length, truncating the final chunk if needed.
+	 *
+	 * @param int $state         Pseudo-random generator state.
+	 * @param int $target_length Target byte length.
+	 * @return string Generated suffix.
+	 */
+	private static function random_token_suffix( &$state, $target_length ) {
+		$suffix = '';
+		while ( strlen( $suffix ) < $target_length ) {
+			$suffix .= self::random_token_chunk( $state );
+		}
+
+		return substr( $suffix, 0, $target_length );
+	}
+
+	/**
+	 * Pick one random chunk: an ASCII letter, digit, or ';', a lone high byte, or a multi-byte sequence.
+	 *
+	 * @param int $state Pseudo-random generator state.
+	 * @return string Generated chunk, one to three bytes long.
+	 */
+	private static function random_token_chunk( &$state ) {
+		$chunks = array(
+			'a',
+			'b',
+			'C',
+			'D',
+			'0',
+			'9',
+			';',
+			"\x80",
+			"\xFF",
+			"\xC2\xA9",
+			"\xE2\x82\xAC",
+		);
+
+		return $chunks[ self::random_int( $state, 0, count( $chunks ) - 1 ) ];
+	}
+
+	/**
+	 * Generate contains() probe words: each token, its variations and proper prefixes, plus random words.
+	 *
+	 * @param array $mappings Generated token mappings.
+	 * @param int   $seed     Seed used to generate the token set.
+	 * @return string[] De-duplicated probe words.
+	 */
+	private static function contains_probes( $mappings, $seed ) {
+		$state  = $seed ^ 0x5A5A5A5A;
+		$probes = array( '', "\x00", "a\x00", "z\x00z" );
+
+		foreach ( array_keys( $mappings ) as $token ) {
+			$probes[] = $token;
+			$probes[] = self::swap_ascii_case( $token );
+			$probes[] = $token . self::random_probe_byte( $state );
+			$probes[] = self::mutate_one_byte( $token, $state );
+
+			if ( strlen( $token ) > 1 ) {
+				$probes[] = substr( $token, 0, -1 );
+			}
+
+			for ( $length = 1; $length < strlen( $token ); $length++ ) {
+				$probes[] = substr( $token, 0, $length );
+			}
+		}
+
+		for ( $i = 0; $i < 400; $i++ ) {
+			$probes[] = self::random_probe_word( $state, self::random_int( $state, 0, 32 ) );
+		}
+
+		return array_values( array_unique( $probes, SORT_STRING ) );
+	}
+
+	/**
+	 * Generate documents mixing tokens, token variations, and random bytes for read_token() probes.
+	 *
+	 * @param array $mappings Generated token mappings.
+	 * @param int   $seed     Seed used to generate the token set.
+	 * @return string[] Three fixed documents followed by ten random ones.
+	 */
+	private static function generated_documents( $mappings, $seed ) {
+		$state  = $seed ^ 0x13572468;
+		$tokens = array_keys( $mappings );
+		usort( $tokens, array( __CLASS__, 'longest_first_then_alphabetical' ) );
+
+		$documents = array(
+			'',
+			'prefix' . $tokens[0] . 'suffix',
+			self::swap_ascii_case( $tokens[0] ) . "\x00" . $tokens[ count( $tokens ) - 1 ],
+		);
+
+		for ( $i = 0; $i < 10; $i++ ) {
+			$document = '';
+			for ( $j = 0; $j < 32; $j++ ) {
+				$token = $tokens[ self::random_int( $state, 0, count( $tokens ) - 1 ) ];
+				switch ( self::random_int( $state, 0, 5 ) ) {
+					case 0:
+						$document .= $token;
+						break;
+
+					case 1:
+						$document .= self::swap_ascii_case( $token );
+						break;
+
+					case 2:
+						$document .= substr( $token, 0, self::random_int( $state, 0, strlen( $token ) ) );
+						break;
+
+					case 3:
+						$document .= self::mutate_one_byte( $token, $state );
+						break;
+
+					case 4:
+						$document .= $token . self::random_probe_word( $state, self::random_int( $state, 1, 4 ) );
+						break;
+
+					default:
+						$document .= self::random_probe_word( $state, self::random_int( $state, 1, 8 ) );
+						break;
+				}
+			}
+
+			$documents[] = $document;
+		}
+
+		return $documents;
+	}
+
+	/**
+	 * Reference lookup used to cross-check contains().
+	 *
+	 * @param array  $mappings         Generated token mappings.
+	 * @param string $word             Word to look up.
+	 * @param string $case_sensitivity 'case-sensitive' for an exact key check; otherwise ASCII case is folded.
+	 * @return bool Whether a token in the generated set matches the word.
+	 */
+	private static function reference_contains( $mappings, $word, $case_sensitivity ) {
+		if ( 'case-sensitive' !== $case_sensitivity ) {
+			$folded_word = self::ascii_lowercase( $word );
+			foreach ( array_keys( $mappings ) as $candidate ) {
+				if ( self::ascii_lowercase( $candidate ) === $folded_word ) {
+					return true;
+				}
+			}
+			return false;
+		}
+
+		return array_key_exists( $word, $mappings );
+	}
+
+	/**
+	 * Reference implementation for read_token(): the longest token matching at the offset wins.
+	 *
+	 * @param array  $mappings         Generated token mappings.
+	 * @param string $document         Document to probe.
+	 * @param int    $offset           Offset at which to probe.
+	 * @param string $case_sensitivity Case sensitivity mode.
+	 * @return array Expected 'value' and matched 'length'; both are null when no token matches.
+	 */
+	private static function reference_read_token( $mappings, $document, $offset, $case_sensitivity ) {
+		$tokens          = array_keys( $mappings );
+		$document_length = strlen( $document );
+		$ignore_case     = 'ascii-case-insensitive' === $case_sensitivity;
+		usort( $tokens, array( __CLASS__, 'longest_first_then_alphabetical' ) );
+
+		foreach ( $tokens as $token ) {
+			$token_length = strlen( $token );
+			if ( $offset + $token_length > $document_length ) {
+				continue;
+			}
+
+			$candidate = substr( $document, $offset, $token_length );
+			$matches   = $ignore_case
+				? self::ascii_lowercase( $candidate ) === self::ascii_lowercase( $token )
+				: $candidate === $token;
+
+			if ( $matches ) {
+				return array(
+					'value'  => $mappings[ $token ],
+					'length' => $token_length,
+				);
+			}
+		}
+
+		return array(
+			'value'  => null,
+			'length' => null,
+		);
+	}
+
+	/**
+	 * Comparison callback ordering strings by descending byte length, then by ascending byte order.
+	 *
+	 * @param string $a Left-hand string.
+	 * @param string $b Right-hand string.
+	 * @return int Negative, zero, or positive sort order.
+	 */
+	private static function longest_first_then_alphabetical( $a, $b ) {
+		$length_difference = strlen( $b ) - strlen( $a );
+
+		// A longer string sorts ahead of a shorter one.
+		if ( 0 !== $length_difference ) {
+			return $length_difference;
+		}
+
+		/*
+		 * Equal lengths: fall back to byte order, which is zero for identical strings.
+		 */
+		return strcmp( $a, $b );
+	}
+
+	/**
+	 * Replace one randomly chosen byte of a token with a different probe byte.
+	 *
+	 * @param string $token Token to mutate; an empty token yields a single probe byte.
+	 * @param int    $state Pseudo-random generator state.
+	 * @return string Mutated token.
+	 */
+	private static function mutate_one_byte( $token, &$state ) {
+		if ( '' === $token ) {
+			return self::random_probe_byte( $state );
+		}
+
+		$offset      = self::random_int( $state, 0, strlen( $token ) - 1 );
+		$replacement = self::random_probe_byte( $state );
+		while ( $replacement === $token[ $offset ] ) {
+			$replacement = self::random_probe_byte( $state );
+		}
+
+		return substr( $token, 0, $offset ) . $replacement . substr( $token, $offset + 1 );
+	}
+
+	/**
+	 * Invert the case of every ASCII letter in a byte string.
+	 *
+	 * @param string $text Byte string to transform.
+	 * @return string Same bytes with A-Z and a-z exchanged; all other bytes unchanged.
+	 */
+	private static function swap_ascii_case( $text ) {
+		$uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
+		$lowercase = 'abcdefghijklmnopqrstuvwxyz';
+
+		/*
+		 * strtr() with two equal-length strings translates each byte
+		 * independently in a single pass, so a swapped letter is never
+		 * swapped back. Bytes outside of A-Z and a-z, including bytes
+		 * 0x80 and above, have no entry in the table and pass through
+		 * untouched.
+		 */
+		return strtr(
+			$text,
+			$uppercase . $lowercase,
+			$lowercase . $uppercase
+		);
+	}
+
+	/**
+	 * Fold ASCII uppercase letters (A-Z) to lowercase, leaving every other byte untouched.
+	 *
+	 * @param string $text Text to lowercase.
+	 * @return string Text with only ASCII uppercase bytes folded to lowercase.
+	 */
+	private static function ascii_lowercase( $text ) {
+		return strtr( $text, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
+	}
+
+	/**
+	 * Generate a random probe word by appending probe bytes until the target length is reached.
+	 *
+	 * @param int $state  Pseudo-random generator state.
+	 * @param int $length Target byte length; zero yields an empty string.
+	 * @return string Generated word.
+	 */
+	private static function random_probe_word( &$state, $length ) {
+		$word = '';
+		while ( strlen( $word ) < $length ) {
+			$word .= self::random_probe_byte( $state );
+		}
+
+		return substr( $word, 0, $length );
+	}
+
+	/**
+	 * Pick one random probe byte from a fixed set that includes NUL, ASCII bytes, and lone high bytes.
+	 *
+	 * @param int $state Pseudo-random generator state.
+	 * @return string Generated byte.
+	 */
+	private static function random_probe_byte( &$state ) {
+		$bytes = array(
+			"\x00",
+			'a',
+			'Z',
+			'4',
+			';',
+			'_',
+			"\x80",
+			"\xFF",
+			"\xC3",
+			"\xA9",
+			"\xE2",
+			"\x82",
+			"\xAC",
+		);
+
+		return $bytes[ self::random_int( $state, 0, count( $bytes ) - 1 ) ];
+	}
+
+	/**
+	 * Advance the linear congruential generator state and map it into the range from $min to $max.
+	 *
+	 * @param int $state Pseudo-random generator state, updated in place and reduced modulo 2^31.
+	 * @param int $min   Minimum value.
+	 * @param int $max   Maximum value.
+	 * @return int Integer between $min and $max, inclusive, for a non-negative state.
+	 */
+	private static function random_int( &$state, $min, $max ) {
+		$state = ( ( 1103515245 * $state ) + 12345 ) % 2147483648;
+
+		return $min + ( $state % ( $max - $min + 1 ) );
+	}
+
+	/**
+	 * Build an assertion message that identifies the failing probe and the token set it ran against.
+	 *
+	 * @param array    $mappings         Generated token mappings.
+	 * @param int      $key_length       Group key length for the generated map.
+	 * @param int      $seed             Seed used to generate the token set.
+	 * @param string   $case_sensitivity Case sensitivity mode.
+	 * @param string   $operation        Operation being tested.
+	 * @param string   $probe            Probe word or document; reported hex-encoded.
+	 * @param int|null $offset           Optional offset into the probe; left out of the message when null.
+	 * @return string Semicolon-separated context ending in the base64-encoded serialized token set.
+	 */
+	private static function failure_context( $mappings, $key_length, $seed, $case_sensitivity, $operation, $probe, $offset = null ) {
+		$parts = array( "Seed {$seed}", "key_length {$key_length}", $operation, "case {$case_sensitivity}", 'probe ' . bin2hex( $probe ) );
+		if ( null !== $offset ) {
+			$parts[] = "offset {$offset}";
+		}
+		$parts[] = 'token_set ' . base64_encode( serialize( $mappings ) );
+		return implode( '; ', $parts );
+	}
+}