diff --git a/.gitignore b/.gitignore
index 15876fa47fee8..dbec517727bbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,7 @@ wp-tests-config.php
/packagehash.txt
/.gutenberg-hash
/artifacts
+/tools/html-api-fuzz/oracles/lexbor/build
/setup.log
/coverage
diff --git a/merged-prs-2026-06-11.md b/merged-prs-2026-06-11.md
new file mode 100644
index 0000000000000..e86fd913c3179
--- /dev/null
+++ b/merged-prs-2026-06-11.md
@@ -0,0 +1,29 @@
+# PR merges for html-api-fuzz
+
+Date: 2026-06-11
+
+All included PRs were merged into `html-api-fuzz` with merge commits. Trunk was checked before and after these merges; `origin/trunk` is already an ancestor of this branch, so Git had no trunk merge commit to create.
+
+## Merged
+
+- PR #53, `origin/spec-compliant-getters`
+ - Merge commit: `5a3cbf19df`
+ - Why: Aligns HTML API input preprocessing and getter behavior with the spec, reducing fuzzer/oracle noise around NULL bytes, carriage returns, and decoded source values. This also gives the rebuilt #42 branch the helper behavior it expects.
+
+- PR #42, `origin/html-api-fuzz-fiz/decoded-cr`
+ - Merge commit: `ce2af0eff6`
+ - Why: Updates the earlier #42 merge to the current PR head. It preserves decoded carriage returns as a character reference (such as `&#xD;`) during serialization, normalizes NULL bytes through the shared serializer path, and removes the old `get_attribute_for_serialization()` workaround that #53 makes unnecessary.
+
+- PR #51, `origin/html-api-normalize-restore-missing-text-content`
+ - Merge commit: `ce08149069`
+ - Why: Addresses issue #50 and the latest fuzzer `normalize-tree-changed` failures where raw text was dropped from `IFRAME`, `NOEMBED`, `NOFRAMES`, and related rawtext serialization paths.
+
+- PR #17, `origin/copilot/add-script-data-filter`
+ - Merge commit: `adbe354c94`
+ - Why: Addresses issue #16 by adding the classic-script `script_data_{$handle}` JSON data hook. This is separate from the HTML API fuzzer stack, so it was kept in its own merge commit.
+ - Follow-up: `57d458df91` corrects the new test's script-tag assertion to match WordPress's emitted attribute order.
+
+## Not merged
+
+- Trunk: already present; no-op.
+- Other open PRs: either already contained in this branch or not tied to the existing issues identified in this pass.
diff --git a/src/wp-includes/class-wp-scripts.php b/src/wp-includes/class-wp-scripts.php
index 6f633d465bb2c..6eb95febe1c02 100644
--- a/src/wp-includes/class-wp-scripts.php
+++ b/src/wp-includes/class-wp-scripts.php
@@ -480,7 +480,108 @@ public function do_item( $handle, $group = false ) {
$attr['data-wp-fetchpriority'] = $original_fetchpriority;
}
- $tag = $translations . $before_script;
+ /**
+ * Filters data associated with a given script.
+ *
+ * Scripts may require data that is required for initialization or is essential
+ * to have immediately available on page load. These are suitable use cases for
+ * this data.
+ *
+ * The dynamic portion of the hook name, `$handle`, refers to the script handle.
+ *
+ * This is best suited to pass essential data that must be available to the script for
+ * initialization or immediately on page load. It does not replace the REST API or
+ * fetching data from the client.
+ *
+ * Example:
+ *
+ * add_filter(
+ * 'script_data_my-script-handle',
+ * function ( array $data ): array {
+ * $data['myConfig'] = array( 'key' => 'value' );
+ * return $data;
+ * }
+ * );
+ *
+ * If the filter returns no data (an empty array), nothing will be embedded in the page.
+ *
+ * The data for a given script, if provided, will be JSON serialized in a script
+ * tag with an ID of the form `wp-script-data-{$handle}` and type `application/json`.
+ *
+ * The data can be read on the client with a pattern like this:
+ *
+ * Example:
+ *
+ * const dataContainer = document.getElementById( 'wp-script-data-my-script-handle' );
+ * let data = {};
+ * if ( dataContainer ) {
+ * try {
+ * data = JSON.parse( dataContainer.textContent );
+ * } catch {}
+ * }
+ * // data.myConfig.key === 'value';
+ * initMyScriptWithData( data );
+ *
+ * @since 7.1.0
+ *
+ * @param array $data The data associated with the script.
+ */
+ $script_data = apply_filters( "script_data_{$handle}", array() );
+
+ $script_data_tag = '';
+ if ( ! empty( $script_data ) ) {
+ /*
+ * This data will be printed as JSON inside a script tag like this:
+ * <script type="application/json"></script>
+ *
+ * A script tag must be closed by a sequence beginning with `</`. It's impossible to
+ * close a script tag without using `<`. We ensure that `<` is escaped and `/` can
+ * remain unescaped, so `</script>` will be printed as `\u003C/script>`.
+ *
+ * - JSON_HEX_TAG: All < and > are converted to \u003C and \u003E.
+ * - JSON_UNESCAPED_SLASHES: Don't escape /.
+ * - JSON_INVALID_UTF8_SUBSTITUTE: Substitute invalid UTF-8 sequences with U+FFFD (�)
+ * instead of failing. This avoids the overhead of `wp_json_encode()`'s fallback
+ * re-encoding and ensures consistent handling with the standard replacement character.
+ *
+ * If the page will use UTF-8 encoding, it's safe to print unescaped unicode:
+ *
+ * - JSON_UNESCAPED_UNICODE: Encode multibyte Unicode characters literally (instead of as `\uXXXX`).
+ * - JSON_UNESCAPED_LINE_TERMINATORS: The line terminators are kept unescaped when
+ * JSON_UNESCAPED_UNICODE is supplied. It uses the same behaviour as it was
+ * before PHP 7.1 without this constant. Available as of PHP 7.1.0.
+ *
+ * The JSON specification requires encoding in UTF-8, so if the generated HTML page
+ * is not encoded in UTF-8 then it's not safe to include those literals. They must
+ * be escaped to avoid encoding issues.
+ *
+ * @see https://www.rfc-editor.org/rfc/rfc8259.html for details on encoding requirements.
+ * @see https://www.php.net/manual/en/json.constants.php for details on these constants.
+ * @see https://html.spec.whatwg.org/#script-data-state for details on script tag parsing.
+ */
+ $json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS | JSON_INVALID_UTF8_SUBSTITUTE;
+ if ( ! is_utf8_charset() ) {
+ $json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE;
+ }
+
+ /*
+ * Return the data script tag as a string (third parameter false) rather than echoing it.
+ * This allows it to be included with the script tag in the concatenated output.
+ */
+ $script_data_tag = wp_print_inline_script_tag(
+ wp_json_encode(
+ $script_data,
+ $json_encode_flags
+ ),
+ array(
+ 'type' => 'application/json',
+ 'id' => "wp-script-data-{$handle}",
+ ),
+ false
+ );
+ }
+
+ $tag = $translations . $before_script . $script_data_tag;
$tag .= wp_get_script_tag( $attr );
$tag .= $after_script;
diff --git a/src/wp-includes/html-api/class-wp-html-decoder.php b/src/wp-includes/html-api/class-wp-html-decoder.php
index d902f4b7cabc4..9f33056de0c14 100644
--- a/src/wp-includes/html-api/class-wp-html-decoder.php
+++ b/src/wp-includes/html-api/class-wp-html-decoder.php
@@ -195,6 +195,8 @@ public static function decode( $context, $text ): string {
* 7 === $token_length; // `∉`
*
* @since 6.6.0
+ * @since 7.1.0 Detects ambiguous followers of semicolon-less references
+ * by ASCII classification only, independent of the locale.
*
* @global WP_Token_Map $html5_named_character_references Mappings for HTML5 named character references.
*
@@ -377,14 +379,20 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
* At this point though there's a match for an entry in the named
* character reference table but the match doesn't end in `;`.
* It may be allowed if it's followed by something unambiguous.
+ *
+ * Only an ASCII alphanumeric or U+003D EQUALS SIGN is ambiguous.
+ * `ctype_alnum()` must be avoided here: its classification of
+ * bytes 0x80 and above depends on the process locale, but only
+ * these specific ASCII characters prevent decoding.
+ *
+ * @see https://html.spec.whatwg.org/#named-character-reference-state
*/
+ $follower = $after_name < $length ? $text[ $after_name ] : '';
$ambiguous_follower = (
- $after_name < $length &&
- $name_at < $length &&
- (
- ctype_alnum( $text[ $after_name ] ) ||
- '=' === $text[ $after_name ]
- )
+ ( 'a' <= $follower && 'z' >= $follower ) ||
+ ( 'A' <= $follower && 'Z' >= $follower ) ||
+ ( '0' <= $follower && '9' >= $follower ) ||
+ '=' === $follower
);
// It's non-ambiguous, safe to leave it in.
diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php
index 0cd1f0fc45e07..8941e4b8b181a 100644
--- a/src/wp-includes/html-api/class-wp-html-open-elements.php
+++ b/src/wp-includes/html-api/class-wp-html-open-elements.php
@@ -281,6 +281,7 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
* > - th
* > - marquee
* > - object
+ * > - select
* > - template
* > - MathML mi
* > - MathML mo
@@ -312,6 +313,7 @@ public function has_element_in_scope( string $tag_name ): bool {
'TH',
'MARQUEE',
'OBJECT',
+ 'SELECT',
'TEMPLATE',
'math MI',
@@ -362,6 +364,7 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
'MARQUEE',
'OBJECT',
'OL',
+ 'SELECT',
'TEMPLATE',
'UL',
@@ -410,6 +413,7 @@ public function has_element_in_button_scope( string $tag_name ): bool {
'TH',
'MARQUEE',
'OBJECT',
+ 'SELECT',
'TEMPLATE',
'math MI',
@@ -459,9 +463,8 @@ public function has_element_in_table_scope( string $tag_name ): bool {
/**
* Returns whether a particular element is in select scope.
*
- * This test differs from the others like it, in that its rules are inverted.
- * Instead of arriving at a match when one of any tag in a termination group
- * is reached, this one terminates if any other tag is reached.
+ * The "select scope" concept was removed from the HTML standard along with the
+ * customizable `