diff --git a/.gitignore b/.gitignore
index 15876fa47fee8..dbec517727bbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,6 +46,7 @@ wp-tests-config.php
 /packagehash.txt
 /.gutenberg-hash
 /artifacts
+/tools/html-api-fuzz/oracles/lexbor/build
 /setup.log
 /coverage
 
diff --git a/merged-prs-2026-06-11.md b/merged-prs-2026-06-11.md
new file mode 100644
index 0000000000000..e86fd913c3179
--- /dev/null
+++ b/merged-prs-2026-06-11.md
@@ -0,0 +1,29 @@
+# PR merges for html-api-fuzz
+
+Date: 2026-06-11
+
+All included PRs were merged into `html-api-fuzz` with merge commits. Trunk was checked before and after these merges; `origin/trunk` is already an ancestor of this branch, so Git had no trunk merge commit to create.
+
+## Merged
+
+- PR #53, `origin/spec-compliant-getters`
+  - Merge commit: `5a3cbf19df`
+  - Why: Aligns HTML API input preprocessing and getter behavior with the spec, reducing fuzzer/oracle noise around NULL bytes, carriage returns, and decoded source values. This also gives the rebuilt #42 branch the helper behavior it expects.
+
+- PR #42, `origin/html-api-fuzz-fiz/decoded-cr`
+  - Merge commit: `ce2af0eff6`
+  - Why: Updates the earlier #42 merge to the current PR head. It preserves decoded carriage returns as `&#13;` during serialization, normalizes NULL bytes through the shared serializer path, and removes the old `get_attribute_for_serialization()` workaround that #53 makes unnecessary.
+
+- PR #51, `origin/html-api-normalize-restore-missing-text-content`
+  - Merge commit: `ce08149069`
+  - Why: Addresses issue #50 and the latest fuzzer `normalize-tree-changed` failures where raw text was dropped from `IFRAME`, `NOEMBED`, `NOFRAMES`, and related rawtext serialization paths.
+
+- PR #17, `origin/copilot/add-script-data-filter`
+  - Merge commit: `adbe354c94`
+  - Why: Addresses issue #16 by adding the classic-script `script_data_{$handle}` JSON data hook. This is separate from the HTML API fuzzer stack, so it was kept in its own merge commit.
+  - Follow-up: `57d458df91` corrects the new test's script-tag assertion to match WordPress's emitted attribute order.
+
+## Not merged
+
+- Trunk: already present; no-op.
+- Other open PRs: either already contained in this branch or not tied to the existing issues identified in this pass.
diff --git a/src/wp-includes/class-wp-scripts.php b/src/wp-includes/class-wp-scripts.php
index 6f633d465bb2c..6eb95febe1c02 100644
--- a/src/wp-includes/class-wp-scripts.php
+++ b/src/wp-includes/class-wp-scripts.php
@@ -480,7 +480,108 @@ public function do_item( $handle, $group = false ) {
 			$attr['data-wp-fetchpriority'] = $original_fetchpriority;
 		}
 
-		$tag  = $translations . $before_script;
+		/**
+		 * Filters data associated with a given script.
+		 *
+		 * Scripts may need data in order to initialize, or data that must be
+		 * available immediately on page load. These are the intended use cases
+		 * for this filter.
+		 *
+		 * The dynamic portion of the hook name, `$handle`, refers to the script handle.
+		 *
+		 * This is best suited to pass essential data that must be available to the script for
+		 * initialization or immediately on page load. It does not replace the REST API or
+		 * fetching data from the client.
+		 *
+		 * Example:
+		 *
+		 *     add_filter(
+		 *         'script_data_my-script-handle',
+		 *         function ( array $data ): array {
+		 *             $data['myConfig'] = array( 'key' => 'value' );
+		 *             return $data;
+		 *         }
+		 *     );
+		 *
+		 * If the filter returns no data (an empty array), nothing will be embedded in the page.
+		 *
+		 * The data for a given script, if provided, will be JSON serialized in a script
+		 * tag with an ID of the form `wp-script-data-{$handle}` and type `application/json`.
+		 *
+		 * The data can be read on the client with a pattern like this:
+		 *
+		 * Example:
+		 *
+		 *     const dataContainer = document.getElementById( 'wp-script-data-my-script-handle' );
+		 *     let data = {};
+		 *     if ( dataContainer ) {
+		 *         try {
+		 *             data = JSON.parse( dataContainer.textContent );
+		 *         } catch {}
+		 *     }
+		 *     // data.myConfig.key === 'value';
+		 *     initMyScriptWithData( data );
+		 *
+		 * @since 7.1.0
+		 *
+		 * @param array $data The data associated with the script.
+		 */
+		$script_data = apply_filters( "script_data_{$handle}", array() );
+
+		$script_data_tag = '';
+		if ( ! empty( $script_data ) ) {
+			/*
+			 * This data will be printed as JSON inside a script tag like this:
+			 *   <script type="application/json"></script>
+			 *
+			 * A script tag must be closed by a sequence beginning with `</`. It's impossible to
+			 * close a script tag without using `<`. We ensure that `<` is escaped and `/` can
+			 * remain unescaped, so `</script>` will be printed as `\u003C/script>`.
+			 *
+			 *   - JSON_HEX_TAG: All < and > are converted to \u003C and \u003E.
+			 *   - JSON_UNESCAPED_SLASHES: Don't escape /.
+			 *   - JSON_INVALID_UTF8_SUBSTITUTE: Substitute invalid UTF-8 sequences with U+FFFD (�)
+			 *     instead of failing. This avoids the overhead of `wp_json_encode()`'s fallback
+			 *     re-encoding and ensures consistent handling with the standard replacement character.
+			 *
+			 * If the page will use UTF-8 encoding, it's safe to print unescaped unicode:
+			 *
+			 *   - JSON_UNESCAPED_UNICODE: Encode multibyte Unicode characters literally (instead of as `\uXXXX`).
+			 *   - JSON_UNESCAPED_LINE_TERMINATORS: The line terminators are kept unescaped when
+			 *     JSON_UNESCAPED_UNICODE is supplied. It uses the same behaviour as it was
+			 *     before PHP 7.1 without this constant. Available as of PHP 7.1.0.
+			 *
+			 * The JSON specification requires encoding in UTF-8, so if the generated HTML page
+			 * is not encoded in UTF-8 then it's not safe to include those literals. They must
+			 * be escaped to avoid encoding issues.
+			 *
+			 * @see https://www.rfc-editor.org/rfc/rfc8259.html for details on encoding requirements.
+			 * @see https://www.php.net/manual/en/json.constants.php for details on these constants.
+			 * @see https://html.spec.whatwg.org/#script-data-state for details on script tag parsing.
+			 */
+			$json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS | JSON_INVALID_UTF8_SUBSTITUTE;
+			if ( ! is_utf8_charset() ) {
+				$json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE;
+			}
+
+			/*
+			 * Build the data script tag as a string instead of printing it, so that it is
+			 * emitted in order as part of `$tag` and is included in concatenated output.
+			 * `wp_print_inline_script_tag()` always echoes and has no "return" parameter.
+			 */
+			$script_data_tag = wp_get_inline_script_tag(
+				wp_json_encode(
+					$script_data,
+					$json_encode_flags
+				),
+				array(
+					'type' => 'application/json',
+					'id'   => "wp-script-data-{$handle}",
+				)
+			);
+		}
+
+		$tag  = $translations . $before_script . $script_data_tag;
 		$tag .= wp_get_script_tag( $attr );
 		$tag .= $after_script;
 
diff --git a/src/wp-includes/html-api/class-wp-html-decoder.php b/src/wp-includes/html-api/class-wp-html-decoder.php
index d902f4b7cabc4..9f33056de0c14 100644
--- a/src/wp-includes/html-api/class-wp-html-decoder.php
+++ b/src/wp-includes/html-api/class-wp-html-decoder.php
@@ -195,6 +195,8 @@ public static function decode( $context, $text ): string {
 	 *     7    === $token_length; // `&notin;`
 	 *
 	 * @since 6.6.0
+	 * @since 7.1.0 Detects ambiguous followers of semicolon-less references
+	 *              by ASCII classification only, independent of the locale.
 	 *
 	 * @global WP_Token_Map $html5_named_character_references Mappings for HTML5 named character references.
 	 *
@@ -377,14 +379,20 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
 		 * At this point though there's a match for an entry in the named
 		 * character reference table but the match doesn't end in `;`.
 		 * It may be allowed if it's followed by something unambiguous.
+		 *
+		 * Only an ASCII alphanumeric or U+003D EQUALS SIGN is ambiguous.
+		 * `ctype_alnum()` must be avoided here: its classification of
+		 * bytes 0x80 and above depends on the process locale, but only
+		 * these specific ASCII characters prevent decoding.
+		 *
+		 * @see https://html.spec.whatwg.org/#named-character-reference-state
 		 */
+		$follower           = $after_name < $length ? $text[ $after_name ] : '';
 		$ambiguous_follower = (
-			$after_name < $length &&
-			$name_at < $length &&
-			(
-				ctype_alnum( $text[ $after_name ] ) ||
-				'=' === $text[ $after_name ]
-			)
+			( 'a' <= $follower && 'z' >= $follower ) ||
+			( 'A' <= $follower && 'Z' >= $follower ) ||
+			( '0' <= $follower && '9' >= $follower ) ||
+			'=' === $follower
 		);
 
 		// It's non-ambiguous, safe to leave it in.
diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php
index 0cd1f0fc45e07..8941e4b8b181a 100644
--- a/src/wp-includes/html-api/class-wp-html-open-elements.php
+++ b/src/wp-includes/html-api/class-wp-html-open-elements.php
@@ -281,6 +281,7 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
 	 * >   - th
 	 * >   - marquee
 	 * >   - object
+	 * >   - select
 	 * >   - template
 	 * >   - MathML mi
 	 * >   - MathML mo
@@ -312,6 +313,7 @@ public function has_element_in_scope( string $tag_name ): bool {
 				'TH',
 				'MARQUEE',
 				'OBJECT',
+				'SELECT',
 				'TEMPLATE',
 
 				'math MI',
@@ -362,6 +364,7 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
 				'MARQUEE',
 				'OBJECT',
 				'OL',
+				'SELECT',
 				'TEMPLATE',
 				'UL',
 
@@ -410,6 +413,7 @@ public function has_element_in_button_scope( string $tag_name ): bool {
 				'TH',
 				'MARQUEE',
 				'OBJECT',
+				'SELECT',
 				'TEMPLATE',
 
 				'math MI',
@@ -459,9 +463,8 @@ public function has_element_in_table_scope( string $tag_name ): bool {
 	/**
 	 * Returns whether a particular element is in select scope.
 	 *
-	 * This test differs from the others like it, in that its rules are inverted.
-	 * Instead of arriving at a match when one of any tag in a termination group
-	 * is reached, this one terminates if any other tag is reached.
+	 * The "select scope" concept was removed from the HTML standard along with the
+	 * customizable `<select>` changes, so nothing is ever in select scope.
 	 *
 	 * > The stack of open elements is said to have a particular element in select scope when it has
 	 * > that element in the specific scope consisting of all element types except the following:
@@ -471,24 +474,13 @@ public function has_element_in_table_scope( string $tag_name ): bool {
 	 * @since 6.4.0 Stub implementation (throws).
 	 * @since 6.7.0 Full implementation.
 	 *
-	 * @see https://html.spec.whatwg.org/#has-an-element-in-select-scope
+	 * @deprecated 7.1.0 This method is no longer part of the HTML standard.
 	 *
 	 * @param string $tag_name Name of tag to check.
-	 * @return bool Whether the given element is in SELECT scope.
+	 * @return bool Always false; select scope no longer exists.
 	 */
 	public function has_element_in_select_scope( string $tag_name ): bool {
-		foreach ( $this->walk_up() as $node ) {
-			if ( $node->node_name === $tag_name ) {
-				return true;
-			}
-
-			if (
-				'OPTION' !== $node->node_name &&
-				'OPTGROUP' !== $node->node_name
-			) {
-				return false;
-			}
-		}
+		_deprecated_function( __METHOD__, '7.1.0' );
 
 		return false;
 	}
@@ -588,7 +580,7 @@ public function remove_node( WP_HTML_Token $token ): bool {
 
 			$position_from_start = $this->count() - $position_from_end - 1;
 			array_splice( $this->stack, $position_from_start, 1 );
-			$this->after_element_pop( $item );
+			$this->after_element_pop( $item, 0 === $position_from_end );
 			return true;
 		}
 
@@ -697,6 +689,7 @@ public function after_element_push( WP_HTML_Token $item ): void {
 			case 'TH':
 			case 'MARQUEE':
 			case 'OBJECT':
+			case 'SELECT':
 			case 'TEMPLATE':
 			case 'math MI':
 			case 'math MO':
@@ -731,9 +724,10 @@ public function after_element_push( WP_HTML_Token $item ): void {
 	 *
 	 * @since 6.4.0
 	 *
-	 * @param WP_HTML_Token $item Element that was removed from the stack of open elements.
+	 * @param WP_HTML_Token $item               Element that was removed from the stack of open elements.
+	 * @param bool          $invoke_pop_handler Whether to call the pop handler.
 	 */
-	public function after_element_pop( WP_HTML_Token $item ): void {
+	public function after_element_pop( WP_HTML_Token $item, bool $invoke_pop_handler = true ): void {
 		/*
 		 * When adding support for new elements, expand this switch to trap
 		 * cases where the precalculated value needs to change.
@@ -753,6 +747,7 @@ public function after_element_pop( WP_HTML_Token $item ): void {
 			case 'TH':
 			case 'MARQUEE':
 			case 'OBJECT':
+			case 'SELECT':
 			case 'TEMPLATE':
 			case 'math MI':
 			case 'math MO':
@@ -767,7 +762,7 @@ public function after_element_pop( WP_HTML_Token $item ): void {
 				break;
 		}
 
-		if ( null !== $this->pop_handler ) {
+		if ( $invoke_pop_handler && null !== $this->pop_handler ) {
 			call_user_func( $this->pop_handler, $item );
 		}
 	}
diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php
index c7c63286e1ebf..c311618a5ce67 100644
--- a/src/wp-includes/html-api/class-wp-html-processor-state.php
+++ b/src/wp-includes/html-api/class-wp-html-processor-state.php
@@ -209,7 +209,8 @@ class WP_HTML_Processor_State {
 	 *
 	 * @since 6.7.0
 	 *
-	 * @see https://html.spec.whatwg.org/#parsing-main-inselect
+	 * @deprecated 7.1.0 The "in select" insertion mode was removed from the standard.
+	 *
 	 * @see WP_HTML_Processor_State::$insertion_mode
 	 *
 	 * @var string
@@ -221,7 +222,8 @@ class WP_HTML_Processor_State {
 	 *
 	 * @since 6.7.0
 	 *
-	 * @see https://html.spec.whatwg.org/#parsing-main-inselectintable
+	 * @deprecated 7.1.0 The "in select in table" insertion mode was removed from the standard.
+	 *
 	 * @see WP_HTML_Processor_State::$insertion_mode
 	 *
 	 * @var string
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 35d91fad3129c..275975b53c89c 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -121,6 +121,17 @@
  *  - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. `<script>document.write('<p>Hi</p>');</script>`.
  *  - SCRIPT content which has been escaped, e.g. `<script><!-- document.write('<script>console.log("hi")</script>') --></script>`.
  *
+ * ### Customizable select elements
+ *
+ * This parser implements the updated `SELECT` parsing rules introduced by the HTML
+ * standard's customizable select elements changes.
+ *
+ * It does not implement the "maybe clone an option into selectedcontent" step that
+ * copies the selected `OPTION` content into a `SELECTEDCONTENT` element. A
+ * `SELECTEDCONTENT` is otherwise parsed like any other element. In the single case
+ * where producing a correct tree would depend on that cloning, a
+ * `SELECT > BUTTON > SELECTEDCONTENT`, the parser stops processing instead.
+ *
  * ### Unsupported Features
  *
  * This parser does not report parse errors.
@@ -139,6 +150,7 @@
  *
  * @see WP_HTML_Tag_Processor
  * @see https://html.spec.whatwg.org/
+ * @see https://github.com/whatwg/html/pull/10548 Customizable select elements parsing changes.
  * @phpstan-consistent-constructor
  */
 class WP_HTML_Processor extends WP_HTML_Tag_Processor {
@@ -228,6 +240,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 	private $element_queue = array();
 
+	/**
+	 * Whether the end-of-file token has been processed through the insertion modes.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @var bool
+	 */
+	private $has_processed_eof = false;
+
 	/**
 	 * Stores the current breadcrumbs.
 	 *
@@ -251,6 +272,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 	private $current_element = null;
 
+	/**
+	 * Elements removed from the stack of open elements without a normal pop event.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @var array[]
+	 */
+	private $non_lifo_breadcrumb_removals = array();
+
 	/**
 	 * Context node if created as a fragment parser.
 	 *
@@ -399,29 +429,25 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
 
 		$this->state->stack_of_open_elements->set_push_handler(
 			function ( WP_HTML_Token $token ): void {
-				$is_virtual            = ! isset( $this->state->current_token ) || $this->is_tag_closer();
+				$is_virtual            = $this->is_eof_token() || ! isset( $this->state->current_token ) || $this->is_tag_closer();
 				$same_node             = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
 				$provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
 				$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
 
-				$this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
+				$this->change_parsing_namespace_for_node( $token );
 			}
 		);
 
 		$this->state->stack_of_open_elements->set_pop_handler(
 			function ( WP_HTML_Token $token ): void {
-				$is_virtual            = ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
+				$is_virtual            = $this->is_eof_token() || ! isset( $this->state->current_token ) || ! $this->is_tag_closer();
 				$same_node             = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
 				$provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
 				$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
 
 				$adjusted_current_node = $this->get_adjusted_current_node();
 
-				if ( $adjusted_current_node ) {
-					$this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
-				} else {
-					$this->change_parsing_namespace( 'html' );
-				}
+				$this->change_parsing_namespace_for_node( $adjusted_current_node );
 			}
 		);
 
@@ -435,6 +461,28 @@ function ( WP_HTML_Token $token ): void {
 		};
 	}
 
+	/**
+	 * Updates the tokenizer's namespace state to match a controlling node.
+	 *
+	 * For an HTML integration point the parsing namespace becomes HTML, but CDATA detection
+	 * still uses the node's own namespace. Without a node, only the parsing namespace is set.
+	 *
+	 * @since 7.1.0
+	 * @ignore
+	 *
+	 * @param WP_HTML_Token|null $node Node controlling the next token's parsing context.
+	 */
+	private function change_parsing_namespace_for_node( ?WP_HTML_Token $node ): void {
+		if ( null !== $node ) {
+			$parsing_namespace = $node->integration_node_type ? 'html' : $node->namespace;
+			$this->change_parsing_namespace( $parsing_namespace );
+			$this->change_cdata_parsing_namespace( $node->namespace );
+			return;
+		}
+
+		$this->change_parsing_namespace( 'html' );
+	}
+
 	/**
 	 * Creates a fragment processor at the current node.
 	 *
@@ -570,9 +618,7 @@ private function create_fragment_at_current_node( string $html ) {
 		 * This is important so that any push/pop from the stack of open
 		 * elements does not change the parsing namespace.
 		 */
-		$fragment_processor->change_parsing_namespace(
-			$this->current_element->token->integration_node_type ? 'html' : $namespace
-		);
+		$fragment_processor->change_parsing_namespace_for_node( $this->current_element->token );
 
 		return $fragment_processor;
 	}
@@ -814,6 +860,10 @@ private function next_visitable_token(): bool {
 		 *       tokens works in the meantime and isn't obviously wrong.
 		 */
 		if ( empty( $this->element_queue ) ) {
+			if ( $this->queue_virtual_closer_after_non_lifo_removal() ) {
+				return $this->next_visitable_token();
+			}
+
 			if ( $this->step() ) {
 				return $this->next_visitable_token();
 			}
@@ -823,6 +873,10 @@ private function next_visitable_token(): bool {
 			}
 		}
 
+		if ( $this->queue_virtual_closer_after_non_lifo_removal() ) {
+			return $this->next_visitable_token();
+		}
+
 		// Process the next event on the queue.
 		$this->current_element = array_shift( $this->element_queue );
 		if ( ! isset( $this->current_element ) ) {
@@ -860,6 +914,68 @@ private function next_visitable_token(): bool {
 		return true;
 	}
 
+	/**
+	 * Schedules the deferred virtual closer for a node removed out of stack order.
+	 *
+	 * When a node is removed from the middle of the stack of open elements, no
+	 * pop event is emitted, because processing such an event pops whichever
+	 * breadcrumb is currently last. The removed node stays an ancestor of the
+	 * subtree that is still open, so its closer is reported as a virtual POP
+	 * event only after that subtree has closed and before the next token is
+	 * visited.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @return bool Whether a virtual closer was queued.
+	 */
+	private function queue_virtual_closer_after_non_lifo_removal(): bool {
+		if ( ! $this->non_lifo_breadcrumb_removals ) {
+			return false;
+		}
+
+		// Only the most recently recorded removal is considered.
+		$pending       = end( $this->non_lifo_breadcrumb_removals );
+		$closing_token = $pending['token'];
+		$closing_name  = $closing_token->node_name;
+
+		// The subtree has closed only once the breadcrumbs are back at the recorded depth.
+		if ( count( $this->breadcrumbs ) !== $pending['breadcrumb_depth'] ) {
+			return false;
+		}
+
+		$innermost_crumb = end( $this->breadcrumbs );
+		if ( empty( $this->breadcrumbs ) || $innermost_crumb !== $closing_name ) {
+			return false;
+		}
+
+		// At EOF, normal stack pops may be queued and processed after the stack is empty.
+		$adjusted_current_node = $this->get_adjusted_current_node();
+		if ( null !== $adjusted_current_node && $innermost_crumb === $adjusted_current_node->node_name ) {
+			return false;
+		}
+
+		/*
+		 * Depth and name alone cannot tell the removed element apart from another
+		 * element with the same name at the same depth. When the next queued event
+		 * is a POP for such a different element, that element owns the current
+		 * breadcrumb and the virtual closer has to wait for it.
+		 */
+		$next_event = reset( $this->element_queue );
+		if ( false !== $next_event && WP_HTML_Stack_Event::POP === $next_event->operation ) {
+			$next_token = $next_event->token;
+			if ( $next_token !== $closing_token && $next_token->node_name === $closing_name ) {
+				return false;
+			}
+		}
+
+		// Report the closer ahead of every event that is already queued.
+		array_pop( $this->non_lifo_breadcrumb_removals );
+		$virtual_closer = new WP_HTML_Stack_Event( $closing_token, WP_HTML_Stack_Event::POP, 'virtual' );
+		array_unshift( $this->element_queue, $virtual_closer );
+
+		return true;
+	}
+
 	/**
 	 * Indicates if the current tag token is a tag closer.
 	 *
@@ -1036,12 +1152,23 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
 			}
 		}
 
-		// Finish stepping when there are no more tokens in the document.
+		// Process EOF once in the insertion modes before finishing.
+		$is_eof = false;
 		if (
 			WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
 			WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
 		) {
-			return false;
+			if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+				if ( $this->has_processed_eof || ! isset( $this->state->current_token ) ) {
+					return false;
+				}
+
+				$this->has_processed_eof = true;
+			} elseif ( ! isset( $this->state->current_token ) ) {
+				return false;
+			}
+
+			$is_eof = true;
 		}
 
 		$adjusted_current_node = $this->get_adjusted_current_node();
@@ -1049,7 +1176,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
 		$is_start_tag          = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer;
 		$token_name            = $this->get_token_name();
 
-		if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
+		if ( self::REPROCESS_CURRENT_NODE !== $node_to_process && ! $is_eof ) {
 			try {
 				$bookmark_name = $this->bookmark_token();
 			} catch ( Exception $e ) {
@@ -1088,6 +1215,33 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
 			)
 		);
 
+		if ( $is_eof && ! $parse_in_current_insertion_mode ) {
+			if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+				return $this->step_in_template();
+			}
+
+			return false;
+		}
+
+		if ( $is_eof ) {
+			switch ( $this->state->insertion_mode ) {
+				case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
+				case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
+				case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
+				case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
+				case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
+				case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
+					break;
+
+				default:
+					if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+						return $this->step_in_template();
+					}
+
+					return false;
+			}
+		}
+
 		try {
 			if ( ! $parse_in_current_insertion_mode ) {
 				return $this->step_in_foreign_content();
@@ -1136,12 +1290,6 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool {
 				case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
 					return $this->step_in_cell();
 
-				case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT:
-					return $this->step_in_select();
-
-				case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE:
-					return $this->step_in_select_in_table();
-
 				case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
 					return $this->step_in_template();
 
@@ -1347,6 +1495,8 @@ public function serialize(): ?string {
 	 *
 	 * @since 6.7.0
 	 * @since 6.9.0 Converted from protected to public method.
+	 * @since 7.1.0 Contents of IFRAME, NOEMBED, NOFRAMES, and XMP elements are
+	 *              serialized literally instead of being dropped or escaped.
 	 *
 	 * @return string Serialization of token, or empty string if no serialization exists.
 	 */
@@ -1383,7 +1533,7 @@ public function serialize_token(): string {
 				break;
 
 			case '#text':
-				$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+				$html .= self::serialize_decoded_text( $this->get_modifiable_text() );
 				break;
 
 			// Unlike the `<>` which is interpreted as plaintext, this is ignored entirely.
@@ -1404,10 +1554,9 @@ public function serialize_token(): string {
 			return $html;
 		}
 
-		$tag_name       = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
+		$tag_name       = $this->get_tag();
 		$in_html        = 'html' === $this->get_namespace();
 		$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
-		$qualified_name = str_replace( "\x00", "\u{FFFD}", $qualified_name );
 
 		if ( $this->is_tag_closer() ) {
 			$html .= "</{$qualified_name}>";
@@ -1426,8 +1575,8 @@ public function serialize_token(): string {
 		$seen_attribute_names        = array();
 		foreach ( $attribute_names as $attribute_name ) {
 			$qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name );
-			$qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name );
 			$qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name );
+			$serialized_attribute_name = str_replace( ' ', ':', $qualified_attribute_name );
 			if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) {
 				continue;
 			} else {
@@ -1436,21 +1585,20 @@ public function serialize_token(): string {
 
 			if (
 				$previous_attribute_was_true &&
-				isset( $qualified_attribute_name[0] ) &&
-				'=' === $qualified_attribute_name[0]
+				isset( $serialized_attribute_name[0] ) &&
+				'=' === $serialized_attribute_name[0]
 			) {
 				$html .= '=""';
 			}
 
-			$html .= " {$qualified_attribute_name}";
+			$html .= " {$serialized_attribute_name}";
 			$value = $this->get_attribute( $attribute_name );
 
 			if ( is_string( $value ) ) {
-				$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
+				$html .= '="' . self::serialize_decoded_text( $value ) . '"';
 			}
 
 			$previous_attribute_was_true = true === $value;
-			$html                        = str_replace( "\x00", "\u{FFFD}", $html );
 		}
 
 		if ( ! $in_html && $this->has_self_closing_flag() ) {
@@ -1461,8 +1609,8 @@ public function serialize_token(): string {
 
 		/*
 		 * The HTML parser strips a leading newline immediately after the start
-		 * tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend
-		 * a leading newline to ensure the semantic HTML content is preserved.
+		 * tag of HTML TEXTAREA, PRE, and LISTING elements. When serializing,
+		 * prepend a leading newline to ensure the semantic HTML content is preserved.
 		 *
 		 * For example, `<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content
 		 * has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>` are _equivalent_.
@@ -1481,7 +1629,7 @@ public function serialize_token(): string {
 		 *
 		 * @see https://html.spec.whatwg.org/multipage/parsing.html
 		 */
-		if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) {
+		if ( $in_html && ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) ) {
 			$html .= "\n";
 		}
 
@@ -1490,18 +1638,40 @@ public function serialize_token(): string {
 			$text = $this->get_modifiable_text();
 
 			switch ( $tag_name ) {
+				/*
+				 * The contents of these elements are emitted literally to preserve
+				 * the document's contents, following the HTML serialization spec:
+				 *
+				 * > If the parent of current node is a style, script, xmp, iframe,
+				 * > noembed, noframes, or plaintext element, or if the parent of
+				 * > current node is a noscript element and scripting is enabled for
+				 * > the node, then append the value of current node's data literally.
+				 *
+				 * This is safe because character references are never decoded in
+				 * their contents. RAWTEXT contents (IFRAME, NOEMBED, NOFRAMES,
+				 * STYLE, XMP) cannot contain their own closing tag, so the closer
+				 * appended below cannot be matched early. SCRIPT data may contain
+				 * escaped closers (e.g. within `<!-- -->`), but re-parsing the
+				 * identical bytes follows the same tokenization rules that produced
+				 * this text, terminating at the appended closer all the same.
+				 *
+				 * @see https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
+				 */
 				case 'IFRAME':
 				case 'NOEMBED':
 				case 'NOFRAMES':
-					$text = '';
-					break;
-
 				case 'SCRIPT':
 				case 'STYLE':
+				case 'XMP':
 					break;
 
+				/*
+				 * The contents of TEXTAREA and TITLE are parsed as RCDATA, in which
+				 * character references are decoded, so the decoded modifiable text
+				 * must be re-escaped to preserve the document's contents.
+				 */
 				default:
-					$text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+					$text = self::serialize_decoded_text( $text );
 			}
 
 			$html .= "{$text}</{$qualified_name}>";
@@ -1510,6 +1680,30 @@ public function serialize_token(): string {
 		return $html;
 	}
 
+	/**
+	 * Escapes decoded text so that it can be emitted in a text node or attribute value.
+	 *
+	 * In addition to HTML escaping, two characters that cannot survive a re-parse are rewritten:
+	 *
+	 *  - A carriage return is written as the character reference `&#13;`. The
+	 *    HTML parser's input preprocessing turns a raw CR into a line feed, so a
+	 *    raw CR would change the text on the next parse and the serialized
+	 *    output would never reach a fixed point.
+	 *  - A NULL byte, possible in API-supplied values, is written as U+FFFD
+	 *    because the tokenizer would replace or remove a raw NULL byte.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @param string $text Decoded text to serialize.
+	 * @return string Serialized text.
+	 */
+	private static function serialize_decoded_text( string $text ): string {
+		$escaped = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+
+		// Neither replacement can produce a character that the other one searches for.
+		return str_replace( array( "\r", "\x00" ), array( '&#13;', "\u{FFFD}" ), $escaped );
+	}
+
 	/**
 	 * Parses next element in the 'initial' insertion mode.
 	 *
@@ -2648,7 +2842,7 @@ private function step_in_body(): bool {
 			 * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote",
 			 * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
 			 * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
-			 * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
+			 * > "menu", "nav", "ol", "pre", "search", "section", "select", "summary", "ul"
 			 */
 			case '-ADDRESS':
 			case '-ARTICLE':
@@ -2675,6 +2869,7 @@ private function step_in_body(): bool {
 			case '-PRE':
 			case '-SEARCH':
 			case '-SECTION':
+			case '-SELECT':
 			case '-SUMMARY':
 			case '-UL':
 				if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) {
@@ -2848,7 +3043,28 @@ private function step_in_body(): bool {
 						case 'A':
 							$this->run_adoption_agency_algorithm();
 							$this->state->active_formatting_elements->remove_node( $item );
-							$this->state->stack_of_open_elements->remove_node( $item );
+							$is_current_node = $item === $this->state->stack_of_open_elements->current_node();
+
+							/*
+							 * The removed node's breadcrumb sits at its position in the
+							 * stack of open elements: one crumb for each open element at
+							 * or below it. Fragment parsers carry an extra crumb for the
+							 * context node, which never appears on the stack.
+							 */
+							$stack_position = 0;
+							foreach ( $this->state->stack_of_open_elements->walk_down() as $node ) {
+								++$stack_position;
+								if ( $node === $item ) {
+									break;
+								}
+							}
+
+							if ( $this->state->stack_of_open_elements->remove_node( $item ) && ! $is_current_node ) {
+								$this->non_lifo_breadcrumb_removals[] = array(
+									'token'            => $item,
+									'breadcrumb_depth' => isset( $this->context_node ) ? $stack_position + 1 : $stack_position,
+								);
+							}
 							break 2;
 					}
 				}
@@ -2913,8 +3129,7 @@ private function step_in_body(): bool {
 			case '-STRONG':
 			case '-TT':
 			case '-U':
-				$this->run_adoption_agency_algorithm();
-				return true;
+				return $this->run_adoption_agency_algorithm();
 
 			/*
 			 * > A start tag whose tag name is one of: "applet", "marquee", "object"
@@ -2993,6 +3208,28 @@ private function step_in_body(): bool {
 			 * > A start tag whose tag name is "input"
 			 */
 			case '+INPUT':
+				/*
+				 * > If the parser was created as part of the HTML fragment parsing algorithm
+				 * > (fragment case) and the context element passed to that algorithm is a
+				 * > select element:
+				 * >   1. Parse error.
+				 * >   2. Ignore the token.
+				 * >   3. Return.
+				 */
+				if ( isset( $this->context_node ) && 'SELECT' === $this->context_node->node_name ) {
+					return $this->step();
+				}
+
+				/*
+				 * > If the stack of open elements has a select element in scope:
+				 * >   1. Parse error.
+				 * >   2. Pop elements from the stack of open elements until a select element
+				 * >      has been popped from the stack.
+				 */
+				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'SELECT' ) ) {
+					$this->state->stack_of_open_elements->pop_until( 'SELECT' );
+				}
+
 				$this->reconstruct_active_formatting_elements();
 				$this->insert_html_element( $this->state->current_token );
 
@@ -3024,6 +3261,17 @@ private function step_in_body(): bool {
 				if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
 					$this->close_a_p_element();
 				}
+
+				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'SELECT' ) ) {
+					$this->generate_implied_end_tags();
+					/*
+					 * > If the stack of open elements has an option element in scope or has
+					 * > an optgroup element in scope, then this is a parse error.
+					 *
+					 * @todo Indicate a parse error once it's possible.
+					 */
+				}
+
 				$this->insert_html_element( $this->state->current_token );
 				$this->state->frameset_ok = false;
 				return true;
@@ -3110,40 +3358,69 @@ private function step_in_body(): bool {
 			 * > A start tag whose tag name is "select"
 			 */
 			case '+SELECT':
+				/*
+				 * > If the parser was created as part of the HTML fragment parsing algorithm
+				 * > (fragment case) and the context element passed to that algorithm is a
+				 * > select element:
+				 * >   1. Parse error.
+				 * >   2. Ignore the token.
+				 */
+				if ( isset( $this->context_node ) && 'SELECT' === $this->context_node->node_name ) {
+					// @todo Indicate a parse error once it's possible.
+					return $this->step();
+				}
+				/*
+				 * > Otherwise, if the stack of open elements has a select element in scope:
+				 * >   1. Parse error.
+				 * >   2. Ignore the token.
+				 * >   3. Pop elements from the stack of open elements until a select element
+				 * >      has been popped from the stack.
+				 */
+				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'SELECT' ) ) {
+					// @todo Indicate a parse error once it's possible.
+					$this->state->stack_of_open_elements->pop_until( 'SELECT' );
+					return $this->step();
+				}
+
 				$this->reconstruct_active_formatting_elements();
 				$this->insert_html_element( $this->state->current_token );
 				$this->state->frameset_ok = false;
+				return true;
 
-				switch ( $this->state->insertion_mode ) {
-					/*
-					 * > If the insertion mode is one of "in table", "in caption", "in table body", "in row",
-					 * > or "in cell", then switch the insertion mode to "in select in table".
-					 */
-					case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE:
-					case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION:
-					case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY:
-					case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW:
-					case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
-						$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
-						break;
-
+			/*
+			 * > A start tag whose tag name is "option"
+			 */
+			case '+OPTION':
+				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'SELECT' ) ) {
+					$this->generate_implied_end_tags( 'OPTGROUP' );
 					/*
-					 * > Otherwise, switch the insertion mode to "in select".
+					 * > If the stack of open elements has an option element in scope, then this
+					 * > is a parse error.
+					 * @todo Indicate a parse error once it's possible.
 					 */
-					default:
-						$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;
-						break;
+				} elseif ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
+					$this->state->stack_of_open_elements->pop();
 				}
+
+				$this->reconstruct_active_formatting_elements();
+				$this->insert_html_element( $this->state->current_token );
 				return true;
 
 			/*
-			 * > A start tag whose tag name is one of: "optgroup", "option"
+			 * > A start tag whose tag name is "optgroup"
 			 */
 			case '+OPTGROUP':
-			case '+OPTION':
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
+				if ( $this->state->stack_of_open_elements->has_element_in_scope( 'SELECT' ) ) {
+					$this->generate_implied_end_tags();
+					/*
+					 * > If the stack of open elements has an option element in scope or has an
+					 * > optgroup element in scope, then this is a parse error.
+					 * @todo Indicate a parse error once it's possible.
+					 */
+				} elseif ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
 					$this->state->stack_of_open_elements->pop();
 				}
+
 				$this->reconstruct_active_formatting_elements();
 				$this->insert_html_element( $this->state->current_token );
 				return true;
@@ -3238,49 +3515,103 @@ private function step_in_body(): bool {
 				return $this->step();
 		}
 
+		/*
+		 * > An end-of-file token
+		 */
+		if ( null === $token_name ) {
+			if ( ! empty( $this->state->stack_of_template_insertion_modes ) ) {
+				return $this->step_in_template();
+			}
+
+			return false;
+		}
+
 		if ( ! parent::is_tag_closer() ) {
 			/*
 			 * > Any other start tag
 			 */
+
+			/*
+			 * SELECT > BUTTON > SELECTEDCONTENT (as direct children) requires cloning
+			 * the selected option into the new element. This is unsupported, so bail.
+			 */
+			if ( 'SELECTEDCONTENT' === $token_name ) {
+				$walker = $this->state->stack_of_open_elements->walk_up();
+				if ( null !== $walker->current() && 'BUTTON' === $walker->current()->node_name ) {
+					$walker->next(); // Move from the BUTTON to the next open element up the stack.
+					if ( null !== $walker->current() && 'SELECT' === $walker->current()->node_name ) {
+						$this->bail( 'Cannot process SELECTEDCONTENT where cloning may be necessary.' );
+					}
+				}
+			}
 			$this->reconstruct_active_formatting_elements();
 			$this->insert_html_element( $this->state->current_token );
 			return true;
 		} else {
 			/*
 			 * > Any other end tag
+			 *
+			 * OPTION end tags are handled here as well:
+			 *
+			 * > An end tag whose tag name is "option"
+			 * >   - Let option be the first option element in the stack of open elements.
+			 * >   - Run the steps for "any other end tag."
+			 * >   - If option is no longer in the stack of open elements, then run maybe clone
+			 * >     an option into selectedcontent given option.
+			 *
+			 * The "maybe clone an option into selectedcontent" algorithm is not implemented.
 			 */
+			return $this->step_in_body_any_other_end_tag();
+		}
 
-			/*
-			 * Find the corresponding tag opener in the stack of open elements, if
-			 * it exists before reaching a special element, which provides a kind
-			 * of boundary in the stack. For example, a `</custom-tag>` should not
-			 * close anything beyond its containing `P` or `DIV` element.
-			 */
-			foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
-				if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
-					break;
-				}
+		$this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' );
+		// This unnecessary return prevents tools from inaccurately reporting type errors.
+		return false;
+	}
 
-				if ( self::is_special( $node ) ) {
-					// This is a parse error, ignore the token.
-					return $this->step();
-				}
+	/**
+	 * Parses an "any other end tag" token in the "in body" insertion mode.
+	 *
+	 * @since 7.1.0
+	 * @ignore
+	 *
+	 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
+	 *
+	 * @return bool Whether an element was found.
+	 */
+	private function step_in_body_any_other_end_tag(): bool {
+		$token_name = $this->get_token_name();
+
+		/*
+		 * Find the corresponding tag opener in the stack of open elements, if
+		 * it exists before reaching a special element, which provides a kind
+		 * of boundary in the stack. For example, a `</custom-tag>` should not
+		 * close anything beyond its containing `P` or `DIV` element.
+		 */
+		foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
+			if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
+				break;
 			}
 
-			$this->generate_implied_end_tags( $token_name );
-			if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
-				// @todo Record parse error: this error doesn't impact parsing.
+			if ( self::is_special( $node ) ) {
+				// This is a parse error, ignore the token.
+				return $this->step();
 			}
+		}
 
-			foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
-				$this->state->stack_of_open_elements->pop();
-				if ( $node === $item ) {
-					return true;
-				}
+		$this->generate_implied_end_tags( $token_name );
+		if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
+			// @todo Record parse error: this error doesn't impact parsing.
+		}
+
+		foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
+			$this->state->stack_of_open_elements->pop();
+			if ( $node === $item ) {
+				return true;
 			}
 		}
 
-		$this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' );
+		$this->bail( 'Should not have been able to reach end of IN BODY "any other end tag" processing. Check HTML API code.' );
 		// This unnecessary return prevents tools from inaccurately reporting type errors.
 		return false;
 	}
@@ -4075,247 +4406,6 @@ private function step_in_cell(): bool {
 		return $this->step_in_body();
 	}
 
-	/**
-	 * Parses next element in the 'in select' insertion mode.
-	 *
-	 * This internal function performs the 'in select' insertion mode
-	 * logic for the generalized WP_HTML_Processor::step() function.
-	 *
-	 * @since 6.7.0
-	 * @ignore
-	 *
-	 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
-	 *
-	 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
-	 * @see WP_HTML_Processor::step
-	 *
-	 * @return bool Whether an element was found.
-	 */
-	private function step_in_select(): bool {
-		$token_name = $this->get_token_name();
-		$token_type = $this->get_token_type();
-		$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
-		$op         = "{$op_sigil}{$token_name}";
-
-		switch ( $op ) {
-			/*
-			 * > Any other character token
-			 */
-			case '#text':
-				/*
-				 * > A character token that is U+0000 NULL
-				 *
-				 * If a text node only comprises null bytes then it should be
-				 * entirely ignored and should not return to calling code.
-				 */
-				if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) {
-					// Parse error: ignore the token.
-					return $this->step();
-				}
-
-				$this->insert_html_element( $this->state->current_token );
-				return true;
-
-			/*
-			 * > A comment token
-			 */
-			case '#comment':
-			case '#funky-comment':
-			case '#presumptuous-tag':
-				$this->insert_html_element( $this->state->current_token );
-				return true;
-
-			/*
-			 * > A DOCTYPE token
-			 */
-			case 'html':
-				// Parse error: ignore the token.
-				return $this->step();
-
-			/*
-			 * > A start tag whose tag name is "html"
-			 */
-			case '+HTML':
-				return $this->step_in_body();
-
-			/*
-			 * > A start tag whose tag name is "option"
-			 */
-			case '+OPTION':
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
-					$this->state->stack_of_open_elements->pop();
-				}
-				$this->insert_html_element( $this->state->current_token );
-				return true;
-
-			/*
-			 * > A start tag whose tag name is "optgroup"
-			 * > A start tag whose tag name is "hr"
-			 *
-			 * These rules are identical except for the treatment of the self-closing flag and
-			 * the subsequent pop of the HR void element, all of which is handled elsewhere in the processor.
-			 */
-			case '+OPTGROUP':
-			case '+HR':
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
-					$this->state->stack_of_open_elements->pop();
-				}
-
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
-					$this->state->stack_of_open_elements->pop();
-				}
-
-				$this->insert_html_element( $this->state->current_token );
-				return true;
-
-			/*
-			 * > An end tag whose tag name is "optgroup"
-			 */
-			case '-OPTGROUP':
-				$current_node = $this->state->stack_of_open_elements->current_node();
-				if ( $current_node && 'OPTION' === $current_node->node_name ) {
-					foreach ( $this->state->stack_of_open_elements->walk_up( $current_node ) as $parent ) {
-						break;
-					}
-					if ( $parent && 'OPTGROUP' === $parent->node_name ) {
-						$this->state->stack_of_open_elements->pop();
-					}
-				}
-
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
-					$this->state->stack_of_open_elements->pop();
-					return true;
-				}
-
-				// Parse error: ignore the token.
-				return $this->step();
-
-			/*
-			 * > An end tag whose tag name is "option"
-			 */
-			case '-OPTION':
-				if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
-					$this->state->stack_of_open_elements->pop();
-					return true;
-				}
-
-				// Parse error: ignore the token.
-				return $this->step();
-
-			/*
-			 * > An end tag whose tag name is "select"
-			 * > A start tag whose tag name is "select"
-			 *
-			 * > It just gets treated like an end tag.
-			 */
-			case '-SELECT':
-			case '+SELECT':
-				if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
-					// Parse error: ignore the token.
-					return $this->step();
-				}
-				$this->state->stack_of_open_elements->pop_until( 'SELECT' );
-				$this->reset_insertion_mode_appropriately();
-				return true;
-
-			/*
-			 * > A start tag whose tag name is one of: "input", "keygen", "textarea"
-			 *
-			 * All three of these tags are considered a parse error when found in this insertion mode.
-			 */
-			case '+INPUT':
-			case '+KEYGEN':
-			case '+TEXTAREA':
-				if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
-					// Ignore the token.
-					return $this->step();
-				}
-				$this->state->stack_of_open_elements->pop_until( 'SELECT' );
-				$this->reset_insertion_mode_appropriately();
-				return $this->step( self::REPROCESS_CURRENT_NODE );
-
-			/*
-			 * > A start tag whose tag name is one of: "script", "template"
-			 * > An end tag whose tag name is "template"
-			 */
-			case '+SCRIPT':
-			case '+TEMPLATE':
-			case '-TEMPLATE':
-				return $this->step_in_head();
-		}
-
-		/*
-		 * > Anything else
-		 * >   Parse error: ignore the token.
-		 */
-		return $this->step();
-	}
-
-	/**
-	 * Parses next element in the 'in select in table' insertion mode.
-	 *
-	 * This internal function performs the 'in select in table' insertion mode
-	 * logic for the generalized WP_HTML_Processor::step() function.
-	 *
-	 * @since 6.7.0
-	 * @ignore
-	 *
-	 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
-	 *
-	 * @see https://html.spec.whatwg.org/#parsing-main-inselectintable
-	 * @see WP_HTML_Processor::step
-	 *
-	 * @return bool Whether an element was found.
-	 */
-	private function step_in_select_in_table(): bool {
-		$token_name = $this->get_token_name();
-		$token_type = $this->get_token_type();
-		$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
-		$op         = "{$op_sigil}{$token_name}";
-
-		switch ( $op ) {
-			/*
-			 * > A start tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
-			 */
-			case '+CAPTION':
-			case '+TABLE':
-			case '+TBODY':
-			case '+TFOOT':
-			case '+THEAD':
-			case '+TR':
-			case '+TD':
-			case '+TH':
-				// @todo Indicate a parse error once it's possible.
-				$this->state->stack_of_open_elements->pop_until( 'SELECT' );
-				$this->reset_insertion_mode_appropriately();
-				return $this->step( self::REPROCESS_CURRENT_NODE );
-
-			/*
-			 * > An end tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
-			 */
-			case '-CAPTION':
-			case '-TABLE':
-			case '-TBODY':
-			case '-TFOOT':
-			case '-THEAD':
-			case '-TR':
-			case '-TD':
-			case '-TH':
-				// @todo Indicate a parse error once it's possible.
-				if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $token_name ) ) {
-					return $this->step();
-				}
-				$this->state->stack_of_open_elements->pop_until( 'SELECT' );
-				$this->reset_insertion_mode_appropriately();
-				return $this->step( self::REPROCESS_CURRENT_NODE );
-		}
-
-		/*
-		 * > Anything else
-		 */
-		return $this->step_in_select();
-	}
-
 	/**
 	 * Parses next element in the 'in template' insertion mode.
 	 *
@@ -4412,6 +4502,23 @@ private function step_in_template(): bool {
 				return $this->step( self::REPROCESS_CURRENT_NODE );
 		}
 
+		/*
+		 * > An end-of-file token
+		 */
+		if ( null === $token_name ) {
+			if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+				// Stop parsing.
+				return false;
+			}
+
+			// @todo Indicate a parse error once it's possible.
+			$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
+			$this->state->active_formatting_elements->clear_up_to_last_marker();
+			array_pop( $this->state->stack_of_template_insertion_modes );
+			$this->reset_insertion_mode_appropriately();
+			return $this->step( self::REPROCESS_CURRENT_NODE );
+		}
+
 		/*
 		 * > Any other start tag
 		 */
@@ -4430,20 +4537,7 @@ private function step_in_template(): bool {
 			return $this->step();
 		}
 
-		/*
-		 * > An end-of-file token
-		 */
-		if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
-			// Stop parsing.
-			return false;
-		}
-
-		// @todo Indicate a parse error once it's possible.
-		$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
-		$this->state->active_formatting_elements->clear_up_to_last_marker();
-		array_pop( $this->state->stack_of_template_insertion_modes );
-		$this->reset_insertion_mode_appropriately();
-		return $this->step( self::REPROCESS_CURRENT_NODE );
+		return false;
 	}
 
 	/**
@@ -5160,12 +5254,6 @@ private function step_in_foreign_content(): bool {
 				case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
 					return $this->step_in_cell();
 
-				case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT:
-					return $this->step_in_select();
-
-				case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE:
-					return $this->step_in_select_in_table();
-
 				case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
 					return $this->step_in_template();
 
@@ -5199,6 +5287,20 @@ private function step_in_foreign_content(): bool {
 	 * Internal helpers
 	 */
 
+	/**
+	 * Reports whether the Tag Processor has consumed all of its input.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @return bool Whether the current token is the end-of-file token.
+	 */
+	private function is_eof_token(): bool {
+		$state = $this->parser_state;
+
+		return WP_HTML_Tag_Processor::STATE_COMPLETE === $state ||
+			WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $state;
+	}
+
 	/**
 	 * Creates a new bookmark for the currently-matched token and returns the generated name.
 	 *
@@ -5254,6 +5356,8 @@ public function get_namespace(): string {
 	 *     $processor->get_tag() === null;
 	 *
 	 * @since 6.4.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 	 */
@@ -5315,6 +5419,8 @@ public function has_self_closing_flag(): bool {
 	 * of the document without matching a token.
 	 *
 	 * @since 6.6.0 Subclassed for the HTML Processor.
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of the matched token.
 	 */
@@ -5675,6 +5781,8 @@ public function seek( $bookmark_name ): bool {
 			$this->state->current_token                     = null;
 			$this->current_element                          = null;
 			$this->element_queue                            = array();
+			$this->non_lifo_breadcrumb_removals             = array();
+			$this->has_processed_eof                        = false;
 
 			/*
 			 * The absence of a context node indicates a full parse.
@@ -5704,11 +5812,7 @@ public function seek( $bookmark_name ): bool {
 					)
 				);
 
-				$this->change_parsing_namespace(
-					$this->context_node->integration_node_type
-						? 'html'
-						: $this->context_node->namespace
-				);
+				$this->change_parsing_namespace_for_node( $this->context_node );
 
 				if ( 'TEMPLATE' === $this->context_node->node_name ) {
 					$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
@@ -6056,46 +6160,7 @@ private function reset_insertion_mode_appropriately(): void {
 
 			switch ( $node->node_name ) {
 				/*
-				 * > 4. If node is a `select` element, run these substeps:
-				 * >   1. If _last_ is true, jump to the step below labeled done.
-				 * >   2. Let _ancestor_ be _node_.
-				 * >   3. _Loop_: If _ancestor_ is the first node in the stack of open elements,
-				 * >      jump to the step below labeled done.
-				 * >   4. Let ancestor be the node before ancestor in the stack of open elements.
-				 * >   …
-				 * >   7. Jump back to the step labeled _loop_.
-				 * >   8. _Done_: Switch the insertion mode to "in select" and return.
-				 */
-				case 'SELECT':
-					if ( ! $last ) {
-						foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) {
-							if ( 'html' !== $ancestor->namespace ) {
-								continue;
-							}
-
-							switch ( $ancestor->node_name ) {
-								/*
-								 * > 5. If _ancestor_ is a `template` node, jump to the step below
-								 * >    labeled _done_.
-								 */
-								case 'TEMPLATE':
-									break 2;
-
-								/*
-								 * > 6. If _ancestor_ is a `table` node, switch the insertion mode to
-								 * >    "in select in table" and return.
-								 */
-								case 'TABLE':
-									$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
-									return;
-							}
-						}
-					}
-					$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;
-					return;
-
-				/*
-				 * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the
+				 * > 4. If _node_ is a `td` or `th` element and _last_ is false, then switch the
 				 * >    insertion mode to "in cell" and return.
 				 */
 				case 'TD':
@@ -6106,16 +6171,16 @@ private function reset_insertion_mode_appropriately(): void {
 					}
 					break;
 
-					/*
-					* > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row"
-					* >    and return.
-					*/
+				/*
+				 * > 5. If _node_ is a `tr` element, then switch the insertion mode to "in row"
+				 * >    and return.
+				 */
 				case 'TR':
 					$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
 					return;
 
 				/*
-				 * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the
+				 * > 6. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the
 				 * >    insertion mode to "in table body" and return.
 				 */
 				case 'TBODY':
@@ -6125,7 +6190,7 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 8. If _node_ is a `caption` element, then switch the insertion mode to
+				 * > 7. If _node_ is a `caption` element, then switch the insertion mode to
 				 * >    "in caption" and return.
 				 */
 				case 'CAPTION':
@@ -6133,7 +6198,7 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to
+				 * > 8. If _node_ is a `colgroup` element, then switch the insertion mode to
 				 * >    "in column group" and return.
 				 */
 				case 'COLGROUP':
@@ -6141,15 +6206,15 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 10. If _node_ is a `table` element, then switch the insertion mode to
-				 * >     "in table" and return.
+				 * > 9. If _node_ is a `table` element, then switch the insertion mode to
+				 * >    "in table" and return.
 				 */
 				case 'TABLE':
 					$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
 					return;
 
 				/*
-				 * > 11. If _node_ is a `template` element, then switch the insertion mode to the
+				 * > 10. If _node_ is a `template` element, then switch the insertion mode to the
 				 * >     current template insertion mode and return.
 				 */
 				case 'TEMPLATE':
@@ -6157,7 +6222,7 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 12. If _node_ is a `head` element and _last_ is false, then switch the
+				 * > 11. If _node_ is a `head` element and _last_ is false, then switch the
 				 * >     insertion mode to "in head" and return.
 				 */
 				case 'HEAD':
@@ -6168,7 +6233,7 @@ private function reset_insertion_mode_appropriately(): void {
 					break;
 
 				/*
-				 * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body"
+				 * > 12. If _node_ is a `body` element, then switch the insertion mode to "in body"
 				 * >     and return.
 				 */
 				case 'BODY':
@@ -6176,7 +6241,7 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 14. If _node_ is a `frameset` element, then switch the insertion mode to
+				 * > 13. If _node_ is a `frameset` element, then switch the insertion mode to
 				 * >     "in frameset" and return. (fragment case)
 				 */
 				case 'FRAMESET':
@@ -6184,7 +6249,7 @@ private function reset_insertion_mode_appropriately(): void {
 					return;
 
 				/*
-				 * > 15. If _node_ is an `html` element, run these substeps:
+				 * > 14. If _node_ is an `html` element, run these substeps:
 				 * >     1. If the head element pointer is null, switch the insertion mode to
 				 * >        "before head" and return. (fragment case)
 				 * >     2. Otherwise, the head element pointer is not null, switch the insertion
@@ -6199,7 +6264,7 @@ private function reset_insertion_mode_appropriately(): void {
 		}
 
 		/*
-		 * > 16. If _last_ is true, then switch the insertion mode to "in body"
+		 * > 15. If _last_ is true, then switch the insertion mode to "in body"
 		 * >     and return. (fragment case)
 		 *
 		 * This is only reachable if `$last` is true, as per the fragment parsing case.
@@ -6216,8 +6281,10 @@ private function reset_insertion_mode_appropriately(): void {
 	 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 	 *
 	 * @see https://html.spec.whatwg.org/#adoption-agency-algorithm
+	 *
+	 * @return bool Whether an element was found.
 	 */
-	private function run_adoption_agency_algorithm(): void {
+	private function run_adoption_agency_algorithm(): bool {
 		$budget       = 1000;
 		$subject      = $this->get_tag();
 		$current_node = $this->state->stack_of_open_elements->current_node();
@@ -6229,13 +6296,13 @@ private function run_adoption_agency_algorithm(): void {
 			! $this->state->active_formatting_elements->contains_node( $current_node )
 		) {
 			$this->state->stack_of_open_elements->pop();
-			return;
+			return true;
 		}
 
 		$outer_loop_counter = 0;
 		while ( $budget-- > 0 ) {
 			if ( $outer_loop_counter++ >= 8 ) {
-				return;
+				return true;
 			}
 
 			/*
@@ -6258,18 +6325,18 @@ private function run_adoption_agency_algorithm(): void {
 
 			// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
 			if ( null === $formatting_element ) {
-				$this->bail( 'Cannot run adoption agency when "any other end tag" is required.' );
+				return $this->step_in_body_any_other_end_tag();
 			}
 
 			// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
 			if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
 				$this->state->active_formatting_elements->remove_node( $formatting_element );
-				return;
+				return true;
 			}
 
 			// > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return.
 			if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) {
-				return;
+				return true;
 			}
 
 			/*
@@ -6305,7 +6372,7 @@ private function run_adoption_agency_algorithm(): void {
 
 					if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
 						$this->state->active_formatting_elements->remove_node( $formatting_element );
-						return;
+						return true;
 					}
 				}
 			}
@@ -6314,6 +6381,8 @@ private function run_adoption_agency_algorithm(): void {
 		}
 
 		$this->bail( 'Cannot run adoption agency when looping required.' );
+		// This unnecessary return prevents tools from inaccurately reporting type errors.
+		return false;
 	}
 
 	/**
@@ -6413,7 +6482,22 @@ private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to
 	 */
 	private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
 		$here = $this->bookmarks[ $this->state->current_token->bookmark_name ];
-		$name = $bookmark_name ?? $this->bookmark_token();
+		if (
+			null === $bookmark_name &&
+			(
+				WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+				WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+			)
+		) {
+			if ( count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
+				$this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS;
+				throw new Exception( 'could not allocate bookmark' );
+			}
+
+			$name = (string) ++$this->bookmark_counter;
+		} else {
+			$name = $bookmark_name ?? $this->bookmark_token();
+		}
 
 		$this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 );
 
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 77c1a471db5b1..17fb0f5700e46 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -544,9 +544,8 @@ class WP_HTML_Tag_Processor {
 	 * One of 'html', 'svg', or 'math'.
 	 *
 	 * Several parsing rules change based on whether the parser
-	 * is inside foreign content, including whether CDATA sections
-	 * are allowed and whether a self-closing flag indicates that
-	 * an element has no content.
+	 * is inside foreign content, including whether a self-closing
+	 * flag indicates that an element has no content.
 	 *
 	 * @since 6.7.0
 	 *
@@ -554,6 +553,19 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $parsing_namespace = 'html';
 
+	/**
+	 * Indicates the current node's namespace for CDATA section detection.
+	 *
+	 * HTML integration points follow HTML tokenization for start tags and
+	 * character tokens, but CDATA sections are allowed based on the adjusted
+	 * current node's actual namespace.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @var string
+	 */
+	private $cdata_parsing_namespace = 'html';
+
 	/**
 	 * What kind of syntax token became an HTML comment.
 	 *
@@ -860,7 +872,26 @@ public function change_parsing_namespace( string $new_namespace ): bool {
 			return false;
 		}
 
-		$this->parsing_namespace = $new_namespace;
+		$this->parsing_namespace       = $new_namespace;
+		$this->cdata_parsing_namespace = $new_namespace;
+		return true;
+	}
+
+	/**
+	 * Switches the namespace context used for detecting CDATA sections.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @param string $new_namespace One of 'html', 'svg', or 'math' indicating whether
+	 *                              the adjusted current node can contain CDATA sections.
+	 * @return bool Whether the namespace was valid and changed.
+	 */
+	protected function change_cdata_parsing_namespace( string $new_namespace ): bool {
+		if ( ! in_array( $new_namespace, array( 'html', 'math', 'svg' ), true ) ) {
+			return false;
+		}
+
+		$this->cdata_parsing_namespace = $new_namespace;
 		return true;
 	}
 
@@ -1174,7 +1205,13 @@ public function paused_at_incomplete_token(): bool {
 	 *     }
 	 *     // Outputs: "free <egg> lang-en "
 	 *
+	 * Class names from the input document already carry the tokenizer's
+	 * U+FFFD replacement of NULL bytes through `get_attribute()`; values
+	 * supplied through the API are returned verbatim, as `Element.classList`
+	 * does in the DOM.
+	 *
 	 * @since 6.4.0
+	 * @since 7.1.0 No longer replaces NULL bytes in API-supplied class values.
 	 *
 	 * @return Generator<int, non-empty-string>
 	 */
@@ -1208,7 +1245,7 @@ public function class_list() {
 				return;
 			}
 
-			$name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) );
+			$name = substr( $class, $at, $length );
 			if ( $is_quirks ) {
 				$name = strtolower( $name );
 			}
@@ -1917,7 +1954,7 @@ private function parse_next_tag(): bool {
 				}
 
 				if (
-					'html' !== $this->parsing_namespace &&
+					'html' !== $this->cdata_parsing_namespace &&
 					strlen( $html ) > $at + 8 &&
 					'[' === $html[ $at + 2 ] &&
 					'C' === $html[ $at + 3 ] &&
@@ -2231,9 +2268,16 @@ private function parse_next_attribute(): bool {
 		 * > case-insensitive match for each other.
 		 *     - HTML 5 spec
 		 *
+		 * The tokenizer would have replaced U+0000 NULL bytes in attribute
+		 * names with U+FFFD, so names which differ only by those bytes are
+		 * duplicates. The replacement applies to the comparable name — a
+		 * comparison artifact — while the raw span in the document remains
+		 * untouched.
+		 *
 		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+		 * @see https://html.spec.whatwg.org/#attribute-name-state
 		 */
-		$comparable_name = strtolower( $attribute_name );
+		$comparable_name = strtolower( str_replace( "\x00", "\u{FFFD}", $attribute_name ) );
 
 		// If an attribute is listed many times, only use the first declaration and ignore the rest.
 		if ( ! isset( $this->attributes[ $comparable_name ] ) ) {
@@ -2359,13 +2403,7 @@ private function class_name_updates_to_attributes_updates(): void {
 		}
 
 		if ( false === $existing_class && isset( $this->attributes['class'] ) ) {
-			$existing_class = WP_HTML_Decoder::decode_attribute(
-				substr(
-					$this->html,
-					$this->attributes['class']->value_starts_at,
-					$this->attributes['class']->value_length
-				)
-			);
+			$existing_class = $this->get_decoded_source_attribute_value( $this->attributes['class'] );
 		}
 
 		if ( false === $existing_class ) {
@@ -2771,6 +2809,11 @@ private function get_enqueued_attribute_value( string $comparable_name ) {
 	 *     $p->get_attribute( 'class' ) === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 Applies input-stream preprocessing: newlines in the source value
+	 *              are normalized and NULL bytes are replaced with U+FFFD, as
+	 *              browsers do before decoding character references. Attributes
+	 *              whose source name contains a NULL byte are addressed by the
+	 *              name with U+FFFD in its place, as in the DOM.
 	 *
 	 * @param string $name Name of attribute whose value is requested.
 	 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
@@ -2793,7 +2836,7 @@ public function get_attribute( $name ) {
 		 * attribute values. If any exist, those enqueued class changes must first be flushed out
 		 * into an attribute value update.
 		 */
-		if ( 'class' === $name ) {
+		if ( 'class' === $comparable ) {
 			$this->class_name_updates_to_attributes_updates();
 		}
 
@@ -2824,9 +2867,58 @@ public function get_attribute( $name ) {
 			return true;
 		}
 
+		return $this->get_decoded_source_attribute_value( $attribute );
+	}
+
+	/**
+	 * Returns the decoded value of an attribute found in the input document.
+	 *
+	 * The Tag Processor defers the HTML input-stream preprocessing and the
+	 * tokenizer's replacements while scanning; they must be applied when
+	 * reading a value out of the document: newlines are normalized before
+	 * character references decode, and U+0000 NULL bytes are replaced
+	 * with U+FFFD. The replacements operate on bytes; NULL bytes inside
+	 * invalid UTF-8 sequences are replaced individually where a browser,
+	 * decoding the byte stream into characters first, may differ.
+	 *
+	 * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream
+	 * @see https://html.spec.whatwg.org/#attribute-value-(double-quoted)-state
+	 *
+	 * @since 7.1.0
+	 *
+	 * @param WP_HTML_Attribute_Token $attribute Attribute token from the input document.
+	 * @return string Decoded attribute value.
+	 */
+	private function get_decoded_source_attribute_value( WP_HTML_Attribute_Token $attribute ): string {
 		$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );
 
-		return WP_HTML_Decoder::decode_attribute( $raw_value );
+		/*
+		 * Newline normalization is part of preprocessing the input stream
+		 * and precedes character reference decoding: `&#13;` decodes into
+		 * a carriage return which must be preserved. The check avoids
+		 * scanning the value again when it contains no carriage return;
+		 * most values contain none.
+		 */
+		if ( false !== strpos( $raw_value, "\r" ) ) {
+			$raw_value = str_replace( "\r\n", "\n", $raw_value );
+			$raw_value = str_replace( "\r", "\n", $raw_value );
+		}
+
+		$decoded_value = WP_HTML_Decoder::decode_attribute( $raw_value );
+
+		/*
+		 * The tokenizer replaces U+0000 NULL bytes as it consumes input:
+		 * character references see the raw NULL byte — an unambiguous
+		 * follower for references without a terminating semicolon — and
+		 * no character reference decodes into NULL, so the replacement
+		 * applies equivalently after decoding, where it cannot disturb
+		 * how references parse.
+		 */
+		if ( false !== strpos( $decoded_value, "\x00" ) ) {
+			$decoded_value = str_replace( "\x00", "\u{FFFD}", $decoded_value );
+		}
+
+		return $decoded_value;
 	}
 
 	/**
@@ -2849,6 +2941,10 @@ public function get_attribute( $name ) {
 	 *     $p->get_attribute_names_with_prefix( 'data-' ) === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 NULL bytes in source attribute names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply. The prefix
+	 *              is matched verbatim against these replaced names; a prefix
+	 *              containing a NULL byte matches nothing.
 	 *
 	 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
 	 *
@@ -2898,6 +2994,8 @@ public function get_namespace(): string {
 	 *     $p->get_tag() === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 NULL bytes in the source tag name are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 	 */
@@ -2906,7 +3004,15 @@ public function get_tag(): ?string {
 			return null;
 		}
 
-		$tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+		/*
+		 * The tokenizer would have replaced U+0000 NULL bytes in the tag
+		 * name with U+FFFD; this is deferred to this read boundary. The
+		 * replacement never applies to internal identification, which
+		 * compares raw bytes (`scr\x00ipt` is not SCRIPT in browsers either).
+		 *
+		 * @see https://html.spec.whatwg.org/#tag-name-state
+		 */
+		$tag_name = str_replace( "\x00", "\u{FFFD}", substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length ) );
 
 		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
 			return strtoupper( $tag_name );
@@ -2927,6 +3033,8 @@ public function get_tag(): ?string {
 	 * account the current parsing context, whether HTML, SVG, or MathML.
 	 *
 	 * @since 6.7.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of current tag name.
 	 */
@@ -3071,6 +3179,12 @@ public function get_qualified_tag_name(): ?string {
 	 * Returns the adjusted attribute name for a given attribute, taking into
 	 * account the current parsing context, whether HTML, SVG, or MathML.
 	 *
+	 * In SVG and MathML contexts, adjusted foreign attributes with a namespace
+	 * prefix use a space between the prefix and local name. For example,
+	 * `xlink:href` is returned as `xlink href`, while the unprefixed `xmlns`
+	 * attribute is returned as `xmlns`. Non-adjusted attributes with a colon in
+	 * their name, such as `foo:bar`, are returned unchanged.
+	 *
 	 * @since 6.7.0
 	 *
 	 * @param string $attribute_name Which attribute to adjust.
@@ -3337,7 +3451,34 @@ public function has_self_closing_flag(): bool {
 		 *     <figure />
 		 *             ^ this appears one character before the end of the closing ">".
 		 */
-		return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ];
+		$self_closing_flag_at = $this->token_starts_at + $this->token_length - 2;
+		if ( '/' !== $this->html[ $self_closing_flag_at ] ) {
+			return false;
+		}
+
+		foreach ( $this->attributes as $attribute ) {
+			$attribute_ends_at = $attribute->start + $attribute->length;
+			if (
+				$self_closing_flag_at >= $attribute->start &&
+				$self_closing_flag_at < $attribute_ends_at
+			) {
+				return false;
+			}
+		}
+
+		foreach ( $this->duplicate_attributes ?? array() as $duplicate_attributes ) {
+			foreach ( $duplicate_attributes as $attribute ) {
+				$attribute_ends_at = $attribute->start + $attribute->length;
+				if (
+					$self_closing_flag_at >= $attribute->start &&
+					$self_closing_flag_at < $attribute_ends_at
+				) {
+					return false;
+				}
+			}
+		}
+
+		return true;
 	}
 
 	/**
@@ -3425,6 +3566,8 @@ public function get_token_type(): ?string {
 	 * of the document without matching a token.
 	 *
 	 * @since 6.5.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of the matched token.
 	 */
@@ -3782,6 +3925,12 @@ public function get_modifiable_text(): string {
 	 *     // Renders as “Eggs &amp; Milk” in a browser, encoded as `<p>Eggs &amp;amp; Milk</p>`.
 	 *     $processor->set_modifiable_text( 'Eggs &amp; Milk' );
 	 *
+	 * Note: unlike attribute values set through `set_attribute()`, which read
+	 * back verbatim, text set through this method currently reads back through
+	 * `get_modifiable_text()` with newlines normalized and NULL bytes handled
+	 * as if the text had come from the input document. In the DOM, API-supplied
+	 * text round-trips verbatim; this asymmetry is a known limitation.
+	 *
 	 * @since 6.7.0
 	 * @since 6.9.0 Escapes all character references instead of trying to avoid double-escaping.
 	 *
@@ -4770,14 +4919,37 @@ private function matches(): bool {
 		}
 
 		// Does the tag name match the requested tag name in a case-insensitive manner?
-		if (
-			isset( $this->sought_tag_name ) &&
-			(
-				strlen( $this->sought_tag_name ) !== $this->tag_name_length ||
-				0 !== substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true )
-			)
-		) {
-			return false;
+		if ( isset( $this->sought_tag_name ) ) {
+			$tag_name_matches = (
+				strlen( $this->sought_tag_name ) === $this->tag_name_length &&
+				0 === substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true )
+			);
+
+			/*
+			 * Names are matched in the same alphabet `get_tag()` exposes,
+			 * where U+0000 NULL bytes appear as U+FFFD: a sought name
+			 * containing U+FFFD matches source names with NULL bytes in
+			 * its place, and a sought name containing a NULL byte matches
+			 * nothing, since no exposed name contains one. The byte
+			 * comparison above already agrees for names without NULL
+			 * bytes, so this only resolves the rare disagreements.
+			 */
+			if ( $tag_name_matches ) {
+				$tag_name_matches = false === strpos( $this->sought_tag_name, "\x00" );
+			} elseif ( false !== strpos( $this->sought_tag_name, "\u{FFFD}" ) ) {
+				$raw_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+				if ( false !== strpos( $raw_name, "\x00" ) ) {
+					$exposed_name     = str_replace( "\x00", "\u{FFFD}", $raw_name );
+					$tag_name_matches = (
+						strlen( $this->sought_tag_name ) === strlen( $exposed_name ) &&
+						0 === substr_compare( $exposed_name, $this->sought_tag_name, 0, strlen( $exposed_name ), true )
+					);
+				}
+			}
+
+			if ( ! $tag_name_matches ) {
+				return false;
+			}
 		}
 
 		if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) {
diff --git a/tests/phpunit/data/html5lib-tests/README.md b/tests/phpunit/data/html5lib-tests/README.md
index be775c8b497b5..18dd099f75bab 100644
--- a/tests/phpunit/data/html5lib-tests/README.md
+++ b/tests/phpunit/data/html5lib-tests/README.md
@@ -13,7 +13,7 @@ The necessary files have been copied to this directory:
 - `tree-construction/*.dat`
 
 The version of these files was taken from the git commit with
-SHA [`a9f44960a9fedf265093d22b2aa3c7ca123727b9`](https://github.com/html5lib/html5lib-tests/commit/a9f44960a9fedf265093d22b2aa3c7ca123727b9).
+SHA [`9fb614afaa42ce8787840f057b32084308e76549`](https://github.com/html5lib/html5lib-tests/commit/9fb614afaa42ce8787840f057b32084308e76549).
 
 ## Updating
 
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat b/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat
index fb13c3c33b0ab..f7c8e2c3ca74e 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/menuitem-element.dat
@@ -161,13 +161,14 @@
 #data
 <!DOCTYPE html><select><menuitem></select>
 #errors
-33: Stray start tag “menuitem”.
+1:34: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body, select, menuitem.
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
+|       <menuitem>
 
 #data
 <!DOCTYPE html><option><menuitem>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tables01.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tables01.dat
index aa7915ebba7ea..4079888161887 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tables01.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tables01.dat
@@ -100,8 +100,11 @@
 #data
 <table><select><option>3</select></table>
 #errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
+1:1: ERROR: Expected a doctype token
+1:8: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:16: ERROR: Start tag 'option' isn't allowed here. Currently open tags: html, body, table, select.
+1:24: ERROR: Character tokens aren't legal here
+1:25: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body, table, select, option.
 #document
 | <html>
 |   <head>
@@ -114,12 +117,11 @@
 #data
 <table><select><table></table></select></table>
 #errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,22): unexpected-table-element-start-tag-in-select-in-table
-(1,22): unexpected-start-tag-implies-end-tag
-(1,39): unexpected-end-tag
-(1,47): unexpected-end-tag
+1:1: ERROR: Expected a doctype token
+1:8: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:16: ERROR: Start tag 'table' isn't allowed here. Currently open tags: html, body, table, select.
+1:31: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body.
+1:40: ERROR: End tag 'table' isn't allowed here. Currently open tags: html, body.
 #document
 | <html>
 |   <head>
@@ -131,9 +133,8 @@
 #data
 <table><select></table>
 #errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,23): unexpected-table-element-end-tag-in-select-in-table
+1:1: ERROR: Expected a doctype token
+1:8: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
 #document
 | <html>
 |   <head>
@@ -144,9 +145,10 @@
 #data
 <table><select><option>A<tr><td>B</td></tr></table>
 #errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,28): unexpected-table-element-start-tag-in-select-in-table
+1:1: ERROR: Expected a doctype token
+1:8: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:16: ERROR: Start tag 'option' isn't allowed here. Currently open tags: html, body, table, select.
+1:24: ERROR: Character tokens aren't legal here
 #document
 | <html>
 |   <head>
@@ -292,7 +294,6 @@
 1:13: 'svg' tag isn't allowed here. Currently open tags: html, body, div, table.
 1:33: 'select' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject.
 1:41: 'table' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject, select.
-1:41: 'table' tag isn't allowed here. Currently open tags: html, body, div, table, svg, foreignobject.
 1:48: 's' tag isn't allowed here. Currently open tags: html, body, div, table.
 1:51: Premature end of file. Currently open tags: html, body, div, table, s.
 #document
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests1.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests1.dat
index e80e64016d919..15d496eb713de 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests1.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests1.dat
@@ -355,19 +355,20 @@ Line1<br>Line2<br>Line3<br>Line4
 #data
 <select><b><option><select><option></b></select>X
 #errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): unexpected-start-tag-in-select
-(1,27): unexpected-select-in-select
-(1,39): unexpected-end-tag
-(1,48): unexpected-end-tag
+1:1: ERROR: Expected a doctype token
+1:20: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, b, option.
+1:36: ERROR: End tag 'b' isn't allowed here. Currently open tags: html, body, b, select, option.
+1:50: ERROR: Premature end of file. Currently open tags: html, body, b.
 #document
 | <html>
 |   <head>
 |   <body>
 |     <select>
+|       <b>
+|         <option>
+|     <b>
 |       <option>
-|     <option>
-|       "X"
+|     "X"
 
 #data
 <a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
@@ -1532,18 +1533,19 @@ Line1<br>Line2<br>Line3<br>Line4
 #data
 <select><b><option><select><option></b></select>
 #errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): unexpected-start-tag-in-select
-(1,27): unexpected-select-in-select
-(1,39): unexpected-end-tag
-(1,48): unexpected-end-tag
+1:1: ERROR: Expected a doctype token
+1:20: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, b, option.
+1:36: ERROR: End tag 'b' isn't allowed here. Currently open tags: html, body, b, select, option.
+1:49: ERROR: Premature end of file. Currently open tags: html, body, b.
 #document
 | <html>
 |   <head>
 |   <body>
 |     <select>
+|       <b>
+|         <option>
+|     <b>
 |       <option>
-|     <option>
 
 #data
 <html><head><title></title><body></body></html>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests10.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests10.dat
index f84e2d546fab6..3b311e4e03cd8 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests10.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests10.dat
@@ -35,20 +35,17 @@
 #data
 <!DOCTYPE html><body><select><svg></svg></select>
 #errors
-(1,34) unexpected-start-tag-in-select
-(1,40) unexpected-end-tag-in-select
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
+|       <svg svg>
 
 #data
 <!DOCTYPE html><body><select><option><svg></svg></option></select>
 #errors
-(1,42) unexpected-start-tag-in-select
-(1,48) unexpected-end-tag-in-select
 #document
 | <!DOCTYPE html>
 | <html>
@@ -56,6 +53,7 @@
 |   <body>
 |     <select>
 |       <option>
+|         <svg svg>
 
 #data
 <!DOCTYPE html><body><table><svg></svg></table>
@@ -261,13 +259,8 @@
 #data
 <!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
 #errors
-(1,49) unexpected-start-tag-in-select
-(1,52) unexpected-start-tag-in-select
-(1,59) unexpected-end-tag-in-select
-(1,62) unexpected-start-tag-in-select
-(1,69) unexpected-end-tag-in-select
-(1,72) unexpected-start-tag-in-select
-(1,83) unexpected-table-element-end-tag-in-select-in-table
+1:70: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select, svg.
+1:76: ERROR: End tag 'table' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -278,28 +271,39 @@
 |         <tr>
 |           <td>
 |             <select>
-|               "foobarbaz"
+|               <svg svg>
+|                 <svg g>
+|                   "foo"
+|                 <svg g>
+|                   "bar"
+|               <p>
+|                 "baz"
 |     <p>
 |       "quux"
 
 #data
 <!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
 #errors
-(1,36) unexpected-start-tag-implies-table-voodoo
-(1,41) unexpected-start-tag-in-select
-(1,44) unexpected-start-tag-in-select
-(1,51) unexpected-end-tag-in-select
-(1,54) unexpected-start-tag-in-select
-(1,61) unexpected-end-tag-in-select
-(1,64) unexpected-start-tag-in-select
-(1,75) unexpected-table-element-end-tag-in-select-in-table
+1:29: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:37: ERROR: Start tag 'svg' isn't allowed here. Currently open tags: html, body, table, select.
+1:62: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select, svg.
+1:62: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select.
+1:65: ERROR: Character tokens aren't legal here
+1:66: ERROR: Character tokens aren't legal here
+1:67: ERROR: Character tokens aren't legal here
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
-|       "foobarbaz"
+|       <svg svg>
+|         <svg g>
+|           "foo"
+|         <svg g>
+|           "bar"
+|       <p>
+|         "baz"
 |     <table>
 |     <p>
 |       "quux"
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests17.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests17.dat
index e49bcf03142dc..1c472d1dc1eb8 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests17.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests17.dat
@@ -1,9 +1,8 @@
 #data
 <!doctype html><table><tbody><select><tr>
 #errors
-(1,37): unexpected-start-tag-implies-table-voodoo
-(1,41): unexpected-table-element-start-tag-in-select-in-table
-(1,41): eof-in-table
+(1,30): unexpected-start-tag
+(1,42): premature-eof
 #document
 | <!DOCTYPE html>
 | <html>
@@ -17,9 +16,8 @@
 #data
 <!doctype html><table><tr><select><td>
 #errors
-(1,34): unexpected-start-tag-implies-table-voodoo
-(1,38): unexpected-table-element-start-tag-in-select-in-table
-(1,38): expected-closing-tag-but-got-eof
+(1,27): unexpected-start-tag
+(1,39): premature-eof
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests18.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests18.dat
index 0b6d5dc404a9d..a3bb69a16ef57 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests18.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests18.dat
@@ -227,34 +227,42 @@
 #data
 <!doctype html><select><plaintext></plaintext>X
 #errors
-34: Stray start tag “plaintext”.
-46: Stray end tag “plaintext”.
-47: End of file seen and there were open elements.
+1:48: ERROR: Premature end of file. Currently open tags: html, body, select, plaintext.
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
-|       "X"
+|       <plaintext>
+|         "</plaintext>X"
 
 #data
 <!doctype html><table><select><plaintext>a<caption>b
 #errors
-30: Start tag “select” seen in “table”.
-41: Stray start tag “plaintext”.
-51: “caption” start tag with “select” open.
-52: End of file seen and there were open elements.
+1:23: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:31: ERROR: Start tag 'plaintext' isn't allowed here. Currently open tags: html, body, table, select.
+1:42: ERROR: Character tokens aren't legal here
+1:43: ERROR: Character tokens aren't legal here
+1:44: ERROR: Character tokens aren't legal here
+1:45: ERROR: Character tokens aren't legal here
+1:46: ERROR: Character tokens aren't legal here
+1:47: ERROR: Character tokens aren't legal here
+1:48: ERROR: Character tokens aren't legal here
+1:49: ERROR: Character tokens aren't legal here
+1:50: ERROR: Character tokens aren't legal here
+1:51: ERROR: Character tokens aren't legal here
+1:52: ERROR: Character tokens aren't legal here
+1:53: ERROR: Premature end of file. Currently open tags: html, body, table, select, plaintext.
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
-|       "a"
+|       <plaintext>
+|         "a<caption>b"
 |     <table>
-|       <caption>
-|         "b"
 
 #data
 <!doctype html><template><plaintext>a</template>b
@@ -446,8 +454,11 @@
 #data
 <!doctype html><table><select><script></style></script>abc
 #errors
-(1,30): unexpected-start-tag-implies-table-voodoo
-(1,58): eof-in-select
+1:23: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:56: ERROR: Character tokens aren't legal here
+1:57: ERROR: Character tokens aren't legal here
+1:58: ERROR: Character tokens aren't legal here
+1:59: ERROR: Premature end of file. Currently open tags: html, body, table, select.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -462,8 +473,11 @@
 #data
 <!doctype html><table><tr><select><script></style></script>abc
 #errors
-(1,34): unexpected-start-tag-implies-table-voodoo
-(1,62): eof-in-select
+1:27: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table, tbody, tr.
+1:60: ERROR: Character tokens aren't legal here
+1:61: ERROR: Character tokens aren't legal here
+1:62: ERROR: Character tokens aren't legal here
+1:63: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, select.
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests2.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests2.dat
index 11ef9b1643ff4..9232edde30918 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests2.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests2.dat
@@ -500,7 +500,7 @@
 #data
 <!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
 #errors
-(1,68): unexpected-select-in-select
+1:61: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, option.
 #document
 | <!DOCTYPE html>
 | <html>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests7.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests7.dat
index b2db4de1f3bdf..aa7e16b408e2e 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests7.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests7.dat
@@ -200,7 +200,7 @@ X</listing>
 #data
 <!doctype html><select><input>X
 #errors
-(1,30): unexpected-input-in-select
+1:32: ERROR: Premature end of file. Currently open tags: html, body, select.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -213,7 +213,7 @@ X</listing>
 #data
 <!doctype html><select><select>X
 #errors
-(1,31): unexpected-select-in-select
+1:24: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -443,11 +443,11 @@ A<table><tr> B</tr> </em>C</table>
 #data
 <select><keygen>
 #errors
-(1,8): expected-doctype-but-got-start-tag
-(1,16): unexpected-input-in-select
+1:1: ERROR: Expected a doctype token
+1:17: ERROR: Premature end of file. Currently open tags: html, body, select.
 #document
 | <html>
 |   <head>
 |   <body>
 |     <select>
-|     <keygen>
+|       <keygen>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat
index f8d04b23bc0be..1456324ffc21f 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests9.dat
@@ -48,20 +48,17 @@
 #data
 <!DOCTYPE html><body><select><math></math></select>
 #errors
-(1,35) unexpected-start-tag-in-select
-(1,42) unexpected-end-tag-in-select
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
+|       <math math>
 
 #data
 <!DOCTYPE html><body><select><option><math></math></option></select>
 #errors
-(1,43) unexpected-start-tag-in-select
-(1,50) unexpected-end-tag-in-select
 #document
 | <!DOCTYPE html>
 | <html>
@@ -69,6 +66,7 @@
 |   <body>
 |     <select>
 |       <option>
+|         <math math>
 
 #data
 <!DOCTYPE html><body><table><math></math></table>
@@ -301,13 +299,8 @@
 #data
 <!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
 #errors
-(1,50) unexpected-start-tag-in-select
-(1,54) unexpected-start-tag-in-select
-(1,62) unexpected-end-tag-in-select
-(1,66) unexpected-start-tag-in-select
-(1,74) unexpected-end-tag-in-select
-(1,77) unexpected-start-tag-in-select
-(1,88) unexpected-table-element-end-tag-in-select-in-table
+1:75: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select, math.
+1:81: ERROR: End tag 'table' isn't allowed here. Currently open tags: html, body, table, tbody, tr, td, select.
 #document
 | <!DOCTYPE html>
 | <html>
@@ -318,28 +311,45 @@
 |         <tr>
 |           <td>
 |             <select>
-|               "foobarbaz"
+|               <math math>
+|                 <math mi>
+|                   "foo"
+|                 <math mi>
+|                   "bar"
+|               <p>
+|                 "baz"
 |     <p>
 |       "quux"
 
 #data
 <!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
 #errors
-(1,36) unexpected-start-tag-implies-table-voodoo
-(1,42) unexpected-start-tag-in-select
-(1,46) unexpected-start-tag-in-select
-(1,54) unexpected-end-tag-in-select
-(1,58) unexpected-start-tag-in-select
-(1,66) unexpected-end-tag-in-select
-(1,69) unexpected-start-tag-in-select
-(1,80) unexpected-table-element-end-tag-in-select-in-table
+1:29: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, table.
+1:37: ERROR: Start tag 'math' isn't allowed here. Currently open tags: html, body, table, select.
+1:47: ERROR: Character tokens aren't legal here
+1:48: ERROR: Character tokens aren't legal here
+1:49: ERROR: Character tokens aren't legal here
+1:59: ERROR: Character tokens aren't legal here
+1:60: ERROR: Character tokens aren't legal here
+1:61: ERROR: Character tokens aren't legal here
+1:67: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select, math.
+1:67: ERROR: Start tag 'p' isn't allowed here. Currently open tags: html, body, table, select.
+1:70: ERROR: Character tokens aren't legal here
+1:71: ERROR: Character tokens aren't legal here
+1:72: ERROR: Character tokens aren't legal here
 #document
 | <!DOCTYPE html>
 | <html>
 |   <head>
 |   <body>
 |     <select>
-|       "foobarbaz"
+|       <math math>
+|         <math mi>
+|           "foo"
+|         <math mi>
+|           "bar"
+|       <p>
+|         "baz"
 |     <table>
 |     <p>
 |       "quux"
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/tests_innerHTML_1.dat b/tests/phpunit/data/html5lib-tests/tree-construction/tests_innerHTML_1.dat
index 1a37ee520ae34..09e0456f0f12d 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/tests_innerHTML_1.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/tests_innerHTML_1.dat
@@ -790,7 +790,7 @@ select
 #data
 <input><option>
 #errors
-(1,7): unexpected-input-in-select
+(1,7): XXX-undefined-error
 #document-fragment
 select
 #document
@@ -799,20 +799,21 @@ select
 #data
 <keygen><option>
 #errors
-(1,8): unexpected-input-in-select
 #document-fragment
 select
 #document
+| <keygen>
 | <option>
 
 #data
 <textarea><option>
 #errors
-(1,10): unexpected-input-in-select
+1:19: ERROR: Premature end of file. Currently open tags: html, textarea.
 #document-fragment
 select
 #document
-| <option>
+| <textarea>
+|   "<option>"
 
 #data
 </html><!--abc-->
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/void-in-phrasing.dat b/tests/phpunit/data/html5lib-tests/tree-construction/void-in-phrasing.dat
new file mode 100644
index 0000000000000..b7dbd1e7cec2a
--- /dev/null
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/void-in-phrasing.dat
@@ -0,0 +1,151 @@
+#data
+<!DOCTYPE html><body><p><br>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <br>
+
+#data
+<!DOCTYPE html><body><p><br>text
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <br>
+|       "text"
+
+#data
+<!DOCTYPE html><body><p>before<br>after
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "before"
+|       <br>
+|       "after"
+
+#data
+<!DOCTYPE html><body><p><br><br>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <br>
+|       <br>
+
+#data
+<!DOCTYPE html><body><p>a<br>b<br>c
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "a"
+|       <br>
+|       "b"
+|       <br>
+|       "c"
+
+#data
+<!DOCTYPE html><body><h1><br></h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <br>
+
+#data
+<!DOCTYPE html><body><p><input>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <input>
+
+#data
+<!DOCTYPE html><body><p><img>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <img>
+
+#data
+<!DOCTYPE html><body><p><wbr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <wbr>
+
+#data
+<!DOCTYPE html><body><p><embed>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <embed>
+
+#data
+<!DOCTYPE html><body><h2><input></h2>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h2>
+|       <input>
+
+#data
+<!DOCTYPE html><body><em><br></em>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <em>
+|       <br>
+
+#data
+<!DOCTYPE html><body><strong><br>text</strong>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <strong>
+|       <br>
+|       "text"
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/webkit01.dat b/tests/phpunit/data/html5lib-tests/tree-construction/webkit01.dat
index d30e12e52766e..fc7f5c80e07e4 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/webkit01.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/webkit01.dat
@@ -437,11 +437,11 @@ no-doctype
 #data
 <select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
 #errors
-(1,8): expected-doctype-but-got-start-tag
-(1,25): unexpected-select-in-select
-(1,59): unexpected-select-in-select
-(1,93): unexpected-select-in-select
-(1,127): unexpected-select-in-select
+1:1: ERROR: Expected a doctype token
+1:18: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, option.
+1:52: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, option.
+1:86: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, option.
+1:120: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, option.
 #document
 | <html>
 |   <head>
@@ -528,12 +528,11 @@ no-doctype
 #data
 <kbd><table></kbd><col><select><tr>
 #errors
-(1,5): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag-implies-table-voodoo
-(1,18): unexpected-end-tag
-(1,31): unexpected-start-tag-implies-table-voodoo
-(1,35): unexpected-table-element-start-tag-in-select-in-table
-(1,35): eof-in-table
+1:1: ERROR: Expected a doctype token
+1:13: ERROR: End tag 'kbd' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:13: ERROR: End tag 'kbd' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:24: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:36: ERROR: Premature end of file. Currently open tags: html, body, kbd, table, tbody, tr.
 #document
 | <html>
 |   <head>
@@ -549,12 +548,11 @@ no-doctype
 #data
 <kbd><table></kbd><col><select><tr></table><div>
 #errors
-(1,5): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag-implies-table-voodoo
-(1,18): unexpected-end-tag
-(1,31): unexpected-start-tag-implies-table-voodoo
-(1,35): unexpected-table-element-start-tag-in-select-in-table
-(1,48): expected-closing-tag-but-got-eof
+1:1: ERROR: Expected a doctype token
+1:13: ERROR: End tag 'kbd' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:13: ERROR: End tag 'kbd' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:24: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, kbd, table.
+1:49: ERROR: Premature end of file. Currently open tags: html, body, kbd, div.
 #document
 | <html>
 |   <head>
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/webkit02.dat b/tests/phpunit/data/html5lib-tests/tree-construction/webkit02.dat
index 7d817ec69a700..47a3c5713ea86 100644
--- a/tests/phpunit/data/html5lib-tests/tree-construction/webkit02.dat
+++ b/tests/phpunit/data/html5lib-tests/tree-construction/webkit02.dat
@@ -309,12 +309,13 @@ div
 #data
 <option><XH<optgroup></optgroup>
 #errors
-(1,21): unexpected-start-tag-in-select
-(1,32): unexpected-end-tag-in-select
+1:22: ERROR: End tag 'optgroup' isn't allowed here. Currently open tags: html, option, xh<optgroup.
+1:33: ERROR: Premature end of file. Currently open tags: html, option, xh<optgroup.
 #document-fragment
 select
 #document
 | <option>
+|   <xh<optgroup>
 
 #data
 <svg><foreignObject><div>foo</div><plaintext></foreignObject></svg><div>bar</div>
@@ -444,7 +445,7 @@ eof-in-math
 <select><optgroup><hr>
 #errors
 1:1: ERROR: Expected a doctype token
-1:23: ERROR: Premature end of file. Currently open tags: html, body, select.
+1:23: ERROR: Premature end of file. Currently open tags: html, body, select, optgroup.
 #document
 | <html>
 |   <head>
@@ -457,7 +458,7 @@ eof-in-math
 <select><option><optgroup><hr>
 #errors
 1:1: ERROR: Expected a doctype token
-1:31: ERROR: Premature end of file. Currently open tags: html, body, select.
+1:31: ERROR: Premature end of file. Currently open tags: html, body, select, optgroup.
 #document
 | <html>
 |   <head>
@@ -504,7 +505,7 @@ eof-in-math
 <table><tr><td><select><optgroup><option><hr>
 #errors
 1:1: ERROR: Expected a doctype token
-1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select, optgroup.
 #document
 | <html>
 |   <head>
@@ -522,7 +523,7 @@ eof-in-math
 <table><tr><td><select><optgroup><hr>
 #errors
 1:1: ERROR: Expected a doctype token
-1:38: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+1:38: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select, optgroup.
 #document
 | <html>
 |   <head>
@@ -539,7 +540,7 @@ eof-in-math
 <table><tr><td><select><option><optgroup><hr>
 #errors
 1:1: ERROR: Expected a doctype token
-1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select.
+1:46: ERROR: Premature end of file. Currently open tags: html, body, table, tbody, tr, td, select, optgroup.
 #document
 | <html>
 |   <head>
@@ -552,3 +553,223 @@ eof-in-math
 |               <option>
 |               <optgroup>
 |               <hr>
+
+#data
+<select><div><i></div><option>option
+#errors
+1:1: ERROR: Expected a doctype token
+1:17: ERROR: End tag 'div' isn't allowed here. Currently open tags: html, body, select, div, i.
+1:37: ERROR: Premature end of file. Currently open tags: html, body, select, option, i.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <div>
+|         <i>
+|       <i>
+|         <option>
+|           "option"
+
+#data
+<div><i></div><option>option
+#errors
+1:1: ERROR: Expected a doctype token
+1:9: ERROR: End tag 'div' isn't allowed here. Currently open tags: html, body, div, i.
+1:29: ERROR: Premature end of file. Currently open tags: html, body, i, option.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <i>
+|     <i>
+|       <option>
+|         "option"
+
+#data
+<select><div>div 1</div><button>button</button><div>div 2</div><datalist><option>option</option></datalist><div>div 3</div></select>
+#errors
+1:1: ERROR: Expected a doctype token
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <div>
+|         "div 1"
+|       <button>
+|         "button"
+|       <div>
+|         "div 2"
+|       <datalist>
+|         <option>
+|           "option"
+|       <div>
+|         "div 3"
+
+#data
+<select><button>button</select>
+#errors
+1:1: ERROR: Expected a doctype token
+1:23: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body, select, button.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         "button"
+
+#data
+<select><datalist>datalist</select>
+#errors
+1:1: ERROR: Expected a doctype token
+1:27: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body, select, datalist.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <datalist>
+|         "datalist"
+
+#data
+<select><button><select></select></button></select>
+#errors
+1:1: ERROR: Expected a doctype token
+1:17: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, button.
+1:25: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body.
+1:34: ERROR: End tag 'button' isn't allowed here. Currently open tags: html, body.
+1:43: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+
+#data
+<select><button><div><select></select>
+#errors
+1:1: ERROR: Expected a doctype token
+1:22: ERROR: Start tag 'select' isn't allowed here. Currently open tags: html, body, select, button, div.
+1:30: ERROR: End tag 'select' isn't allowed here. Currently open tags: html, body.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         <div>
+
+#data
+<select><div><option><img>option</option></div></select>
+#errors
+1:1: ERROR: Expected a doctype token
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <div>
+|         <option>
+|           <img>
+|           "option"
+
+#data
+<select><input>
+#errors
+1:1: ERROR: Expected a doctype token
+1:16: ERROR: Premature end of file. Currently open tags: html, body, select.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <input>
+
+#data
+<select><button><selectedcontent></button><option>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         <selectedcontent>
+|           "X"
+|       <option>
+|         "X"
+
+#data
+<select><button><selectedcontent></button><option>x<i>i<b>ib</i>b
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         <selectedcontent>
+|           "x"
+|           <i>
+|             "i"
+|             <b>
+|               "ib"
+|           <b>
+|             "b"
+|       <option>
+|         "x"
+|         <i>
+|           "i"
+|           <b>
+|             "ib"
+|         <b>
+|           "b"
+
+#data
+<select><button><selectedcontent></button><option>X<option>Y
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         <selectedcontent>
+|           "X"
+|       <option>
+|         "X"
+|       <option>
+|         "Y"
+
+#data
+<select><button><selectedcontent></button><option>X<option selected>Y
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <button>
+|         <selectedcontent>
+|           "Y"
+|       <option>
+|         "X"
+|       <option>
+|         selected=""
+|         "Y"
+
+#data
+<font><select><option>a</option></font></select>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <select>
+|         <option>
+|           "a"
diff --git a/tests/phpunit/includes/build-visual-html-tree.php b/tests/phpunit/includes/build-visual-html-tree.php
index 9d2850ce4947c..ddfe84234293f 100644
--- a/tests/phpunit/includes/build-visual-html-tree.php
+++ b/tests/phpunit/includes/build-visual-html-tree.php
@@ -69,6 +69,10 @@ function build_visual_html_tree( string $html, ?string $fragment_context ): stri
 		$token_type = $processor->get_token_type();
 		$is_closer  = $processor->is_tag_closer();
 
+		if ( '#presumptuous-tag' === $token_type ) {
+			continue;
+		}
+
 		if ( $was_text && '#text' !== $token_name ) {
 			if ( '' !== $text_node ) {
 				$output .= "{$text_node}\"\n";
@@ -281,11 +285,16 @@ static function ( $a, $b ) {
 		}
 	}
 
-	if ( null !== $processor->get_unsupported_exception() ) {
-		throw $processor->get_unsupported_exception();
+	$unsupported_exception = $processor->get_unsupported_exception();
+	$ignored_presumptuous_tag_exception = null !== $unsupported_exception
+		&& '#presumptuous-tag' === $unsupported_exception->token_name
+		&& '</>' === $unsupported_exception->token
+		&& 'Content outside of HTML is unsupported.' === $unsupported_exception->getMessage();
+	if ( null !== $unsupported_exception && ! $ignored_presumptuous_tag_exception ) {
+		throw $unsupported_exception;
 	}
 
-	if ( null !== $processor->get_last_error() ) {
+	if ( ! $ignored_presumptuous_tag_exception && null !== $processor->get_last_error() ) {
 		throw new Exception( "Parser error: {$processor->get_last_error()}" );
 	}
 
diff --git a/tests/phpunit/tests/dependencies/scripts.php b/tests/phpunit/tests/dependencies/scripts.php
index 41c9673915b93..ab92bcc82d43a 100644
--- a/tests/phpunit/tests/dependencies/scripts.php
+++ b/tests/phpunit/tests/dependencies/scripts.php
@@ -4563,4 +4563,169 @@ private function normalize_markup_for_snapshot( string $markup ): string {
 		}
 		return $processor->get_updated_html();
 	}
+
+	/**
+	 * Tests that script_data_{$handle} filter outputs JSON script tags before the script.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_outputs_json_script_tag() {
+		wp_enqueue_script( 'test-script', '/test.js', array(), null );
+
+		add_filter(
+			'script_data_test-script',
+			function ( $data ) {
+				$data['foo'] = 'bar';
+				return $data;
+			}
+		);
+
+		$output = get_echo( 'wp_print_scripts' );
+
+		$this->assertStringContainsString( '<script id="wp-script-data-test-script" type="application/json">', $output );
+		$this->assertStringContainsString( '"foo":"bar"', $output );
+	}
+
+	/**
+	 * Tests that the script_data_{$handle} filter receives an empty array by default.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_receives_empty_array() {
+		wp_enqueue_script( 'test-script', '/test.js', array(), null );
+
+		$filter_called = false;
+		add_filter(
+			'script_data_test-script',
+			function ( $data ) use ( &$filter_called ) {
+				$filter_called = true;
+				$this->assertSame( array(), $data );
+				return $data;
+			}
+		);
+
+		get_echo( 'wp_print_scripts' );
+
+		$this->assertTrue( $filter_called, 'Filter should have been called' );
+	}
+
+	/**
+	 * Tests that the script_data_{$handle} filter doesn't output anything for empty data.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_no_output_for_empty_data() {
+		wp_enqueue_script( 'test-script', '/test.js', array(), null );
+
+		add_filter(
+			'script_data_test-script',
+			function ( $data ) {
+				// Return empty array.
+				return $data;
+			}
+		);
+
+		$output = get_echo( 'wp_print_scripts' );
+
+		// Should not contain data script tag.
+		$this->assertStringNotContainsString( 'wp-script-data-test-script', $output );
+	}
+
+	/**
+	 * Tests that the script_data_{$handle} filter is called for each enqueued script.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_called_for_each_enqueued_script() {
+		wp_enqueue_script( 'script-1', '/script-1.js', array(), null );
+		wp_enqueue_script( 'script-2', '/script-2.js', array(), null );
+
+		$filter_calls = array();
+		add_filter(
+			'script_data_script-1',
+			function ( $data ) use ( &$filter_calls ) {
+				$filter_calls[] = 'script-1';
+				$data['script'] = '1';
+				return $data;
+			}
+		);
+
+		add_filter(
+			'script_data_script-2',
+			function ( $data ) use ( &$filter_calls ) {
+				$filter_calls[] = 'script-2';
+				$data['script'] = '2';
+				return $data;
+			}
+		);
+
+		$output = get_echo( 'wp_print_scripts' );
+
+		$this->assertSame( array( 'script-1', 'script-2' ), $filter_calls );
+		$this->assertStringContainsString( 'wp-script-data-script-1', $output );
+		$this->assertStringContainsString( 'wp-script-data-script-2', $output );
+		$this->assertStringContainsString( '"script":"1"', $output );
+		$this->assertStringContainsString( '"script":"2"', $output );
+	}
+
+	/**
+	 * Tests that the script_data_{$handle} filter is only called for enqueued scripts.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_only_called_for_enqueued_scripts() {
+		wp_register_script( 'registered-only', '/registered-only.js', array(), null );
+		wp_enqueue_script( 'enqueued', '/enqueued.js', array(), null );
+
+		$filter_calls = array();
+		add_filter(
+			'script_data_registered-only',
+			function ( $data ) use ( &$filter_calls ) {
+				$filter_calls[] = 'registered-only';
+				return $data;
+			}
+		);
+
+		add_filter(
+			'script_data_enqueued',
+			function ( $data ) use ( &$filter_calls ) {
+				$filter_calls[] = 'enqueued';
+				$data['test'] = 'value';
+				return $data;
+			}
+		);
+
+		$output = get_echo( 'wp_print_scripts' );
+
+		$this->assertSame( array( 'enqueued' ), $filter_calls );
+		$this->assertStringNotContainsString( 'wp-script-data-registered-only', $output );
+		$this->assertStringContainsString( 'wp-script-data-enqueued', $output );
+	}
+
+	/**
+	 * Tests that the script_data_{$handle} filter outputs data before the script tag.
+	 *
+	 * @covers WP_Scripts::do_item
+	 */
+	public function test_script_data_filter_outputs_before_script() {
+		wp_enqueue_script( 'test-script', '/test.js', array(), null );
+
+		add_filter(
+			'script_data_test-script',
+			function ( $data ) {
+				$data['config'] = 'value';
+				return $data;
+			}
+		);
+
+		$output = get_echo( 'wp_print_scripts' );
+
+		// Locate the first occurrence of each marker string in the printed output.
+		$data_pos   = strpos( $output, 'wp-script-data-test-script' );
+		$script_pos = strpos( $output, 'test-script-js' );
+
+		$this->assertNotFalse( $data_pos, 'Data script tag should be present' );
+		$this->assertNotFalse( $script_pos, 'Script tag should be present' );
+		$this->assertLessThan( $script_pos, $data_pos, 'Data script tag should come before script tag' );
+	}
 }
diff --git a/tests/phpunit/tests/html-api/wpHtmlDecoder.php b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
index 97954f4eb3e30..9527739edd23b 100644
--- a/tests/phpunit/tests/html-api/wpHtmlDecoder.php
+++ b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
@@ -61,6 +61,84 @@ static function ( int $errno, string $errstr ) use ( &$errors ) {
 		$this->assertSame( "&\x00b", $decoded, 'Should have decoded the text without changing it.' );
 	}
 
+	/**
+	 * Ensures that numeric character references for U+0000 decode to U+FFFD
+	 * while raw NULL bytes pass through the decoder untransformed.
+	 *
+	 * The tokenizer, not the decoder, is responsible for replacing raw NULL
+	 * bytes; in the Tag Processor that responsibility falls on the methods
+	 * which read values out of the input document.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_null_code_points
+	 *
+	 * @param string $raw_value     Raw attribute value.
+	 * @param string $decoded_value The expected decoded attribute value.
+	 */
+	public function test_null_code_points_in_attribute_values( string $raw_value, string $decoded_value ) {
+		$this->assertSame(
+			$decoded_value,
+			WP_HTML_Decoder::decode_attribute( $raw_value ),
+			'Improperly decoded raw attribute value.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_null_code_points() {
+		return array(
+			'Decimal zero'                 => array( 'a&#0;b', "a\u{FFFD}b" ),
+			'Hexadecimal zero'             => array( 'a&#x0;b', "a\u{FFFD}b" ),
+			'Multiple zeros'               => array( 'a&#0000;b', "a\u{FFFD}b" ),
+			'Raw NULL byte passes through' => array( "a\x00b", "a\x00b" ),
+		);
+	}
+
+	/**
+	 * Ensures that the ambiguous-follower check for character references
+	 * lacking a terminating semicolon treats only ASCII alphanumerics and
+	 * the equals sign as ambiguous, regardless of the process locale.
+	 *
+	 * `ctype_alnum()` classifies bytes 0x80 and above as alphanumeric under
+	 * UTF-8 locales, wrongly suppressing decodes whose follower is a
+	 * non-ASCII byte, such as U+FFFD produced by NULL-byte replacement.
+	 *
+	 * @ticket 65372
+	 *
+	 * @see https://html.spec.whatwg.org/#named-character-reference-state
+	 *
+	 * @dataProvider data_semicolon_less_references_with_followers
+	 *
+	 * @param string $raw_value     Raw attribute value.
+	 * @param string $decoded_value The expected decoded attribute value.
+	 */
+	public function test_semicolon_less_reference_followers( string $raw_value, string $decoded_value ) {
+		$this->assertSame(
+			$decoded_value,
+			WP_HTML_Decoder::decode_attribute( $raw_value ),
+			'Improperly decoded raw attribute value.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_semicolon_less_references_with_followers() {
+		return array(
+			'U+FFFD follower decodes'            => array( "x&amp\u{FFFD};y", "x&\u{FFFD};y" ),
+			'Non-ASCII follower decodes'         => array( "x&amp\u{E9}y", "x&\u{E9}y" ),
+			'ASCII letter follower is ambiguous' => array( 'x&ampzy', 'x&ampzy' ),
+			'ASCII digit follower is ambiguous'  => array( 'x&amp1y', 'x&amp1y' ),
+			'Equals sign follower is ambiguous'  => array( 'x&amp=y', 'x&amp=y' ),
+		);
+	}
+
 	/**
 	 * Ensures proper detection of attribute prefixes ignoring ASCII case.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
index e516addb6c314..23c0dfa812055 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
@@ -108,6 +108,129 @@ public function test_duplicate_attributes_are_removed() {
 		);
 	}
 
+	/**
+	 * Ensures that adjusted foreign attributes are serialized with their namespace prefix.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_serializes_adjusted_foreign_attributes_with_namespace_prefix(): void {
+		$svg = '<svg><a xlink:actuate="onLoad" xlink:arcrole="arc" xlink:href="#target" xlink:role="role" xlink:show="new" xlink:title="title" xlink:type="simple" xml:lang="en" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"></a></svg>';
+
+		$this->assertSame(
+			$svg,
+			WP_HTML_Processor::normalize( $svg ),
+			'Should have preserved all adjusted foreign attributes when normalizing.'
+		);
+
+		$processor = WP_HTML_Processor::create_fragment( $svg );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( '<svg>', $processor->serialize_token(), 'Should serialize the opening SVG tag.' );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame(
+			'<a xlink:actuate="onLoad" xlink:arcrole="arc" xlink:href="#target" xlink:role="role" xlink:show="new" xlink:title="title" xlink:type="simple" xml:lang="en" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">',
+			$processor->serialize_token(),
+			'Should have serialized all adjusted foreign attributes with their namespace prefixes.'
+		);
+	}
+
+	/**
+	 * Ensures that non-adjusted foreign attributes retain their colon.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_non_adjusted_foreign_attributes_with_colon
+	 *
+	 * @param string $svg            SVG markup to normalize.
+	 * @param string $serialized_tag Expected serialized token.
+	 */
+	public function test_serializes_non_adjusted_foreign_attributes_with_colon( string $svg, string $serialized_tag ): void {
+		$this->assertSame(
+			$svg,
+			WP_HTML_Processor::normalize( $svg ),
+			'Should have preserved non-adjusted colon attributes when normalizing.'
+		);
+
+		$processor = WP_HTML_Processor::create_fragment( $svg );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame( '<svg>', $processor->serialize_token(), 'Should serialize the opening SVG tag.' );
+		$this->assertTrue( $processor->next_token() );
+		$this->assertSame(
+			$serialized_tag,
+			$processor->serialize_token(),
+			'Should have preserved non-adjusted colon attributes when serializing the token.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array<string, array{0: string, 1: string}>
+	 */
+	public static function data_non_adjusted_foreign_attributes_with_colon(): array {
+		return array(
+			'xlink control' => array(
+				'<svg><a xlink:author="author" xlink:href="#target"></a></svg>',
+				'<a xlink:author="author" xlink:href="#target">',
+			),
+			'xml control'   => array(
+				'<svg><a xml:id="id" xml:lang="en"></a></svg>',
+				'<a xml:id="id" xml:lang="en">',
+			),
+			'xmlns control' => array(
+				'<svg><a xmlns:foo="urn:foo" xmlns:xlink="http://www.w3.org/1999/xlink"></a></svg>',
+				'<a xmlns:foo="urn:foo" xmlns:xlink="http://www.w3.org/1999/xlink">',
+			),
+			'source order'  => array(
+				'<svg><a foo:bar="baz" xlink:href="#target"></a></svg>',
+				'<a foo:bar="baz" xlink:href="#target">',
+			),
+		);
+	}
+
+	/**
+	 * Ensures that duplicate foreign attributes are removed upon serialization.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_duplicate_foreign_attributes
+	 *
+	 * @param string $input    HTML containing duplicate foreign attributes.
+	 * @param string $expected Expected normalized HTML.
+	 */
+	public function test_duplicate_foreign_attributes_are_removed( string $input, string $expected ): void {
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $input ),
+			'Should have removed all but the first copy of a foreign attribute when duplicates exist.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array<string, array{0: string, 1: string}>
+	 */
+	public static function data_duplicate_foreign_attributes(): array {
+		return array(
+			'adjusted xlink duplicate'       => array(
+				'<svg><a xlink:href="#first" XLINK:HREF="#second"></a></svg>',
+				'<svg><a xlink:href="#first"></a></svg>',
+			),
+			'adjusted xml duplicate'         => array(
+				'<svg><a xml:lang="en" XML:LANG="fr"></a></svg>',
+				'<svg><a xml:lang="en"></a></svg>',
+			),
+			'non-adjusted colon duplicate'   => array(
+				'<svg><a foo:bar="one" FOO:BAR="two"></a></svg>',
+				'<svg><a foo:bar="one"></a></svg>',
+			),
+			'adjusted and non-adjusted pair' => array(
+				'<svg><a xlink:href="#target" xlink:author="author"></a></svg>',
+				'<svg><a xlink:href="#target" xlink:author="author"></a></svg>',
+			),
+		);
+	}
+
 	/**
 	 * Ensures that SCRIPT contents are not escaped, as they are not parsed like text nodes are.
 	 *
@@ -134,6 +257,118 @@ public function test_style_contents_are_not_escaped() {
 		);
 	}
 
+	/**
+	 * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are; only NULL bytes are replaced (with U+FFFD).
+	 *
+	 * XMP contents are parsed as raw text: character references are never decoded.
+	 * Escaping the contents would change the document, e.g. a "<" would be replaced
+	 * by the literal text "&lt;" after serializing and re-parsing.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_xmp_contents_are_not_escaped() {
+		$this->assertSame(
+			"<xmp>1 < 2 &amp; apples > or\u{FFFD}anges</xmp>",
+			WP_HTML_Processor::normalize( "<xmp>1 < 2 &amp; apples > or\x00anges</xmp>" ),
+			'Should have preserved text inside an XMP element, except for replacing NULL bytes.'
+		);
+	}
+
+	/**
+	 * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are
+	 * preserved when serializing.
+	 *
+	 * These elements contain raw text which is part of the parsed document.
+	 * Dropping it would change the document's contents across a serialize and
+	 * re-parse cycle.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_rawtext_elements_with_contents
+	 *
+	 * @param string $html Already-normalized HTML containing a rawtext element with contents; normalizing it must return it unchanged.
+	 */
+	public function test_rawtext_element_contents_are_preserved_when_normalizing( string $html ) {
+		$this->assertSame(
+			$html,
+			WP_HTML_Processor::normalize( $html ),
+			'Should have preserved the rawtext element contents.'
+		);
+	}
+
+	/**
+	 * Data provider for test_rawtext_element_contents_are_preserved_when_normalizing(); in the foreign-content case the text is already escaped (`&lt;`), presumably because an IFRAME inside SVG is not parsed as HTML rawtext.
+	 *
+	 * @return array<string, array{0: string}> Already-normalized HTML, keyed by case name.
+	 */
+	public static function data_rawtext_elements_with_contents() {
+		return array(
+			'IFRAME with following text'       => array( '<iframe>x</iframe>y' ),
+			'NOEMBED with following text'      => array( '<noembed>x</noembed>y' ),
+			'NOFRAMES with following text'     => array( '<section><noframes>x</noframes>y</section>' ),
+			'NOFRAMES before comment'          => array( '<section><noframes>x</noframes><!----></section>' ),
+			'IFRAME with markup-like contents' => array( '<iframe><div>inert</div></iframe>' ),
+			'NOEMBED with character reference' => array( '<noembed>&amp;</noembed>' ),
+			'IFRAME in foreign content'        => array( '<svg><iframe>1 &lt; 2</iframe></svg>' ),
+		);
+	}
+
+	/**
+	 * Ensures that the contents of IFRAME, NOEMBED, and NOFRAMES elements are
+	 * preserved when serializing full documents, including NOFRAMES elements
+	 * in the HEAD or inside a FRAMESET.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_full_documents_with_rawtext_elements
+	 *
+	 * @param string $html     Input HTML, parsed with the full-document parser.
+	 * @param string $expected Expected serialization of the full document.
+	 */
+	public function test_rawtext_element_contents_are_preserved_in_full_documents( string $html, string $expected ) {
+		$processor = WP_HTML_Processor::create_full_parser( $html );
+
+		$this->assertSame(
+			$expected,
+			$processor->serialize(),
+			'Should have preserved the rawtext element contents.'
+		);
+	}
+
+	/**
+	 * Data provider for test_rawtext_element_contents_are_preserved_in_full_documents().
+	 *
+	 * @return array<string, array{0: string, 1: string}> Input HTML and expected full-document serialization, keyed by case name.
+	 */
+	public static function data_full_documents_with_rawtext_elements() {
+		return array(
+			'IFRAME in BODY'          => array(
+				'<iframe>x</iframe>y',
+				'<html><head></head><body><iframe>x</iframe>y</body></html>',
+			),
+			'NOEMBED in BODY'         => array(
+				'a<noembed>x</noembed>',
+				'<html><head></head><body>a<noembed>x</noembed></body></html>',
+			),
+			'NOFRAMES in BODY'        => array(
+				'a<noframes>x</noframes>',
+				'<html><head></head><body>a<noframes>x</noframes></body></html>',
+			),
+			'NOFRAMES in HEAD'        => array(
+				'<head><noframes>x</noframes></head>z',
+				'<html><head><noframes>x</noframes></head><body>z</body></html>',
+			),
+			'NOFRAMES in FRAMESET'    => array(
+				'<html><frameset><noframes>x</noframes>',
+				'<html><head></head><frameset><noframes>x</noframes></frameset></html>',
+			),
+			'IFRAME before a comment' => array(
+				'<h3><div><small><dd><iframe>x</iframe><!---->',
+				'<html><head></head><body><h3><div><small><dd><iframe>x</iframe><!----></dd></small></div></h3></body></html>',
+			),
+		);
+	}
+
 	public function test_unexpected_closing_tags_are_removed() {
 		$this->assertSame(
 			WP_HTML_Processor::normalize( 'one</div>two</span>three' ),
@@ -142,6 +377,36 @@ public function test_unexpected_closing_tags_are_removed() {
 		);
 	}
 
+	/**
+	 * Ensures that unexpected closing formatting tags (those with no matching opener) are ignored and leave the surrounding text untouched.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_unexpected_closing_formatting_tags
+	 *
+	 * @param string $html     HTML containing an unexpected closing formatting tag.
+	 * @param string $expected Expected normalized output.
+	 */
+	public function test_unexpected_closing_formatting_tags_are_ignored( string $html, string $expected ) {
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $html ),
+			'Should have ignored unexpected closing formatting tags.'
+		);
+	}
+
+	/**
+	 * Data provider for test_unexpected_closing_formatting_tags_are_ignored().
+	 *
+	 * @return array<string, array{0: string, 1: string}> Input HTML and expected normalized output, keyed by case name.
+	 */
+	public static function data_unexpected_closing_formatting_tags() {
+		return array(
+			'Unexpected A end tag' => array( 'one</a>two', 'onetwo' ),
+			'Unexpected B end tag' => array( 'one</b>two', 'onetwo' ),
+		);
+	}
+
 	/**
 	 * Ensures that self-closing elements in foreign content retain their self-closing flag.
 	 *
@@ -281,6 +546,10 @@ public static function data_tokens_with_null_bytes() {
 			'Foreign content text' => array( "<svg>one\x00two</svg>", "<svg>one\u{FFFD}two</svg>" ),
 			'SCRIPT content'       => array( "<script>alert(\x00)</script>", "<script>alert(\u{FFFD})</script>" ),
 			'STYLE content'        => array( "<style>\x00 {}</style>", "<style>\u{FFFD} {}</style>" ),
+			'IFRAME content'       => array( "<iframe>a\x00b</iframe>", "<iframe>a\u{FFFD}b</iframe>" ),
+			'NOEMBED content'      => array( "<noembed>a\x00b</noembed>", "<noembed>a\u{FFFD}b</noembed>" ),
+			'NOFRAMES content'     => array( "<noframes>a\x00b</noframes>", "<noframes>a\u{FFFD}b</noframes>" ),
+			'XMP content'          => array( "<xmp>a\x00b</xmp>", "<xmp>a\u{FFFD}b</xmp>" ),
 			'Comment text'         => array( "<!-- \x00 -->", "<!-- \u{FFFD} -->" ),
 		);
 	}
@@ -300,6 +569,129 @@ public function test_full_document_serialize_includes_doctype( string $doctype_i
 		);
 	}
 
+	/**
+	 * Ensures full document serialization includes the implied empty BODY after head content, is idempotent, and that the BODY is visited while walking tokens.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_provider_full_document_serialize_includes_implied_body_after_head_at_eof
+	 *
+	 * @param string $input               Full document input ending after HEAD content with no explicit BODY.
+	 * @param string $expected_serialized Expected serialization with the implied empty BODY element.
+	 */
+	public function test_full_document_serialize_includes_implied_body_after_head_at_eof( string $input, string $expected_serialized ) {
+		$processor  = WP_HTML_Processor::create_full_parser( $input );
+		$serialized = $processor->serialize();
+
+		$this->assertSame(
+			$expected_serialized,
+			$serialized,
+			'Should have serialized the implied empty BODY element before HTML closes.'
+		);
+
+		$processor = WP_HTML_Processor::create_full_parser( $serialized ); // Re-parse the output: serializing it again must be a no-op.
+
+		$this->assertSame(
+			$serialized,
+			$processor->serialize(),
+			'Should have produced idempotent full document serialization.'
+		);
+
+		$processor  = WP_HTML_Processor::create_full_parser( $input ); // Fresh parser over the original input, walked token by token below.
+		$tag_events = array();
+		while ( $processor->next_token() ) {
+			if ( '#tag' !== $processor->get_token_type() ) {
+				continue;
+			}
+
+			$tag_events[] = array( $processor->is_tag_closer() ? '-' : '+', $processor->get_tag() ); // '+' marks an opener, '-' a closer.
+		}
+
+		$this->assertSame(
+			array(
+				array( '+', 'BODY' ),
+				array( '-', 'BODY' ),
+				array( '-', 'HTML' ),
+			),
+			array_slice( $tag_events, -3 ),
+			'Should visit the implied empty BODY element before closing HTML.'
+		);
+	}
+
+	/**
+	 * Data provider for test_full_document_serialize_includes_implied_body_after_head_at_eof().
+	 *
+	 * @return array<string, array{0: string, 1: string}> Full document input and expected serialization, keyed by case name.
+	 */
+	public static function data_provider_full_document_serialize_includes_implied_body_after_head_at_eof() {
+		return array(
+			'Closed HEAD at EOF'                 => array(
+				'<!DOCTYPE html><html><head><title>x</title></head>',
+				'<!DOCTYPE html><html><head><title>x</title></head><body></body></html>',
+			),
+			'Unclosed TEMPLATE in HEAD'          => array(
+				'<!DOCTYPE html><html><head><template>x',
+				'<!DOCTYPE html><html><head><template>x</template></head><body></body></html>',
+			),
+			'Unclosed table in TEMPLATE in HEAD' => array(
+				'<html><title>x</title><template><table><tr><td>x',
+				'<html><head><title>x</title><template><table><tbody><tr><td>x</td></tr></tbody></table></template></head><body></body></html>',
+			),
+			'Ignored BODY in TEMPLATE at EOF'    => array(
+				'<template><body>',
+				'<html><head><template></template></head><body></body></html>',
+			),
+			'Ignored BODY closer in NOSCRIPT'    => array(
+				'<noscript></body>',
+				'<html><head><noscript></noscript></head><body></body></html>',
+			),
+		);
+	}
+
+	/**
+	 * Ensures that fragments ending while a table insertion mode is active still have their open table elements closed at EOF.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_provider_normalize_closes_tables_at_eof
+	 *
+	 * @param string $input    Fragment input ending in a table insertion mode.
+	 * @param string $expected Expected normalized fragment.
+	 */
+	public function test_normalize_closes_tables_at_eof( string $input, string $expected ) {
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $input ),
+			'Should have closed open table elements at EOF.'
+		);
+	}
+
+	/**
+	 * Data provider for test_normalize_closes_tables_at_eof(); the TR and TD cases also expect a TBODY in the output that is absent from the input.
+	 *
+	 * @return array<string, array{0: string, 1: string}> Fragment input and expected normalized fragment, keyed by case name.
+	 */
+	public static function data_provider_normalize_closes_tables_at_eof() {
+		return array(
+			'Open TABLE' => array(
+				'<table>',
+				'<table></table>',
+			),
+			'Open TBODY' => array(
+				'<table><tbody>',
+				'<table><tbody></tbody></table>',
+			),
+			'Open TR'    => array(
+				'<table><tr>',
+				'<table><tbody><tr></tr></tbody></table>',
+			),
+			'Open TD'    => array(
+				'<table><tr><td>x',
+				'<table><tbody><tr><td>x</td></tr></tbody></table>',
+			),
+		);
+	}
+
 	/**
 	 * Data provider.
 	 *
@@ -340,6 +732,88 @@ public function test_normalize_special_leading_newline_handling( string $input,
 		$this->assertEqualHTML( $expected, $normalized_twice );
 	}
 
+	/**
+	 * Ensures that the special leading newline rule applies only in the HTML namespace.
+	 *
+	 * @ticket 64607
+	 *
+	 * @dataProvider data_provider_special_leading_newline_namespace_serialization
+	 *
+	 * @param string $input    HTML input containing a PRE, LISTING, or TEXTAREA element.
+	 * @param string $expected Expected normalized output, which must also be unchanged by a second normalization.
+	 */
+	public function test_special_leading_newline_rule_depends_on_namespace( string $input, string $expected ) {
+		$normalized = WP_HTML_Processor::normalize( $input );
+		$this->assertSame(
+			$expected,
+			$normalized,
+			'Should serialize special leading newlines according to the element namespace.'
+		);
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $normalized ),
+			'Normalizing already-normalized special leading newlines should not change them.'
+		);
+	}
+
+	/**
+	 * Data provider for test_special_leading_newline_rule_depends_on_namespace().
+	 *
+	 * @return array<string, array{0: string, 1: string}> HTML input and expected normalized output, keyed by case name.
+	 */
+	public static function data_provider_special_leading_newline_namespace_serialization() {
+		return array(
+			'MathML TEXTAREA'                            => array(
+				'<math><textarea>X</textarea></math>',
+				'<math><textarea>X</textarea></math>',
+			),
+			'MathML TEXTAREA with leading newline'       => array(
+				"<math><textarea>\nX</textarea></math>",
+				"<math><textarea>\nX</textarea></math>",
+			),
+			'SVG TEXTAREA'                               => array(
+				'<svg><textarea>X</textarea></svg>',
+				'<svg><textarea>X</textarea></svg>',
+			),
+			'SVG TEXTAREA with leading newline'          => array(
+				"<svg><textarea>\nX</textarea></svg>",
+				"<svg><textarea>\nX</textarea></svg>",
+			),
+			'HTML TEXTAREA inside SVG HTML integration point' => array(
+				'<svg><foreignObject><textarea>X</textarea></foreignObject></svg>',
+				"<svg><foreignObject><textarea>\nX</textarea></foreignObject></svg>",
+			),
+			'HTML TEXTAREA with leading newline inside SVG HTML integration point' => array(
+				"<svg><foreignObject><textarea>\n\nX</textarea></foreignObject></svg>",
+				"<svg><foreignObject><textarea>\n\nX</textarea></foreignObject></svg>",
+			),
+			'HTML TEXTAREA inside MathML text integration point' => array(
+				'<math><mtext><textarea>X</textarea></mtext></math>',
+				"<math><mtext><textarea>\nX</textarea></mtext></math>",
+			),
+			'HTML TEXTAREA with leading newline inside MathML text integration point' => array(
+				"<math><mtext><textarea>\n\nX</textarea></mtext></math>",
+				"<math><mtext><textarea>\n\nX</textarea></mtext></math>",
+			),
+			'HTML TEXTAREA inside MathML HTML integration point' => array(
+				'<math><annotation-xml encoding="text/html"><textarea>X</textarea></annotation-xml></math>',
+				"<math><annotation-xml encoding=\"text/html\"><textarea>\nX</textarea></annotation-xml></math>",
+			),
+			'HTML TEXTAREA with leading newline inside MathML HTML integration point' => array(
+				"<math><annotation-xml encoding=\"text/html\"><textarea>\n\nX</textarea></annotation-xml></math>",
+				"<math><annotation-xml encoding=\"text/html\"><textarea>\n\nX</textarea></annotation-xml></math>",
+			),
+			'HTML PRE after exiting SVG foreign content' => array(
+				'<svg><pre>X</pre></svg>',
+				"<svg></svg><pre>\nX</pre>",
+			),
+			'HTML LISTING after exiting MathML foreign content' => array(
+				'<math><listing>X</listing></math>',
+				"<math></math><listing>\nX</listing>",
+			),
+		);
+	}
+
 	/**
 	 * Ensures that fuzzer-discovered inputs do not emit native PHP errors.
 	 *
@@ -493,6 +967,7 @@ public static function data_provider_normalized_fuzzer_cases_that_should_be_idem
 			'FORM with SVG TITLE text edge'             => array( "<form ><svg ><title \"'></form><form>" ),
 			'FORM with TABLE and SCRIPT'                => array( '<form id><table te"><script></script><td srce" ID/></form><form claslicate">' ),
 			'FORM with TABLE CAPTION'                   => array( '<form><table><caption></form><form >' ),
+			'XMP rawtext with entity-looking text'      => array( '<xmp>apples > oranges &amp; <</xmp>' ),
 			'Short malformed G attribute C'             => array( '<g c/=>' ),
 			'Short malformed G attribute S'             => array( '<g s/=>' ),
 			'Duplicate SRC boundary'                    => array( '<g src=""g src="">' ),
@@ -509,6 +984,161 @@ public static function data_provider_normalized_fuzzer_cases_that_should_be_idem
 		);
 	}
 
+	/**
+	 * Ensures that decoded carriage returns are serialized as character references.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_provider_decoded_carriage_returns
+	 *
+	 * @param string $input    HTML input containing a decoded carriage return.
+	 * @param string $expected Expected normalized output, which must also be unchanged by a second normalization.
+	 */
+	public function test_normalize_serializes_decoded_carriage_returns_as_character_references( string $input, string $expected ) {
+		$normalized = WP_HTML_Processor::normalize( $input );
+
+		$this->assertSame( $expected, $normalized, 'Should have serialized the carriage return as a character reference.' );
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $normalized ),
+			'Normalizing already-normalized HTML should not change the serialized carriage return.'
+		);
+	}
+
+	/**
+	 * Data provider for test_normalize_serializes_decoded_carriage_returns_as_character_references().
+	 *
+	 * @return array<string, array{0: string, 1: string}> HTML input and expected normalized output, keyed by case name.
+	 */
+	public static function data_provider_decoded_carriage_returns() {
+		return array(
+			'Regular text'    => array( '<p>a&#13;b</p>', '<p>a&#13;b</p>' ),
+			'Regular text with non-canonical character reference' => array( '<p>a&#x0D;b</p>', '<p>a&#13;b</p>' ),
+			'RCDATA title'    => array( '<title>a&#13;b</title>', '<title>a&#13;b</title>' ),
+			'RCDATA textarea with leading-newline preservation' => array( '<textarea>a&#13;b</textarea>', "<textarea>\na&#13;b</textarea>" ),
+			'Attribute value' => array( '<p title="a&#13;b"></p>', '<p title="a&#13;b"></p>' ),
+			'Table text'      => array( '<table><tr><td>x&#13;</td></tr></table>', '<table><tbody><tr><td>x&#13;</td></tr></tbody></table>' ),
+			'Template text'   => array( '<template><p>a&#13;b</p></template>', '<template><p>a&#13;b</p></template>' ),
+		);
+	}
+
+	/**
+	 * Ensures that raw carriage returns in attribute values are serialized as line feeds.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_provider_raw_attribute_carriage_returns
+	 *
+	 * @param string $input    HTML input containing raw carriage returns.
+	 * @param string $expected Expected normalized output, which must also be unchanged by a second normalization.
+	 */
+	public function test_normalize_serializes_raw_attribute_carriage_returns_as_line_feeds( string $input, string $expected ) {
+		$normalized = WP_HTML_Processor::normalize( $input );
+
+		$this->assertSame( $expected, $normalized, 'Should have serialized raw attribute carriage returns as line feeds.' );
+		$this->assertSame(
+			$expected,
+			WP_HTML_Processor::normalize( $normalized ),
+			'Normalizing already-normalized HTML should not change raw attribute newlines.'
+		);
+	}
+
+	/**
+	 * Data provider for test_normalize_serializes_raw_attribute_carriage_returns_as_line_feeds(); a lone CR and a CRLF pair both expect a single LF.
+	 *
+	 * @return array<string, array{0: string, 1: string}> HTML input and expected normalized output, keyed by case name.
+	 */
+	public static function data_provider_raw_attribute_carriage_returns() {
+		return array(
+			'Raw carriage return' => array( "<p title=\"a\rb\"></p>", "<p title=\"a\nb\"></p>" ),
+			'Raw CRLF pair'       => array( "<p title=\"a\r\nb\"></p>", "<p title=\"a\nb\"></p>" ),
+		);
+	}
+
+	/**
+	 * Ensures that raw carriage returns are normalized before class updates are serialized.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_serialize_token_normalizes_raw_class_carriage_returns_before_class_updates() {
+		$processor = WP_HTML_Processor::create_fragment( "<p class=\"a\rb\"></p>" );
+
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Should find the P element.' );
+
+		$processor->add_class( 'c' ); // Add a class on top of the source value containing the raw carriage return.
+
+		$serialized = $processor->serialize_token();
+		$this->assertSame(
+			"<p class=\"a\nb c\">",
+			$serialized,
+			'Should have serialized raw class carriage returns as line feeds before adding classes.'
+		);
+
+		$reparsed = WP_HTML_Processor::create_fragment( $serialized ); // Round-trip: parse the serialized token again.
+		$this->assertTrue( $reparsed->next_tag( 'P' ), 'Should find the reparsed P element.' );
+		$this->assertSame( "a\nb c", $reparsed->get_attribute( 'class' ), 'The serialized class should parse back to the same value.' );
+	}
+
+	/**
+	 * Ensures rawtext element contents serialize without escaping:
+	 * character references do not decode inside SCRIPT and STYLE, so
+	 * escaping their contents or emitting `&#13;` there would corrupt them.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_provider_rawtext_contents
+	 *
+	 * @param string $html HTML whose rawtext contents must serialize unchanged; it is both the input and the expected output.
+	 */
+	public function test_normalize_preserves_rawtext_contents( string $html ) {
+		$this->assertSame(
+			$html,
+			WP_HTML_Processor::normalize( $html ),
+			'Should have serialized the rawtext contents unchanged.'
+		);
+	}
+
+	/**
+	 * Data provider for test_normalize_preserves_rawtext_contents().
+	 *
+	 * @return array<string, array{0: string}> HTML expected to normalize to itself, keyed by case name.
+	 */
+	public static function data_provider_rawtext_contents() {
+		return array(
+			'SCRIPT with character references' => array( '<script>a&#13;&amp;b</script>' ),
+			'STYLE with character references'  => array( '<style>a&#13;&amp;b</style>' ),
+		);
+	}
+
+	/**
+	 * Ensures NULL bytes in attribute values set through the API serialize
+	 * as U+FFFD so that serialized output parses back to the same value.
+	 *
+	 * Browsers serialize the raw NULL byte in innerHTML, which does not
+	 * round-trip: re-parsing replaces it with U+FFFD. Serializing U+FFFD
+	 * directly is a benign deviation which keeps output idempotent, like
+	 * serializing decoded carriage returns as &#13;.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_serialize_token_replaces_null_bytes_in_enqueued_attribute_values() {
+		$processor = WP_HTML_Processor::create_fragment( '<p title="x"></p>' );
+
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Should find the P element.' );
+		$this->assertTrue( $processor->set_attribute( 'title', "a\x00b" ), 'Should have set the attribute.' );
+
+		$serialized = $processor->serialize_token(); // Serialized with the attribute update above still pending on this token.
+		$this->assertSame(
+			"<p title=\"a\u{FFFD}b\">",
+			$serialized,
+			'Should have serialized the NULL byte as U+FFFD.'
+		);
+
+		$reparsed = WP_HTML_Processor::create_fragment( $serialized ); // Round-trip: parse the serialized token again.
+		$this->assertTrue( $reparsed->next_tag( 'P' ), 'Should find the reparsed P element.' );
+		$this->assertSame( "a\u{FFFD}b", $reparsed->get_attribute( 'title' ), 'The serialized title should parse back to the same value.' );
+	}
+
 	/**
 	 * Data provider.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
index a89014282df73..66c0bbb3ae1c7 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
@@ -583,6 +583,38 @@ public function test_expects_closer_foreign_content_self_closing() {
 		$this->assertTrue( $processor->expects_closer() );
 	}
 
+	/**
+	 * Ensures a trailing slash in an unquoted attribute value is part of that value and is not treated as a self-closing flag on a foreign element.
+	 *
+	 * @ticket 61576
+	 */
+	public function test_trailing_slash_in_unquoted_attribute_value_does_not_self_close_foreign_content() {
+		$processor = WP_HTML_Processor::create_fragment( '<math><mi disabled=abc/>text</math>' );
+
+		$this->assertTrue( $processor->next_tag( 'MI' ), 'Could not find MI tag: check test setup.' );
+		$this->assertSame(
+			'abc/',
+			$processor->get_attribute( 'disabled' ),
+			'Trailing slash in unquoted attribute value should belong to the attribute value.'
+		);
+		$this->assertFalse(
+			$processor->has_self_closing_flag(),
+			'Trailing slash in unquoted attribute value should not be interpreted as a self-closing flag.'
+		);
+		$this->assertTrue(
+			$processor->expects_closer(),
+			'MI with a trailing slash in an unquoted attribute value should still expect a closer.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'Could not find text following MI tag: check test setup.' );
+		$this->assertSame( '#text', $processor->get_token_name(), 'Should have found the text node following the MI tag.' );
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'MATH', 'MI', '#text' ),
+			$processor->get_breadcrumbs(),
+			'Text following the MI tag should remain inside the MI element.'
+		);
+	}
+
 	/**
 	 * Ensures that expects_closer works for void-like elements in foreign content.
 	 *
@@ -851,6 +883,51 @@ public function test_adjusts_for_html_integration_points_in_svg() {
 		);
 	}
 
+	/**
+	 * Ensures that a CDATA section directly inside an SVG HTML integration point (foreignObject) is reported as a `#cdata-section` token in the `svg` namespace with its text available.
+	 *
+	 * @ticket 61576
+	 */
+	public function test_cdata_sections_in_svg_html_integration_points() {
+		$processor = WP_HTML_Processor::create_fragment(
+			'<svg><foreignObject><![CDATA[foo]]></foreignObject></svg>'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( 'foreignObject' ),
+			'Failed to find "foreignObject" under test: check test setup.'
+		);
+
+		$this->assertSame(
+			'svg',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the "foreignObject" element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_token(),
+			'Failed to find expected CDATA section.'
+		);
+
+		$this->assertSame(
+			'#cdata-section',
+			$processor->get_token_name(),
+			"Should have found a CDATA section but found {$processor->get_token_name()} instead."
+		);
+
+		$this->assertSame(
+			'svg',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the CDATA section.'
+		);
+
+		$this->assertSame(
+			'foo',
+			$processor->get_modifiable_text(),
+			'Found incorrect CDATA content.'
+		);
+	}
+
 	/**
 	 * Ensures that the processor correctly adjusts the namespace
 	 * for elements inside MathML integration points.
@@ -911,6 +988,51 @@ public function test_adjusts_for_mathml_integration_points() {
 		);
 	}
 
+	/**
+	 * Ensures that a CDATA section directly inside a MathML HTML integration point (annotation-xml with encoding="text/html") is reported as a `#cdata-section` token in the `math` namespace with its text available.
+	 *
+	 * @ticket 61576
+	 */
+	public function test_cdata_sections_in_mathml_html_integration_points() {
+		$processor = WP_HTML_Processor::create_fragment(
+			'<math><annotation-xml encoding="text/html"><![CDATA[x]]></annotation-xml></math>'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( 'ANNOTATION-XML' ),
+			'Failed to find "annotation-xml" under test: check test setup.'
+		);
+
+		$this->assertSame(
+			'math',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the "annotation-xml" element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_token(),
+			'Failed to find expected CDATA section.'
+		);
+
+		$this->assertSame(
+			'#cdata-section',
+			$processor->get_token_name(),
+			"Should have found a CDATA section but found {$processor->get_token_name()} instead."
+		);
+
+		$this->assertSame(
+			'math',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the CDATA section.'
+		);
+
+		$this->assertSame(
+			'x',
+			$processor->get_modifiable_text(),
+			'Found incorrect CDATA content.'
+		);
+	}
+
 	/**
 	 * Ensures that the processor stops correctly on a FORM tag closer token.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
index 911fa8b910b37..2c18a10a94712 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
@@ -418,6 +418,459 @@ public function test_remains_stable_when_editing_attributes() {
 		);
 	}
 
+	/**
+	 * Ensures that HTML elements inside MathML text integration points retain
+	 * the full path to their MathML parent, as do the text nodes they contain.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::get_namespace
+	 */
+	public function test_reports_nested_anchor_breadcrumbs_inside_mathml_text_integration_point() {
+		$processor = WP_HTML_Processor::create_fragment( '<a><math><mi>x<a>y' );
+
+		$this->assertTrue( $processor->next_tag( 'A' ), 'Failed to find the outer A element.' );
+		$this->assertTrue( $processor->next_tag( 'A' ), 'Failed to find the inner A element.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ),
+			$processor->get_breadcrumbs(),
+			'The inner A element should remain nested inside the MathML MI element.'
+		);
+
+		$this->assertSame(
+			'html',
+			$processor->get_namespace(),
+			'The inner A element should be an HTML element inside the MathML text integration point.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'Failed to find the text following the inner A element.' );
+
+		$this->assertSame(
+			'#text',
+			$processor->get_token_name(),
+			'The inner A element should contain the following text node.'
+		);
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A', '#text' ),
+			$processor->get_breadcrumbs(),
+			'The text after the inner A element should remain nested inside that A element.'
+		);
+
+		$this->assertSame(
+			'y',
+			$processor->get_modifiable_text(),
+			'The inner A element should contain the expected text.'
+		);
+	}
+
+	/**
+	 * Ensures that an outer A element removed from the stack of open elements
+	 * is removed from breadcrumbs after its existing child subtree closes, so a following SPAN is reported directly under BODY.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::matches_breadcrumbs
+	 */
+	public function test_removes_outer_anchor_breadcrumb_after_mathml_text_integration_point_closes() {
+		$processor = WP_HTML_Processor::create_fragment( '<a><math><mi>x<a>y</a></mi></math>z<span target>t' );
+
+		$this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find the SPAN element after the MathML subtree.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'SPAN' ),
+			$processor->get_breadcrumbs(),
+			'The SPAN element after the MathML subtree should not remain nested inside the removed outer A element.'
+		);
+
+		$this->assertFalse(
+			$processor->matches_breadcrumbs( array( 'A', 'SPAN' ) ),
+			'The SPAN element should not match breadcrumbs inside the removed outer A element.'
+		);
+	}
+
+	/**
+	 * Ensures that a removed outer A element's breadcrumb is not confused with
+	 * a same-named foreign element between it and the integration point.
+	 *
+	 * Foreign A elements never participate in the active formatting elements,
+	 * so the removed node is the outer HTML A element, not the foreign one.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::matches_breadcrumbs
+	 *
+	 * @dataProvider data_intervening_foreign_anchor_html
+	 *
+	 * @param string $html HTML with a foreign A element between the removed outer A element and the integration point, followed by a SPAN element.
+	 */
+	public function test_removes_outer_anchor_breadcrumb_with_intervening_foreign_anchor( string $html ) {
+		$processor = WP_HTML_Processor::create_fragment( $html );
+
+		$this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find the SPAN element after the foreign subtree.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'SPAN' ),
+			$processor->get_breadcrumbs(),
+			'The SPAN element after the foreign subtree should not remain nested inside the removed outer A element.'
+		);
+
+		$this->assertFalse(
+			$processor->matches_breadcrumbs( array( 'A', 'SPAN' ) ),
+			'The SPAN element should not match breadcrumbs inside the removed outer A element.'
+		);
+	}
+
+	/**
+	 * Data provider for test_removes_outer_anchor_breadcrumb_with_intervening_foreign_anchor().
+	 *
+	 * @return array<string, array{0: string}> HTML input, keyed by case name.
+	 */
+	public static function data_intervening_foreign_anchor_html() {
+		return array(
+			'MathML A before text integration point' => array( '<a><math><a><mtext>x<a>y</a></mtext></a></math>z<span>t' ),
+			'SVG A before integration point'         => array( '<a><svg><a><foreignObject>x<a>y</a></foreignObject></a></svg>z<span>t' ),
+		);
+	}
+
+	/**
+	 * Ensures that an HTML heading end tag inside a MathML text integration
+	 * point is ignored, so following content stays inside the integration point.
+	 *
+	 * The `</h2>` is dispatched through the foreign-content rules, which walk up
+	 * to the HTML-namespace `H2` and hand off to the "in body" heading end-tag
+	 * steps. Those require the heading to be in scope, but a MathML text
+	 * integration point (`MI`) is a scope boundary, so `H2` is not in scope and
+	 * the end tag is dropped. The following `<x-0>` is therefore inserted into
+	 * `MI` rather than becoming a sibling of `H2`.
+	 *
+	 * This matches the HTML specification and browsers (verified against
+	 * Chromium); PHP's `Dom\HTMLDocument` reparents `<x-0>` out of `MI`, which is
+	 * a limitation of that parser, not of the HTML API.
+	 *
+	 * @see https://software.hixie.ch/utilities/js/live-dom-viewer/?%3Ch2%3E%3Cmath%3E%3Cmi%3Ea%3C%2Fh2%3E%3Cx-0%3Eb%3C%2Fx-0%3E
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 */
+	public function test_heading_end_tag_in_mathml_text_integration_point_is_ignored() {
+		$processor = WP_HTML_Processor::create_fragment( '<h2><math><mi>a</h2><x-0>b</x-0>' ); // MATH and MI are still unclosed when </h2> arrives.
+
+		$this->assertTrue( $processor->next_tag( 'X-0' ), 'Failed to find the X-0 element following the ignored heading end tag.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'H2', 'MATH', 'MI', 'X-0' ),
+			$processor->get_breadcrumbs(),
+			'The X-0 element should remain inside the MathML MI text integration point because the </h2> end tag is not in scope and is ignored.'
+		);
+	}
+
+	/**
+	 * Ensures that an outer A element removed from the stack of open elements
+	 * remains visitable as a virtual closer after its existing child subtree closes (A tokens in order: outer opener, inner opener, inner closer, virtual outer closer).
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::is_tag_closer
+	 */
+	public function test_visits_outer_anchor_virtual_closer_after_mathml_text_integration_point_closes() {
+		$processor = WP_HTML_Processor::create_fragment( '<a><math><mi>x<a>y</a></mi></math>z' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the outer A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ),
+			$processor->get_breadcrumbs(),
+			'The inner A opener should remain nested inside the MathML MI element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ),
+			$processor->get_breadcrumbs(),
+			'The inner A closer should report its parent breadcrumbs.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the virtual outer A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY' ),
+			$processor->get_breadcrumbs(),
+			'The virtual outer A closer should report its parent breadcrumbs.'
+		);
+	}
+
+	/**
+	 * Ensures that the removed outer A element's virtual closer is visited
+	 * before a new same-name opener immediately following the subtree.
+	 *
+	 * This is the one input where the adjusted-current-node guard and the
+	 * same-name next-event lookahead in the virtual-closer queueing must
+	 * cooperate: the new A opener shares the removed element's tag name, but
+	 * the virtual closer must still fire first so the new element opens as a
+	 * sibling, not a child.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::is_tag_closer
+	 */
+	public function test_visits_outer_anchor_virtual_closer_before_same_name_opener() {
+		$processor    = WP_HTML_Processor::create_fragment( '<a><math><mi>x<a>y</a></mi></math><a>z' );
+		$anchor_query = array(
+			'tag_name'    => 'A',
+			'tag_closers' => 'visit',
+		);
+
+		// Record every visited A token as a pair of its kind and its breadcrumbs.
+		$observed = array();
+		while ( $processor->next_tag( $anchor_query ) ) {
+			$kind       = $processor->is_tag_closer() ? 'closer' : 'opener';
+			$observed[] = array( $kind, $processor->get_breadcrumbs() );
+		}
+
+		$expected = array(
+			array( 'opener', array( 'HTML', 'BODY', 'A' ) ),
+			array( 'opener', array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ) ),
+			array( 'closer', array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ) ),
+			array( 'closer', array( 'HTML', 'BODY' ) ),
+			array( 'opener', array( 'HTML', 'BODY', 'A' ) ),
+			array( 'closer', array( 'HTML', 'BODY' ) ),
+		);
+
+		$this->assertSame(
+			$expected,
+			$observed,
+			'Expected the removed outer A virtual closer to be visited before the new same-name A opener.'
+		);
+	}
+
+	/**
+	 * Ensures that an outer A element removed from the stack of open elements
+	 * remains visitable as a virtual closer when the fragment ends inside its
+	 * existing child subtree.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::is_tag_closer
+	 */
+	public function test_visits_outer_anchor_virtual_closer_at_end_of_fragment() {
+		$processor = WP_HTML_Processor::create_fragment( '<a><math><mi>x<a>y' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the outer A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ),
+			$processor->get_breadcrumbs(),
+			'The inner A opener should remain nested inside the MathML MI element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ),
+			$processor->get_breadcrumbs(),
+			'The inner A closer should report its parent breadcrumbs.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the virtual outer A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY' ),
+			$processor->get_breadcrumbs(),
+			'The virtual outer A closer should report its parent breadcrumbs.'
+		);
+	}
+
+	/**
+	 * Ensures that an outer A element removed from the stack of open elements
+	 * remains visitable as a virtual closer before full-parser EOF closers.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Processor::get_breadcrumbs
+	 * @covers WP_HTML_Processor::is_tag_closer
+	 */
+	public function test_visits_outer_anchor_virtual_closer_before_full_parser_eof_closers() {
+		$processor = WP_HTML_Processor::create_full_parser( '<!DOCTYPE html><a><math><mi>x<a>y' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the outer A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The first A should be the outer A opener.' );
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A opener.'
+		);
+
+		$this->assertFalse( $processor->is_tag_closer(), 'The second A should be the inner A opener.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI', 'A' ),
+			$processor->get_breadcrumbs(),
+			'The inner A opener should remain nested inside the MathML MI element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the inner A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The third A should be the inner A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'A', 'MATH', 'MI' ),
+			$processor->get_breadcrumbs(),
+			'The inner A closer should report its parent breadcrumbs.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'A',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the virtual outer A closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The fourth A should be the virtual outer A closer.' );
+
+		$this->assertSame(
+			array( 'HTML', 'BODY' ),
+			$processor->get_breadcrumbs(),
+			'The virtual outer A closer should report its parent breadcrumbs.'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag(
+				array(
+					'tag_name'    => 'BODY',
+					'tag_closers' => 'visit',
+				)
+			),
+			'Failed to find the full-parser BODY closer.'
+		);
+
+		$this->assertTrue( $processor->is_tag_closer(), 'The BODY token should be a closer.' );
+
+		$this->assertSame(
+			array( 'HTML' ),
+			$processor->get_breadcrumbs(),
+			'The BODY closer should not consume the stale outer A breadcrumb.'
+		);
+	}
+
 	/**
 	 * Ensures that the ability to set attributes isn't broken by the HTML Processor.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index a03a9ab806a93..6431dd9b6678a 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -178,6 +178,10 @@ private static function build_tree_representation( ?string $fragment_context, st
 			$token_type = $processor->get_token_type();
 			$is_closer  = $processor->is_tag_closer();
 
+			if ( '#presumptuous-tag' === $token_type ) {
+				continue;
+			}
+
 			if ( $was_text && '#text' !== $token_name ) {
 				if ( '' !== $text_node ) {
 					$output .= "{$text_node}\"\n";
@@ -317,11 +321,16 @@ static function ( $a, $b ) {
 			}
 		}
 
-		if ( null !== $processor->get_unsupported_exception() ) {
-			throw $processor->get_unsupported_exception();
+		$unsupported_exception = $processor->get_unsupported_exception();
+		$ignored_presumptuous_tag_exception = null !== $unsupported_exception
+			&& '#presumptuous-tag' === $unsupported_exception->token_name
+			&& '</>' === $unsupported_exception->token
+			&& 'Content outside of HTML is unsupported.' === $unsupported_exception->getMessage();
+		if ( null !== $unsupported_exception && ! $ignored_presumptuous_tag_exception ) {
+			throw $unsupported_exception;
 		}
 
-		if ( null !== $processor->get_last_error() ) {
+		if ( ! $ignored_presumptuous_tag_exception && null !== $processor->get_last_error() ) {
 			throw new WP_HTML_Unsupported_Exception( "Parser error: {$processor->get_last_error()}", '', 0, '', array(), array() );
 		}
 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
index ffc99ad58fd8e..b8cfbe36d2208 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
@@ -405,6 +405,66 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
 		$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
 	}
 
+	/**
+	 * Verifies that when the adoption agency algorithm finds no matching
+	 * active formatting element, it acts like "any other end tag".
+	 *
+	 * @covers WP_HTML_Processor::step_in_body
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_in_body_adoption_agency_falls_back_to_any_other_end_tag
+	 *
+	 * @param string $formatting_tag_name Formatting tag name with no active formatting element.
+	 */
+	public function test_in_body_adoption_agency_falls_back_to_any_other_end_tag( string $formatting_tag_name ) {
+		$processor = WP_HTML_Processor::create_fragment( "<div><span></{$formatting_tag_name}><code target></code></span></div>" );
+
+		$processor->next_tag( 'SPAN' );
+		$this->assertSame( 'SPAN', $processor->get_tag(), "Expected to start test on SPAN element but found {$processor->get_tag()} instead." );
+		$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );
+
+		$this->assertTrue( $processor->next_tag( 'CODE' ), "Failed to ignore unexpected {$formatting_tag_name} closer and advance to CODE opener." );
+		$this->assertSame( 'CODE', $processor->get_tag(), "Expected to find CODE element, but found {$processor->get_tag()} instead." );
+		$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to keep SPAN open after unexpected formatting closer.' );
+	}
+
+	/**
+	 * Verifies that the adoption agency fallback preserves the "any other end tag"
+	 * step result when the ignored token is followed by EOF.
+	 *
+	 * @covers WP_HTML_Processor::step_in_body
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_in_body_adoption_agency_falls_back_to_any_other_end_tag
+	 *
+	 * @param string $formatting_tag_name Formatting tag name with no active formatting element.
+	 */
+	public function test_in_body_adoption_agency_fallback_preserves_any_other_end_tag_step_result( string $formatting_tag_name ) {
+		$ordinary_processor = WP_HTML_Processor::create_fragment( '<span></x>' );
+		$this->assertTrue( $ordinary_processor->step(), 'Failed to find the SPAN opener before an ordinary unexpected end tag.' );
+		$this->assertSame( 'SPAN', $ordinary_processor->get_tag(), "Expected to start test on SPAN element but found {$ordinary_processor->get_tag()} instead." );
+		$this->assertFalse( $ordinary_processor->step(), 'Expected ordinary unexpected end tag followed by EOF to return false.' );
+
+		$formatting_processor = WP_HTML_Processor::create_fragment( "<span></{$formatting_tag_name}>" );
+		$this->assertTrue( $formatting_processor->step(), 'Failed to find the SPAN opener before an unexpected formatting end tag.' );
+		$this->assertSame( 'SPAN', $formatting_processor->get_tag(), "Expected to start test on SPAN element but found {$formatting_processor->get_tag()} instead." );
+		$this->assertFalse( $formatting_processor->step(), 'Expected unexpected formatting end tag followed by EOF to return false.' );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_in_body_adoption_agency_falls_back_to_any_other_end_tag() {
+		return array(
+			'Unexpected A end tag' => array( 'a' ),
+			'Unexpected B end tag' => array( 'b' ),
+		);
+	}
+
 	/**
 	 * Ensures that closing `</br>` tags are appropriately treated as opening tags with no attributes.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php
new file mode 100644
index 0000000000000..1180fa7110c88
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php
@@ -0,0 +1,428 @@
+<?php
+/**
+ * Unit tests covering WP_HTML_Tag_Processor input-stream preprocessing
+ * at its read boundaries.
+ *
+ * The HTML specification's "preprocessing the input stream" step (newline
+ * normalization) and the tokenizer's U+0000 NULL replacements are deferred
+ * by the Tag Processor while scanning and must be applied wherever parsed
+ * values are read out of the input document.
+ *
+ * @package WordPress
+ * @subpackage HTML-API
+ *
+ * @since 7.1.0
+ *
+ * @group html-api
+ *
+ * @coversDefaultClass WP_HTML_Tag_Processor
+ */
+class Tests_HtmlApi_WpHtmlTagProcessor_InputPreprocessing extends WP_UnitTestCase {
+	/**
+	 * Ensures that `get_attribute()` applies input-stream preprocessing and
+	 * tokenizer replacements to attribute values found in the input document.
+	 *
+	 * Newlines are normalized (CRLF → LF, CR → LF) and U+0000 NULL is replaced
+	 * with U+FFFD before character references decode, so `&#13;` produces a
+	 * real carriage return and `&#0;` produces U+FFFD. Browser-verified.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 *
+	 * @dataProvider data_attribute_values_with_preprocessing
+	 *
+	 * @param string $html     HTML containing a tag with attribute `a`.
+	 * @param string $expected Expected attribute value after preprocessing and decoding.
+	 */
+	public function test_get_attribute_applies_input_preprocessing( string $html, string $expected ) {
+		$processor = new WP_HTML_Tag_Processor( $html );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( $expected, $processor->get_attribute( 'a' ), 'Should have returned the preprocessed and decoded attribute value.' );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_attribute_values_with_preprocessing() {
+		return array(
+			'Raw CR'                      => array( "<div a='x\ry'>", "x\ny" ),
+			'Raw CRLF'                    => array( "<div a='x\r\ny'>", "x\ny" ),
+			'Raw CR then CRLF'            => array( "<div a='x\r\r\ny'>", "x\n\ny" ),
+			'Double-quoted raw CR'        => array( "<div a=\"x\ry\">", "x\ny" ),
+			'NULL byte'                   => array( "<div a='x\x00y'>", "x\u{FFFD}y" ),
+			'NULL byte unquoted'          => array( "<div a=x\x00y>", "x\u{FFFD}y" ),
+			'Encoded CR is preserved'     => array( "<div a='x&#13;y'>", "x\ry" ),
+			'Encoded NULL becomes U+FFFD' => array( "<div a='x&#0;y'>", "x\u{FFFD}y" ),
+			'Raw CR before encoded CR'    => array( "<div a='x\r&#13;y'>", "x\n\ry" ),
+			'Raw CR and NULL byte'        => array( "<div a='x\r\x00y'>", "x\n\u{FFFD}y" ),
+			'Named reference before NULL' => array( "<div a='x&amp\x00;y'>", "x&\u{FFFD};y" ),
+			'Named reference before CR'   => array( "<div a='x&amp\ry'>", "x&\ny" ),
+		);
+	}
+
+	/**
+	 * Ensures that values enqueued through `set_attribute()` are returned verbatim.
+	 *
+	 * Input-stream preprocessing applies only to the input document. API-supplied
+	 * values are plaintext, equivalent to DOM `setAttribute()`, which performs
+	 * no replacements. Browser-verified.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 *
+	 * @dataProvider data_enqueued_attribute_values
+	 *
+	 * @param string $value Plaintext attribute value to set and expect back unchanged.
+	 */
+	public function test_get_attribute_returns_enqueued_values_verbatim( string $value ) {
+		$processor = new WP_HTML_Tag_Processor( '<div a="original">' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( 'a', $value ), 'Should have enqueued the attribute update.' );
+		$this->assertSame( $value, $processor->get_attribute( 'a' ), 'Should have returned the enqueued value verbatim.' );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_enqueued_attribute_values() {
+		return array(
+			'Carriage return' => array( "x\ry" ),
+			'CRLF'            => array( "x\r\ny" ),
+			'NULL byte'       => array( "x\x00y" ),
+		);
+	}
+
+	/**
+	 * Ensures the existing class attribute value is preprocessed when enqueued
+	 * class updates are flushed into an attribute update.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::add_class
+	 *
+	 * @dataProvider data_class_updates_with_preprocessing
+	 *
+	 * @param string $html          HTML containing a tag with a class attribute.
+	 * @param string $expected_html Expected document after adding a class.
+	 */
+	public function test_class_updates_apply_input_preprocessing_to_existing_value( string $html, string $expected_html ) {
+		$processor = new WP_HTML_Tag_Processor( $html );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->add_class( 'added' ), 'Should have enqueued the class addition.' );
+		$this->assertSame( $expected_html, $processor->get_updated_html(), 'Should have preprocessed the existing class value before appending the new class.' );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_class_updates_with_preprocessing() {
+		return array(
+			'Raw CR'                      => array( "<div class='a\rb'>", "<div class=\"a\nb added\">" ),
+			'Raw CRLF'                    => array( "<div class='a\r\nb'>", "<div class=\"a\nb added\">" ),
+			'NULL byte'                   => array( "<div class='a\x00b'>", "<div class=\"a\u{FFFD}b added\">" ),
+			'Named reference before NULL' => array( "<div class='&not\x00x'>", "<div class=\"\u{AC}\u{FFFD}x added\">" ),
+		);
+	}
+
+	/**
+	 * Ensures attribute names containing NULL bytes are exposed with U+FFFD and
+	 * are addressable only by their replaced name, as browsers expose them.
+	 *
+	 * Browser-verified: `getAttribute("da\u{FFFD}ta")` finds the attribute
+	 * parsed from `da\x00ta`; `getAttribute("da\x00ta")` does not.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 * @covers ::get_attribute_names_with_prefix
+	 */
+	public function test_attribute_names_replace_null_bytes() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ), 'Should have listed the attribute by its replaced name.' );
+		$this->assertSame( '1', $processor->get_attribute( "da\u{FFFD}ta" ), 'Should have found the attribute by its replaced name.' );
+		$this->assertNull( $processor->get_attribute( "da\x00ta" ), 'Should not have found the attribute by its raw source name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<div DA\x00TA='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ), 'Should have lowercased the name around the replacement character.' );
+	}
+
+	/**
+	 * Ensures attribute names which collapse to the same name after NULL-byte
+	 * replacement are duplicates of one attribute: the first in document order
+	 * provides the value and removal removes every collapsed copy.
+	 *
+	 * Browser-verified: `<div da\x00ta="1" da\u{FFFD}ta="2">` produces a single
+	 * attribute `da\u{FFFD}ta` with value "1".
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 * @covers ::remove_attribute
+	 */
+	public function test_attribute_names_collapsing_after_null_replacement_are_duplicates() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1' da\u{FFFD}ta='2'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ), 'Should have reported the collapsed names as a single attribute name.' );
+		$this->assertSame( '1', $processor->get_attribute( "da\u{FFFD}ta" ), 'First duplicate should provide the value.' );
+
+		$this->assertTrue( $processor->remove_attribute( "da\u{FFFD}ta" ), 'Should have removed the attribute.' );
+		$this->assertSame( '<div  >', $processor->get_updated_html(), 'Should have removed all duplicates of the attribute.' );
+	}
+
+	/**
+	 * Ensures setting an attribute by its U+FFFD-replaced name updates the
+	 * source attribute whose raw name contains a NULL byte instead of adding
+	 * a second attribute.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::set_attribute
+	 */
+	public function test_set_attribute_updates_attribute_with_null_byte_in_source_name() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='old'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( "da\u{FFFD}ta", 'new' ), 'Should have set the attribute.' );
+		$this->assertSame( "<div da\u{FFFD}ta=\"new\">", $processor->get_updated_html(), 'Should have updated the existing attribute rather than adding a second one.' );
+	}
+
+	/**
+	 * Ensures tag names containing NULL bytes are exposed with U+FFFD,
+	 * matching the tokenizer's tag-name-state replacement in browsers.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 * @covers ::get_token_name
+	 */
+	public function test_get_tag_replaces_null_bytes() {
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>x</di\x00v>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag opener.' );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_tag(), 'Should have replaced the NULL byte in the opener tag name.' );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_token_name(), 'Should have replaced the NULL byte in the token name.' );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the text node.' );
+		$this->assertSame( 'x', $processor->get_modifiable_text(), 'Should have read the text between the opener and the closer.' );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag closer.' );
+		$this->assertTrue( $processor->is_tag_closer(), 'Should have matched the tag closer.' );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_tag(), 'Should have replaced the NULL byte in the closer tag name.' );
+	}
+
+	/**
+	 * Ensures NULL bytes in tag names do not affect special-element detection:
+	 * `<scr\x00ipt>` is not SCRIPT and does not switch into rawtext parsing,
+	 * in browsers or here. Internal identification uses raw bytes.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 */
+	public function test_null_byte_in_tag_name_does_not_select_rawtext_parsing() {
+		$processor = new WP_HTML_Tag_Processor( "<scr\x00ipt><b></b></scr\x00ipt>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag opener.' );
+		$this->assertSame( "SCR\u{FFFD}IPT", $processor->get_tag(), 'Should have exposed the tag name with the NULL byte replaced.' );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the B tag, not raw text.' );
+		$this->assertSame( 'B', $processor->get_tag(), 'Should have parsed B as a tag instead of raw text.' );
+	}
+
+	/**
+	 * Ensures NULL bytes cannot appear in PI-lookalike comment tag names,
+	 * whose targets are restricted to ASCII name characters.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 */
+	public function test_pi_lookalike_target_stops_before_null_byte() {
+		$processor = new WP_HTML_Tag_Processor( "<?px\x00rest ?>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the comment.' );
+		$this->assertSame( WP_HTML_Tag_Processor::COMMENT_AS_PI_NODE_LOOKALIKE, $processor->get_comment_type(), 'Should have identified the comment as a PI node lookalike.' );
+		$this->assertSame( 'px', $processor->get_tag(), 'Should have ended the target name before the NULL byte.' );
+	}
+
+	/**
+	 * Ensures tag-name queries match in the same replaced alphabet that
+	 * `get_tag()` exposes: a sought name containing U+FFFD matches source
+	 * names whose raw bytes contain NULL in its place, a sought name
+	 * containing a raw NULL byte matches nothing, and the value returned
+	 * by `get_tag()` round-trips into a successful query.
+	 *
+	 * This is also how WP_HTML_Processor::next_tag() matches, since it
+	 * compares sought names against the token name.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::next_tag
+	 */
+	public function test_tag_name_queries_match_replaced_names() {
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag( "DI\u{FFFD}V" ), 'Should have matched the tag by its replaced name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$tag_name  = $processor->get_tag();
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag( array( 'tag_name' => $tag_name ) ), 'The name returned by get_tag() should match in a query.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertFalse( $processor->next_tag( "DI\x00V" ), 'Should not have matched the tag by its raw source name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\u{FFFD}v>" );
+		$this->assertTrue( $processor->next_tag( "DI\u{FFFD}V" ), 'Should have matched a raw U+FFFD name.' );
+
+		$processor = WP_HTML_Processor::create_full_parser( "<body><di\x00v>" );
+		$this->assertTrue( $processor->next_tag( array( 'tag_name' => "DI\u{FFFD}V" ) ), 'The HTML Processor should match the replaced name.' );
+
+		$processor = WP_HTML_Processor::create_full_parser( "<body><di\x00v>" );
+		$this->assertFalse( $processor->next_tag( array( 'tag_name' => "DI\x00V" ) ), 'The HTML Processor should not match the raw source name.' );
+	}
+
+	/**
+	 * Ensures class_list does not replace NULL bytes in API-supplied values.
+	 *
+	 * Browser-verified: `setAttribute('class', "a\x00b")` then reading
+	 * `classList` yields the token "a\x00b" with the NULL byte preserved;
+	 * U+0000 replacement happens only in the tokenizer, and values from the
+	 * input document already receive it through `get_attribute()`.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::class_list
+	 * @covers ::has_class
+	 */
+	public function test_class_list_preserves_null_bytes_in_enqueued_values() {
+		$processor = new WP_HTML_Tag_Processor( '<div>' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( 'class', "a\x00b c\u{FFFD}d" ), 'Should have set the class attribute.' );
+		$this->assertSame( array( "a\x00b", "c\u{FFFD}d" ), iterator_to_array( $processor->class_list(), false ), 'Should have preserved the NULL byte in the API-supplied class.' );
+		$this->assertTrue( $processor->has_class( "a\x00b" ), 'Should have matched the API-supplied class by its raw NULL byte spelling.' );
+	}
+
+	/**
+	 * Ensures the class helpers operate on the replaced source value:
+	 * a class containing a NULL byte in the document is exposed, matched,
+	 * and queried by its U+FFFD spelling only.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::class_list
+	 * @covers ::has_class
+	 * @covers ::next_tag
+	 */
+	public function test_class_helpers_use_replaced_source_values() {
+		$processor = new WP_HTML_Tag_Processor( "<div class='a\x00b'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "a\u{FFFD}b" ), iterator_to_array( $processor->class_list(), false ), 'Should have exposed the replaced class name.' );
+		$this->assertTrue( $processor->has_class( "a\u{FFFD}b" ), 'Should have matched the replaced class name.' );
+		$this->assertFalse( $processor->has_class( "a\x00b" ), 'Should not have matched the raw source class name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<div class='a\x00b'>" );
+		$this->assertTrue( $processor->next_tag( array( 'class_name' => "a\u{FFFD}b" ) ), 'Should have matched a class_name query by the replaced name.' );
+	}
+
+	/**
+	 * Ensures boolean attributes whose names contain NULL bytes are
+	 * addressable by their replaced name.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 */
+	public function test_boolean_attribute_with_null_byte_in_name() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->get_attribute( "da\u{FFFD}ta" ), 'Should have reported the boolean attribute by its replaced name.' );
+	}
+
+	/**
+	 * Ensures attribute-name prefixes are matched verbatim against the
+	 * replaced names: a prefix spelled with U+FFFD matches, and a prefix
+	 * containing a raw NULL byte matches nothing.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute_names_with_prefix
+	 */
+	public function test_attribute_name_prefixes_match_replaced_names() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( "da\u{FFFD}" ), 'A replaced-name prefix should match.' );
+		$this->assertSame( array(), $processor->get_attribute_names_with_prefix( "da\x00" ), 'A raw NULL prefix should match nothing.' );
+	}
+
+	/**
+	 * Ensures the replaced tag names flow through HTML Processor tree
+	 * construction: an end tag spelled with U+FFFD closes an element
+	 * whose start tag was spelled with a raw NULL byte, as in browsers,
+	 * where both spellings tokenize to the same name.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_html_processor_matches_end_tags_across_null_byte_spellings() {
+		$normalized = WP_HTML_Processor::normalize( "<di\x00v>x</di\u{FFFD}v>y" );
+		$this->assertSame( "<di\u{FFFD}v>x</di\u{FFFD}v>y", $normalized, 'The U+FFFD-spelled end tag should have closed the NULL-spelled element.' );
+
+		$replaced_name = "DI\u{FFFD}V";
+		$processor     = WP_HTML_Processor::create_full_parser( "<body><di\x00v>x</di\u{FFFD}v>y" );
+		$found_element = $processor->next_tag( array( 'tag_name' => $replaced_name ) );
+
+		$this->assertTrue( $found_element, 'Should have found the element by its replaced name.' );
+		$this->assertSame( array( 'HTML', 'BODY', $replaced_name ), $processor->get_breadcrumbs(), 'Should have built breadcrumbs from replaced names.' );
+	}
+
+	/**
+	 * Ensures pending class updates are flushed for any case spelling of
+	 * the "class" attribute name, since attribute names are matched
+	 * ASCII-case-insensitively.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 */
+	public function test_get_attribute_flushes_class_updates_case_insensitively() {
+		$processor = new WP_HTML_Tag_Processor( '<div class="a">' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->add_class( 'b' ), 'Should have enqueued the class addition.' );
+		$this->assertSame( 'a b', $processor->get_attribute( 'CLASS' ), 'Should have included pending class updates for an uppercase lookup.' );
+	}
+
+	/**
+	 * Ensures numeric character references for U+0000 decode to U+FFFD in text.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_modifiable_text
+	 */
+	public function test_encoded_null_in_text_node_decodes_to_replacement_character() {
+		$processor = new WP_HTML_Tag_Processor( 'a&#0;b' );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the text node.' );
+		$this->assertSame( "a\u{FFFD}b", $processor->get_modifiable_text(), 'Should have decoded the U+0000 character reference to U+FFFD.' );
+	}
+}
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 22ace3890f469..a6e1844a332c2 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -111,13 +111,38 @@ public static function data_has_self_closing_flag() {
 			'No self-closing flag on a foreign element'  => array( '<circle>', false ),
 			// These involve syntax peculiarities.
 			'Self-closing flag after extra spaces'       => array( '<div      />', true ),
-			'Self-closing flag after attribute'          => array( '<div id=test/>', true ),
+			'Self-closing flag after attribute'          => array( '<div id=test />', true ),
+			'Slash inside unquoted attribute value'      => array( '<div id=test/>', false ),
 			'Self-closing flag after quoted attribute'   => array( '<div id="test"/>', true ),
 			'Self-closing flag after boolean attribute'  => array( '<div enabled/>', true ),
 			'Boolean attribute that looks like a self-closer' => array( '<div / >', false ),
 		);
 	}
 
+	/**
+	 * Ensures a trailing slash in an unquoted attribute value is part of the value.
+	 *
+	 * @ticket 61576
+	 *
+	 * @covers WP_HTML_Tag_Processor::get_attribute
+	 * @covers WP_HTML_Tag_Processor::has_self_closing_flag
+	 */
+	public function test_trailing_slash_in_unquoted_attribute_value_is_not_self_closing_flag() {
+		$processor = new WP_HTML_Tag_Processor( '<mi disabled=abc/>text' );
+		$this->assertTrue( $processor->next_tag(), 'Could not find MI tag: check test setup.' );
+
+		$this->assertSame(
+			'abc/',
+			$processor->get_attribute( 'disabled' ),
+			'Trailing slash in unquoted attribute value should belong to the attribute value.'
+		);
+
+		$this->assertFalse(
+			$processor->has_self_closing_flag(),
+			'Trailing slash in unquoted attribute value should not be interpreted as a self-closing flag.'
+		);
+	}
+
 	/**
 	 * @ticket 56299
 	 *
diff --git a/tools/html-api-fuzz/README.md b/tools/html-api-fuzz/README.md
new file mode 100644
index 0000000000000..2798fe7d278a4
--- /dev/null
+++ b/tools/html-api-fuzz/README.md
@@ -0,0 +1,452 @@
+# HTML API Fuzzer
+
+Pure PHP fuzzer for the WordPress HTML API under `src/wp-includes/html-api`.
+It compares `WP_HTML_Processor` against a selectable tree-construction oracle
+using an html5lib-style textual tree, and separately checks a set of API
+invariants described under “Invariants” below. The default oracle is PHP's
+`Dom\HTMLDocument`, preserving the historical behavior.
+
+No browser, Playwright, Node, or `wp-env` is involved.
+
+## Requirements
+
+- PHP 8.4+ with ext-dom, for `Dom\HTMLDocument`.
+- Run from the repository root.
+- Optional source-built Lexbor oracle: `git`, `cmake`, and a C compiler.
+
+## Common Commands
+
+Run one generated seed:
+
+```sh
+php tools/html-api-fuzz/worker.php --seed 1 --output-dir artifacts/html-api-fuzz/seed-1
+```
+
+Run one generated seed with a specific terminal payload policy:
+
+```sh
+php tools/html-api-fuzz/worker.php --seed 1 --payload-policy valid-utf8 --output-dir artifacts/html-api-fuzz/seed-1
+```
+
+Run a batch in worker subprocesses (seeds are batched into shared worker
+processes, 25 per process by default; see `--batch-size`):
+
+```sh
+php tools/html-api-fuzz/runner.php --max-seeds 100 --duration-seconds 60
+```
+
+Run a structural UTF-8-biased batch with a post-generation byte cap:
+
+```sh
+php tools/html-api-fuzz/runner.php --max-seeds 100 --payload-policy valid-utf8 --max-input-bytes 4096
+```
+
+Build and run against the source-built Lexbor oracle:
+
+```sh
+tools/html-api-fuzz/oracles/lexbor/build.sh
+php tools/html-api-fuzz/worker.php --seed 1 --dom-oracle lexbor-source --output-dir artifacts/html-api-fuzz/seed-1-lexbor
+php tools/html-api-fuzz/runner.php --max-seeds 100 --dom-oracle lexbor-source --duration-seconds 60
+```
+
+Use `--lexbor-oracle-bin PATH` or `HTML_API_FUZZ_LEXBOR_ORACLE` when the
+oracle binary is not at
+`tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle`.
+
+Run indefinitely:
+
+```sh
+php tools/html-api-fuzz/runner.php --duration-seconds 0 --max-seeds 0
+```
+
+Run parallel lanes and triage failures after completion:
+
+```sh
+php tools/html-api-fuzz/launcher.php --lanes 4 --max-seeds 1000 --watcher
+```
+
+For continuous fuzzing, run the launcher with `--duration-seconds 0 --max-seeds 0`
+and run `watcher.php` in a second shell against the same output directory.
+
+Stop an indefinite run gracefully (each lane finishes its current batch, the
+watcher performs a final scan, and the codex orchestrator drains its running
+jobs):
+
+```sh
+php tools/html-api-fuzz/stop.php --run-dir artifacts/html-api-fuzz/run-...
+```
+
+Without `--run-dir` the most recently active *unfinished* run under
+`artifacts/html-api-fuzz` is targeted. Finished and stale runs are not
+preferred; if nothing live is found, the most recent stopped-looking run is
+targeted with a warning. The script creates the stop file advertised by the
+run state and also `RUN_DIR/STOP` when a run directory is known, so watchers
+and orchestrators see the stop request. For a standalone runner with custom
+`--stop-file PATH`, both files are written. Relative custom stop files are
+resolved with the runner cwd recorded in new runner state; for older state,
+pass only `--stop-file PATH` to write a known stop file directly if needed.
+With `--run-dir --stop-file PATH`, the explicit path is added to the run-state
+and `RUN_DIR/STOP` targets. Creating the stop file by hand with `touch` works just
+as well. The launcher and runner refuse to start while a stop file already exists —
+remove `STOP` (or the custom stop file) before reusing a run directory. (A stop
+requested in the sub-second window between the launcher's startup check and a
+lane's own check makes that lane refuse rather than stop gracefully; the run still ends.)
+If state cannot be read or old state lacks enough context to locate a relative
+custom stop file, the tool still writes `RUN_DIR/STOP` but exits `2` with
+`ok: false` and warnings because a standalone custom stop file may be unknown.
+
+The watcher exits after a final scan once every runner under the run
+directory reports a stop reason. A runner whose state has gone silent is
+presumed dead after `--stop-stale-seconds` (default 120); per lane that
+threshold is floored at twice the lane's advertised batch budget
+(`timeout-ms × batch-size`), so long batches are not mistaken for crashes.
+The stop tool uses the same default stale threshold when auto-selecting the
+latest unfinished run.
+
+Replay a failure from a retained seed directory, or from the lane's SQLite
+store when the seed directory was pruned (see "Artifact Retention"):
+
+```sh
+php tools/html-api-fuzz/replay.php --replay artifacts/html-api-fuzz/run-.../seed-.../primary/replay.json
+php tools/html-api-fuzz/replay.php --store artifacts/html-api-fuzz/run-.../lane-00/results.sqlite --seed 12345
+```
+
+Minimize a failure while preserving the same signature:
+
+```sh
+php tools/html-api-fuzz/minimize.php --replay artifacts/html-api-fuzz/run-.../seed-.../primary/replay.json
+```
+
+By default (`--probe-mode auto`) the minimizer evaluates candidates in worker
+subprocesses so `--timeout-ms` can kill pathological candidates and each probe
+starts with fresh PHP state. Use `--probe-mode in-process` for faster
+exact-signature minimization when that isolation is not needed; in-process
+probes write only the final minimized artifacts unless
+`--keep-candidate-artifacts` is also passed. Use `--probe-mode process` to
+force subprocess probes explicitly.
+
+Watch an existing run directory and minimize new distinct signatures:
+
+```sh
+php tools/html-api-fuzz/watcher.php --run-dir artifacts/html-api-fuzz/run-... --once
+```
+
+Configured ceilings are reported as `failureClass: "resource-limit"` and remain
+in the watcher/minimizer triage path. This bucket includes tag/tree token
+ceilings (`tag-token-limit-exceeded`, `mutation-token-limit-exceeded`,
+`wordpress-token-limit-exceeded`) and oracle node ceilings
+(`node-limit-exceeded`, recorded as `dom-node-limit-exceeded` in historical
+signature facts). Process timeouts, PHP fatal errors, and memory failures are
+separate failures and are also in scope for triage.
+
+## Execution Model
+
+The runner batches consecutive seeds into one worker process
+(`worker.php --batch-count N`, default `--batch-size 25` on the runner) so the
+WordPress bootstrap and process spawn are paid once per batch rather than once
+per seed. Each seed still writes its own `seed-N/primary` artifacts. If a batch
+process dies or times out mid-way, seeds left without a `result.json` are
+re-run individually in isolation, so a crash on one input cannot take
+neighboring seeds' results with it.
+
+## Input Stages
+
+Seeds are deterministically split between two input stages:
+
+- **Generated** (default ~80%): the structural grammar described under
+  “Generator Profiles”.
+- **Corpus-mutated** (default ~20%, `--corpus-mutate-percent N` on
+  `worker.php`/`runner.php`): a `#data` section from the html5lib-tests
+  tree-construction corpus (`tests/phpunit/data/html5lib-tests`), passed
+  through 1–4 deterministic mutations (byte insert/replace, chunk
+  delete/duplicate, tag-name swap, case toggle, corpus splice). The stage,
+  corpus file, entry index, and operations are recorded in result metadata,
+  and the mutated input itself is in the replay manifest, so replays are
+  standalone. Inputs report `inputSource: "corpus-mutated"` and
+  `profile: "corpus-mutated"`.
+
+Both stages derive entirely from the seed, so seed N always produces the same
+input for the same fuzzer version and corpus.
+
+## Artifact Layout
+
+The runner writes:
+
+- `results.sqlite`: one row per attempted seed (table `attempts`, WAL mode).
+  Passing attempts store summary columns only — every attempt is regenerable
+  from its seed. Failure rows additionally store the summary, result, and
+  replay JSON documents; the replay embeds the input as base64, so a pruned
+  failure can be reproduced with `replay.php --store results.sqlite --seed N`.
+  `signature_hash` and `family_key` are indexed columns for grouping
+  failures without `json_extract`. `oracle_kind`, `oracle_version`,
+  `oracle_commit`, and `oracle_binary` record which oracle generated the
+  summary, including for passing rows whose JSON payloads are pruned. The
+  watcher tails these stores
+  incrementally by row id. (`summary.ndjson` files from older runs are still
+  scanned.) Durability is `synchronous=NORMAL`: an OS crash (not a process
+  crash) can lose the last moments of a run.
+- `events.ndjson`: runner lifecycle events, including batch boundaries.
+- `logs/batch-N.log`: output of a batch worker process, kept only when the
+  batch contained a retained failure or a seed that needed an isolated
+  re-run — over-cap repeats of a known signature do not accumulate batch
+  logs.
+- `state.json`: aggregate counters, stop reason, and compact Git metadata.
+  Oracle losses are counted per class: `oracleParseErrors` (inputs the
+  selected oracle rejects receive no differential coverage),
+  `oracleUnsupported` (tree shapes the oracle cannot represent), and
+  `oracleTolerated`
+  (comparisons that passed only under the documented scalar tolerance).
+- `seed-N/primary/input.bin`: raw generated bytes.
+- `seed-N/primary/replay.json`: base64 replay manifest, including the commit
+  hash, tracked-file dirty state, selected oracle, and fragment context needed
+  to interpret a standalone replay.
+- `seed-N/primary/result.json`: full worker result.
+- `seed-N/primary/wordpress-tree.txt` and `dom-tree.txt`: rendered trees when available.
+
+### Artifact Retention
+
+Seed directories are working space, not the archive. After each seed is
+recorded in `results.sqlite`, its `seed-N` directory is deleted unless the
+attempt failed *and* the failure's signature has fewer than
+`--max-keep-per-signature` (default 5, minimum 1) exemplar directories still
+on disk in this lane. The cap counts directories, not rows, so restarting a
+runner against the same output directory neither double-counts re-recorded
+seeds nor deletes a previously retained exemplar. The first exemplar of every
+new signature is always retained, so the watcher's minimization path keeps a
+replay file to work from; subsequent repeats of a known signature add a
+database row and nothing else. A failure whose replay document is missing
+(worker killed before writing it) always keeps its directory — the files are
+the only reproduction. Disk growth is therefore proportional to *new
+distinct failures*, not to seeds executed.
+
+The cap applies per lane: the launcher passes the same value to every lane,
+so a signature that appears in all lanes keeps up to `N × lanes` directories
+across the run. Use `--keep-all-artifacts` (runner or launcher) to keep every
+seed directory for debugging.
+
+The run-level Git metadata intentionally stays compact: full and short commit
+hash, current branch when available, commit date, and a dirty flag for
+tracked-file changes. The dirty flag is tri-state: `true`, `false`, or `null`
+when Git is unavailable or dirty detection fails. Full `git status` or diff
+output is not recorded because it is noisy, can expose local edits, and grows
+indefinitely in long runs. Launcher and runner processes collect this metadata
+once and pass it to workers so long runs do not invoke Git for every seed.
+Replayed and minimized manifests keep current checkout metadata at the top level
+and preserve discovery provenance in `sourceReplay`.
+
+The watcher writes triage state under `.triage-watcher` by default, or under
+`--state-dir` when provided. Each signature gets a stable directory containing
+`failure.json`, minimizer logs, and minimized replay/result artifacts. Failed
+minimizations are retried on later scans, up to `--max-minimize-retries`
+(default 3) attempts per signature.
+
+## Modes and Fragment Contexts
+
+- `fragment-body`: parse as a fragment. The selected oracle uses real fragment
+  parsing (the `innerHTML` setter on a context element of an empty document),
+  not a document-wrapping approximation.
+- `full-document`: parse as a full HTML document.
+- `auto`: weighted choice.
+
+In fragment mode a context element is selected per seed
+(`--fragment-context TAG` on `worker.php` for replays). `<body>` dominates;
+the other contexts (`div`, `p`, `td`, `tr`, `table`, `caption`, `colgroup`,
+`select`, `option`, `template`, `title`, `textarea`, `script`, `style`,
+`svg`, `math`) receive a small probe weight. `WP_HTML_Processor::create_fragment()`
+currently supports only `<body>`, so non-body contexts are recorded as
+`status: "unsupported"` today; when `create_fragment()` gains context support
+the fuzzer picks up the new coverage with no changes. The selected oracle already
+parses every context correctly.
+
+Unsupported `WP_HTML_Processor` cases are expected by default and are recorded
+as successful attempts with `status: "unsupported"`. Use `--fail-unsupported`
+when you want unsupported cases to become failures.
+
+## Invariants
+
+Each seed checks, in order, stopping at the first failing class:
+
+1. **Tag Processor invariants** (`tag-invariant-failed`): token loop
+   termination under the token ceiling; non-null token type/name/tag;
+   attribute getters and `class_list()` iteration do not throw;
+   `get_updated_html()` with no queued edits returns the input unchanged; a
+   simple `set_attribute()` mutation is visible to a re-scan; and
+   **seek consistency** — a bookmark set at a seed-chosen token, after
+   scanning to the end and seeking back, must reproduce the identical token
+   stream (`seek-token-stream-mismatch`).
+2. **Differential tree comparison** (`tree-mismatch` / `encoding-mismatch`):
+   the WordPress tree must equal the selected oracle tree (see “Tree Comparison”).
+3. **Breadcrumb consistency** (`breadcrumb-mismatch`): at every tag token,
+   `get_breadcrumbs()` must agree with the element stack derived from token
+   order and `expects_closer()`.
+4. **Mutation differential** (`mutation-tree-mismatch` /
+   `mutation-delta-mismatch`), only on a clean baseline: after setting
+   `data-fuzz="1"` on the first tag, the mutated document must parse
+   identically in WordPress and the selected oracle, and the WordPress tree must
+   change by exactly the one attribute line (unless formatting-element
+   reconstruction clones the attribute, or tree construction legitimately
+   drops the mutated element, in which case the differential comparison alone
+   applies).
+5. **Normalize tree preservation** (`normalize-tree-changed`), only on a
+   clean baseline: parsing `normalize()` output must produce the same tree as
+   the original input, modulo the documented scalar substitutions. This is
+   stricter than idempotence, which a consistently wrong serializer can pass.
+6. **Normalize idempotence** (`normalize-invariant-failed`):
+   `normalize()` / `serialize()` run twice must be a fixed point, with no
+   PHP native errors or throwables. Full documents use
+   `create_full_parser()->serialize()`; non-body fragment contexts use
+   `create_fragment(<context>)->serialize()`.
+
+### Known invariant oracle follow-ups
+
+- The simple `set_attribute()` mutation oracle needs to handle inputs that
+  begin with `</br>` using the same tag-selection semantics as the mutator.
+  `next_tag()` skips the raw closing token and mutates the following tag,
+  while a verifier that scans with `next_token()` can see the spec-special
+  `BR` element synthesized from `</br>` first and incorrectly report
+  `mutation-attribute-missing`. Fix the verifier by selecting the first
+  mutable tag through `next_tag()` too.
+
+## Generator Profiles
+
+The generator uses a structural HTML grammar with weighted profiles:
+
+- `balanced`
+- `full-document` (includes occasional frameset documents, quirks-mode
+  doctypes, and content after `</html>`)
+- `body-fragment`
+- `tables`
+- `template`
+- `select` (option/optgroup nesting, select-ending elements such as `input`,
+  `textarea` and `button`, nested selects, select-in-table)
+- `foreign-content` (MathML/SVG integration points, HTML breakout tags,
+  `<font>` with and without breakout attributes, `annotation-xml` encoding
+  variants, CDATA sections in foreign content, case-mangled `foreignObject`)
+- `rawtext-rcdata` (script/style/iframe/noembed/noframes/xmp/noscript,
+  title/textarea, occasional `plaintext`)
+- `text-fragment` (standalone terminal payloads, biased toward exact
+  0-10 byte inputs plus medium syntax-heavy text unless `stress-long` is
+  selected explicitly)
+- `formatting-adoption` (random formatting elements plus explicit
+  adoption-agency shapes: misnested closers, block-boundary formatting,
+  reconstruction across siblings, nested anchors, Noah's Ark overflow,
+  repeated closers)
+- `attributes-entities`
+- `comments-doctype-bogus`
+- `deep-nesting`
+- `resource-stress`
+- `incomplete-malformed` (includes spec-special closers such as `</br>` and
+  `</p>`, stray closers, and `<image>`)
+
+All profiles can emit duplicate attribute names (first-wins coverage),
+auto-closing chains (`li`, `dd`/`dt`, headings, `p`), and named character
+references with longest-prefix-match ambiguity (`&notit;`, `&copyright;`,
+`&ngE`, ...).
+
+Terminal payloads are selected by a separate policy:
+
+- `valid-utf8`: structural cases with ASCII, Unicode, controls, and
+  entity references, including NUL byte coverage, but no raw invalid bytes.
+- `mostly-valid`: default-biased structural cases with valid UTF-8 Unicode,
+  controls, NUL bytes, and entity references.
+- `ascii-structural`: ASCII-only terminal text and attributes for tokenizer and
+  tree-construction coverage, including NUL byte coverage.
+- `stress-long`: long valid UTF-8 terminal payloads for deliberate
+  resource-stress runs.
+- `auto`: weighted choice. Normal structural profiles favor valid UTF-8 and
+  mostly-valid payloads; `resource-stress` favors `stress-long`.
+
+Use `--payload-policy POLICY` on `worker.php`, `runner.php`, or `launcher.php`.
+Use `--max-input-bytes N` to apply a post-generation byte cap before the worker
+records replay metadata. The cap preserves UTF-8 byte boundaries, but it is not
+grammar-aware and may cut through HTML tokens. Replay manifests and minimization
+summaries preserve the original payload policy when it was recorded; old or
+hand-supplied inputs leave `payloadPolicy` null unless an explicit policy label
+is provided. Historical `invalid-byte-heavy` labels are accepted only as replay
+metadata for direct inputs and are not selectable for generated runs.
+Replayed and minimized manifests keep immediate `inputSource` metadata separate
+from `originalGenerator` metadata.
+
+## Tree Comparison
+
+The tree renderer follows the html5lib test style used by
+`tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php`:
+
+- attributes sorted by their spec-scrubbed names (so a raw-NUL name on the
+  WordPress side and its U+FFFD substitution on the DOM side sort identically),
+  rendered raw
+- boolean attributes rendered as `=""`
+- namespace-qualified element and attribute names
+- template `content` marker
+- only the narrow auto-generated `html/head/body` wrapper tolerance
+
+When the default PHP DOM oracle is selected, template content is rendered
+through a self-contained serialization round-trip: PHP hides template child
+nodes, so the oracle re-parses the template's `innerHTML` serialization in a
+body context and accepts the result only when re-serializing reproduces the
+source byte-for-byte. Content that cannot round-trip (table parts, foreign
+fragments) is quarantined as `oracle-unsupported`. This check never consults
+the WordPress HTML API, which is the system under test.
+
+Raw bytes are rendered without normalization. The WordPress HTML API
+deliberately preserves NUL and CR bytes where spec-following parsers
+substitute U+FFFD and normalize newlines during input preprocessing, so the
+comparison tolerates a differing line only when that exact substitution
+explains the entire difference. The tolerance is additionally gated by line
+type: WordPress preserves raw bytes only in attribute values and
+tag/attribute names (verified empirically across text, RCDATA, rawtext,
+foreign text, CDATA, comment, and doctype contexts, where WordPress applies
+the spec substitutions itself), so only tag lines and attribute lines are
+eligible. A scalar difference on any other line type is a real divergence
+and fails. Tolerated lines are reported per seed
+(`comparison.scalarToleratedLines`) and per run (`oracleTolerated`), and the
+result is classified `oracle-tolerated` rather than silently passed. Any
+difference beyond the substitution fails as usual, and the first-difference
+record points at the first *unexplained* line.
+
+One known PHP DOM oracle bug is tolerated with a runtime probe: PHP's Lexbor parser
+fails to treat U+000C FORM FEED as ignorable whitespace in the pre-body
+insertion modes. When a full-document comparison fails, the input contains a
+form feed, and re-parsing with form feeds substituted by spaces makes the DOM
+oracle reproduce the WordPress tree exactly, the case is classified
+`oracle-tolerated` with `comparison.formFeedQuirk: true`. The probe disables
+the tolerance automatically when PHP fixes the bug.
+
+Invalid bytes are never normalized away. If WordPress and the selected oracle
+surface different byte sequences, the first-difference record includes bounded
+line previews, byte lengths, line hashes, the first differing byte offset, and
+hex previews, including a diff-window hex preview around the differing byte, so
+the mismatch remains inspectable even when JSON display substitutes replacement
+characters. Full comparison lines are kept out of `result.json` to avoid large
+artifacts from stress inputs.
+
+### Known classification gaps
+
+Two known issues affect labeling, not pass/fail correctness:
+
+- **Dual-axis lines classify as `tree-mismatch`, not `encoding-mismatch`.**
+  A differing line explained only by *both* an invalid-UTF-8 substitution and
+  a NUL/CR scalar substitution (e.g. an attribute value containing a raw NUL
+  *and* a raw `0x82`) matches neither single-axis check:
+  `linesMatchAfterWordPressUtf8Scrub` fails on the unscrubbed NUL, and the
+  scalar matcher fails on the invalid byte. Such results report
+  `tree-mismatch` although encoding is involved.
+- **CDATA at SVG/MathML integration points is a real WordPress divergence
+  the fuzzer will keep reporting.** For
+  `<svg><foreignObject><![CDATA[a\0b]]>` (likewise `<svg><desc>`,
+  `<math><mtext>`, HTML-encoded `annotation-xml`), WordPress substitutes
+  NUL with U+FFFD in CDATA text while the spec routes those characters
+  through the HTML insertion mode, which drops NUL — WordPress handles
+  plain text at integration points correctly; only the CDATA path diverges
+  (a known `@todo` in `WP_HTML_Tag_Processor`'s CDATA handling). The shape
+  is U+FFFD-versus-removed, which no tolerance covers in either direction,
+  so these report as genuine `tree-mismatch` findings. This is distinct
+  from the upstream-fixed PHP-DOM integration-point reparenting family.
+
+## Minimization
+
+`minimize.php` reduces in three phases under a shared attempt budget
+(`--max-attempts`, default 600): markup-aligned segment deletion, binary
+byte-chunk deletion, then per-byte deletion and canonicalization (replacements
+never grow the input). A candidate is accepted only after re-running the worker
+and reproducing the original signature hash (or any failure with `--any-failure`).
diff --git a/tools/html-api-fuzz/codex-triage-orchestrator.php b/tools/html-api-fuzz/codex-triage-orchestrator.php
new file mode 100755
index 0000000000000..17a62d6d6ea93
--- /dev/null
+++ b/tools/html-api-fuzz/codex-triage-orchestrator.php
@@ -0,0 +1,935 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+const HTML_API_FUZZ_CODEX_CLASSIFICATIONS = array(
+	'wordpress-bug',
+	'oracle-bug',
+	'oracle-limitation',
+	'harness-bug',
+	'expected-unsupported',
+	'scalar-tolerance',
+	'WordPress HTML API bug',
+	'harness bug',
+	'DOM/oracle behavior',
+	'encoding-normalization mismatch',
+	'expected unsupported',
+	'resource-limit finding',
+	'inconclusive',
+);
+
+function html_api_fuzz_codex_usage(): void {
+	echo "Usage: php tools/html-api-fuzz/codex-triage-orchestrator.php --triage-dir DIR [--diagnostics-dir DIR] [--repo-root DIR] [--codex-bin BIN] [--model MODEL] [--interval-seconds N] [--max-concurrent N] [--max-launch-per-pass N] [--stale-after-seconds N] [--once] [--mode classify|fix] [--sandbox read-only|workspace-write|danger-full-access]\n";
+	echo "A STOP file in the run directory (from the watcher state's runDir, falling back to the triage dir's parent) stops new launches; running jobs finish before exit.\n";
+}
+
+function html_api_fuzz_codex_validate_cli_options( array $options ): void {
+	$value_options = array(
+		'triage-dir',
+		'diagnostics-dir',
+		'repo-root',
+		'codex-bin',
+		'model',
+		'interval-seconds',
+		'max-concurrent',
+		'max-launch-per-pass',
+		'stale-after-seconds',
+		'mode',
+		'sandbox',
+	);
+	$bool_options = array(
+		'help',
+		'once',
+	);
+	$allowed = array_merge( $value_options, $bool_options );
+
+	foreach ( $options as $name => $value ) {
+		if ( '_' === $name ) {
+			if ( ! empty( $value ) ) {
+				throw new InvalidArgumentException( 'Unexpected positional argument: ' . (string) reset( $value ) );
+			}
+			continue;
+		}
+
+		if ( ! in_array( $name, $allowed, true ) ) {
+			throw new InvalidArgumentException( 'Unknown option: --' . $name );
+		}
+		if ( in_array( $name, $value_options, true ) && true === $value ) {
+			throw new InvalidArgumentException( 'Expected --' . $name . ' to have a value.' );
+		}
+		if ( in_array( $name, $bool_options, true ) && true !== $value ) {
+			throw new InvalidArgumentException( 'Option --' . $name . ' does not accept a value.' );
+		}
+	}
+}
+
+function html_api_fuzz_codex_is_absolute_path( string $path ): bool {
+	return 1 === preg_match( '#^(?:/|[A-Za-z]:[\\\\/]|\\\\\\\\)#', $path );
+}
+
+function html_api_fuzz_codex_now_iso_z(): string {
+	return gmdate( 'Y-m-d\TH:i:s\Z' );
+}
+
+function html_api_fuzz_codex_normalize_path( string $path ): string {
+	if ( '' === $path ) {
+		throw new InvalidArgumentException( 'Expected a non-empty path.' );
+	}
+
+	if ( ! html_api_fuzz_codex_is_absolute_path( $path ) ) {
+		$cwd = getcwd();
+		if ( false === $cwd ) {
+			throw new RuntimeException( 'Could not determine current working directory.' );
+		}
+		$path = $cwd . DIRECTORY_SEPARATOR . $path;
+	}
+
+	$real = realpath( $path );
+	return false === $real ? rtrim( $path, DIRECTORY_SEPARATOR ) : $real;
+}
+
+function html_api_fuzz_codex_require_dir( string $path, string $label ): string {
+	$normalized = html_api_fuzz_codex_normalize_path( $path );
+	if ( ! is_dir( $normalized ) ) {
+		throw new InvalidArgumentException( "{$label} is not a directory: {$normalized}" );
+	}
+
+	return $normalized;
+}
+
+function html_api_fuzz_codex_path_join( string $base, string ...$parts ): string {
+	$path = rtrim( $base, DIRECTORY_SEPARATOR );
+	foreach ( $parts as $part ) {
+		$path .= DIRECTORY_SEPARATOR . ltrim( $part, DIRECTORY_SEPARATOR );
+	}
+
+	return $path;
+}
+
+function html_api_fuzz_codex_signature_dir_name( string $signature_hash ): string {
+	$dir = preg_replace( '/[^a-zA-Z0-9._-]+/', '_', $signature_hash );
+	return null === $dir || '' === $dir ? '_' : $dir;
+}
+
+function html_api_fuzz_codex_valid_signature_hash( string $signature_hash ): bool {
+	return 1 === preg_match( '/^(?!\.{1,2}\z)[A-Za-z0-9._-]+\z/', $signature_hash ); // \z (unlike $) rejects a trailing newline; the lookahead rejects "." and "..".
+}
+
+function html_api_fuzz_codex_safe_prompt_line( $value, string $field ): string {
+	if ( null === $value ) {
+		return '';
+	}
+	if ( ! is_scalar( $value ) ) {
+		throw new InvalidArgumentException( "Expected {$field} to be scalar." );
+	}
+
+	$text = (string) $value;
+	if ( 1 === preg_match( '/[\x00-\x1F\x7F]/', $text ) ) {
+		throw new InvalidArgumentException( "Unsafe control character in {$field}." );
+	}
+
+	return $text;
+}
+
+function html_api_fuzz_codex_path_has_control_chars( string $path ): bool {
+	return 1 === preg_match( '/[\x00-\x1F\x7F]/', $path );
+}
+
+function html_api_fuzz_codex_path_is_under( string $path, string $root ): bool {
+	$real_path = realpath( $path );
+	$real_root = realpath( $root );
+	if ( false === $real_path || false === $real_root ) {
+		return false;
+	}
+
+	$real_root = rtrim( $real_root, DIRECTORY_SEPARATOR );
+	return $real_path === $real_root || 0 === strpos( $real_path, $real_root . DIRECTORY_SEPARATOR );
+}
+
+function html_api_fuzz_codex_allowed_artifact_roots( array $args ): array {
+	$roots = array(
+		$args['triageDir'],
+		$args['runDir'] ?? dirname( $args['triageDir'] ),
+	);
+	$allowed = array();
+	foreach ( $roots as $root ) {
+		if ( is_string( $root ) && '' !== $root && is_dir( $root ) && ! html_api_fuzz_codex_path_has_control_chars( $root ) ) {
+			$real = realpath( $root );
+			if ( false !== $real ) {
+				$allowed[] = $real;
+			}
+		}
+	}
+
+	return array_values( array_unique( $allowed ) );
+}
+
+function html_api_fuzz_codex_sort_json_value( $value ) {
+	if ( ! is_array( $value ) ) {
+		return $value;
+	}
+
+	if ( array_is_list( $value ) ) {
+		return array_map( 'html_api_fuzz_codex_sort_json_value', $value );
+	}
+
+	foreach ( $value as $key => $item ) {
+		$value[ $key ] = html_api_fuzz_codex_sort_json_value( $item );
+	}
+	ksort( $value, SORT_STRING );
+
+	return $value;
+}
+
+function html_api_fuzz_codex_json_encode( $value ): string {
+	$json = json_encode(
+		html_api_fuzz_codex_sort_json_value( $value ),
+		JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
+	);
+	if ( false === $json ) {
+		throw new RuntimeException( 'JSON encode failed: ' . json_last_error_msg() );
+	}
+
+	return $json;
+}
+
+function html_api_fuzz_codex_read_json( string $path, int $retries = 5 ) {
+	// Returns the decoded JSON value, or null when the file does not exist. Parse failures are
+	// retried up to $retries times with a 200ms pause between attempts, then reported by throwing.
+	$last_error = null;
+	for ( $i = 0; $i < $retries; ++$i ) {
+		if ( $i > 0 ) {
+			usleep( 200000 );
+		}
+		if ( ! file_exists( $path ) ) {
+			return null;
+		}
+		$text = @file_get_contents( $path );
+		if ( false === $text ) {
+			throw new RuntimeException( "Could not read JSON file: {$path}" );
+		}
+		$value = json_decode( $text, true );
+		if ( JSON_ERROR_NONE === json_last_error() ) {
+			return $value;
+		}
+		$last_error = json_last_error_msg();
+	}
+	throw new RuntimeException( "Could not parse JSON {$path}: {$last_error}" );
+}
+
+/**
+ * Extracts the exit code from a decoded done.json payload.
+ *
+ * @param mixed $done Decoded payload; anything other than an array with a numeric `exitCode` counts as a failure.
+ * @return int The recorded exit code, or 1 when it is missing or not numeric.
+ */
+function html_api_fuzz_codex_exit_code_from_done( $done ): int {
+	$recorded = is_array( $done ) ? ( $done['exitCode'] ?? null ) : null;
+
+	return is_numeric( $recorded ) ? (int) $recorded : 1;
+}
+
+/**
+ * Writes a value to a JSON file by replacing it with a fully written temp file.
+ *
+ * The payload is encoded before any file is created, so an encoding failure
+ * leaves nothing behind. The temp file is created in the destination
+ * directory and then renamed over the target.
+ *
+ * @param string $path  Destination file.
+ * @param mixed  $value Value to encode.
+ * @throws RuntimeException When encoding, temp-file creation, writing, or the final rename fails.
+ */
+function html_api_fuzz_codex_write_json( string $path, $value ): void {
+	// Encode first: a failure here must not leave an orphaned temp file in the target directory.
+	$json = html_api_fuzz_codex_json_encode( $value ) . "\n";
+
+	$dir = dirname( $path );
+	\HtmlApiFuzz\ensure_dir( $dir );
+	$tmp = tempnam( $dir, basename( $path ) . '.tmp.' );
+	if ( false === $tmp ) {
+		throw new RuntimeException( "Could not create temporary JSON file for: {$path}" );
+	}
+	// Comparing against the byte length rejects both a failed write (false) and a short write.
+	if ( strlen( $json ) !== file_put_contents( $tmp, $json ) ) {
+		@unlink( $tmp );
+		throw new RuntimeException( "Could not write temporary JSON file: {$tmp}" );
+	}
+	if ( ! rename( $tmp, $path ) ) {
+		@unlink( $tmp );
+		throw new RuntimeException( "Could not replace JSON file: {$path}" );
+	}
+}
+
+/**
+ * Picks an unused archive file name of the form "<prefix>.<unix-time>[.<n>].json".
+ *
+ * @param string $dir    Directory the archive file will live in.
+ * @param string $prefix File name prefix.
+ * @return string A path that did not exist at the time of the check.
+ */
+function html_api_fuzz_codex_archive_path( string $dir, string $prefix ): string {
+	$stamp     = (string) time();
+	$candidate = html_api_fuzz_codex_path_join( $dir, "{$prefix}.{$stamp}.json" );
+	$counter   = 0;
+	while ( file_exists( $candidate ) ) {
+		++$counter;
+		$candidate = html_api_fuzz_codex_path_join( $dir, "{$prefix}.{$stamp}.{$counter}.json" );
+	}
+
+	return $candidate;
+}
+
+/**
+ * Probes whether a process with the given pid exists.
+ *
+ * @param int $pid Process id to probe.
+ * @return bool True when the zero-signal probe succeeds or is refused with EPERM;
+ *              false for non-positive pids or when posix_kill() is unavailable.
+ */
+function html_api_fuzz_codex_process_alive( int $pid ): bool {
+	$can_probe = $pid >= 1 && function_exists( 'posix_kill' );
+	if ( ! $can_probe ) {
+		return false;
+	}
+
+	$delivered = @posix_kill( $pid, 0 );
+	if ( $delivered ) {
+		return true;
+	}
+
+	// A refused probe (EPERM) is still reported as alive; every other failure counts as not alive.
+	$can_inspect_errno = function_exists( 'posix_get_last_error' ) && defined( 'POSIX_EPERM' );
+
+	return $can_inspect_errno && POSIX_EPERM === posix_get_last_error();
+}
+
+/**
+ * Resolves the minimizer output directory recorded for a signature.
+ *
+ * Prefers `minimizeOutputDir` when it names an existing directory, otherwise
+ * falls back to the directory containing the `minimizeResult` file.
+ *
+ * @param array $signature Signature record from the watcher state.
+ * @return string|null The directory, or null when neither entry points at something on disk.
+ */
+function html_api_fuzz_codex_newest_minimize_dir( array $signature ): ?string {
+	$recorded_dir = $signature['minimizeOutputDir'] ?? null;
+	$has_dir      = is_string( $recorded_dir ) && '' !== $recorded_dir && is_dir( $recorded_dir );
+	if ( $has_dir ) {
+		return $recorded_dir;
+	}
+
+	$recorded_file = $signature['minimizeResult'] ?? null;
+	$has_file      = is_string( $recorded_file ) && '' !== $recorded_file && is_file( $recorded_file );
+
+	return $has_file ? dirname( $recorded_file ) : null;
+}
+
+/**
+ * Tries to claim a signature for diagnosis by exclusively creating its claim file.
+ *
+ * Outcomes, in the order they are checked:
+ * - done.json with exit code 0: the signature is finished, nothing is claimed.
+ * - done.json with any other exit code: done.json and any claim.json are archived
+ *   so the signature can be retried.
+ * - claim.json younger than the staleness window: the existing claim is respected.
+ * - claim.json older than the window: it is archived and a new claim is attempted.
+ *
+ * @param string $diagnostics_dir     Root directory holding per-signature diagnostics.
+ * @param string $signature_hash      Hash identifying the signature.
+ * @param array  $signature           Signature record; `failureClass`, `replayPath` and `resultPath` are copied into the claim.
+ * @param int    $stale_after_seconds Age in seconds after which an existing claim is considered abandoned.
+ * @return string|null The signature's diagnostics directory when the claim was won, null otherwise.
+ * @throws RuntimeException When an archive rename fails or the claim file cannot be created or written.
+ */
+function html_api_fuzz_codex_claim_signature( string $diagnostics_dir, string $signature_hash, array $signature, int $stale_after_seconds ): ?string {
+	$signature_dir = html_api_fuzz_codex_path_join( $diagnostics_dir, html_api_fuzz_codex_signature_dir_name( $signature_hash ) );
+	$done_path     = html_api_fuzz_codex_path_join( $signature_dir, 'done.json' );
+	$claim_path    = html_api_fuzz_codex_path_join( $signature_dir, 'claim.json' );
+
+	if ( is_file( $done_path ) ) {
+		$done      = html_api_fuzz_codex_read_json( $done_path );
+		$exit_code = html_api_fuzz_codex_exit_code_from_done( $done );
+		if ( 0 === $exit_code ) {
+			return null;
+		}
+
+		// The trailing is_file() checks tolerate a concurrent process having already moved the file.
+		if ( ! rename( $done_path, html_api_fuzz_codex_archive_path( $signature_dir, 'done.failed' ) ) && is_file( $done_path ) ) {
+			throw new RuntimeException( "Could not archive failed done file: {$done_path}" );
+		}
+		if ( is_file( $claim_path ) && ! rename( $claim_path, html_api_fuzz_codex_archive_path( $signature_dir, 'claim.failed' ) ) && is_file( $claim_path ) ) {
+			throw new RuntimeException( "Could not archive failed claim file: {$claim_path}" );
+		}
+	}
+
+	if ( is_file( $claim_path ) ) {
+		$claim      = html_api_fuzz_codex_read_json( $claim_path ) ?: array();
+		$claimed_at = (float) ( $claim['claimedAtUnix'] ?? 0 );
+		$stale      = microtime( true ) - $claimed_at > $stale_after_seconds;
+		if ( ! $stale ) {
+			/*
+			 * A fresh claim is respected whether or not the recorded pid is alive.
+			 * NOTE(review): the previous code probed the pid here but returned null
+			 * on both outcomes; that result is preserved and the dead probe removed.
+			 */
+			return null;
+		}
+		if ( ! rename( $claim_path, html_api_fuzz_codex_archive_path( $signature_dir, 'claim.stale' ) ) && is_file( $claim_path ) ) {
+			throw new RuntimeException( "Could not archive stale claim file: {$claim_path}" );
+		}
+	}
+
+	\HtmlApiFuzz\ensure_dir( $signature_dir );
+	$claim = array(
+		'schemaVersion'    => 1,
+		'kind'             => 'html-api-fuzz-codex-claim',
+		'hash'             => $signature_hash,
+		'pid'              => getmypid(),
+		'claimedAt'        => html_api_fuzz_codex_now_iso_z(),
+		'claimedAtUnix'    => microtime( true ),
+		'failureClass'     => $signature['failureClass'] ?? null,
+		'sourceReplayPath' => $signature['replayPath'] ?? null,
+		'sourceResultPath' => $signature['resultPath'] ?? null,
+	);
+
+	// Encode before creating the file so an encoding failure cannot leave an empty claim behind.
+	$bytes = html_api_fuzz_codex_json_encode( $claim ) . "\n";
+
+	// Mode "x" fails when the file already exists, which makes creation the exclusive claim step.
+	$handle = @fopen( $claim_path, 'xb' );
+	if ( false === $handle ) {
+		if ( is_file( $claim_path ) ) {
+			return null;
+		}
+		throw new RuntimeException( "Could not create claim file: {$claim_path}" );
+	}
+
+	$write = fwrite( $handle, $bytes );
+	fclose( $handle );
+	if ( false === $write || $write !== strlen( $bytes ) ) {
+		// Remove the truncated claim; left in place it would fail to parse on every later pass.
+		@unlink( $claim_path );
+		throw new RuntimeException( "Could not write claim file: {$claim_path}" );
+	}
+
+	return $signature_dir;
+}
+
+/**
+ * Filters candidate artifact paths down to those that exist inside an allowed root.
+ *
+ * Non-strings, empty strings, paths with control characters, and missing files
+ * are dropped. Survivors are returned in canonical (realpath) form.
+ *
+ * @param array    $paths         Candidate paths; may contain null or non-string entries.
+ * @param string[] $allowed_roots Directories a path must resolve under.
+ * @return string[] Unique canonical paths in first-seen order, reindexed from zero.
+ */
+function html_api_fuzz_codex_existing_paths( array $paths, array $allowed_roots ): array {
+	$kept = array();
+	foreach ( $paths as $candidate ) {
+		$usable = is_string( $candidate )
+			&& '' !== $candidate
+			&& ! html_api_fuzz_codex_path_has_control_chars( $candidate )
+			&& file_exists( $candidate );
+		if ( ! $usable ) {
+			continue;
+		}
+
+		$inside_root = false;
+		foreach ( $allowed_roots as $root ) {
+			if ( html_api_fuzz_codex_path_is_under( $candidate, $root ) ) {
+				$inside_root = true;
+				break;
+			}
+		}
+		if ( ! $inside_root ) {
+			continue;
+		}
+
+		$canonical = realpath( $candidate );
+		if ( false !== $canonical ) {
+			$kept[] = $canonical;
+		}
+	}
+
+	return array_values( array_unique( $kept ) );
+}
+
+/**
+ * Builds the prompt text handed to Codex for one fuzz signature.
+ *
+ * The prompt lists only artifacts that exist under an allowed artifact root,
+ * and the free-form signature fields are passed through
+ * html_api_fuzz_codex_safe_prompt_line() before being embedded.
+ *
+ * @param array  $args           Parsed arguments; reads `triageDir`, `diagnosticsDir`, `repoRoot`, `mode` and optionally `runDir`.
+ * @param string $signature_hash Hash identifying the signature.
+ * @param array  $signature      Signature record from the watcher state.
+ * @return string The complete prompt.
+ * @throws InvalidArgumentException When the signature hash fails validation.
+ */
+function html_api_fuzz_codex_prompt_for_signature( array $args, string $signature_hash, array $signature ): string {
+	if ( ! html_api_fuzz_codex_valid_signature_hash( $signature_hash ) ) {
+		throw new InvalidArgumentException( 'Unsafe signature hash: ' . $signature_hash );
+	}
+
+	$dir_name     = html_api_fuzz_codex_signature_dir_name( $signature_hash );
+	$triage_dir   = html_api_fuzz_codex_path_join( $args['triageDir'], 'signatures', $dir_name );
+	$minimize_dir = html_api_fuzz_codex_newest_minimize_dir( $signature );
+	$minimized_dir = null;
+	if ( null !== $minimize_dir ) {
+		$minimized_dir = html_api_fuzz_codex_path_join( $minimize_dir, 'minimized' );
+	}
+	$report_path = html_api_fuzz_codex_path_join( $args['diagnosticsDir'], $dir_name, 'report.md' );
+
+	// Returns a signature field only when it is a string; anything else becomes null and is filtered out later.
+	$string_field = static function ( string $key ) use ( $signature ): ?string {
+		$field = $signature[ $key ] ?? null;
+		return is_string( $field ) ? $field : null;
+	};
+
+	// Candidate order determines the order of the artifact list in the prompt.
+	$candidate_paths = array(
+		html_api_fuzz_codex_path_join( $triage_dir, 'failure.json' ),
+		$string_field( 'minimizeResult' ),
+	);
+	if ( null !== $minimized_dir ) {
+		foreach ( array( 'result.json', 'replay.json', 'wordpress-tree.txt', 'dom-tree.txt' ) as $file_name ) {
+			$candidate_paths[] = html_api_fuzz_codex_path_join( $minimized_dir, $file_name );
+		}
+	}
+	$candidate_paths[] = $string_field( 'replayPath' );
+	$candidate_paths[] = $string_field( 'resultPath' );
+
+	$existing_paths = html_api_fuzz_codex_existing_paths( $candidate_paths, html_api_fuzz_codex_allowed_artifact_roots( $args ) );
+
+	$fix_policy = 'classify' === $args['mode']
+		? 'This is classify-only mode. Do not edit files. Do not run write commands. Return the Markdown report as your final response; the Codex CLI will save it. If you believe a fix is needed, describe the smallest fix and the focused verification.'
+		: 'You may implement the smallest justified fix. Preserve the minimized replay and run focused verification.';
+
+	// Renders a list of strings as Markdown bullets, one per line.
+	$as_bullets = static function ( array $items ): string {
+		return implode( "\n", array_map( static fn( string $item ): string => '- ' . $item, $items ) );
+	};
+
+	$artifact_lines       = $as_bullets( $existing_paths );
+	$classification_lines = $as_bullets( HTML_API_FUZZ_CODEX_CLASSIFICATIONS );
+	$failure_class        = html_api_fuzz_codex_safe_prompt_line( $signature['failureClass'] ?? '', 'failureClass' );
+	$triage_kind          = html_api_fuzz_codex_safe_prompt_line( $signature['triageKind'] ?? 'failure', 'triageKind' );
+	$oracle_type          = html_api_fuzz_codex_safe_prompt_line( $signature['oracleFindingType'] ?? '', 'oracleFindingType' );
+	$suspected_owner      = html_api_fuzz_codex_safe_prompt_line( $signature['suspectedOwner'] ?? '', 'suspectedOwner' );
+	$upstream_issue       = html_api_fuzz_codex_safe_prompt_line( $signature['upstreamIssueUrl'] ?? '', 'upstreamIssueUrl' );
+
+	return <<<PROMPT
+Diagnose HTML API fuzz signature {$signature_hash}.
+
+Repository root:
+{$args['repoRoot']}
+
+Diagnostics report path:
+{$report_path}
+
+Failure class from watcher:
+{$failure_class}
+
+Triage kind:
+{$triage_kind}
+
+Oracle finding type:
+{$oracle_type}
+
+Suspected owner:
+{$suspected_owner}
+
+Upstream issue:
+{$upstream_issue}
+
+Mode:
+{$args['mode']}
+
+Policy:
+{$fix_policy}
+
+Read these artifacts first:
+{$artifact_lines}
+
+Then inspect relevant source under:
+- {$args['repoRoot']}/tools/html-api-fuzz
+- {$args['repoRoot']}/src/wp-includes/html-api
+- {$args['repoRoot']}/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+
+Classify the signature as exactly one of:
+{$classification_lines}
+
+Return a concise Markdown report with:
+- signature hash
+- classification
+- minimized input summary
+- evidence from the rendered trees/result JSON
+- suspected owner and whether this is actionable
+- recommended next step
+
+If this is a likely WordPress HTML API bug or harness bug, include a proposed focused regression test.
+Do not disturb the running fuzzer.
+
+PROMPT;
+}
+
+/**
+ * Writes an entire string to a stream, continuing after partial writes.
+ *
+ * @param resource $stream Writable stream.
+ * @param string   $bytes  Data to write.
+ * @throws RuntimeException When a write fails or makes no progress.
+ */
+function html_api_fuzz_codex_write_stream( $stream, string $bytes ): void {
+	$total = strlen( $bytes );
+	for ( $sent = 0; $sent < $total; $sent += $chunk ) {
+		$chunk = fwrite( $stream, substr( $bytes, $sent ) );
+		// Both false (error) and 0 (no progress) abort, so the loop cannot spin forever.
+		if ( ! $chunk ) {
+			throw new RuntimeException( 'Could not write prompt to Codex process.' );
+		}
+	}
+}
+
+/**
+ * Assembles the argv used to run the Codex CLI non-interactively.
+ *
+ * The prompt is expected on stdin (the trailing "-"), and the final message is
+ * written to the report path via --output-last-message.
+ *
+ * @param array  $args        Parsed arguments; reads `codexBin`, `model`, `repoRoot` and `sandbox`.
+ * @param string $report_path File the CLI writes its last message to.
+ * @return string[] Command as a list suitable for array-form proc_open().
+ */
+function html_api_fuzz_codex_command( array $args, string $report_path ): array {
+	$global_flags = array( '-a', 'never' );
+	if ( null !== $args['model'] ) {
+		array_push( $global_flags, '--model', $args['model'] );
+	}
+
+	$exec_flags = array(
+		'exec',
+		'-C',
+		$args['repoRoot'],
+		'-s',
+		$args['sandbox'],
+		'--color',
+		'never',
+		'--output-last-message',
+		$report_path,
+		'--json',
+		'-',
+	);
+
+	return array_merge( array( $args['codexBin'] ), $global_flags, $exec_flags );
+}
+
+/**
+ * Locates an executable `setsid` binary in a fixed list of well-known locations.
+ *
+ * @return string|null Path of the first executable candidate, or null when none is found.
+ */
+function html_api_fuzz_codex_setsid_bin(): ?string {
+	$candidates = array( '/usr/bin/setsid', '/bin/setsid', '/usr/local/bin/setsid', '/opt/homebrew/bin/setsid' );
+	$usable     = array_filter( $candidates, 'is_executable' );
+
+	// array_filter() keeps the candidate order, so reset() yields the highest-priority match.
+	return array() === $usable ? null : reset( $usable );
+}
+
+/**
+ * Starts a Codex subprocess for one claimed signature and feeds it the prompt on stdin.
+ *
+ * Writes prompt.md and job.json into the signature directory, redirects the
+ * child's stdout to codex.jsonl and stderr to codex.stderr.log, and runs the
+ * child with the repository root as its working directory.
+ *
+ * @param array  $args           Parsed arguments; reads `mode`, `sandbox`, `repoRoot` plus whatever the prompt and command builders read.
+ * @param string $signature_hash Hash identifying the signature.
+ * @param array  $signature      Signature record from the watcher state.
+ * @param string $signature_dir  Diagnostics directory for this signature.
+ * @return array Job record with keys `process`, `pid`, `processGroupPid`, `knownExitCode`, `signatureDir` and `hash`.
+ * @throws RuntimeException When the prompt file cannot be written or the subprocess cannot be started.
+ * @throws Throwable        Re-thrown after terminating the child when writing the prompt to its stdin fails.
+ */
+function html_api_fuzz_codex_launch( array $args, string $signature_hash, array $signature, string $signature_dir ): array {
+	$report_path = html_api_fuzz_codex_path_join( $signature_dir, 'report.md' );
+	$prompt_path = html_api_fuzz_codex_path_join( $signature_dir, 'prompt.md' );
+	$stdout_path = html_api_fuzz_codex_path_join( $signature_dir, 'codex.jsonl' );
+	$stderr_path = html_api_fuzz_codex_path_join( $signature_dir, 'codex.stderr.log' );
+	$done_path   = html_api_fuzz_codex_path_join( $signature_dir, 'done.json' );
+
+	// Keep a copy of the exact prompt on disk next to the job's other artifacts.
+	$prompt = html_api_fuzz_codex_prompt_for_signature( $args, $signature_hash, $signature );
+	if ( false === file_put_contents( $prompt_path, $prompt ) ) {
+		throw new RuntimeException( "Could not write prompt file: {$prompt_path}" );
+	}
+
+	// When a setsid binary is found the command is run through it; presumably so the job
+	// leads its own process group, which html_api_fuzz_codex_stop_job() signals as a whole.
+	$command             = html_api_fuzz_codex_command( $args, $report_path );
+	$setsid_bin          = html_api_fuzz_codex_setsid_bin();
+	$process_command     = null === $setsid_bin ? $command : array_merge( array( $setsid_bin ), $command );
+	$uses_process_group  = null !== $setsid_bin;
+	$metadata = array(
+		'schemaVersion'  => 1,
+		'kind'           => 'html-api-fuzz-codex-job',
+		'hash'           => $signature_hash,
+		'startedAt'      => html_api_fuzz_codex_now_iso_z(),
+		'startedAtUnix'  => microtime( true ),
+		'mode'           => $args['mode'],
+		'sandbox'        => $args['sandbox'],
+		'command'        => $command,
+		'processCommand' => $process_command,
+		'promptPath'     => $prompt_path,
+		'stdoutPath'     => $stdout_path,
+		'stderrPath'     => $stderr_path,
+		'reportPath'     => $report_path,
+		'donePath'       => $done_path,
+	);
+	html_api_fuzz_codex_write_json( html_api_fuzz_codex_path_join( $signature_dir, 'job.json' ), $metadata );
+
+	// stdin is a pipe for the prompt; stdout/stderr go straight to files opened in "w" mode (truncating earlier logs).
+	$descriptor_spec = array(
+		0 => array( 'pipe', 'r' ),
+		1 => array( 'file', $stdout_path, 'w' ),
+		2 => array( 'file', $stderr_path, 'w' ),
+	);
+	$process = proc_open( $process_command, $descriptor_spec, $pipes, $args['repoRoot'] );
+	if ( ! is_resource( $process ) ) {
+		throw new RuntimeException( 'Could not start Codex subprocess.' );
+	}
+
+	try {
+		html_api_fuzz_codex_write_stream( $pipes[0], $prompt );
+		// Closing stdin signals end of input to the child.
+		fclose( $pipes[0] );
+	} catch ( Throwable $error ) {
+		// The child never received the full prompt: release the pipe, stop the child, and surface the error.
+		if ( is_resource( $pipes[0] ) ) {
+			fclose( $pipes[0] );
+		}
+		proc_terminate( $process );
+		proc_close( $process );
+		throw $error;
+	}
+	$status = proc_get_status( $process );
+
+	return array(
+		'process'         => $process,
+		'pid'             => (int) ( $status['pid'] ?? 0 ),
+		// The child's pid doubles as the process-group id only when it was started through setsid.
+		'processGroupPid' => $uses_process_group ? (int) ( $status['pid'] ?? 0 ) : null,
+		// If the child has already exited, keep the exit code seen here for html_api_fuzz_codex_poll_exit_code().
+		'knownExitCode'   => ( ! ( $status['running'] ?? false ) && array_key_exists( 'exitcode', $status ) && -1 !== $status['exitcode'] ) ? (int) $status['exitcode'] : null,
+		'signatureDir'    => $signature_dir,
+		'hash'            => $signature_hash,
+	);
+}
+
+/**
+ * Checks whether a job's process has finished and, if so, closes it.
+ *
+ * An exit code captured at launch (`knownExitCode`) wins. Otherwise the code
+ * comes from proc_get_status() when it reports one, and from proc_close() as
+ * the last resort.
+ *
+ * @param array $job Job record; reads `process` and `knownExitCode`.
+ * @return int|null Exit code once the process has ended, or null while it is still running.
+ */
+function html_api_fuzz_codex_poll_exit_code( array &$job ): ?int {
+	$known = $job['knownExitCode'] ?? null;
+	if ( null !== $known ) {
+		$code = (int) $known;
+		proc_close( $job['process'] );
+		return $code;
+	}
+
+	$status = proc_get_status( $job['process'] );
+	if ( $status['running'] ?? false ) {
+		return null;
+	}
+
+	// Decide which status field is usable before closing; -1 and false mean "not available".
+	$has_exitcode = array_key_exists( 'exitcode', $status ) && -1 !== $status['exitcode'];
+	$has_cached   = array_key_exists( 'cached_exitcode', $status ) && false !== $status['cached_exitcode'];
+	$closed_code  = proc_close( $job['process'] );
+
+	if ( $has_exitcode ) {
+		return (int) $status['exitcode'];
+	}
+	if ( $has_cached ) {
+		return (int) $status['cached_exitcode'];
+	}
+
+	return (int) $closed_code;
+}
+
+/**
+ * Records a job's completion by writing done.json into its signature directory.
+ *
+ * @param array $job       Job record; reads `signatureDir` and `hash`.
+ * @param int   $exit_code Exit code to record.
+ * @param array $extra     Additional fields merged over the defaults (same-named string keys override them).
+ */
+function html_api_fuzz_codex_mark_done( array $job, int $exit_code, array $extra = array() ): void {
+	$dir    = $job['signatureDir'];
+	$record = array(
+		'schemaVersion'  => 1,
+		'kind'           => 'html-api-fuzz-codex-done',
+		'hash'           => $job['hash'],
+		'finishedAt'     => html_api_fuzz_codex_now_iso_z(),
+		'finishedAtUnix' => microtime( true ),
+		'exitCode'       => $exit_code,
+		'reportPath'     => html_api_fuzz_codex_path_join( $dir, 'report.md' ),
+		'stdoutPath'     => html_api_fuzz_codex_path_join( $dir, 'codex.jsonl' ),
+		'stderrPath'     => html_api_fuzz_codex_path_join( $dir, 'codex.stderr.log' ),
+	);
+
+	html_api_fuzz_codex_write_json(
+		html_api_fuzz_codex_path_join( $dir, 'done.json' ),
+		array_merge( $record, $extra )
+	);
+}
+
+/**
+ * Records a launch failure as a done.json with exit code 1 and the error details.
+ *
+ * @param string    $signature_dir  Diagnostics directory for the signature.
+ * @param string    $signature_hash Hash identifying the signature.
+ * @param Throwable $error          The error that prevented the launch.
+ */
+function html_api_fuzz_codex_mark_launch_failed( string $signature_dir, string $signature_hash, Throwable $error ): void {
+	$job = array(
+		'signatureDir' => $signature_dir,
+		'hash'         => $signature_hash,
+	);
+	$details = array(
+		'launchFailed' => true,
+		'errorClass'   => get_class( $error ),
+		'errorMessage' => $error->getMessage(),
+	);
+
+	html_api_fuzz_codex_mark_done( $job, 1, $details );
+}
+
+/**
+ * Selects the signatures from the watcher state that are ready for diagnosis.
+ *
+ * A signature qualifies when its record is an array, its hash validates, its
+ * status is "minimized", and a minimizer directory can be resolved for it.
+ *
+ * @param array $state Decoded watcher state; reads `signatures`.
+ * @return array[] List of `[ hash, signature ]` pairs ordered by hash (string sort).
+ */
+function html_api_fuzz_codex_load_candidate_signatures( array $state ): array {
+	$by_hash = $state['signatures'] ?? array();
+	if ( ! is_array( $by_hash ) ) {
+		return array();
+	}
+
+	ksort( $by_hash, SORT_STRING );
+
+	$eligible = array();
+	foreach ( $by_hash as $raw_hash => $entry ) {
+		// Array keys that look like integers come back as ints; normalize before validating.
+		$hash         = (string) $raw_hash;
+		$is_candidate = is_array( $entry )
+			&& html_api_fuzz_codex_valid_signature_hash( $hash )
+			&& 'minimized' === ( $entry['status'] ?? null )
+			&& null !== html_api_fuzz_codex_newest_minimize_dir( $entry );
+		if ( $is_candidate ) {
+			$eligible[] = array( $hash, $entry );
+		}
+	}
+
+	return $eligible;
+}
+
+/**
+ * Parses and validates the orchestrator's command-line arguments.
+ *
+ * Prints usage and exits with status 0 for -h/--help, and with status 1 when
+ * --triage-dir is missing.
+ *
+ * @param array $argv Raw command-line arguments.
+ * @return array Settings keyed by `triageDir`, `diagnosticsDir`, `repoRoot`, `codexBin`, `model`,
+ *               `intervalSeconds`, `maxConcurrent`, `maxLaunchPerPass`, `staleAfterSeconds`,
+ *               `once`, `mode` and `sandbox`.
+ * @throws InvalidArgumentException When --mode or --sandbox is invalid, fix mode is combined with a
+ *                                  read-only sandbox, or a numeric limit is negative.
+ */
+function html_api_fuzz_codex_parse_args( array $argv ): array {
+	if ( in_array( '-h', $argv, true ) ) {
+		html_api_fuzz_codex_usage();
+		exit( 0 );
+	}
+
+	$options = \HtmlApiFuzz\parse_cli_options( $argv );
+	html_api_fuzz_codex_validate_cli_options( $options );
+	if ( \HtmlApiFuzz\option_bool( $options, 'help', false ) ) {
+		html_api_fuzz_codex_usage();
+		exit( 0 );
+	}
+
+	$requested_triage_dir = \HtmlApiFuzz\option_string( $options, 'triage-dir' );
+	if ( null === $requested_triage_dir ) {
+		html_api_fuzz_codex_usage();
+		exit( 1 );
+	}
+
+	$mode = \HtmlApiFuzz\option_string( $options, 'mode', 'classify' );
+	if ( 'classify' !== $mode && 'fix' !== $mode ) {
+		throw new InvalidArgumentException( 'Expected --mode to be classify or fix.' );
+	}
+
+	// Fix mode needs to write, so it defaults to (and requires) a writable sandbox.
+	$default_sandbox = 'fix' === $mode ? 'workspace-write' : 'read-only';
+	$sandbox         = \HtmlApiFuzz\option_string( $options, 'sandbox', $default_sandbox );
+	$known_sandboxes = array( 'read-only', 'workspace-write', 'danger-full-access' );
+	if ( ! in_array( $sandbox, $known_sandboxes, true ) ) {
+		throw new InvalidArgumentException( 'Expected --sandbox to be read-only, workspace-write, or danger-full-access.' );
+	}
+	if ( 'fix' === $mode && 'read-only' === $sandbox ) {
+		throw new InvalidArgumentException( 'Expected --mode fix to use a writable sandbox.' );
+	}
+
+	$repo_root_option = \HtmlApiFuzz\option_string( $options, 'repo-root', getcwd() ?: '.' );
+	$repo_root        = html_api_fuzz_codex_require_dir( $repo_root_option, 'Repo root' );
+	$triage_dir       = html_api_fuzz_codex_require_dir( $requested_triage_dir, 'Triage directory' );
+	$diagnostics_dir  = \HtmlApiFuzz\option_string( $options, 'diagnostics-dir' )
+		?? html_api_fuzz_codex_path_join( dirname( $triage_dir ), 'diagnostics' );
+
+	// Read every numeric option before validating any of them; the checks then run in this order.
+	$limits = array(
+		'interval-seconds'    => \HtmlApiFuzz\option_float( $options, 'interval-seconds', 120.0 ),
+		'max-concurrent'      => \HtmlApiFuzz\option_int( $options, 'max-concurrent', 1 ),
+		'max-launch-per-pass' => \HtmlApiFuzz\option_int( $options, 'max-launch-per-pass', 1 ),
+		'stale-after-seconds' => \HtmlApiFuzz\option_int( $options, 'stale-after-seconds', 6 * 60 * 60 ),
+	);
+	foreach ( $limits as $flag => $limit ) {
+		if ( $limit < 0 ) {
+			throw new InvalidArgumentException( "Expected --{$flag} to be at least 0." );
+		}
+	}
+
+	return array(
+		'triageDir'         => $triage_dir,
+		'diagnosticsDir'    => html_api_fuzz_codex_normalize_path( $diagnostics_dir ),
+		'repoRoot'          => $repo_root,
+		'codexBin'          => \HtmlApiFuzz\option_string( $options, 'codex-bin', 'codex' ),
+		'model'             => \HtmlApiFuzz\option_string( $options, 'model' ),
+		'intervalSeconds'   => $limits['interval-seconds'],
+		'maxConcurrent'     => $limits['max-concurrent'],
+		'maxLaunchPerPass'  => $limits['max-launch-per-pass'],
+		'staleAfterSeconds' => $limits['stale-after-seconds'],
+		'once'              => \HtmlApiFuzz\option_bool( $options, 'once', false ),
+		'mode'              => $mode,
+		'sandbox'           => $sandbox,
+	);
+}
+
+/**
+ * Installs SIGTERM and SIGINT handlers that set a shared stop flag.
+ *
+ * Asynchronous signal delivery is enabled when pcntl_async_signals() exists.
+ *
+ * @param bool $stopping Flag set to true (by reference) when either signal arrives.
+ * @throws RuntimeException When pcntl_signal() is unavailable.
+ */
+function html_api_fuzz_codex_install_signal_handlers( bool &$stopping ): void {
+	if ( ! function_exists( 'pcntl_signal' ) ) {
+		throw new RuntimeException( 'The pcntl extension is required for signal-safe Codex job orchestration.' );
+	}
+
+	if ( function_exists( 'pcntl_async_signals' ) ) {
+		pcntl_async_signals( true );
+	}
+
+	$request_stop = static function () use ( &$stopping ): void {
+		$stopping = true;
+	};
+
+	foreach ( array( 'SIGTERM', 'SIGINT' ) as $signal_name ) {
+		if ( defined( $signal_name ) ) {
+			pcntl_signal( constant( $signal_name ), $request_stop );
+		}
+	}
+}
+
+/**
+ * Runs pending signal handlers when pcntl_signal_dispatch() is available; otherwise does nothing.
+ */
+function html_api_fuzz_codex_dispatch_signals(): void {
+	if ( ! function_exists( 'pcntl_signal_dispatch' ) ) {
+		return;
+	}
+
+	pcntl_signal_dispatch();
+}
+
+/**
+ * Sleeps for up to the given duration, returning early once a stop is requested.
+ *
+ * The wait is split into naps of at most 250 ms (and at least 1 ms), with
+ * pending signals dispatched before each nap.
+ *
+ * @param float $seconds  Time to wait; negative values are treated as zero.
+ * @param bool  $stopping Stop flag, read by reference on every iteration.
+ */
+function html_api_fuzz_codex_sleep( float $seconds, bool &$stopping ): void {
+	$wake_at = microtime( true ) + max( 0.0, $seconds );
+	while ( true ) {
+		html_api_fuzz_codex_dispatch_signals();
+		if ( $stopping ) {
+			break;
+		}
+
+		$left = $wake_at - microtime( true );
+		if ( $left <= 0 ) {
+			break;
+		}
+
+		$nap_microseconds = (int) min( 250000, max( 1000, round( $left * 1000000 ) ) );
+		usleep( $nap_microseconds );
+	}
+}
+
+/**
+ * Stops a job and records its outcome in done.json.
+ *
+ * A job whose process has already ended keeps its real exit code. Without this,
+ * a diagnosis that finished between the last poll and shutdown would be
+ * recorded as interrupted and later archived and re-run as a failure.
+ *
+ * A job that is still running is sent SIGTERM (to its process group when one
+ * was recorded and posix_kill() is available, otherwise to the process),
+ * given 200 ms, then sent signal 9 if it is still alive. It is recorded with
+ * exit code 143 and `interrupted: true`.
+ *
+ * @param array $job Job record; reads `process`, `processGroupPid`, `knownExitCode`, `signatureDir` and `hash`.
+ */
+function html_api_fuzz_codex_stop_job( array &$job ): void {
+	// Returns null only while the process is still running; otherwise it closes the process and yields its code.
+	$natural_exit = html_api_fuzz_codex_poll_exit_code( $job );
+	if ( null !== $natural_exit ) {
+		html_api_fuzz_codex_mark_done( $job, $natural_exit );
+		return;
+	}
+
+	$group_pid = $job['processGroupPid'] ?? null;
+	if ( null !== $group_pid && function_exists( 'posix_kill' ) && defined( 'SIGTERM' ) ) {
+		// A negative pid addresses the whole process group.
+		@posix_kill( -1 * (int) $group_pid, SIGTERM );
+	} else {
+		proc_terminate( $job['process'] );
+	}
+
+	// Short grace period before escalating.
+	usleep( 200000 );
+	$status = proc_get_status( $job['process'] );
+	if ( $status['running'] ?? false ) {
+		if ( null !== $group_pid && function_exists( 'posix_kill' ) ) {
+			@posix_kill( -1 * (int) $group_pid, 9 );
+		} else {
+			proc_terminate( $job['process'], 9 );
+		}
+	}
+
+	proc_close( $job['process'] );
+	html_api_fuzz_codex_mark_done(
+		$job,
+		143,
+		array(
+			'interrupted' => true,
+		)
+	);
+}
+
+/**
+ * Runs the Codex orchestrator loop: reap finished jobs, honor a STOP file,
+ * claim and launch new signatures, then sleep until the next pass.
+ *
+ * The loop ends when a stop signal is received, when a STOP file is present
+ * and no job is running, or (with `once`) after a single scan once all
+ * launched jobs have finished. Any jobs still tracked on the way out,
+ * including when an exception escapes the loop, are stopped.
+ *
+ * @param array $argv Raw command-line arguments.
+ * @return int Process exit status; 0 whenever the loop ends without an exception.
+ */
+function html_api_fuzz_codex_main( array $argv ): int {
+	$args       = html_api_fuzz_codex_parse_args( $argv );
+	$state_path = html_api_fuzz_codex_path_join( $args['triageDir'], 'state.json' );
+	$running    = array();
+	$stopping   = false;
+	$scanned    = false;
+
+	html_api_fuzz_codex_install_signal_handlers( $stopping );
+	\HtmlApiFuzz\ensure_dir( $args['diagnosticsDir'] );
+	html_api_fuzz_codex_write_json(
+		html_api_fuzz_codex_path_join( $args['diagnosticsDir'], 'orchestrator-state.json' ),
+		array(
+			'schemaVersion' => 1,
+			'kind'          => 'html-api-fuzz-codex-orchestrator-state',
+			'startedAt'     => html_api_fuzz_codex_now_iso_z(),
+			'triageDir'     => $args['triageDir'],
+			'repoRoot'      => $args['repoRoot'],
+			'mode'          => $args['mode'],
+			'sandbox'       => $args['sandbox'],
+		)
+	);
+
+	try {
+		while ( ! $stopping ) {
+			html_api_fuzz_codex_dispatch_signals();
+			// Reap: record and forget every job whose process has ended.
+			foreach ( array_keys( $running ) as $key ) {
+				$exit_code = html_api_fuzz_codex_poll_exit_code( $running[ $key ] );
+				if ( null === $exit_code ) {
+					continue;
+				}
+
+				html_api_fuzz_codex_mark_done( $running[ $key ], $exit_code );
+				echo 'completed ' . $running[ $key ]['hash'] . ' exit=' . $exit_code . "\n";
+				unset( $running[ $key ] );
+			}
+
+			/*
+			 * Graceful stop: a STOP file in the run directory stops new
+			 * launches; running jobs finish before the orchestrator exits.
+			 * The run directory comes from the watcher state when available
+			 * (the triage dir may live outside the run dir), matching how
+			 * launches resolve it below.
+			 */
+			$stop_run_dir = dirname( $args['triageDir'] );
+			$stop_state   = html_api_fuzz_codex_read_json( $state_path );
+			if ( is_array( $stop_state ) && is_string( $stop_state['runDir'] ?? null ) && ! html_api_fuzz_codex_path_has_control_chars( $stop_state['runDir'] ) ) {
+				$stop_run_dir = html_api_fuzz_codex_normalize_path( $stop_state['runDir'] );
+			}
+			$stop_file_present = is_file( html_api_fuzz_codex_path_join( $stop_run_dir, 'STOP' ) );
+			if ( $stop_file_present && empty( $running ) ) {
+				echo "stop requested; exiting\n";
+				break;
+			}
+
+			// In once mode only the first pass scans; a STOP file suppresses scanning entirely.
+			$can_scan = ( ! $args['once'] || ! $scanned ) && ! $stop_file_present;
+			if ( $can_scan ) {
+				// Launches this pass are bounded by free concurrency slots and by maxLaunchPerPass.
+				$available = max( 0, $args['maxConcurrent'] - count( $running ) );
+				$launched  = 0;
+				if ( $available > 0 ) {
+					$state = html_api_fuzz_codex_read_json( $state_path );
+					if ( is_array( $state ) ) {
+						$launch_args = $args;
+						if ( is_string( $state['runDir'] ?? null ) && ! html_api_fuzz_codex_path_has_control_chars( $state['runDir'] ) ) {
+							$launch_args['runDir'] = html_api_fuzz_codex_normalize_path( $state['runDir'] );
+						} else {
+							$launch_args['runDir'] = dirname( $args['triageDir'] );
+						}
+
+						foreach ( html_api_fuzz_codex_load_candidate_signatures( $state ) as $candidate ) {
+							if ( $launched >= $available || $launched >= $args['maxLaunchPerPass'] ) {
+								break;
+							}
+							list( $signature_hash, $signature ) = $candidate;
+							// A null claim means the signature is finished or currently claimed; move on.
+							$signature_dir = html_api_fuzz_codex_claim_signature( $args['diagnosticsDir'], $signature_hash, $signature, $args['staleAfterSeconds'] );
+							if ( null === $signature_dir ) {
+								continue;
+							}
+
+							try {
+								$job       = html_api_fuzz_codex_launch( $launch_args, $signature_hash, $signature, $signature_dir );
+								$running[] = $job;
+								++$launched;
+								echo 'launched ' . $signature_hash . ' pid=' . $job['pid'] . "\n";
+							} catch ( Throwable $error ) {
+								// A failed launch is recorded as done (exit 1) and does not count against the launch budget.
+								html_api_fuzz_codex_mark_launch_failed( $signature_dir, $signature_hash, $error );
+								echo 'launch failed ' . $signature_hash . ' error=' . $error->getMessage() . "\n";
+							}
+						}
+					}
+				}
+				$scanned = true;
+			}
+
+			if ( $args['once'] && empty( $running ) ) {
+				break;
+			}
+
+			// Once mode polls at most every second so it can exit soon after its jobs finish.
+			$sleep_seconds = $args['once'] ? min( 1.0, $args['intervalSeconds'] ) : $args['intervalSeconds'];
+			html_api_fuzz_codex_sleep( $sleep_seconds, $stopping );
+		}
+	} finally {
+		// Whatever ended the loop, do not leave tracked jobs behind.
+		foreach ( array_keys( $running ) as $key ) {
+			html_api_fuzz_codex_stop_job( $running[ $key ] );
+			unset( $running[ $key ] );
+		}
+	}
+
+	return 0;
+}
+
+// Run the orchestrator unless HTML_API_FUZZ_CODEX_SELF_TESTING is defined and truthy,
+// which lets the functions above be loaded without starting the main loop.
+if ( ! defined( 'HTML_API_FUZZ_CODEX_SELF_TESTING' ) || ! HTML_API_FUZZ_CODEX_SELF_TESTING ) {
+	exit( html_api_fuzz_codex_main( $argv ) );
+}
diff --git a/tools/html-api-fuzz/launcher.php b/tools/html-api-fuzz/launcher.php
new file mode 100755
index 0000000000000..6eadad14e6a53
--- /dev/null
+++ b/tools/html-api-fuzz/launcher.php
@@ -0,0 +1,359 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+/** Prints the launcher's command-line synopsis and operator notes to stdout. */
+function html_api_fuzz_launcher_usage(): void {
+	echo "Usage: php tools/html-api-fuzz/launcher.php [--lanes N] [--output-dir DIR] [--duration-seconds N] [--max-seeds N] [--payload-policy POLICY] [--max-input-bytes N] [--dom-oracle php-dom|lexbor-source] [--lexbor-oracle-bin PATH] [--max-keep-per-signature N] [--keep-all-artifacts] [--watcher] [--triage-oracle-findings]\n",
+		"Create OUTPUT_DIR/STOP (see stop.php) to stop all lanes gracefully: each finishes its current batch and exits.\n",
+		"--max-keep-per-signature is applied per lane; a signature seen in every lane keeps up to N x lanes exemplar directories.\n",
+		"--triage-oracle-findings passes oracle findings to the watcher/minimizer when --watcher is used.\n";
+}
+/** Throws InvalidArgumentException for a generator selector that is neither 'auto' nor listed by the Generator class. */
+function html_api_fuzz_launcher_validate_generator_options( string $profile, string $mode, string $payload_policy ): void {
+	if ( ! ( 'auto' === $profile || in_array( $profile, \HtmlApiFuzz\Generator::profiles(), true ) ) ) {
+		throw new InvalidArgumentException( "Unknown generator profile: {$profile}" );
+	}
+	if ( ! ( 'auto' === $mode || in_array( $mode, \HtmlApiFuzz\Generator::modes(), true ) ) ) {
+		throw new InvalidArgumentException( "Unknown generator mode: {$mode}" );
+	}
+	if ( ! ( 'auto' === $payload_policy || in_array( $payload_policy, \HtmlApiFuzz\Generator::payload_policies(), true ) ) ) {
+		throw new InvalidArgumentException( "Unknown generator payload policy: {$payload_policy}" );
+	}
+}
+/**
+ * Checks the numeric launcher limits against their minimums, in the order the
+ * parameters are declared, and stops at the first value that is too small.
+ *
+ * @throws InvalidArgumentException Naming the offending option and its minimum.
+ */
+function html_api_fuzz_launcher_validate_runtime_options( int $max_seeds, float $duration_seconds, int $timeout_ms, int $max_input_bytes, int $max_tokens, int $max_nodes ): void {
+	$minimums = array(
+		array( '--max-seeds', $max_seeds, 0 ),
+		array( '--duration-seconds', $duration_seconds, 0 ),
+		array( '--timeout-ms', $timeout_ms, 1 ),
+		array( '--max-input-bytes', $max_input_bytes, 0 ),
+		array( '--max-tokens', $max_tokens, 1 ),
+		array( '--max-nodes', $max_nodes, 1 ),
+	);
+	foreach ( $minimums as list( $flag, $value, $minimum ) ) {
+		if ( $value < $minimum ) {
+			throw new InvalidArgumentException( 'Expected ' . $flag . ' to be at least ' . $minimum . '.' );
+		}
+	}
+}
+/**
+ * Starts one runner lane as a child process and prepares its log file.
+ *
+ * The log directory and an empty log are set up before the child is spawned,
+ * so a failure there can no longer leave a started runner that nobody tracks.
+ *
+ * @throws RuntimeException When proc_open() cannot start the lane.
+ */
+function html_api_fuzz_launcher_start_lane( array $command, string $cwd, string $log_path ) {
+	\HtmlApiFuzz\ensure_dir( dirname( $log_path ) );
+	file_put_contents( $log_path, '' );
+	$spec    = array( array( 'pipe', 'r' ), array( 'pipe', 'w' ), array( 'pipe', 'w' ) );
+	$process = proc_open( $command, $spec, $pipes, $cwd );
+	if ( ! is_resource( $process ) ) {
+		throw new RuntimeException( 'Could not start runner lane.' );
+	}
+	fclose( $pipes[0] ); // Nothing is ever written to the lane's stdin.
+	stream_set_blocking( $pipes[1], false );
+	stream_set_blocking( $pipes[2], false );
+	return array(
+		'process'   => $process,
+		'pipes'     => $pipes,
+		'logPath'   => $log_path,
+		'startedAt' => microtime( true ),
+		'stdout'    => '',
+		'stderr'    => '',
+	);
+}
+/**
+ * Appends a lane's pending stdout/stderr to its log file, keeping only the newest 64 KiB of each stream in memory.
+ */
+function html_api_fuzz_launcher_drain_lane( array &$lane ): void {
+	foreach ( array( 1 => 'stdout', 2 => 'stderr' ) as $fd => $stream ) {
+		$chunk = stream_get_contents( $lane['pipes'][ $fd ] );
+		if ( false === $chunk || '' === $chunk ) {
+			continue; // Read failure or nothing new: leave the buffer and the log untouched.
+		}
+		$lane[ $stream ] = substr( $lane[ $stream ] . $chunk, -65536 );
+		file_put_contents( $lane['logPath'], $chunk, FILE_APPEND );
+	}
+}
+/** Drains a lane one last time, closes its output pipes and process, and returns the exit code, duration and output tails. */
+function html_api_fuzz_launcher_close_lane( array &$lane ): array {
+	html_api_fuzz_launcher_drain_lane( $lane );
+	fclose( $lane['pipes'][1] );
+	fclose( $lane['pipes'][2] );
+	$exit_code  = proc_close( $lane['process'] );
+	$elapsed_ms = (int) round( ( microtime( true ) - $lane['startedAt'] ) * 1000 );
+	return array(
+		'code'       => $exit_code,
+		'durationMs' => $elapsed_ms,
+		'stdoutTail' => substr( $lane['stdout'], -2000 ),
+		'stderrTail' => substr( $lane['stderr'], -2000 ),
+	);
+}
+// Parse the command line; --help or -h prints the usage text and exits with status 0.
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+if ( \HtmlApiFuzz\option_bool( $options, 'help', false ) || \HtmlApiFuzz\option_bool( $options, 'h', false ) ) {
+	html_api_fuzz_launcher_usage();
+	exit( 0 );
+}
+// Read the launcher options (the last argument of each option_* helper is presumably its default) and validate them.
+$repo_root        = \HtmlApiFuzz\repo_root();
+$output_dir       = \HtmlApiFuzz\option_string( $options, 'output-dir', $repo_root . '/artifacts/html-api-fuzz/launch-' . \HtmlApiFuzz\timestamp() );
+$lanes            = max( 1, \HtmlApiFuzz\option_int( $options, 'lanes', 2 ) );
+$start_seed       = \HtmlApiFuzz\option_int( $options, 'start-seed', 1 );
+$max_seeds        = \HtmlApiFuzz\option_int( $options, 'max-seeds', 0 );
+$duration_seconds = \HtmlApiFuzz\option_float( $options, 'duration-seconds', 60.0 );
+$timeout_ms       = \HtmlApiFuzz\option_int( $options, 'timeout-ms', 2500 );
+$profile          = \HtmlApiFuzz\option_string( $options, 'profile', 'auto' );
+$mode             = \HtmlApiFuzz\option_string( $options, 'mode', 'auto' );
+$payload_policy   = \HtmlApiFuzz\option_string( $options, 'payload-policy', 'auto' );
+$max_input_bytes  = \HtmlApiFuzz\option_int( $options, 'max-input-bytes', 0 );
+$max_tokens       = \HtmlApiFuzz\option_int( $options, 'max-tokens', 2000 );
+$max_nodes        = \HtmlApiFuzz\option_int( $options, 'max-nodes', 3000 );
+$stop_on_failure  = \HtmlApiFuzz\option_bool( $options, 'stop-on-failure', false );
+$fail_unsupported = \HtmlApiFuzz\option_bool( $options, 'fail-unsupported', false );
+$run_watcher      = \HtmlApiFuzz\option_bool( $options, 'watcher', false );
+$triage_oracle_findings = \HtmlApiFuzz\option_bool( $options, 'triage-oracle-findings', false );
+$max_keep_per_signature = \HtmlApiFuzz\option_int( $options, 'max-keep-per-signature', 5 );
+$keep_all_artifacts     = \HtmlApiFuzz\option_bool( $options, 'keep-all-artifacts', false );
+if ( $max_keep_per_signature < 1 ) {
+	throw new InvalidArgumentException( 'Expected --max-keep-per-signature to be at least 1.' );
+}
+html_api_fuzz_launcher_validate_generator_options( $profile, $mode, $payload_policy );
+html_api_fuzz_launcher_validate_runtime_options( $max_seeds, $duration_seconds, $timeout_ms, $max_input_bytes, $max_tokens, $max_nodes );
+// Refuse to start while a STOP file is already present in the output directory.
+if ( is_file( $output_dir . '/STOP' ) ) {
+	// A leftover stop request must not silently turn this launch into a
+	// 0-seed success; starting again is an explicit operator decision.
+	fwrite( STDERR, "Stop file already exists: {$output_dir}/STOP\nRemove it to start a run in this directory.\n" );
+	exit( 1 );
+}
+// Prepare the output directory, the event/state file paths, and the git and oracle metadata handed to every lane.
+\HtmlApiFuzz\ensure_dir( $output_dir );
+$events_path = $output_dir . '/events.ndjson';
+$state_path  = $output_dir . '/launcher-state.json';
+$git_metadata = null === \HtmlApiFuzz\option_string( $options, 'git-metadata-base64', null )
+	? \HtmlApiFuzz\git_metadata()
+	: \HtmlApiFuzz\git_metadata_from_base64( \HtmlApiFuzz\option_string( $options, 'git-metadata-base64' ) );
+$git_metadata_base64 = \HtmlApiFuzz\git_metadata_base64( $git_metadata );
+$oracle_renderer      = \HtmlApiFuzz\OracleRenderer::from_options( $options );
+$oracle_metadata      = $oracle_renderer->metadata();
+$oracle_worker_args   = $oracle_renderer->worker_args();
+// Write the initial launcher state to disk and log a launcher-start event.
+$state = array(
+	'schemaVersion' => 1,
+	'kind'          => 'html-api-fuzz-launcher-state',
+	'startedAt'     => gmdate( 'c' ),
+	'updatedAt'     => gmdate( 'c' ),
+	'outputDir'     => $output_dir,
+	'lanes'         => $lanes,
+	'startSeed'     => $start_seed,
+	'seedStride'    => $lanes,
+	'profile'       => $profile,
+	'mode'          => $mode,
+	'payloadPolicy' => $payload_policy,
+	'maxInputBytes' => $max_input_bytes > 0 ? $max_input_bytes : null,
+	'git'           => $git_metadata,
+	'oracle'        => $oracle_metadata,
+	'finished'      => false,
+	'laneResults'   => array(),
+);
+\HtmlApiFuzz\write_json_file( $state_path, $state );
+\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'launcher-start', 'outputDir' => $output_dir, 'lanes' => $lanes, 'git' => $git_metadata, 'oracle' => $oracle_metadata ) );
+// Start one runner per lane: lane i begins at start-seed + i and strides by the lane count; --max-seeds is split across lanes.
+$running = array();
+for ( $i = 0; $i < $lanes; ++$i ) {
+	$lane_max_seeds = 0;
+	if ( 0 !== $max_seeds ) {
+		$lane_max_seeds = intdiv( $max_seeds, $lanes ) + ( $i < ( $max_seeds % $lanes ) ? 1 : 0 );
+		if ( 0 === $lane_max_seeds ) {
+			$lane_dir = $output_dir . '/lane-' . str_pad( (string) $i, 2, '0', STR_PAD_LEFT );
+			$state['laneResults'][ $i ] = array(
+				'lane'      => $i,
+				'status'    => 'skipped',
+				'outputDir' => $lane_dir,
+				'reason'    => 'no seeds assigned',
+			);
+			continue;
+		}
+	}
+	// Base runner command for this lane; every lane watches the same STOP file in the launcher's output directory.
+	$lane_dir = $output_dir . '/lane-' . str_pad( (string) $i, 2, '0', STR_PAD_LEFT );
+	$command  = array(
+		PHP_BINARY,
+		__DIR__ . '/runner.php',
+		'--output-dir',
+		$lane_dir,
+		'--start-seed',
+		(string) ( $start_seed + $i ),
+		'--seed-stride',
+		(string) $lanes,
+		'--duration-seconds',
+		(string) $duration_seconds,
+		'--timeout-ms',
+		(string) $timeout_ms,
+		'--profile',
+		$profile,
+		'--mode',
+		$mode,
+		'--payload-policy',
+		$payload_policy,
+		'--max-tokens',
+		(string) $max_tokens,
+		'--max-nodes',
+		(string) $max_nodes,
+		'--git-metadata-base64',
+		$git_metadata_base64,
+		'--max-keep-per-signature',
+		(string) $max_keep_per_signature,
+		'--stop-file',
+		$output_dir . '/STOP',
+	);
+	foreach ( $oracle_worker_args as $arg ) {
+		$command[] = $arg;
+	}
+	// Optional flags are forwarded only when they were requested (0 means "no limit given" for the numeric ones).
+	if ( 0 !== $max_seeds ) {
+		$command[] = '--max-seeds';
+		$command[] = (string) $lane_max_seeds;
+	}
+	if ( $keep_all_artifacts ) {
+		$command[] = '--keep-all-artifacts';
+	}
+	if ( $stop_on_failure ) {
+		$command[] = '--stop-on-failure';
+	}
+	if ( $fail_unsupported ) {
+		$command[] = '--fail-unsupported';
+	}
+	if ( $max_input_bytes > 0 ) {
+		$command[] = '--max-input-bytes';
+		$command[] = (string) $max_input_bytes;
+	}
+	// Spawn the lane from the repository root and record it as running.
+	$running[ $i ] = html_api_fuzz_launcher_start_lane( $command, $repo_root, $lane_dir . '/runner.stdout.log' );
+	$state['laneResults'][ $i ] = array(
+		'lane'      => $i,
+		'status'    => 'running',
+		'command'   => \HtmlApiFuzz\command_string( $command ),
+		'outputDir' => $lane_dir,
+		'logPath'   => $lane_dir . '/runner.stdout.log',
+	);
+	\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'lane-start', 'lane' => $i, 'outputDir' => $lane_dir, 'oracle' => $oracle_metadata ) );
+}
+\HtmlApiFuzz\write_json_file( $state_path, $state );
+// Poll the lanes every 100 ms until all have exited, recording each lane's result as soon as its process ends.
+while ( $running ) {
+	foreach ( array_keys( $running ) as $lane_id ) {
+		html_api_fuzz_launcher_drain_lane( $running[ $lane_id ] );
+		$status = proc_get_status( $running[ $lane_id ]['process'] );
+		if ( $status['running'] ) {
+			continue;
+		}
+		// Before PHP 8.3, proc_close() returns -1 once proc_get_status() has observed the exit; use the status exit code then.
+		$closed = html_api_fuzz_launcher_close_lane( $running[ $lane_id ] );
+		$code   = -1 === $closed['code'] ? (int) $status['exitcode'] : $closed['code'];
+		unset( $running[ $lane_id ] );
+		$runner_state = \HtmlApiFuzz\read_json_file( $state['laneResults'][ $lane_id ]['outputDir'] . '/state.json' );
+		$state['laneResults'][ $lane_id ] = array_merge(
+			$state['laneResults'][ $lane_id ],
+			array(
+				'status'      => 0 === $code ? 'completed' : 'failed',
+				'code'        => $code,
+				'durationMs'  => $closed['durationMs'],
+				'runnerState' => $runner_state,
+				'stdoutTail'  => $closed['stdoutTail'],
+				'stderrTail'  => $closed['stderrTail'],
+			)
+		);
+		$state['updatedAt'] = gmdate( 'c' );
+		\HtmlApiFuzz\write_json_file( $state_path, $state );
+		\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'lane-stop', 'lane' => $lane_id, 'code' => $code ) );
+	}
+	usleep( 100000 );
+}
+// Total the runner counters over all lanes (a lane without a runner state adds zeros), then persist the finished state.
+$counter_names = array(
+	'successes',
+	'failures',
+	'unsupported',
+	'oracleParseErrors',
+	'oracleUnsupported',
+	'oracleTolerated',
+	'oracleFindings',
+);
+$aggregate = array_fill_keys( $counter_names, 0 );
+foreach ( $state['laneResults'] as $lane_result ) {
+	$lane_counters = $lane_result['runnerState'] ?? array();
+	foreach ( $counter_names as $counter ) {
+		$aggregate[ $counter ] += (int) ( $lane_counters[ $counter ] ?? 0 );
+	}
+}
+$state['finished']  = true;
+$state['updatedAt'] = gmdate( 'c' );
+$state['aggregate'] = $aggregate;
+\HtmlApiFuzz\write_json_file( $state_path, $state );
+\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'launcher-stop', 'aggregate' => $aggregate ) );
+// With --watcher, run one watcher pass over this run (null placeholders for unset flags are filtered out); then print the JSON summary.
+$watcher_result = null;
+if ( $run_watcher ) {
+	$triage_dir = $output_dir . '/triage';
+	$minimize_timeout_ms = \HtmlApiFuzz\option_int( $options, 'minimize-timeout-ms', 300000 );
+	$watcher_timeout_ms  = \HtmlApiFuzz\option_int( $options, 'watcher-timeout-ms', max( 600000, $minimize_timeout_ms + 60000 ) );
+	$proc = \HtmlApiFuzz\run_php_process(
+		array_values(
+			array_filter(
+				array(
+			__DIR__ . '/watcher.php',
+			'--run-dir',
+			$output_dir,
+			'--state-dir',
+			$triage_dir,
+			'--once',
+			'--minimize-timeout-ms',
+			(string) $minimize_timeout_ms,
+			'--timeout-ms',
+			(string) $timeout_ms,
+			array_key_exists( 'max-attempts', $options ) ? '--max-attempts' : null,
+			array_key_exists( 'max-attempts', $options ) ? (string) \HtmlApiFuzz\option_int( $options, 'max-attempts', 250 ) : null,
+			array_key_exists( 'max-minimize', $options ) ? '--max-minimize' : null,
+			array_key_exists( 'max-minimize', $options ) ? (string) \HtmlApiFuzz\option_int( $options, 'max-minimize', 0 ) : null,
+			array_key_exists( 'probe-mode', $options ) ? '--probe-mode' : null,
+			array_key_exists( 'probe-mode', $options ) ? \HtmlApiFuzz\option_string( $options, 'probe-mode', 'auto' ) : null,
+			\HtmlApiFuzz\option_bool( $options, 'keep-candidate-artifacts', false ) ? '--keep-candidate-artifacts' : null,
+			\HtmlApiFuzz\option_bool( $options, 'no-minimize', false ) ? '--no-minimize' : null,
+			\HtmlApiFuzz\option_bool( $options, 'any-failure', false ) ? '--any-failure' : null,
+			$triage_oracle_findings ? '--triage-oracle-findings' : null,
+				),
+				static function ( $value ) {
+					return null !== $value;
+				}
+			)
+		),
+		$repo_root,
+		$watcher_timeout_ms,
+		$triage_dir . '/watcher.log'
+	);
+	$watcher_result = array(
+		'code'       => $proc['code'],
+		'timedOut'   => $proc['timedOut'],
+		'durationMs' => $proc['durationMs'],
+		'logPath'    => $proc['logPath'],
+	);
+}
+echo \HtmlApiFuzz\json_encode_safe(
+	array(
+		'ok'            => true,
+		'outputDir'     => $output_dir,
+		'statePath'     => $state_path,
+		'aggregate'     => $aggregate,
+		'watcherResult' => $watcher_result,
+	)
+) . "\n";
diff --git a/tools/html-api-fuzz/lib/Corpus.php b/tools/html-api-fuzz/lib/Corpus.php
new file mode 100644
index 0000000000000..005a91dc8a40b
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Corpus.php
@@ -0,0 +1,78 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Seed corpus taken from the html5lib-tests tree-construction suite. Its
+ * inputs capture long-accumulated parser edge cases, and mutating them
+ * reaches neighborhoods the structural generator's grammar never produces.
+ */
+class Corpus {
+	private static $entries = null;
+
+	public static function default_directory(): string {
+		return repo_root() . '/tests/phpunit/data/html5lib-tests/tree-construction';
+	}
+
+	/**
+	 * Collects every #data section from every .dat file in the directory, in
+	 * sorted file order. Only the default-directory result is cached, once
+	 * per process; an explicit directory is always read again.
+	 */
+	public static function entries( ?string $directory = null ): array {
+		$use_cache = null === $directory;
+		if ( $use_cache && null !== self::$entries ) {
+			return self::$entries;
+		}
+
+		$root  = $use_cache ? self::default_directory() : $directory;
+		$paths = is_dir( $root ) ? glob( $root . '/*.dat' ) : array();
+		$paths = is_array( $paths ) ? $paths : array(); // glob() yields false on error.
+		sort( $paths );
+
+		$found = array();
+		foreach ( $paths as $path ) {
+			$raw = file_get_contents( $path );
+			if ( false === $raw ) {
+				continue; // Unreadable files are skipped rather than aborting the load.
+			}
+			$name = basename( $path );
+			foreach ( self::parse_dat_data_sections( $raw ) as $section ) {
+				$found[] = array(
+					'file' => $name,
+					'data' => $section,
+				);
+			}
+		}
+		if ( $use_cache ) {
+			self::$entries = $found;
+		}
+		return $found;
+	}
+
+	/**
+	 * Pulls the #data sections out of html5lib .dat content. Each section
+	 * spans the lines between a `#data` line and the next line starting with
+	 * `#`; lines are re-joined with "\n", so no trailing newline remains.
+	 */
+	private static function parse_dat_data_sections( string $contents ): array {
+		$sections = array();
+		$pending  = null; // Lines of the open section, or null outside one.
+		foreach ( explode( "\n", $contents ) as $line ) {
+			if ( '#data' === $line ) {
+				$pending = array();
+			} elseif ( null === $pending ) {
+				continue;
+			} elseif ( '' !== $line && '#' === $line[0] ) {
+				$sections[] = implode( "\n", $pending );
+				$pending    = null;
+			} else {
+				$pending[] = $line;
+			}
+		}
+		// A final section with no closing directive is kept only if it has lines.
+		if ( null !== $pending && array() !== $pending ) {
+			$sections[] = implode( "\n", $pending );
+		}
+		return $sections;
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Generator.php b/tools/html-api-fuzz/lib/Generator.php
new file mode 100644
index 0000000000000..346b783829abf
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Generator.php
@@ -0,0 +1,1306 @@
+<?php
+namespace HtmlApiFuzz;
+
+class Generator {
+	const MODE_FRAGMENT_BODY = 'fragment-body';
+	const MODE_FULL_DOCUMENT = 'full-document';
+
+	private $rng;
+	private $profile;
+	private $payload_policy;
+	private $features = array();
+
+	private $normal_tags = array( 'div', 'p', 'span', 'section', 'article', 'main', 'header', 'footer', 'a', 'b', 'i', 'em', 'strong', 'small', 'mark', 'code', 'pre', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'button', 'form', 'label', 'select', 'option' );
+	private $void_tags   = array( 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'source', 'track', 'wbr' );
+	private $raw_tags    = array( 'script', 'style', 'iframe', 'noembed', 'noframes', 'xmp', 'noscript' );
+	private $rcdata_tags = array( 'title', 'textarea' );
+	private $named_character_references = array( 'amp', 'AMP', 'lt', 'LT', 'gt', 'GT', 'quot', 'QUOT', 'apos', 'nbsp', 'copy', 'COPY', 'reg', 'not', 'notin', 'notinva', 'AElig', 'NotEqualTilde', 'CounterClockwiseContourIntegral', 'centerdot', 'divideontimes', 'ncaron', 'ngt', 'nGt' );
+	private $legacy_semicolonless_named_character_references = array( 'amp', 'AMP', 'lt', 'LT', 'gt', 'GT', 'quot', 'QUOT', 'nbsp', 'copy', 'COPY', 'reg', 'not', 'AElig' );
+	private $invalid_semicolonless_named_character_references = array( 'apos', 'notin', 'NotEqualTilde', 'CounterClockwiseContourIntegral' );
+	private $unusual_attr_names         = array( 'aria-label', 'data-id', 'data--x', '_', ':colon', '@click', '[data-x]', 'xml:space', 'xmlns:xlink', 'xlink:href', 'on:click', 'data.thing', 'data🙂' );
+	private $unusual_tag_names          = array( 'x-widget', 'x-0', 'a-b-c', 'foo:bar', 'foo.bar', 'foo_bar', 'x🙂', 'MiXeD-Custom' );
+
+	/** Profile names that generate() accepts; any other value except 'auto' is rejected there. */
+	public static function profiles(): array {
+		return array(
+			'balanced',
+			'full-document',
+			'body-fragment',
+			'tables',
+			'template',
+			'select',
+			'foreign-content',
+			'rawtext-rcdata',
+			'text-fragment',
+			'formatting-adoption',
+			'attributes-entities',
+			'comments-doctype-bogus',
+			'deep-nesting',
+			'resource-stress',
+			'incomplete-malformed',
+		);
+	}
+	/** Payload policy names that generate() accepts; any other value except 'auto' is rejected there. */
+	public static function payload_policies(): array {
+		return array(
+			'valid-utf8',
+			'mostly-valid',
+			'ascii-structural',
+			'stress-long',
+		);
+	}
+	/**
+	 * All payload policy labels: the selectable policies followed by
+	 * 'invalid-byte-heavy', which payload_policies() does not offer.
+	 */
+	public static function payload_policy_labels(): array {
+		$labels   = self::payload_policies();
+		$labels[] = 'invalid-byte-heavy';
+		return $labels;
+	}
+	/** The two parse modes generate() accepts besides 'auto': body fragment and full document. */
+	public static function modes(): array {
+		return array(
+			self::MODE_FRAGMENT_BODY,
+			self::MODE_FULL_DOCUMENT,
+		);
+	}
+	/**
+	 * Fragment parsing context elements. WordPress currently supports only
+	 * `<body>`; the others receive a small probe weight so the fuzzer
+	 * exercises the unsupported-context path today and picks up real
+	 * coverage automatically when create_fragment() gains context support.
+	 * In generate() `body` is weighted 240 and every other context 1.
+	 */
+	public static function fragment_contexts(): array {
+		return array( 'body', 'div', 'p', 'td', 'tr', 'table', 'caption', 'colgroup', 'select', 'option', 'template', 'title', 'textarea', 'script', 'style', 'svg', 'math' );
+	}
+	/** Builds one fuzz input and its descriptive parameters from a seed; 'auto' selectors are resolved by weighted draws from a Prng created from that seed. */
+	public static function generate( int $seed, string $profile = 'auto', string $mode = 'auto', string $payload_policy = 'auto', ?int $max_input_bytes = null ): array {
+		$rng = new Prng( $seed );
+		$requested_profile        = $profile;
+		$requested_mode           = $mode;
+		$requested_payload_policy = $payload_policy;
+		if ( 'auto' === $profile ) {
+			$profile = $rng->weighted(
+				array(
+					'balanced'               => 22,
+					'full-document'          => 8,
+					'body-fragment'          => 8,
+					'tables'                 => 11,
+					'template'               => 8,
+					'select'                 => 8,
+					'foreign-content'        => 10,
+					'rawtext-rcdata'         => 8,
+					'text-fragment'          => 4,
+					'formatting-adoption'    => 10,
+					'attributes-entities'    => 7,
+					'comments-doctype-bogus' => 5,
+					'deep-nesting'           => 2,
+					'resource-stress'        => 1,
+					'incomplete-malformed'   => 2,
+				)
+			);
+		} elseif ( ! in_array( $profile, self::profiles(), true ) ) {
+			throw new \InvalidArgumentException( 'Unknown generator profile: ' . $profile );
+		}
+		// Payload policy: only the resource-stress profile can draw 'stress-long' automatically.
+		if ( 'auto' === $payload_policy ) {
+			$payload_policy = 'resource-stress' === $profile
+				? $rng->weighted(
+					array(
+						'stress-long'  => 75,
+						'mostly-valid' => 15,
+						'valid-utf8'   => 10,
+					)
+				)
+				: $rng->weighted(
+					array(
+						'valid-utf8'       => 52,
+						'mostly-valid'     => 33,
+						'ascii-structural' => 15,
+					)
+				);
+		} elseif ( ! in_array( $payload_policy, self::payload_policies(), true ) ) {
+			throw new \InvalidArgumentException( 'Unknown generator payload policy: ' . $payload_policy );
+		}
+		// Mode: fixed by the full-document, body-fragment and text-fragment profiles, otherwise drawn 70/30 fragment vs. document.
+		if ( 'auto' === $mode ) {
+			$mode = 'full-document' === $profile ? self::MODE_FULL_DOCUMENT : ( in_array( $profile, array( 'body-fragment', 'text-fragment' ), true ) ? self::MODE_FRAGMENT_BODY : $rng->weighted( array( self::MODE_FRAGMENT_BODY => 70, self::MODE_FULL_DOCUMENT => 30 ) ) );
+		} elseif ( ! in_array( $mode, self::modes(), true ) ) {
+			throw new \InvalidArgumentException( 'Unknown generator mode: ' . $mode );
+		}
+		// Fragment context (fragment mode only): 'body' is weighted 240 against 1 for each other context.
+		$fragment_context = 'body';
+		if ( self::MODE_FRAGMENT_BODY === $mode ) {
+			$weights = array( 'body' => 240 );
+			foreach ( self::fragment_contexts() as $context ) {
+				if ( 'body' !== $context ) {
+					$weights[ $context ] = 1;
+				}
+			}
+			$fragment_context = $rng->weighted( $weights );
+		}
+		// Generate the body; a non-body fragment context is only recorded as a feature here.
+		$generator = new self( $rng, $profile, $payload_policy );
+		if ( 'body' !== $fragment_context ) {
+			$generator->mark_feature( 'fragment-context:' . $fragment_context );
+		}
+		$max_depth = $generator->depth_for_profile();
+		$body      = 'text-fragment' === $profile ? $generator->text_fragment_input() : $generator->nodes( $max_depth, 'body' );
+		// Wrap the body in a full document when that mode was chosen, then apply the optional byte limit.
+		if ( self::MODE_FULL_DOCUMENT === $mode ) {
+			$html = $generator->full_document( $body );
+		} else {
+			$html = $body;
+		}
+		$truncated = false;
+		if ( null !== $max_input_bytes && $max_input_bytes > 0 && strlen( $html ) > $max_input_bytes ) {
+			$html      = self::trim_to_max_bytes( $html, $max_input_bytes );
+			$truncated = true;
+			$generator->mark_feature( 'generator:truncated' );
+		}
+		// 'byteLength' and 'features' describe the input after any truncation above.
+		return array(
+			'input'           => $html,
+			'mode'            => $mode,
+			'profile'         => $profile,
+			'payloadPolicy'   => $payload_policy,
+			'fragmentContext' => $fragment_context,
+			'parameters'      => array(
+				'seed'                   => $seed,
+				'requestedProfile'       => $requested_profile,
+				'requestedMode'          => $requested_mode,
+				'requestedPayloadPolicy' => $requested_payload_policy,
+				'profile'                => $profile,
+				'mode'                   => $mode,
+				'payloadPolicy'          => $payload_policy,
+				'fragmentContext'        => $fragment_context,
+				'maxDepth'               => $max_depth,
+				'maxInputBytes'          => $max_input_bytes,
+				'truncated'              => $truncated,
+				'byteLength'             => strlen( $html ),
+				'features'               => $generator->features(),
+			),
+		);
+	}
+	/** Private: instances are created only by generate(), which passes in the seeded Prng and the resolved profile and policy. */
+	private function __construct( Prng $rng, string $profile, string $payload_policy ) {
+		$this->rng            = $rng;
+		$this->profile        = $profile;
+		$this->payload_policy = $payload_policy;
+	}
+	/** Cuts $html to $max_input_bytes bytes, then drops trailing bytes one at a time until
+	 *  the remainder is valid UTF-8 (tested with an empty /u pattern) or is empty. */
+	private static function trim_to_max_bytes( string $html, int $max_input_bytes ): string {
+		$kept = substr( $html, 0, $max_input_bytes );
+		while ( '' !== $kept && ! preg_match( '//u', $kept ) ) {
+			$kept = substr( $kept, 0, -1 );
+		}
+		return $kept;
+	}
+	/** Records a feature name; the name is stored as an array key, so repeats collapse into one entry. */
+	private function mark_feature( string $feature ): void {
+		$this->features[ $feature ] = true;
+	}
+	/** Returns the recorded feature names as a sorted list. */
+	private function features(): array {
+		$names = array_keys( $this->features );
+		sort( $names );
+		return $names;
+	}
+	/** Draws the maximum nesting depth for the current profile; exactly one Prng draw is made. */
+	private function depth_for_profile(): int {
+		switch ( $this->profile ) {
+			case 'resource-stress':
+				return $this->rng->int( 10, 20 );
+			case 'deep-nesting':
+				return $this->rng->int( 8, 18 );
+			case 'balanced':
+				return $this->rng->int( 3, 5 );
+			default:
+				return $this->rng->int( 3, 6 );
+		}
+	}
+	/** Wraps $body in document-level markup: optional doctype, head content, html/body start tags with attributes, and sometimes a frameset or trailing content. */
+	private function full_document( string $body ): string {
+		$doctype = $this->rng->chance( 70 ) ? $this->doctype() : '';
+		$head    = $this->rng->chance( 65 ) ? '<head>' . $this->head_nodes() . '</head>' : $this->head_nodes();
+		$attrs   = $this->attrs();
+		$body_at = $this->attrs();
+		// chance( 4 ) branch: a frameset document; its </frameset> and the trailing body are each optional.
+		if ( $this->rng->chance( 4 ) ) {
+			$this->mark_feature( 'frameset' );
+			$frames = '<frame src="a"><frame' . $this->attrs() . '><noframes>' . $this->terminal_text( true ) . '</noframes>';
+			return $doctype . '<html' . $attrs . '>' . $head . '<frameset' . $this->attrs() . '>' . $frames . ( $this->rng->chance( 70 ) ? '</frameset>' : '' ) . ( $this->rng->chance( 40 ) ? $body : '' );
+		}
+		// chance( 20 ) branch: leave out the <html> and <body> tags entirely.
+		if ( $this->rng->chance( 20 ) ) {
+			return $doctype . $head . $body;
+		}
+		// chance( 8 ) branch: content to place after </html>; it is emitted only when the end tags are.
+		$trailer = '';
+		if ( $this->rng->chance( 8 ) ) {
+			$this->mark_feature( 'content-after-html' );
+			$trailer = $this->rng->choice( array( $this->terminal_ascii( 6 ), '<!--x-->', '<div>tail</div>', '<!DOCTYPE html>' ) );
+		}
+		// Conventional document; the closing </body></html> (with any trailer) is itself optional.
+		return $doctype . '<html' . $attrs . '>' . $head . '<body' . $body_at . '>' . $body . ( $this->rng->chance( 75 ) ? '</body></html>' . $trailer : '' );
+	}
+	/** Builds optional head content: a title, a meta tag and a template, each included on its own chance() draw, in that order. */
+	private function head_nodes(): string {
+		$out = '';
+		if ( $this->rng->chance( 45 ) ) {
+			$out .= '<title>' . $this->terminal_text() . '</title>';
+		}
+		if ( $this->rng->chance( 35 ) ) {
+			$out .= '<meta' . $this->attrs() . '>';
+		}
+		if ( $this->rng->chance( 25 ) ) {
+			$out .= '<template>' . $this->nodes( 2, 'body' ) . '</template>';
+		}
+		return $out;
+	}
+	/** Emits a run of sibling nodes; the sibling count range narrows as $depth grows and for the deep-nesting profile. */
+	private function nodes( int $depth, string $context ): string {
+		if ( 'deep-nesting' === $this->profile ) {
+			$count = $this->rng->int( 1, 2 );
+		} elseif ( $depth > 4 ) {
+			$count = $this->rng->int( 1, 3 );
+		} elseif ( $depth > 2 ) {
+			$count = $this->rng->int( 1, 5 );
+		} else {
+			$count = $this->rng->int( 1, 7 );
+		}
+		// Stop early once the output passes 131072 bytes, trimming back to that size and flagging the hard truncation.
+		$out = '';
+		for ( $i = 0; $i < $count; ++$i ) {
+			$out .= $this->node( $depth, $context );
+			if ( strlen( $out ) > 131072 ) {
+				$this->mark_feature( 'generator:hard-truncated' );
+				return self::trim_to_max_bytes( $out, 131072 );
+			}
+		}
+		return $out;
+	}
+	/** Emits one node of a weighted-random kind. NOTE(review): $context is accepted but never read in this method; confirm that is intended. */
+	private function node( int $depth, string $context ): string {
+		if ( $depth <= 0 ) {
+			return $this->leaf();
+		}
+		// Base weight of each node kind; 'weird-tag' stays at 0 unless a profile below raises it.
+		$weights = array(
+			'element'         => 33,
+			'text'            => 16,
+			'charref'         => 1,
+			'comment'         => 7,
+			'void'            => 8,
+			'raw'             => 5,
+			'template'        => 5,
+			'table'           => 5,
+			'select'          => 2,
+			'foreign'         => 5,
+			'adoption'        => 2,
+			'list-chain'      => 1,
+			'special-closers' => 1,
+			'weird-tag'       => 0,
+			'doctype'         => 2,
+			'bogus'           => 2,
+		);
+		// Profile-specific weight overrides; profiles not listed here keep the base weights.
+		if ( 'tables' === $this->profile ) {
+			$weights['table'] = 35;
+			$weights['element'] = 15;
+		} elseif ( 'template' === $this->profile ) {
+			$weights['template'] = 35;
+		} elseif ( 'select' === $this->profile ) {
+			$weights['select'] = 35;
+			$weights['table']  = 8;
+			$weights['element'] = 15;
+		} elseif ( 'foreign-content' === $this->profile ) {
+			$weights['foreign'] = 35;
+		} elseif ( 'rawtext-rcdata' === $this->profile ) {
+			$weights['raw'] = 35;
+		} elseif ( 'comments-doctype-bogus' === $this->profile ) {
+			$weights['comment'] = 25;
+			$weights['doctype'] = 12;
+			$weights['bogus'] = 15;
+		} elseif ( 'attributes-entities' === $this->profile ) {
+			$weights['element'] = 50;
+			$weights['text'] = 22;
+			$weights['charref'] = 12;
+			$weights['weird-tag'] = 8;
+		} elseif ( 'formatting-adoption' === $this->profile ) {
+			$weights['element'] = 40;
+			$weights['adoption'] = 20;
+		} elseif ( 'incomplete-malformed' === $this->profile ) {
+			$weights['bogus'] = 25;
+			$weights['element'] = 28;
+			$weights['weird-tag'] = 15;
+			$weights['special-closers'] = 4;
+		}
+		// Draw the kind and emit it; kinds that nest further recurse with $depth - 1.
+		switch ( $this->rng->weighted( $weights ) ) {
+			case 'text':
+				$this->mark_feature( 'text' );
+				return $this->terminal_text();
+			case 'charref':
+				$this->mark_feature( 'text' );
+				return $this->character_reference( 'text' ) . $this->terminal_payload();
+			case 'comment':
+				$this->mark_feature( 'comment' );
+				return $this->comment();
+			case 'void':
+				$this->mark_feature( 'void-element' );
+				return '<' . $this->rng->choice( $this->void_tags ) . $this->attrs() . ( $this->rng->chance( 25 ) ? '/>' : '>' );
+			case 'raw':
+				return $this->raw_element();
+			case 'template':
+				$this->mark_feature( 'template' );
+				return '<template' . $this->attrs() . '>' . $this->nodes( $depth - 1, 'body' ) . ( $this->rng->chance( 80 ) ? '</template>' : '' );
+			case 'table':
+				return $this->table( $depth - 1 );
+			case 'select':
+				return $this->select_stress( $depth - 1 );
+			case 'foreign':
+				return $this->foreign( $depth - 1 );
+			case 'adoption':
+				return $this->adoption_pattern( $depth - 1 );
+			case 'list-chain':
+				return $this->auto_closing_chain( $depth - 1 );
+			case 'special-closers':
+				return $this->special_closers();
+			case 'weird-tag':
+				return $this->weird_element( $depth - 1 );
+			case 'doctype':
+				$this->mark_feature( 'doctype' );
+				return $this->doctype();
+			case 'bogus':
+				$this->mark_feature( 'bogus-markup' );
+				return $this->bogus();
+			case 'element':
+			default:
+				return $this->element( $depth - 1 );
+		}
+	}
+	/** Emits an element with attributes and child nodes; two profiles occasionally divert to weird_element() instead. */
+	private function element( int $depth ): string {
+		if ( 'attributes-entities' === $this->profile && $this->rng->chance( 16 ) ) {
+			return $this->weird_element( $depth );
+		}
+		if ( 'incomplete-malformed' === $this->profile && $this->rng->chance( 12 ) ) {
+			return $this->weird_element( $depth );
+		}
+		// tag_name() is always called first; the formatting-adoption profile then replaces its result with a tag from a fixed list.
+		$tag = $this->tag_name();
+		if ( 'formatting-adoption' === $this->profile ) {
+			$tag = $this->rng->choice( array( 'a', 'b', 'big', 'button', 'em', 'font', 'i', 'nobr', 'p', 'span', 'strong' ) );
+			$this->mark_feature( 'formatting-adoption-candidate' );
+		}
+		// The end tag is optional and, when present, sometimes names a different tag than the start tag.
+		$close = $this->rng->chance( 'incomplete-malformed' === $this->profile ? 55 : 85 );
+		$end   = $close ? '</' . ( $this->rng->chance( 88 ) ? $tag : $this->tag_name() ) . '>' : '';
+		return '<' . $tag . $this->attrs() . '>' . $this->nodes( $depth, 'body' ) . $end;
+	}
+	/** Emits an element with unusual tag syntax, choosing one of four weighted cases; always records the 'tag:weird-syntax' feature. */
+	private function weird_element( int $depth ): string {
+		$this->mark_feature( 'tag:weird-syntax' );
+		$case = $this->rng->weighted(
+			array(
+				'unusual-name'      => 36,
+				'invalid-name'      => 30,
+				'boundary-spacing'  => 18,
+				'malformed-closer'  => 16,
+			)
+		);
+		// invalid-name: the same invalid name is used for the start tag and the optional end tag.
+		if ( 'invalid-name' === $case ) {
+			$tag = $this->invalid_tag_name();
+			return '<' . $tag . $this->attrs() . '>' . $this->nodes( $depth, 'body' ) . ( $this->rng->chance( 40 ) ? '</' . $tag . '>' : '' );
+		}
+		// boundary-spacing: an ordinary tag name with tag_gap() output inserted around the attributes, an optional slash, and the end tag.
+		$tag = 'unusual-name' === $case ? $this->unusual_tag_name() : $this->tag_name();
+		if ( 'boundary-spacing' === $case ) {
+			$this->mark_feature( 'tag:weird-spacing' );
+			return '<' . $tag . $this->tag_gap() . $this->attrs() . $this->tag_gap() . ( $this->rng->chance( 25 ) ? '/' . $this->tag_gap() : '' ) . '>' . $this->nodes( $depth, 'body' ) . ( $this->rng->chance( 70 ) ? '</' . $tag . $this->tag_gap() . '>' : '' );
+		}
+		// malformed-closer: an ordinary start tag that is always closed with an invalid tag name.
+		if ( 'malformed-closer' === $case ) {
+			return '<' . $tag . $this->attrs() . '>' . $this->nodes( $depth, 'body' ) . '</' . $this->invalid_tag_name() . $this->tag_gap() . '>';
+		}
+		// unusual-name: an unusual tag name with an optional matching end tag.
+		return '<' . $tag . $this->attrs() . '>' . $this->nodes( $depth, 'body' ) . ( $this->rng->chance( 75 ) ? '</' . $tag . '>' : '' );
+	}
+	/**
+	 * Builds a <select> whose children stress select/option/optgroup nesting.
+	 *
+	 * Between one and four children are generated: options, optgroups,
+	 * "breaker" elements (input, textarea, keygen, button, hr, datalist), an
+	 * unclosed nested select, or arbitrary content. The finished select is
+	 * sometimes wrapped in a table cell, caption, or row.
+	 *
+	 * @param int $depth Remaining depth; arbitrary children become text at 0.
+	 * @return string Generated markup.
+	 */
+	private function select_stress( int $depth ): string {
+		$this->mark_feature( 'select' );
+		$children  = array();
+		$remaining = $this->rng->int( 1, 4 );
+		while ( $remaining-- > 0 ) {
+			$kind = $this->rng->weighted( array( 'option' => 38, 'optgroup' => 22, 'breaker' => 18, 'nested-select' => 10, 'other' => 12 ) );
+
+			if ( 'option' === $kind ) {
+				$this->mark_feature( 'select:option' );
+				$piece  = '<option' . $this->attrs() . '>' . $this->terminal_text();
+				$piece .= $this->rng->chance( 55 ) ? '</option>' : '';
+			} elseif ( 'optgroup' === $kind ) {
+				$this->mark_feature( 'select:optgroup' );
+				$piece  = '<optgroup' . $this->attrs() . '><option>' . $this->terminal_text();
+				$piece .= $this->rng->chance( 50 ) ? '</optgroup>' : '';
+			} elseif ( 'breaker' === $kind ) {
+				$this->mark_feature( 'select:breaker' );
+				$piece = $this->rng->choice( array( '<input>', '<textarea>x</textarea>', '<keygen>', '<button>b</button>', '<hr>', '<datalist><option>d</datalist>' ) );
+			} elseif ( 'nested-select' === $kind ) {
+				$this->mark_feature( 'select:nested' );
+				$piece = '<select' . $this->attrs() . '><option>' . $this->terminal_text();
+			} elseif ( $depth > 0 ) {
+				$piece = $this->node( max( 0, $depth - 1 ), 'select' );
+			} else {
+				$piece = $this->terminal_text();
+			}
+
+			$children[] = $piece;
+		}
+
+		$markup = '<select' . $this->attrs() . '>' . implode( '', $children );
+		if ( $this->rng->chance( 70 ) ) {
+			$markup .= '</select>';
+		}
+
+		if ( ! $this->rng->chance( 20 ) ) {
+			return $markup;
+		}
+
+		$this->mark_feature( 'select:in-table' );
+		$wrapper = $this->rng->choice( array( 'td', 'caption', 'tr' ) );
+		return '<table><' . $wrapper . '>' . $markup . '</' . $wrapper . '></table>';
+	}
+
+	/**
+	 * Classic adoption-agency and formatting-reconstruction shapes. Random
+	 * unclosed formatting tags reach these interleavings too rarely to rely
+	 * on, so emit the canonical patterns directly with random filler.
+	 *
+	 * All random pieces (two formatting tags, a block tag, three text runs and
+	 * the optional nested node) are drawn up front, before the pattern is
+	 * chosen, even when the chosen pattern does not use every piece.
+	 *
+	 * @param int $depth Remaining depth; when 0 the nested node is replaced by text.
+	 * @return string Generated markup.
+	 */
+	private function adoption_pattern( int $depth ): string {
+		$this->mark_feature( 'adoption-agency-pattern' );
+		$f1 = $this->rng->choice( array( 'b', 'i', 'em', 'strong', 'a', 'font', 'nobr', 'big' ) );
+		$f2 = $this->rng->choice( array( 'b', 'i', 'em', 'strong', 'a', 'font', 'nobr', 'big' ) );
+		$block = $this->rng->choice( array( 'p', 'div', 'address', 'blockquote' ) );
+		$t1 = $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		$t2 = $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		$t3 = $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		$inner = $depth > 0 ? $this->node( max( 0, $depth - 1 ), 'body' ) : $t3;
+
+		switch ( $this->rng->int( 1, 6 ) ) {
+			case 1:
+				// Misnested closers: <b>1<i>2</b>3</i>
+				$this->mark_feature( 'adoption:misnested-closers' );
+				return "<{$f1}>{$t1}<{$f2}>{$t2}</{$f1}>{$t3}</{$f2}>";
+			case 2:
+				// Formatting element spanning a block: <b>1<p>2</b>3</p>
+				$this->mark_feature( 'adoption:block-boundary' );
+				return "<{$f1}>{$t1}<{$block}>{$t2}</{$f1}>{$inner}</{$block}>";
+			case 3:
+				// Reconstruction across sibling blocks: <p><b>1</p><p>2</p>
+				$this->mark_feature( 'adoption:reconstruction' );
+				return "<{$block}><{$f1}>{$t1}</{$block}><{$block}>{$t2}</{$block}>";
+			case 4:
+				// Nested anchors: <a>1<div>2<a>3
+				$this->mark_feature( 'adoption:nested-anchor' );
+				return "<a>{$t1}<{$block}>{$t2}<a>{$t3}";
+			case 5:
+				// Noah's Ark: more than three identical formatting entries.
+				$this->mark_feature( 'adoption:noahs-ark' );
+				$open = str_repeat( "<{$f1}>{$t1}", $this->rng->int( 4, 8 ) );
+				return "<{$block}>{$open}</{$block}>{$t2}";
+			default:
+				// Repeated closers with content between.
+				$this->mark_feature( 'adoption:repeated-closers' );
+				return "<{$f1}><{$f2}>{$t1}</{$f1}>{$t2}</{$f1}>{$t3}</{$f2}>";
+		}
+	}
+
+	/**
+	 * Elements that auto-close same-kind predecessors: li, dd/dt, headings,
+	 * and p. Each chain is emitted without explicit closers for its items.
+	 *
+	 * @param int $depth Unused by the current patterns; kept for a uniform signature.
+	 * @return string Generated markup.
+	 */
+	private function auto_closing_chain( int $depth ): string {
+		$this->mark_feature( 'auto-closing-chain' );
+		switch ( $this->rng->int( 1, 4 ) ) {
+			case 1:
+				// List items inside a list container that is usually closed.
+				$wrap  = $this->rng->choice( array( 'ul', 'ol', 'menu' ) );
+				$items = '';
+				for ( $i = $this->rng->int( 2, 5 ); $i > 0; $i-- ) {
+					$items .= '<li' . $this->attrs() . '>' . $this->terminal_text();
+				}
+				return '<' . $wrap . '>' . $items . ( $this->rng->chance( 70 ) ? '</' . $wrap . '>' : '' );
+			case 2:
+				// Mixed dt/dd run inside a description list.
+				$items = '';
+				for ( $i = $this->rng->int( 2, 5 ); $i > 0; $i-- ) {
+					$items .= '<' . $this->rng->choice( array( 'dt', 'dd' ) ) . '>' . $this->terminal_text();
+				}
+				return '<dl>' . $items . ( $this->rng->chance( 70 ) ? '</dl>' : '' );
+			case 3:
+				// Consecutive headings of random levels.
+				$out = '';
+				for ( $i = $this->rng->int( 2, 4 ); $i > 0; $i-- ) {
+					$out .= '<h' . $this->rng->int( 1, 6 ) . '>' . $this->terminal_text();
+				}
+				return $out;
+			default:
+				// Consecutive unclosed paragraphs.
+				$out = '';
+				for ( $i = $this->rng->int( 2, 5 ); $i > 0; $i-- ) {
+					$out .= '<p' . $this->attrs() . '>' . $this->terminal_text();
+				}
+				return $out;
+		}
+	}
+
+	/**
+	 * Spec-special end tags: </br> and </p> create elements, plus assorted
+	 * stray closers and the <image> to <img> rename.
+	 *
+	 * Every candidate string is built (consuming RNG draws) before one of
+	 * them is chosen.
+	 *
+	 * @return string Generated markup.
+	 */
+	private function special_closers(): string {
+		$this->mark_feature( 'special-closers' );
+		return $this->rng->choice(
+			array(
+				'</br>',
+				'</p>',
+				'</p>' . $this->terminal_ascii( 4 ),
+				'<image' . $this->attrs() . '>',
+				'</body>' . $this->terminal_ascii( 4 ),
+				'</html>' . $this->terminal_ascii( 4 ),
+				'</head>' . $this->terminal_ascii( 4 ),
+				'</' . $this->rng->choice( array( 'div', 'span', 'b', 'table', 'select', 'option', 'li' ) ) . '>',
+			)
+		);
+	}
+
+	/**
+	 * Emits a table with random rows and cells, optionally wrapped in a table
+	 * section and optionally preceded by non-table content inside <table>.
+	 *
+	 * @param int $depth Remaining depth; cell and noise content use max( 0, $depth - 1 ).
+	 * @return string Generated markup.
+	 */
+	private function table( int $depth ): string {
+		$this->mark_feature( 'table' );
+		$cells = '';
+		// NOTE: the upper bound is re-drawn from the RNG on every condition
+		// check (rows and cells alike), not fixed once before the loop.
+		for ( $r = 0; $r < $this->rng->int( 1, 4 ); ++$r ) {
+			$row = '';
+			for ( $c = 0; $c < $this->rng->int( 1, 4 ); ++$c ) {
+				$cell_tag = $this->rng->choice( array( 'td', 'th' ) );
+				$row .= '<' . $cell_tag . $this->attrs() . '>' . $this->nodes( max( 0, $depth - 1 ), 'body' ) . ( $this->rng->chance( 80 ) ? '</' . $cell_tag . '>' : '' );
+			}
+			$cells .= '<tr' . $this->attrs() . '>' . $row . ( $this->rng->chance( 80 ) ? '</tr>' : '' );
+		}
+
+		// The section's opening and closing names are chosen independently,
+		// so they may not match (e.g. <thead> ... </tfoot>).
+		$section = $this->rng->chance( 50 ) ? '<' . $this->rng->choice( array( 'tbody', 'thead', 'tfoot' ) ) . '>' . $cells . '</' . $this->rng->choice( array( 'tbody', 'thead', 'tfoot' ) ) . '>' : $cells;
+		// Text plus an element placed directly inside <table>, before the rows.
+		$noise   = $this->rng->chance( 45 ) ? $this->terminal_text() . $this->element( max( 0, $depth - 1 ) ) : '';
+		if ( '' !== $noise ) {
+			$this->mark_feature( 'foster-parenting-candidate' );
+		}
+		return '<table' . $this->attrs() . '>' . $noise . $section . ( $this->rng->chance( 82 ) ? '</table>' : '' );
+	}
+
+	/**
+	 * Emits foreign (MathML or SVG) content containing an HTML subtree.
+	 *
+	 * Some calls are delegated to foreign_breakout(). Otherwise the result is
+	 * either a <math> tree with an <annotation-xml> child or an <svg> tree
+	 * with a foreignObject child; both may get a trailing CDATA section and
+	 * both are usually, but not always, closed.
+	 *
+	 * @param int $depth Remaining depth; nested content uses max( 0, $depth - 1 ).
+	 * @return string Generated markup.
+	 */
+	private function foreign( int $depth ): string {
+		$this->mark_feature( 'foreign-content' );
+
+		if ( $this->rng->chance( 18 ) ) {
+			return $this->foreign_breakout( $depth );
+		}
+
+		if ( $this->rng->chance( 50 ) ) {
+			$this->mark_feature( 'mathml-html-integration-point' );
+			// Attribute text for <annotation-xml>; the empty key means no attribute.
+			$encoding = $this->rng->weighted(
+				array(
+					'encoding="text/html"'              => 55,
+					'encoding="application/xhtml+xml"'  => 15,
+					'ENCODING="TEXT/HTML"'              => 10,
+					'encoding="text/bogus"'             => 10,
+					''                                  => 10,
+				)
+			);
+			if ( 'encoding="text/html"' !== $encoding ) {
+				$this->mark_feature( 'foreign:annotation-xml-encoding-variant' );
+			}
+			$inner = '<mi' . $this->attrs() . '>' . $this->terminal_text() . '</mi><annotation-xml' . ( '' === $encoding ? '' : ' ' . $encoding ) . '>' . $this->nodes( max( 0, $depth - 1 ), 'body' ) . '</annotation-xml>';
+			if ( $this->rng->chance( 12 ) ) {
+				$this->mark_feature( 'foreign:cdata' );
+				$inner .= '<![CDATA[' . $this->terminal_ascii( $this->rng->int( 1, 12 ) ) . ']]>';
+			}
+			return '<math' . $this->attrs() . '>' . $inner . ( $this->rng->chance( 85 ) ? '</math>' : '' );
+		}
+
+		$this->mark_feature( 'svg-foreignobject' );
+		// Spelling of the foreignObject tag name, including case-mangled forms.
+		$foreign_object = $this->rng->weighted(
+			array(
+				'foreignObject' => 70,
+				'foreignobject' => 15,
+				'FOREIGNOBJECT' => 8,
+				'foreignObjecT' => 7,
+			)
+		);
+		if ( 'foreignObject' !== $foreign_object ) {
+			$this->mark_feature( 'foreign:case-mangled-name' );
+		}
+		// The <g> opened here is never explicitly closed.
+		$inner = '<g><title>' . $this->terminal_text() . '</title><' . $foreign_object . '>' . $this->nodes( max( 0, $depth - 1 ), 'body' ) . '</' . $foreign_object . '>';
+		if ( $this->rng->chance( 12 ) ) {
+			$this->mark_feature( 'foreign:cdata' );
+			$inner .= '<![CDATA[' . $this->terminal_ascii( $this->rng->int( 1, 12 ) ) . ']]>';
+		}
+		return '<svg' . $this->attrs() . ' viewBox="0 0 10 10">' . $inner . ( $this->rng->chance( 85 ) ? '</svg>' : '' );
+	}
+
+	/**
+	 * HTML breakout constructs inside foreign content: breakout start tags
+	 * (div, p, table, ...) and <font> with color/face/size, which exit
+	 * foreign content; <font> without those attributes, which does not.
+	 *
+	 * The foreign root (<svg> or <math>) is left unclosed in every pattern
+	 * except the last, which closes it explicitly and appends trailing text.
+	 *
+	 * @param int $depth Remaining depth; nested content uses max( 0, $depth - 1 ).
+	 * @return string Generated markup.
+	 */
+	private function foreign_breakout( int $depth ): string {
+		$this->mark_feature( 'foreign:breakout' );
+		$root = $this->rng->chance( 50 ) ? 'svg' : 'math';
+		switch ( $this->rng->int( 1, 4 ) ) {
+			case 1:
+				// An HTML start tag placed inside <g>.
+				$breaker = $this->rng->choice( array( 'div', 'p', 'table', 'ul', 'h1', 'blockquote', 'body', 'br', 'center', 'dl', 'pre' ) );
+				$this->mark_feature( 'foreign:breakout-tag' );
+				return '<' . $root . '><g>' . $this->terminal_ascii( 3 ) . '<' . $breaker . '>' . $this->nodes( max( 0, $depth - 1 ), 'body' ) . ( $this->rng->chance( 50 ) ? '</' . $breaker . '>' : '' );
+			case 2:
+				// <font> carrying a color, face, or size attribute.
+				$this->mark_feature( 'foreign:font-breakout' );
+				$attr = $this->rng->choice( array( 'color="red"', 'face="serif"', 'size="3"', 'COLOR="x"' ) );
+				return '<' . $root . '><font ' . $attr . '>' . $this->terminal_ascii( 4 );
+			case 3:
+				// <font> with an unrelated attribute, or none at all.
+				$this->mark_feature( 'foreign:font-no-breakout' );
+				return '<' . $root . '><font ' . $this->rng->choice( array( 'data-x="1"', 'href="x"', '' ) ) . '>' . $this->terminal_ascii( 4 );
+			default:
+				// The root's end tag arrives while <g> is still open.
+				$this->mark_feature( 'foreign:breakout-closer' );
+				return '<' . $root . '><g>' . $this->terminal_ascii( 3 ) . '</' . $root . '>' . $this->terminal_ascii( 3 );
+		}
+	}
+
+	/**
+	 * Emits a <plaintext>, RCDATA, or RAWTEXT element with terminal content.
+	 *
+	 * RCDATA tag names come from $this->rcdata_tags and RAWTEXT names from
+	 * $this->raw_tags; the end tag is sometimes omitted.
+	 *
+	 * @return string Generated markup.
+	 */
+	private function raw_element(): string {
+		if ( $this->rng->chance( 4 ) ) {
+			$this->mark_feature( 'plaintext' );
+			$opener = '<plaintext' . $this->attrs() . '>';
+			$text   = $this->terminal_text( true );
+			return $opener . $text . $this->rng->choice( array( '', '</plaintext>', '<div>x' ) );
+		}
+
+		$is_rcdata = $this->rng->chance( 45 );
+		if ( $is_rcdata ) {
+			$name = $this->rng->choice( $this->rcdata_tags );
+			$this->mark_feature( 'rcdata' );
+		} else {
+			$name = $this->rng->choice( $this->raw_tags );
+			$this->mark_feature( 'rawtext' );
+		}
+
+		$opener  = '<' . $name . $this->attrs() . '>';
+		$content = $is_rcdata ? $this->terminal_rcdata() : $this->terminal_text( true );
+		$closer  = $this->rng->chance( 82 ) ? '</' . $name . '>' : '';
+		return $opener . $content . $closer;
+	}
+
+	/**
+	 * Emits a childless node: terminal text, or otherwise a comment.
+	 *
+	 * @return string Generated markup.
+	 */
+	private function leaf(): string {
+		if ( $this->rng->chance( 70 ) ) {
+			return $this->terminal_text();
+		}
+
+		return '<!--' . $this->terminal_comment() . '-->';
+	}
+
+	/**
+	 * Picks a tag name: occasionally unusual, occasionally custom, otherwise
+	 * one of $this->normal_tags.
+	 *
+	 * @return string Tag name without angle brackets.
+	 */
+	private function tag_name(): string {
+		if ( $this->rng->chance( 8 ) ) {
+			$name = $this->unusual_tag_name();
+		} elseif ( $this->rng->chance( 12 ) ) {
+			$name = $this->custom_name();
+		} else {
+			$name = $this->rng->choice( $this->normal_tags );
+		}
+
+		return $name;
+	}
+
+	/**
+	 * Records the 'tag:unusual-name' feature and picks one entry from
+	 * $this->unusual_tag_names.
+	 *
+	 * @return string Tag name without angle brackets.
+	 */
+	private function unusual_tag_name(): string {
+		$this->mark_feature( 'tag:unusual-name' );
+		$picked = $this->rng->choice( $this->unusual_tag_names );
+		return $picked;
+	}
+
+	/**
+	 * Produces a tag name that is invalid or at least suspicious.
+	 *
+	 * Variant 1 embeds a NUL byte after a leading letter. Variants 2-5 start
+	 * with "x" followed by a character that is odd inside a tag name. All
+	 * remaining variants start with a non-letter.
+	 *
+	 * @return string Tag name text, without angle brackets.
+	 */
+	private function invalid_tag_name(): string {
+		$variant = $this->rng->int( 1, 12 );
+
+		if ( 1 === $variant ) {
+			$this->mark_feature( 'tag:invalid-name' );
+			$this->mark_feature( 'tag:alpha-invalid-name' );
+			return 'x' . "\0" . $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		}
+
+		// Letter-led names containing one odd character.
+		$odd_second_char = array(
+			2 => '🙂',
+			3 => '<',
+			4 => '"',
+			5 => '=',
+		);
+		if ( isset( $odd_second_char[ $variant ] ) ) {
+			$this->mark_feature( 'tag:alpha-weird-name' );
+			return 'x' . $odd_second_char[ $variant ] . $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		}
+
+		// Everything below opens with a non-letter.
+		$this->mark_feature( 'tag:invalid-name' );
+		$this->mark_feature( 'tag:bogus-open-name' );
+
+		if ( 6 === $variant ) {
+			return '1' . $this->terminal_special_ascii( $this->rng->int( 1, 5 ) );
+		}
+		if ( 7 === $variant ) {
+			return ':' . $this->terminal_special_ascii( $this->rng->int( 1, 5 ) );
+		}
+
+		$plain_lead = array(
+			8  => '?',
+			9  => '!',
+			10 => '=',
+			11 => '"',
+		);
+		if ( isset( $plain_lead[ $variant ] ) ) {
+			return $plain_lead[ $variant ] . $this->terminal_ascii( $this->rng->int( 1, 6 ) );
+		}
+
+		$lead = $this->rng->choice( array( '#', '@', '$', '%', '|', '~' ) );
+		return $lead . $this->terminal_special_ascii( $this->rng->int( 1, 7 ) );
+	}
+
+	/**
+	 * Builds a hyphenated custom-element-style name: a fixed prefix, a hyphen,
+	 * and a random suffix.
+	 *
+	 * Runs of tab, LF, FF, CR, space, "<", ">" and "/" in the suffix collapse
+	 * to a single hyphen; an empty suffix becomes "x".
+	 *
+	 * @return string Tag name without angle brackets.
+	 */
+	private function custom_name(): string {
+		$prefix = $this->rng->choice( array( 'x', 'wp', 'custom', 'a', 'z' ) );
+
+		if ( $this->rng->chance( 45 ) ) {
+			$raw = $this->terminal_payload();
+		} else {
+			$raw = $this->terminal_ascii( $this->rng->int( 1, 12 ) );
+		}
+
+		$suffix = trim( (string) preg_replace( '/[\x09\x0a\x0c\x0d\x20<>\/]+/', '-', $raw ), '-' );
+
+		return $prefix . '-' . ( '' === $suffix ? 'x' : $suffix );
+	}
+
+	/**
+	 * Generates the attribute portion of a start tag.
+	 *
+	 * The 'attributes-entities' profile emits 1-8 attributes, other profiles
+	 * 0-4. Each attribute is a malformed chunk, a duplicate of an earlier
+	 * name, or a fresh name; it is then written bare, unquoted, or quoted.
+	 *
+	 * @return string Attribute text, each attribute preceded by its own gap;
+	 *                empty when no attributes are generated.
+	 */
+	private function attrs(): string {
+		$count = 'attributes-entities' === $this->profile ? $this->rng->int( 1, 8 ) : $this->rng->int( 0, 4 );
+		$out   = '';
+		// Malformed chunks are only produced under two profiles; 0 elsewhere.
+		$weird_attr_chance = 'attributes-entities' === $this->profile ? 28 : ( 'incomplete-malformed' === $this->profile ? 18 : 0 );
+		$seen_names = array();
+		for ( $i = 0; $i < $count; ++$i ) {
+			if ( $this->rng->chance( $weird_attr_chance ) ) {
+				$out .= $this->weird_attr_chunk();
+				continue;
+			}
+
+			if ( ! empty( $seen_names ) && $this->rng->chance( 8 ) ) {
+				// Duplicate attribute names: the parser must keep the first.
+				$this->mark_feature( 'attr:duplicate' );
+				$name = $this->rng->choice( $seen_names );
+				if ( $this->rng->chance( 40 ) ) {
+					// Re-case the duplicate (all caps or leading capital).
+					$name = $this->rng->chance( 50 ) ? strtoupper( $name ) : ucfirst( $name );
+				}
+			} else {
+				$name = $this->attr_name();
+			}
+			if ( '' === $name ) {
+				continue;
+			}
+			$seen_names[] = $name;
+			$gap = $this->attribute_gap();
+			if ( $this->rng->chance( 18 ) ) {
+				// Bare attribute: name only, no "=" and no value.
+				$out .= $gap . $name;
+				continue;
+			}
+			$value = $this->terminal_attr_value();
+			// The double quote is listed twice, so it is chosen more often.
+			$quote = $this->rng->choice( array( '"', "'", '', '"' ) );
+			if ( '' === $quote ) {
+				$this->mark_feature( 'attr:unquoted' );
+				$value = $this->unquoted_attr_value( $value );
+				$out .= $gap . $name . $this->attribute_equals() . $value;
+			} else {
+				$this->mark_feature( 'attr:quoted' );
+				$value = $this->quoted_attr_value( $value, $quote );
+				$out .= $gap . $name . $this->attribute_equals() . $quote . $value . $quote;
+			}
+		}
+		return $out;
+	}
+
+	/**
+	 * Picks an attribute name: a common one, one from
+	 * $this->unusual_attr_names, or a generated one.
+	 *
+	 * Generated names have runs of whitespace, quotes, "<", ">", "/" and "="
+	 * collapsed to a hyphen; an empty result becomes "data-empty".
+	 *
+	 * @return string Attribute name.
+	 */
+	private function attr_name(): string {
+		$kind = $this->rng->weighted( array( 'common' => 62, 'unusual' => 18, 'generated' => 20 ) );
+
+		if ( 'common' === $kind ) {
+			return $this->rng->choice( array( 'id', 'class', 'href', 'src', 'alt', 'title', 'data-x', 'data-Foo', 'xlink:href', 'xml:lang', 'checked', 'disabled', 'style' ) );
+		}
+
+		if ( 'unusual' === $kind ) {
+			$this->mark_feature( 'attr:weird-name' );
+			return $this->rng->choice( $this->unusual_attr_names );
+		}
+
+		if ( $this->rng->chance( 45 ) ) {
+			$raw = $this->terminal_payload();
+		} else {
+			$raw = $this->terminal_special_ascii( $this->rng->int( 1, 14 ) );
+		}
+
+		$generated = trim( (string) preg_replace( '/[\x09\x0a\x0c\x0d\x20"\'<>\/=]+/', '-', $raw ), '-' );
+		if ( '' === $generated ) {
+			return 'data-empty';
+		}
+
+		// Anything beyond letters, digits, "_", ":", "." and "-" counts as weird.
+		$has_odd_char = 1 === preg_match( '/[^A-Za-z0-9_:\.-]/', $generated );
+		if ( $has_odd_char ) {
+			$this->mark_feature( 'attr:weird-name' );
+		}
+
+		return $generated;
+	}
+
+	/**
+	 * Emits one deliberately malformed attribute-like chunk.
+	 *
+	 * The name, value and leading gap are drawn first, then one of eight
+	 * malformed layouts is chosen; not every layout uses all three pieces.
+	 *
+	 * @return string Attribute text beginning with a gap.
+	 */
+	private function weird_attr_chunk(): string {
+		$this->mark_feature( 'attr:malformed' );
+		$this->mark_feature( 'attr:weird-name' );
+		$name  = $this->terminal_attr_special_name();
+		$value = $this->terminal_attr_value();
+		$gap   = $this->attribute_gap();
+
+		switch ( $this->rng->int( 1, 8 ) ) {
+			case 1:
+				// Name prefixed with "@".
+				return $gap . '@' . $name . $this->attribute_equals() . '"' . $value . '"';
+			case 2:
+				// Name prefixed with "<".
+				return $gap . '<' . $name . $this->attribute_equals() . "'" . $value . "'";
+			case 3:
+				// Slash between the name and the equals sign.
+				return $gap . $name . '/' . $this->attribute_equals() . '"' . $value . '"';
+			case 4:
+				// Name wrapped in double quotes.
+				return $gap . '"' . $name . '"' . $this->attribute_equals() . "'" . $value . "'";
+			case 5:
+				// No name at all: "=" followed by a quoted value.
+				return $gap . '=' . '"' . $value . '"';
+			case 6:
+				// Extra gaps around "=" with an unquoted value.
+				return $gap . $name . $this->attribute_gap() . $this->attribute_equals() . $this->attribute_gap() . $this->unquoted_attr_value( $value );
+			case 7:
+				// Two special names joined by "<", no value.
+				return $gap . $name . '<' . $this->terminal_attr_special_name();
+			default:
+				// Unquoted value that starts with a character reference.
+				return $gap . $name . $this->attribute_equals() . $this->character_reference( 'attr' ) . $this->terminal_ascii( 2 );
+		}
+	}
+
+	/**
+	 * Generates an attribute name containing punctuation, brackets, quotes,
+	 * or a NUL byte.
+	 *
+	 * @return string Attribute name text.
+	 */
+	private function terminal_attr_special_name(): string {
+		$variant = $this->rng->int( 1, 8 );
+
+		if ( 1 === $variant ) {
+			return 'data-' . $this->terminal_special_ascii( $this->rng->int( 1, 5 ) );
+		}
+
+		if ( 2 === $variant ) {
+			return '[' . $this->terminal_ascii( $this->rng->int( 1, 4 ) ) . ']';
+		}
+
+		// Variants 3-7 differ only in the leading character.
+		$leading = array(
+			3 => ':',
+			4 => '.',
+			5 => '#',
+			6 => "'",
+			7 => '"',
+		);
+		if ( isset( $leading[ $variant ] ) ) {
+			return $leading[ $variant ] . $this->terminal_ascii( $this->rng->int( 1, 5 ) );
+		}
+
+		return 'x' . "\0" . $this->terminal_ascii( $this->rng->int( 1, 4 ) );
+	}
+
+	/**
+	 * Picks the whitespace placed before an attribute; anything other than a
+	 * single space is recorded as 'attr:weird-spacing'.
+	 *
+	 * @return string Whitespace separator.
+	 */
+	private function attribute_gap(): string {
+		$separator = $this->rng->choice( array( ' ', ' ', ' ', "\t", "\n", "\f", "\r\n", '  ', " \t " ) );
+		if ( ' ' === $separator ) {
+			return $separator;
+		}
+
+		$this->mark_feature( 'attr:weird-spacing' );
+		return $separator;
+	}
+
+	/**
+	 * Picks the "=" separator between attribute name and value, sometimes
+	 * padded with whitespace; padded forms are recorded as 'attr:weird-spacing'.
+	 *
+	 * @return string The equals sign with any surrounding whitespace.
+	 */
+	private function attribute_equals(): string {
+		$separator = $this->rng->choice( array( '=', '=', '=', ' = ', "\t=\n", "\f= ", " =\t" ) );
+		if ( '=' === $separator ) {
+			return $separator;
+		}
+
+		$this->mark_feature( 'attr:weird-spacing' );
+		return $separator;
+	}
+
+	/**
+	 * Makes a value usable without quotes by replacing each run of bytes
+	 * 0x00-0x20, quotes, "<", ">", "`" and "=" with a single underscore.
+	 *
+	 * @param string $value Raw attribute value.
+	 * @return string Value with the listed characters replaced.
+	 */
+	private function unquoted_attr_value( string $value ): string {
+		$cleaned = preg_replace( '/[\x00-\x20"\'<>`=]+/', '_', $value );
+		return (string) $cleaned;
+	}
+
+	/**
+	 * Replaces every occurrence of the enclosing quote inside the value with
+	 * the other quote character, so the value cannot end the attribute early.
+	 *
+	 * @param string $value Raw attribute value.
+	 * @param string $quote Quote character that will wrap the value.
+	 * @return string Value with the enclosing quote swapped out.
+	 */
+	private function quoted_attr_value( string $value, string $quote ): string {
+		$substitute = '"' === $quote ? "'" : '"';
+		return str_replace( $quote, $substitute, $value );
+	}
+
+	/**
+	 * Picks whitespace for use inside a tag; anything other than a single
+	 * space is recorded as 'tag:weird-spacing'.
+	 *
+	 * @return string Whitespace separator.
+	 */
+	private function tag_gap(): string {
+		$separator = $this->rng->choice( array( ' ', ' ', "\t", "\n", "\f", "\r\n", '  ', " \t " ) );
+		if ( ' ' === $separator ) {
+			return $separator;
+		}
+
+		$this->mark_feature( 'tag:weird-spacing' );
+		return $separator;
+	}
+
+	/**
+	 * Emits a DOCTYPE: usually the standard one, otherwise either a doctype
+	 * from a fixed list (recorded as 'doctype:quirky') or one with a random
+	 * name and quoted string.
+	 *
+	 * @return string DOCTYPE markup.
+	 */
+	private function doctype(): string {
+		if ( ! $this->rng->chance( 70 ) ) {
+			if ( $this->rng->chance( 35 ) ) {
+				// Quirks and limited-quirks doctypes change tree construction.
+				$this->mark_feature( 'doctype:quirky' );
+				$known = array(
+					'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">',
+					'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">',
+					'<!DOCTYPE html SYSTEM "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd">',
+					'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
+					'<!doctype HtMl>',
+					'<!DOCTYPE>',
+				);
+				return $this->rng->choice( $known );
+			}
+
+			$root_name = $this->rng->choice( array( 'html', 'HTML', 'svg', 'bogus' ) );
+			return '<!DOCTYPE ' . $root_name . ' "' . $this->terminal_ascii( 8 ) . '">';
+		}
+
+		return '<!DOCTYPE html>';
+	}
+
+	/**
+	 * Emits one piece of bogus markup: a CDATA section, a processing
+	 * instruction, a bogus declaration, a broken end tag, an unterminated
+	 * open tag, or a double-slash tag.
+	 *
+	 * @return string Generated markup.
+	 */
+	private function bogus(): string {
+		$kind = $this->rng->weighted( array( 'cdata' => 28, 'pi' => 20, 'bogus-declaration' => 18, 'bad-end-tag' => 14, 'short-open' => 12, 'double-slash' => 8 ) );
+
+		if ( 'pi' === $kind ) {
+			$this->mark_feature( 'bogus:pi-comment' );
+			return '<?target?>';
+		}
+		if ( 'bogus-declaration' === $kind ) {
+			$this->mark_feature( 'bogus:declaration-comment' );
+			return '<!not-a-comment>';
+		}
+		if ( 'bad-end-tag' === $kind ) {
+			return '</ ' . $this->terminal_ascii( 5 );
+		}
+		if ( 'short-open' === $kind ) {
+			return '<' . $this->terminal_ascii( $this->rng->int( 0, 12 ) );
+		}
+		if ( 'double-slash' === $kind ) {
+			return '<//' . $this->terminal_ascii( 10 ) . '>';
+		}
+
+		// 'cdata' and any unexpected key.
+		return '<![CDATA[' . $this->terminal_text() . ']]>';
+	}
+
+	/**
+	 * Emits a comment or comment-like construct and records which kind.
+	 *
+	 * Every kind except 'ordinary' maps to fixed markup; 'ordinary' wraps
+	 * terminal_comment() output in comment delimiters.
+	 *
+	 * @return string Generated markup.
+	 */
+	private function comment(): string {
+		$kind = $this->rng->weighted( array( 'ordinary' => 40, 'ordinary-simple' => 8, 'empty' => 10, 'space' => 6, 'short' => 8, 'short-hyphen' => 8, 'nested-hyphens' => 8, 'malformed-bang' => 4, 'malformed-greater-than' => 4, 'abrupt-eof' => 4, 'bogus-pi' => 4, 'bogus-declaration' => 4 ) );
+
+		// kind => array( markup, features recorded in order ).
+		$fixed = array(
+			'ordinary-simple'        => array( '<!--comment-->', array( 'comment:ordinary-simple' ) ),
+			'empty'                  => array( '<!---->', array( 'comment:empty' ) ),
+			'space'                  => array( '<!-- -->', array( 'comment:space' ) ),
+			'short'                  => array( '<!-->', array( 'comment:short-empty-end' ) ),
+			'short-hyphen'           => array( '<!--->', array( 'comment:short-hyphen-end' ) ),
+			'nested-hyphens'         => array( '<!--a<!--b--c-->', array( 'comment:nested-hyphens' ) ),
+			'malformed-bang'         => array( '<!--x--!>', array( 'comment:malformed-ending', 'comment:malformed-bang-ending' ) ),
+			'malformed-greater-than' => array( '<!--x>', array( 'comment:malformed-ending', 'comment:malformed-greater-than-ending' ) ),
+			'abrupt-eof'             => array( '<!--x', array( 'comment:malformed-ending', 'comment:unterminated' ) ),
+			'bogus-pi'               => array( '<?target?>', array( 'comment:bogus-pi' ) ),
+			'bogus-declaration'      => array( '<!not-a-comment>', array( 'comment:bogus-declaration' ) ),
+		);
+
+		if ( isset( $fixed[ $kind ] ) ) {
+			list( $markup, $features ) = $fixed[ $kind ];
+			foreach ( $features as $feature ) {
+				$this->mark_feature( $feature );
+			}
+			return $markup;
+		}
+
+		// 'ordinary' and any unexpected key.
+		$this->mark_feature( 'comment:ordinary' );
+		return '<!--' . $this->terminal_comment() . '-->';
+	}
+
+	/**
+	 * Concatenates one to five payload chunks. Unless $raw is set, a chunk
+	 * may be followed by a character reference or a bare "<" / ">".
+	 *
+	 * @param bool $raw When true, only payload chunks are emitted.
+	 * @return string Generated text.
+	 */
+	private function terminal_text( bool $raw = false ): string {
+		$text = '';
+		for ( $left = $this->rng->int( 1, 5 ); $left > 0; --$left ) {
+			$text .= $this->terminal_payload();
+			// The RNG is not consulted at all in raw mode.
+			if ( $raw ) {
+				continue;
+			}
+			if ( $this->rng->chance( 35 ) ) {
+				$text .= $this->entity_or_markup_text();
+			}
+		}
+		return $text;
+	}
+
+	/**
+	 * Concatenates one to five payload chunks for RCDATA content, each
+	 * possibly followed by a character reference in the 'rcdata' context.
+	 *
+	 * @return string Generated text.
+	 */
+	private function terminal_rcdata(): string {
+		$text = '';
+		for ( $left = $this->rng->int( 1, 5 ); $left > 0; --$left ) {
+			$text .= $this->terminal_payload();
+			if ( $this->rng->chance( 55 ) ) {
+				$text .= $this->character_reference( 'rcdata' );
+			}
+		}
+		return $text;
+	}
+
+	/**
+	 * Generates raw text for a comment body, rewriting any "-->" to "-- >".
+	 *
+	 * @return string Comment body without delimiters.
+	 */
+	private function terminal_comment(): string {
+		$body = $this->terminal_text( true );
+		return str_replace( '-->', '-- >', $body );
+	}
+
+	/**
+	 * Generates an attribute value: one payload chunk, sometimes followed by
+	 * a character reference or markup-like text.
+	 *
+	 * @return string Raw (not yet quoted or sanitized) attribute value.
+	 */
+	private function terminal_attr_value(): string {
+		$value = $this->terminal_payload();
+		if ( $this->rng->chance( 45 ) ) {
+			$value .= $this->entity_or_markup_attr();
+		}
+		return $value;
+	}
+
+	/**
+	 * Generates one payload chunk; the kind is drawn from payload_weights()
+	 * and recorded as a 'payload:*' feature.
+	 *
+	 * @return string Payload text.
+	 */
+	private function terminal_payload(): string {
+		$kind = $this->rng->weighted( $this->payload_weights() );
+
+		if ( 'utf8' === $kind ) {
+			$this->mark_feature( 'payload:utf8' );
+			return $this->rng->choice( array( 'é', '雪', '🙂', 'β', 'עברית', 'مرحبا', 'नमस्ते' ) );
+		}
+		if ( 'nulls' === $kind ) {
+			$this->mark_feature( 'payload:nul' );
+			return $this->terminal_payload_ascii( 3 ) . "\0" . $this->terminal_payload_ascii( 3 );
+		}
+		if ( 'controls' === $kind ) {
+			$this->mark_feature( 'payload:control' );
+			return $this->terminal_control();
+		}
+		if ( 'repeat' === $kind ) {
+			$this->mark_feature( 'payload:repeat' );
+			return $this->terminal_repeat();
+		}
+		if ( 'long-ascii' === $kind ) {
+			$this->mark_feature( 'payload:long-ascii' );
+			return $this->terminal_payload_ascii( $this->rng->int( 64, 512 ) );
+		}
+
+		// 'ascii' and any unexpected key.
+		$this->mark_feature( 'payload:ascii' );
+		return $this->terminal_payload_ascii( $this->terminal_ascii_payload_length() );
+	}
+
+	/**
+	 * Generates a text-only input and records its length class.
+	 *
+	 * Under the 'stress-long' payload policy the text is 64-1024 characters.
+	 * Otherwise it is usually 0-10 characters, and 11-256 the rest of the time.
+	 *
+	 * @return string Generated text.
+	 */
+	private function text_fragment_input(): string {
+		$this->mark_feature( 'input:text-fragment' );
+
+		if ( 'stress-long' === $this->payload_policy ) {
+			$this->mark_feature( 'input:long' );
+			$size = $this->rng->int( 64, 1024 );
+		} elseif ( $this->rng->chance( 78 ) ) {
+			$size = $this->rng->int( 0, 10 );
+			$this->mark_feature( 'input:short' );
+			$this->mark_feature( 'input:length-' . $size );
+		} else {
+			$this->mark_feature( 'input:medium' );
+			$size = $this->rng->int( 11, 256 );
+		}
+
+		return $this->terminal_payload_ascii( $size );
+	}
+
+	/**
+	 * Draws a length for an ASCII payload: usually 0-10 (recorded per exact
+	 * length, with 0 also recorded as empty), otherwise 11-96.
+	 *
+	 * @return int Payload length in characters.
+	 */
+	private function terminal_ascii_payload_length(): int {
+		if ( ! $this->rng->chance( 72 ) ) {
+			$this->mark_feature( 'payload:medium-ascii' );
+			return $this->rng->int( 11, 96 );
+		}
+
+		$size = $this->rng->int( 0, 10 );
+		$this->mark_feature( 'payload:short-ascii' );
+		$this->mark_feature( 'payload:ascii-length-' . $size );
+		if ( 0 === $size ) {
+			$this->mark_feature( 'payload:empty-ascii' );
+		}
+		return $size;
+	}
+
+	/**
+	 * Returns the payload-kind weights for the current payload policy.
+	 *
+	 * Unrecognized policies use the 'mostly-valid' weights.
+	 *
+	 * @return array Map of payload kind to weight.
+	 */
+	private function payload_weights(): array {
+		$policy = $this->payload_policy;
+
+		if ( 'valid-utf8' === $policy ) {
+			return array( 'ascii' => 54, 'utf8' => 32, 'nulls' => 4, 'controls' => 4, 'repeat' => 6 );
+		}
+		if ( 'ascii-structural' === $policy ) {
+			return array( 'ascii' => 74, 'nulls' => 4, 'controls' => 6, 'repeat' => 16 );
+		}
+		if ( 'stress-long' === $policy ) {
+			return array( 'ascii' => 20, 'utf8' => 8, 'nulls' => 5, 'controls' => 5, 'repeat' => 42, 'long-ascii' => 20 );
+		}
+
+		// 'mostly-valid' and anything else.
+		return array( 'ascii' => 50, 'utf8' => 25, 'nulls' => 7, 'controls' => 8, 'repeat' => 10 );
+	}
+
+	/**
+	 * Returns a character reference for text context, or otherwise a bare
+	 * "<" or ">".
+	 *
+	 * @return string Reference or markup character.
+	 */
+	private function entity_or_markup_text(): string {
+		if ( ! $this->rng->chance( 78 ) ) {
+			$literal = $this->rng->choice( array( '<', '>' ) );
+			$this->mark_entity_feature( $literal );
+			return $literal;
+		}
+
+		return $this->character_reference( 'text' );
+	}
+
+	/**
+	 * Returns a character reference for attribute context, or otherwise
+	 * markup-like text ("<tag>", "<" or ">").
+	 *
+	 * @return string Reference or markup-like text.
+	 */
+	private function entity_or_markup_attr(): string {
+		if ( ! $this->rng->chance( 82 ) ) {
+			$literal = $this->rng->choice( array( '<tag>', '<', '>' ) );
+			$this->mark_entity_feature( $literal );
+			return $literal;
+		}
+
+		return $this->character_reference( 'attr' );
+	}
+
+	/**
+	 * Generates one character reference and records the matching
+	 * 'charref:*' features for the given context.
+	 *
+	 * @param string $context Label used in feature names (callers in this
+	 *                        file pass 'text', 'attr' or 'rcdata').
+	 * @return string The character reference text.
+	 */
+	private function character_reference( string $context ): string {
+		$this->mark_feature( 'charref:' . $context );
+		switch ( $this->rng->weighted( array( 'named-semicolon' => 28, 'named-missing-semicolon' => 24, 'decimal' => 18, 'hex' => 18, 'invalid' => 12 ) ) ) {
+			case 'named-semicolon':
+				$this->mark_character_reference_feature( $context, 'named' );
+				$this->mark_character_reference_feature( $context, 'named-semicolon' );
+				$name = $this->known_named_character_reference( $context );
+				return '&' . $name . ';';
+
+			case 'named-missing-semicolon':
+				$this->mark_character_reference_feature( $context, 'named' );
+				$this->mark_character_reference_feature( $context, 'named-missing-semicolon' );
+				// The name comes from one of two lists: "legacy" or "invalid"
+				// semicolon-less references.
+				if ( $this->rng->chance( 65 ) ) {
+					$this->mark_character_reference_feature( $context, 'named-missing-semicolon-legacy' );
+					$name = $this->rng->choice( $this->legacy_semicolonless_named_character_references );
+				} else {
+					$this->mark_character_reference_feature( $context, 'named-missing-semicolon-invalid' );
+					$name = $this->rng->choice( $this->invalid_semicolonless_named_character_references );
+				}
+				// Names containing any uppercase letter are recorded as 'casing'.
+				if ( strtolower( $name ) !== $name ) {
+					$this->mark_character_reference_feature( $context, 'casing' );
+				}
+				return '&' . $name;
+
+			case 'decimal':
+				$this->mark_character_reference_feature( $context, 'numeric-decimal' );
+				$this->mark_character_reference_feature( $context, 'numeric-valid' );
+				return '&#' . $this->decimal_character_reference_digits( $context ) . ';';
+
+			case 'hex':
+				$this->mark_character_reference_feature( $context, 'numeric-hex' );
+				$this->mark_character_reference_feature( $context, 'numeric-valid' );
+				// The hex marker itself is randomly lower- or uppercase.
+				return '&#' . ( $this->rng->chance( 50 ) ? 'x' : 'X' ) . $this->hex_character_reference_digits( $context ) . ';';
+
+			case 'invalid':
+			default:
+				return $this->invalid_character_reference( $context );
+		}
+	}
+
+	/**
+	 * Records a character-reference feature twice: once globally and once
+	 * scoped to the given context.
+	 *
+	 * @param string $context Context label used in the scoped feature name.
+	 * @param string $feature Feature suffix.
+	 */
+	private function mark_character_reference_feature( string $context, string $feature ): void {
+		$labels = array(
+			'charref:' . $feature,
+			'charref:' . $context . ':' . $feature,
+		);
+		foreach ( $labels as $label ) {
+			$this->mark_feature( $label );
+		}
+	}
+
+	/**
+	 * Picks a name from $this->named_character_references, recording
+	 * 'casing' when the name contains an uppercase letter.
+	 *
+	 * @param string $context Context label for feature recording.
+	 * @return string Reference name without "&" or ";".
+	 */
+	private function known_named_character_reference( string $context ): string {
+		$picked       = $this->rng->choice( $this->named_character_references );
+		$is_lowercase = strtolower( $picked ) === $picked;
+		if ( ! $is_lowercase ) {
+			$this->mark_character_reference_feature( $context, 'casing' );
+		}
+		return $picked;
+	}
+
+	/**
+	 * Picks the decimal digits of a numeric character reference from a
+	 * fixed list, sometimes prefixed with one to eight zeros.
+	 *
+	 * @param string $context Context label for feature recording.
+	 * @return string Decimal digit string.
+	 */
+	private function decimal_character_reference_digits( string $context ): string {
+		$code_point = $this->rng->choice( array( '34', '38', '60', '62', '65', '160', '169', '65533', '128578' ) );
+		if ( ! $this->rng->chance( 45 ) ) {
+			return $code_point;
+		}
+
+		$this->mark_character_reference_feature( $context, 'leading-zero' );
+		$padding = str_repeat( '0', $this->rng->int( 1, 8 ) );
+		return $padding . $code_point;
+	}
+
+	/**
+	 * Picks the hex digits of a numeric character reference from a fixed
+	 * list, sometimes zero-padded, then randomizes the case of each letter.
+	 *
+	 * @param string $context Context label for feature recording.
+	 * @return string Hex digit string without the "x" marker.
+	 */
+	private function hex_character_reference_digits( string $context ): string {
+		$hex = $this->rng->choice( array( '22', '26', '3C', '3e', '41', 'a0', '00A9', '1F642', 'FFFD' ) );
+		if ( $this->rng->chance( 45 ) ) {
+			$this->mark_character_reference_feature( $context, 'leading-zero' );
+			$hex = str_repeat( '0', $this->rng->int( 1, 8 ) ) . $hex;
+		}
+
+		$recased = '';
+		$size    = strlen( $hex );
+		for ( $at = 0; $at < $size; ++$at ) {
+			$digit = $hex[ $at ];
+			if ( ! ctype_alpha( $digit ) ) {
+				$recased .= $digit;
+				continue;
+			}
+
+			// Every letter digit records 'casing' and gets a random case.
+			$this->mark_character_reference_feature( $context, 'casing' );
+			$recased .= $this->rng->chance( 50 ) ? strtolower( $digit ) : strtoupper( $digit );
+		}
+		return $recased;
+	}
+
+	/**
+	 * Picks an invalid or edge-case character reference (decimal, hex, or
+	 * named) from fixed lists and records the matching features.
+	 *
+	 * @param string $context Context label for feature recording.
+	 * @return string The reference text.
+	 */
+	private function invalid_character_reference( string $context ): string {
+		$this->mark_character_reference_feature( $context, 'invalid' );
+		$family = $this->rng->weighted( array( 'named' => 55, 'decimal' => 20, 'hex' => 25 ) );
+
+		if ( 'decimal' === $family ) {
+			$this->mark_character_reference_feature( $context, 'numeric-decimal' );
+			$this->mark_character_reference_feature( $context, 'numeric-invalid' );
+			return $this->rng->choice( array( '&#;', '&#0;', '&#00000000;', '&#13;', '&#99999999;', '&#-1;' ) );
+		}
+
+		if ( 'hex' === $family ) {
+			$this->mark_character_reference_feature( $context, 'numeric-hex' );
+			$this->mark_character_reference_feature( $context, 'numeric-invalid' );
+			return $this->rng->choice( array( '&#x;', '&#x0;', '&#X0000;', '&#xD800;', '&#x110000;' ) );
+		}
+
+		// 'named' and any unexpected key.
+		$this->mark_character_reference_feature( $context, 'named' );
+		return $this->rng->choice( array( '&bogus;', '&NoSuchEntity', '&;', '&amp ;', '&noti;', '&notit;', '&copyright;', '&centerdo;', '&ngE', '&divideontime;', '&amp&amp;', '&&gt;' ) );
+	}
+
+	/**
+	 * Records an 'entity:*' feature for the first category the value matches:
+	 * numeric zero, U+FFFD, markup-like text, or anything starting with "&".
+	 * Values matching none of these record nothing.
+	 *
+	 * @param string $value Emitted text to classify.
+	 */
+	private function mark_entity_feature( string $value ): void {
+		$is_decimal_zero = 1 === preg_match( '/^&#(?:0+)?0;$/', $value );
+		if ( $is_decimal_zero || 1 === preg_match( '/^&#[xX](?:0+)?0;$/', $value ) ) {
+			$this->mark_feature( 'entity:numeric-zero' );
+			return;
+		}
+
+		$is_hex_fffd = 1 === preg_match( '/^&#[xX](?:0+)?[fF]{3}[dD];$/', $value );
+		if ( $is_hex_fffd || '&#65533;' === $value ) {
+			$this->mark_feature( 'entity:fffd' );
+			return;
+		}
+
+		if ( '<' === $value || '>' === $value || '<tag>' === $value ) {
+			$this->mark_feature( 'entity:markup-like' );
+			return;
+		}
+
+		if ( 0 === strpos( $value, '&' ) ) {
+			$this->mark_feature( 'entity:named' );
+		}
+	}
+
+	/**
+	 * Emits one control sequence (CR, LF, CRLF, tab, form feed, 0x01 or
+	 * 0x1F) followed by four payload characters.
+	 *
+	 * @return string Generated text.
+	 */
+	private function terminal_control(): string {
+		$control = $this->rng->choice( array( "\r", "\n", "\r\n", "\t", "\f", "\x01", "\x1f" ) );
+		return $control . $this->terminal_payload_ascii( 4 );
+	}
+
+	/**
+	 * Emits a run of one repeated character ("a", "<", "&" or space).
+	 *
+	 * The run is 64-1024 long under the 'stress-long' payload policy and
+	 * 4-64 otherwise; runs longer than 64 record 'payload:long-repeat'.
+	 * The length is drawn before the character.
+	 *
+	 * @return string Generated text.
+	 */
+	private function terminal_repeat(): string {
+		if ( 'stress-long' === $this->payload_policy ) {
+			$run = $this->rng->int( 64, 1024 );
+		} else {
+			$run = $this->rng->int( 4, 64 );
+		}
+
+		if ( $run > 64 ) {
+			$this->mark_feature( 'payload:long-repeat' );
+		}
+
+		$unit = $this->rng->choice( array( 'a', '<', '&', ' ' ) );
+		return str_repeat( $unit, $run );
+	}
+
+	/**
+	 * Builds a random string of the given length from letters, digits, space
+	 * and mild punctuation (no "<", ">", "&", quotes or "=").
+	 *
+	 * @param int $length Number of characters; zero or less yields ''.
+	 * @return string Generated text.
+	 */
+	private function terminal_ascii( int $length ): string {
+		$pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -_:/.,;#[](){}';
+		$last = strlen( $pool ) - 1;
+		$text = '';
+		for ( $left = $length; $left > 0; --$left ) {
+			$text .= $pool[ $this->rng->int( 0, $last ) ];
+		}
+		return $text;
+	}
+
+	/**
+	 * Builds a random string of the given length from letters, digits and a
+	 * wider punctuation set (no whitespace, "<", ">", "&", quotes or "=").
+	 *
+	 * @param int $length Number of characters; zero or less yields ''.
+	 * @return string Generated text.
+	 */
+	private function terminal_special_ascii( int $length ): string {
+		$pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_:.@#[](){}!?$%^*+~,|';
+		$last = strlen( $pool ) - 1;
+		$text = '';
+		for ( $left = $length; $left > 0; --$left ) {
+			$text .= $pool[ $this->rng->int( 0, $last ) ];
+		}
+		return $text;
+	}
+
+	/**
+	 * Builds a random string of the given length from an alphabet in which
+	 * the HTML syntax characters (&, <, >, quotes, =) are repeated so they
+	 * are drawn more often. Each character is passed to
+	 * mark_ascii_syntax_feature().
+	 *
+	 * @param int $length Number of characters; zero or less yields ''.
+	 * @return string Generated text.
+	 */
+	private function terminal_payload_ascii( int $length ): string {
+		$pool = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -_:/.,;#[](){}&&&&<<<<>>>>\"\"''====";
+		$last = strlen( $pool ) - 1;
+		$text = '';
+		for ( $left = $length; $left > 0; --$left ) {
+			$picked = $pool[ $this->rng->int( 0, $last ) ];
+			$this->mark_ascii_syntax_feature( $picked );
+			$text .= $picked;
+		}
+		return $text;
+	}
+
+	/**
+	 * Records which HTML syntax character was emitted, plus a generic
+	 * 'ascii:syntax-char' feature. Other characters record nothing.
+	 *
+	 * @param string $char Single emitted character.
+	 */
+	private function mark_ascii_syntax_feature( string $char ): void {
+		$features = array(
+			'&' => 'ascii:syntax-ampersand',
+			'<' => 'ascii:syntax-less-than',
+			'>' => 'ascii:syntax-greater-than',
+			'"' => 'ascii:syntax-double-quote',
+			"'" => 'ascii:syntax-single-quote',
+			'=' => 'ascii:syntax-equals',
+		);
+
+		if ( ! isset( $features[ $char ] ) ) {
+			return;
+		}
+
+		$this->mark_feature( $features[ $char ] );
+		$this->mark_feature( 'ascii:syntax-char' );
+	}
+}
diff --git a/tools/html-api-fuzz/lib/HtmlApiBootstrap.php b/tools/html-api-fuzz/lib/HtmlApiBootstrap.php
new file mode 100644
index 0000000000000..edfc5466f0fc9
--- /dev/null
+++ b/tools/html-api-fuzz/lib/HtmlApiBootstrap.php
@@ -0,0 +1,59 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Requires a fixed list of WordPress source files, ending with the HTML API
+ * processor classes, plus a stubs file when certain functions are undefined.
+ */
+class HtmlApiBootstrap {
+	/** @var bool True once load() has required every file. */
+	private static $loaded = false;
+
+	/**
+	 * Requires the stubs file when needed, then each source file in order.
+	 *
+	 * The stubs are pulled in if any of `__()`, `_doing_it_wrong()` or
+	 * `wp_trigger_error()` is undefined. Paths are resolved against
+	 * repo_root(). Calls after the first completed one return immediately.
+	 */
+	public static function load(): void {
+		if ( self::$loaded ) {
+			return;
+		}
+
+		foreach ( array( '__', '_doing_it_wrong', 'wp_trigger_error' ) as $needed ) {
+			if ( ! function_exists( $needed ) ) {
+				require_once __DIR__ . '/wp-stubs.php';
+				break;
+			}
+		}
+
+		$prefix  = repo_root() . DIRECTORY_SEPARATOR;
+		$sources = array(
+			'src/wp-includes/compat.php',
+			'src/wp-includes/compat-utf8.php',
+			'src/wp-includes/utf8.php',
+			'src/wp-includes/class-wp-token-map.php',
+			'src/wp-includes/html-api/html5-named-character-references.php',
+			'src/wp-includes/html-api/class-wp-html-attribute-token.php',
+			'src/wp-includes/html-api/class-wp-html-span.php',
+			'src/wp-includes/html-api/class-wp-html-doctype-info.php',
+			'src/wp-includes/html-api/class-wp-html-text-replacement.php',
+			'src/wp-includes/html-api/class-wp-html-decoder.php',
+			'src/wp-includes/html-api/class-wp-html-tag-processor.php',
+			'src/wp-includes/html-api/class-wp-html-unsupported-exception.php',
+			'src/wp-includes/html-api/class-wp-html-active-formatting-elements.php',
+			'src/wp-includes/html-api/class-wp-html-open-elements.php',
+			'src/wp-includes/html-api/class-wp-html-token.php',
+			'src/wp-includes/html-api/class-wp-html-stack-event.php',
+			'src/wp-includes/html-api/class-wp-html-processor-state.php',
+			'src/wp-includes/html-api/class-wp-html-processor.php',
+		);
+
+		foreach ( $sources as $relative_path ) {
+			require_once $prefix . $relative_path;
+		}
+
+		self::$loaded = true;
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Mutator.php b/tools/html-api-fuzz/lib/Mutator.php
new file mode 100644
index 0000000000000..9f77321a17cf0
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Mutator.php
@@ -0,0 +1,165 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Deterministic byte- and markup-level mutations over corpus inputs. All
+ * randomness flows through the provided Prng, so a (seed, corpus) pair always
+ * produces the same mutated input.
+ */
+class Mutator {
+	private const INTERESTING_BYTES = array( '<', '>', '&', '"', "'", '=', '/', '!', '-', ' ', "\n", "\r", "\t", "\f", "\0", ';', '`' );
+	private const SWAP_TAG_NAMES    = array( 'select', 'option', 'table', 'td', 'tr', 'template', 'b', 'a', 'p', 'div', 'svg', 'math', 'title', 'textarea', 'script', 'style', 'plaintext', 'noframes', 'body', 'html', 'head', 'frameset' );
+
+	/**
+	 * Relative weight of each operation, passed to Prng::weighted() as-is.
+	 * Entry order is significant: weighted() walks the map in order.
+	 */
+	private const OPERATION_WEIGHTS = array(
+		'insert-byte'     => 18,
+		'delete-chunk'    => 18,
+		'duplicate-chunk' => 12,
+		'replace-byte'    => 16,
+		'swap-tag-name'   => 14,
+		'toggle-case'     => 8,
+		'splice-corpus'   => 14,
+	);
+
+	/**
+	 * Runs 1-4 mutation rounds over the input and reports which operations ran.
+	 *
+	 * A round whose operation cannot be applied leaves the input untouched and
+	 * is not reported.
+	 *
+	 * @return array{input: string, operations: string[]}
+	 */
+	public static function mutate( string $input, Prng $rng, array $corpus_entries = array() ): array {
+		$applied = array();
+
+		for ( $remaining = $rng->int( 1, 4 ); $remaining > 0; --$remaining ) {
+			$name    = $rng->weighted( self::OPERATION_WEIGHTS );
+			$mutated = self::apply( $name, $input, $rng, $corpus_entries );
+			if ( null === $mutated ) {
+				continue;
+			}
+
+			$input     = $mutated;
+			$applied[] = $name;
+		}
+
+		return array(
+			'input'      => $input,
+			'operations' => $applied,
+		);
+	}
+
+	/**
+	 * Dispatches one named operation. Returns null when the operation is
+	 * unknown or cannot be applied to this input.
+	 */
+	private static function apply( string $operation, string $input, Prng $rng, array $corpus_entries ): ?string {
+		switch ( $operation ) {
+			case 'insert-byte':
+				return self::insert_byte( $input, $rng );
+			case 'delete-chunk':
+				return self::delete_chunk( $input, $rng );
+			case 'duplicate-chunk':
+				return self::duplicate_chunk( $input, $rng );
+			case 'replace-byte':
+				return self::replace_byte( $input, $rng );
+			case 'swap-tag-name':
+				return self::swap_tag_name( $input, $rng );
+			case 'toggle-case':
+				return self::toggle_case( $input, $rng );
+			case 'splice-corpus':
+				return self::splice_corpus( $input, $rng, $corpus_entries );
+			default:
+				return null;
+		}
+	}
+
+	/** Draws an insertion offset in [0, $length]; an empty input always yields 0 without a draw. */
+	private static function insertion_point( int $length, Prng $rng ): int {
+		return $length > 0 ? $rng->int( 0, $length ) : 0;
+	}
+
+	/**
+	 * Draws a start offset, then a run of 1..$cap bytes that ends inside the input.
+	 *
+	 * @return int[] Start offset and run length, in that order. Expects $length >= 1.
+	 */
+	private static function span( int $length, int $cap, Prng $rng ): array {
+		$start = $rng->int( 0, $length - 1 );
+		$size  = $rng->int( 1, max( 1, min( $cap, $length - $start ) ) );
+		return array( $start, $size );
+	}
+
+	/** Inserts one interesting byte at a drawn offset. */
+	private static function insert_byte( string $input, Prng $rng ): string {
+		$at = self::insertion_point( strlen( $input ), $rng );
+		return substr_replace( $input, $rng->choice( self::INTERESTING_BYTES ), $at, 0 );
+	}
+
+	/** Removes up to 32 bytes; inputs shorter than two bytes are left alone. */
+	private static function delete_chunk( string $input, Prng $rng ): ?string {
+		$length = strlen( $input );
+		if ( $length < 2 ) {
+			return null;
+		}
+		list( $at, $len ) = self::span( $length, 32, $rng );
+		return substr_replace( $input, '', $at, $len );
+	}
+
+	/** Repeats a run of up to 24 bytes immediately after itself. */
+	private static function duplicate_chunk( string $input, Prng $rng ): ?string {
+		$length = strlen( $input );
+		if ( $length < 1 ) {
+			return null;
+		}
+		list( $at, $len ) = self::span( $length, 24, $rng );
+		return substr_replace( $input, substr( $input, $at, $len ), $at + $len, 0 );
+	}
+
+	/** Overwrites one byte with an interesting byte. */
+	private static function replace_byte( string $input, Prng $rng ): ?string {
+		$length = strlen( $input );
+		if ( $length < 1 ) {
+			return null;
+		}
+		$at = $rng->int( 0, $length - 1 );
+		return substr_replace( $input, $rng->choice( self::INTERESTING_BYTES ), $at, 1 );
+	}
+
+	/** Replaces the name of one opening or closing tag with a name from SWAP_TAG_NAMES. */
+	private static function swap_tag_name( string $input, Prng $rng ): ?string {
+		if ( ! preg_match_all( '/<\/?([a-zA-Z][a-zA-Z0-9-]*)/', $input, $found, PREG_OFFSET_CAPTURE ) ) {
+			return null;
+		}
+		list( $old_name, $offset ) = $found[1][ $rng->int( 0, count( $found[1] ) - 1 ) ];
+		return substr_replace( $input, $rng->choice( self::SWAP_TAG_NAMES ), $offset, strlen( $old_name ) );
+	}
+
+	/** Upper- or lower-cases a run of up to 16 bytes, decided by chance( 50 ). */
+	private static function toggle_case( string $input, Prng $rng ): ?string {
+		$length = strlen( $input );
+		if ( $length < 1 ) {
+			return null;
+		}
+		list( $at, $len ) = self::span( $length, 16, $rng );
+		$run              = substr( $input, $at, $len );
+		$run              = $rng->chance( 50 ) ? strtoupper( $run ) : strtolower( $run );
+		return substr_replace( $input, $run, $at, $len );
+	}
+
+	/** Inserts up to 96 bytes taken from the `data` of a drawn corpus entry. */
+	private static function splice_corpus( string $input, Prng $rng, array $corpus_entries ): ?string {
+		if ( array() === $corpus_entries ) {
+			return null;
+		}
+		$donor      = $corpus_entries[ $rng->int( 0, count( $corpus_entries ) - 1 ) ]['data'];
+		$at         = self::insertion_point( strlen( $input ), $rng );
+		$donor_size = strlen( $donor );
+		$from       = $donor_size > 0 ? $rng->int( 0, $donor_size - 1 ) : 0;
+		$len        = $rng->int( 1, max( 1, min( 96, $donor_size - $from ) ) );
+		return substr_replace( $input, substr( $donor, $from, $len ), $at, 0 );
+	}
+}
diff --git a/tools/html-api-fuzz/lib/OracleFinding.php b/tools/html-api-fuzz/lib/OracleFinding.php
new file mode 100644
index 0000000000000..fe97dd22e3506
--- /dev/null
+++ b/tools/html-api-fuzz/lib/OracleFinding.php
@@ -0,0 +1,328 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Builds structured oracle-finding records from fuzz results. A finding is
+ * classified as an oracle limitation, an oracle bug, or a scalar tolerance,
+ * and carries a signature derived from its facts.
+ */
+class OracleFinding {
+	/** Upstream issue and fix references, keyed by finding type. */
+	private const ISSUE_REGISTRY = array(
+		'dom-xlink-dropped-local-name-after-xlink' => array(
+			'issue'    => 'lexbor/lexbor#372',
+			'issueUrl' => 'https://github.com/lexbor/lexbor/issues/372',
+			'fixedBy'  => 'https://github.com/lexbor/lexbor/commit/445c0a20b171533b4be762e18b10d359556eb68c',
+		),
+		'dom-mathml-heading-scope-reparenting' => array(
+			'issue'    => 'lexbor/lexbor#373',
+			'issueUrl' => 'https://github.com/lexbor/lexbor/issues/373',
+			'fixedBy'  => 'https://github.com/lexbor/lexbor/commit/481c444261a132190a3fb746d6d2f60824af3717',
+		),
+	);
+
+	/**
+	 * Derives an oracle finding from a fuzz result, if one applies.
+	 *
+	 * Rules are tried in order and the first match wins: an explicit
+	 * `comparison.oracleFindingType`, an unsupported status, an oracle parse
+	 * error, tolerated DOM oracle lines on a passing php-dom comparison, and
+	 * finally scalar-tolerated lines.
+	 *
+	 * @param array $result Fuzz result document.
+	 * @return array|null Finding record, or null when no rule applies. An
+	 *                    `oracleFindingType` unknown to from_type() also yields
+	 *                    null without trying the later rules.
+	 */
+	public static function from_result( array $result ): ?array {
+		$mode             = (string) ( $result['mode'] ?? 'unknown' );
+		$fragment_context = (string) ( $result['fragmentContext'] ?? 'body' );
+		$comparison       = is_array( $result['comparison'] ?? null ) ? $result['comparison'] : array();
+		$dom              = is_array( $result['dom'] ?? null ) ? $result['dom'] : array();
+		$oracle           = is_array( $result['oracle'] ?? null ) ? $result['oracle'] : ( is_array( $dom['oracle'] ?? null ) ? $dom['oracle'] : array() );
+		$oracle_kind      = (string) ( $oracle['kind'] ?? OracleRenderer::KIND_PHP_DOM );
+
+		if ( is_string( $comparison['oracleFindingType'] ?? null ) ) {
+			// firstDifference may be missing or malformed; only an array is forwarded as details.
+			$details = is_array( $comparison['firstDifference'] ?? null ) ? $comparison['firstDifference'] : array();
+			return self::from_type( $comparison['oracleFindingType'], $mode, $fragment_context, $details );
+		}
+
+		if ( 'oracle-unsupported' === ( $result['status'] ?? null ) || TreeRenderer::STATUS_UNSUPPORTED === ( $dom['status'] ?? null ) ) {
+			if ( OracleRenderer::KIND_PHP_DOM !== $oracle_kind ) {
+				return self::build(
+					'oracle-limitation',
+					$oracle_kind . '-unsupported',
+					self::oracle_owner( $oracle_kind ),
+					'The selected oracle cannot expose this tree shape faithfully.',
+					array(
+						'mode'               => $mode,
+						'fragmentContext'    => $fragment_context,
+						'oracleKind'         => $oracle_kind,
+						'oracleFailureClass' => $dom['failureClass'] ?? null,
+						'unsupportedMessage' => $dom['unsupported']['message'] ?? $dom['error'] ?? null,
+						'family'             => $oracle_kind . '-unsupported',
+					)
+				);
+			}
+			return self::build(
+				'oracle-limitation',
+				'dom-template-context-unsupported',
+				'PHP DOM API',
+				'The DOM oracle cannot expose this tree shape faithfully.',
+				array(
+					'mode'               => $mode,
+					'fragmentContext'    => $fragment_context,
+					'domFailureClass'    => $dom['failureClass'] ?? null,
+					'unsupportedMessage' => $dom['unsupported']['message'] ?? $dom['error'] ?? null,
+					'family'             => 'dom-template-context-unsupported',
+				)
+			);
+		}
+
+		if ( 'oracle-parse-error' === ( $result['status'] ?? null ) ) {
+			if ( OracleRenderer::KIND_PHP_DOM !== $oracle_kind ) {
+				return self::build(
+					'oracle-limitation',
+					$oracle_kind . '-parse-error',
+					self::oracle_owner( $oracle_kind ),
+					'The selected oracle could not parse the input, so differential coverage was unavailable.',
+					array(
+						'mode'               => $mode,
+						'fragmentContext'    => $fragment_context,
+						'oracleKind'         => $oracle_kind,
+						'oracleFailureClass' => $dom['failureClass'] ?? null,
+						'message'            => Signature::normalize_message_for_finding( $dom['error'] ?? '' ),
+						'family'             => $oracle_kind . '-parse-error',
+					)
+				);
+			}
+			return self::build(
+				'oracle-limitation',
+				'dom-parse-error',
+				'Lexbor/PHP DOM',
+				'The DOM oracle could not parse the input, so differential coverage was unavailable.',
+				array(
+					'mode'            => $mode,
+					'fragmentContext' => $fragment_context,
+					'domFailureClass' => $dom['failureClass'] ?? null,
+					'message'         => Signature::normalize_message_for_finding( $dom['error'] ?? '' ),
+					'family'          => 'dom-parse-error',
+				)
+			);
+		}
+
+		$dom_oracle_line_tolerances = $result['wordpress']['domOracleLineTolerances'] ?? array();
+		if ( OracleRenderer::KIND_PHP_DOM === $oracle_kind && true === ( $comparison['ok'] ?? null ) && is_array( $dom_oracle_line_tolerances ) && ! empty( $dom_oracle_line_tolerances ) ) {
+			return self::from_type(
+				'dom-xlink-dropped-local-name-after-xlink',
+				$mode,
+				$fragment_context,
+				array(
+					'toleratedLineCount' => count( $dom_oracle_line_tolerances ),
+				)
+			);
+		}
+
+		if ( ! empty( $comparison['scalarToleratedLines'] ) ) {
+			return self::build(
+				'scalar-tolerance',
+				'scalar-substitution-tolerance',
+				'WordPress HTML API scalar policy',
+				'WordPress deliberately preserves raw scalar bytes that spec-following parsers substitute during preprocessing.',
+				array(
+					'mode'               => $mode,
+					'fragmentContext'    => $fragment_context,
+					'toleratedLineCount' => count( (array) $comparison['scalarToleratedLines'] ),
+					'family'             => 'scalar-substitution-tolerance',
+				)
+			);
+		}
+
+		return null;
+	}
+
+	/**
+	 * Builds the finding for a named oracle bug type.
+	 *
+	 * @param string $type             Finding type.
+	 * @param string $mode             Parse mode copied into the signature facts.
+	 * @param string $fragment_context Fragment context copied into the signature facts.
+	 * @param array  $details          Optional type-specific details.
+	 * @return array|null Finding record, or null for an unrecognized type.
+	 */
+	private static function from_type( string $type, string $mode, string $fragment_context, array $details = array() ): ?array {
+		if ( 'dom-form-feed-pre-body-whitespace' === $type ) {
+			return self::build(
+				'oracle-bug',
+				$type,
+				'Lexbor/PHP DOM',
+				'The DOM oracle mishandles form-feed whitespace before body content.',
+				array(
+					'mode'            => $mode,
+					'fragmentContext' => $fragment_context,
+					'family'          => $type,
+				)
+			);
+		}
+
+		if ( 'dom-xlink-dropped-local-name-after-xlink' === $type ) {
+			return self::build(
+				'oracle-bug',
+				$type,
+				'Lexbor/PHP DOM',
+				'The DOM oracle drops a bare SVG/XLink local-name attribute after the namespaced attribute appears first.',
+				array(
+					'mode'               => $mode,
+					'fragmentContext'    => $fragment_context,
+					'toleratedLineCount' => $details['toleratedLineCount'] ?? null,
+					'family'             => $type,
+				),
+				self::ISSUE_REGISTRY[ $type ] ?? null
+			);
+		}
+
+		if ( 'dom-mathml-heading-scope-reparenting' === $type ) {
+			$wordpress_path = is_string( $details['wordpressPath'] ?? null ) ? self::path_pattern( $details['wordpressPath'] ) : null;
+			$dom_path       = is_string( $details['domPath'] ?? null ) ? self::path_pattern( $details['domPath'] ) : null;
+			return self::build(
+				'oracle-bug',
+				$type,
+				'Lexbor/PHP DOM',
+				'The DOM oracle reparents content after an ignored heading end tag inside a MathML text integration point.',
+				array(
+					'mode'            => $mode,
+					'fragmentContext' => $fragment_context,
+					'wordpressNorm'   => $details['wordpressNorm'] ?? null,
+					'domNorm'         => $details['domNorm'] ?? null,
+					'wordpressPath'   => $wordpress_path,
+					'domPath'         => $dom_path,
+					'family'          => $type,
+				),
+				self::ISSUE_REGISTRY[ $type ] ?? null
+			);
+		}
+
+		return null;
+	}
+
+	/**
+	 * Assembles a finding record and its signature.
+	 *
+	 * Facts with a null value are dropped before they are merged with the
+	 * classification and type, so absent details do not affect the signature.
+	 *
+	 * @param array|null $upstream Optional upstream issue reference.
+	 */
+	private static function build( string $classification, string $type, string $suspected_owner, string $reason, array $facts, ?array $upstream = null ): array {
+		$facts = array_merge(
+			array(
+				'classification' => $classification,
+				'type'           => $type,
+			),
+			self::without_nulls( $facts )
+		);
+
+		return array(
+			'schemaVersion'  => 1,
+			'kind'           => 'html-api-fuzz-oracle-finding',
+			'classification' => $classification,
+			'type'           => $type,
+			'suspectedOwner' => $suspected_owner,
+			'reason'         => $reason,
+			'upstream'       => $upstream,
+			'signature'      => self::signature( $classification, $type, $facts ),
+		);
+	}
+
+	/** Maps an oracle kind to the owner label used in findings; other kinds are returned unchanged. */
+	private static function oracle_owner( string $oracle_kind ): string {
+		if ( OracleRenderer::KIND_LEXBOR_SOURCE === $oracle_kind ) {
+			return 'Lexbor source oracle';
+		}
+
+		return $oracle_kind;
+	}
+
+	/**
+	 * Computes the signature block for a finding.
+	 *
+	 * `hash` covers every fact; `familyKey` covers only the classification
+	 * and the `family` fact (falling back to the type).
+	 */
+	private static function signature( string $classification, string $type, array $facts ): array {
+		$signature_facts = self::sort_json_value( $facts );
+		$hash = 'oracle-' . substr( sha1( json_encode( $signature_facts, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE ) ), 0, 12 );
+		$family_fact = (string) ( $facts['family'] ?? $type );
+
+		return array(
+			'hash'             => $hash,
+			'equivalenceClass' => $classification,
+			'familyKey'        => 'oracle-' . substr( sha1( $classification . ':' . $family_fact ), 0, 12 ),
+			'facts'            => $signature_facts,
+			'normalized'       => self::normalized_text( $signature_facts ),
+		);
+	}
+
+	/** Removes entries whose value is strictly null. */
+	private static function without_nulls( array $value ): array {
+		foreach ( $value as $key => $item ) {
+			if ( null === $item ) {
+				unset( $value[ $key ] );
+			}
+		}
+
+		return $value;
+	}
+
+	/**
+	 * Recursively sorts associative arrays by key so equal facts encode
+	 * identically; list order is preserved.
+	 */
+	private static function sort_json_value( $value ) {
+		if ( ! is_array( $value ) ) {
+			return $value;
+		}
+		if ( array_is_list( $value ) ) {
+			return array_map( array( __CLASS__, 'sort_json_value' ), $value );
+		}
+		foreach ( $value as $key => $item ) {
+			$value[ $key ] = self::sort_json_value( $item );
+		}
+		ksort( $value, SORT_STRING );
+
+		return $value;
+	}
+
+	/** Renders facts as newline-separated `key=value` lines, JSON-encoding array values. */
+	private static function normalized_text( array $facts ): string {
+		$parts = array();
+		foreach ( $facts as $key => $value ) {
+			if ( is_array( $value ) ) {
+				$value = json_encode( $value, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+			}
+			$parts[] = "{$key}={$value}";
+		}
+
+		return implode( "\n", $parts );
+	}
+
+	/**
+	 * Generalizes a slash-separated path: segments are lowercased and any
+	 * non-empty segment missing from the list below becomes `*`.
+	 */
+	private static function path_pattern( string $path ): string {
+		$parts = explode( '/', strtolower( $path ) );
+		foreach ( $parts as &$part ) {
+			if ( '' === $part ) {
+				continue;
+			}
+			if ( ! in_array( $part, array( 'html', 'head', 'body', 'template', 'content', 'math math', 'math annotation-xml', 'svg svg', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'area' ), true ) ) {
+				$part = '*';
+			}
+		}
+		unset( $part );
+
+		return implode( '/', $parts );
+	}
+}
diff --git a/tools/html-api-fuzz/lib/OracleRenderer.php b/tools/html-api-fuzz/lib/OracleRenderer.php
new file mode 100644
index 0000000000000..02370c344c839
--- /dev/null
+++ b/tools/html-api-fuzz/lib/OracleRenderer.php
@@ -0,0 +1,366 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Produces the reference ("oracle") tree for an input, either in-process
+ * through TreeRenderer::render_dom() or by running an external Lexbor
+ * oracle binary and decoding its JSON output.
+ */
+class OracleRenderer {
+	public const KIND_PHP_DOM       = 'php-dom';
+	public const KIND_LEXBOR_SOURCE = 'lexbor-source';
+
+	/** Timeout applied when none is given, in milliseconds. */
+	private const DEFAULT_TIMEOUT_MS = 2500;
+
+	private string $kind;
+	private ?string $lexbor_oracle_bin;
+	private int $timeout_ms;
+	/** Cached result of metadata(); null until first computed. */
+	private ?array $metadata = null;
+
+	private function __construct( string $kind, ?string $lexbor_oracle_bin = null, int $timeout_ms = self::DEFAULT_TIMEOUT_MS ) {
+		$this->kind              = $kind;
+		$this->lexbor_oracle_bin = $lexbor_oracle_bin;
+		$this->timeout_ms        = $timeout_ms;
+	}
+
+	/**
+	 * Creates a renderer from parsed options.
+	 *
+	 * The binary path is read with option_string(), using the
+	 * HTML_API_FUZZ_LEXBOR_ORACLE environment variable as the default. For
+	 * the lexbor-source kind, an empty path falls back to the build location
+	 * under tools/html-api-fuzz/oracles/lexbor/build.
+	 *
+	 * @throws \InvalidArgumentException When `dom-oracle` is not a known kind.
+	 */
+	public static function from_options( array $options ): self {
+		$kind = option_string( $options, 'dom-oracle', self::KIND_PHP_DOM );
+		if ( ! in_array( $kind, self::kinds(), true ) ) {
+			throw new \InvalidArgumentException( 'Expected --dom-oracle to be php-dom or lexbor-source.' );
+		}
+
+		$lexbor_oracle_bin = option_string( $options, 'lexbor-oracle-bin', getenv( 'HTML_API_FUZZ_LEXBOR_ORACLE' ) ?: null );
+		if ( self::KIND_LEXBOR_SOURCE === $kind && ( null === $lexbor_oracle_bin || '' === $lexbor_oracle_bin ) ) {
+			$lexbor_oracle_bin = repo_root() . '/tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle';
+		}
+
+		return new self(
+			$kind,
+			$lexbor_oracle_bin,
+			option_int( $options, 'oracle-timeout-ms', self::DEFAULT_TIMEOUT_MS )
+		);
+	}
+
+	/** @return string[] Every supported oracle kind. */
+	public static function kinds(): array {
+		return array( self::KIND_PHP_DOM, self::KIND_LEXBOR_SOURCE );
+	}
+
+	public function kind(): string {
+		return $this->kind;
+	}
+
+	public function is_php_dom(): bool {
+		return self::KIND_PHP_DOM === $this->kind;
+	}
+
+	/**
+	 * Describes the oracle in use; computed once and then cached.
+	 *
+	 * For the Lexbor kind this runs the binary with `--version` and merges
+	 * the `oracle` object of its JSON output. A missing or non-executable
+	 * binary is reported as `available => false` instead.
+	 */
+	public function metadata(): array {
+		if ( null !== $this->metadata ) {
+			return $this->metadata;
+		}
+
+		if ( self::KIND_PHP_DOM === $this->kind ) {
+			$this->metadata = array(
+				'kind'            => self::KIND_PHP_DOM,
+				'phpVersion'      => PHP_VERSION,
+				'domHTMLDocument' => class_exists( 'Dom\\HTMLDocument' ),
+			);
+			return $this->metadata;
+		}
+
+		$metadata = array(
+			'kind'   => self::KIND_LEXBOR_SOURCE,
+			'binary' => $this->lexbor_oracle_bin,
+		);
+
+		if ( is_string( $this->lexbor_oracle_bin ) && is_file( $this->lexbor_oracle_bin ) && is_executable( $this->lexbor_oracle_bin ) ) {
+			$version = $this->run_process( array( $this->lexbor_oracle_bin, '--version' ) );
+			$decoded = json_decode( trim( $version['stdout'] ), true );
+			if ( is_array( $decoded['oracle'] ?? null ) ) {
+				$metadata = array_merge( $metadata, $decoded['oracle'] );
+				$metadata['binary'] = $this->lexbor_oracle_bin;
+			} else {
+				$metadata['versionError'] = trim( $version['output'] );
+			}
+		} else {
+			$metadata['available'] = false;
+		}
+
+		$this->metadata = $metadata;
+		return $this->metadata;
+	}
+
+	/** Options to store with a replay; the timeout is included only when it differs from the default. */
+	public function replay_options(): array {
+		$options = array(
+			'domOracle' => $this->kind,
+		);
+		if ( self::KIND_LEXBOR_SOURCE === $this->kind && null !== $this->lexbor_oracle_bin ) {
+			$options['lexborOracleBin'] = $this->lexbor_oracle_bin;
+		}
+		if ( self::DEFAULT_TIMEOUT_MS !== $this->timeout_ms ) {
+			$options['oracleTimeoutMs'] = $this->timeout_ms;
+		}
+
+		return $options;
+	}
+
+	/** Command-line arguments equivalent to this renderer's configuration. */
+	public function worker_args(): array {
+		$args = array( '--dom-oracle', $this->kind );
+		if ( self::KIND_LEXBOR_SOURCE === $this->kind && null !== $this->lexbor_oracle_bin ) {
+			$args[] = '--lexbor-oracle-bin';
+			$args[] = $this->lexbor_oracle_bin;
+		}
+		if ( self::DEFAULT_TIMEOUT_MS !== $this->timeout_ms ) {
+			$args[] = '--oracle-timeout-ms';
+			$args[] = (string) $this->timeout_ms;
+		}
+
+		return $args;
+	}
+
+	/**
+	 * Renders the oracle tree for $html with the configured oracle kind.
+	 *
+	 * @return array Renderer result; always carries an `oracle` entry.
+	 */
+	public function render( string $html, string $mode, array $limits = array(), string $fragment_context = 'body' ): array {
+		if ( self::KIND_PHP_DOM === $this->kind ) {
+			$result = TreeRenderer::render_dom( $html, $mode, $limits, $fragment_context );
+			$result['oracle'] = $this->metadata();
+			return $result;
+		}
+
+		return $this->render_lexbor_source( $html, $mode, $limits, $fragment_context );
+	}
+
+	/**
+	 * Runs the Lexbor oracle binary on $html and normalizes its JSON reply.
+	 *
+	 * The input travels through a temporary file that is removed afterwards.
+	 * Renderer-side failures (missing binary, temp-file errors, timeout,
+	 * malformed output) are returned as STATUS_ERROR results.
+	 */
+	private function render_lexbor_source( string $html, string $mode, array $limits, string $fragment_context ): array {
+		if ( null === $this->lexbor_oracle_bin || ! is_file( $this->lexbor_oracle_bin ) || ! is_executable( $this->lexbor_oracle_bin ) ) {
+			return array(
+				'status'       => TreeRenderer::STATUS_ERROR,
+				'error'        => 'Lexbor source oracle binary is not available. Build it or pass --lexbor-oracle-bin.',
+				'failureClass' => 'oracle-unavailable',
+				'oracle'       => $this->metadata(),
+			);
+		}
+
+		$tmp = tempnam( sys_get_temp_dir(), 'html-api-fuzz-lexbor-input-' );
+		if ( false === $tmp ) {
+			return array(
+				'status'       => TreeRenderer::STATUS_ERROR,
+				'error'        => 'Could not create a temporary input file for the Lexbor source oracle.',
+				'failureClass' => 'oracle-renderer-error',
+				'oracle'       => $this->metadata(),
+			);
+		}
+
+		try {
+			if ( false === file_put_contents( $tmp, $html ) ) {
+				return array(
+					'status'       => TreeRenderer::STATUS_ERROR,
+					'error'        => 'Could not write the temporary input file for the Lexbor source oracle.',
+					'failureClass' => 'oracle-renderer-error',
+					'oracle'       => $this->metadata(),
+				);
+			}
+			$proc = $this->run_process(
+				array(
+					$this->lexbor_oracle_bin,
+					'--mode',
+					$mode,
+					'--context',
+					$fragment_context,
+					'--max-nodes',
+					(string) ( $limits['maxNodes'] ?? 3000 ),
+					'--input',
+					$tmp,
+				)
+			);
+		} finally {
+			@unlink( $tmp );
+		}
+
+		if ( $proc['timedOut'] ) {
+			return array(
+				'status'       => TreeRenderer::STATUS_ERROR,
+				'error'        => 'Lexbor source oracle timed out.',
+				'failureClass' => 'oracle-renderer-error',
+				'oracle'       => $this->metadata(),
+				'process'      => self::compact_process( $proc ),
+			);
+		}
+
+		$decoded = json_decode( $proc['stdout'], true );
+		if ( ! is_array( $decoded ) || ! is_string( $decoded['status'] ?? null ) ) {
+			return array(
+				'status'       => TreeRenderer::STATUS_ERROR,
+				'error'        => 'Lexbor source oracle did not return a valid JSON result.',
+				'failureClass' => 'oracle-renderer-error',
+				'oracle'       => $this->metadata(),
+				'process'      => self::compact_process( $proc ),
+			);
+		}
+
+		$status = $decoded['status'];
+		if ( ! in_array( $status, array( TreeRenderer::STATUS_OK, TreeRenderer::STATUS_UNSUPPORTED, TreeRenderer::STATUS_ERROR ), true ) ) {
+			$status = TreeRenderer::STATUS_ERROR;
+		}
+
+		$result = array(
+			'status'    => $status,
+			'oracle'    => is_array( $decoded['oracle'] ?? null ) ? array_merge( $this->metadata(), $decoded['oracle'] ) : $this->metadata(),
+			'nodeCount' => $decoded['nodeCount'] ?? null,
+			'process'   => self::compact_process( $proc ),
+		);
+
+		if ( TreeRenderer::STATUS_OK === $status && is_string( $decoded['treeBase64'] ?? null ) ) {
+			$tree = base64_decode( $decoded['treeBase64'], true );
+			if ( false === $tree ) {
+				$result['status']       = TreeRenderer::STATUS_ERROR;
+				$result['error']        = 'Lexbor source oracle returned invalid treeBase64.';
+				$result['failureClass'] = 'oracle-renderer-error';
+				return $result;
+			}
+			$result['tree'] = $tree;
+		} elseif ( TreeRenderer::STATUS_OK === $status && is_string( $decoded['tree'] ?? null ) ) {
+			$result['tree'] = $decoded['tree'];
+		}
+		if ( is_string( $decoded['failureClass'] ?? null ) ) {
+			$result['failureClass'] = $decoded['failureClass'];
+		}
+		if ( is_string( $decoded['error'] ?? null ) ) {
+			$result['error'] = $decoded['error'];
+		}
+		if ( is_array( $decoded['unsupported'] ?? null ) ) {
+			$result['unsupported'] = $decoded['unsupported'];
+		}
+
+		if ( TreeRenderer::STATUS_OK === $status && ! is_string( $result['tree'] ?? null ) ) {
+			$result['status']       = TreeRenderer::STATUS_ERROR;
+			$result['error']        = 'Lexbor source oracle returned ok without a tree.';
+			$result['failureClass'] = 'oracle-renderer-error';
+		}
+
+		return $result;
+	}
+
+	/**
+	 * Runs a command with stdin closed, collecting stdout and stderr until it
+	 * exits or the timeout elapses.
+	 *
+	 * On timeout the process is terminated (signal 9 if it is still running
+	 * 200 ms after the first attempt) and `code` is reported as null.
+	 *
+	 * @param string[] $command Program and arguments, handed to proc_open() as an array.
+	 * @return array Keys: command, code, timedOut, durationMs, stdout, stderr, output.
+	 * @throws \RuntimeException When the subprocess cannot be started.
+	 */
+	private function run_process( array $command ): array {
+		$spec = array(
+			0 => array( 'pipe', 'r' ),
+			1 => array( 'pipe', 'w' ),
+			2 => array( 'pipe', 'w' ),
+		);
+
+		$process = proc_open( $command, $spec, $pipes, repo_root() );
+		if ( ! is_resource( $process ) ) {
+			throw new \RuntimeException( 'Could not start oracle subprocess.' );
+		}
+
+		fclose( $pipes[0] );
+		stream_set_blocking( $pipes[1], false );
+		stream_set_blocking( $pipes[2], false );
+
+		$stdout      = '';
+		$stderr      = '';
+		$start       = microtime( true );
+		$timed_out   = false;
+		$reaped_code = null;
+
+		while ( true ) {
+			$stdout .= stream_get_contents( $pipes[1] );
+			$stderr .= stream_get_contents( $pipes[2] );
+
+			$status = proc_get_status( $process );
+			if ( ! $status['running'] ) {
+				// Before PHP 8.3 only this first post-exit call reports the real
+				// exit code and proc_close() then returns -1, so keep it here.
+				$reaped_code = $status['exitcode'];
+				break;
+			}
+
+			if ( ( microtime( true ) - $start ) * 1000 > $this->timeout_ms ) {
+				$timed_out = true;
+				proc_terminate( $process );
+				usleep( 200000 );
+				$status = proc_get_status( $process );
+				if ( $status['running'] ) {
+					proc_terminate( $process, 9 );
+				}
+				break;
+			}
+
+			usleep( 10000 );
+		}
+
+		$stdout .= stream_get_contents( $pipes[1] );
+		$stderr .= stream_get_contents( $pipes[2] );
+		fclose( $pipes[1] );
+		fclose( $pipes[2] );
+
+		$exit_code = proc_close( $process );
+		if ( $timed_out ) {
+			$exit_code = null;
+		} elseif ( -1 === $exit_code && null !== $reaped_code ) {
+			$exit_code = $reaped_code;
+		}
+
+		return array(
+			'command'    => command_string( $command ),
+			'code'       => $exit_code,
+			'timedOut'   => $timed_out,
+			'durationMs' => (int) round( ( microtime( true ) - $start ) * 1000 ),
+			'stdout'     => $stdout,
+			'stderr'     => $stderr,
+			'output'     => $stdout . $stderr,
+		);
+	}
+
+	/** Reduces a process record to the fields kept in results; stderr is cut to its last 1000 bytes. */
+	private static function compact_process( array $process ): array {
+		return array(
+			'command'    => $process['command'] ?? null,
+			'code'       => $process['code'] ?? null,
+			'timedOut'   => $process['timedOut'] ?? null,
+			'durationMs' => $process['durationMs'] ?? null,
+			'stderrTail' => substr( (string) ( $process['stderr'] ?? '' ), -1000 ),
+		);
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Prng.php b/tools/html-api-fuzz/lib/Prng.php
new file mode 100644
index 0000000000000..3044678c9e8ab
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Prng.php
@@ -0,0 +1,95 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Seeded pseudo-random generator backed by SHA-256.
+ *
+ * The byte stream is the concatenation of sha256( "<seed>:<counter>" ) for
+ * counter = 0, 1, 2, ..., so the same seed always replays the same values.
+ */
+class Prng {
+	/** @var string Seed, stored as a string. */
+	private $seed;
+
+	/** @var int Index of the next hash block to generate. */
+	private $counter = 0;
+
+	/** @var string Generated bytes not yet handed out. */
+	private $buffer = '';
+
+	/**
+	 * @param mixed $seed Cast to string before use.
+	 */
+	public function __construct( $seed ) {
+		$this->seed = (string) $seed;
+	}
+
+	/**
+	 * Returns the next $length bytes of the stream, generating more as needed.
+	 */
+	public function bytes( int $length ): string {
+		for ( ; strlen( $this->buffer ) < $length; ++$this->counter ) {
+			$this->buffer .= hash( 'sha256', $this->seed . ':' . $this->counter, true );
+		}
+
+		$taken        = substr( $this->buffer, 0, $length );
+		$this->buffer = substr( $this->buffer, $length );
+		return $taken;
+	}
+
+	/**
+	 * Consumes four bytes and decodes them as a big-endian unsigned 32-bit integer.
+	 */
+	public function uint32(): int {
+		return (int) unpack( 'N', $this->bytes( 4 ) )[1];
+	}
+
+	/**
+	 * Returns an integer in [$min, $max], reducing one uint32() modulo the range size.
+	 *
+	 * When $max <= $min the result is $min and nothing is consumed from the stream.
+	 */
+	public function int( int $min, int $max ): int {
+		if ( $min < $max ) {
+			return $min + ( $this->uint32() % ( $max - $min + 1 ) );
+		}
+
+		return $min;
+	}
+
+	/**
+	 * Returns true when a draw from 1..$denominator is at most $numerator.
+	 */
+	public function chance( int $numerator, int $denominator = 100 ): bool {
+		$roll = $this->int( 1, $denominator );
+		return $roll <= $numerator;
+	}
+
+	/**
+	 * Returns the element of $values at a drawn index in 0..count( $values ) - 1.
+	 */
+	public function choice( array $values ) {
+		$index = $this->int( 0, count( $values ) - 1 );
+		return $values[ $index ];
+	}
+
+	/**
+	 * Picks a key of $weights: each weight, in array order, is subtracted from
+	 * a draw in 1..sum and the first key to bring it to zero or below wins.
+	 *
+	 * Falls back to the first key when no entry is picked.
+	 */
+	public function weighted( array $weights ) {
+		$remaining = $this->int( 1, max( 1, (int) array_sum( $weights ) ) );
+
+		foreach ( $weights as $key => $weight ) {
+			$remaining -= $weight;
+			if ( $remaining <= 0 ) {
+				return $key;
+			}
+		}
+
+		return array_key_first( $weights );
+	}
+}
+
diff --git a/tools/html-api-fuzz/lib/ResultStore.php b/tools/html-api-fuzz/lib/ResultStore.php
new file mode 100644
index 0000000000000..e5360eff60b10
--- /dev/null
+++ b/tools/html-api-fuzz/lib/ResultStore.php
@@ -0,0 +1,406 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Per-lane SQLite store of one row per attempted seed.
+ *
+ * Replaces the append-only summary.ndjson stream and the per-seed artifact
+ * directories for attempts whose artifacts are not retained on disk. Failure
+ * rows carry the full result and replay JSON (the replay embeds the input as
+ * base64), so a failure remains reproducible after its seed directory is
+ * pruned.
+ *
+ * The scalar columns are denormalized copies of summary fields for direct SQL
+ * querying (camelCase JSON keys become snake_case columns); the JSON
+ * documents are what programmatic consumers read and are authoritative when
+ * the two disagree.
+ */
+class ResultStore {
+	public const FILENAME = 'results.sqlite';
+
+	/** @var \SQLite3 */
+	private $db;
+
+	/**
+	 * Opens the store at $path.
+	 *
+	 * A writable connection creates the file when it is missing, switches to
+	 * WAL journaling with synchronous=NORMAL, and creates or upgrades the
+	 * schema. A read-only connection skips all of that.
+	 *
+	 * @param string $path      Path of the SQLite database file.
+	 * @param bool   $read_only Open without write access and skip schema setup.
+	 */
+	public function __construct( string $path, bool $read_only = false ) {
+		if ( $read_only ) {
+			$open_flags = SQLITE3_OPEN_READONLY;
+		} else {
+			$open_flags = SQLITE3_OPEN_READWRITE | SQLITE3_OPEN_CREATE;
+		}
+
+		$this->db = new \SQLite3( $path, $open_flags );
+		$this->db->busyTimeout( 5000 );
+		$this->db->enableExceptions( true );
+
+		if ( $read_only ) {
+			return;
+		}
+
+		$this->db->exec( 'PRAGMA journal_mode = WAL' );
+		$this->db->exec( 'PRAGMA synchronous = NORMAL' );
+		$this->create_schema();
+	}
+
+	/**
+	 * Creates the attempts table and its indexes, and upgrades a table created
+	 * by an earlier schema by adding any column it lacks.
+	 *
+	 * Columns are added in a fixed order and user_version is raised to 2 when
+	 * it is lower.
+	 */
+	private function create_schema(): void {
+		$this->db->exec(
+			'CREATE TABLE IF NOT EXISTS attempts (
+				id INTEGER PRIMARY KEY,
+				created_at TEXT NOT NULL,
+				seed INTEGER NOT NULL,
+				ok INTEGER NOT NULL,
+				status TEXT NOT NULL,
+				failure_class TEXT,
+				signature_hash TEXT,
+				family_key TEXT,
+				oracle_finding_class TEXT,
+				oracle_finding_type TEXT,
+				oracle_suspected_owner TEXT,
+				oracle_signature_hash TEXT,
+				oracle_family_key TEXT,
+				oracle_kind TEXT,
+				oracle_version TEXT,
+				oracle_commit TEXT,
+				oracle_binary TEXT,
+				profile TEXT,
+				mode TEXT,
+				payload_policy TEXT,
+				input_source TEXT,
+				input_sha1 TEXT,
+				input_length INTEGER,
+				duration_ms INTEGER,
+				worker_code INTEGER,
+				worker_timed_out INTEGER NOT NULL DEFAULT 0,
+				artifacts_retained INTEGER NOT NULL DEFAULT 0,
+				failure_artifacts_retained INTEGER,
+				oracle_artifacts_retained INTEGER,
+				summary_json TEXT,
+				result_json TEXT,
+				replay_json TEXT
+			)'
+		);
+
+		// Columns that may be missing from a table created before they were
+		// introduced, in the order they are appended.
+		$added_columns = array(
+			'oracle_finding_class'       => 'TEXT',
+			'oracle_finding_type'        => 'TEXT',
+			'oracle_suspected_owner'     => 'TEXT',
+			'oracle_signature_hash'      => 'TEXT',
+			'oracle_family_key'          => 'TEXT',
+			'oracle_kind'                => 'TEXT',
+			'oracle_version'             => 'TEXT',
+			'oracle_commit'              => 'TEXT',
+			'oracle_binary'              => 'TEXT',
+			'failure_artifacts_retained' => 'INTEGER',
+			'oracle_artifacts_retained'  => 'INTEGER',
+		);
+		foreach ( $added_columns as $column => $definition ) {
+			$this->ensure_column( 'attempts', $column, $definition );
+		}
+
+		$schema_version = (int) $this->db->querySingle( 'PRAGMA user_version' );
+		if ( $schema_version < 2 ) {
+			$this->db->exec( 'PRAGMA user_version = 2' );
+		}
+
+		// One single-column index per lookup column, named attempts_<column>.
+		$indexed_columns = array(
+			'signature_hash',
+			'family_key',
+			'oracle_signature_hash',
+			'oracle_family_key',
+			'oracle_kind',
+			'ok',
+			'seed',
+		);
+		foreach ( $indexed_columns as $indexed ) {
+			$this->db->exec( "CREATE INDEX IF NOT EXISTS attempts_{$indexed} ON attempts ( {$indexed} )" );
+		}
+	}
+
+	/**
+	 * Adds $column to $table unless the table already has it.
+	 *
+	 * The arguments are interpolated into the SQL unescaped, so they must be
+	 * trusted identifiers.
+	 *
+	 * @param string $table      Table name.
+	 * @param string $column     Column name.
+	 * @param string $definition Column type/constraints, e.g. 'TEXT'.
+	 */
+	private function ensure_column( string $table, string $column, string $definition ): void {
+		if ( ! $this->has_column( $table, $column ) ) {
+			$this->db->exec( "ALTER TABLE {$table} ADD COLUMN {$column} {$definition}" );
+		}
+	}
+
+	/**
+	 * Reports whether $table has a column named exactly $column, according to
+	 * PRAGMA table_info. The table name is interpolated unescaped.
+	 */
+	private function has_column( string $table, string $column ): bool {
+		$listing = $this->db->query( 'PRAGMA table_info(' . $table . ')' );
+		$found   = false;
+
+		while ( ! $found ) {
+			$info = $listing->fetchArray( SQLITE3_ASSOC );
+			if ( false === $info ) {
+				break;
+			}
+			$found = ( $column === ( $info['name'] ?? null ) );
+		}
+
+		return $found;
+	}
+
+	/**
+	 * Records one attempt and returns the id of the inserted row.
+	 *
+	 * Every attempt stores the scalar summary columns. The JSON documents are
+	 * stored only when the attempt failed or carries an oracle finding: the
+	 * summary always, the result and replay when they are provided. For any
+	 * other attempt all three JSON columns are NULL; a passing attempt is
+	 * regenerable from its seed.
+	 *
+	 * @param array      $summary Summary record; the scalar columns are read from its camelCase keys.
+	 * @param array|null $result  Full result document, or null.
+	 * @param array|null $replay  Replay document, or null.
+	 * @return int Row id of the new attempt.
+	 */
+	public function record_attempt( array $summary, ?array $result = null, ?array $replay = null ): int {
+		$ok             = (bool) ( $summary['ok'] ?? false );
+		$oracle_finding = is_array( $summary['oracleFinding'] ?? null ) ? $summary['oracleFinding'] : null;
+		$oracle         = is_array( $summary['oracle'] ?? null ) ? $summary['oracle'] : null;
+		// JSON is kept for failures and for any attempt with an oracle finding.
+		$store_json     = ! $ok || null !== $oracle_finding;
+		$artifacts_retained = (bool) ( $summary['artifactsRetained'] ?? false );
+		// An explicit per-kind retention flag wins; otherwise it is derived
+		// from the combined flag and whether this row is of that kind.
+		$failure_artifacts_retained = array_key_exists( 'failureArtifactsRetained', $summary )
+			? (bool) $summary['failureArtifactsRetained']
+			: ( ! $ok && $artifacts_retained );
+		$oracle_artifacts_retained = array_key_exists( 'oracleArtifactsRetained', $summary )
+			? (bool) $summary['oracleArtifactsRetained']
+			: ( null !== $oracle_finding && $artifacts_retained );
+		$statement = $this->db->prepare(
+			'INSERT INTO attempts (
+				created_at, seed, ok, status, failure_class, signature_hash, family_key,
+				oracle_finding_class, oracle_finding_type, oracle_suspected_owner, oracle_signature_hash, oracle_family_key,
+				oracle_kind, oracle_version, oracle_commit, oracle_binary,
+				profile, mode, payload_policy, input_source, input_sha1, input_length,
+				duration_ms, worker_code, worker_timed_out, artifacts_retained,
+				failure_artifacts_retained, oracle_artifacts_retained,
+				summary_json, result_json, replay_json
+			) VALUES (
+				:created_at, :seed, :ok, :status, :failure_class, :signature_hash, :family_key,
+				:oracle_finding_class, :oracle_finding_type, :oracle_suspected_owner, :oracle_signature_hash, :oracle_family_key,
+				:oracle_kind, :oracle_version, :oracle_commit, :oracle_binary,
+				:profile, :mode, :payload_policy, :input_source, :input_sha1, :input_length,
+				:duration_ms, :worker_code, :worker_timed_out, :artifacts_retained,
+				:failure_artifacts_retained, :oracle_artifacts_retained,
+				:summary_json, :result_json, :replay_json
+			)'
+		);
+
+		// A row must never become invisible to triage because one value could
+		// not be encoded; fall back to a minimal document instead.
+		$encode = static function ( $value ) use ( $summary ): ?string {
+			if ( null === $value ) {
+				return null;
+			}
+			$json = json_encode( $value, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+			if ( false !== $json ) {
+				return $json;
+			}
+			// The fallback is always built from the summary, whichever
+			// document failed to encode.
+			return json_encode(
+				array(
+					'kind'        => 'html-api-fuzz-encode-fallback',
+					'encodeError' => json_last_error_msg(),
+					'ok'          => (bool) ( $summary['ok'] ?? false ),
+					'seed'        => (int) ( $summary['seed'] ?? 0 ),
+					'status'      => (string) ( $summary['status'] ?? 'unknown' ),
+					'signature'   => array( 'hash' => $summary['signature']['hash'] ?? null ),
+					'oracleFinding' => array(
+						'classification' => $summary['oracleFinding']['classification'] ?? null,
+						'type'           => $summary['oracleFinding']['type'] ?? null,
+						'suspectedOwner' => $summary['oracleFinding']['suspectedOwner'] ?? null,
+						'signature'      => array(
+							'hash'      => $summary['oracleFinding']['signature']['hash'] ?? null,
+							'familyKey' => $summary['oracleFinding']['signature']['familyKey'] ?? null,
+						),
+					),
+				),
+				JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
+			);
+		};
+
+		// Optional values are bound as SQL NULL when absent, otherwise with
+		// the column's declared type.
+		$statement->bindValue( ':created_at', gmdate( 'c' ), SQLITE3_TEXT );
+		$statement->bindValue( ':seed', (int) ( $summary['seed'] ?? 0 ), SQLITE3_INTEGER );
+		$statement->bindValue( ':ok', $ok ? 1 : 0, SQLITE3_INTEGER );
+		$statement->bindValue( ':status', (string) ( $summary['status'] ?? 'unknown' ), SQLITE3_TEXT );
+		$statement->bindValue( ':failure_class', $summary['failureClass'] ?? null, null === ( $summary['failureClass'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$signature_hash = $summary['signature']['hash'] ?? null;
+		$statement->bindValue( ':signature_hash', $signature_hash, null === $signature_hash ? SQLITE3_NULL : SQLITE3_TEXT );
+		$family_key = $summary['signature']['familyKey'] ?? null;
+		$statement->bindValue( ':family_key', $family_key, null === $family_key ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_finding_class = $oracle_finding['classification'] ?? null;
+		$statement->bindValue( ':oracle_finding_class', $oracle_finding_class, null === $oracle_finding_class ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_finding_type = $oracle_finding['type'] ?? null;
+		$statement->bindValue( ':oracle_finding_type', $oracle_finding_type, null === $oracle_finding_type ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_suspected_owner = $oracle_finding['suspectedOwner'] ?? null;
+		$statement->bindValue( ':oracle_suspected_owner', $oracle_suspected_owner, null === $oracle_suspected_owner ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_signature_hash = $oracle_finding['signature']['hash'] ?? null;
+		$statement->bindValue( ':oracle_signature_hash', $oracle_signature_hash, null === $oracle_signature_hash ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_family_key = $oracle_finding['signature']['familyKey'] ?? null;
+		$statement->bindValue( ':oracle_family_key', $oracle_family_key, null === $oracle_family_key ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_kind = $oracle['kind'] ?? null;
+		$statement->bindValue( ':oracle_kind', $oracle_kind, null === $oracle_kind ? SQLITE3_NULL : SQLITE3_TEXT );
+		// The oracle version column holds lexborVersion when present,
+		// otherwise phpVersion.
+		$oracle_version = $oracle['lexborVersion'] ?? $oracle['phpVersion'] ?? null;
+		$statement->bindValue( ':oracle_version', $oracle_version, null === $oracle_version ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_commit = $oracle['lexborCommit'] ?? null;
+		$statement->bindValue( ':oracle_commit', $oracle_commit, null === $oracle_commit ? SQLITE3_NULL : SQLITE3_TEXT );
+		$oracle_binary = $oracle['binary'] ?? null;
+		$statement->bindValue( ':oracle_binary', $oracle_binary, null === $oracle_binary ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':profile', $summary['profile'] ?? null, null === ( $summary['profile'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':mode', $summary['mode'] ?? null, null === ( $summary['mode'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':payload_policy', $summary['payloadPolicy'] ?? null, null === ( $summary['payloadPolicy'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':input_source', $summary['inputSource'] ?? null, null === ( $summary['inputSource'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':input_sha1', $summary['inputSha1'] ?? null, null === ( $summary['inputSha1'] ?? null ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':input_length', $summary['inputLength'] ?? null, null === ( $summary['inputLength'] ?? null ) ? SQLITE3_NULL : SQLITE3_INTEGER );
+		$statement->bindValue( ':duration_ms', $summary['durationMs'] ?? null, null === ( $summary['durationMs'] ?? null ) ? SQLITE3_NULL : SQLITE3_INTEGER );
+		$statement->bindValue( ':worker_code', $summary['workerCode'] ?? null, null === ( $summary['workerCode'] ?? null ) ? SQLITE3_NULL : SQLITE3_INTEGER );
+		$statement->bindValue( ':worker_timed_out', ( $summary['workerTimedOut'] ?? false ) ? 1 : 0, SQLITE3_INTEGER );
+		$statement->bindValue( ':artifacts_retained', $artifacts_retained ? 1 : 0, SQLITE3_INTEGER );
+		$statement->bindValue( ':failure_artifacts_retained', $failure_artifacts_retained ? 1 : 0, SQLITE3_INTEGER );
+		$statement->bindValue( ':oracle_artifacts_retained', $oracle_artifacts_retained ? 1 : 0, SQLITE3_INTEGER );
+		$statement->bindValue( ':summary_json', $store_json ? $encode( $summary ) : null, $store_json ? SQLITE3_TEXT : SQLITE3_NULL );
+		$statement->bindValue( ':result_json', $store_json ? $encode( $result ) : null, ( ! $store_json || null === $result ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->bindValue( ':replay_json', $store_json ? $encode( $replay ) : null, ( ! $store_json || null === $replay ) ? SQLITE3_NULL : SQLITE3_TEXT );
+		$statement->execute();
+		$statement->close();
+
+		return $this->db->lastInsertRowID();
+	}
+
+	/**
+	 * Lists the distinct seeds recorded under a failure signature whose
+	 * artifact directories were marked as retained.
+	 *
+	 * The caller checks which of those directories still exist on disk, so the
+	 * exemplar cap survives runner restarts that re-record the same seeds.
+	 * Rows without a failure-specific flag (or stores without that column)
+	 * fall back to the combined artifacts_retained flag.
+	 *
+	 * @param string $signature_hash Value matched against signature_hash.
+	 * @return int[] Seeds in the order SQLite returns them.
+	 */
+	public function retained_seeds( string $signature_hash ): array {
+		if ( $this->has_column( 'attempts', 'failure_artifacts_retained' ) ) {
+			$flag_expression = 'COALESCE(failure_artifacts_retained, artifacts_retained)';
+		} else {
+			$flag_expression = 'artifacts_retained';
+		}
+
+		$query = $this->db->prepare(
+			'SELECT DISTINCT seed FROM attempts WHERE signature_hash = :hash AND ' . $flag_expression . ' = 1'
+		);
+		$query->bindValue( ':hash', $signature_hash, SQLITE3_TEXT );
+		$rows = $query->execute();
+
+		$seeds = array();
+		for ( $row = $rows->fetchArray( SQLITE3_NUM ); false !== $row; $row = $rows->fetchArray( SQLITE3_NUM ) ) {
+			$seeds[] = (int) $row[0];
+		}
+		$query->close();
+
+		return $seeds;
+	}
+
+	/**
+	 * Lists the distinct seeds recorded under an oracle-finding signature whose
+	 * artifact directories were marked as retained.
+	 *
+	 * Returns an empty list for a store that has no oracle_signature_hash
+	 * column. Rows without an oracle-specific flag (or stores without that
+	 * column) fall back to the combined artifacts_retained flag.
+	 *
+	 * @param string $signature_hash Value matched against oracle_signature_hash.
+	 * @return int[] Seeds in the order SQLite returns them.
+	 */
+	public function oracle_retained_seeds( string $signature_hash ): array {
+		$seeds = array();
+		if ( ! $this->has_column( 'attempts', 'oracle_signature_hash' ) ) {
+			return $seeds;
+		}
+
+		if ( $this->has_column( 'attempts', 'oracle_artifacts_retained' ) ) {
+			$flag_expression = 'COALESCE(oracle_artifacts_retained, artifacts_retained)';
+		} else {
+			$flag_expression = 'artifacts_retained';
+		}
+
+		$query = $this->db->prepare(
+			'SELECT DISTINCT seed FROM attempts WHERE oracle_signature_hash = :hash AND ' . $flag_expression . ' = 1'
+		);
+		$query->bindValue( ':hash', $signature_hash, SQLITE3_TEXT );
+		$rows = $query->execute();
+
+		for ( $row = $rows->fetchArray( SQLITE3_NUM ); false !== $row; $row = $rows->fetchArray( SQLITE3_NUM ) ) {
+			$seeds[] = (int) $row[0];
+		}
+		$query->close();
+
+		return $seeds;
+	}
+
+	/**
+	 * Tells whether at least one recorded attempt for $seed has the combined
+	 * artifacts_retained flag set.
+	 *
+	 * Guards re-runs of a previously retained seed from deleting the exemplar
+	 * directory the earlier row points at.
+	 *
+	 * @param int $seed Seed to look up.
+	 */
+	public function seed_artifacts_retained( int $seed ): bool {
+		$query = $this->db->prepare( 'SELECT 1 FROM attempts WHERE seed = :seed AND artifacts_retained = 1 LIMIT 1' );
+		$query->bindValue( ':seed', $seed, SQLITE3_INTEGER );
+		$first_row = $query->execute()->fetchArray( SQLITE3_NUM );
+		$query->close();
+
+		$missing = ( false === $first_row || null === $first_row );
+
+		return ! $missing;
+	}
+
+	/**
+	 * Fetches the replay document of the newest row (highest id) for $seed
+	 * that has one, for reproducing a failure whose artifact directory was
+	 * pruned.
+	 *
+	 * @param int $seed Seed to look up.
+	 * @return array|null Decoded replay, or null when no row matches or the
+	 *                    stored text does not decode to an array.
+	 */
+	public function replay_for_seed( int $seed ): ?array {
+		$query = $this->db->prepare( 'SELECT replay_json FROM attempts WHERE seed = :seed AND replay_json IS NOT NULL ORDER BY id DESC LIMIT 1' );
+		$query->bindValue( ':seed', $seed, SQLITE3_INTEGER );
+		$first_row = $query->execute()->fetchArray( SQLITE3_NUM );
+		$query->close();
+
+		$stored = false === $first_row ? null : ( $first_row[0] ?? null );
+		if ( ! is_string( $stored ) ) {
+			return null;
+		}
+
+		$decoded = json_decode( $stored, true );
+		if ( is_array( $decoded ) ) {
+			return $decoded;
+		}
+
+		return null;
+	}
+
+	/**
+	 * Fetches the replay document stored on the attempt row with this id.
+	 *
+	 * @param int $id Attempt row id.
+	 * @return array|null Decoded replay, or null when the row is missing, has
+	 *                    no replay, or the stored text does not decode to an array.
+	 */
+	public function replay_for_attempt_id( int $id ): ?array {
+		$query = $this->db->prepare( 'SELECT replay_json FROM attempts WHERE id = :id AND replay_json IS NOT NULL LIMIT 1' );
+		$query->bindValue( ':id', $id, SQLITE3_INTEGER );
+		$first_row = $query->execute()->fetchArray( SQLITE3_NUM );
+		$query->close();
+
+		$stored = false === $first_row ? null : ( $first_row[0] ?? null );
+		if ( ! is_string( $stored ) ) {
+			return null;
+		}
+
+		$decoded = json_decode( $stored, true );
+		if ( is_array( $decoded ) ) {
+			return $decoded;
+		}
+
+		return null;
+	}
+
+	/**
+	 * Overwrites the replay document of one attempt row.
+	 *
+	 * @param int   $id     Attempt row id.
+	 * @param array $replay Replacement replay document.
+	 * @throws \RuntimeException When the replay cannot be encoded as JSON; the
+	 *                           row is left untouched in that case.
+	 */
+	public function update_replay_for_attempt( int $id, array $replay ): void {
+		$encoded = json_encode( $replay, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+		if ( ! is_string( $encoded ) ) {
+			throw new \RuntimeException( 'Could not encode replay JSON: ' . json_last_error_msg() );
+		}
+
+		$update = $this->db->prepare( 'UPDATE attempts SET replay_json = :replay_json WHERE id = :id' );
+		$update->bindValue( ':replay_json', $encoded, SQLITE3_TEXT );
+		$update->bindValue( ':id', $id, SQLITE3_INTEGER );
+		$update->execute();
+		$update->close();
+	}
+
+	/**
+	 * Returns the highest attempt id, or 0 when the table is empty.
+	 */
+	public function max_id(): int {
+		return (int) $this->db->querySingle( 'SELECT MAX(id) FROM attempts' );
+	}
+
+	/**
+	 * Collects failure rows (ok = 0) whose id lies in (after_id, up_to_id],
+	 * oldest first.
+	 *
+	 * Rows whose summary is missing or does not decode to an array are left out.
+	 *
+	 * @param int $after_id Exclusive lower bound on the row id.
+	 * @param int $up_to_id Inclusive upper bound on the row id.
+	 * @return array[] Entries of the form array( 'id' => int, 'record' => array ).
+	 */
+	public function failures_after( int $after_id, int $up_to_id ): array {
+		$query = $this->db->prepare(
+			'SELECT id, summary_json FROM attempts WHERE id > :after AND id <= :up_to AND ok = 0 ORDER BY id'
+		);
+		$query->bindValue( ':after', $after_id, SQLITE3_INTEGER );
+		$query->bindValue( ':up_to', $up_to_id, SQLITE3_INTEGER );
+		$cursor = $query->execute();
+
+		$failures = array();
+		for ( $row = $cursor->fetchArray( SQLITE3_ASSOC ); false !== $row; $row = $cursor->fetchArray( SQLITE3_ASSOC ) ) {
+			if ( null === $row['summary_json'] ) {
+				continue;
+			}
+
+			$summary = json_decode( $row['summary_json'], true );
+			if ( is_array( $summary ) ) {
+				$failures[] = array(
+					'id'     => (int) $row['id'],
+					'record' => $summary,
+				);
+			}
+		}
+		$query->close();
+
+		return $failures;
+	}
+
+	/**
+	 * Collects rows that carry an oracle signature hash and whose id lies in
+	 * (after_id, up_to_id], oldest first.
+	 *
+	 * Returns an empty list for a store that has no oracle_signature_hash
+	 * column. Rows whose summary is missing or does not decode to an array are
+	 * left out.
+	 *
+	 * @param int $after_id Exclusive lower bound on the row id.
+	 * @param int $up_to_id Inclusive upper bound on the row id.
+	 * @return array[] Entries of the form array( 'id' => int, 'record' => array ).
+	 */
+	public function oracle_findings_after( int $after_id, int $up_to_id ): array {
+		$findings = array();
+		if ( ! $this->has_column( 'attempts', 'oracle_signature_hash' ) ) {
+			return $findings;
+		}
+
+		$query = $this->db->prepare(
+			'SELECT id, summary_json FROM attempts WHERE id > :after AND id <= :up_to AND oracle_signature_hash IS NOT NULL ORDER BY id'
+		);
+		$query->bindValue( ':after', $after_id, SQLITE3_INTEGER );
+		$query->bindValue( ':up_to', $up_to_id, SQLITE3_INTEGER );
+		$cursor = $query->execute();
+
+		for ( $row = $cursor->fetchArray( SQLITE3_ASSOC ); false !== $row; $row = $cursor->fetchArray( SQLITE3_ASSOC ) ) {
+			if ( null === $row['summary_json'] ) {
+				continue;
+			}
+
+			$summary = json_decode( $row['summary_json'], true );
+			if ( is_array( $summary ) ) {
+				$findings[] = array(
+					'id'     => (int) $row['id'],
+					'record' => $summary,
+				);
+			}
+		}
+		$query->close();
+
+		return $findings;
+	}
+
+	/**
+	 * Returns the number of rows in the attempts table.
+	 */
+	public function count_attempts(): int {
+		$total = $this->db->querySingle( 'SELECT COUNT(*) FROM attempts' );
+
+		return (int) $total;
+	}
+
+	/**
+	 * Closes the underlying SQLite connection.
+	 */
+	public function close(): void {
+		$this->db->close();
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Signature.php b/tools/html-api-fuzz/lib/Signature.php
new file mode 100644
index 0000000000000..7babac4a13383
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Signature.php
@@ -0,0 +1,142 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Derives a signature from a failing result so that repeated occurrences of
+ * the same failure can be grouped.
+ */
+class Signature {
+	/**
+	 * Failure classes whose facts come from a tree diff, mapped to the result
+	 * key that holds the "firstDifference" record.
+	 */
+	private const DIFF_SOURCES = array(
+		'tree-mismatch'           => 'comparison',
+		'encoding-mismatch'       => 'comparison',
+		'normalize-tree-changed'  => 'normalizePreservation',
+		'mutation-tree-mismatch'  => 'mutation',
+		'mutation-delta-mismatch' => 'mutation',
+	);
+
+	/**
+	 * Builds the signature for one result.
+	 *
+	 * The hash covers every collected fact. The order in which keys are added
+	 * to $facts is part of the hash input, so reordering them changes every
+	 * signature.
+	 *
+	 * @param array $result Decoded result document.
+	 * @return array|null Null when the result is ok; otherwise an array with
+	 *                    the keys hash, equivalenceClass, familyKey, facts
+	 *                    and normalized.
+	 */
+	public static function from_result( array $result ): ?array {
+		if ( $result['ok'] ?? false ) {
+			return null;
+		}
+
+		$failure_class = $result['failureClass'] ?? 'unknown';
+		$facts         = array(
+			'failureClass' => $failure_class,
+			'mode'         => $result['mode'] ?? 'unknown',
+		);
+
+		if ( is_string( $failure_class ) && isset( self::DIFF_SOURCES[ $failure_class ] ) ) {
+			$source = self::DIFF_SOURCES[ $failure_class ];
+			$facts += self::diff_facts( $result[ $source ]['firstDifference'] ?? array() );
+		} elseif ( 'tag-invariant-failed' === $failure_class ) {
+			$failure = $result['tagProcessor']['failures'][0] ?? array();
+			$facts['invariant'] = $failure['name'] ?? 'unknown';
+			$facts['throwable'] = $failure['throwable'] ?? null;
+		} elseif ( 'normalize-invariant-failed' === $failure_class ) {
+			/*
+			 * Facts must not include input-derived values (hashes, hex windows,
+			 * byte offsets): every distinct failing input would mint a distinct
+			 * signature and the watcher would treat one normalize bug as an
+			 * unbounded stream of new findings. Forensics stay in result.json.
+			 */
+			$normalize = $result['tagProcessor']['normalize'] ?? array();
+			$failure   = $normalize['failure'] ?? array();
+			$facts['invariant']       = $failure['name'] ?? 'normalize-unknown';
+			$facts['normalizeStatus'] = $normalize['status'] ?? null;
+			$facts['normalizeApi']    = $normalize['api'] ?? null;
+			$facts['throwable']       = $normalize['throwable'] ?? $failure['throwable'] ?? null;
+			$facts['message']         = self::normalize_message( $failure['message'] ?? '' );
+		} elseif ( 'breadcrumb-mismatch' === $failure_class ) {
+			$breadcrumbs = $result['wordpress']['breadcrumbs'] ?? array();
+			$divergence  = $breadcrumbs['divergenceDepth'] ?? null;
+			$facts['kind']            = $breadcrumbs['kind'] ?? null;
+			$facts['divergenceDepth'] = $divergence;
+			$facts['expectedAt']      = null === $divergence ? null : ( $breadcrumbs['expected'][ $divergence ] ?? null );
+			$facts['actualAt']        = null === $divergence ? null : ( $breadcrumbs['actual'][ $divergence ] ?? null );
+		} elseif ( 'resource-limit' === $failure_class ) {
+			$limit_failures = array_values( array_unique( self::resource_limit_failures( $result ) ) );
+			sort( $limit_failures );
+			// NOTE(review): the counts below are hashed into the signature;
+			// confirm they are stable per limit rather than per input.
+			$facts['invariant']     = $limit_failures[0] ?? 'resource-limit';
+			$facts['limitFailures'] = $limit_failures;
+			$facts['tokenCount']    = $result['tagProcessor']['tokenCount'] ?? $result['wordpress']['tokenCount'] ?? null;
+			$facts['nodeCount']     = $result['dom']['nodeCount'] ?? null;
+			$facts['tagTokenCount'] = $result['tagProcessor']['tokenCount'] ?? null;
+			$facts['wordpressTokenCount'] = $result['wordpress']['tokenCount'] ?? null;
+			$facts['domNodeCount']        = $result['dom']['nodeCount'] ?? null;
+		} elseif ( 'unsupported' === $failure_class ) {
+			$unsupported = $result['wordpress']['unsupported'] ?? array();
+			$facts['unsupportedMessage'] = $unsupported['message'] ?? null;
+			$facts['unsupportedToken']   = $unsupported['tokenName'] ?? null;
+		} elseif ( in_array( $failure_class, array( 'fatal-error', 'oracle-parse-error' ), true ) ) {
+			// Report the side that errored: WordPress when its status is
+			// 'error', the DOM side otherwise.
+			$source = ( ( $result['wordpress']['status'] ?? '' ) === 'error' )
+				? ( $result['wordpress'] ?? array() )
+				: ( $result['dom'] ?? array() );
+			$facts['throwable'] = $source['throwable'] ?? null;
+			$facts['message']   = self::normalize_message( $source['error'] ?? $result['failureSnippet'] ?? '' );
+		} else {
+			$facts['message'] = self::normalize_message( $result['failureSnippet'] ?? $result['status'] ?? '' );
+		}
+
+		$hash = substr( sha1( json_encode( $facts, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE ) ), 0, 12 );
+
+		/*
+		 * The family key clusters likely-same-root-cause findings. Prefer the
+		 * masked line pair over the tree path: paths embed generated element
+		 * names and spread one bug across many families.
+		 */
+		if ( isset( $facts['wordpressNorm'] ) || isset( $facts['domNorm'] ) ) {
+			$family_fact = ( $facts['wordpressNorm'] ?? '' ) . '|' . ( $facts['domNorm'] ?? '' );
+		} else {
+			$family_fact = $facts['invariant'] ?? $facts['unsupportedMessage'] ?? $facts['kind'] ?? '';
+		}
+
+		return array(
+			'hash'             => $hash,
+			'equivalenceClass' => $failure_class,
+			'familyKey'        => substr( sha1( $failure_class . ':' . $family_fact ), 0, 12 ),
+			'facts'            => $facts,
+			'normalized'       => self::normalized_text( $facts ),
+		);
+	}
+
+	/**
+	 * Extracts the three facts shared by every tree-diff failure class, in
+	 * the order they are hashed.
+	 *
+	 * @param mixed $diff A "firstDifference" record; anything that is not an
+	 *                    array yields three nulls.
+	 * @return array Keys treePath, wordpressNorm and domNorm.
+	 */
+	private static function diff_facts( $diff ): array {
+		return array(
+			'treePath'      => $diff['path'] ?? null,
+			'wordpressNorm' => $diff['wordpressNorm'] ?? null,
+			'domNorm'       => $diff['domNorm'] ?? null,
+		);
+	}
+
+	/**
+	 * Masks the variable parts of a message: every run starting with "/" up
+	 * to the next space or newline becomes "<path>", every run of digits
+	 * becomes "<n>", and surrounding whitespace is trimmed.
+	 *
+	 * preg_replace() returns null on a PCRE error; in that case the text from
+	 * before the failed step is kept instead of collapsing the message to ''.
+	 */
+	private static function normalize_message( string $message ): string {
+		$without_paths = preg_replace( '/\/[^ \n]+/', '<path>', $message );
+		if ( null === $without_paths ) {
+			$without_paths = $message;
+		}
+
+		$without_numbers = preg_replace( '/\d+/', '<n>', $without_paths );
+		if ( null === $without_numbers ) {
+			$without_numbers = $without_paths;
+		}
+
+		return trim( $without_numbers );
+	}
+
+	/**
+	 * Public entry point to the message masking used for signatures.
+	 */
+	public static function normalize_message_for_finding( string $message ): string {
+		return self::normalize_message( $message );
+	}
+
+	/**
+	 * Collects the names of limit-related failures in a result: every string
+	 * "name" among the tag processor failures, plus "<source>-<class>" for a
+	 * wordpress or dom section whose failureClass is a token or node limit.
+	 *
+	 * @return string[] May contain duplicates; order follows the result.
+	 */
+	private static function resource_limit_failures( array $result ): array {
+		$limit_failures = array();
+		foreach ( $result['tagProcessor']['failures'] ?? array() as $failure ) {
+			$name = $failure['name'] ?? null;
+			if ( is_string( $name ) ) {
+				$limit_failures[] = $name;
+			}
+		}
+
+		foreach ( array( 'wordpress', 'dom' ) as $source ) {
+			$failure_class = $result[ $source ]['failureClass'] ?? null;
+			if ( in_array( $failure_class, array( 'token-limit-exceeded', 'node-limit-exceeded' ), true ) ) {
+				$limit_failures[] = $source . '-' . $failure_class;
+			}
+		}
+
+		return $limit_failures;
+	}
+
+	/**
+	 * Renders the facts as "key=value" lines; array values are JSON-encoded.
+	 */
+	private static function normalized_text( array $facts ): string {
+		$parts = array();
+		foreach ( $facts as $key => $value ) {
+			if ( is_array( $value ) ) {
+				$value = json_encode( $value, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+			}
+			$parts[] = "{$key}={$value}";
+		}
+		return implode( "\n", $parts );
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Support.php b/tools/html-api-fuzz/lib/Support.php
new file mode 100644
index 0000000000000..aba26be969d27
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Support.php
@@ -0,0 +1,493 @@
+<?php
+namespace HtmlApiFuzz;
+
+/**
+ * Returns the directory three levels above the directory containing this file.
+ */
+function repo_root(): string {
+	$levels_up = 3;
+
+	return dirname( __DIR__, $levels_up );
+}
+
+/**
+ * Formats the current UTC time as "YYYYMMDDTHHMMSS" followed by six
+ * microsecond digits and "Z".
+ */
+function timestamp(): string {
+	$now    = microtime( true );
+	$whole  = (int) $now;
+	$micros = (int) round( ( $now - $whole ) * 1000000 );
+
+	if ( $micros < 0 ) {
+		$micros = 0;
+	}
+
+	// Rounding can reach a full second; carry it so six digits always suffice.
+	if ( 1000000 === $micros ) {
+		$whole += 1;
+		$micros = 0;
+	}
+
+	return sprintf( '%s%06dZ', gmdate( 'Ymd\THis', $whole ), $micros );
+}
+
+/**
+ * Parses a command line into named options and positional arguments.
+ *
+ * $argv[0] is skipped. "--name=value" sets name to value; "--name" followed
+ * by a token that does not start with "--" takes that token as its value;
+ * a bare "--name" is set to true. Everything else is collected, in order,
+ * under the '_' key.
+ *
+ * @param array $argv Argument vector.
+ * @return array Option map with positional arguments under '_'.
+ */
+function parse_cli_options( array $argv ): array {
+	$options = array( '_' => array() );
+	$total   = count( $argv );
+	$index   = 1;
+
+	while ( $index < $total ) {
+		$token = $argv[ $index ];
+		++$index;
+
+		if ( 0 !== strpos( $token, '--' ) ) {
+			$options['_'][] = $token;
+			continue;
+		}
+
+		$name = substr( $token, 2 );
+		if ( false !== strpos( $name, '=' ) ) {
+			$pair                 = explode( '=', $name, 2 );
+			$options[ $pair[0] ] = $pair[1];
+			continue;
+		}
+
+		$next_is_value = $index < $total && 0 !== strpos( $argv[ $index ], '--' );
+		if ( $next_is_value ) {
+			$options[ $name ] = $argv[ $index ];
+			++$index;
+		} else {
+			$options[ $name ] = true;
+		}
+	}
+
+	return $options;
+}
+
+/**
+ * Returns an option as a string, or $fallback when the option is absent or
+ * was given as a bare flag (true).
+ */
+function option_string( array $options, string $name, ?string $fallback = null ): ?string {
+	if ( ! array_key_exists( $name, $options ) ) {
+		return $fallback;
+	}
+
+	$value = $options[ $name ];
+	if ( true === $value ) {
+		return $fallback;
+	}
+
+	return (string) $value;
+}
+
+/**
+ * Reads an option as a boolean.
+ *
+ * A missing option yields $fallback. A bare flag is true. Any other value is
+ * true only when, lower-cased, it is one of "1", "true", "yes" or "on".
+ */
+function option_bool( array $options, string $name, bool $fallback = false ): bool {
+	if ( ! array_key_exists( $name, $options ) ) {
+		return $fallback;
+	}
+
+	$raw    = $options[ $name ];
+	$truthy = array( '1', 'true', 'yes', 'on' );
+
+	return true === $raw || in_array( strtolower( (string) $raw ), $truthy, true );
+}
+
+/**
+ * Reads an option as an integer.
+ *
+ * @param array  $options  Parsed options.
+ * @param string $name     Option name without the leading dashes.
+ * @param int    $fallback Returned when the option is absent or a bare flag.
+ * @return int
+ * @throws \InvalidArgumentException When the value is not a valid integer.
+ */
+function option_int( array $options, string $name, int $fallback ): int {
+	$has_value = array_key_exists( $name, $options ) && true !== $options[ $name ];
+	if ( ! $has_value ) {
+		return $fallback;
+	}
+
+	$parsed = filter_var( $options[ $name ], FILTER_VALIDATE_INT );
+	if ( false !== $parsed ) {
+		return (int) $parsed;
+	}
+
+	throw new \InvalidArgumentException( "Expected --{$name} to be an integer." );
+}
+
+/**
+ * Reads an option as a float.
+ *
+ * @param array  $options  Parsed options.
+ * @param string $name     Option name without the leading dashes.
+ * @param float  $fallback Returned when the option is absent or a bare flag.
+ * @return float
+ * @throws \InvalidArgumentException When the value is not numeric.
+ */
+function option_float( array $options, string $name, float $fallback ): float {
+	$has_value = array_key_exists( $name, $options ) && true !== $options[ $name ];
+	if ( ! $has_value ) {
+		return $fallback;
+	}
+
+	$raw = $options[ $name ];
+	if ( is_numeric( $raw ) ) {
+		return (float) $raw;
+	}
+
+	throw new \InvalidArgumentException( "Expected --{$name} to be numeric." );
+}
+
+/**
+ * Makes sure $path exists as a directory, creating missing parents.
+ *
+ * @throws \RuntimeException When the directory does not exist afterwards.
+ */
+function ensure_dir( string $path ): void {
+	if ( is_dir( $path ) ) {
+		return;
+	}
+
+	if ( mkdir( $path, 0777, true ) ) {
+		return;
+	}
+
+	// mkdir() also fails when something else created the directory in the
+	// meantime; that still counts as success.
+	if ( is_dir( $path ) ) {
+		return;
+	}
+
+	throw new \RuntimeException( "Could not create directory: {$path}" );
+}
+
+/**
+ * Deletes $path: a file or symlink is unlinked, a directory is emptied
+ * recursively and then removed. Symlinks are unlinked, never followed.
+ *
+ * Failures are suppressed; a path that does not exist is ignored.
+ */
+function remove_dir_recursive( string $path ): void {
+	$is_leaf = is_link( $path ) || is_file( $path );
+	if ( $is_leaf ) {
+		@unlink( $path );
+		return;
+	}
+
+	if ( ! is_dir( $path ) ) {
+		return;
+	}
+
+	$entries = scandir( $path );
+	if ( false === $entries ) {
+		$entries = array();
+	}
+
+	foreach ( $entries as $entry ) {
+		if ( '.' !== $entry && '..' !== $entry ) {
+			remove_dir_recursive( $path . DIRECTORY_SEPARATOR . $entry );
+		}
+	}
+
+	@rmdir( $path );
+}
+
+/**
+ * Encodes a value as pretty-printed JSON with unescaped slashes and invalid
+ * UTF-8 substituted.
+ *
+ * @param mixed $value Value to encode.
+ * @param int   $flags Extra json_encode() flags, OR-ed with the defaults.
+ * @return string
+ * @throws \RuntimeException When encoding fails.
+ */
+function json_encode_safe( $value, int $flags = 0 ): string {
+	$all_flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE | $flags;
+	$encoded   = json_encode( $value, $all_flags );
+
+	if ( false !== $encoded ) {
+		return $encoded;
+	}
+
+	throw new \RuntimeException( 'JSON encode failed: ' . json_last_error_msg() );
+}
+
+/**
+ * Writes $value to $path as pretty-printed JSON with a trailing newline,
+ * creating the parent directory first.
+ *
+ * @param string $path  Destination file.
+ * @param mixed  $value Value to encode.
+ */
+function write_json_file( string $path, $value ): void {
+	$directory = dirname( $path );
+	ensure_dir( $directory );
+
+	$document = json_encode_safe( $value ) . "\n";
+	file_put_contents( $path, $document );
+}
+
+/**
+ * Reads a JSON file and decodes it with objects as associative arrays.
+ *
+ * @param string $path File to read.
+ * @return mixed|null Decoded value; null when the file cannot be read.
+ * @throws \RuntimeException When the file content is not valid JSON.
+ */
+function read_json_file( string $path ) {
+	$contents = @file_get_contents( $path );
+	if ( false === $contents ) {
+		return null;
+	}
+
+	$decoded = json_decode( $contents, true );
+	if ( JSON_ERROR_NONE === json_last_error() ) {
+		return $decoded;
+	}
+
+	throw new \RuntimeException( "Could not parse JSON {$path}: " . json_last_error_msg() );
+}
+
+/**
+ * Appends $value to $path as one line of JSON, creating the parent directory
+ * first.
+ *
+ * If the value cannot be encoded as-is, it is re-encoded with
+ * JSON_PARTIAL_OUTPUT_ON_ERROR so that the record is still written. Without
+ * this, a failed encode would append a bare newline and leave a blank,
+ * unparseable line in the stream.
+ *
+ * @param string $path  NDJSON file to append to.
+ * @param mixed  $value Value to encode.
+ * @throws \RuntimeException When the value cannot be encoded even partially.
+ */
+function append_ndjson( string $path, $value ): void {
+	ensure_dir( dirname( $path ) );
+
+	$flags = JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE;
+	$line  = json_encode( $value, $flags );
+	if ( false === $line ) {
+		$line = json_encode( $value, $flags | JSON_PARTIAL_OUTPUT_ON_ERROR );
+	}
+	if ( false === $line ) {
+		throw new \RuntimeException( 'JSON encode failed: ' . json_last_error_msg() );
+	}
+
+	file_put_contents( $path, $line . "\n", FILE_APPEND );
+}
+
+/**
+ * Renders the first $limit bytes of $bytes as a JSON string literal for
+ * display, appending "..." when the input is longer than $limit.
+ *
+ * Invalid UTF-8 is substituted by the encoder; if encoding still fails the
+ * slice is shown base64-encoded instead.
+ *
+ * @param string $bytes Raw bytes to preview.
+ * @param int    $limit Maximum number of bytes shown.
+ * @return string
+ */
+function preview_bytes( string $bytes, int $limit = 240 ): string {
+	// Before PHP 8 substr() returns false for an empty input, which would be
+	// rendered as the literal "false"; the cast keeps it an empty string.
+	$slice = (string) substr( $bytes, 0, $limit );
+	$shown = json_encode( $slice, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+	if ( false === $shown ) {
+		$shown = base64_encode( $slice );
+	}
+
+	return strlen( $bytes ) > $limit ? $shown . '...' : $shown;
+}
+
+/**
+ * Returns $payload_policy when it is one of the labels reported by
+ * Generator::payload_policy_labels(), and null otherwise (including for a
+ * null input).
+ */
+function normalize_payload_policy_label( ?string $payload_policy ): ?string {
+	if ( null === $payload_policy ) {
+		return null;
+	}
+
+	$known_labels = Generator::payload_policy_labels();
+	if ( in_array( $payload_policy, $known_labels, true ) ) {
+		return $payload_policy;
+	}
+
+	return null;
+}
+
+/**
+ * Joins command parts into one space-separated string, passing each part
+ * through escapeshellarg().
+ *
+ * @param array $command Command name followed by its arguments.
+ */
+function command_string( array $command ): string {
+	$quoted = array();
+	foreach ( $command as $key => $part ) {
+		$quoted[ $key ] = escapeshellarg( $part );
+	}
+
+	return implode( ' ', $quoted );
+}
+
+/**
+ * Runs `git -C <root> <args...>` without a shell and captures its output.
+ *
+ * The child is polled every 10 ms. Once $timeout_ms has elapsed it is
+ * terminated, and killed with signal 9 if it is still running 200 ms later.
+ *
+ * @param array       $args       Arguments placed after `git -C <root>`.
+ * @param int         $timeout_ms Wall-clock budget in milliseconds.
+ * @param string|null $root       Working tree; defaults to repo_root().
+ * @return array Keys: code (int exit code, or null when the process could not
+ *               be started or timed out), timedOut (bool), stdout, stderr.
+ */
+function run_git_command( array $args, int $timeout_ms = 1000, ?string $root = null ): array {
+	$root    = $root ?? repo_root();
+	$command = array_merge( array( 'git', '-C', $root ), $args );
+	$spec    = array(
+		0 => array( 'pipe', 'r' ),
+		1 => array( 'pipe', 'w' ),
+		2 => array( 'pipe', 'w' ),
+	);
+
+	$process = @proc_open( $command, $spec, $pipes, $root );
+	if ( ! is_resource( $process ) ) {
+		return array(
+			'code'     => null,
+			'timedOut' => false,
+			'stdout'   => '',
+			'stderr'   => '',
+		);
+	}
+
+	fclose( $pipes[0] );
+	stream_set_blocking( $pipes[1], false );
+	stream_set_blocking( $pipes[2], false );
+
+	$stdout        = '';
+	$stderr        = '';
+	$start         = microtime( true );
+	$timed_out     = false;
+	$observed_code = null;
+
+	while ( true ) {
+		$stdout .= stream_get_contents( $pipes[1] );
+		$stderr .= stream_get_contents( $pipes[2] );
+
+		$status = proc_get_status( $process );
+		if ( ! $status['running'] ) {
+			/*
+			 * Before PHP 8.3 the exit code is only available from the first
+			 * proc_get_status() call that sees the process finished; a later
+			 * proc_close() returns -1. Remember it here.
+			 */
+			$observed_code = $status['exitcode'];
+			break;
+		}
+
+		if ( ( microtime( true ) - $start ) * 1000 > $timeout_ms ) {
+			$timed_out = true;
+			proc_terminate( $process );
+			usleep( 200000 );
+			$status = proc_get_status( $process );
+			if ( $status['running'] ) {
+				proc_terminate( $process, 9 );
+			}
+			break;
+		}
+
+		usleep( 10000 );
+	}
+
+	// Drain whatever was written between the last read and process exit.
+	$stdout .= stream_get_contents( $pipes[1] );
+	$stderr .= stream_get_contents( $pipes[2] );
+	fclose( $pipes[1] );
+	fclose( $pipes[2] );
+
+	$exit_code = proc_close( $process );
+	if ( -1 === $exit_code && is_int( $observed_code ) && $observed_code >= 0 ) {
+		$exit_code = $observed_code;
+	}
+	if ( $timed_out ) {
+		$exit_code = null;
+	}
+
+	return array(
+		'code'     => $exit_code,
+		'timedOut' => $timed_out,
+		'stdout'   => $stdout,
+		'stderr'   => $stderr,
+	);
+}
+
+/**
+ * Runs a git command and returns its trimmed stdout.
+ *
+ * @param array       $args       Arguments forwarded to run_git_command().
+ * @param int         $timeout_ms Timeout forwarded to run_git_command().
+ * @param string|null $root       Repository directory forwarded to run_git_command().
+ * @return string|null Trimmed stdout when the exit code is exactly 0, otherwise null.
+ */
+function git_command_output( array $args, int $timeout_ms = 1000, ?string $root = null ): ?string {
+	$result    = run_git_command( $args, $timeout_ms, $root );
+	$succeeded = 0 === $result['code'];
+
+	return $succeeded ? trim( $result['stdout'] ) : null;
+}
+
+/**
+ * Returns the git metadata record used when no metadata is available.
+ *
+ * @return array 'available' is false; 'commit', 'short', 'branch',
+ *               'commitDate' and 'dirty' are all null.
+ */
+function unavailable_git_metadata(): array {
+	$empty_fields = array_fill_keys( array( 'commit', 'short', 'branch', 'commitDate', 'dirty' ), null );
+
+	// Array union keeps 'available' as the first key, followed by the null fields.
+	return array( 'available' => false ) + $empty_fields;
+}
+
+/**
+ * Coerces an arbitrary value into the fixed git metadata record shape.
+ *
+ * Anything that is not an array with a truthy 'available' entry and a string
+ * 'commit' becomes the unavailable record. Otherwise each string field is
+ * kept only if it is a string and 'dirty' only if it is a bool; other values
+ * become null.
+ *
+ * @param mixed $metadata Decoded metadata of unknown shape.
+ * @return array Record with keys 'available', 'commit', 'short', 'branch',
+ *               'commitDate' and 'dirty'.
+ */
+function normalize_git_metadata( $metadata ): array {
+	$usable = is_array( $metadata )
+		&& ( $metadata['available'] ?? false )
+		&& is_string( $metadata['commit'] ?? null );
+	if ( ! $usable ) {
+		return unavailable_git_metadata();
+	}
+
+	$normalized = array( 'available' => true );
+	foreach ( array( 'commit', 'short', 'branch', 'commitDate' ) as $field ) {
+		$candidate            = $metadata[ $field ] ?? null;
+		$normalized[ $field ] = is_string( $candidate ) ? $candidate : null;
+	}
+
+	$dirty_flag          = $metadata['dirty'] ?? null;
+	$normalized['dirty'] = is_bool( $dirty_flag ) ? $dirty_flag : null;
+
+	return $normalized;
+}
+
+/**
+ * Serializes git metadata as base64-encoded JSON.
+ *
+ * The metadata is passed through normalize_git_metadata() before encoding.
+ *
+ * @param array $metadata Git metadata record.
+ * @return string Base64 text of the JSON document.
+ * @throws \RuntimeException When JSON encoding fails.
+ */
+function git_metadata_base64( array $metadata ): string {
+	$normalized = normalize_git_metadata( $metadata );
+	$encoded    = json_encode( $normalized, JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE );
+
+	if ( is_string( $encoded ) ) {
+		return base64_encode( $encoded );
+	}
+
+	throw new \RuntimeException( 'JSON encode failed: ' . json_last_error_msg() );
+}
+
+/**
+ * Decodes base64-encoded JSON git metadata and normalizes it.
+ *
+ * @param string $encoded Base64 text, decoded in strict mode.
+ * @return array Record produced by normalize_git_metadata().
+ * @throws \InvalidArgumentException When the input is not valid base64 or
+ *                                   does not decode to valid JSON.
+ */
+function git_metadata_from_base64( string $encoded ): array {
+	$decoded = base64_decode( $encoded, true );
+	if ( ! is_string( $decoded ) ) {
+		throw new \InvalidArgumentException( 'Invalid --git-metadata-base64.' );
+	}
+
+	$parsed = json_decode( $decoded, true );
+	if ( JSON_ERROR_NONE === json_last_error() ) {
+		return normalize_git_metadata( $parsed );
+	}
+
+	throw new \InvalidArgumentException( 'Invalid --git-metadata-base64 JSON: ' . json_last_error_msg() );
+}
+
+/**
+ * Summarizes where a replay record came from.
+ *
+ * @param string $replay_path Path the replay was loaded from.
+ * @param array  $replay      Decoded replay record.
+ * @return array Keys 'path', 'createdAt', 'repoRoot', 'repoCommit',
+ *               'repoDirty' and 'signatureHash' (each null when absent from
+ *               the replay), plus 'sourceReplay' when the replay carries one
+ *               as an array.
+ */
+function replay_source_metadata( string $replay_path, array $replay ): array {
+	$summary = array( 'path' => $replay_path );
+	foreach ( array( 'createdAt', 'repoRoot', 'repoCommit', 'repoDirty' ) as $field ) {
+		$summary[ $field ] = $replay[ $field ] ?? null;
+	}
+
+	// The hash is read from the top-level signature first, then from the result's.
+	$summary['signatureHash'] = $replay['signature']['hash'] ?? $replay['result']['signature']['hash'] ?? null;
+
+	$nested_source = $replay['sourceReplay'] ?? null;
+	if ( is_array( $nested_source ) ) {
+		$summary['sourceReplay'] = $nested_source;
+	}
+
+	return $summary;
+}
+
+/**
+ * Collects commit, branch, commit date and dirty state for a git checkout.
+ *
+ * Metadata is reported as unavailable unless git's top-level directory for
+ * $root resolves to $root itself and HEAD resolves to a commit. Results are
+ * memoized per resolved root and timeout for the lifetime of the process.
+ *
+ * @param int         $timeout_ms Timeout applied to each git invocation.
+ * @param string|null $root       Directory to inspect; defaults to repo_root().
+ * @param bool        $use_cache  Whether to read from and write to the memo.
+ * @return array Record with keys 'available', 'commit', 'short', 'branch',
+ *               'commitDate' and 'dirty'.
+ */
+function git_metadata( int $timeout_ms = 1000, ?string $root = null, bool $use_cache = true ): array {
+	static $cache = array();
+
+	$root      = $root ?? repo_root();
+	$real_root = realpath( $root );
+	$cache_key = ( false === $real_root ? $root : $real_root ) . ':' . $timeout_ms;
+
+	if ( $use_cache && array_key_exists( $cache_key, $cache ) ) {
+		return $cache[ $cache_key ];
+	}
+
+	// Only accept a checkout whose top level is exactly the requested root.
+	$top_level    = git_command_output( array( 'rev-parse', '--show-toplevel' ), $timeout_ms, $root );
+	$is_repo_root = null !== $top_level
+		&& '' !== $top_level
+		&& false !== $real_root
+		&& realpath( $top_level ) === $real_root;
+
+	$commit = $is_repo_root
+		? git_command_output( array( 'rev-parse', 'HEAD' ), $timeout_ms, $root )
+		: null;
+
+	if ( null === $commit || '' === $commit ) {
+		$metadata = unavailable_git_metadata();
+	} else {
+		$branch = git_command_output( array( 'branch', '--show-current' ), $timeout_ms, $root );
+		if ( '' === $branch ) {
+			$branch = null;
+		}
+
+		// `git diff --quiet` exits 0 when clean and 1 when there are changes.
+		$diff_code = run_git_command( array( 'diff', '--quiet', 'HEAD', '--' ), $timeout_ms, $root )['code'];
+		if ( 0 === $diff_code ) {
+			$dirty = false;
+		} elseif ( 1 === $diff_code ) {
+			$dirty = true;
+		} else {
+			$dirty = null;
+		}
+
+		$short       = git_command_output( array( 'rev-parse', '--short=12', 'HEAD' ), $timeout_ms, $root );
+		$commit_date = git_command_output( array( 'show', '-s', '--format=%cI', 'HEAD' ), $timeout_ms, $root );
+
+		$metadata = array(
+			'available'  => true,
+			'commit'     => $commit,
+			'short'      => $short,
+			'branch'     => $branch,
+			'commitDate' => $commit_date,
+			'dirty'      => $dirty,
+		);
+	}
+
+	if ( $use_cache ) {
+		$cache[ $cache_key ] = $metadata;
+	}
+
+	return $metadata;
+}
+
+/**
+ * Runs a PHP script in a subprocess using the current PHP binary.
+ *
+ * The child's pipes are polled without blocking until the process exits or
+ * the timeout elapses. On timeout the process is sent the default
+ * termination signal, then signal 9 if it is still running 200ms later.
+ *
+ * The exit code is taken from proc_get_status() at the moment it reports
+ * that the process has stopped. Before PHP 8.3 that call consumes the exit
+ * status, after which proc_close() returns -1 even for a successful run.
+ *
+ * @param array       $script_args Arguments placed after PHP_BINARY.
+ * @param string      $cwd         Working directory for the subprocess.
+ * @param int         $timeout_ms  Wall-clock budget in milliseconds.
+ * @param string|null $log_path    When given, stdout followed by stderr is written to this file.
+ * @return array Keys: 'command' (shell-quoted command line), 'code' (int, or
+ *               null on timeout), 'ok' (exit code 0 and no timeout),
+ *               'timedOut', 'durationMs', 'stdout', 'stderr', 'output'
+ *               (stdout followed by stderr) and 'logPath'.
+ * @throws \RuntimeException When the subprocess cannot be started.
+ */
+function run_php_process( array $script_args, string $cwd, int $timeout_ms, ?string $log_path = null ): array {
+	$command = array_merge( array( PHP_BINARY ), $script_args );
+	$spec    = array(
+		0 => array( 'pipe', 'r' ),
+		1 => array( 'pipe', 'w' ),
+		2 => array( 'pipe', 'w' ),
+	);
+
+	$process = proc_open( $command, $spec, $pipes, $cwd );
+	if ( ! is_resource( $process ) ) {
+		throw new \RuntimeException( 'Could not start PHP subprocess.' );
+	}
+
+	// Nothing is written to the child's stdin.
+	fclose( $pipes[0] );
+	stream_set_blocking( $pipes[1], false );
+	stream_set_blocking( $pipes[2], false );
+
+	$stdout      = '';
+	$stderr      = '';
+	$start       = microtime( true );
+	$timed_out   = false;
+	$status_code = null;
+
+	while ( true ) {
+		$stdout .= stream_get_contents( $pipes[1] );
+		$stderr .= stream_get_contents( $pipes[2] );
+
+		$status = proc_get_status( $process );
+		if ( ! $status['running'] ) {
+			// Remember the exit code now; proc_close() may no longer have it.
+			$status_code = $status['exitcode'];
+			break;
+		}
+
+		if ( ( microtime( true ) - $start ) * 1000 > $timeout_ms ) {
+			$timed_out = true;
+			proc_terminate( $process );
+			usleep( 200000 );
+			$status = proc_get_status( $process );
+			if ( $status['running'] ) {
+				proc_terminate( $process, 9 );
+			}
+			break;
+		}
+
+		usleep( 10000 );
+	}
+
+	// Drain whatever was written between the last poll and process exit.
+	$stdout .= stream_get_contents( $pipes[1] );
+	$stderr .= stream_get_contents( $pipes[2] );
+	fclose( $pipes[1] );
+	fclose( $pipes[2] );
+
+	$exit_code = proc_close( $process );
+	if ( is_int( $status_code ) && $status_code >= 0 ) {
+		// Prefer the code captured while polling over proc_close()'s value.
+		$exit_code = $status_code;
+	}
+	if ( $timed_out ) {
+		$exit_code = null;
+	}
+
+	$output = $stdout . $stderr;
+	if ( null !== $log_path ) {
+		ensure_dir( dirname( $log_path ) );
+		file_put_contents( $log_path, $output );
+	}
+
+	return array(
+		'command'    => command_string( $command ),
+		'code'       => $exit_code,
+		'ok'         => 0 === $exit_code && ! $timed_out,
+		'timedOut'   => $timed_out,
+		'durationMs' => (int) round( ( microtime( true ) - $start ) * 1000 ),
+		'stdout'     => $stdout,
+		'stderr'     => $stderr,
+		'output'     => $output,
+		'logPath'    => $log_path,
+	);
+}
+
+/**
+ * Reads a newline-delimited JSON file into a list of decoded records.
+ *
+ * Blank lines and lines that fail to decode are skipped. An unreadable file
+ * yields an empty list.
+ *
+ * @param string $path File to read.
+ * @return array Decoded records in file order; objects decode as associative arrays.
+ */
+function read_ndjson_records( string $path ): array {
+	$contents = @file_get_contents( $path );
+	if ( ! is_string( $contents ) ) {
+		return array();
+	}
+
+	$records = array();
+	foreach ( explode( "\n", $contents ) as $raw_line ) {
+		$trimmed = trim( $raw_line );
+		if ( '' !== $trimmed ) {
+			$decoded = json_decode( $trimmed, true );
+			if ( JSON_ERROR_NONE === json_last_error() ) {
+				$records[] = $decoded;
+			}
+		}
+	}
+
+	return $records;
+}
+
+/**
+ * Recursively collects files with an exact name below a directory.
+ *
+ * @param string $dir       Directory to search.
+ * @param string $filename  Exact file name to match.
+ * @param array  $skip_dirs Directory names that are not descended into, at any depth.
+ * @return array Matching paths in scandir() order, with each subdirectory's
+ *               matches listed where that subdirectory appears.
+ */
+function find_files_named( string $dir, string $filename, array $skip_dirs = array() ): array {
+	$entries = is_dir( $dir ) ? scandir( $dir ) : false;
+	if ( false === $entries ) {
+		return array();
+	}
+
+	$matches = array();
+	foreach ( $entries as $entry ) {
+		if ( '.' === $entry || '..' === $entry ) {
+			continue;
+		}
+
+		$candidate = $dir . DIRECTORY_SEPARATOR . $entry;
+		if ( ! is_dir( $candidate ) ) {
+			if ( $entry === $filename ) {
+				$matches[] = $candidate;
+			}
+			continue;
+		}
+
+		if ( in_array( $entry, $skip_dirs, true ) ) {
+			continue;
+		}
+
+		foreach ( find_files_named( $candidate, $filename, $skip_dirs ) as $nested_match ) {
+			$matches[] = $nested_match;
+		}
+	}
+
+	return $matches;
+}
diff --git a/tools/html-api-fuzz/lib/TagInvariants.php b/tools/html-api-fuzz/lib/TagInvariants.php
new file mode 100644
index 0000000000000..1f777dfcba0e3
--- /dev/null
+++ b/tools/html-api-fuzz/lib/TagInvariants.php
@@ -0,0 +1,437 @@
+<?php
+namespace HtmlApiFuzz;
+
+class TagInvariants {
+	/**
+	 * Runs the WP_HTML_Tag_Processor invariant checks against one HTML input.
+	 *
+	 * Scans every token up to the token limit while exercising the token, tag,
+	 * attribute, class-list and modifiable-text getters. It then verifies that
+	 * get_updated_html() returns the input unchanged, runs the simple-mutation
+	 * and seek-consistency checks, and finally runs the normalize idempotence
+	 * check unless a token limit was exceeded.
+	 *
+	 * The normalize result is reported under 'normalize' only; it is not added
+	 * to 'failures' and does not influence 'ok'.
+	 *
+	 * @param string $html             Input HTML.
+	 * @param array  $limits           Optional limits; only 'maxTokens' (default 2000) is read here.
+	 * @param string $mode             Generator mode, forwarded to the normalize check.
+	 * @param string $fragment_context Fragment context element name, forwarded to the normalize check.
+	 * @return array Keys: 'ok' (true when no failure was recorded), 'failures'
+	 *               (list of arrays with 'name' and 'message'), 'tokenCount'
+	 *               and 'normalize'.
+	 */
+	public static function check( string $html, array $limits = array(), string $mode = Generator::MODE_FRAGMENT_BODY, string $fragment_context = 'body' ): array {
+		HtmlApiBootstrap::load();
+		$max_tokens = $limits['maxTokens'] ?? 2000;
+		$failures   = array();
+		$tokens     = 0;
+		$normalize  = array(
+			'status' => 'not-run',
+			'ok'     => true,
+		);
+
+		try {
+			$processor = new \WP_HTML_Tag_Processor( $html );
+			while ( $processor->next_token() ) {
+				++$tokens;
+				if ( $tokens > $max_tokens ) {
+					$failures[] = array(
+						'name'    => 'tag-token-limit-exceeded',
+						'message' => 'WP_HTML_Tag_Processor exceeded token limit.',
+					);
+					break;
+				}
+
+				// A token that next_token() stopped on must report both a type and a name.
+				$type = $processor->get_token_type();
+				$name = $processor->get_token_name();
+				if ( null === $type ) {
+					$failures[] = array(
+						'name'    => 'null-token-type-after-next-token',
+						'message' => 'get_token_type() returned null after next_token().',
+					);
+					break;
+				}
+				if ( null === $name ) {
+					$failures[] = array(
+						'name'    => 'null-token-name-after-next-token',
+						'message' => 'get_token_name() returned null after next_token().',
+					);
+					break;
+				}
+
+				if ( '#tag' === $type && ! $processor->is_tag_closer() ) {
+					$tag = $processor->get_tag();
+					if ( null === $tag ) {
+						$failures[] = array(
+							'name'    => 'null-tag-for-tag-token',
+							'message' => 'get_tag() returned null for a tag token.',
+						);
+						break;
+					}
+					// Return values are discarded: the getters are called only so that
+					// anything they throw is caught by the handler below.
+					$attrs = $processor->get_attribute_names_with_prefix( '' );
+					if ( is_array( $attrs ) ) {
+						foreach ( $attrs as $attr ) {
+							$processor->get_attribute( $attr );
+							$processor->get_qualified_attribute_name( $attr );
+						}
+					}
+					if ( null !== $processor->get_attribute( 'class' ) ) {
+						foreach ( $processor->class_list() as $_class_name ) {
+							// Iteration itself is the invariant check.
+						}
+					}
+				}
+
+				$processor->get_modifiable_text();
+			}
+
+			// No edits were queued above, so the output must equal the input byte for byte.
+			if ( $processor->get_updated_html() !== $html ) {
+				$failures[] = array(
+					'name'    => 'updated-html-changed-without-edits',
+					'message' => 'get_updated_html() changed HTML even though no edits were queued.',
+				);
+			}
+
+			$mutation = self::check_simple_mutation( $html, $max_tokens );
+			if ( ! $mutation['ok'] ) {
+				$failures[] = $mutation['failure'];
+			}
+
+			$seek = self::check_seek_consistency( $html, $max_tokens, $tokens );
+			if ( ! $seek['ok'] ) {
+				$failures[] = $seek['failure'];
+			}
+		} catch ( \Throwable $e ) {
+			// Anything thrown by the processor is itself reported as a failure.
+			$failures[] = array(
+				'name'      => 'tag-processor-throwable',
+				'message'   => $e->getMessage(),
+				'throwable' => get_class( $e ),
+			);
+		}
+
+		// Normalization is skipped for inputs that already exceeded a token limit.
+		if ( self::has_resource_limit_failure( $failures ) ) {
+			$normalize = array(
+				'status' => 'skipped-resource-limit',
+				'ok'     => true,
+			);
+		} else {
+			$normalize = self::check_normalize_idempotence( $html, $mode, $fragment_context );
+		}
+
+		return array(
+			'ok'         => empty( $failures ),
+			'failures'   => $failures,
+			'tokenCount' => $tokens,
+			'normalize'  => $normalize,
+		);
+	}
+
+	/**
+	 * Tells whether any recorded failure is one of the token-limit failures.
+	 *
+	 * @param array $failures Failure entries; each may carry a 'name'.
+	 * @return bool True when a name is 'tag-token-limit-exceeded' or 'mutation-token-limit-exceeded'.
+	 */
+	private static function has_resource_limit_failure( array $failures ): bool {
+		$resource_limit_names = array( 'tag-token-limit-exceeded', 'mutation-token-limit-exceeded' );
+
+		foreach ( $failures as $entry ) {
+			$entry_name = $entry['name'] ?? null;
+			if ( in_array( $entry_name, $resource_limit_names, true ) ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Normalizes $html twice and reports whether the second pass changes the output.
+	 *
+	 * PHP errors raised while normalizing are collected by a temporary error
+	 * handler and reported as a failure. Outcomes are checked in this order:
+	 * throwable, native PHP errors, unsupported input (first pass returned
+	 * null), unsupported output (second pass returned null), non-idempotent
+	 * output, and finally success.
+	 *
+	 * @param string $html             Input HTML.
+	 * @param string $mode             Generator mode selecting the normalization API.
+	 * @param string $fragment_context Fragment context element name.
+	 * @return array Always has 'ok', 'status' ('failed', 'unsupported' or
+	 *               'idempotent'), 'mode' and 'api'. Failed results carry a
+	 *               'failure' entry; the success result also carries the
+	 *               normalized HTML.
+	 */
+	private static function check_normalize_idempotence( string $html, string $mode, string $fragment_context ): array {
+		$errors = array();
+		$normalized = null;
+		$normalized_twice = null;
+		$throwable = null;
+
+		// Returning true from the handler stops PHP's standard error handling,
+		// so the errors are recorded here instead of being emitted.
+		set_error_handler(
+			static function ( int $errno, string $errstr ) use ( &$errors ): bool {
+				$errors[] = "{$errno}: {$errstr}";
+				return true;
+			}
+		);
+
+		try {
+			$normalized = self::normalize_html( $html, $mode, $fragment_context );
+			// The second pass only runs when the first pass produced a string.
+			$normalized_twice = is_string( $normalized ) ? self::normalize_html( $normalized, $mode, $fragment_context ) : null;
+		} catch ( \Throwable $e ) {
+			$throwable = $e;
+		} finally {
+			restore_error_handler();
+		}
+
+		if ( null !== $throwable ) {
+			return array(
+				'ok'        => false,
+				'status'    => 'failed',
+				'mode'      => $mode,
+				'api'       => self::normalize_api_for_mode( $mode, $fragment_context ),
+				'failure' => array(
+					'name'      => 'normalize-throwable',
+					'message'   => $throwable->getMessage(),
+					'throwable' => get_class( $throwable ),
+				),
+				'throwable' => get_class( $throwable ),
+			);
+		}
+
+		if ( ! empty( $errors ) ) {
+			return array(
+				'ok'     => false,
+				'status' => 'failed',
+				'mode'   => $mode,
+				'api'    => self::normalize_api_for_mode( $mode, $fragment_context ),
+				'failure' => array(
+					'name'    => 'normalize-native-error',
+					'message' => 'WP_HTML_Processor::normalize() emitted native PHP errors.',
+					'errors'  => $errors,
+				),
+				'errors' => $errors,
+			);
+		}
+
+		// A null first pass is reported as unsupported input, which is not a failure.
+		if ( null === $normalized ) {
+			return array(
+				'ok'          => true,
+				'status'      => 'unsupported',
+				'mode'        => $mode,
+				'api'         => self::normalize_api_for_mode( $mode, $fragment_context ),
+				'inputLength' => strlen( $html ),
+			);
+		}
+
+		// The first pass succeeded but its own output could not be normalized again.
+		if ( null === $normalized_twice ) {
+			return array(
+				'ok'               => false,
+				'status'           => 'failed',
+				'mode'             => $mode,
+				'api'              => self::normalize_api_for_mode( $mode, $fragment_context ),
+				'normalizedLength' => strlen( $normalized ),
+				'normalizedSha1'   => sha1( $normalized ),
+				'failure' => array(
+					'name'              => 'normalize-output-unsupported',
+					'message'           => 'WP_HTML_Processor::normalize() returned HTML that normalize() could not normalize again.',
+					'normalizedLength'  => strlen( $normalized ),
+					'normalizedSha1'    => sha1( $normalized ),
+					'normalizedPreview' => preview_bytes( $normalized ),
+				),
+			);
+		}
+
+		if ( $normalized !== $normalized_twice ) {
+			$first_difference = self::first_string_difference( $normalized, $normalized_twice );
+			return array(
+				'ok'                    => false,
+				'status'                => 'failed',
+				'mode'                  => $mode,
+				'api'                   => self::normalize_api_for_mode( $mode, $fragment_context ),
+				'normalizedLength'      => strlen( $normalized ),
+				'normalizedTwiceLength' => strlen( $normalized_twice ),
+				'normalizedSha1'        => sha1( $normalized ),
+				'normalizedTwiceSha1'   => sha1( $normalized_twice ),
+				'firstDifference'       => $first_difference,
+				'failure' => array(
+					'name'                   => 'normalize-not-idempotent',
+					'message'                => 'Normalizing already-normalized HTML changed the output.',
+					'normalizedLength'       => strlen( $normalized ),
+					'normalizedTwiceLength'  => strlen( $normalized_twice ),
+					'normalizedSha1'         => sha1( $normalized ),
+					'normalizedTwiceSha1'    => sha1( $normalized_twice ),
+					'normalizedPreview'      => preview_bytes( $normalized ),
+					'normalizedTwicePreview' => preview_bytes( $normalized_twice ),
+					'firstDifference'        => $first_difference,
+				),
+			);
+		}
+
+		return array(
+			'ok'               => true,
+			'status'           => 'idempotent',
+			'mode'             => $mode,
+			'api'              => self::normalize_api_for_mode( $mode, $fragment_context ),
+			'inputLength'      => strlen( $html ),
+			'normalizedLength' => strlen( $normalized ),
+			'normalizedSha1'   => sha1( $normalized ),
+			// In-memory only: the worker uses this for the tree-preservation
+			// check and strips it before persisting results.
+			'normalizedHtml'   => $normalized,
+		);
+	}
+
+	/**
+	 * Normalizes HTML with the API that matches the parsing mode.
+	 *
+	 * Full documents go through create_full_parser()->serialize(), non-body
+	 * fragments through create_fragment()->serialize(), and body fragments
+	 * through WP_HTML_Processor::normalize().
+	 *
+	 * @param string $html             Input HTML.
+	 * @param string $mode             Generator mode.
+	 * @param string $fragment_context Fragment context element name.
+	 * @return string|null Serialized HTML, or null when no processor could be
+	 *                     created or the API itself returned null.
+	 */
+	private static function normalize_html( string $html, string $mode, string $fragment_context ): ?string {
+		$is_full_document = Generator::MODE_FULL_DOCUMENT === $mode;
+
+		if ( ! $is_full_document && 'body' === $fragment_context ) {
+			return \WP_HTML_Processor::normalize( $html );
+		}
+
+		$processor = $is_full_document
+			? \WP_HTML_Processor::create_full_parser( $html )
+			: \WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" );
+
+		if ( null === $processor ) {
+			return null;
+		}
+
+		return $processor->serialize();
+	}
+
+	/**
+	 * Names the normalization API used for a mode, for inclusion in results.
+	 *
+	 * @param string $mode             Generator mode.
+	 * @param string $fragment_context Fragment context element name.
+	 * @return string Human-readable API label.
+	 */
+	private static function normalize_api_for_mode( string $mode, string $fragment_context = 'body' ): string {
+		if ( Generator::MODE_FULL_DOCUMENT === $mode ) {
+			$api_label = 'create_full_parser()->serialize()';
+		} elseif ( 'body' === $fragment_context ) {
+			$api_label = 'normalize()';
+		} else {
+			$api_label = "create_fragment(<{$fragment_context}>)->serialize()";
+		}
+
+		return $api_label;
+	}
+
+	/**
+	 * Locates the first byte at which two strings differ.
+	 *
+	 * When one string is a prefix of the other, the offset is the length of
+	 * the shorter string.
+	 *
+	 * @param string $a First string.
+	 * @param string $b Second string.
+	 * @return array 'firstByteOffset' plus hex dumps of up to 64 bytes from
+	 *               each string, starting at most 16 bytes before the offset.
+	 */
+	private static function first_string_difference( string $a, string $b ): array {
+		$shared_length = min( strlen( $a ), strlen( $b ) );
+
+		for ( $offset = 0; $offset < $shared_length; ++$offset ) {
+			if ( $a[ $offset ] !== $b[ $offset ] ) {
+				break;
+			}
+		}
+
+		$window_start = max( 0, $offset - 16 );
+		$hex_window   = static function ( string $text ) use ( $window_start ): string {
+			return bin2hex( substr( $text, $window_start, 64 ) );
+		};
+
+		return array(
+			'firstByteOffset'        => $offset,
+			'normalizedDiffHex'      => $hex_window( $a ),
+			'normalizedTwiceDiffHex' => $hex_window( $b ),
+		);
+	}
+
+	/**
+	 * Verifies that seeking back to a bookmarked token reproduces the exact
+	 * token stream that followed it on the first pass.
+	 *
+	 * The bookmark target is derived deterministically from the input so the
+	 * checked position varies across seeds without extra generator plumbing.
+	 *
+	 * The second pass stops once it has as many fingerprints as the first
+	 * pass, so tokens appearing after that point are not compared.
+	 *
+	 * @param string $html        Input HTML.
+	 * @param int    $max_tokens  Upper bound on tokens scanned in the first pass.
+	 * @param int    $token_count Number of tokens counted by the caller's scan.
+	 * @return array 'ok' => true when consistent or when there was nothing to
+	 *               check; otherwise 'ok' => false with a 'failure' entry.
+	 */
+	private static function check_seek_consistency( string $html, int $max_tokens, int $token_count ): array {
+		if ( $token_count < 1 ) {
+			return array( 'ok' => true );
+		}
+
+		// 1-based index of the token to bookmark, within the scanned range.
+		$target = 1 + ( crc32( $html ) % min( $token_count, $max_tokens ) );
+
+		$processor = new \WP_HTML_Tag_Processor( $html );
+		$index     = 0;
+		$bookmarked = false;
+		$first_pass = array();
+		while ( $processor->next_token() ) {
+			++$index;
+			if ( $index > $max_tokens ) {
+				break;
+			}
+			if ( ! $bookmarked && $index === $target ) {
+				if ( ! $processor->set_bookmark( 'fuzz-seek' ) ) {
+					return array(
+						'ok'      => false,
+						'failure' => array(
+							'name'    => 'seek-bookmark-failed',
+							'message' => 'set_bookmark() failed on a token reached by next_token().',
+						),
+					);
+				}
+				$bookmarked = true;
+			}
+			// From the bookmarked token onward (inclusive), record every token.
+			if ( $bookmarked ) {
+				$first_pass[] = self::token_fingerprint( $processor );
+			}
+		}
+
+		// The target token was never reached in this scan; nothing to compare.
+		if ( ! $bookmarked ) {
+			return array( 'ok' => true );
+		}
+
+		if ( ! $processor->seek( 'fuzz-seek' ) ) {
+			return array(
+				'ok'      => false,
+				'failure' => array(
+					'name'    => 'seek-failed',
+					'message' => 'seek() failed for a bookmark that set_bookmark() accepted.',
+				),
+			);
+		}
+
+		// The token the processor is on after seek() is fingerprinted first,
+		// mirroring the bookmarked token that opens $first_pass.
+		$second_pass   = array( self::token_fingerprint( $processor ) );
+		$second_tokens = 1;
+		while ( $second_tokens < count( $first_pass ) && $processor->next_token() ) {
+			++$second_tokens;
+			$second_pass[] = self::token_fingerprint( $processor );
+		}
+
+		if ( $first_pass !== $second_pass ) {
+			// Find the first position at which the two fingerprint lists differ.
+			$divergence = 0;
+			$limit      = min( count( $first_pass ), count( $second_pass ) );
+			while ( $divergence < $limit && $first_pass[ $divergence ] === $second_pass[ $divergence ] ) {
+				++$divergence;
+			}
+			return array(
+				'ok'      => false,
+				'failure' => array(
+					'name'             => 'seek-token-stream-mismatch',
+					'message'          => 'Re-scanning after seek() produced a different token stream.',
+					'bookmarkIndex'    => $target,
+					'divergenceOffset' => $divergence,
+					'firstPassCount'   => count( $first_pass ),
+					'secondPassCount'  => count( $second_pass ),
+					'firstFingerprint' => $first_pass[ $divergence ] ?? null,
+					'secondFingerprint' => $second_pass[ $divergence ] ?? null,
+				),
+			);
+		}
+
+		return array( 'ok' => true );
+	}
+
+	/**
+	 * Hashes the observable state of the processor's current token.
+	 *
+	 * The fingerprint covers the token type, token name, closer flag and
+	 * modifiable text, plus every attribute name and value for tag openers.
+	 *
+	 * @param \WP_HTML_Tag_Processor $processor Processor positioned on a token.
+	 * @return string SHA-1 hex digest of the fields joined with the 0x1F byte.
+	 */
+	private static function token_fingerprint( \WP_HTML_Tag_Processor $processor ): string {
+		$token_type = $processor->get_token_type();
+		$token_name = $processor->get_token_name();
+		$is_closer  = $processor->is_tag_closer();
+		$text       = $processor->get_modifiable_text();
+
+		$fields = array(
+			(string) $token_type,
+			(string) $token_name,
+			$is_closer ? '/' : '',
+			(string) $text,
+		);
+
+		$attribute_names = ( '#tag' === $token_type && ! $is_closer )
+			? $processor->get_attribute_names_with_prefix( '' )
+			: null;
+
+		if ( is_array( $attribute_names ) ) {
+			foreach ( $attribute_names as $attribute_name ) {
+				$attribute_value = $processor->get_attribute( $attribute_name );
+				// Boolean attributes are marked so they differ from an empty string value.
+				$rendered_value = true === $attribute_value ? '(true)' : (string) $attribute_value;
+				$fields[]       = $attribute_name . '=' . $rendered_value;
+			}
+		}
+
+		return sha1( implode( "\x1f", $fields ) );
+	}
+
+	/**
+	 * Sets data-fuzz="1" on the first tag and checks that the edit survives a re-scan.
+	 *
+	 * Inputs with no tag, or where set_attribute() declines the edit, pass
+	 * trivially.
+	 *
+	 * @param string $html       Input HTML.
+	 * @param int    $max_tokens Upper bound on tokens scanned in the updated HTML.
+	 * @return array 'ok' => true on success; otherwise 'ok' => false with a 'failure' entry.
+	 */
+	private static function check_simple_mutation( string $html, int $max_tokens ): array {
+		$passed = array( 'ok' => true );
+
+		$editor = new \WP_HTML_Tag_Processor( $html );
+		if ( ! $editor->next_tag() || ! $editor->set_attribute( 'data-fuzz', '1' ) ) {
+			return $passed;
+		}
+
+		$rescan     = new \WP_HTML_Tag_Processor( $editor->get_updated_html() );
+		$seen       = 0;
+		$saw_opener = false;
+		while ( $rescan->next_token() ) {
+			++$seen;
+			if ( $seen > $max_tokens ) {
+				return array(
+					'ok'      => false,
+					'failure' => array(
+						'name'    => 'mutation-token-limit-exceeded',
+						'message' => 'A simple set_attribute() mutation produced HTML that exceeded the token limit.',
+					),
+				);
+			}
+
+			// Only the first tag opener is inspected; later tokens are just counted.
+			if ( $saw_opener || '#tag' !== $rescan->get_token_type() || $rescan->is_tag_closer() ) {
+				continue;
+			}
+
+			$saw_opener = true;
+			if ( '1' !== $rescan->get_attribute( 'data-fuzz' ) ) {
+				return array(
+					'ok'      => false,
+					'failure' => array(
+						'name'    => 'mutation-attribute-missing',
+						'message' => 'set_attribute() reported success, but the updated first tag does not contain data-fuzz="1".',
+					),
+				);
+			}
+		}
+
+		if ( $saw_opener ) {
+			return $passed;
+		}
+
+		return array(
+			'ok'      => false,
+			'failure' => array(
+				'name'    => 'mutation-removed-first-tag',
+				'message' => 'A simple set_attribute() mutation removed the first tag from the updated HTML.',
+			),
+		);
+	}
+}
diff --git a/tools/html-api-fuzz/lib/TreeRenderer.php b/tools/html-api-fuzz/lib/TreeRenderer.php
new file mode 100644
index 0000000000000..0b73bfe00f4f4
--- /dev/null
+++ b/tools/html-api-fuzz/lib/TreeRenderer.php
@@ -0,0 +1,1309 @@
+<?php
+namespace HtmlApiFuzz;
+
+class TreeRenderer {
+	// Values reported under the 'status' key of render results.
+	const STATUS_OK          = 'ok';
+	const STATUS_UNSUPPORTED = 'unsupported';
+	const STATUS_ERROR       = 'error';
+	// Message text for an unsupported DOM template case.
+	// NOTE(review): not referenced in the part of the class visible here — confirm where it is reported.
+	private const DOM_TEMPLATE_CONTEXT_UNSUPPORTED = 'DOM template content does not round-trip through body-context fragment parsing.';
+	// Set-style map keyed by local name (every value is true).
+	// NOTE(review): presumably consulted by the xlink attribute handling — confirm.
+	private const XLINK_LOCAL_NAMES = array(
+		'actuate' => true,
+		'arcrole' => true,
+		'href'    => true,
+		'role'    => true,
+		'show'    => true,
+		'title'   => true,
+		'type'    => true,
+	);
+
+	/**
+	 * Renders the WP_HTML_Processor token stream for $html as an indented tree dump.
+	 *
+	 * While rendering, get_breadcrumbs() is compared with an element stack
+	 * derived from the token stream at every tag token; a divergence ends the
+	 * render with an error result.
+	 *
+	 * @param string $html             Input HTML.
+	 * @param string $mode             Generator mode; MODE_FULL_DOCUMENT selects create_full_parser(),
+	 *                                 any other value selects create_fragment().
+	 * @param array  $limits           Optional limits; only 'maxTokens' (default 2000) is read here.
+	 * @param string $fragment_context Context element name used for fragment parsing.
+	 * @return array Always has 'status' (one of the STATUS_* constants). Depending on the
+	 *               outcome it also carries 'tree', 'tokenCount', 'domOracleLineTolerances',
+	 *               'error', 'failureClass', 'throwable', 'unsupported' or 'breadcrumbs'.
+	 */
+	public static function render_wordpress( string $html, string $mode, array $limits = array(), string $fragment_context = 'body' ): array {
+		HtmlApiBootstrap::load();
+		$max_tokens = $limits['maxTokens'] ?? 2000;
+		$processor  = Generator::MODE_FULL_DOCUMENT === $mode
+			? \WP_HTML_Processor::create_full_parser( $html )
+			: \WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" );
+
+		if ( null === $processor ) {
+			// A missing processor for a non-body fragment context is treated as
+			// an unsupported context; every other case is an error.
+			if ( Generator::MODE_FULL_DOCUMENT !== $mode && 'body' !== $fragment_context ) {
+				return array(
+					'status'      => self::STATUS_UNSUPPORTED,
+					'unsupported' => array(
+						'message' => "create_fragment() does not support the <{$fragment_context}> context.",
+					),
+				);
+			}
+			return array(
+				'status' => self::STATUS_ERROR,
+				'error'  => 'Could not create WP_HTML_Processor.',
+			);
+		}
+
+		$output       = '';
+		$indent_level = 0;
+		// $was_text / $text_node accumulate consecutive text tokens into one output line.
+		$was_text     = false;
+		$text_node    = '';
+		$tokens       = 0;
+		// Number of lines appended to $output so far.
+		$line_count   = 0;
+		$dom_oracle_line_tolerances = array();
+
+		/*
+		 * The renderer derives tree structure from token order and
+		 * expects_closer(). get_breadcrumbs() reports the processor's own
+		 * stack of open elements; the two must agree at every tag token, or
+		 * the processor's stack bookkeeping and its token stream have
+		 * diverged.
+		 */
+		if ( Generator::MODE_FULL_DOCUMENT === $mode ) {
+			$breadcrumb_prefix = array();
+		} elseif ( 'body' === $fragment_context ) {
+			$breadcrumb_prefix = array( 'HTML', 'BODY' );
+		} else {
+			// Unknown context ancestry; calibrated from the first tag token.
+			$breadcrumb_prefix = null;
+		}
+		$element_stack = array();
+
+		try {
+			while ( $processor->next_token() ) {
+				++$tokens;
+				if ( $tokens > $max_tokens ) {
+					return array(
+						'status'       => self::STATUS_ERROR,
+						'error'        => 'Token limit exceeded.',
+						'failureClass' => 'token-limit-exceeded',
+						'tokenCount'   => $tokens,
+					);
+				}
+
+				// Stop rendering on a processor error; it is classified after the loop.
+				if ( null !== $processor->get_last_error() ) {
+					break;
+				}
+
+				$token_name = $processor->get_token_name();
+				$token_type = $processor->get_token_type();
+				$is_closer  = $processor->is_tag_closer();
+
+				// Presumptuous tags produce no output and do not interrupt a text run.
+				if ( '#presumptuous-tag' === $token_type ) {
+					continue;
+				}
+
+				// A token whose name is not '#text' ends the pending text run: close its quote and emit it.
+				if ( $was_text && '#text' !== $token_name ) {
+					if ( '' !== $text_node ) {
+						$output .= "{$text_node}\"\n";
+						++$line_count;
+					}
+					$was_text  = false;
+					$text_node = '';
+				}
+
+				switch ( $token_type ) {
+					case '#doctype':
+						$doctype = $processor->get_doctype_info();
+						if ( null === $doctype ) {
+							break;
+						}
+						$output .= '<!DOCTYPE ' . self::escape_tree_scalar( (string) $doctype->name );
+						// Both identifiers are printed as soon as either one is present.
+						if ( null !== $doctype->public_identifier || null !== $doctype->system_identifier ) {
+							$output .= ' "' . self::escape_tree_scalar( (string) $doctype->public_identifier ) . '" "' . self::escape_tree_scalar( (string) $doctype->system_identifier ) . '"';
+						}
+						$output .= ">\n";
+						++$line_count;
+						break;
+
+					case '#tag':
+						// HTML elements are shown by lowercased tag name; other
+						// namespaces as "<namespace> <qualified name>".
+						$namespace = $processor->get_namespace();
+						$tag_name  = 'html' === $namespace
+							? strtolower( (string) $processor->get_tag() )
+							: "{$namespace} {$processor->get_qualified_tag_name()}";
+
+						if ( $is_closer ) {
+							array_pop( $element_stack );
+							$breadcrumb_mismatch = self::breadcrumb_mismatch( $processor, $breadcrumb_prefix, $element_stack, null );
+							if ( null !== $breadcrumb_mismatch ) {
+								return $breadcrumb_mismatch;
+							}
+							--$indent_level;
+							// Undo the extra level that the opener added for the "content" line.
+							if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
+								--$indent_level;
+							}
+							break;
+						}
+
+						$breadcrumb_mismatch = self::breadcrumb_mismatch( $processor, $breadcrumb_prefix, $element_stack, $token_name );
+						if ( null !== $breadcrumb_mismatch ) {
+							return $breadcrumb_mismatch;
+						}
+
+						// Only elements that expect a closer open a new nesting level.
+						$tag_indent = $indent_level;
+						if ( $processor->expects_closer() ) {
+							++$indent_level;
+							$element_stack[] = $token_name;
+						}
+
+						$output .= str_repeat( '  ', $tag_indent ) . '<' . self::escape_tree_scalar( $tag_name ) . ">\n";
+						++$line_count;
+						$output .= self::render_wp_attributes( $processor, $tag_indent + 1, $line_count, $dom_oracle_line_tolerances );
+
+						// Modifiable text carried by the tag token itself is rendered as a child text line.
+						$modifiable_text = $processor->get_modifiable_text();
+						if ( '' !== $modifiable_text ) {
+							$output .= str_repeat( '  ', $tag_indent + 1 ) . '"' . self::escape_tree_scalar( $modifiable_text ) . "\"\n";
+							++$line_count;
+						}
+
+						// HTML TEMPLATE children are nested under an extra "content" line.
+						if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) {
+							$output .= str_repeat( '  ', $indent_level ) . "content\n";
+							++$line_count;
+							++$indent_level;
+						}
+						break;
+
+					case '#cdata-section':
+					case '#text':
+						$text_content = $processor->get_modifiable_text();
+						if ( '' === $text_content ) {
+							break;
+						}
+						$was_text = true;
+						// The first piece of a run opens the quoted line at the current indent.
+						if ( '' === $text_node ) {
+							$text_node .= str_repeat( '  ', $indent_level ) . '"';
+						}
+						$text_node .= self::escape_tree_scalar( $text_content );
+						break;
+
+					case '#funky-comment':
+						$output .= str_repeat( '  ', $indent_level ) . '<!-- ' . self::escape_tree_scalar( $processor->get_modifiable_text() ) . " -->\n";
+						++$line_count;
+						break;
+
+					case '#comment':
+						$output .= str_repeat( '  ', $indent_level ) . '<!-- ' . self::escape_tree_scalar( $processor->get_full_comment_text() ) . " -->\n";
+						++$line_count;
+						break;
+
+					default:
+						return array(
+							'status' => self::STATUS_ERROR,
+							'error'  => "Unhandled WordPress token type: {$token_type}",
+						);
+				}
+			}
+		} catch ( \Throwable $e ) {
+			return array(
+				'status'       => self::STATUS_ERROR,
+				'error'        => $e->getMessage(),
+				'throwable'    => get_class( $e ),
+				'failureClass' => 'fatal-error',
+			);
+		}
+
+		// One specific unsupported exception is ignored: the tree rendered so far is returned as OK.
+		$unsupported_exception = $processor->get_unsupported_exception();
+		if ( null !== $unsupported_exception && self::is_ignored_presumptuous_tag_exception( $unsupported_exception ) ) {
+			if ( '' !== $text_node ) {
+				$output .= "{$text_node}\"\n";
+				++$line_count;
+			}
+
+			return array(
+				'status'                   => self::STATUS_OK,
+				'tree'                     => $output . "\n",
+				'tokenCount'               => $tokens,
+				'domOracleLineTolerances'  => $dom_oracle_line_tolerances,
+			);
+		}
+
+		// The non-OK results below return the partial tree without flushing a pending text run.
+		if ( null !== $unsupported_exception ) {
+			return array(
+				'status'      => self::STATUS_UNSUPPORTED,
+				'tree'        => $output,
+				'tokenCount'  => $tokens,
+				'unsupported' => self::unsupported_details( $unsupported_exception ),
+			);
+		}
+
+		if ( null !== $processor->get_last_error() ) {
+			return array(
+				'status'       => self::STATUS_ERROR,
+				'tree'         => $output,
+				'tokenCount'   => $tokens,
+				'error'        => $processor->get_last_error(),
+				'failureClass' => $processor->get_last_error(),
+			);
+		}
+
+		if ( $processor->paused_at_incomplete_token() ) {
+			return array(
+				'status'      => self::STATUS_UNSUPPORTED,
+				'tree'        => $output,
+				'tokenCount'  => $tokens,
+				'unsupported' => array(
+					'message' => 'Paused at incomplete token.',
+				),
+			);
+		}
+
+		// Flush a text run that was still open when the token stream ended.
+		if ( '' !== $text_node ) {
+			$output .= "{$text_node}\"\n";
+			++$line_count;
+		}
+
+		return array(
+			'status'                   => self::STATUS_OK,
+			'tree'                     => $output . "\n",
+			'tokenCount'               => $tokens,
+			'domOracleLineTolerances'  => $dom_oracle_line_tolerances,
+		);
+	}
+
+	/**
+	 * Checks get_breadcrumbs() against the renderer's own element stack at a
+	 * tag token.
+	 *
+	 * For openers $current is the token name, because breadcrumbs already
+	 * include the element being opened. For closers it is null, because the
+	 * element has already been popped from $element_stack. When $prefix is
+	 * null it is calibrated here from the reported breadcrumbs and written
+	 * back through the reference.
+	 *
+	 * @return array|null Error result describing the divergence, or null when
+	 *                    both views agree.
+	 */
+	private static function breadcrumb_mismatch( \WP_HTML_Processor $processor, ?array &$prefix, array $element_stack, ?string $current ): ?array {
+		$reported = $processor->get_breadcrumbs();
+		$tracked  = null === $current
+			? $element_stack
+			: array_merge( $element_stack, array( $current ) );
+
+		if ( null === $prefix ) {
+			// Everything above the tracked elements is taken to be context ancestry.
+			$prefix_length = max( 0, count( $reported ) - count( $tracked ) );
+			$prefix        = array_slice( $reported, 0, $prefix_length );
+		}
+
+		$predicted = array_merge( $prefix, $tracked );
+		if ( $reported === $predicted ) {
+			return null;
+		}
+
+		$predicted_depth = count( $predicted );
+		$reported_depth  = count( $reported );
+		$shared_depth    = min( $predicted_depth, $reported_depth );
+
+		for ( $divergence = 0; $divergence < $shared_depth; ++$divergence ) {
+			if ( $predicted[ $divergence ] !== $reported[ $divergence ] ) {
+				break;
+			}
+		}
+
+		return array(
+			'status'       => self::STATUS_ERROR,
+			'error'        => 'get_breadcrumbs() diverged from the token-derived element stack.',
+			'failureClass' => 'breadcrumb-mismatch',
+			'breadcrumbs'  => array(
+				'kind'            => $predicted_depth === $reported_depth ? 'name' : 'depth',
+				'divergenceDepth' => $divergence,
+				'expectedDepth'   => $predicted_depth,
+				'actualDepth'     => $reported_depth,
+				'expected'        => array_slice( $predicted, 0, 40 ),
+				'actual'          => array_slice( $reported, 0, 40 ),
+			),
+		);
+	}
+
+	/**
+	 * Recognizes one specific unsupported-markup exception: a `</>`
+	 * presumptuous tag reported as content outside of HTML.
+	 */
+	private static function is_ignored_presumptuous_tag_exception( \WP_HTML_Unsupported_Exception $e ): bool {
+		if ( '#presumptuous-tag' !== $e->token_name ) {
+			return false;
+		}
+		if ( '</>' !== $e->token ) {
+			return false;
+		}
+		return 'Content outside of HTML is unsupported.' === $e->getMessage();
+	}
+
+	/**
+	 * Renders the current token's attributes, one tree line per attribute, in
+	 * the order defined by compare_attribute_records().
+	 *
+	 * @param \WP_HTML_Processor $processor                  Processor positioned on the tag.
+	 * @param int                $indent_level               Indentation depth (two spaces per level).
+	 * @param int                $line_count                 Running line counter, advanced once per rendered line.
+	 * @param array              $dom_oracle_line_tolerances Receives the line number of every attribute
+	 *                                                       reported by dom_oracle_xlink_dropped_attribute_names().
+	 * @return string Rendered attribute lines, or '' when there are no attributes.
+	 */
+	private static function render_wp_attributes( \WP_HTML_Processor $processor, int $indent_level, int &$line_count, array &$dom_oracle_line_tolerances ): string {
+		$names = $processor->get_attribute_names_with_prefix( '' );
+		if ( empty( $names ) ) {
+			return '';
+		}
+
+		$dropped_by_oracle = self::dom_oracle_xlink_dropped_attribute_names( $processor, $names );
+
+		$records = array();
+		foreach ( $names as $name ) {
+			$records[ $name ] = self::attribute_record( (string) $processor->get_qualified_attribute_name( $name ) );
+		}
+		uasort( $records, array( __CLASS__, 'compare_attribute_records' ) );
+
+		$indent   = str_repeat( '  ', $indent_level );
+		$rendered = '';
+		foreach ( $records as $name => $record ) {
+			if ( isset( $dropped_by_oracle[ $name ] ) ) {
+				$dom_oracle_line_tolerances[] = $line_count;
+			}
+
+			// A boolean attribute reads back as `true`; render it as the empty string.
+			$raw       = $processor->get_attribute( $name );
+			$text      = true === $raw ? '' : (string) $raw;
+			$rendered .= $indent . $record['renderName'] . '="' . self::escape_tree_scalar( $text ) . "\"\n";
+			++$line_count;
+		}
+
+		return $rendered;
+	}
+
+	/**
+	 * Lists attributes whose lowercased name equals the local name of a
+	 * recognized `xlink:` attribute seen earlier in the same list.
+	 *
+	 * Applies only outside the HTML namespace and only when the runtime probe
+	 * reports that the DOM oracle drops such attributes.
+	 *
+	 * @return array Map of original attribute name => true.
+	 */
+	private static function dom_oracle_xlink_dropped_attribute_names( \WP_HTML_Processor $processor, array $attribute_names ): array {
+		if ( 'html' === $processor->get_namespace() ) {
+			return array();
+		}
+		if ( ! self::dom_oracle_drops_bare_xlink_local_name_after_xlink() ) {
+			return array();
+		}
+
+		$prefix     = 'xlink:';
+		$xlink_seen = array();
+		$dropped    = array();
+		foreach ( $attribute_names as $name ) {
+			$lowered = strtolower( $name );
+
+			if ( ! str_starts_with( $lowered, $prefix ) ) {
+				if ( isset( $xlink_seen[ $lowered ] ) ) {
+					$dropped[ $name ] = true;
+				}
+				continue;
+			}
+
+			$local = substr( $lowered, strlen( $prefix ) );
+			if ( isset( self::XLINK_LOCAL_NAMES[ $local ] ) ) {
+				$xlink_seen[ $local ] = true;
+			}
+		}
+
+		return $dropped;
+	}
+
+	/**
+	 * Probes, once per process, whether Dom\HTMLDocument keeps `xlink:href`
+	 * but loses a bare `href` that follows it on an SVG element.
+	 *
+	 * Returns false when Dom\HTMLDocument is unavailable or the probe throws.
+	 */
+	private static function dom_oracle_drops_bare_xlink_local_name_after_xlink(): bool {
+		static $drops = null;
+
+		if ( null === $drops ) {
+			$drops = false;
+
+			if ( class_exists( 'Dom\\HTMLDocument' ) ) {
+				$previous = libxml_use_internal_errors( true );
+				try {
+					$document = \Dom\HTMLDocument::createFromString( '<svg xlink:href href></svg>', LIBXML_NOERROR );
+					$svg      = $document->getElementsByTagName( 'svg' )->item( 0 );
+					$drops    = null !== $svg && $svg->hasAttributeNS( 'http://www.w3.org/1999/xlink', 'href' ) && ! $svg->hasAttribute( 'href' );
+				} catch ( \Throwable $e ) {
+					$drops = false;
+				} finally {
+					libxml_clear_errors();
+					libxml_use_internal_errors( $previous );
+				}
+			}
+		}
+
+		return $drops;
+	}
+
+	/**
+	 * Reports whether the DOM oracle leaks a leading U+000C FORM FEED into
+	 * body text.
+	 *
+	 * PHP's Lexbor-based parser does not treat FORM FEED as ignorable
+	 * whitespace in the pre-body insertion modes (initial, before html,
+	 * before head, in head, after head), whereas spec-following parsers —
+	 * including WordPress — drop it. The answer is probed at runtime and
+	 * cached, so the tolerance switches itself off once PHP fixes the bug.
+	 * Returns false when Dom\HTMLDocument is unavailable or the probe throws.
+	 */
+	public static function dom_oracle_mishandles_form_feed(): bool {
+		static $mishandles = null;
+
+		if ( null === $mishandles ) {
+			$mishandles = false;
+
+			if ( class_exists( 'Dom\\HTMLDocument' ) ) {
+				$previous = libxml_use_internal_errors( true );
+				try {
+					$document   = \Dom\HTMLDocument::createFromString( "\fa", LIBXML_NOERROR );
+					$body       = $document->getElementsByTagName( 'body' )->item( 0 );
+					$mishandles = null !== $body && "\fa" === $body->textContent;
+				} catch ( \Throwable $e ) {
+					$mishandles = false;
+				} finally {
+					libxml_clear_errors();
+					libxml_use_internal_errors( $previous );
+				}
+			}
+		}
+
+		return $mishandles;
+	}
+
+	/**
+	 * Parses HTML with Dom\HTMLDocument and renders the resulting tree.
+	 *
+	 * @param string $html             Markup to parse.
+	 * @param string $mode             Generator::MODE_FRAGMENT_BODY selects fragment parsing;
+	 *                                 any other value parses a full document.
+	 * @param array  $limits           Optional limits; `maxNodes` defaults to 3000.
+	 * @param string $fragment_context Context element name used in fragment mode.
+	 * @return array Result with `status`, plus `tree` and `nodeCount` on success or
+	 *               `error` and `failureClass` on failure.
+	 * @throws \RuntimeException When rendering fails for a reason other than the node
+	 *                           limit or the unsupported template context.
+	 */
+	public static function render_dom( string $html, string $mode, array $limits = array(), string $fragment_context = 'body' ): array {
+		if ( ! class_exists( 'Dom\\HTMLDocument' ) ) {
+			return array(
+				'status'       => self::STATUS_ERROR,
+				'error'        => 'Dom\\HTMLDocument is not available. PHP 8.4+ with ext-dom is required.',
+				'failureClass' => 'oracle-unavailable',
+			);
+		}
+
+		$max_nodes  = $limits['maxNodes'] ?? 3000;
+		$node_count = 0;
+		$tree       = '';
+
+		try {
+			// Step one: obtain the node whose children form the tree to render.
+			if ( Generator::MODE_FRAGMENT_BODY === $mode ) {
+				try {
+					$root = self::parse_dom_fragment( $html, $fragment_context );
+				} catch ( \Throwable $e ) {
+					return array(
+						'status'       => self::STATUS_ERROR,
+						'error'        => $e->getMessage(),
+						'throwable'    => get_class( $e ),
+						'failureClass' => 'oracle-parse-error',
+					);
+				}
+			} else {
+				$previous = libxml_use_internal_errors( true );
+				try {
+					$root = \Dom\HTMLDocument::createFromString( $html, LIBXML_NOERROR );
+				} catch ( \Throwable $e ) {
+					return array(
+						'status'       => self::STATUS_ERROR,
+						'error'        => $e->getMessage(),
+						'throwable'    => get_class( $e ),
+						'failureClass' => 'oracle-parse-error',
+					);
+				} finally {
+					libxml_clear_errors();
+					libxml_use_internal_errors( $previous );
+				}
+			}
+
+			// Step two: render every top-level child.
+			foreach ( $root->childNodes as $child ) {
+				$tree .= self::render_dom_node( $child, 0, $node_count, $max_nodes );
+			}
+		} catch ( \RuntimeException $e ) {
+			$reason = $e->getMessage();
+
+			if ( 'DOM node limit exceeded.' === $reason ) {
+				return array(
+					'status'       => self::STATUS_ERROR,
+					'error'        => $reason,
+					'failureClass' => 'node-limit-exceeded',
+					'nodeCount'    => $node_count,
+				);
+			}
+
+			if ( self::DOM_TEMPLATE_CONTEXT_UNSUPPORTED !== $reason ) {
+				throw $e;
+			}
+
+			return array(
+				'status'       => self::STATUS_UNSUPPORTED,
+				'error'        => $reason,
+				'failureClass' => 'oracle-unsupported',
+				'unsupported'  => array(
+					'message' => $reason,
+				),
+				'nodeCount'    => $node_count,
+			);
+		}
+
+		return array(
+			'status'    => self::STATUS_OK,
+			'tree'      => $tree . "\n",
+			'nodeCount' => $node_count,
+		);
+	}
+
+	/**
+	 * Fragment-parses HTML inside a freshly created context element by
+	 * assigning to its innerHTML, which performs context-aware fragment
+	 * parsing.
+	 *
+	 * @param string $html        Fragment source.
+	 * @param string $context_tag Context element name, lowercased before use; `svg` and
+	 *                            `math` are created in their foreign namespaces.
+	 * @return mixed The context element; its children are the parsed fragment.
+	 */
+	private static function parse_dom_fragment( string $html, string $context_tag ) {
+		$foreign_namespaces = array(
+			'svg'  => 'http://www.w3.org/2000/svg',
+			'math' => 'http://www.w3.org/1998/Math/MathML',
+		);
+
+		$document = \Dom\HTMLDocument::createEmpty();
+		$tag      = strtolower( $context_tag );
+		$context  = isset( $foreign_namespaces[ $tag ] )
+			? $document->createElementNS( $foreign_namespaces[ $tag ], $tag )
+			: $document->createElement( $tag );
+
+		$restore = libxml_use_internal_errors( true );
+		try {
+			$context->innerHTML = $html;
+		} finally {
+			libxml_clear_errors();
+			libxml_use_internal_errors( $restore );
+		}
+
+		return $context;
+	}
+
+	/**
+	 * Renders one DOM node (and, for elements, its subtree) as tree lines.
+	 *
+	 * Every visited node counts toward the node limit. Empty text nodes and
+	 * node types other than doctype, element, text, CDATA, and comment render
+	 * as the empty string.
+	 *
+	 * @throws \RuntimeException When the node count exceeds $max_nodes.
+	 */
+	private static function render_dom_node( $node, int $indent_level, int &$node_count, int $max_nodes ): string {
+		if ( ++$node_count > $max_nodes ) {
+			throw new \RuntimeException( 'DOM node limit exceeded.' );
+		}
+
+		$type = $node->nodeType;
+
+		if ( XML_ELEMENT_NODE === $type ) {
+			return self::render_dom_element( $node, $indent_level, $node_count, $max_nodes );
+		}
+
+		if ( XML_TEXT_NODE === $type || XML_CDATA_SECTION_NODE === $type ) {
+			$text = $node->nodeValue;
+			if ( '' === $text ) {
+				return '';
+			}
+			return str_repeat( '  ', $indent_level ) . '"' . self::escape_tree_scalar( (string) $text ) . "\"\n";
+		}
+
+		if ( XML_COMMENT_NODE === $type ) {
+			return str_repeat( '  ', $indent_level ) . '<!-- ' . self::escape_tree_scalar( (string) $node->nodeValue ) . " -->\n";
+		}
+
+		if ( XML_DOCUMENT_TYPE_NODE !== $type ) {
+			return '';
+		}
+
+		// Doctype: the identifiers are printed as a pair whenever either one is set.
+		$line      = '<!DOCTYPE ' . self::escape_tree_scalar( (string) ( $node->name ?? $node->nodeName ) );
+		$public_id = $node->publicId ?? '';
+		$system_id = $node->systemId ?? '';
+		if ( '' !== $public_id || '' !== $system_id ) {
+			$line .= ' "' . self::escape_tree_scalar( (string) $public_id ) . '" "' . self::escape_tree_scalar( (string) $system_id ) . '"';
+		}
+
+		return $line . ">\n";
+	}
+
+	/**
+	 * Renders an element line, its attribute lines, and its children.
+	 *
+	 * An HTML-namespace `template` element gets a `content` marker line and
+	 * its children are rendered through render_dom_template_children().
+	 */
+	private static function render_dom_element( $node, int $indent_level, int &$node_count, int $max_nodes ): string {
+		$child_level = $indent_level + 1;
+		$rendered    = str_repeat( '  ', $indent_level ) . '<' . self::escape_tree_scalar( self::dom_element_display_name( $node ) ) . ">\n";
+		$rendered   .= self::render_dom_attributes( $node, $child_level );
+
+		$in_html_namespace = 'http://www.w3.org/1999/xhtml' === ( $node->namespaceURI ?? '' );
+		if ( $in_html_namespace && 'template' === strtolower( (string) $node->localName ) ) {
+			$rendered .= str_repeat( '  ', $child_level ) . "content\n";
+			$rendered .= self::render_dom_template_children( $node, $child_level + 1, $node_count, $max_nodes );
+			return $rendered;
+		}
+
+		foreach ( $node->childNodes as $child ) {
+			$rendered .= self::render_dom_node( $child, $child_level, $node_count, $max_nodes );
+		}
+
+		return $rendered;
+	}
+
+	/**
+	 * Renders the children of a template element.
+	 *
+	 * PHP's Dom\HTMLDocument hides template content (childNodes is empty and no
+	 * `content` property is exposed), but the innerHTML getter serializes the
+	 * true, template-context-parsed content. Re-parse that serialization in a
+	 * body context and verify fidelity by re-serializing: when the round-trip
+	 * reproduces the source serialization byte-for-byte, the body-context tree
+	 * is the template content tree. When it does not (table parts, foreign
+	 * fragments, and other template-mode-sensitive content), declare the case
+	 * unsupported rather than render a wrong tree. This check is deliberately
+	 * self-contained: it must not consult the WordPress HTML API, which is the
+	 * system under test.
+	 *
+	 * The element's own childNodes are rendered first; if that yields any
+	 * output it is returned as-is and the innerHTML path is skipped. An empty
+	 * innerHTML renders as the empty string.
+	 *
+	 * @param mixed $node         Template element.
+	 * @param int   $indent_level Indentation depth for the rendered children.
+	 * @param int   $node_count   Running node counter, advanced by reference for rendered nodes.
+	 * @param int   $max_nodes    Node limit enforced while rendering and while counting.
+	 * @return string Rendered child lines, possibly empty.
+	 * @throws \RuntimeException With message 'DOM node limit exceeded.' when the limit is hit,
+	 *                           with self::DOM_TEMPLATE_CONTEXT_UNSUPPORTED when the round-trip
+	 *                           check fails, or with a 'Could not render DOM template innerHTML: '
+	 *                           prefix when the body-context re-parse itself throws.
+	 */
+	private static function render_dom_template_children( $node, int $indent_level, int &$node_count, int $max_nodes ): string {
+		$output = '';
+		foreach ( $node->childNodes as $child ) {
+			$output .= self::render_dom_node( $child, $indent_level, $node_count, $max_nodes );
+		}
+		if ( '' !== $output ) {
+			return $output;
+		}
+
+		$inner_html = (string) ( $node->innerHTML ?? '' );
+		if ( '' === $inner_html ) {
+			return '';
+		}
+
+		try {
+			$body = self::parse_dom_fragment( $inner_html, 'body' );
+		} catch ( \Throwable $e ) {
+			throw new \RuntimeException( 'Could not render DOM template innerHTML: ' . $e->getMessage(), 0, $e );
+		}
+
+		if ( (string) ( $body->innerHTML ?? '' ) !== $inner_html ) {
+			// Count the nodes that were parsed so resource-stress template
+			// content reports node-limit-exceeded ahead of unsupported.
+			$scratch_count = $node_count; // Copy: the caller's counter is left untouched on this path.
+			self::count_dom_children( $body, $scratch_count, $max_nodes );
+			throw new \RuntimeException( self::DOM_TEMPLATE_CONTEXT_UNSUPPORTED );
+		}
+
+		foreach ( $body->childNodes as $child ) {
+			$output .= self::render_dom_node( $child, $indent_level, $node_count, $max_nodes );
+		}
+		return $output;
+	}
+
+	/**
+	 * Adds every descendant of $node to $node_count, descending only through
+	 * element nodes.
+	 *
+	 * @throws \RuntimeException As soon as the count exceeds $max_nodes.
+	 */
+	private static function count_dom_children( $node, int &$node_count, int $max_nodes ): void {
+		foreach ( $node->childNodes as $child ) {
+			if ( ++$node_count > $max_nodes ) {
+				throw new \RuntimeException( 'DOM node limit exceeded.' );
+			}
+			if ( XML_ELEMENT_NODE !== $child->nodeType ) {
+				continue;
+			}
+			self::count_dom_children( $child, $node_count, $max_nodes );
+		}
+	}
+
+	/**
+	 * Produces the tree display name of an element: the lowercased local name
+	 * for HTML, `svg <name>` / `math <name>` for the foreign namespaces, and
+	 * the node name for anything else.
+	 */
+	private static function dom_element_display_name( $node ): string {
+		$foreign_prefixes = array(
+			'http://www.w3.org/2000/svg'         => 'svg ',
+			'http://www.w3.org/1998/Math/MathML' => 'math ',
+		);
+
+		$namespace  = $node->namespaceURI ?? '';
+		$local_name = $node->localName ?? $node->nodeName;
+
+		if ( 'http://www.w3.org/1999/xhtml' === $namespace ) {
+			return strtolower( $local_name );
+		}
+
+		return isset( $foreign_prefixes[ $namespace ] )
+			? $foreign_prefixes[ $namespace ] . $local_name
+			: $node->nodeName;
+	}
+
+	/**
+	 * Renders a DOM element's attributes, one tree line each, sorted with
+	 * compare_attribute_records().
+	 */
+	private static function render_dom_attributes( $node, int $indent_level ): string {
+		if ( ! $node->hasAttributes() ) {
+			return '';
+		}
+
+		$records = array();
+		foreach ( $node->attributes as $attribute ) {
+			$records[] = array_merge(
+				self::attribute_record( self::dom_attribute_display_name( $attribute ) ),
+				array( 'value' => $attribute->nodeValue )
+			);
+		}
+		usort( $records, array( __CLASS__, 'compare_attribute_records' ) );
+
+		$indent   = str_repeat( '  ', $indent_level );
+		$rendered = '';
+		foreach ( $records as $record ) {
+			$rendered .= $indent . $record['renderName'] . '="' . self::escape_tree_scalar( (string) $record['value'] ) . "\"\n";
+		}
+
+		return $rendered;
+	}
+
+	/**
+	 * Produces the tree display name of an attribute: `xlink <name>`,
+	 * `xml <name>`, or `xmlns <name>` for those namespaces, and the node name
+	 * otherwise.
+	 */
+	private static function dom_attribute_display_name( $attr ): string {
+		$namespace_prefixes = array(
+			'http://www.w3.org/1999/xlink'         => 'xlink ',
+			'http://www.w3.org/XML/1998/namespace' => 'xml ',
+			'http://www.w3.org/2000/xmlns/'        => 'xmlns ',
+		);
+
+		$namespace  = $attr->namespaceURI ?? '';
+		$local_name = $attr->localName ?? $attr->nodeName;
+
+		if ( isset( $namespace_prefixes[ $namespace ] ) ) {
+			return $namespace_prefixes[ $namespace ] . $local_name;
+		}
+
+		return $attr->nodeName;
+	}
+
+	/**
+	 * Orders attribute display names: names containing `:` sort after names
+	 * without one, then names containing a space sort after names without
+	 * one, and remaining ties fall back to plain string comparison.
+	 *
+	 * @return int Negative, zero, or positive, as for a usort() callback.
+	 */
+	public static function compare_attribute_display_names( string $a, string $b ): int {
+		foreach ( array( ':', ' ' ) as $marker ) {
+			$a_marked = false !== strpos( $a, $marker );
+			$b_marked = false !== strpos( $b, $marker );
+			if ( $a_marked === $b_marked ) {
+				continue;
+			}
+			return $a_marked ? 1 : -1;
+		}
+
+		return $a <=> $b;
+	}
+
+	/**
+	 * Builds the sorting/rendering record for one attribute display name.
+	 *
+	 * `sortName` is the escaped, spec-scrubbed name (NUL as U+FFFD, newlines
+	 * normalized): WordPress preserves raw bytes that the DOM oracle
+	 * substitutes, so sorting each side by its raw rendering would place the
+	 * same logical attribute at different positions in the two trees.
+	 * `renderName` is the escaped raw name, which keeps divergent bytes
+	 * visible in the output.
+	 *
+	 * @return array Record with `sortName` and `renderName` keys.
+	 */
+	private static function attribute_record( string $display_name ): array {
+		$scrubbed_name = self::scrub_scalar( $display_name );
+
+		return array(
+			'sortName'   => self::escape_tree_scalar( $scrubbed_name ),
+			'renderName' => self::escape_tree_scalar( $display_name ),
+		);
+	}
+
+	/**
+	 * Applies the spec-mandated scalar substitutions to a raw string in one
+	 * pass: NUL becomes U+FFFD, and CRLF or a lone CR becomes LF.
+	 */
+	private static function scrub_scalar( string $value ): string {
+		// strtr() prefers the longest matching key, so a CRLF pair collapses
+		// to a single LF rather than two.
+		return strtr(
+			$value,
+			array(
+				"\0"   => "\xEF\xBF\xBD",
+				"\r\n" => "\n",
+				"\r"   => "\n",
+			)
+		);
+	}
+
+	/**
+	 * Orders two attribute records by `sortName`, breaking ties with
+	 * `renderName`.
+	 */
+	private static function compare_attribute_records( array $a, array $b ): int {
+		foreach ( array( 'sortName', 'renderName' ) as $field ) {
+			$order = self::compare_attribute_display_names( $a[ $field ], $b[ $field ] );
+			if ( 0 !== $order ) {
+				return $order;
+			}
+		}
+
+		return 0;
+	}
+
+	/**
+	 * Compares the WordPress-rendered tree against the DOM oracle's tree.
+	 *
+	 * Tolerated WordPress lines are removed and the wrapper tolerance is
+	 * applied first. If the trees still differ, they are compared line by
+	 * line and a differing line is accepted only when the scalar tolerance
+	 * fully explains it.
+	 *
+	 * @param string $wordpress_tree            Tree rendered from the WordPress HTML API.
+	 * @param string $dom_tree                  Tree rendered from the DOM oracle.
+	 * @param array  $wordpress_line_tolerances Zero-based indexes of WordPress lines to drop before comparing.
+	 * @return array `ok` => true (with `scalarToleratedLines` when the line-by-line pass ran),
+	 *               or `ok` => false with a `firstDifference` report.
+	 */
+	public static function compare_trees( string $wordpress_tree, string $dom_tree, array $wordpress_line_tolerances = array() ): array {
+		$adjusted = self::remove_tolerated_wordpress_lines( $wordpress_tree, $wordpress_line_tolerances );
+		$line_map = $adjusted['lineMap'];
+		$wp_tree  = self::apply_wrapper_tolerance( $adjusted['tree'], $dom_tree );
+
+		if ( $wp_tree === $dom_tree ) {
+			return array(
+				'ok' => true,
+			);
+		}
+
+		/*
+		 * WordPress deliberately keeps raw NUL and CR bytes, whereas
+		 * spec-following parsers substitute U+FFFD and normalize newlines
+		 * during input preprocessing. Both trees are rendered raw; a
+		 * differing line is tolerated only when exactly that substitution
+		 * accounts for the whole difference, and every tolerated line is
+		 * reported rather than silently scrubbed on both sides.
+		 */
+		$wp_lines    = explode( "\n", $wp_tree );
+		$dom_lines   = explode( "\n", $dom_tree );
+		$accepted    = array();
+		$mismatch_at = null;
+
+		foreach ( $wp_lines as $index => $wp_line ) {
+			if ( ! isset( $dom_lines[ $index ] ) ) {
+				break;
+			}
+
+			$dom_line = $dom_lines[ $index ];
+			if ( $wp_line === $dom_line ) {
+				continue;
+			}
+
+			$explained = self::scalar_tolerance_eligible_line( $wp_line )
+				&& self::scalar_tolerance_eligible_line( $dom_line )
+				&& self::escaped_scalar_lines_match( $wp_line, $dom_line );
+			if ( ! $explained ) {
+				$mismatch_at = $index;
+				break;
+			}
+
+			$accepted[] = $line_map[ $index ] ?? $index;
+		}
+
+		if ( null === $mismatch_at ) {
+			if ( count( $wp_lines ) === count( $dom_lines ) ) {
+				return array(
+					'ok'                  => true,
+					'scalarToleratedLines' => $accepted,
+				);
+			}
+			// The shared lines all agree, so the difference is the extra tail of one tree.
+			$mismatch_at = min( count( $wp_lines ), count( $dom_lines ) );
+		}
+
+		// Start the report at the first line the scalar tolerance cannot
+		// explain, which may come after earlier tolerated differences.
+		return array(
+			'ok'              => false,
+			'firstDifference' => self::first_difference( $wp_tree, $dom_tree, $line_map, $mismatch_at ),
+		);
+	}
+
+	/**
+	 * Decides whether a rendered tree line is of a kind where WordPress
+	 * deliberately preserves raw NUL/CR bytes that spec-following parsers
+	 * substitute: tag lines (NUL survives in foreign tag names) and
+	 * attribute lines (NUL/CR in values, NUL in names). On every other kind
+	 * of line — text, RCDATA, rawtext, comments, doctypes — WordPress applies
+	 * the spec substitutions itself, so a scalar difference there is a real
+	 * divergence that the tolerance must not hide.
+	 *
+	 * The test works on the escaped rendering, where the line shapes are
+	 * disjoint: a text line is exactly one quoted escaped string (interior
+	 * quotes render as `\"`, so it never contains an unescaped `="`);
+	 * comment, doctype, and tag lines end with `>` or ` -->`; only an
+	 * attribute line ends with `="…"`. The attribute shape is checked before
+	 * the comment prefix because the tokenizer permits `<` and `!` in
+	 * attribute names, so an attribute line may itself begin with `<!--`.
+	 *
+	 * @param string $line One rendered tree line, indentation included.
+	 * @return bool True for attribute and tag lines, false otherwise.
+	 */
+	private static function scalar_tolerance_eligible_line( string $line ): bool {
+		$content = ltrim( $line, ' ' );
+
+		// Blank lines and text lines (a single quoted escaped string) never
+		// qualify. The escape loops are possessive: their branches are
+		// disjoint, so backtracking cannot help, and PCRE's JIT stack gives
+		// out near 8KB when it has to track backtrack frames.
+		if ( '' === $content || preg_match( '/^"(?:\\\\.|[^"\\\\])*+"$/', $content ) ) {
+			return false;
+		}
+
+		// Attribute line: a name followed by a quoted escaped value.
+		if ( preg_match( '/="(?:\\\\.|[^"\\\\])*+"$/', $content ) ) {
+			return true;
+		}
+
+		// Comments, doctypes, and the template content marker are excluded.
+		$is_comment_or_doctype = str_starts_with( $content, '<!--' ) || str_starts_with( $content, '<!DOCTYPE' );
+		if ( $is_comment_or_doctype || 'content' === $content ) {
+			return false;
+		}
+
+		// Whatever remains qualifies only if it is a tag line.
+		return '<' === $content[0];
+	}
+
+	/**
+	 * Indicates whether the spec-mandated scalar substitutions explain the
+	 * entire difference between a WordPress tree line and a DOM tree line:
+	 * NUL becomes U+FFFD and CR / CRLF become LF.
+	 *
+	 * The substitutions apply per occurrence, only where the DOM side holds
+	 * the substituted form. A whole-line rewrite would also rewrite escapes
+	 * the two sides agree on — a decoded `&#13;` renders as `\r` in both
+	 * trees — and the tolerance would then fail to fire.
+	 *
+	 * Operates on the escaped rendering produced by escape_tree_scalar(),
+	 * where `\` starts an escape sequence and a literal backslash is `\\`.
+	 *
+	 * One alignment is ambiguous: WordPress `\r\n` opposite DOM `\n` is
+	 * either a raw CRLF the DOM collapsed to one LF, or a raw CR mapped to
+	 * LF followed by a decoded LF both sides agree on (`\r&#10;` renders
+	 * `\r\n` in WordPress and `\n\n` in the DOM). Both are legitimate, so
+	 * that site backtracks. Every other step is deterministic. The step
+	 * budget bounds pathological backtracking; exceeding it fails closed,
+	 * reporting a mismatch rather than tolerating one.
+	 *
+	 * This is the entry point: it starts the matcher at offset 0 of both
+	 * lines with an empty dead-end memo and a zeroed step counter.
+	 *
+	 * @param string $wordpress_line Escaped WordPress tree line.
+	 * @param string $dom_line       Escaped DOM tree line.
+	 * @return bool True when the lines match under the substitutions.
+	 */
+	private static function escaped_scalar_lines_match( string $wordpress_line, string $dom_line ): bool {
+		$failed = array();
+		$steps  = 0;
+		return self::escaped_scalar_match_at( $wordpress_line, $dom_line, 0, 0, $failed, $steps );
+	}
+
+	/**
+	 * Matches a WordPress escaped line suffix against a DOM line suffix,
+	 * branching at the ambiguous CR alignment and memoizing dead ends.
+	 *
+	 * Outside escape sequences the two lines must agree byte for byte. At a
+	 * WordPress backslash, `\0` may stand opposite the three UTF-8 bytes of
+	 * U+FFFD and `\r` may stand opposite `\n`; any other escape must appear
+	 * identically on the DOM side.
+	 *
+	 * @param string $wordpress_line Escaped WordPress tree line.
+	 * @param string $dom_line       Escaped DOM tree line.
+	 * @param int    $i              Byte offset into $wordpress_line to resume from.
+	 * @param int    $j              Byte offset into $dom_line to resume from.
+	 * @param array  $failed         Memo of `"$i:$j"` positions at the ambiguous site already
+	 *                               known to fail; shared by reference across the recursion.
+	 * @param int    $steps          Loop-iteration counter shared by reference; the match
+	 *                               fails once it exceeds 1000000.
+	 * @return bool True only when both lines are consumed completely.
+	 */
+	private static function escaped_scalar_match_at( string $wordpress_line, string $dom_line, int $i, int $j, array &$failed, int &$steps ): bool {
+		$wordpress_length = strlen( $wordpress_line );
+		$dom_length       = strlen( $dom_line );
+		while ( $i < $wordpress_length && $j < $dom_length ) {
+			if ( ++$steps > 1000000 ) {
+				return false;
+			}
+
+			if ( '\\' === $wordpress_line[ $i ] ) {
+				if (
+					0 === substr_compare( $wordpress_line, '\\0', $i, 2 ) &&
+					$j + 3 <= $dom_length &&
+					0 === substr_compare( $dom_line, "\xEF\xBF\xBD", $j, 3 )
+				) {
+					$i += 2;
+					$j += 3;
+					continue;
+				}
+				if (
+					0 === substr_compare( $wordpress_line, '\\r', $i, 2 ) &&
+					$j + 2 <= $dom_length &&
+					0 === substr_compare( $dom_line, '\\n', $j, 2 )
+				) {
+					if (
+						$i + 4 <= $wordpress_length &&
+						0 === substr_compare( $wordpress_line, '\\r\\n', $i, 4 )
+					) {
+						$key = $i . ':' . $j;
+						if ( isset( $failed[ $key ] ) ) {
+							return false;
+						}
+						// CR maps to LF and the WordPress `\n` matches on its
+						// own (a raw CR before a decoded LF), or the raw CRLF
+						// pair collapsed to the one DOM LF. Lockstep first:
+						// it resolves the common case without backtracking.
+						if (
+							self::escaped_scalar_match_at( $wordpress_line, $dom_line, $i + 2, $j + 2, $failed, $steps ) ||
+							self::escaped_scalar_match_at( $wordpress_line, $dom_line, $i + 4, $j + 2, $failed, $steps )
+						) {
+							return true;
+						}
+						$failed[ $key ] = true;
+						return false;
+					}
+					$i += 2;
+					$j += 2;
+					continue;
+				}
+				// Any other escape must match the DOM side byte for byte,
+				// including both bytes of a literal `\\`.
+				if (
+					$i + 1 < $wordpress_length &&
+					$j + 1 < $dom_length &&
+					0 === substr_compare( $dom_line, $wordpress_line[ $i ] . $wordpress_line[ $i + 1 ], $j, 2 )
+				) {
+					$i += 2;
+					$j += 2;
+					continue;
+				}
+				return false;
+			}
+			if ( $wordpress_line[ $i ] !== $dom_line[ $j ] ) {
+				return false;
+			}
+			++$i;
+			++$j;
+		}
+
+		return $i === $wordpress_length && $j === $dom_length;
+	}
+
+	/**
+	 * Drops the tolerated lines from a WordPress tree.
+	 *
+	 * @param string $wordpress_tree  Rendered tree.
+	 * @param array  $line_tolerances Zero-based line indexes to drop; entries that are neither
+	 *                                integers nor digit strings are ignored.
+	 * @return array `tree` is the remaining text; `lineMap` maps each remaining line's new
+	 *               index to its original index, and is empty when nothing was filtered.
+	 */
+	private static function remove_tolerated_wordpress_lines( string $wordpress_tree, array $line_tolerances ): array {
+		$skip = array();
+		foreach ( $line_tolerances as $candidate ) {
+			if ( ! is_int( $candidate ) && ! ctype_digit( (string) $candidate ) ) {
+				continue;
+			}
+			$skip[ (int) $candidate ] = true;
+		}
+
+		if ( array() === $skip ) {
+			return array(
+				'tree'    => $wordpress_tree,
+				'lineMap' => array(),
+			);
+		}
+
+		$kept     = array();
+		$line_map = array();
+		foreach ( explode( "\n", $wordpress_tree ) as $number => $text ) {
+			if ( isset( $skip[ $number ] ) ) {
+				continue;
+			}
+			$kept[]     = $text;
+			$line_map[] = $number;
+		}
+
+		return array(
+			'tree'    => implode( "\n", $kept ),
+			'lineMap' => $line_map,
+		);
+	}
+
+	/**
+	 * Appends the trailing wrapper lines (`<html>`, `<head>`, `<body>`) that
+	 * the expected tree ends with but the processed tree lacks.
+	 *
+	 * The processed tree's final byte is removed before the missing lines are
+	 * appended. The tree is returned unchanged when no wrapper case applies.
+	 */
+	private static function apply_wrapper_tolerance( string $processed_tree, string $expected_tree ): string {
+		$html_head_body = "<html>\n  <head>\n  <body>\n\n";
+		$head_body      = "  <head>\n  <body>\n\n";
+		$body           = "  <body>\n\n";
+
+		$missing = null;
+		if ( self::ends_with( $expected_tree, $html_head_body ) && ! self::ends_with( $processed_tree, $html_head_body ) ) {
+			if ( self::ends_with( $processed_tree, "<html>\n  <head>\n\n" ) ) {
+				$missing = $body;
+			} elseif ( self::ends_with( $processed_tree, "<html>\n\n" ) ) {
+				$missing = $head_body;
+			} else {
+				$missing = $html_head_body;
+			}
+		} elseif ( self::ends_with( $expected_tree, $head_body ) && ! self::ends_with( $processed_tree, $head_body ) ) {
+			$missing = self::ends_with( $processed_tree, "<head>\n\n" ) ? $body : $head_body;
+		} elseif ( self::ends_with( $expected_tree, $body ) && ! self::ends_with( $processed_tree, $body ) ) {
+			$missing = $body;
+		}
+
+		if ( null === $missing ) {
+			return $processed_tree;
+		}
+
+		return substr( $processed_tree, 0, -1 ) . $missing;
+	}
+
+	/**
+	 * Tells whether $haystack ends with $needle; an empty needle always matches.
+	 */
+	private static function ends_with( string $haystack, string $needle ): bool {
+		$needle_length = strlen( $needle );
+		if ( 0 === $needle_length ) {
+			return true;
+		}
+		if ( $needle_length > strlen( $haystack ) ) {
+			return false;
+		}
+		return 0 === substr_compare( $haystack, $needle, -$needle_length );
+	}
+
+	/**
+	 * Public first-difference diff between two rendered trees, for invariant
+	 * checks that compare WordPress trees against each other.
+	 *
+	 * Delegates to first_difference() with no line map, starting at the
+	 * first line.
+	 *
+	 * @param string $left  First rendered tree.
+	 * @param string $right Second rendered tree.
+	 * @return array Report for the first differing line, or an empty array
+	 *               when no line differs.
+	 */
+	public static function diff_trees( string $left, string $right ): array {
+		return self::first_difference( $left, $right );
+	}
+
+	/**
+	 * Describes the first line, at or after $start_line, where two rendered
+	 * trees differ.
+	 *
+	 * Line paths are derived only once a differing line has been found, and
+	 * only from the lines up to and including it: a line's path depends
+	 * solely on the lines before it, so the rest of the tree need not be
+	 * scanned. Identical trees return immediately.
+	 *
+	 * @param string $left          Left (WordPress) tree.
+	 * @param string $right         Right (DOM) tree.
+	 * @param array  $left_line_map Optional map from comparison line index to original
+	 *                              left line index; unmapped indexes are used as-is.
+	 * @param int    $start_line    Zero-based line index to start comparing from.
+	 * @return array Report for the first differing line (line numbers are one-based),
+	 *               or an empty array when no line differs.
+	 */
+	private static function first_difference( string $left, string $right, array $left_line_map = array(), int $start_line = 0 ): array {
+		// Identical trees cannot differ on any line; skip splitting entirely.
+		if ( $left === $right ) {
+			return array();
+		}
+
+		$left_lines  = explode( "\n", $left );
+		$right_lines = explode( "\n", $right );
+		$max         = max( count( $left_lines ), count( $right_lines ) );
+
+		for ( $i = $start_line; $i < $max; ++$i ) {
+			$l = $left_lines[ $i ] ?? null;
+			$r = $right_lines[ $i ] ?? null;
+			if ( $l === $r ) {
+				continue;
+			}
+
+			// Only the lines up to the difference affect its path.
+			$left_paths        = self::line_paths( array_slice( $left_lines, 0, $i + 1 ) );
+			$right_paths       = self::line_paths( array_slice( $right_lines, 0, $i + 1 ) );
+			$first_byte_offset = self::first_different_byte_offset( $l, $r );
+			$left_line         = $left_line_map[ $i ] ?? $i;
+
+			return array(
+				'line'                              => $left_line + 1,
+				'comparisonLine'                    => $i + 1,
+				'domLineNumber'                     => $i + 1,
+				'wordpressLinePreview'              => self::line_preview( $l ),
+				'domLinePreview'                    => self::line_preview( $r ),
+				'wordpressLineBytes'                => null === $l ? null : strlen( $l ),
+				'domLineBytes'                      => null === $r ? null : strlen( $r ),
+				'wordpressLineSha1'                 => null === $l ? null : sha1( $l ),
+				'domLineSha1'                       => null === $r ? null : sha1( $r ),
+				'firstByteOffset'                   => $first_byte_offset,
+				'diffWindowStart'                   => self::diff_window_start( $first_byte_offset ),
+				'wordpressHex'                      => null === $l ? null : self::hex_preview( $l ),
+				'domHex'                            => null === $r ? null : self::hex_preview( $r ),
+				'wordpressDiffHex'                  => self::hex_window( $l, $first_byte_offset ),
+				'domDiffHex'                        => self::hex_window( $r, $first_byte_offset ),
+				'wordpressPath'                     => $left_paths[ $i ] ?? null,
+				'domPath'                           => $right_paths[ $i ] ?? null,
+				'path'                              => $left_paths[ $i ] ?? $right_paths[ $i ] ?? null,
+				'wordpressNorm'                     => self::normalize_tree_line( $l ),
+				'domNorm'                           => self::normalize_tree_line( $r ),
+				'linesMatchAfterWordPressUtf8Scrub' => self::lines_match_after_wordpress_utf8_scrub( $l, $r ),
+			);
+		}
+
+		return array();
+	}
+
+	/**
+	 * Escapes a raw string byte by byte for display on a single tree line.
+	 *
+	 * LF, CR, TAB, NUL, backslash, and double quote get short escapes; every
+	 * other control byte (below 0x20, or 0x7F) becomes `\xHH`; all remaining
+	 * bytes pass through unchanged.
+	 */
+	private static function escape_tree_scalar( string $value ): string {
+		$short_escapes = array(
+			"\n" => '\\n',
+			"\r" => '\\r',
+			"\t" => '\\t',
+			"\0" => '\\0',
+			'\\' => '\\\\',
+			'"'  => '\\"',
+		);
+
+		$escaped = '';
+		for ( $offset = 0, $end = strlen( $value ); $offset < $end; ++$offset ) {
+			$char = $value[ $offset ];
+
+			if ( isset( $short_escapes[ $char ] ) ) {
+				$escaped .= $short_escapes[ $char ];
+				continue;
+			}
+
+			$code = ord( $char );
+			if ( $code < 0x20 || 0x7f === $code ) {
+				$escaped .= sprintf( '\\x%02X', $code );
+			} else {
+				$escaped .= $char;
+			}
+		}
+
+		return $escaped;
+	}
+
+	/**
+	 * Returns a preview of a line via preview_bytes() with a limit of 240,
+	 * or null for a missing line.
+	 */
+	private static function line_preview( ?string $line ): ?string {
+		if ( null === $line ) {
+			return null;
+		}
+
+		return preview_bytes( $line, 240 );
+	}
+
+	/**
+	 * Finds the byte offset at which two lines first differ.
+	 *
+	 * @return int|null Null when the lines are identical; 0 when exactly one of them is
+	 *                  missing; otherwise the length of their common prefix.
+	 */
+	private static function first_different_byte_offset( ?string $left, ?string $right ): ?int {
+		if ( $left === $right ) {
+			return null;
+		}
+		if ( null === $left || null === $right ) {
+			return 0;
+		}
+
+		// XOR-ing two strings yields a string as long as the shorter one, with
+		// a NUL byte wherever they agree; the leading NUL run is the common prefix.
+		return strspn( $left ^ $right, "\0" );
+	}
+
+	/**
+	 * Returns where the diff window begins: 32 bytes before the offset,
+	 * clamped at 0, or null when there is no offset.
+	 */
+	private static function diff_window_start( ?int $offset ): ?int {
+		if ( null === $offset ) {
+			return null;
+		}
+
+		return max( 0, $offset - 32 );
+	}
+
+	/**
+	 * Hex-encodes up to 96 bytes of a line, starting 32 bytes before the
+	 * given offset (clamped at the start of the line).
+	 *
+	 * @return string|null Lowercase hex, or null when the line or the offset is missing.
+	 */
+	private static function hex_window( ?string $line, ?int $offset ): ?string {
+		if ( null === $line || null === $offset ) {
+			return null;
+		}
+
+		$window_start = max( 0, $offset - 32 );
+
+		return bin2hex( substr( $line, $window_start, 96 ) );
+	}
+
+	/**
+	 * Reports whether the left line equals the right line once passed
+	 * through wp_scrub_utf8().
+	 *
+	 * @return bool|null Null when either line is missing or wp_scrub_utf8() is unavailable.
+	 */
+	private static function lines_match_after_wordpress_utf8_scrub( ?string $left, ?string $right ): ?bool {
+		$comparable = null !== $left && null !== $right && function_exists( 'wp_scrub_utf8' );
+
+		return $comparable ? wp_scrub_utf8( $left ) === $right : null;
+	}
+
+	/**
+	 * Hex-encodes the first 160 bytes of a line as lowercase hex.
+	 */
+	private static function hex_preview( string $line ): string {
+		return bin2hex( substr( $line, 0, 160 ) );
+	}
+
+	/**
+	 * Derives a slash-separated path for each non-blank line of a rendered
+	 * tree, keyed by line index.
+	 *
+	 * Tag lines and the template `content` marker extend the element stack
+	 * at their indentation level (two spaces per level). Attribute lines get
+	 * `/@name` under their parent, and every other non-blank line gets
+	 * `/#text`.
+	 *
+	 * Quoted text lines are recognized before attributes, and an attribute
+	 * line must end in a quoted escaped value. Otherwise a text line such as
+	 * `"a=b"` or a comment such as `<!-- k=v -->` would be reported as an
+	 * attribute merely because it contains `=`. A line starting with an
+	 * unescaped `"` is always text: a quote inside a rendered attribute name
+	 * is escaped as `\"`.
+	 *
+	 * @param array $lines Rendered tree lines.
+	 * @return array Map of line index => path; blank lines have no entry.
+	 */
+	private static function line_paths( array $lines ): array {
+		$stack = array();
+		$paths = array();
+		foreach ( $lines as $i => $line ) {
+			$trim = ltrim( $line, ' ' );
+			if ( '' === $trim ) {
+				continue;
+			}
+
+			$level = (int) floor( ( strlen( $line ) - strlen( $trim ) ) / 2 );
+			if ( preg_match( '/^<([^!][^>]*)>$/', $trim, $m ) ) {
+				$stack           = array_slice( $stack, 0, $level );
+				$stack[ $level ] = $m[1];
+				$paths[ $i ]     = '/' . implode( '/', $stack );
+			} elseif ( 'content' === $trim ) {
+				$stack           = array_slice( $stack, 0, $level );
+				$stack[ $level ] = 'content';
+				$paths[ $i ]     = '/' . implode( '/', $stack );
+			} elseif ( '"' !== $trim[0] && preg_match( '/^([^=]+)="(?:\\\\.|[^"\\\\])*+"$/', $trim, $m ) ) {
+				// Attribute line: name, then a quoted escaped value up to the end of the line.
+				$paths[ $i ] = '/' . implode( '/', array_slice( $stack, 0, $level ) ) . '/@' . $m[1];
+			} else {
+				// Text, comment, and doctype lines.
+				$paths[ $i ] = '/' . implode( '/', array_slice( $stack, 0, $level ) ) . '/#text';
+			}
+		}
+		return $paths;
+	}
+
+	private const KNOWN_HTML_TREE_ELEMENT_NAMES = array( // Names known_tree_element_names() serves for any namespace other than "svg" or "math".
+		'a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside',
+		'audio', 'b', 'base', 'basefont', 'bdi', 'bdo', 'bgsound', 'big',
+		'blink', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption',
+		'center', 'cite', 'code', 'col', 'colgroup', 'command', 'content',
+		'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir',
+		'div', 'dl', 'dt', 'element', 'em', 'embed', 'fencedframe', 'fieldset',
+		'figcaption', 'figure', 'font', 'footer', 'form', 'frame', 'frameset',
+		'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr',
+		'html', 'i', 'iframe', 'image', 'img', 'input', 'ins', 'isindex', 'kbd',
+		'keygen', 'label', 'legend', 'li', 'link', 'listing', 'main', 'map',
+		'mark', 'marquee', 'menu', 'menuitem', 'meta', 'meter', 'multicol',
+		'nav', 'nextid', 'nobr', 'noembed', 'noframes', 'noscript', 'object',
+		'ol', 'optgroup', 'option', 'output', 'p', 'param', 'picture',
+		'plaintext', 'portal', 'pre', 'progress', 'q', 'rb', 'rp', 'rt', 'rtc',
+		'ruby', 's', 'samp', 'script', 'search', 'section', 'select',
+		'selectedcontent', 'shadow', 'slot', 'small', 'source', 'spacer',
+		'span', 'strike', 'strong', 'style', 'sub', 'summary', 'sup', 'table',
+		'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time',
+		'title', 'tr', 'track', 'tt', 'u', 'ul', 'var', 'video', 'wbr', 'xmp',
+	);
+
+	private const KNOWN_SVG_TREE_ELEMENT_NAMES = array( // Names served for the "svg" namespace; known_tree_element_names() also indexes each one lower-cased.
+		'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate',
+		'animateColor', 'animateMotion', 'animateTransform', 'circle', 'clipPath',
+		'color-profile', 'cursor', 'defs', 'desc', 'discard', 'ellipse',
+		'feBlend', 'feColorMatrix', 'feComponentTransfer', 'feComposite',
+		'feConvolveMatrix', 'feDiffuseLighting', 'feDisplacementMap',
+		'feDistantLight', 'feDropShadow', 'feFlood', 'feFuncA', 'feFuncB',
+		'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage', 'feMerge',
+		'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
+		'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
+		'flowDiv', 'flowLine', 'flowPara', 'flowRegion', 'flowRegionBreak',
+		'flowRoot', 'flowSpan', 'font', 'font-face', 'font-face-format',
+		'font-face-name', 'font-face-src', 'font-face-uri', 'foreignObject', 'g',
+		'glyph', 'glyphRef', 'hatch', 'hatchpath', 'hkern', 'image', 'line',
+		'linearGradient', 'marker', 'mask', 'mesh', 'meshgradient', 'meshpatch',
+		'meshrow', 'metadata', 'missing-glyph', 'mpath', 'path', 'pattern',
+		'polygon', 'polyline', 'radialGradient', 'rect', 'script', 'set',
+		'solidColor', 'solidcolor', 'stop', 'style', 'svg', 'switch', 'symbol',
+		'text', 'textPath', 'title', 'tref', 'tspan', 'use', 'view', 'vkern',
+	);
+
+	private const KNOWN_MATHML_TREE_ELEMENT_NAMES = array( // Names served for the "math" namespace by known_tree_element_names().
+		'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx',
+		'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
+		'arcsech', 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind',
+		'bvar', 'card', 'cartesianproduct', 'cbytes', 'ceiling', 'cerror',
+		'ci', 'cn', 'codomain', 'complexes', 'compose', 'condition',
+		'conjugate', 'cos', 'cosh', 'cot', 'coth', 'cs', 'csc', 'csch',
+		'csymbol', 'curl', 'declare', 'degree', 'determinant', 'diff',
+		'divergence', 'divide', 'domain', 'domainofapplication', 'emptyset',
+		'eq', 'equivalent', 'eulergamma', 'exists', 'exp', 'exponentiale',
+		'factorial', 'factorof', 'false', 'floor', 'fn', 'forall', 'gcd', 'geq',
+		'grad', 'gt', 'ident', 'image', 'imaginary', 'imaginaryi', 'implies',
+		'in', 'infinity', 'int', 'integers', 'intersect', 'interval', 'inverse',
+		'lambda', 'laplacian', 'lcm', 'leq', 'limit', 'list', 'ln', 'log',
+		'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup', 'malignmark',
+		'math', 'matrix', 'matrixrow', 'max', 'mean', 'median', 'menclose',
+		'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min', 'minus',
+		'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode', 'moment',
+		'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot',
+		'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline', 'mspace',
+		'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
+		'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover',
+		'naturalnumbers', 'neq', 'none', 'not', 'notanumber', 'notin',
+		'notprsubset', 'notsubset', 'or', 'otherwise', 'outerproduct',
+		'partialdiff', 'pi', 'piece', 'piecewise', 'plus', 'power', 'primes',
+		'product', 'prsubset', 'quotient',
+		'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
+		'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
+		'share', 'sin', 'sinh', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
+		'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
+		'vectorproduct', 'xor',
+	);
+
+	public static function normalize_tree_line( ?string $line ): ?string {
+		if ( ! is_string( $line ) ) {
+			return null;
+		}
+		$stripped = trim( $line );
+		if ( 1 === preg_match( '/^([^=]+)="(?:\\\\.|[^"\\\\])*+"$/s', $stripped, $attribute ) ) {
+			return $attribute[1] . '="<value>"'; // Attribute line: keep the name, mask the value.
+		}
+		/*
+		 * Tag names outside the recognized spec names collapse to a single
+		 * placeholder: the generator mints random custom and invalid tag
+		 * names, and keeping them in normalized lines would spread one root
+		 * cause across many signatures and families.
+		 */
+		if ( 1 === preg_match( '/^<([^!>][^>]*)>$/s', $stripped, $element ) && ! self::is_known_tree_element_name( $element[1] ) ) {
+			return '<custom-element>';
+		}
+		$masked = preg_replace( '/"(?:\\\\.|[^"\\\\])*+"/s', '"<value>"', $line ); // Mask every quoted value.
+		$masked = preg_replace( '/<!--.*-->/s', '<!-- <comment> -->', $masked ); // Mask comment bodies.
+		return trim( (string) $masked );
+	}
+
+	private static function is_known_tree_element_name( string $display_name ): bool {
+		// A leading "svg " or "math " selects that namespace's list; anything else is looked up as HTML.
+		$namespace  = 'html';
+		$local_name = $display_name;
+		foreach ( array( 'svg', 'math' ) as $foreign ) {
+			if ( str_starts_with( $display_name, $foreign . ' ' ) ) {
+				$namespace  = $foreign;
+				$local_name = substr( $display_name, strlen( $foreign ) + 1 );
+				break;
+			}
+		}
+		$names = self::known_tree_element_names( $namespace );
+		return isset( $names[ $local_name ] ) || isset( $names[ strtolower( $local_name ) ] );
+	}
+
+	private static function known_tree_element_names( string $namespace ): array {
+		static $cache = array(); // Lookup maps built so far, keyed by namespace.
+		if ( isset( $cache[ $namespace ] ) ) {
+			return $cache[ $namespace ];
+		}
+		// Any namespace other than "svg" or "math" falls back to the HTML list.
+		$foreign = array(
+			'svg'  => self::KNOWN_SVG_TREE_ELEMENT_NAMES,
+			'math' => self::KNOWN_MATHML_TREE_ELEMENT_NAMES,
+		);
+		$source = $foreign[ $namespace ] ?? self::KNOWN_HTML_TREE_ELEMENT_NAMES;
+		$lookup = array();
+		foreach ( $source as $name ) {
+			// Register the spelling as listed, then its lower-cased form.
+			$lookup[ $name ]               = true;
+			$lookup[ strtolower( $name ) ] = true;
+		}
+		$cache[ $namespace ] = $lookup;
+		return $lookup;
+	}
+
+	private static function unsupported_details( \WP_HTML_Unsupported_Exception $e ): array {
+		// Flatten the exception into plain data; the token goes through preview_bytes() with a limit of 160.
+		$details                             = array( 'message' => $e->getMessage() );
+		$details['tokenName']                = $e->token_name;
+		$details['tokenAt']                  = $e->token_at;
+		$details['tokenPreview']             = preview_bytes( $e->token, 160 );
+		$details['stackOfOpenElements']      = $e->stack_of_open_elements;
+		$details['activeFormattingElements'] = $e->active_formatting_elements;
+		return $details;
+	}
+}
diff --git a/tools/html-api-fuzz/lib/Worker.php b/tools/html-api-fuzz/lib/Worker.php
new file mode 100644
index 0000000000000..8bbbc7197030f
--- /dev/null
+++ b/tools/html-api-fuzz/lib/Worker.php
@@ -0,0 +1,711 @@
+<?php
+namespace HtmlApiFuzz;
+
+class Worker {
+	public static function run( array $options ): array { // Runs one fuzz case and writes input.bin, replay.json, result.json and the tree files into the output directory.
+		$output_dir = option_string( $options, 'output-dir', getcwd() . DIRECTORY_SEPARATOR . 'html-api-fuzz-worker' );
+		ensure_dir( $output_dir );
+		// Option defaults; the input branches below may override profile, mode, payload policy and fragment context.
+		$seed    = option_int( $options, 'seed', 1 );
+		$profile = option_string( $options, 'profile', 'auto' );
+		$mode    = option_string( $options, 'mode', 'auto' );
+		$payload_policy_option = option_string( $options, 'payload-policy', null );
+		$payload_policy        = $payload_policy_option ?? 'auto';
+		$max_input_bytes_value = option_int( $options, 'max-input-bytes', 0 );
+		$max_input_bytes       = $max_input_bytes_value > 0 ? $max_input_bytes_value : null; // Non-positive means "no limit".
+		$fragment_context      = option_string( $options, 'fragment-context', 'body' );
+		self::validate_fragment_context_metadata( $fragment_context );
+		$generator_parameters  = null;
+		$input_source          = 'generated';
+		$git_metadata          = null === option_string( $options, 'git-metadata-base64', null )
+			? git_metadata( 100 )
+			: git_metadata_from_base64( option_string( $options, 'git-metadata-base64' ) );
+		// Input selection order: explicit base64, explicit file, corpus mutation (seed-dependent), generator.
+		if ( null !== option_string( $options, 'input-base64', null ) ) {
+			$input = base64_decode( option_string( $options, 'input-base64' ), true ); // Strict decoding: invalid characters yield false.
+			if ( false === $input ) {
+				throw new \InvalidArgumentException( 'Invalid --input-base64.' );
+			}
+			$profile = option_string( $options, 'profile', 'replay' );
+			$mode    = option_string( $options, 'mode', Generator::MODE_FRAGMENT_BODY );
+			$payload_policy = $payload_policy_option; // Stays null when no policy was supplied.
+			$input_source   = 'input-base64';
+			self::validate_profile_metadata( $profile );
+			self::validate_mode_metadata( $mode );
+			self::validate_payload_policy_metadata( $payload_policy );
+		} elseif ( null !== option_string( $options, 'input-file', null ) ) {
+			$input = file_get_contents( option_string( $options, 'input-file' ) );
+			if ( false === $input ) {
+				throw new \InvalidArgumentException( 'Could not read --input-file.' );
+			}
+			$profile = option_string( $options, 'profile', 'replay' );
+			$mode    = option_string( $options, 'mode', Generator::MODE_FRAGMENT_BODY );
+			$payload_policy = $payload_policy_option; // Stays null when no policy was supplied.
+			$input_source   = 'input-file';
+			self::validate_profile_metadata( $profile );
+			self::validate_mode_metadata( $mode );
+			self::validate_payload_policy_metadata( $payload_policy );
+		} elseif ( self::seed_selects_corpus_stage( $seed, option_int( $options, 'corpus-mutate-percent', 20 ) ) ) {
+			$corpus = self::corpus_mutated_input( $seed, $mode, $max_input_bytes );
+			if ( null === $corpus ) {
+				// Corpus unavailable: fall back to the generator.
+				$generated            = Generator::generate( $seed, $profile ?? 'auto', $mode ?? 'auto', $payload_policy ?? 'auto', $max_input_bytes );
+				$input                = $generated['input'];
+				$profile              = $generated['profile'];
+				$mode                 = $generated['mode'];
+				$payload_policy       = $generated['payloadPolicy'];
+				$fragment_context     = $generated['fragmentContext'];
+				$generator_parameters = $generated['parameters'];
+			} else {
+				$input                = $corpus['input'];
+				$profile              = 'corpus-mutated';
+				$mode                 = $corpus['mode'];
+				$payload_policy       = null;
+				$fragment_context     = 'body'; // Replaces any fragment-context option for corpus inputs.
+				$generator_parameters = $corpus['parameters'];
+				$input_source         = 'corpus-mutated';
+			}
+		} else {
+			$generated            = Generator::generate( $seed, $profile ?? 'auto', $mode ?? 'auto', $payload_policy ?? 'auto', $max_input_bytes );
+			$input                = $generated['input'];
+			$profile              = $generated['profile'];
+			$mode                 = $generated['mode'];
+			$payload_policy       = $generated['payloadPolicy'];
+			$fragment_context     = $generated['fragmentContext'];
+			$generator_parameters = $generated['parameters'];
+		}
+
+		$limits = array(
+			'maxTokens' => option_int( $options, 'max-tokens', 2000 ),
+			'maxNodes'  => option_int( $options, 'max-nodes', 3000 ),
+		);
+		$fail_unsupported = option_bool( $options, 'fail-unsupported', false );
+		$oracle_renderer  = OracleRenderer::from_options( $options );
+		$oracle_metadata  = $oracle_renderer->metadata();
+
+		$replay_path = $output_dir . DIRECTORY_SEPARATOR . 'replay.json';
+		$result_path = $output_dir . DIRECTORY_SEPARATOR . 'result.json';
+		$input_path  = $output_dir . DIRECTORY_SEPARATOR . 'input.bin';
+		file_put_contents( $input_path, $input ); // NOTE(review): the return value is not checked.
+		// The replay file is written before evaluation and rewritten with the outcome afterwards.
+		$replay = self::base_replay( $seed, $profile, $mode, $payload_policy, $fragment_context, $generator_parameters, $input_source, $input, $output_dir, $limits, $fail_unsupported, $git_metadata, $oracle_metadata, $oracle_renderer->replay_options() );
+		write_json_file( $replay_path, $replay );
+
+		$result = self::evaluate_input(
+			$input,
+			$seed,
+			$profile,
+			$mode,
+			$payload_policy,
+			$fragment_context,
+			$generator_parameters,
+			$input_source,
+			$limits,
+			$fail_unsupported,
+			$oracle_renderer
+		);
+		$result['paths'] = array(
+			'outputDir'  => $output_dir,
+			'inputPath'  => $input_path,
+			'replayPath' => $replay_path,
+			'resultPath' => $result_path,
+		);
+		$result['wordpress'] = self::compact_parse_result( $result['wordpress'], $output_dir, 'wordpress-tree.txt' );
+		$result['dom']       = self::compact_parse_result( $result['dom'], $output_dir, 'dom-tree.txt' );
+		$signature           = $result['signature'] ?? null;
+		// Copy the headline outcome into the replay record.
+		$replay['result']    = array(
+			'ok'           => $result['ok'],
+			'status'       => $result['status'],
+			'failureClass' => $result['failureClass'] ?? null,
+			'signature'    => $signature,
+			'oracleFinding' => $result['oracleFinding'] ?? null,
+			'oracle'       => $result['oracle'] ?? $oracle_metadata,
+			'resultPath'   => $result_path,
+		);
+		$replay['signature'] = $signature;
+		$replay['oracleFinding'] = $result['oracleFinding'] ?? null;
+		write_json_file( $replay_path, $replay );
+		write_json_file( $result_path, $result );
+
+		return $result;
+	}
+
+	public static function evaluate_input( string $input, int $seed, string $profile, string $mode, ?string $payload_policy, string $fragment_context, ?array $generator_parameters, string $input_source, array $limits, bool $fail_unsupported, OracleRenderer $oracle_renderer ): array { // Runs every check on one input and returns the assembled result record.
+		$oracle_metadata = $oracle_renderer->metadata();
+		$tag_result      = TagInvariants::check( $input, $limits, $mode, $fragment_context );
+		$wp_result       = TreeRenderer::render_wordpress( $input, $mode, $limits, $fragment_context );
+		if ( ! $oracle_renderer->is_php_dom() ) {
+			$wp_result['domOracleLineTolerances'] = array(); // Line tolerances are only used with the PHP DOM oracle.
+		}
+		$dom_result = array( 'status' => TreeRenderer::STATUS_ERROR, 'error' => 'Not run.', 'oracle' => $oracle_metadata );
+		// Start from a passing record; the checks below downgrade it.
+		$result = array(
+			'schemaVersion' => 1,
+			'kind'          => 'html-api-fuzz-worker-result',
+			'createdAt'     => gmdate( 'c' ),
+			'ok'            => true,
+			'status'        => 'passed',
+			'seed'          => $seed,
+			'profile'       => $profile,
+			'mode'          => $mode,
+			'payloadPolicy' => $payload_policy,
+			'fragmentContext' => $fragment_context,
+			'generator'     => $generator_parameters,
+			'inputSource'   => $input_source,
+			'inputSha1'     => sha1( $input ),
+			'inputLength'   => strlen( $input ),
+			'inputPreview'  => preview_bytes( $input ),
+			'oracle'        => $oracle_metadata,
+			'tagProcessor'  => $tag_result,
+			'wordpress'     => $wp_result,
+			'dom'           => $dom_result,
+			'comparison'    => null,
+		);
+		// First applicable outcome wins: tag invariants, then the WordPress render, then the oracle comparison.
+		if ( ! $tag_result['ok'] ) {
+			$result['ok']           = false;
+			$result['failureClass'] = self::tag_invariant_failure_class( $tag_result );
+			$result['status']       = 'resource-limit' === $result['failureClass'] ? 'resource-limit' : 'failed';
+		} elseif ( TreeRenderer::STATUS_UNSUPPORTED === $wp_result['status'] ) {
+			$result['status']       = 'unsupported';
+			$result['failureClass'] = 'unsupported';
+			if ( $fail_unsupported ) { // Unsupported input only counts as a failure on request.
+				$result['ok'] = false;
+			}
+		} elseif ( TreeRenderer::STATUS_ERROR === $wp_result['status'] ) {
+			$wp_failure_class       = $wp_result['failureClass'] ?? 'wordpress-parse-error';
+			$result['ok']           = false;
+			$result['status']       = self::is_resource_limit_failure( $wp_failure_class ) ? 'resource-limit' : 'failed';
+			$result['failureClass'] = self::is_resource_limit_failure( $wp_failure_class ) ? 'resource-limit' : $wp_failure_class;
+		} else {
+			try {
+				$dom_result = $oracle_renderer->render( $input, $mode, $limits, $fragment_context );
+			} catch ( \Throwable $e ) { // An oracle crash becomes an error result instead of propagating.
+				$dom_result = array(
+					'status'       => TreeRenderer::STATUS_ERROR,
+					'error'        => $e->getMessage(),
+					'throwable'    => get_class( $e ),
+					'failureClass' => 'oracle-renderer-error',
+					'oracle'       => $oracle_metadata,
+				);
+			}
+			$result['dom'] = $dom_result;
+
+			if ( TreeRenderer::STATUS_ERROR === $dom_result['status'] ) {
+				$dom_failure_class      = $dom_result['failureClass'] ?? 'oracle-renderer-error';
+				$result['failureClass'] = self::is_resource_limit_failure( $dom_failure_class ) ? 'resource-limit' : $dom_failure_class;
+				$result['status']       = self::is_resource_limit_failure( $dom_failure_class )
+					? 'resource-limit'
+					: ( 'oracle-parse-error' === $result['failureClass'] ? 'oracle-parse-error' : 'failed' );
+				if ( 'oracle-parse-error' !== $result['failureClass'] ) { // An oracle parse error alone leaves 'ok' untouched.
+					$result['ok'] = false;
+				}
+			} elseif ( TreeRenderer::STATUS_UNSUPPORTED === $dom_result['status'] ) {
+				$result['status']       = 'oracle-unsupported';
+				$result['failureClass'] = $dom_result['failureClass'] ?? 'oracle-unsupported';
+			} else {
+				$dom_oracle_line_tolerances = $oracle_renderer->is_php_dom() ? ( $wp_result['domOracleLineTolerances'] ?? array() ) : array();
+				$comparison = TreeRenderer::compare_trees( $wp_result['tree'], $dom_result['tree'], $dom_oracle_line_tolerances );
+				if ( $oracle_renderer->is_php_dom() && ! $comparison['ok'] && self::is_oracle_form_feed_quirk( $input, $mode, $limits, $fragment_context, $wp_result['tree'] ?? null, $dom_oracle_line_tolerances, $oracle_renderer ) ) {
+					$comparison = array( // The quirk replaces the failed comparison with a tolerated one.
+						'ok'                => true,
+						'formFeedQuirk'     => true,
+						'oracleFindingType' => 'dom-form-feed-pre-body-whitespace',
+					);
+					$result['status']       = 'oracle-tolerated';
+					$result['failureClass'] = 'oracle-tolerated';
+				} elseif ( $oracle_renderer->is_php_dom() && ! $comparison['ok'] && self::is_dom_mathml_heading_scope_quirk( $input, $mode, $comparison, $wp_result['tree'] ?? null, $dom_result['tree'] ?? null ) ) {
+					$comparison = array( // Tolerated as well, but the first difference is kept for reference.
+						'ok'                => true,
+						'oracleTolerated'   => true,
+						'oracleFindingType' => 'dom-mathml-heading-scope-reparenting',
+						'firstDifference'   => $comparison['firstDifference'] ?? array(),
+					);
+					$result['status']       = 'oracle-tolerated';
+					$result['failureClass'] = 'oracle-tolerated';
+				}
+				$result['comparison'] = $comparison;
+				if ( ! $comparison['ok'] ) {
+					$result['ok']           = false;
+					$result['status']       = 'failed';
+					$result['failureClass'] = self::is_encoding_mismatch( $input, $comparison['firstDifference'] ?? array() )
+						? 'encoding-mismatch'
+						: 'tree-mismatch';
+				} elseif ( ! empty( $dom_oracle_line_tolerances ) || ! empty( $comparison['scalarToleratedLines'] ) ) {
+					$result['status']       = 'oracle-tolerated';
+					$result['failureClass'] = 'oracle-tolerated';
+				}
+			}
+		}
+		// Keep the normalized HTML for the preservation check but leave it out of the stored result.
+		$normalize_result = $tag_result['normalize'] ?? array( 'ok' => true );
+		$normalized_html  = $normalize_result['normalizedHtml'] ?? null;
+		unset( $result['tagProcessor']['normalize']['normalizedHtml'] );
+		// The follow-up checks need a baseline that is ok, passed or tolerated, and tree-equal.
+		$has_clean_baseline = true === ( $result['ok'] ?? false )
+			&& in_array( $result['status'] ?? null, array( 'passed', 'oracle-tolerated' ), true )
+			&& true === ( $result['comparison']['ok'] ?? false );
+
+		if ( $has_clean_baseline ) {
+			$mutation           = self::check_mutation_differential( $input, $mode, $limits, $wp_result['tree'] ?? null, $oracle_renderer, $fragment_context );
+			$result['mutation'] = $mutation;
+			if ( false === $mutation['ok'] ) {
+				$result['ok']           = false;
+				$result['status']       = 'failed';
+				$result['failureClass'] = $mutation['failureClass'];
+				$has_clean_baseline     = false; // Skip the preservation check once the mutation check has failed.
+			}
+		}
+
+		if ( $has_clean_baseline && is_string( $normalized_html ) ) {
+			$preservation                     = self::check_normalize_tree_preservation( $normalized_html, $mode, $limits, $wp_result['tree'] ?? null, $fragment_context );
+			$result['normalizePreservation'] = $preservation;
+			if ( false === $preservation['ok'] ) {
+				$result['ok']           = false;
+				$result['status']       = 'failed';
+				$result['failureClass'] = 'normalize-tree-changed';
+				$has_clean_baseline     = false;
+			}
+		}
+		// A failed normalize invariant only downgrades a result that is otherwise still passing.
+		if (
+			false === ( $normalize_result['ok'] ?? true ) &&
+			true === ( $result['ok'] ?? false ) &&
+			in_array( $result['status'] ?? null, array( 'passed', 'oracle-tolerated' ), true )
+		) {
+			$result['ok']           = false;
+			$result['status']       = 'failed';
+			$result['failureClass'] = 'normalize-invariant-failed';
+		}
+		// Both keys below are added only when the respective helper returns a non-null value.
+		$oracle_finding = OracleFinding::from_result( $result );
+		if ( null !== $oracle_finding ) {
+			$result['oracleFinding'] = $oracle_finding;
+		}
+
+		$signature = Signature::from_result( $result );
+		if ( null !== $signature ) {
+			$result['signature'] = $signature;
+		}
+
+		return $result;
+	}
+
+	private static function validate_payload_policy_metadata( ?string $payload_policy ): void {
+		if ( ! ( null === $payload_policy || in_array( $payload_policy, Generator::payload_policy_labels(), true ) ) ) { // A null policy is accepted as-is.
+			throw new \InvalidArgumentException( 'Unknown generator payload policy: ' . $payload_policy );
+		}
+	}
+
+	private static function validate_profile_metadata( string $profile ): void {
+		if ( ! ( in_array( $profile, array( 'replay', 'corpus-mutated' ), true ) || in_array( $profile, Generator::profiles(), true ) ) ) { // Two fixed labels plus the generator's own profiles.
+			throw new \InvalidArgumentException( 'Unknown generator profile: ' . $profile );
+		}
+	}
+
+	/**
+	 * Decides, reproducibly per seed, whether a run uses the corpus-mutation
+	 * stage. The bucket comes from a SHA-256 of the seed rather than a modulus
+	 * of it, so arithmetic seed progressions still spread over all buckets.
+	 */
+	private static function seed_selects_corpus_stage( int $seed, int $percent ): bool {
+		if ( $percent < 1 ) {
+			return false; // A zero or negative share disables the stage.
+		}
+		$prefix = substr( hash( 'sha256', 'corpus-stage:' . $seed ), 0, 8 );
+		return hexdec( $prefix ) % 100 < min( $percent, 100 );
+	}
+
+	/**
+	 * Derives a reproducible fuzz input by mutating one html5lib corpus entry.
+	 */
+	private static function corpus_mutated_input( int $seed, ?string $mode, ?int $max_input_bytes ): ?array {
+		$corpus = Corpus::entries();
+		if ( array() === $corpus ) {
+			return null; // No corpus entries to mutate.
+		}
+		// The same seed-keyed generator serves every draw below, so their order matters.
+		$prng     = new Prng( 'corpus:' . $seed );
+		$picked   = $prng->int( 0, count( $corpus ) - 1 );
+		$source   = $corpus[ $picked ];
+		$mutated  = Mutator::mutate( $source['data'], $prng, $corpus );
+		$bytes    = $mutated['input'];
+		$too_long = is_int( $max_input_bytes ) && $max_input_bytes > 0 && strlen( $bytes ) > $max_input_bytes;
+		if ( $too_long ) {
+			$bytes = substr( $bytes, 0, $max_input_bytes );
+		}
+		// An explicit mode is honored; otherwise choose fragment-body or full-document, weighted 60 to 40.
+		if ( null === $mode || 'auto' === $mode ) {
+			$chosen_mode = $prng->weighted( array( Generator::MODE_FRAGMENT_BODY => 60, Generator::MODE_FULL_DOCUMENT => 40 ) );
+		} else {
+			$chosen_mode = $mode;
+		}
+		$parameters = array(
+			'seed'        => $seed,
+			'stage'       => 'corpus-mutated',
+			'corpusFile'  => $source['file'],
+			'corpusIndex' => $picked,
+			'operations'  => $mutated['operations'],
+			'truncated'   => $too_long,
+			'byteLength'  => strlen( $bytes ),
+		);
+		return array(
+			'input'      => $bytes,
+			'mode'       => $chosen_mode,
+			'parameters' => $parameters,
+		);
+	}
+
+	private static function validate_fragment_context_metadata( string $fragment_context ): void {
+		if ( false === in_array( $fragment_context, Generator::fragment_contexts(), true ) ) { // Only contexts listed by the generator pass.
+			throw new \InvalidArgumentException( 'Unknown fragment context: ' . $fragment_context );
+		}
+	}
+
+	private static function validate_mode_metadata( string $mode ): void {
+		if ( false === in_array( $mode, Generator::modes(), true ) ) { // Only modes listed by the generator pass.
+			throw new \InvalidArgumentException( 'Unknown generator mode: ' . $mode );
+		}
+	}
+
+	private static function base_replay( int $seed, string $profile, string $mode, ?string $payload_policy, string $fragment_context, ?array $generator_parameters, string $input_source, string $input, string $output_dir, array $limits, bool $fail_unsupported, array $git_metadata, array $oracle_metadata, array $oracle_options ): array {
+		// Provenance of the run and a full description of the input under test.
+		$replay = array(
+			'schemaVersion' => 1,
+			'kind'          => 'html-api-fuzz-replay',
+			'createdAt'     => gmdate( 'c' ),
+			'repoRoot'      => repo_root(),
+			'repoCommit'    => $git_metadata['commit'] ?? '',
+			'repoDirty'     => $git_metadata['dirty'] ?? null,
+			'phpVersion'    => PHP_VERSION,
+			'seed'          => $seed,
+			'profile'       => $profile,
+			'mode'          => $mode,
+			'payloadPolicy' => $payload_policy,
+			'fragmentContext' => $fragment_context,
+			'generator'     => $generator_parameters,
+			'inputSource'   => $input_source,
+			'inputBase64'   => base64_encode( $input ),
+			'inputSha1'     => sha1( $input ),
+			'inputLength'   => strlen( $input ),
+			'inputPreview'  => preview_bytes( $input ),
+			'limits'        => $limits,
+			'oracle'        => $oracle_metadata,
+		);
+		// Oracle settings; the oracle kind falls back to the PHP DOM oracle when none was recorded.
+		$replay['options'] = array(
+			'failUnsupported' => $fail_unsupported,
+			'domOracle'       => $oracle_options['domOracle'] ?? OracleRenderer::KIND_PHP_DOM,
+			'lexborOracleBin' => $oracle_options['lexborOracleBin'] ?? null,
+			'oracleTimeoutMs' => $oracle_options['oracleTimeoutMs'] ?? null,
+		);
+		// Invocation of replay.php pointing at replay.json in the output directory.
+		$replay['command'] = array(
+			'program' => PHP_BINARY,
+			'args'    => array( 'tools/html-api-fuzz/replay.php', '--replay', $output_dir . DIRECTORY_SEPARATOR . 'replay.json' ),
+			'cwd'     => repo_root(),
+		);
+		return $replay;
+	}
+
+	/**
+	 * Differential check of a simple mutation: set data-fuzz="1" on the first
+	 * tag, then verify the mutated document still parses identically in
+	 * WordPress and the selected oracle, and that the WordPress tree changed by
+	 * exactly that one attribute line. Inputs already containing "data-fuzz" in
+	 * any letter case are skipped, because attribute names match
+	 * case-insensitively and set_attribute() would overwrite, not add. Runs on a
+	 * clean baseline only, so a failure is attributable to the mutation itself.
+	 */
+	private static function check_mutation_differential( string $input, string $mode, array $limits, ?string $original_tree, OracleRenderer $oracle_renderer, string $fragment_context = 'body' ): array {
+		if ( ! is_string( $original_tree ) ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-no-baseline-tree',
+			);
+		}
+		if ( false !== stripos( $input, 'data-fuzz' ) ) { // Case-insensitive: an existing DATA-FUZZ attribute would be overwritten rather than added.
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-input-contains-marker',
+			);
+		}
+		// Without a tag that accepts the attribute there is nothing to mutate.
+		$processor = new \WP_HTML_Tag_Processor( $input );
+		if ( ! $processor->next_tag() || ! $processor->set_attribute( 'data-fuzz', '1' ) ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-no-mutable-tag',
+			);
+		}
+		$updated = $processor->get_updated_html();
+		// Both parsers must accept the mutated document; otherwise the check is skipped.
+		$wp_updated = TreeRenderer::render_wordpress( $updated, $mode, $limits, $fragment_context );
+		if ( TreeRenderer::STATUS_OK !== $wp_updated['status'] ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-wordpress-' . $wp_updated['status'],
+			);
+		}
+		$dom_updated = $oracle_renderer->render( $updated, $mode, $limits, $fragment_context ); // NOTE(review): not wrapped in try/catch, unlike the baseline render in evaluate_input().
+		if ( TreeRenderer::STATUS_OK !== $dom_updated['status'] ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-oracle-' . $dom_updated['status'],
+			);
+		}
+		// The two PHP DOM quirk checks below excuse a mismatch; any other divergence is a failure.
+		$dom_oracle_line_tolerances = $oracle_renderer->is_php_dom() ? ( $wp_updated['domOracleLineTolerances'] ?? array() ) : array();
+		$comparison = TreeRenderer::compare_trees( $wp_updated['tree'], $dom_updated['tree'], $dom_oracle_line_tolerances );
+		if ( ! $comparison['ok'] ) {
+			if ( $oracle_renderer->is_php_dom() && self::is_oracle_form_feed_quirk( $updated, $mode, $limits, $fragment_context, $wp_updated['tree'], $dom_oracle_line_tolerances, $oracle_renderer ) ) {
+				return array(
+					'ok'     => true,
+					'status' => 'skipped-oracle-form-feed-quirk',
+				);
+			}
+			if ( $oracle_renderer->is_php_dom() && self::is_dom_mathml_heading_scope_quirk( $updated, $mode, $comparison, $wp_updated['tree'] ?? null, $dom_updated['tree'] ?? null ) ) {
+				return array(
+					'ok'     => true,
+					'status' => 'skipped-oracle-mathml-heading-scope-quirk',
+				);
+			}
+			return array(
+				'ok'              => false,
+				'status'          => 'failed',
+				'failureClass'    => 'mutation-tree-mismatch',
+				'message'         => 'The mutated document parses differently in WordPress and the selected oracle.',
+				'firstDifference' => $comparison['firstDifference'] ?? array(),
+			);
+		}
+		// Collect the tree lines that consist solely of the marker attribute.
+		$marker        = 'data-fuzz="1"';
+		$updated_lines = explode( "\n", $wp_updated['tree'] );
+		$marker_lines  = array();
+		foreach ( $updated_lines as $i => $line ) {
+			if ( ltrim( $line, ' ' ) === $marker ) {
+				$marker_lines[] = $i;
+			}
+		}
+
+		if ( 0 === count( $marker_lines ) ) {
+			/*
+			 * Tree construction can legitimately drop the mutated element
+			 * (for example <col> outside a table), taking the attribute with
+			 * it. The mutation must then be a tree no-op.
+			 */
+			if ( $wp_updated['tree'] !== $original_tree ) {
+				return array(
+					'ok'              => false,
+					'status'          => 'failed',
+					'failureClass'    => 'mutation-delta-mismatch',
+					'message'         => 'The attribute is missing from the re-parsed tree, yet the tree changed.',
+					'firstDifference' => TreeRenderer::diff_trees( $wp_updated['tree'], $original_tree ),
+				);
+			}
+			return array(
+				'ok'     => true,
+				'status' => 'passed-element-dropped',
+			);
+		}
+
+		if ( count( $marker_lines ) > 1 ) {
+			/*
+			 * Active formatting element reconstruction clones attributes onto
+			 * reconstructed elements, so a strict one-line delta does not
+			 * apply. Correctness is still covered by the differential
+			 * comparison of the mutated document above.
+			 */
+			return array(
+				'ok'     => true,
+				'status' => 'passed-differential-only',
+			);
+		}
+		// Exactly one marker line: removing it must reproduce the original tree.
+		unset( $updated_lines[ $marker_lines[0] ] );
+		$delta_tree = implode( "\n", $updated_lines );
+		if ( $delta_tree !== $original_tree ) {
+			return array(
+				'ok'              => false,
+				'status'          => 'failed',
+				'failureClass'    => 'mutation-delta-mismatch',
+				'message'         => 'The mutation changed the parsed tree beyond the added attribute.',
+				'firstDifference' => TreeRenderer::diff_trees( $delta_tree, $original_tree ),
+			);
+		}
+
+		return array(
+			'ok'     => true,
+			'status' => 'passed',
+		);
+	}
+
+	/**
+	 * Re-parses normalize() output and confirms it yields the same tree as the
+	 * original input: normalization may rewrite syntax but must leave document
+	 * structure alone. This is stricter than idempotence, which a consistently
+	 * wrong serializer can satisfy.
+	 */
+	private static function check_normalize_tree_preservation( string $normalized_html, string $mode, array $limits, ?string $original_tree, string $fragment_context = 'body' ): array {
+		// Nothing to compare against without a baseline tree.
+		if ( null === $original_tree ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-no-baseline-tree',
+			);
+		}
+		// A normalized document that does not render with STATUS_OK is skipped, not failed.
+		$reparsed = TreeRenderer::render_wordpress( $normalized_html, $mode, $limits, $fragment_context );
+		if ( TreeRenderer::STATUS_OK !== $reparsed['status'] ) {
+			return array(
+				'ok'     => true,
+				'status' => 'skipped-render-' . $reparsed['status'],
+			);
+		}
+		/*
+		 * When serializing raw bytes the parser preserves, normalize() applies
+		 * the spec scalar substitutions (NUL to U+FFFD, CR to LF), so the
+		 * re-parsed tree may differ from the original by exactly those
+		 * substitutions. The same scrub-explained tolerance as the
+		 * WordPress/DOM comparison applies; anything beyond it is a real change.
+		 */
+		$diff = TreeRenderer::compare_trees( $original_tree, $reparsed['tree'] );
+		if ( $diff['ok'] ) {
+			$tolerated = $diff['scalarToleratedLines'] ?? array();
+			return array(
+				'ok'                   => true,
+				'status'               => empty( $tolerated ) ? 'passed' : 'passed-scalar-tolerated',
+				'scalarToleratedLines' => $tolerated,
+			);
+		}
+		return array(
+			'ok'              => false,
+			'status'          => 'failed',
+			'failureClass'    => 'normalize-tree-changed',
+			'message'         => 'Parsing normalize() output produced a different tree than the original input.',
+			'firstDifference' => $diff['firstDifference'] ?? array(),
+		);
+	}
+
+	/**
+	 * Detects mismatches fully explained by the DOM oracle's form feed bug
+	 * (see TreeRenderer::dom_oracle_mishandles_form_feed()): when the oracle,
+	 * given the input with each form feed ("\f", U+000C) replaced by a space,
+	 * produces exactly the WordPress tree, the divergence is the oracle's, not
+	 * WordPress's. The substitution is conservative: form feeds in attribute
+	 * values or body text make the trees differ and the tolerance not apply.
+	 */
+	private static function is_oracle_form_feed_quirk( string $input, string $mode, array $limits, string $fragment_context, ?string $wp_tree, array $dom_oracle_line_tolerances, OracleRenderer $oracle_renderer ): bool {
+		if (
+			! $oracle_renderer->is_php_dom() ||
+			! is_string( $wp_tree ) ||
+			Generator::MODE_FULL_DOCUMENT !== $mode ||
+			false === strpos( $input, "\f" ) || // Escape sequence, so the needle is never an empty or invisible literal.
+			! TreeRenderer::dom_oracle_mishandles_form_feed()
+		) {
+			return false;
+		}
+
+		$substituted = $oracle_renderer->render( str_replace( "\f", ' ', $input ), $mode, $limits, $fragment_context );
+		if ( TreeRenderer::STATUS_OK !== $substituted['status'] ) {
+			return false;
+		}
+
+		$comparison = TreeRenderer::compare_trees( $wp_tree, $substituted['tree'], $dom_oracle_line_tolerances );
+		return true === $comparison['ok'] && empty( $comparison['scalarToleratedLines'] );
+	}
+
+	private static function is_dom_mathml_heading_scope_quirk( string $input, string $mode, array $comparison, ?string $wp_tree = null, ?string $dom_tree = null ): bool {
+		if ( ! in_array( $mode, array( Generator::MODE_FRAGMENT_BODY, Generator::MODE_FULL_DOCUMENT ), true ) ) {
+			return false;
+		}
+		// The input must contain a <math> tag, an <annotation-xml> whose encoding is text/html or application/xhtml+xml, and a closing h1-h6 tag.
+		if (
+			! preg_match( '/<math\b/i', $input ) ||
+			! preg_match( '/<annotation-xml\b[^>]*\bencoding\s*=\s*(?:"|\')?\s*(?:text\/html|application\/xhtml\+xml)/i', $input ) ||
+			! preg_match( '/<\/h[1-6]\s*>/i', $input )
+		) {
+			return false;
+		}
+		// Both trees must be available and hold the same lines, in the same order, once leading spaces are ignored.
+		if ( ! is_string( $wp_tree ) || ! is_string( $dom_tree ) || ! self::tree_lines_match_ignoring_indentation( $wp_tree, $dom_tree ) ) {
+			return false;
+		}
+		// The reported first difference must carry equal 'wordpressNorm' and 'domNorm' values.
+		$diff = $comparison['firstDifference'] ?? array();
+		if ( ! is_array( $diff ) || ( $diff['wordpressNorm'] ?? null ) !== ( $diff['domNorm'] ?? null ) ) {
+			return false;
+		}
+		// ...while the two sides report different paths (compared case-insensitively); an empty WordPress path disqualifies.
+		$wordpress_path = strtolower( (string) ( $diff['wordpressPath'] ?? '' ) );
+		$dom_path       = strtolower( (string) ( $diff['domPath'] ?? '' ) );
+		if ( '' === $wordpress_path || $wordpress_path === $dom_path ) {
+			return false;
+		}
+		// Only a WordPress path that runs through "math annotation-xml" counts as this quirk.
+		return false !== strpos( $wordpress_path, 'math annotation-xml' );
+	}
+
+	/**
+	 * Reports whether two rendered trees hold the same lines in the same order
+	 * once leading spaces are stripped from every line.
+	 */
+	private static function tree_lines_match_ignoring_indentation( string $left, string $right ): bool {
+		$strip_indent = static function ( string $tree ): array {
+			$stripped = array();
+			foreach ( explode( "\n", $tree ) as $line ) {
+				$stripped[] = ltrim( $line, ' ' );
+			}
+			return $stripped;
+		};
+
+		return $strip_indent( $left ) === $strip_indent( $right );
+	}
+
+	/**
+	 * Classifies a difference as an encoding mismatch: the input must not be
+	 * known-valid UTF-8 and the diff must carry the scrub-match flag. The
+	 * wp_scrub_utf8() comparison itself happens in TreeRenderer::first_difference(),
+	 * which sees the full lines; the diff's truncated previews would misclassify long lines.
+	 */
+	private static function is_encoding_mismatch( string $input, array $diff ): bool {
+		// When wp_is_valid_utf8() is unavailable the input is never treated as valid.
+		$input_is_valid_utf8 = function_exists( 'wp_is_valid_utf8' ) && wp_is_valid_utf8( $input );
+		$scrub_explains_diff = true === ( $diff['linesMatchAfterWordPressUtf8Scrub'] ?? null );
+
+		return ! $input_is_valid_utf8 && $scrub_explains_diff;
+	}
+
+	/** Returns 'resource-limit' only when failures are listed and every one is a token-limit failure. */
+	private static function tag_invariant_failure_class( array $tag_result ): string {
+		$verdict  = 'tag-invariant-failed';
+		$failures = $tag_result['failures'] ?? array();
+		if ( empty( $failures ) ) {
+			return $verdict;
+		}
+		foreach ( $failures as $failure ) {
+			$name = $failure['name'] ?? null;
+			if ( 'tag-token-limit-exceeded' !== $name && 'mutation-token-limit-exceeded' !== $name ) {
+				return $verdict;
+			}
+		}
+		return 'resource-limit';
+	}
+
+	private static function is_resource_limit_failure( ?string $failure_class ): bool {
+		return 'token-limit-exceeded' === $failure_class || 'node-limit-exceeded' === $failure_class; // Only these two classes qualify; null never does.
+	}
+
+	private static function compact_parse_result( array $parse_result, string $output_dir, string $tree_filename ): array {
+		if ( isset( $parse_result['tree'] ) ) { // Results without an inline tree are returned untouched.
+			$tree_path = $output_dir . DIRECTORY_SEPARATOR . $tree_filename;
+			file_put_contents( $tree_path, $parse_result['tree'] ); // The full tree is written to its own file; the write result is not checked.
+			$parse_result['treePath']    = $tree_path;
+			$parse_result['treeSha1']    = sha1( $parse_result['tree'] );
+			$parse_result['treePreview'] = preview_bytes( $parse_result['tree'], 400 );
+			unset( $parse_result['tree'] ); // Drop the inline copy; path, SHA-1 and preview stand in for it.
+		}
+		return $parse_result;
+	}
+}
diff --git a/tools/html-api-fuzz/lib/autoload.php b/tools/html-api-fuzz/lib/autoload.php
new file mode 100644
index 0000000000000..6ece9c1754cee
--- /dev/null
+++ b/tools/html-api-fuzz/lib/autoload.php
@@ -0,0 +1,14 @@
+<?php
+require_once __DIR__ . '/Support.php';
+require_once __DIR__ . '/HtmlApiBootstrap.php';
+require_once __DIR__ . '/Prng.php';
+require_once __DIR__ . '/Corpus.php';
+require_once __DIR__ . '/Mutator.php';
+require_once __DIR__ . '/Generator.php';
+require_once __DIR__ . '/TreeRenderer.php';
+require_once __DIR__ . '/OracleRenderer.php';
+require_once __DIR__ . '/TagInvariants.php';
+require_once __DIR__ . '/Signature.php';
+require_once __DIR__ . '/OracleFinding.php';
+require_once __DIR__ . '/ResultStore.php';
+require_once __DIR__ . '/Worker.php';
diff --git a/tools/html-api-fuzz/lib/wp-stubs.php b/tools/html-api-fuzz/lib/wp-stubs.php
new file mode 100644
index 0000000000000..ee29acbee701b
--- /dev/null
+++ b/tools/html-api-fuzz/lib/wp-stubs.php
@@ -0,0 +1,46 @@
+<?php
+
+if ( ! function_exists( '__' ) ) { // Each stub is declared only when no function of that name exists yet.
+	function __( $text ) {
+		return $text; // The text is handed back unchanged.
+	}
+}
+
+if ( ! function_exists( '_doing_it_wrong' ) ) {
+	function _doing_it_wrong( $function_name, $message, $version ) { // Empty body: the call is accepted and ignored.
+	}
+}
+
+if ( ! function_exists( '_deprecated_argument' ) ) {
+	function _deprecated_argument( $function_name, $version, $message = '' ) { // Empty body: the call is accepted and ignored.
+	}
+}
+
+if ( ! function_exists( 'wp_trigger_error' ) ) {
+	function wp_trigger_error( $function_name, $message, $error_level = E_USER_NOTICE ) { // Empty body: no error is raised.
+	}
+}
+
+if ( ! function_exists( 'wp_kses_uri_attributes' ) ) {
+	function wp_kses_uri_attributes() {
+		return array( // Fixed list of attribute names; NOTE(review): presumably mirrors WordPress core's list - confirm when core changes.
+			'action',
+			'archive',
+			'background',
+			'cite',
+			'classid',
+			'codebase',
+			'data',
+			'formaction',
+			'href',
+			'icon',
+			'longdesc',
+			'manifest',
+			'poster',
+			'profile',
+			'src',
+			'usemap',
+			'xmlns',
+		);
+	}
+}
diff --git a/tools/html-api-fuzz/minimize.php b/tools/html-api-fuzz/minimize.php
new file mode 100755
index 0000000000000..265525b804915
--- /dev/null
+++ b/tools/html-api-fuzz/minimize.php
@@ -0,0 +1,494 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+function html_api_fuzz_min_accepts_result( ?array $result, array $base, bool $any_failure ): bool {
+	if ( null === $result ) {
+		return false; // No result at all never reproduces the target.
+	}
+	if ( 'oracle-finding' === $base['targetKind'] ) {
+		return ( $result['oracleFinding']['signature']['hash'] ?? null ) === $base['targetHash']; // $any_failure is not consulted for oracle findings.
+	}
+	return $any_failure ? ! ( $result['ok'] ?? true ) : ( ( $result['signature']['hash'] ?? null ) === $base['targetHash'] );
+}
+
+function html_api_fuzz_min_worker_options( string $candidate, array $base, string $output_dir ): array {
+	$worker_options = array(
+		'input-base64' => base64_encode( $candidate ),
+		'mode'         => $base['mode'],
+		'profile'      => $base['profile'],
+		'seed'         => (string) $base['seed'],
+		'output-dir'   => $output_dir,
+		'max-tokens'   => (string) $base['maxTokens'],
+		'max-nodes'    => (string) $base['maxNodes'],
+	);
+	// Optional settings: added only for a non-null, truthy, or non-'body' value respectively.
+	if ( null !== $base['gitMetadataBase64'] ) {
+		$worker_options['git-metadata-base64'] = $base['gitMetadataBase64'];
+	}
+	if ( $base['failUnsupported'] ) {
+		$worker_options['fail-unsupported'] = true;
+	}
+	if ( null !== $base['payloadPolicy'] ) {
+		$worker_options['payload-policy'] = $base['payloadPolicy'];
+	}
+	if ( 'body' !== $base['fragmentContext'] ) {
+		$worker_options['fragment-context'] = $base['fragmentContext'];
+	}
+	// Oracle options are copied under their own names; null entries are left out.
+	foreach ( array_filter( $base['oracleOptions'], static function ( $value ) { return null !== $value; } ) as $name => $value ) {
+		$worker_options[ $name ] = $value;
+	}
+
+	return $worker_options;
+}
+
+/**
+ * Builds a worker-result-shaped failure record for a probe that threw while running in-process.
+ */
+function html_api_fuzz_min_fatal_result( array $base, \Throwable $e, int $duration_ms ): array {
+	$fatal = array(
+		'schemaVersion'   => 1,
+		'kind'            => 'html-api-fuzz-worker-result',
+		'createdAt'       => gmdate( 'c' ),
+		'ok'              => false,
+		'status'          => 'worker-fatal',
+		'failureClass'    => 'fatal-error',
+		'failureSnippet'  => $e->getMessage(),
+		'throwable'       => get_class( $e ),
+		'seed'            => $base['seed'],
+		'profile'         => $base['profile'],
+		'mode'            => $base['mode'],
+		'payloadPolicy'   => $base['payloadPolicy'],
+		'fragmentContext' => $base['fragmentContext'],
+		'inputSource'     => 'minimize-candidate',
+		'oracle'          => $base['oracle'],
+		'process'         => array(
+			'code'       => null,
+			'timedOut'   => false,
+			'durationMs' => $duration_ms,
+		),
+	);
+	$signature = \HtmlApiFuzz\Signature::from_result( $fatal );
+
+	return null === $signature ? $fatal : array_merge( $fatal, array( 'signature' => $signature ) );
+}
+
+/** Runs one candidate in a separate worker.php process and reports whether its result still matches the target. */
+function html_api_fuzz_min_process_test( string $candidate, array $base, string $work_dir, int $attempt, int $timeout_ms, bool $any_failure ): array {
+	$dir = $work_dir . '/candidates/candidate-' . str_pad( (string) $attempt, 4, '0', STR_PAD_LEFT );
+	\HtmlApiFuzz\ensure_dir( $dir );
+	$input_path = $dir . '/input.bin';
+	file_put_contents( $input_path, $candidate );
+	$args = array(
+		__DIR__ . '/worker.php',
+		'--input-file',
+		$input_path,
+		'--mode',
+		$base['mode'],
+		'--profile',
+		$base['profile'],
+		'--seed',
+		(string) $base['seed'],
+		'--output-dir',
+		$dir,
+		'--max-tokens',
+		(string) $base['maxTokens'],
+		'--max-nodes',
+		(string) $base['maxNodes'],
+	);
+	if ( null !== $base['gitMetadataBase64'] ) {
+		$args[] = '--git-metadata-base64';
+		$args[] = $base['gitMetadataBase64'];
+	}
+	if ( $base['failUnsupported'] ) {
+		$args[] = '--fail-unsupported';
+	}
+	if ( null !== $base['payloadPolicy'] ) {
+		$args[] = '--payload-policy';
+		$args[] = $base['payloadPolicy'];
+	}
+	if ( 'body' !== $base['fragmentContext'] ) {
+		$args[] = '--fragment-context';
+		$args[] = $base['fragmentContext'];
+	}
+	foreach ( $base['oracleWorkerArgs'] as $arg ) {
+		$args[] = $arg;
+	}
+	$result_path = $dir . '/result.json';
+	if ( is_file( $result_path ) ) {
+		unlink( $result_path ); // A reused output directory must not let an old result stand in for a worker that wrote none.
+	}
+	$proc   = \HtmlApiFuzz\run_php_process( $args, \HtmlApiFuzz\repo_root(), $timeout_ms, $dir . '/worker.log' );
+	$result = \HtmlApiFuzz\read_json_file( $result_path ); // Null when the worker left no readable result; the acceptance check rejects null.
+	return array( 'accepted' => html_api_fuzz_min_accepts_result( $result, $base, $any_failure ), 'result' => $result, 'process' => $proc );
+}
+
+/** Evaluates one candidate inside this PHP process; a thrown Throwable becomes a 'worker-fatal' result. */
+function html_api_fuzz_min_in_process_test( string $candidate, array $base, string $work_dir, int $attempt, bool $any_failure ): array {
+	$probe_start = microtime( true );
+
+	try {
+		if ( $base['keepCandidateArtifacts'] ) {
+			$dir    = $work_dir . '/candidates/candidate-' . str_pad( (string) $attempt, 4, '0', STR_PAD_LEFT );
+			$result = \HtmlApiFuzz\Worker::run( html_api_fuzz_min_worker_options( $candidate, $base, $dir ) );
+		} else {
+			$limits    = array( 'maxTokens' => $base['maxTokens'], 'maxNodes' => $base['maxNodes'] );
+			$generator = is_array( $base['originalGenerator'] ) ? $base['originalGenerator'] : null;
+			$result    = \HtmlApiFuzz\Worker::evaluate_input(
+				$candidate,
+				$base['seed'],
+				$base['profile'],
+				$base['mode'],
+				$base['payloadPolicy'],
+				$base['fragmentContext'],
+				$generator,
+				'minimize-candidate',
+				$limits,
+				$base['failUnsupported'],
+				$base['oracleRenderer']
+			);
+		}
+		$duration_ms = (int) round( ( microtime( true ) - $probe_start ) * 1000 );
+	} catch ( \Throwable $e ) {
+		$duration_ms = (int) round( ( microtime( true ) - $probe_start ) * 1000 );
+		$result      = html_api_fuzz_min_fatal_result( $base, $e, $duration_ms );
+	}
+
+	return array(
+		'accepted' => html_api_fuzz_min_accepts_result( $result, $base, $any_failure ),
+		'result'   => $result,
+		'process'  => array(
+			'code'       => null,
+			'timedOut'   => false,
+			'durationMs' => $duration_ms,
+			'mode'       => 'in-process',
+		),
+	);
+}
+
+function html_api_fuzz_min_test( string $candidate, array $base, string $work_dir, int $attempt, int $timeout_ms, bool $any_failure ): array {
+	// Only the 'process' probe mode spawns a worker; every other value probes in-process, where $timeout_ms is not used.
+	$use_process = 'process' === $base['probeMode'];
+	return $use_process
+		? html_api_fuzz_min_process_test( $candidate, $base, $work_dir, $attempt, $timeout_ms, $any_failure )
+		: html_api_fuzz_min_in_process_test( $candidate, $base, $work_dir, $attempt, $any_failure );
+}
+
+/** Adds one probe's duration and acceptance to the running statistics in $stats. */
+function html_api_fuzz_min_record_probe( array &$stats, array $test ): void {
+	$elapsed = $test['process']['durationMs'] ?? null;
+	// A probe without a numeric duration is skipped entirely, acceptance included.
+	if ( is_numeric( $elapsed ) ) {
+		$elapsed                = (int) $elapsed;
+		$stats['durationMs']   += $elapsed;
+		$stats['maxDurationMs'] = max( $stats['maxDurationMs'], $elapsed );
+		if ( $test['accepted'] ?? false ) {
+			++$stats['accepted'];
+		}
+	}
+}
+
+/**
+ * Picks the signature to minimize toward: an explicit --target-hash first, then the replay's failure hash, then its oracle-finding hash.
+ */
+function html_api_fuzz_min_target( array $replay, array $options ): array {
+	$explicit_hash = \HtmlApiFuzz\option_string( $options, 'target-hash', null );
+	if ( null !== $explicit_hash ) {
+		$requested_kind = \HtmlApiFuzz\option_string( $options, 'target-kind', 'failure' );
+		if ( ! in_array( $requested_kind, array( 'failure', 'oracle-finding' ), true ) ) {
+			throw new InvalidArgumentException( 'Expected --target-kind to be failure or oracle-finding.' );
+		}
+		return array(
+			'kind' => $requested_kind,
+			'hash' => $explicit_hash,
+		);
+	}
+
+	$recorded_hashes = array(
+		'failure'        => $replay['signature']['hash'] ?? $replay['result']['signature']['hash'] ?? null,
+		'oracle-finding' => $replay['oracleFinding']['signature']['hash'] ?? $replay['result']['oracleFinding']['signature']['hash'] ?? null,
+	);
+	foreach ( $recorded_hashes as $kind => $hash ) {
+		if ( is_string( $hash ) && '' !== $hash ) {
+			return array(
+				'kind' => $kind,
+				'hash' => $hash,
+			);
+		}
+	}
+
+	return array(
+		'kind' => null,
+		'hash' => null,
+	);
+}
+
+function html_api_fuzz_min_probe_mode( array $options ): string {
+	$requested = \HtmlApiFuzz\option_string( $options, 'probe-mode', 'auto' );
+	if ( 'in-process' === $requested || 'process' === $requested ) {
+		return $requested;
+	}
+	if ( 'auto' !== $requested ) {
+		throw new InvalidArgumentException( 'Expected --probe-mode to be auto, in-process, or process.' );
+	}
+
+	// 'auto' (also the default when the option is absent) currently always resolves to 'process'.
+	return 'process';
+}
+
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+$replay_path = \HtmlApiFuzz\option_string( $options, 'replay', $options['_'][0] ?? null ); // --replay, else the first positional argument.
+if ( null === $replay_path || \HtmlApiFuzz\option_bool( $options, 'help', false ) ) {
+	echo "Usage: php tools/html-api-fuzz/minimize.php --replay path/to/replay.json [--output-dir DIR] [--target-kind failure|oracle-finding --target-hash HASH] [--dom-oracle php-dom|lexbor-source] [--lexbor-oracle-bin PATH] [--probe-mode auto|in-process|process] [--keep-candidate-artifacts]\n";
+	exit( null === $replay_path ? 1 : 0 ); // Exit status 1 only when the replay path is missing.
+}
+
+$replay = \HtmlApiFuzz\read_json_file( $replay_path );
+if ( ! is_array( $replay ) || ! is_string( $replay['inputBase64'] ?? null ) ) { // A present but non-string inputBase64 must not reach base64_decode() below.
+	fwrite( STDERR, "Invalid replay file: {$replay_path}\n" );
+	exit( 1 );
+}
+
+$target      = html_api_fuzz_min_target( $replay, $options );
+$target_hash = $target['hash'];
+$any_failure = \HtmlApiFuzz\option_bool( $options, 'any-failure', false );
+if ( null === $target_hash && ! $any_failure ) { // Without a target signature, only --any-failure gives the probes something to match.
+	fwrite( STDERR, "Replay does not contain a target failure or oracle-finding signature. Use --any-failure to minimize any failure.\n" );
+	exit( 1 );
+}
+
+$output_dir = \HtmlApiFuzz\option_string( $options, 'output-dir', dirname( $replay_path ) . '/minimized-' . \HtmlApiFuzz\timestamp() );
+\HtmlApiFuzz\ensure_dir( $output_dir );
+
+$input = base64_decode( $replay['inputBase64'], true ); // Strict mode: false for characters outside the base64 alphabet.
+if ( false === $input ) {
+	fwrite( STDERR, "Invalid base64 input in replay file: {$replay_path}\n" );
+	exit( 1 );
+}
+$original_generator = is_array( $replay['generator'] ?? null ) ? $replay['generator'] : ( $replay['originalGenerator'] ?? null ); // Prefer 'generator'; otherwise carry over 'originalGenerator'.
+$source_replay = \HtmlApiFuzz\replay_source_metadata( $replay_path, $replay );
+$oracle_options = $options; // CLI values win; the checks below fill gaps from what the replay recorded.
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'dom-oracle', null ) ) {
+	$oracle_options['dom-oracle'] = $replay['options']['domOracle'] ?? $replay['oracle']['kind'] ?? \HtmlApiFuzz\OracleRenderer::KIND_PHP_DOM;
+}
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'lexbor-oracle-bin', null ) && is_string( $replay['options']['lexborOracleBin'] ?? null ) ) {
+	$oracle_options['lexbor-oracle-bin'] = $replay['options']['lexborOracleBin'];
+}
+$stored_oracle_timeout_ms = $replay['options']['oracleTimeoutMs'] ?? null;
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'oracle-timeout-ms', null ) && is_numeric( $stored_oracle_timeout_ms ) ) {
+	$oracle_options['oracle-timeout-ms'] = (string) (int) $stored_oracle_timeout_ms;
+}
+$oracle_renderer = \HtmlApiFuzz\OracleRenderer::from_options( $oracle_options );
+$probe_mode      = html_api_fuzz_min_probe_mode( $options );
+$base = array( // Fixed settings shared by every probe and by the final run.
+	'mode'              => $replay['mode'] ?? \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'profile'           => $replay['profile'] ?? 'replay',
+	'payloadPolicy'     => \HtmlApiFuzz\normalize_payload_policy_label( $replay['payloadPolicy'] ?? null )
+		?? \HtmlApiFuzz\normalize_payload_policy_label( $replay['generator']['payloadPolicy'] ?? null ),
+	'fragmentContext'   => is_string( $replay['fragmentContext'] ?? null ) ? $replay['fragmentContext'] : 'body',
+	'originalGenerator' => $original_generator,
+	'seed'              => (int) ( $replay['seed'] ?? 1 ),
+	'targetHash'        => $target_hash,
+	'targetKind'        => $target['kind'] ?? 'failure',
+	'sourceReplay'      => $source_replay,
+	'oracle'            => $oracle_renderer->metadata(),
+	'oracleRenderer'    => $oracle_renderer,
+	'oracleOptions'     => array(
+		'dom-oracle'         => \HtmlApiFuzz\option_string( $oracle_options, 'dom-oracle', \HtmlApiFuzz\OracleRenderer::KIND_PHP_DOM ),
+		'lexbor-oracle-bin'  => \HtmlApiFuzz\option_string( $oracle_options, 'lexbor-oracle-bin', null ),
+		'oracle-timeout-ms'  => \HtmlApiFuzz\option_string( $oracle_options, 'oracle-timeout-ms', null ),
+	),
+	'oracleWorkerArgs'  => $oracle_renderer->worker_args(),
+	'gitMetadataBase64' => \HtmlApiFuzz\git_metadata_base64( \HtmlApiFuzz\git_metadata() ),
+	'failUnsupported'   => (bool) ( $replay['options']['failUnsupported'] ?? ( 'unsupported' === ( $replay['result']['failureClass'] ?? null ) ) ), // Without a recorded option, on only when the replay's failure class is 'unsupported'.
+	'maxTokens'         => (int) ( $replay['limits']['maxTokens'] ?? 2000 ),
+	'maxNodes'          => (int) ( $replay['limits']['maxNodes'] ?? 3000 ),
+	'probeMode'         => $probe_mode,
+	'keepCandidateArtifacts' => \HtmlApiFuzz\option_bool( $options, 'keep-candidate-artifacts', false ),
+);
+$timeout_ms    = \HtmlApiFuzz\option_int( $options, 'timeout-ms', 2500 );
+$max_attempts  = \HtmlApiFuzz\option_int( $options, 'max-attempts', 600 ); // One probe budget shared by all three phases.
+$attempt_count = 0;
+$probe_stats   = array(
+	'durationMs'    => 0,
+	'maxDurationMs' => 0,
+	'accepted'      => 0,
+);
+
+$current = $input;
+
+/*
+ * Phase 1: delete one markup-aligned segment at a time. The split on tag
+ * boundaries is token-naive (rawtext contents are split incorrectly), but
+ * an unsound candidate simply fails the signature check, and aligned
+ * deletions converge far faster on HTML than blind byte chunks.
+ */
+$progress = true;
+while ( $progress && $attempt_count < $max_attempts ) {
+	$progress = false;
+	preg_match_all( '/<[^>]*>?|[^<]+/s', $current, $matches );
+	$segments      = $matches[0];
+	$segment_count = count( $segments );
+	if ( $segment_count < 2 ) {
+		break;
+	}
+	for ( $i = $segment_count - 1; $i >= 0 && $attempt_count < $max_attempts; $i-- ) {
+		$candidate = implode( '', array_slice( $segments, 0, $i ) ) . implode( '', array_slice( $segments, $i + 1 ) );
+		if ( $candidate === $current || '' === $candidate ) {
+			continue;
+		}
+		++$attempt_count;
+		$test = html_api_fuzz_min_test( $candidate, $base, $output_dir, $attempt_count, $timeout_ms, $any_failure );
+		html_api_fuzz_min_record_probe( $probe_stats, $test );
+		if ( ! $test['accepted'] ) {
+			continue;
+		}
+		$current  = $candidate;
+		$progress = true;
+		break;
+	}
+}
+
+// Phase 2: byte-chunk deletion for reductions that cross tag boundaries.
+$chunks = 2; // Start by trying to drop each half of the input.
+while ( strlen( $current ) > 0 && $attempt_count < $max_attempts ) {
+	$length     = strlen( $current );
+	$chunk_size = (int) ceil( $length / $chunks );
+	$changed    = false;
+
+	for ( $offset = 0; $offset < $length && $attempt_count < $max_attempts; $offset += $chunk_size ) {
+		$candidate = substr( $current, 0, $offset ) . substr( $current, min( $length, $offset + $chunk_size ) ); // $current without bytes [offset, offset + chunk_size).
+		if ( $candidate === $current ) {
+			continue;
+		}
+		++$attempt_count;
+		$test = html_api_fuzz_min_test( $candidate, $base, $output_dir, $attempt_count, $timeout_ms, $any_failure );
+		html_api_fuzz_min_record_probe( $probe_stats, $test );
+		if ( $test['accepted'] ) {
+			$current = $candidate;
+			$chunks  = max( 2, $chunks - 1 ); // After a success, retry the shorter input with one chunk fewer (never below two).
+			$changed = true;
+			break;
+		}
+	}
+
+	if ( ! $changed ) {
+		if ( $chunks >= $length ) { // Chunks were already a single byte each; nothing finer is left to try.
+			break;
+		}
+		$chunks = min( $length, $chunks * 2 ); // No chunk could be dropped: double the chunk count, i.e. use smaller chunks.
+	}
+}
+
+/*
+ * Phase 3: per-byte canonicalization. Deletion is tried first; replacements
+ * never grow the input. After a deletion the same index holds the next byte,
+ * so stay in place; after a substitution move on.
+ */
+$simple_replacements = array( '', 'a', ' ', "\n" ); // '' is the deletion; the rest are single-byte substitutes, tried in this order.
+for ( $i = 0; $i < strlen( $current ) && $attempt_count < $max_attempts; ++$i ) { // strlen() is re-read each pass because $current can shrink.
+	foreach ( $simple_replacements as $replacement ) {
+		$candidate = substr( $current, 0, $i ) . $replacement . substr( $current, $i + 1 );
+		if ( $candidate === $current ) { // The byte already equals this replacement; probing it would waste an attempt.
+			continue;
+		}
+		++$attempt_count;
+		$test = html_api_fuzz_min_test( $candidate, $base, $output_dir, $attempt_count, $timeout_ms, $any_failure );
+		html_api_fuzz_min_record_probe( $probe_stats, $test );
+		if ( $test['accepted'] ) {
+			$current = $candidate;
+			if ( '' === $replacement ) {
+				--$i; // Cancels the loop's ++$i so the byte that moved into this index is examined next.
+			}
+			break;
+		}
+	}
+}
+
+$final_dir = $output_dir . '/minimized';
+\HtmlApiFuzz\ensure_dir( $final_dir );
+$final_input_path = $final_dir . '/input.bin';
+file_put_contents( $final_input_path, $current );
+// A reused output directory may hold files from an earlier run; remove them so a worker that writes nothing is not mistaken for a success.
+array_map( 'unlink', array_filter( array( $final_dir . '/result.json', $final_dir . '/replay.json' ), 'is_file' ) );
+$args = array(
+	__DIR__ . '/worker.php',
+	'--input-file',
+	$final_input_path,
+	'--mode',
+	$base['mode'],
+	'--profile',
+	$base['profile'],
+	'--seed',
+	(string) $base['seed'],
+	'--output-dir',
+	$final_dir,
+	'--max-tokens',
+	(string) $base['maxTokens'],
+	'--max-nodes',
+	(string) $base['maxNodes'],
+);
+if ( null !== $base['gitMetadataBase64'] ) {
+	$args[] = '--git-metadata-base64';
+	$args[] = $base['gitMetadataBase64'];
+}
+if ( $base['failUnsupported'] ) {
+	$args[] = '--fail-unsupported';
+}
+if ( null !== $base['payloadPolicy'] ) {
+	$args[] = '--payload-policy';
+	$args[] = $base['payloadPolicy'];
+}
+if ( 'body' !== $base['fragmentContext'] ) {
+	$args[] = '--fragment-context';
+	$args[] = $base['fragmentContext'];
+}
+$args = array_merge( $args, array_values( $base['oracleWorkerArgs'] ) ); // Oracle arguments go last, in their original order.
+\HtmlApiFuzz\run_php_process( $args, \HtmlApiFuzz\repo_root(), $timeout_ms, $final_dir . '/worker.log' );
+$final_result = \HtmlApiFuzz\read_json_file( $final_dir . '/result.json' );
+$final_replay = \HtmlApiFuzz\read_json_file( $final_dir . '/replay.json' );
+if ( is_array( $final_replay ) && is_array( $base['originalGenerator'] ) ) {
+	$final_replay['originalGenerator'] = $base['originalGenerator'];
+}
+if ( is_array( $final_replay ) ) {
+	$final_replay['sourceReplay'] = $base['sourceReplay'];
+	\HtmlApiFuzz\write_json_file( $final_dir . '/replay.json', $final_replay );
+}
+
+$summary = array(
+	'schemaVersion'     => 1,
+	'kind'              => 'html-api-fuzz-minimize-result',
+	'createdAt'         => gmdate( 'c' ),
+	'ok'                => html_api_fuzz_min_accepts_result( $final_result, $base, $any_failure ), // Same acceptance rule the probes used; a missing final result is not ok.
+	'targetHash'        => $target_hash,
+	'targetKind'        => $base['targetKind'],
+	'finalHash'         => $final_result['signature']['hash'] ?? null,
+	'finalOracleHash'   => $final_result['oracleFinding']['signature']['hash'] ?? null,
+	'profile'           => $base['profile'],
+	'mode'              => $base['mode'],
+	'payloadPolicy'     => $base['payloadPolicy'],
+	'originalGenerator' => $base['originalGenerator'],
+	'sourceReplay'      => $base['sourceReplay'],
+	'oracle'            => $final_result['oracle'] ?? $base['oracle'],
+	'finalFailureClass' => $final_result['failureClass'] ?? null,
+	'finalStatus'       => $final_result['status'] ?? null,
+	'originalLength'    => strlen( $input ),
+	'minimizedLength'   => strlen( $current ),
+	'attempts'          => $attempt_count,
+	'probeMode'         => $base['probeMode'],
+	'candidateArtifactsRetained' => 'process' === $base['probeMode'] || $base['keepCandidateArtifacts'],
+	'probeTiming'       => array(
+		'totalDurationMs' => $probe_stats['durationMs'],
+		'maxDurationMs'   => $probe_stats['maxDurationMs'],
+		'acceptedProbes'  => $probe_stats['accepted'],
+		'averageDurationMs' => $attempt_count > 0 ? round( $probe_stats['durationMs'] / $attempt_count, 2 ) : null,
+	),
+	'minimizedReplay'   => $final_dir . '/replay.json',
+	'minimizedResult'   => $final_dir . '/result.json',
+	'inputBase64'       => base64_encode( $current ),
+	'phpunitSnippet'    => '$html = base64_decode( ' . var_export( base64_encode( $current ), true ) . ' );',
+);
+\HtmlApiFuzz\write_json_file( $output_dir . '/minimize-result.json', $summary );
+echo \HtmlApiFuzz\json_encode_safe( $summary ) . "\n";
+exit( $summary['ok'] ? 0 : 1 ); // Exit status mirrors whether the minimized input still reproduces the target.
diff --git a/tools/html-api-fuzz/oracles/lexbor/README.md b/tools/html-api-fuzz/oracles/lexbor/README.md
new file mode 100644
index 0000000000000..a7f7a882eabdf
--- /dev/null
+++ b/tools/html-api-fuzz/oracles/lexbor/README.md
@@ -0,0 +1,66 @@
+# Lexbor Source Oracle
+
+This directory contains a standalone oracle binary for comparing the WordPress
+HTML API against a source-built Lexbor checkout instead of PHP's bundled
+`Dom\HTMLDocument` runtime.
+
+Build upstream `master`:
+
+```sh
+tools/html-api-fuzz/oracles/lexbor/build.sh
+```
+
+The script clones Lexbor under `.cache/lexbor/<ref>/source`, builds and
+installs a static Lexbor library under the same cache entry, then writes:
+
+```text
+tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle
+```
+
+Use it from the fuzzer by selecting the non-default oracle:
+
+```sh
+php tools/html-api-fuzz/worker.php \
+  --seed 1 \
+  --dom-oracle lexbor-source \
+  --output-dir artifacts/html-api-fuzz/seed-1-lexbor
+
+php tools/html-api-fuzz/runner.php \
+  --max-seeds 100 \
+  --dom-oracle lexbor-source
+```
+
+Pass `--lexbor-oracle-bin PATH` or set `HTML_API_FUZZ_LEXBOR_ORACLE` when the
+binary is not at the default build path above. Replays preserve the selected
+oracle and binary path.
+
+The binary records the resolved Lexbor commit in its JSON metadata, even when
+building from a moving ref such as `master`.
+
+Use a different checkout or commit when bisecting upstream behavior:
+
+```sh
+LEXBOR_SOURCE_DIR=/path/to/lexbor \
+LEXBOR_COMMIT=481c444261a132190a3fb746d6d2f60824af3717 \
+tools/html-api-fuzz/oracles/lexbor/build.sh
+```
+
+Direct CLI examples:
+
+```sh
+tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle \
+  --mode full-document \
+  --max-nodes 3000 \
+  --input /path/to/input.bin
+
+tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle \
+  --mode fragment-body \
+  --context body \
+  --max-nodes 3000 \
+  --input /path/to/input.bin
+```
+
+The oracle returns JSON with `status`, `oracle` metadata, `tree`,
+`treeBase64`, and `nodeCount`. The `treeBase64` field is the exact
+html5lib-style tree bytes consumed by the PHP adapter; `tree` is the same tree
+as a JSON-safe display string. Neither field is serialized HTML.
diff --git a/tools/html-api-fuzz/oracles/lexbor/build.sh b/tools/html-api-fuzz/oracles/lexbor/build.sh
new file mode 100755
index 0000000000000..b7dd09f5f19d9
--- /dev/null
+++ b/tools/html-api-fuzz/oracles/lexbor/build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env sh
+set -eu # Stop on the first failing command or unset variable.
+
+ref="${LEXBOR_COMMIT:-master}" # Ref to build: LEXBOR_COMMIT if set, otherwise master.
+script_dir="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)"
+repo_root="$(CDPATH= cd -- "$script_dir/../../../.." && pwd)"
+cache_dir="${LEXBOR_CACHE_DIR:-$repo_root/.cache/lexbor/$ref}" # One cache entry per ref unless overridden.
+source_dir="${LEXBOR_SOURCE_DIR:-$cache_dir/source}"
+build_dir="${LEXBOR_BUILD_DIR:-$cache_dir/build}"
+install_dir="${LEXBOR_INSTALL_DIR:-$cache_dir/install}"
+oracle_build_dir="$script_dir/build"
+oracle_bin="$oracle_build_dir/lexbor-tree-oracle"
+
+if [ ! -d "$source_dir/.git" ]; then # Clone only when the source directory is not already a git checkout.
+	mkdir -p "$(dirname "$source_dir")"
+	git clone https://github.com/lexbor/lexbor.git "$source_dir"
+fi
+
+git -C "$source_dir" fetch --tags origin # Runs on every build, including against a user-supplied LEXBOR_SOURCE_DIR.
+
+checkout_ref="$ref"
+if git -C "$source_dir" rev-parse --verify --quiet "origin/$ref^{commit}" >/dev/null; then # The ref names a remote branch: use the freshly fetched tip.
+	checkout_ref="origin/$ref"
+fi
+git -C "$source_dir" checkout --detach "$checkout_ref"
+commit="$(git -C "$source_dir" rev-parse HEAD)" # Resolved commit, compiled into the oracle binary below.
+
+cmake -S "$source_dir" -B "$build_dir" \
+	-DLEXBOR_BUILD_SHARED=OFF \
+	-DLEXBOR_BUILD_STATIC=ON \
+	-DLEXBOR_BUILD_SEPARATELY=OFF \
+	-DLEXBOR_BUILD_EXAMPLES=OFF \
+	-DLEXBOR_BUILD_TESTS=OFF \
+	-DLEXBOR_BUILD_UTILS=OFF \
+	-DCMAKE_INSTALL_PREFIX="$install_dir"
+
+cmake --build "$build_dir" --target lexbor_static
+cmake --install "$build_dir" --prefix "$install_dir"
+
+mkdir -p "$oracle_build_dir"
+
+cc ${CFLAGS:-} \
+	-std=c99 \
+	-Wall \
+	-Wextra \
+	-Werror \
+	-I"$install_dir/include" \
+	-DHTML_API_FUZZ_LEXBOR_COMMIT="\"$commit\"" \
+	"$script_dir/lexbor-tree-oracle.c" \
+	"$install_dir/lib/liblexbor_static.a" \
+	-o "$oracle_bin" \
+	${LDFLAGS:-}
+
+printf '%s\n' "$oracle_bin" # Last line of output is the path of the built binary.
diff --git a/tools/html-api-fuzz/oracles/lexbor/lexbor-tree-oracle.c b/tools/html-api-fuzz/oracles/lexbor/lexbor-tree-oracle.c
new file mode 100644
index 0000000000000..701e25fc791b6
--- /dev/null
+++ b/tools/html-api-fuzz/oracles/lexbor/lexbor-tree-oracle.c
@@ -0,0 +1,1243 @@
+/*
+ * Source-built Lexbor tree oracle for the HTML API fuzzer.
+ *
+ * Parses one input with Lexbor and emits a JSON result whose "tree" field uses
+ * the same html5lib-style text format as HtmlApiFuzz\TreeRenderer.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <lexbor/dom/interfaces/character_data.h>
+#include <lexbor/dom/interfaces/document_type.h>
+#include <lexbor/html/html.h>
+#include <lexbor/ns/const.h>
+
+#ifndef HTML_API_FUZZ_LEXBOR_COMMIT
+#define HTML_API_FUZZ_LEXBOR_COMMIT "unknown"
+#endif
+
+/* Growable byte buffer that the buffer_* helpers keep NUL-terminated. */
+typedef struct {
+	char *data; /* Heap storage; NULL until the first successful reserve. */
+	size_t length; /* Bytes in use, excluding the terminating NUL. */
+	size_t capacity; /* Bytes allocated for data. */
+	bool failed; /* Set when a reserve fails; later reserves return false. */
+} buffer_t;
+
+/* One rendered attribute; each string is heap-allocated and owned by the record. */
+typedef struct {
+	char *sort_name; /* Escaped display name with scrubbing applied; primary sort key. */
+	char *render_name; /* Escaped display name without scrubbing; printed and used as tie-breaker. */
+	char *value; /* Escaped attribute value. */
+} attr_record_t;
+
+/* Outcome of an oracle run; print_result() maps these to "ok", "unsupported" and "error". */
+typedef enum {
+	ORACLE_OK,
+	ORACLE_UNSUPPORTED,
+	ORACLE_ERROR,
+} oracle_status_t;
+
+/* State shared by the rendering functions and reported by print_result(). */
+typedef struct {
+	oracle_status_t status; /* Rendering stops once this leaves ORACLE_OK. */
+	const char *failure_class; /* Emitted as "failureClass" when not NULL. */
+	const char *message; /* Emitted under "error" or "unsupported" when not NULL. */
+	buffer_t tree; /* Accumulated tree text. */
+	size_t node_count; /* Nodes visited so far. */
+	size_t max_nodes; /* Visiting more nodes than this is an error. */
+} render_ctx_t;
+
+/* Command-line options as filled in by parse_args(). */
+typedef struct {
+	const char *mode; /* "full-document" or "fragment-body". */
+	const char *context; /* Fragment context element name; defaults to "body". */
+	const char *input_path; /* Path of the file to parse. */
+	size_t max_nodes; /* Node limit; defaults to 3000. */
+	bool show_help; /* --help or -h was given. */
+	bool show_version; /* --version was given. */
+} cli_options_t;
+
+static void buffer_init(buffer_t *buf);
+static void buffer_destroy(buffer_t *buf);
+static bool buffer_reserve(buffer_t *buf, size_t extra);
+static bool buffer_append_mem(buffer_t *buf, const char *data, size_t len);
+static bool buffer_append_cstr(buffer_t *buf, const char *data);
+static bool buffer_append_char(buffer_t *buf, char ch);
+static bool buffer_append_repeat(buffer_t *buf, const char *data, size_t len, size_t count);
+static char *buffer_take_cstr(buffer_t *buf);
+static bool append_escaped_scalar(buffer_t *buf, const lxb_char_t *data, size_t len, bool scrub);
+static bool append_json_string(buffer_t *buf, const char *data, size_t len);
+static bool append_json_base64(buffer_t *buf, const char *data, size_t len);
+static bool append_tree_line_indent(buffer_t *buf, int indent_level);
+static bool append_display_element_name(buffer_t *buf, lxb_dom_element_t *element);
+static bool append_escaped_display_element_name(buffer_t *buf, lxb_dom_element_t *element);
+static bool append_display_attribute_name(buffer_t *buf, lxb_dom_attr_t *attr);
+static int compare_attr_records(const void *a_ptr, const void *b_ptr);
+static bool render_attributes(render_ctx_t *ctx, lxb_dom_element_t *element, int indent_level);
+static void destroy_attr_records(attr_record_t *records, size_t count);
+static void render_node(render_ctx_t *ctx, lxb_dom_node_t *node, int indent_level);
+static void render_children(render_ctx_t *ctx, lxb_dom_node_t *first, int indent_level);
+static bool read_file(const char *path, lxb_char_t **data, size_t *len, const char **message);
+static bool parse_size(const char *value, size_t *out);
+static bool parse_args(int argc, char **argv, cli_options_t *options, const char **message);
+static void print_usage(FILE *stream);
+static void print_version(void);
+static void print_result(render_ctx_t *ctx);
+static void print_cli_error(const char *message);
+static bool context_to_tag(const char *context, lxb_tag_id_t *tag_id, lxb_ns_id_t *ns_id);
+static void render_full_document(render_ctx_t *ctx, const lxb_char_t *input, size_t input_len);
+static void render_fragment(render_ctx_t *ctx, const lxb_char_t *input, size_t input_len, const char *context);
+
+/* Put a buffer into its empty state: no storage, zero length, not failed. */
+static void
+buffer_init(buffer_t *buf)
+{
+	const buffer_t empty = { NULL, 0, 0, false };
+
+	*buf = empty;
+}
+
+/* Release a buffer's storage and leave it in the empty, reusable state. */
+static void
+buffer_destroy(buffer_t *buf)
+{
+	char *storage = buf->data;
+
+	buffer_init(buf);
+	free(storage);
+}
+
+/*
+ * Ensure room for `extra` more bytes plus a terminating NUL.
+ *
+ * Returns false, and marks the buffer failed, when the size would overflow or
+ * the allocation fails. A buffer that has already failed stays failed.
+ */
+static bool
+buffer_reserve(buffer_t *buf, size_t extra)
+{
+	size_t required;
+	size_t grown;
+	char *resized;
+
+	if (buf->failed) {
+		return false;
+	}
+
+	/* length + extra + 1 must be representable in size_t. */
+	if (extra > SIZE_MAX - buf->length - 1) {
+		buf->failed = true;
+		return false;
+	}
+
+	required = buf->length + extra + 1;
+	if (buf->capacity >= required) {
+		return true;
+	}
+
+	/* Double from the current capacity (or 256); fall back to an exact fit
+	 * when doubling again would overflow. */
+	grown = buf->capacity != 0 ? buf->capacity : 256;
+	for (; grown < required; grown *= 2) {
+		if (grown > SIZE_MAX / 2) {
+			grown = required;
+			break;
+		}
+	}
+
+	resized = (char *) realloc(buf->data, grown);
+	if (resized == NULL) {
+		buf->failed = true;
+		return false;
+	}
+
+	buf->capacity = grown;
+	buf->data = resized;
+	buf->data[buf->length] = '\0';
+	return true;
+}
+
+/* Append `len` raw bytes and re-terminate; returns false if space cannot be reserved. */
+static bool
+buffer_append_mem(buffer_t *buf, const char *data, size_t len)
+{
+	bool reserved = buffer_reserve(buf, len);
+
+	if (reserved) {
+		/* Skip memcpy for empty input so `data` is never dereferenced. */
+		if (len != 0) {
+			memcpy(buf->data + buf->length, data, len);
+			buf->length += len;
+		}
+		buf->data[buf->length] = '\0';
+	}
+
+	return reserved;
+}
+
+/* Append a NUL-terminated string, without its terminator. */
+static bool
+buffer_append_cstr(buffer_t *buf, const char *data)
+{
+	size_t text_len = strlen(data);
+
+	return buffer_append_mem(buf, data, text_len);
+}
+
+/* Append a single byte. */
+static bool
+buffer_append_char(buffer_t *buf, char ch)
+{
+	char single[1];
+
+	single[0] = ch;
+	return buffer_append_mem(buf, single, sizeof(single));
+}
+
+/* Append the same `len` bytes `count` times; stops at the first failed append. */
+static bool
+buffer_append_repeat(buffer_t *buf, const char *data, size_t len, size_t count)
+{
+	size_t remaining = count;
+
+	while (remaining > 0) {
+		if (!buffer_append_mem(buf, data, len)) {
+			return false;
+		}
+		remaining--;
+	}
+
+	return true;
+}
+
+/*
+ * Hand ownership of the buffer's NUL-terminated contents to the caller.
+ *
+ * The reserve of zero bytes forces an allocation, so an empty buffer yields an
+ * empty string rather than NULL. Returns NULL only when that reserve fails;
+ * the buffer then keeps whatever storage it had.
+ */
+static char *
+buffer_take_cstr(buffer_t *buf)
+{
+	char *owned = NULL;
+
+	if (buffer_reserve(buf, 0)) {
+		owned = buf->data;
+		buf->data = NULL;
+		buf->capacity = 0;
+		buf->length = 0;
+	}
+
+	return owned;
+}
+
+/*
+ * Append one byte in the tree text's escaped form.
+ *
+ * Newline, carriage return, tab, NUL, backslash and double quote use short
+ * backslash escapes. Other bytes below 0x20, and 0x7f, become \xHH. Everything
+ * else is copied unchanged.
+ */
+static bool
+append_escaped_byte(buffer_t *buf, unsigned char byte)
+{
+	const char *escape = NULL;
+	char hex[5];
+
+	switch (byte) {
+		case '\n':
+			escape = "\\n";
+			break;
+		case '\r':
+			escape = "\\r";
+			break;
+		case '\t':
+			escape = "\\t";
+			break;
+		case '\0':
+			escape = "\\0";
+			break;
+		case '\\':
+			escape = "\\\\";
+			break;
+		case '"':
+			escape = "\\\"";
+			break;
+		default:
+			if (byte < 0x20 || byte == 0x7f) {
+				snprintf(hex, sizeof(hex), "\\x%02X", byte);
+				escape = hex;
+			}
+			break;
+	}
+
+	if (escape != NULL) {
+		return buffer_append_cstr(buf, escape);
+	}
+
+	return buffer_append_char(buf, (char) byte);
+}
+
+/*
+ * Append `len` bytes of `data` in escaped form; a NULL `data` is treated as
+ * empty.
+ *
+ * With `scrub` set, each NUL byte becomes the UTF-8 encoding of U+FFFD, and
+ * both CR and CRLF collapse to a single newline before escaping.
+ */
+static bool
+append_escaped_scalar(buffer_t *buf, const lxb_char_t *data, size_t len, bool scrub)
+{
+	static const char replacement[] = "\xEF\xBF\xBD";
+	size_t total = data == NULL ? 0 : len;
+	size_t pos = 0;
+
+	while (pos < total) {
+		unsigned char current = (unsigned char) data[pos++];
+
+		if (scrub && current == '\0') {
+			if (!buffer_append_mem(buf, replacement, sizeof(replacement) - 1)) {
+				return false;
+			}
+			continue;
+		}
+
+		if (scrub && current == '\r') {
+			/* `pos` already points past the CR; swallow a following LF. */
+			if (pos < total && data[pos] == '\n') {
+				pos++;
+			}
+			current = '\n';
+		}
+
+		if (!append_escaped_byte(buf, current)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Append `len` bytes of `data` as a double-quoted JSON string.
+ *
+ * Quote, backslash and the common control characters use short escapes; other
+ * bytes below 0x20 become \u00XX. Well-formed UTF-8 sequences are copied
+ * through. Any byte that does not start a well-formed sequence is replaced,
+ * one byte at a time, by the escape \uFFFD.
+ */
+static bool
+append_json_string(buffer_t *buf, const char *data, size_t len)
+{
+	size_t pos = 0;
+
+	if (!buffer_append_char(buf, '"')) {
+		return false;
+	}
+
+	while (pos < len) {
+		unsigned char lead = (unsigned char) data[pos];
+		const char *escape = NULL;
+		char control[7];
+		size_t width = 0;
+		size_t k;
+		bool well_formed;
+
+		switch (lead) {
+			case '"':
+				escape = "\\\"";
+				break;
+			case '\\':
+				escape = "\\\\";
+				break;
+			case '\b':
+				escape = "\\b";
+				break;
+			case '\f':
+				escape = "\\f";
+				break;
+			case '\n':
+				escape = "\\n";
+				break;
+			case '\r':
+				escape = "\\r";
+				break;
+			case '\t':
+				escape = "\\t";
+				break;
+			default:
+				if (lead < 0x20) {
+					snprintf(control, sizeof(control), "\\u%04X", lead);
+					escape = control;
+				}
+				break;
+		}
+
+		if (escape != NULL) {
+			if (!buffer_append_cstr(buf, escape)) {
+				return false;
+			}
+			pos++;
+			continue;
+		}
+
+		if (lead < 0x80) {
+			if (!buffer_append_char(buf, (char) lead)) {
+				return false;
+			}
+			pos++;
+			continue;
+		}
+
+		/* Sequence length implied by the lead byte; 0 marks an invalid lead. */
+		if (lead >= 0xC2 && lead <= 0xDF) {
+			width = 2;
+		} else if (lead >= 0xE0 && lead <= 0xEF) {
+			width = 3;
+		} else if (lead >= 0xF0 && lead <= 0xF4) {
+			width = 4;
+		}
+
+		well_formed = width > 0 && pos + width <= len;
+		for (k = 1; well_formed && k < width; k++) {
+			unsigned char continuation = (unsigned char) data[pos + k];
+			well_formed = continuation >= 0x80 && continuation <= 0xBF;
+		}
+
+		if (well_formed) {
+			/* Tighter second-byte ranges reject overlong forms, surrogates
+			 * and code points above U+10FFFF. */
+			unsigned char second = (unsigned char) data[pos + 1];
+
+			if (
+				(lead == 0xE0 && second < 0xA0) ||
+				(lead == 0xED && second > 0x9F) ||
+				(lead == 0xF0 && second < 0x90) ||
+				(lead == 0xF4 && second > 0x8F)
+			) {
+				well_formed = false;
+			}
+		}
+
+		if (!well_formed) {
+			if (!buffer_append_cstr(buf, "\\uFFFD")) {
+				return false;
+			}
+			pos++;
+			continue;
+		}
+
+		if (!buffer_append_mem(buf, data + pos, width)) {
+			return false;
+		}
+		pos += width;
+	}
+
+	return buffer_append_char(buf, '"');
+}
+
+/* Append `len` bytes of `data` as a double-quoted, '='-padded base64 string. */
+static bool
+append_json_base64(buffer_t *buf, const char *data, size_t len)
+{
+	static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	size_t offset;
+
+	if (!buffer_append_char(buf, '"')) {
+		return false;
+	}
+
+	for (offset = 0; offset < len; offset += 3) {
+		size_t remaining = len - offset;
+		unsigned int first = (unsigned char) data[offset];
+		unsigned int second = remaining > 1 ? (unsigned char) data[offset + 1] : 0;
+		unsigned int third = remaining > 2 ? (unsigned char) data[offset + 2] : 0;
+		/* Pack up to three bytes into 24 bits, then slice into 6-bit groups. */
+		unsigned int triple = (first << 16) | (second << 8) | third;
+		char quad[4];
+
+		quad[0] = alphabet[(triple >> 18) & 0x3F];
+		quad[1] = alphabet[(triple >> 12) & 0x3F];
+		quad[2] = remaining > 1 ? alphabet[(triple >> 6) & 0x3F] : '=';
+		quad[3] = remaining > 2 ? alphabet[triple & 0x3F] : '=';
+
+		if (!buffer_append_mem(buf, quad, sizeof(quad))) {
+			return false;
+		}
+	}
+
+	return buffer_append_char(buf, '"');
+}
+
+/* Append two spaces per indent level. */
+static bool
+append_tree_line_indent(buffer_t *buf, int indent_level)
+{
+	static const char unit[] = "  ";
+
+	return buffer_append_repeat(buf, unit, sizeof(unit) - 1, (size_t) indent_level);
+}
+
+/* Append `len` bytes, lowercasing only the ASCII letters A-Z. */
+static bool
+append_ascii_lower(buffer_t *buf, const lxb_char_t *data, size_t len)
+{
+	size_t pos = 0;
+
+	while (pos < len) {
+		unsigned char current = (unsigned char) data[pos];
+		bool is_upper = current >= 'A' && current <= 'Z';
+
+		if (is_upper) {
+			current = (unsigned char) tolower(current);
+		}
+
+		if (!buffer_append_char(buf, (char) current)) {
+			return false;
+		}
+		pos++;
+	}
+
+	return true;
+}
+
+/*
+ * Append the element's display name for the tree output.
+ *
+ * HTML elements use the ASCII-lowercased local name. SVG elements use
+ * "svg " plus the qualified name, MathML elements "math " plus the local
+ * name. Any other namespace uses the bare qualified name.
+ */
+static bool
+append_display_element_name(buffer_t *buf, lxb_dom_element_t *element)
+{
+	size_t name_len = 0;
+	const lxb_char_t *name = lxb_dom_element_local_name(element, &name_len);
+
+	switch (lxb_dom_element_ns_id(element)) {
+		case LXB_NS_HTML:
+			return append_ascii_lower(buf, name, name_len);
+
+		case LXB_NS_SVG:
+			name = lxb_dom_element_qualified_name(element, &name_len);
+			return buffer_append_cstr(buf, "svg ")
+				&& buffer_append_mem(buf, (const char *) name, name_len);
+
+		case LXB_NS_MATH:
+			return buffer_append_cstr(buf, "math ")
+				&& buffer_append_mem(buf, (const char *) name, name_len);
+
+		default:
+			name = lxb_dom_element_qualified_name(element, &name_len);
+			return buffer_append_mem(buf, (const char *) name, name_len);
+	}
+}
+
+/* Build the element's display name in a scratch buffer, then append it escaped (no scrubbing). */
+static bool
+append_escaped_display_element_name(buffer_t *buf, lxb_dom_element_t *element)
+{
+	buffer_t raw_name;
+	bool ok = false;
+
+	buffer_init(&raw_name);
+
+	if (append_display_element_name(&raw_name, element)) {
+		ok = append_escaped_scalar(buf, (const lxb_char_t *) raw_name.data, raw_name.length, false);
+	}
+
+	buffer_destroy(&raw_name);
+	return ok;
+}
+
+/*
+ * Append the attribute's display name for the tree output.
+ *
+ * Attributes in the XLink, XML and XMLNS namespaces use "xlink ", "xml " or
+ * "xmlns " followed by the local name. All others use the qualified name.
+ */
+static bool
+append_display_attribute_name(buffer_t *buf, lxb_dom_attr_t *attr)
+{
+	const char *prefix = NULL;
+	const lxb_char_t *name;
+	size_t name_len = 0;
+
+	switch ((lxb_ns_id_t) lxb_dom_interface_node(attr)->ns) {
+		case LXB_NS_XLINK:
+			prefix = "xlink ";
+			break;
+		case LXB_NS_XML:
+			prefix = "xml ";
+			break;
+		case LXB_NS_XMLNS:
+			prefix = "xmlns ";
+			break;
+		default:
+			break;
+	}
+
+	if (prefix == NULL) {
+		name = lxb_dom_attr_qualified_name(attr, &name_len);
+		return buffer_append_mem(buf, (const char *) name, name_len);
+	}
+
+	name = lxb_dom_attr_local_name(attr, &name_len);
+	return buffer_append_cstr(buf, prefix) && buffer_append_mem(buf, (const char *) name, name_len);
+}
+
+/*
+ * Three-way comparison of two display names, returning -1, 0 or 1.
+ *
+ * Names containing ':' sort after names without one. Within that, names
+ * containing a space sort after names without one. Remaining ties are broken
+ * by strcmp() order.
+ */
+static int
+compare_display_names(const char *a, const char *b)
+{
+	int a_colon = strchr(a, ':') != NULL;
+	int b_colon = strchr(b, ':') != NULL;
+	int a_space = strchr(a, ' ') != NULL;
+	int b_space = strchr(b, ' ') != NULL;
+	int order;
+
+	if (a_colon != b_colon) {
+		return a_colon - b_colon;
+	}
+
+	if (a_space != b_space) {
+		return a_space - b_space;
+	}
+
+	/* Normalise strcmp()'s arbitrary magnitude to exactly -1, 0 or 1. */
+	order = strcmp(a, b);
+	return (order > 0) - (order < 0);
+}
+
+/* qsort() comparator: order records by sort_name, then by render_name. */
+static int
+compare_attr_records(const void *a_ptr, const void *b_ptr)
+{
+	const attr_record_t *left = (const attr_record_t *) a_ptr;
+	const attr_record_t *right = (const attr_record_t *) b_ptr;
+	int by_sort_name = compare_display_names(left->sort_name, right->sort_name);
+
+	return by_sort_name != 0
+		? by_sort_name
+		: compare_display_names(left->render_name, right->render_name);
+}
+
+/*
+ * Append one `name="value"` line per attribute of `element`, in sorted order.
+ *
+ * Each attribute is first captured as an attr_record_t, the records are
+ * sorted with compare_attr_records(), and the lines are then written at
+ * `indent_level`. Returns true on success, including when the element has no
+ * attributes. On failure, sets the context's status, failure class and
+ * message, and returns false.
+ */
+static bool
+render_attributes(render_ctx_t *ctx, lxb_dom_element_t *element, int indent_level)
+{
+	lxb_dom_attr_t *attr;
+	attr_record_t *records = NULL;
+	size_t count = 0;
+	size_t index = 0;
+	size_t i;
+	bool ok = false;
+
+	/* First pass: count the attributes so the record array can be sized. */
+	for (attr = lxb_dom_element_first_attribute(element); attr != NULL; attr = lxb_dom_element_next_attribute(attr)) {
+		count++;
+	}
+
+	if (count == 0) {
+		return true;
+	}
+
+	/* calloc() zeroes the array, so cleanup can free() unfilled slots safely. */
+	records = (attr_record_t *) calloc(count, sizeof(attr_record_t));
+	if (records == NULL) {
+		ctx->status = ORACLE_ERROR;
+		ctx->failure_class = "oracle-renderer-error";
+		ctx->message = "Could not allocate attribute records.";
+		return false;
+	}
+
+	/* Second pass: build the escaped strings for each attribute. */
+	for (attr = lxb_dom_element_first_attribute(element); attr != NULL; attr = lxb_dom_element_next_attribute(attr)) {
+		buffer_t display;
+		buffer_t sort;
+		buffer_t render;
+		buffer_t value;
+		size_t value_len = 0;
+		const lxb_char_t *value_data;
+
+		buffer_init(&display);
+		buffer_init(&sort);
+		buffer_init(&render);
+		buffer_init(&value);
+
+		value_data = lxb_dom_attr_value(attr, &value_len);
+		/* The sort key is the display name escaped with scrubbing; the
+		 * rendered name and the value are escaped without it. */
+		if (
+			!append_display_attribute_name(&display, attr) ||
+			!append_escaped_scalar(&sort, (const lxb_char_t *) display.data, display.length, true) ||
+			!append_escaped_scalar(&render, (const lxb_char_t *) display.data, display.length, false) ||
+			!append_escaped_scalar(&value, value_data, value_len, false)
+		) {
+			buffer_destroy(&display);
+			buffer_destroy(&sort);
+			buffer_destroy(&render);
+			buffer_destroy(&value);
+			ctx->status = ORACLE_ERROR;
+			ctx->failure_class = "oracle-renderer-error";
+			ctx->message = "Could not render attributes.";
+			goto cleanup;
+		}
+
+		/* Move ownership of the strings into the record. */
+		records[index].sort_name = buffer_take_cstr(&sort);
+		records[index].render_name = buffer_take_cstr(&render);
+		records[index].value = buffer_take_cstr(&value);
+
+		/* Frees `display`, plus any buffer whose take failed and kept its storage. */
+		buffer_destroy(&display);
+		buffer_destroy(&sort);
+		buffer_destroy(&render);
+		buffer_destroy(&value);
+
+		if (records[index].sort_name == NULL || records[index].render_name == NULL || records[index].value == NULL) {
+			ctx->status = ORACLE_ERROR;
+			ctx->failure_class = "oracle-renderer-error";
+			ctx->message = "Could not store attribute records.";
+			goto cleanup;
+		}
+
+		index++;
+	}
+
+	qsort(records, count, sizeof(attr_record_t), compare_attr_records);
+
+	for (i = 0; i < count; i++) {
+		if (
+			!append_tree_line_indent(&ctx->tree, indent_level) ||
+			!buffer_append_cstr(&ctx->tree, records[i].render_name) ||
+			!buffer_append_cstr(&ctx->tree, "=\"") ||
+			!buffer_append_cstr(&ctx->tree, records[i].value) ||
+			!buffer_append_cstr(&ctx->tree, "\"\n")
+		) {
+			ctx->status = ORACLE_ERROR;
+			ctx->failure_class = "oracle-renderer-error";
+			ctx->message = "Could not append attribute lines.";
+			goto cleanup;
+		}
+	}
+
+	ok = true;
+
+cleanup:
+	/* Single exit for every path taken after the record array was allocated. */
+	destroy_attr_records(records, count);
+	return ok;
+}
+
+/* Free every string in the first `count` records, then the array; a NULL array is ignored. */
+static void
+destroy_attr_records(attr_record_t *records, size_t count)
+{
+	attr_record_t *record;
+	size_t remaining;
+
+	if (records != NULL) {
+		record = records;
+		for (remaining = count; remaining > 0; remaining--) {
+			free(record->sort_name);
+			free(record->render_name);
+			free(record->value);
+			record++;
+		}
+
+		free(records);
+	}
+}
+
+static bool
+increment_node_count(render_ctx_t *ctx)
+{
+	ctx->node_count++;
+	if (ctx->node_count > ctx->max_nodes) {
+		ctx->status = ORACLE_ERROR;
+		ctx->failure_class = "node-limit-exceeded";
+		ctx->message = "DOM node limit exceeded.";
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Append the tree-text rendering of `node` and its descendants to ctx->tree.
+ *
+ * Does nothing when the context is already in a non-OK state or `node` is
+ * NULL. Every node that reaches the switch counts against the node limit,
+ * including node types that produce no output. Failures are reported through
+ * the context's status, failure class and message.
+ */
+static void
+render_node(render_ctx_t *ctx, lxb_dom_node_t *node, int indent_level)
+{
+	if (ctx->status != ORACLE_OK || node == NULL) {
+		return;
+	}
+
+	if (!increment_node_count(ctx)) {
+		return;
+	}
+
+	switch (node->type) {
+		/* Doctype: `<!DOCTYPE name>` or `<!DOCTYPE name "public" "system">`.
+		 * No indentation is written for this node type. */
+		case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE: {
+			lxb_dom_document_type_t *doctype = lxb_dom_interface_document_type(node);
+			size_t name_len = 0;
+			size_t public_len = 0;
+			size_t system_len = 0;
+			const lxb_char_t *name = lxb_dom_document_type_name(doctype, &name_len);
+			const lxb_char_t *public_id = lxb_dom_document_type_public_id(doctype, &public_len);
+			const lxb_char_t *system_id = lxb_dom_document_type_system_id(doctype, &system_len);
+
+			if (
+				!buffer_append_cstr(&ctx->tree, "<!DOCTYPE ") ||
+				!append_escaped_scalar(&ctx->tree, name, name_len, false)
+			) {
+				ctx->status = ORACLE_ERROR;
+				ctx->failure_class = "oracle-renderer-error";
+				ctx->message = "Could not render doctype.";
+				return;
+			}
+
+			/* Both identifiers are written as soon as either is non-empty. */
+			if (public_len > 0 || system_len > 0) {
+				if (
+					!buffer_append_cstr(&ctx->tree, " \"") ||
+					!append_escaped_scalar(&ctx->tree, public_id, public_len, false) ||
+					!buffer_append_cstr(&ctx->tree, "\" \"") ||
+					!append_escaped_scalar(&ctx->tree, system_id, system_len, false) ||
+					!buffer_append_char(&ctx->tree, '"')
+				) {
+					ctx->status = ORACLE_ERROR;
+					ctx->failure_class = "oracle-renderer-error";
+					ctx->message = "Could not render doctype identifiers.";
+					return;
+				}
+			}
+
+			if (!buffer_append_cstr(&ctx->tree, ">\n")) {
+				ctx->status = ORACLE_ERROR;
+				ctx->failure_class = "oracle-renderer-error";
+				ctx->message = "Could not finish doctype.";
+			}
+			return;
+		}
+
+		/* Element: `<name>` line, attribute lines one level deeper, then children. */
+		case LXB_DOM_NODE_TYPE_ELEMENT: {
+			lxb_dom_element_t *element = lxb_dom_interface_element(node);
+
+			if (
+				!append_tree_line_indent(&ctx->tree, indent_level) ||
+				!buffer_append_char(&ctx->tree, '<') ||
+				!append_escaped_display_element_name(&ctx->tree, element) ||
+				!buffer_append_cstr(&ctx->tree, ">\n") ||
+				!render_attributes(ctx, element, indent_level + 1)
+			) {
+				/* render_attributes() reports its own error; keep it if set. */
+				if (ctx->status == ORACLE_OK) {
+					ctx->status = ORACLE_ERROR;
+					ctx->failure_class = "oracle-renderer-error";
+					ctx->message = "Could not render element.";
+				}
+				return;
+			}
+
+			/* HTML <template>: emit a `content` marker and render the template
+			 * contents beneath it instead of the element's own children. */
+			if (node->local_name == LXB_TAG_TEMPLATE && node->ns == LXB_NS_HTML) {
+				lxb_html_template_element_t *template_element = lxb_html_interface_template(node);
+				if (!append_tree_line_indent(&ctx->tree, indent_level + 1) || !buffer_append_cstr(&ctx->tree, "content\n")) {
+					ctx->status = ORACLE_ERROR;
+					ctx->failure_class = "oracle-renderer-error";
+					ctx->message = "Could not render template content marker.";
+					return;
+				}
+				if (template_element->content != NULL) {
+					render_children(ctx, template_element->content->node.first_child, indent_level + 2);
+				}
+				return;
+			}
+
+			render_children(ctx, node->first_child, indent_level + 1);
+			return;
+		}
+
+		/* Text and CDATA: a quoted, escaped line; empty nodes produce no line. */
+		case LXB_DOM_NODE_TYPE_TEXT:
+		case LXB_DOM_NODE_TYPE_CDATA_SECTION: {
+			lxb_dom_character_data_t *character_data = lxb_dom_interface_character_data(node);
+			if (character_data->data.length == 0) {
+				return;
+			}
+			if (
+				!append_tree_line_indent(&ctx->tree, indent_level) ||
+				!buffer_append_char(&ctx->tree, '"') ||
+				!append_escaped_scalar(&ctx->tree, character_data->data.data, character_data->data.length, false) ||
+				!buffer_append_cstr(&ctx->tree, "\"\n")
+			) {
+				ctx->status = ORACLE_ERROR;
+				ctx->failure_class = "oracle-renderer-error";
+				ctx->message = "Could not render text.";
+			}
+			return;
+		}
+
+		/* Comment: `<!-- data -->`, written even when the data is empty. */
+		case LXB_DOM_NODE_TYPE_COMMENT: {
+			lxb_dom_character_data_t *character_data = lxb_dom_interface_character_data(node);
+			if (
+				!append_tree_line_indent(&ctx->tree, indent_level) ||
+				!buffer_append_cstr(&ctx->tree, "<!-- ") ||
+				!append_escaped_scalar(&ctx->tree, character_data->data.data, character_data->data.length, false) ||
+				!buffer_append_cstr(&ctx->tree, " -->\n")
+			) {
+				ctx->status = ORACLE_ERROR;
+				ctx->failure_class = "oracle-renderer-error";
+				ctx->message = "Could not render comment.";
+			}
+			return;
+		}
+
+		/* Any other node type is counted but not rendered. */
+		default:
+			return;
+	}
+}
+
+/* Render `first` and each following sibling at `indent_level`, stopping once the context leaves ORACLE_OK. */
+static void
+render_children(render_ctx_t *ctx, lxb_dom_node_t *first, int indent_level)
+{
+	lxb_dom_node_t *current = first;
+
+	while (current != NULL && ctx->status == ORACLE_OK) {
+		render_node(ctx, current, indent_level);
+		current = current->next;
+	}
+}
+
+/*
+ * Read the whole file at `path` into a newly allocated, NUL-terminated buffer.
+ *
+ * On success stores the buffer in *data and its length, excluding the
+ * terminator, in *len, and returns true; the caller frees the buffer. On
+ * failure returns false with *message describing the problem and leaves
+ * *data and *len untouched. The size is found by seeking to the end, so the
+ * input must be seekable.
+ */
+static bool
+read_file(const char *path, lxb_char_t **data, size_t *len, const char **message)
+{
+	FILE *stream = fopen(path, "rb");
+	const char *failure = NULL;
+	lxb_char_t *contents = NULL;
+	long end_offset = 0;
+	size_t expected = 0;
+	size_t received = 0;
+
+	if (stream == NULL) {
+		*message = strerror(errno);
+		return false;
+	}
+
+	if (fseek(stream, 0, SEEK_END) != 0) {
+		failure = "Could not seek input file.";
+	} else if ((end_offset = ftell(stream)) < 0) {
+		failure = "Could not determine input size.";
+	} else if (fseek(stream, 0, SEEK_SET) != 0) {
+		failure = "Could not rewind input file.";
+	} else {
+		expected = (size_t) end_offset;
+		contents = (lxb_char_t *) malloc(expected + 1);
+
+		if (contents == NULL) {
+			failure = "Could not allocate input buffer.";
+		} else {
+			received = fread(contents, 1, expected, stream);
+			if (received != expected || ferror(stream)) {
+				free(contents);
+				contents = NULL;
+				failure = "Could not read input file.";
+			}
+		}
+	}
+
+	/* One close for every path taken after a successful open. */
+	fclose(stream);
+
+	if (failure != NULL) {
+		*message = failure;
+		return false;
+	}
+
+	contents[received] = '\0';
+	*data = contents;
+	*len = received;
+	return true;
+}
+
+/*
+ * Parse `value` as a strictly positive decimal integer.
+ *
+ * Accepts only strings made entirely of decimal digits. Returns true and
+ * stores the result in *out on success. Returns false, leaving *out
+ * untouched, for NULL, empty strings, leading whitespace or signs, trailing
+ * characters, zero, and values that overflow unsigned long.
+ */
+static bool
+parse_size(const char *value, size_t *out)
+{
+	char *end = NULL;
+	unsigned long parsed;
+
+	/*
+	 * strtoul() skips leading whitespace and accepts a sign: "-1" parses
+	 * without error and yields ULONG_MAX. Requiring a leading digit rejects
+	 * those forms, so a negative limit cannot become an effectively
+	 * unlimited one.
+	 */
+	if (value == NULL || value[0] < '0' || value[0] > '9') {
+		return false;
+	}
+
+	errno = 0;
+	parsed = strtoul(value, &end, 10);
+	if (errno != 0 || end == value || *end != '\0' || parsed == 0) {
+		return false;
+	}
+
+	*out = (size_t) parsed;
+	return true;
+}
+
+/*
+ * Parse the command line into `options`.
+ *
+ * Defaults: context "body", max_nodes 3000, everything else unset. --help,
+ * -h and --version return true immediately with only the matching flag set,
+ * skipping validation. Otherwise every argument must be a known option
+ * followed by a value, and --mode and --input are required. On failure
+ * returns false with *message set to a static description.
+ */
+static bool
+parse_args(int argc, char **argv, cli_options_t *options, const char **message)
+{
+	int i;
+
+	options->mode = NULL;
+	options->context = "body";
+	options->input_path = NULL;
+	options->max_nodes = 3000;
+	options->show_help = false;
+	options->show_version = false;
+
+	for (i = 1; i < argc; i++) {
+		const char *arg = argv[i];
+		const char **string_target = NULL;
+
+		if (strcmp(arg, "--help") == 0 || strcmp(arg, "-h") == 0) {
+			options->show_help = true;
+			return true;
+		}
+		if (strcmp(arg, "--version") == 0) {
+			options->show_version = true;
+			return true;
+		}
+
+		/*
+		 * Recognise the option before asking for its value, so that a
+		 * trailing unknown option is reported as unknown rather than as
+		 * missing a value. --max-nodes leaves string_target NULL because
+		 * its value is parsed as a number.
+		 */
+		if (strcmp(arg, "--mode") == 0) {
+			string_target = &options->mode;
+		} else if (strcmp(arg, "--context") == 0) {
+			string_target = &options->context;
+		} else if (strcmp(arg, "--input") == 0) {
+			string_target = &options->input_path;
+		} else if (strcmp(arg, "--max-nodes") != 0) {
+			*message = "Unknown option.";
+			return false;
+		}
+
+		if (i + 1 >= argc) {
+			*message = "Missing option value.";
+			return false;
+		}
+		i++;
+
+		if (string_target != NULL) {
+			*string_target = argv[i];
+		} else if (!parse_size(argv[i], &options->max_nodes)) {
+			*message = "Expected --max-nodes to be a positive integer.";
+			return false;
+		}
+	}
+
+	if (options->mode == NULL) {
+		*message = "Missing --mode.";
+		return false;
+	}
+	if (strcmp(options->mode, "full-document") != 0 && strcmp(options->mode, "fragment-body") != 0) {
+		*message = "Expected --mode full-document or fragment-body.";
+		return false;
+	}
+	if (options->input_path == NULL) {
+		*message = "Missing --input.";
+		return false;
+	}
+
+	return true;
+}
+
+/* Write the one-line usage summary to `stream`. */
+static void
+print_usage(FILE *stream)
+{
+	static const char usage[] =
+		"Usage: lexbor-tree-oracle --mode full-document|fragment-body --input PATH [--context TAG] [--max-nodes N]\n";
+
+	fputs(usage, stream);
+}
+
+/* Print a single-line JSON object with the oracle kind, Lexbor commit and Lexbor version to stdout. */
+static void
+print_version(void)
+{
+	const char *commit = HTML_API_FUZZ_LEXBOR_COMMIT;
+	const char *version = LXB_HTML_VERSION_STRING;
+
+	fprintf(
+		stdout,
+		"{\"status\":\"ok\",\"oracle\":{\"kind\":\"lexbor-source\",\"lexborCommit\":\"%s\",\"lexborVersion\":\"%s\"}}\n",
+		commit,
+		version
+	);
+}
+
+/*
+ * Write the JSON result object for `ctx` to stdout.
+ *
+ * Always emits "status", "oracle" and "nodeCount". On ORACLE_OK it also
+ * emits "tree" (a JSON string, with invalid UTF-8 replaced by the encoder)
+ * and "treeBase64" (the same bytes, base64-encoded). "failureClass" and the
+ * "error" / "unsupported" entry appear only when the corresponding context
+ * field is set. Modifies ctx->tree by appending trailing newlines. If the
+ * JSON buffer itself fails, a fixed error object is printed instead.
+ */
+static void
+print_result(render_ctx_t *ctx)
+{
+	buffer_t json;
+	const char *status_text = ctx->status == ORACLE_OK
+		? "ok"
+		: (ctx->status == ORACLE_UNSUPPORTED ? "unsupported" : "error");
+
+	/* Individual append results are ignored; json.failed is checked at the end. */
+	buffer_init(&json);
+	buffer_append_cstr(&json, "{\n  \"status\": ");
+	append_json_string(&json, status_text, strlen(status_text));
+	buffer_append_cstr(&json, ",\n  \"oracle\": {\n    \"kind\": \"lexbor-source\",\n    \"lexborCommit\": ");
+	append_json_string(&json, HTML_API_FUZZ_LEXBOR_COMMIT, strlen(HTML_API_FUZZ_LEXBOR_COMMIT));
+	buffer_append_cstr(&json, ",\n    \"lexborVersion\": ");
+	append_json_string(&json, LXB_HTML_VERSION_STRING, strlen(LXB_HTML_VERSION_STRING));
+	buffer_append_cstr(&json, "\n  }");
+
+	if (ctx->status == ORACLE_OK) {
+		/* An empty tree becomes a single newline. A non-empty tree is made
+		 * to end with a newline and then gets one more appended. */
+		if (ctx->tree.length == 0) {
+			buffer_append_char(&ctx->tree, '\n');
+		} else {
+			if (ctx->tree.data[ctx->tree.length - 1] != '\n') {
+				buffer_append_char(&ctx->tree, '\n');
+			}
+			buffer_append_char(&ctx->tree, '\n');
+		}
+		buffer_append_cstr(&json, ",\n  \"tree\": ");
+		append_json_string(&json, ctx->tree.data == NULL ? "" : ctx->tree.data, ctx->tree.length);
+		buffer_append_cstr(&json, ",\n  \"treeBase64\": ");
+		append_json_base64(&json, ctx->tree.data == NULL ? "" : ctx->tree.data, ctx->tree.length);
+	}
+
+	buffer_append_cstr(&json, ",\n  \"nodeCount\": ");
+	{
+		char count[32];
+		snprintf(count, sizeof(count), "%zu", ctx->node_count);
+		buffer_append_cstr(&json, count);
+	}
+
+	if (ctx->failure_class != NULL) {
+		buffer_append_cstr(&json, ",\n  \"failureClass\": ");
+		append_json_string(&json, ctx->failure_class, strlen(ctx->failure_class));
+	}
+
+	/* Unsupported results nest the message in an object under "unsupported";
+	 * every other status emits it as a plain string under "error". */
+	if (ctx->message != NULL) {
+		const char *key = ctx->status == ORACLE_UNSUPPORTED ? "unsupported" : "error";
+		buffer_append_cstr(&json, ",\n  \"");
+		buffer_append_cstr(&json, key);
+		if (ctx->status == ORACLE_UNSUPPORTED) {
+			buffer_append_cstr(&json, "\": {\n    \"message\": ");
+			append_json_string(&json, ctx->message, strlen(ctx->message));
+			buffer_append_cstr(&json, "\n  }");
+		} else {
+			buffer_append_cstr(&json, "\": ");
+			append_json_string(&json, ctx->message, strlen(ctx->message));
+		}
+	}
+
+	buffer_append_cstr(&json, "\n}\n");
+
+	/* The failed flag is sticky, so this catches a failure in any append above. */
+	if (json.failed) {
+		fputs("{\"status\":\"error\",\"failureClass\":\"oracle-renderer-error\",\"error\":\"Could not encode JSON result.\"}\n", stdout);
+	} else {
+		fwrite(json.data, 1, json.length, stdout);
+	}
+
+	buffer_destroy(&json);
+}
+
+static void
+print_cli_error(const char *message)
+{
+	render_ctx_t ctx;
+
+	ctx.status = ORACLE_ERROR;
+	ctx.failure_class = "oracle-cli-error";
+	ctx.message = message;
+	ctx.node_count = 0;
+	ctx.max_nodes = 0;
+	buffer_init(&ctx.tree);
+	print_result(&ctx);
+	buffer_destroy(&ctx.tree);
+}
+
+/*
+ * Map a fragment context name to its Lexbor tag and namespace ids.
+ *
+ * Returns true and fills both outputs for a supported name. For an
+ * unsupported name returns false, with *ns_id set to LXB_NS_HTML and *tag_id
+ * left untouched.
+ */
+static bool
+context_to_tag(const char *context, lxb_tag_id_t *tag_id, lxb_ns_id_t *ns_id)
+{
+	static const struct {
+		const char *name;
+		lxb_tag_id_t tag;
+		lxb_ns_id_t ns;
+	} supported[] = {
+		{ "body", LXB_TAG_BODY, LXB_NS_HTML },
+		{ "div", LXB_TAG_DIV, LXB_NS_HTML },
+		{ "p", LXB_TAG_P, LXB_NS_HTML },
+		{ "td", LXB_TAG_TD, LXB_NS_HTML },
+		{ "tr", LXB_TAG_TR, LXB_NS_HTML },
+		{ "table", LXB_TAG_TABLE, LXB_NS_HTML },
+		{ "caption", LXB_TAG_CAPTION, LXB_NS_HTML },
+		{ "colgroup", LXB_TAG_COLGROUP, LXB_NS_HTML },
+		{ "select", LXB_TAG_SELECT, LXB_NS_HTML },
+		{ "option", LXB_TAG_OPTION, LXB_NS_HTML },
+		{ "template", LXB_TAG_TEMPLATE, LXB_NS_HTML },
+		{ "title", LXB_TAG_TITLE, LXB_NS_HTML },
+		{ "textarea", LXB_TAG_TEXTAREA, LXB_NS_HTML },
+		{ "script", LXB_TAG_SCRIPT, LXB_NS_HTML },
+		{ "style", LXB_TAG_STYLE, LXB_NS_HTML },
+		{ "svg", LXB_TAG_SVG, LXB_NS_SVG },
+		{ "math", LXB_TAG_MATH, LXB_NS_MATH },
+	};
+	size_t i;
+
+	*ns_id = LXB_NS_HTML;
+
+	for (i = 0; i < sizeof(supported) / sizeof(supported[0]); i++) {
+		if (strcmp(context, supported[i].name) == 0) {
+			*tag_id = supported[i].tag;
+			*ns_id = supported[i].ns;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/* Parse `input` as a complete HTML document and render the document's children into ctx->tree. */
+static void
+render_full_document(render_ctx_t *ctx, const lxb_char_t *input, size_t input_len)
+{
+	lxb_html_document_t *document = lxb_html_document_create();
+
+	if (document == NULL) {
+		ctx->status = ORACLE_ERROR;
+		ctx->failure_class = "oracle-renderer-error";
+		ctx->message = "Could not create Lexbor document.";
+		return;
+	}
+
+	if (lxb_html_document_parse(document, input, input_len) == LXB_STATUS_OK) {
+		render_children(ctx, lxb_dom_interface_node(document)->first_child, 0);
+	} else {
+		ctx->status = ORACLE_ERROR;
+		ctx->failure_class = "oracle-parse-error";
+		ctx->message = "Lexbor could not parse the input.";
+	}
+
+	lxb_html_document_destroy(document);
+}
+
+/*
+ * Parse `input` as a fragment inside the named context element and render
+ * the children of the returned fragment root into ctx->tree.
+ *
+ * An unknown context name yields ORACLE_UNSUPPORTED. Lexbor setup failures
+ * yield "oracle-renderer-error" and parse failures "oracle-parse-error".
+ */
+static void
+render_fragment(render_ctx_t *ctx, const lxb_char_t *input, size_t input_len, const char *context)
+{
+	const char *failure_class = "oracle-renderer-error";
+	const char *failure_message = NULL;
+	lxb_html_parser_t *parser = NULL;
+	lxb_html_document_t *document = NULL;
+	lxb_dom_node_t *fragment = NULL;
+	lxb_tag_id_t tag_id;
+	lxb_ns_id_t ns_id;
+
+	if (!context_to_tag(context, &tag_id, &ns_id)) {
+		ctx->status = ORACLE_UNSUPPORTED;
+		ctx->failure_class = "oracle-unsupported";
+		ctx->message = "Unsupported fragment context.";
+		return;
+	}
+
+	parser = lxb_html_parser_create();
+	if (parser == NULL) {
+		failure_message = "Could not create Lexbor parser.";
+		goto done;
+	}
+
+	if (lxb_html_parser_init(parser) != LXB_STATUS_OK) {
+		failure_message = "Could not initialize Lexbor parser.";
+		goto done;
+	}
+
+	document = lxb_html_document_create();
+	if (document == NULL) {
+		failure_message = "Could not create Lexbor document.";
+		goto done;
+	}
+
+	fragment = lxb_html_parse_fragment_by_tag_id(parser, document, tag_id, ns_id, input, input_len);
+	if (fragment == NULL || lxb_html_parser_status(parser) != LXB_STATUS_OK) {
+		failure_class = "oracle-parse-error";
+		failure_message = "Lexbor could not parse the fragment.";
+		goto done;
+	}
+
+	render_children(ctx, fragment->first_child, 0);
+
+done:
+	/* Release whatever was created, document first and then the parser. */
+	if (document != NULL) {
+		lxb_html_document_destroy(document);
+	}
+	if (parser != NULL) {
+		lxb_html_parser_destroy(parser);
+	}
+
+	if (failure_message != NULL) {
+		ctx->status = ORACLE_ERROR;
+		ctx->failure_class = failure_class;
+		ctx->message = failure_message;
+	}
+}
+
+/*
+ * Entry point: parse options, read the input file, render it in the requested
+ * mode, and print the JSON result.
+ *
+ * Exits with EXIT_SUCCESS for "ok" and "unsupported" results and for
+ * --help / --version. Exits with EXIT_FAILURE for CLI, read, parse and
+ * render errors.
+ */
+int
+main(int argc, char **argv)
+{
+	cli_options_t options;
+	render_ctx_t ctx;
+	const char *message = NULL;
+	lxb_char_t *input = NULL;
+	size_t input_len = 0;
+	int exit_code;
+
+	if (!parse_args(argc, argv, &options, &message)) {
+		print_cli_error(message);
+		return EXIT_FAILURE;
+	}
+
+	/* parse_args() returns at the first of these flags, so at most one is set. */
+	if (options.show_help || options.show_version) {
+		if (options.show_help) {
+			print_usage(stdout);
+		} else {
+			print_version();
+		}
+		return EXIT_SUCCESS;
+	}
+
+	ctx.status = ORACLE_OK;
+	ctx.failure_class = NULL;
+	ctx.message = NULL;
+	ctx.node_count = 0;
+	ctx.max_nodes = options.max_nodes;
+	buffer_init(&ctx.tree);
+
+	if (!read_file(options.input_path, &input, &input_len, &message)) {
+		ctx.status = ORACLE_ERROR;
+		ctx.failure_class = "oracle-cli-error";
+		ctx.message = message;
+		print_result(&ctx);
+		buffer_destroy(&ctx.tree);
+		return EXIT_FAILURE;
+	}
+
+	if (strcmp(options.mode, "full-document") != 0) {
+		render_fragment(&ctx, input, input_len, options.context);
+	} else {
+		render_full_document(&ctx, input, input_len);
+	}
+
+	/* A tree buffer failure that no renderer reported still has to become an error. */
+	if (ctx.status == ORACLE_OK && ctx.tree.failed) {
+		ctx.status = ORACLE_ERROR;
+		ctx.failure_class = "oracle-renderer-error";
+		ctx.message = "Could not allocate tree output.";
+	}
+
+	print_result(&ctx);
+	exit_code = ctx.status == ORACLE_ERROR ? EXIT_FAILURE : EXIT_SUCCESS;
+
+	free(input);
+	buffer_destroy(&ctx.tree);
+
+	return exit_code;
+}
diff --git a/tools/html-api-fuzz/replay.php b/tools/html-api-fuzz/replay.php
new file mode 100755
index 0000000000000..29ac6d1362e74
--- /dev/null
+++ b/tools/html-api-fuzz/replay.php
@@ -0,0 +1,142 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+$replay_path = \HtmlApiFuzz\option_string( $options, 'replay', $options['_'][0] ?? null ); // --replay, defaulting to $options['_'][0] when that entry exists.
+$store_path  = \HtmlApiFuzz\option_string( $options, 'store', null );
+if ( ( null === $replay_path && null === $store_path ) || \HtmlApiFuzz\option_bool( $options, 'help', false ) ) { // Usage is shown when no input source is given or --help is set.
+	echo "Usage: php tools/html-api-fuzz/replay.php --replay path/to/replay.json [--output-dir DIR] [--payload-policy POLICY] [--dom-oracle php-dom|lexbor-source] [--lexbor-oracle-bin PATH]\n";
+	echo "       php tools/html-api-fuzz/replay.php --store path/to/results.sqlite (--id N|--seed N) [--output-dir DIR] [--payload-policy POLICY] [--dom-oracle php-dom|lexbor-source] [--lexbor-oracle-bin PATH]\n";
+	echo "The --store form reproduces a failure whose seed directory was pruned, from the replay stored in the lane's results.sqlite.\n";
+	exit( ( null === $replay_path && null === $store_path ) ? 1 : 0 ); // A missing input source exits 1; --help with an input source exits 0.
+}
+
+if ( null !== $store_path ) {
+	// Materialize the stored replay as a file and proceed exactly as if it
+	// had been read from a retained seed directory.
+	$store_id   = \HtmlApiFuzz\option_int( $options, 'id', -1 );
+	$store_seed = \HtmlApiFuzz\option_int( $options, 'seed', -1 );
+	if ( $store_id < 0 && $store_seed < 0 ) { // Neither selector given (or both negative).
+		fwrite( STDERR, "The --store form requires --id N or --seed N.\n" );
+		exit( 1 );
+	}
+	try {
+		$store        = new \HtmlApiFuzz\ResultStore( $store_path, true ); // NOTE(review): the second argument presumably opens the store read-only; confirm against ResultStore.
+		$store_replay = $store_id >= 0 ? $store->replay_for_attempt_id( $store_id ) : $store->replay_for_seed( $store_seed ); // --id wins when both --id and --seed are given.
+		$store->close();
+	} catch ( \Throwable $e ) {
+		fwrite( STDERR, "Could not read store {$store_path}: {$e->getMessage()}\n" );
+		exit( 1 );
+	}
+	if ( null === $store_replay ) {
+		fwrite( STDERR, ( $store_id >= 0 ? "No stored replay for id {$store_id}" : "No stored replay for seed {$store_seed}" ) . " in {$store_path}.\n" );
+		exit( 1 );
+	}
+	$store_label = $store_id >= 0 ? 'id-' . $store_id : 'seed-' . $store_seed;
+	$replay_dir  = \HtmlApiFuzz\option_string( $options, 'output-dir', dirname( $store_path ) . '/replay-' . $store_label . '-' . \HtmlApiFuzz\timestamp() ); // Defaults to a labelled, timestamped directory next to the store file.
+	\HtmlApiFuzz\ensure_dir( $replay_dir );
+	$replay_path = $replay_dir . '/source-replay.json';
+	\HtmlApiFuzz\write_json_file( $replay_path, $store_replay );
+	$options['output-dir'] = $replay_dir; // Pin the output directory to the directory just created.
+}
+
+$replay = \HtmlApiFuzz\read_json_file( $replay_path ); // Must decode to an array carrying a string inputBase64; anything else is rejected below.
+if ( ! is_array( $replay ) || ! is_string( $replay['inputBase64'] ?? null ) ) { // Type checks keep scalar documents and non-string payloads away from the array access and base64_decode().
+	fwrite( STDERR, "Invalid replay file: {$replay_path}\n" );
+	exit( 1 );
+}
+
+$output_dir = \HtmlApiFuzz\option_string( $options, 'output-dir', dirname( $replay_path ) . '/replay-' . \HtmlApiFuzz\timestamp() ); // Defaults to a timestamped directory next to the replay file.
+$input      = base64_decode( $replay['inputBase64'], true ); // Strict mode: characters outside the base64 alphabet make this return false.
+if ( false === $input ) {
+	fwrite( STDERR, "Invalid base64 input in replay file: {$replay_path}\n" );
+	exit( 1 );
+}
+\HtmlApiFuzz\ensure_dir( $output_dir );
+$input_path = $output_dir . '/input.bin';
+file_put_contents( $input_path, $input );
+$payload_policy = \HtmlApiFuzz\option_string( $options, 'payload-policy', null );
+if ( null === $payload_policy ) { // No CLI override: fall back to the policy recorded in the replay, top level first, then under 'generator'.
+	$payload_policy = \HtmlApiFuzz\normalize_payload_policy_label( $replay['payloadPolicy'] ?? null )
+		?? \HtmlApiFuzz\normalize_payload_policy_label( $replay['generator']['payloadPolicy'] ?? null );
+}
+$original_generator = is_array( $replay['generator'] ?? null ) ? $replay['generator'] : ( $replay['originalGenerator'] ?? null );
+$source_replay = \HtmlApiFuzz\replay_source_metadata( $replay_path, $replay );
+$git_metadata_base64 = \HtmlApiFuzz\git_metadata_base64( \HtmlApiFuzz\git_metadata() );
+$oracle_options = $options; // Copy of the CLI options; oracle settings missing from it are filled in from the replay below.
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'dom-oracle', null ) ) {
+	$oracle_options['dom-oracle'] = $replay['options']['domOracle'] ?? $replay['oracle']['kind'] ?? \HtmlApiFuzz\OracleRenderer::KIND_PHP_DOM;
+}
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'lexbor-oracle-bin', null ) && is_string( $replay['options']['lexborOracleBin'] ?? null ) ) {
+	$oracle_options['lexbor-oracle-bin'] = $replay['options']['lexborOracleBin'];
+}
+$stored_oracle_timeout_ms = $replay['options']['oracleTimeoutMs'] ?? null;
+if ( null === \HtmlApiFuzz\option_string( $oracle_options, 'oracle-timeout-ms', null ) && is_numeric( $stored_oracle_timeout_ms ) ) {
+	$oracle_options['oracle-timeout-ms'] = (string) (int) $stored_oracle_timeout_ms;
+}
+$oracle_renderer    = \HtmlApiFuzz\OracleRenderer::from_options( $oracle_options );
+$oracle_worker_args = $oracle_renderer->worker_args();
+
+$args = array(
+	__DIR__ . '/worker.php',
+	'--input-file',
+	$input_path,
+	'--mode',
+	$replay['mode'] ?? \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'--profile',
+	$replay['profile'] ?? 'replay',
+	'--seed',
+	(string) ( $replay['seed'] ?? 1 ),
+	'--output-dir',
+	$output_dir,
+	'--max-tokens',
+	(string) \HtmlApiFuzz\option_int( $options, 'max-tokens', (int) ( $replay['limits']['maxTokens'] ?? 2000 ) ),
+	'--max-nodes',
+	(string) \HtmlApiFuzz\option_int( $options, 'max-nodes', (int) ( $replay['limits']['maxNodes'] ?? 3000 ) ),
+	'--git-metadata-base64',
+	$git_metadata_base64,
+);
+if ( null !== $payload_policy ) {
+	$args[] = '--payload-policy';
+	$args[] = $payload_policy;
+}
+$fragment_context = $replay['fragmentContext'] ?? null;
+if ( is_string( $fragment_context ) && 'body' !== $fragment_context ) { // A 'body' context is never forwarded.
+	$args[] = '--fragment-context';
+	$args[] = $fragment_context;
+}
+if ( \HtmlApiFuzz\option_bool( $options, 'fail-unsupported', (bool) ( $replay['options']['failUnsupported'] ?? false ) ) ) {
+	$args[] = '--fail-unsupported';
+}
+foreach ( $oracle_worker_args as $arg ) {
+	$args[] = $arg;
+}
+
+$proc = \HtmlApiFuzz\run_php_process( $args, \HtmlApiFuzz\repo_root(), \HtmlApiFuzz\option_int( $options, 'timeout-ms', 2500 ), $output_dir . '/worker.log' );
+$result = \HtmlApiFuzz\read_json_file( $output_dir . '/result.json' );
+$output_replay = \HtmlApiFuzz\read_json_file( $output_dir . '/replay.json' );
+if ( is_array( $output_replay ) && is_array( $original_generator ) ) { // Carry the source replay's generator record into the new replay document.
+	$output_replay['originalGenerator'] = $original_generator;
+}
+if ( is_array( $output_replay ) ) {
+	$output_replay['sourceReplay'] = $source_replay;
+	\HtmlApiFuzz\write_json_file( $output_dir . '/replay.json', $output_replay );
+}
+echo \HtmlApiFuzz\json_encode_safe(
+	array(
+		'ok'       => $result['ok'] ?? false,
+		'status'   => $result['status'] ?? 'missing-result',
+		'result'   => $output_dir . '/result.json',
+		'replay'   => $output_dir . '/replay.json',
+		'worker'   => array(
+			'code'       => $proc['code'],
+			'timedOut'   => $proc['timedOut'],
+			'durationMs' => $proc['durationMs'],
+			'logPath'    => $proc['logPath'],
+		),
+		'signature'     => $result['signature'] ?? null,
+		'oracleFinding' => $result['oracleFinding'] ?? null,
+	)
+) . "\n";
+exit( ( $result['ok'] ?? false ) ? 0 : 2 ); // Exit status 2 marks a replay whose result is missing or not ok.
diff --git a/tools/html-api-fuzz/runner.php b/tools/html-api-fuzz/runner.php
new file mode 100755
index 0000000000000..50f115909c2f2
--- /dev/null
+++ b/tools/html-api-fuzz/runner.php
@@ -0,0 +1,511 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+function html_api_fuzz_runner_usage(): void { // Prints the runner's command-line help, one line per topic.
+	echo "Usage: php tools/html-api-fuzz/runner.php [--output-dir DIR] [--start-seed N] [--seed-stride N] [--max-seeds N] [--duration-seconds N] [--payload-policy POLICY] [--max-input-bytes N] [--dom-oracle php-dom|lexbor-source] [--lexbor-oracle-bin PATH] [--max-keep-per-signature N] [--keep-all-artifacts] [--stop-file PATH]\n",
+		"Use --duration-seconds 0 with --max-seeds 0 for an indefinite run.\n",
+		"Create the stop file (default OUTPUT_DIR/STOP) to stop gracefully: the current batch finishes and no new batch starts.\n",
+		"Oracle findings are recorded separately from failures; pass --triage-oracle-findings to watcher.php to process them.\n";
+}
+
+function html_api_fuzz_runner_validate_generator_options( string $profile, string $mode, string $payload_policy ): void { // 'auto' always passes; any other value must be one the generator lists.
+	if ( ! ( 'auto' === $profile || in_array( $profile, \HtmlApiFuzz\Generator::profiles(), true ) ) ) {
+		throw new InvalidArgumentException( 'Unknown generator profile: ' . $profile );
+	}
+	if ( ! ( 'auto' === $mode || in_array( $mode, \HtmlApiFuzz\Generator::modes(), true ) ) ) {
+		throw new InvalidArgumentException( 'Unknown generator mode: ' . $mode );
+	}
+	if ( ! ( 'auto' === $payload_policy || in_array( $payload_policy, \HtmlApiFuzz\Generator::payload_policies(), true ) ) ) {
+		throw new InvalidArgumentException( 'Unknown generator payload policy: ' . $payload_policy );
+	}
+}
+
+function html_api_fuzz_runner_validate_runtime_options( int $seed_stride, int $max_seeds, float $duration_seconds, int $timeout_ms, int $max_input_bytes, int $max_tokens, int $max_nodes, int $max_keep_per_signature ): void {
+	/*
+	 * Rejects numeric options that fall below their minimum.
+	 *
+	 * Every rule is listed with the exact message it raises. Rules are applied
+	 * top to bottom and the first violated one throws
+	 * InvalidArgumentException, so one problem is reported per call.
+	 *
+	 * --max-keep-per-signature leads the list and must be at least 1: the
+	 * first exemplar of every signature must stay on disk, because the
+	 * watcher's minimizer works from a replay file, not the store.
+	 */
+	$violations = array(
+		'Expected --max-keep-per-signature to be at least 1.' => $max_keep_per_signature < 1,
+		'Expected --seed-stride to be at least 1.'            => $seed_stride < 1,
+		'Expected --max-seeds to be at least 0.'              => $max_seeds < 0,
+		'Expected --duration-seconds to be at least 0.'       => $duration_seconds < 0,
+		'Expected --timeout-ms to be at least 1.'             => $timeout_ms < 1,
+		'Expected --max-input-bytes to be at least 0.'        => $max_input_bytes < 0,
+		'Expected --max-tokens to be at least 1.'             => $max_tokens < 1,
+		'Expected --max-nodes to be at least 1.'              => $max_nodes < 1,
+	);
+	foreach ( $violations as $message => $violated ) {
+		if ( $violated ) {
+			throw new InvalidArgumentException( $message );
+		}
+	}
+}
+
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+if ( \HtmlApiFuzz\option_bool( $options, 'help', false ) || \HtmlApiFuzz\option_bool( $options, 'h', false ) ) {
+	html_api_fuzz_runner_usage();
+	exit( 0 );
+}
+
+$repo_root        = \HtmlApiFuzz\repo_root();
+$output_dir       = \HtmlApiFuzz\option_string( $options, 'output-dir', $repo_root . '/artifacts/html-api-fuzz/run-' . \HtmlApiFuzz\timestamp() );
+$start_seed       = \HtmlApiFuzz\option_int( $options, 'start-seed', 1 );
+$seed_stride      = \HtmlApiFuzz\option_int( $options, 'seed-stride', 1 );
+$max_seeds        = \HtmlApiFuzz\option_int( $options, 'max-seeds', 0 ); // 0 disables the seed budget.
+$duration_seconds = \HtmlApiFuzz\option_float( $options, 'duration-seconds', 60.0 ); // 0 disables the deadline.
+$timeout_ms       = \HtmlApiFuzz\option_int( $options, 'timeout-ms', 2500 );
+$stop_on_failure  = \HtmlApiFuzz\option_bool( $options, 'stop-on-failure', false );
+$profile          = \HtmlApiFuzz\option_string( $options, 'profile', 'auto' );
+$mode             = \HtmlApiFuzz\option_string( $options, 'mode', 'auto' );
+$payload_policy   = \HtmlApiFuzz\option_string( $options, 'payload-policy', 'auto' );
+$max_input_bytes  = \HtmlApiFuzz\option_int( $options, 'max-input-bytes', 0 );
+$corpus_percent   = \HtmlApiFuzz\option_int( $options, 'corpus-mutate-percent', 20 );
+$batch_size       = max( 1, \HtmlApiFuzz\option_int( $options, 'batch-size', 25 ) ); // Clamped to at least one seed per batch.
+$max_tokens       = \HtmlApiFuzz\option_int( $options, 'max-tokens', 2000 );
+$max_nodes        = \HtmlApiFuzz\option_int( $options, 'max-nodes', 3000 );
+$fail_unsupported = \HtmlApiFuzz\option_bool( $options, 'fail-unsupported', false );
+$max_keep_per_signature = \HtmlApiFuzz\option_int( $options, 'max-keep-per-signature', 5 );
+$keep_all_artifacts     = \HtmlApiFuzz\option_bool( $options, 'keep-all-artifacts', false );
+$stop_file              = \HtmlApiFuzz\option_string( $options, 'stop-file', $output_dir . '/STOP' );
+if ( array_key_exists( 'stop-file', $options ) && ( true === $options['stop-file'] || null === $stop_file || '' === $stop_file ) ) { // --stop-file was passed but without a usable path.
+	fwrite( STDERR, "Expected --stop-file to be a non-empty path.\n" );
+	exit( 1 );
+}
+html_api_fuzz_runner_validate_generator_options( $profile, $mode, $payload_policy );
+html_api_fuzz_runner_validate_runtime_options( $seed_stride, $max_seeds, $duration_seconds, $timeout_ms, $max_input_bytes, $max_tokens, $max_nodes, $max_keep_per_signature );
+
+if ( is_file( $stop_file ) ) {
+	// A leftover stop request must not silently turn this run into a 0-seed
+	// success; starting again is an explicit operator decision.
+	fwrite( STDERR, "Stop file already exists: {$stop_file}\nRemove it (or pass a different --stop-file) to start this run.\n" );
+	exit( 1 );
+}
+
+\HtmlApiFuzz\ensure_dir( $output_dir );
+$result_store = new \HtmlApiFuzz\ResultStore( $output_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME );
+$events_path  = $output_dir . '/events.ndjson';
+$state_path   = $output_dir . '/state.json';
+$runner_log   = $output_dir . '/runner.log';
+$git_metadata = null === \HtmlApiFuzz\option_string( $options, 'git-metadata-base64', null ) // Metadata passed via --git-metadata-base64 wins; otherwise it is collected here.
+	? \HtmlApiFuzz\git_metadata()
+	: \HtmlApiFuzz\git_metadata_from_base64( \HtmlApiFuzz\option_string( $options, 'git-metadata-base64' ) );
+$git_metadata_base64 = \HtmlApiFuzz\git_metadata_base64( $git_metadata );
+$oracle_renderer      = \HtmlApiFuzz\OracleRenderer::from_options( $options );
+$oracle_metadata      = $oracle_renderer->metadata();
+$oracle_worker_args   = $oracle_renderer->worker_args();
+
+$state = array(
+	'schemaVersion' => 1,
+	'kind'          => 'html-api-fuzz-runner-state',
+	'startedAt'     => gmdate( 'c' ),
+	'updatedAt'     => gmdate( 'c' ),
+	'outputDir'     => $output_dir,
+	'cwd'           => getcwd() ?: null,
+	'startSeed'     => $start_seed,
+	'seedStride'    => $seed_stride,
+	'nextSeed'      => $start_seed,
+	'profile'       => $profile,
+	'mode'          => $mode,
+	'payloadPolicy' => $payload_policy,
+	'maxInputBytes' => $max_input_bytes > 0 ? $max_input_bytes : null,
+	'git'           => $git_metadata,
+	'oracle'        => $oracle_metadata,
+	'maxKeepPerSignature' => $max_keep_per_signature,
+	'keepAllArtifacts'    => $keep_all_artifacts,
+	'stopFile'            => $stop_file,
+	// Longest legitimate silence between state writes: one full batch worker
+	// run. The watcher floors its dead-runner presumption on this.
+	'batchBudgetMs'       => $timeout_ms * $batch_size,
+	'successes'         => 0,
+	'failures'          => 0,
+	'unsupported'       => 0,
+	'oracleParseErrors' => 0,
+	'oracleUnsupported' => 0,
+	'oracleTolerated'   => 0,
+	'oracleFindings'    => 0,
+	'stopReason'        => null,
+);
+\HtmlApiFuzz\write_json_file( $state_path, $state );
+\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'runner-start', 'outputDir' => $output_dir, 'git' => $git_metadata, 'oracle' => $oracle_metadata ) );
+file_put_contents( $runner_log, '[' . gmdate( 'c' ) . "] runner started outputDir={$output_dir}\n", FILE_APPEND );
+
+$has_deadline = $duration_seconds > 0;
+$deadline     = $has_deadline ? microtime( true ) + $duration_seconds : null;
+$seed         = $start_seed; // Next seed to hand to a batch.
+$count        = 0; // Number of seeds fully recorded so far.
+
+function html_api_fuzz_runner_worker_args( int $seed, string $output_dir, string $profile, string $mode, string $payload_policy, int $max_tokens, int $max_nodes, string $git_metadata_base64, bool $fail_unsupported, int $max_input_bytes, int $corpus_percent, int $batch_count, int $seed_stride, array $oracle_worker_args ): array {
+	// Builds the argument list for worker.php: script path first, then flags.
+	// $output_dir is passed through verbatim as --output-dir.
+	// The arguments below are present on every invocation.
+	$args = array(
+		__DIR__ . '/worker.php',
+		'--seed',
+		(string) $seed,
+		'--profile',
+		$profile,
+		'--mode',
+		$mode,
+		'--payload-policy',
+		$payload_policy,
+		'--output-dir',
+		$output_dir,
+		'--max-tokens',
+		(string) $max_tokens,
+		'--max-nodes',
+		(string) $max_nodes,
+		'--git-metadata-base64',
+		$git_metadata_base64,
+	);
+
+	// --batch-count and --seed-stride are only sent for multi-seed batches.
+	if ( $batch_count > 1 ) {
+		array_push( $args, '--batch-count', (string) $batch_count, '--seed-stride', (string) $seed_stride );
+	}
+	if ( $fail_unsupported ) {
+		$args[] = '--fail-unsupported';
+	}
+	// A non-positive byte limit is not forwarded.
+	if ( $max_input_bytes > 0 ) {
+		array_push( $args, '--max-input-bytes', (string) $max_input_bytes );
+	}
+	array_push( $args, '--corpus-mutate-percent', (string) $corpus_percent );
+
+	// Oracle arguments are appended last, by value; their array keys are ignored.
+	return array_merge( $args, array_values( $oracle_worker_args ) );
+}
+
+/**
+ * Reads a JSON file, mapping a RuntimeException from the reader to null. A batch worker killed mid-write can leave truncated
+ * JSON behind; such a file must behave like a missing one so the seed takes the isolation fallback instead of fataling the lane.
+ */
+function html_api_fuzz_runner_read_json_or_null( string $path ) {
+	try {
+		$decoded = \HtmlApiFuzz\read_json_file( $path );
+	} catch ( \RuntimeException $unreadable ) {
+		$decoded = null;
+	}
+	return $decoded;
+}
+
+$pending_batch  = array();
+$batch_log      = null;
+$batch_keep_log = false;
+
+// Already-computed batch results are always processed; the deadline and seed
+// budget gate only the formation of new batches.
+while ( array() !== $pending_batch || ( ( ! $has_deadline || microtime( true ) < $deadline ) && ( 0 === $max_seeds || $count < $max_seeds ) ) ) {
+	if ( array() === $pending_batch ) {
+		// Graceful stop: the already-computed batch above has fully drained
+		// and is recorded; honor a stop request (or a stop-on-failure from
+		// inside the batch) before committing to a new batch.
+		if ( null !== $state['stopReason'] ) {
+			break;
+		}
+		if ( is_file( $stop_file ) ) {
+			$state['stopReason'] = 'stop-requested';
+			\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'stop-requested', 'stopFile' => $stop_file ) );
+			break;
+		}
+
+		// Run a batch of seeds in one worker process: per-seed process spawns
+		// dominate wall-clock otherwise. Seeds missing a result.json after
+		// the batch (the batch process died or timed out mid-way) are re-run
+		// individually below.
+		$batch_count = $batch_size;
+		if ( 0 !== $max_seeds ) {
+			$batch_count = min( $batch_count, $max_seeds - $count );
+		}
+		$batch_count = max( 1, $batch_count );
+		$batch_seeds = array();
+		for ( $i = 0; $i < $batch_count; $i++ ) {
+			$batch_seeds[] = $seed + ( $i * $seed_stride );
+		}
+
+		// Batch logs live outside the prunable seed directories; clean
+		// batches drop theirs once fully recorded (see end of loop).
+		$batch_log      = $output_dir . '/logs/batch-' . $batch_seeds[0] . '.log';
+		$batch_keep_log = false;
+		\HtmlApiFuzz\ensure_dir( dirname( $batch_log ) );
+		// A one-seed batch runs without --batch-count, and such a worker writes
+		// straight into --output-dir (the isolation re-run below depends on it).
+		$batch_output_dir = 1 === $batch_count ? $output_dir . '/seed-' . $batch_seeds[0] . '/primary' : $output_dir;
+		\HtmlApiFuzz\ensure_dir( $batch_output_dir );
+		\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'batch-start', 'seeds' => $batch_seeds, 'logPath' => $batch_log ) );
+		$batch_args = html_api_fuzz_runner_worker_args( $batch_seeds[0], $batch_output_dir, $profile, $mode, $payload_policy, $max_tokens, $max_nodes, $git_metadata_base64, $fail_unsupported, $max_input_bytes, $corpus_percent, $batch_count, $seed_stride, $oracle_worker_args );
+		$batch_proc = \HtmlApiFuzz\run_php_process( $batch_args, $repo_root, $timeout_ms * $batch_count, $batch_log );
+		$pending_batch = $batch_seeds;
+	}
+
+	$current_seed = array_shift( $pending_batch );
+	$attempt_dir  = $output_dir . '/seed-' . $current_seed . '/primary';
+	\HtmlApiFuzz\ensure_dir( $attempt_dir );
+	$log_path = $attempt_dir . '/worker.log';
+
+	$result = html_api_fuzz_runner_read_json_or_null( $attempt_dir . '/result.json' );
+	$proc   = $batch_proc; // Unless the seed is re-run below, its process data is that of the whole batch.
+	if ( null === $result ) {
+		// Isolation fallback: re-run this seed in its own process.
+		$batch_keep_log = true;
+		$args = html_api_fuzz_runner_worker_args( $current_seed, $attempt_dir, $profile, $mode, $payload_policy, $max_tokens, $max_nodes, $git_metadata_base64, $fail_unsupported, $max_input_bytes, $corpus_percent, 1, $seed_stride, $oracle_worker_args );
+		$proc   = \HtmlApiFuzz\run_php_process( $args, $repo_root, $timeout_ms, $log_path );
+		$result = html_api_fuzz_runner_read_json_or_null( $attempt_dir . '/result.json' );
+	}
+
+	if ( null === $result ) { // Still nothing readable: synthesize a failure record from the process outcome and any replay left behind.
+		$replay = html_api_fuzz_runner_read_json_or_null( $attempt_dir . '/replay.json' );
+		$result = array(
+			'ok'             => false,
+			'status'         => $proc['timedOut'] ? 'timeout' : 'worker-failed',
+			'failureClass'   => $proc['timedOut'] ? 'timeout' : 'worker-failed',
+			'failureSnippet' => substr( $proc['output'], -2000 ),
+			'seed'           => $current_seed,
+			'profile'        => is_array( $replay ) ? ( $replay['profile'] ?? $profile ) : $profile,
+			'mode'           => is_array( $replay ) ? ( $replay['mode'] ?? $mode ) : $mode,
+			'payloadPolicy'  => is_array( $replay ) ? ( $replay['payloadPolicy'] ?? $payload_policy ) : $payload_policy,
+			'generator'      => is_array( $replay ) ? ( $replay['generator'] ?? null ) : null,
+			'inputSource'    => is_array( $replay ) ? ( $replay['inputSource'] ?? null ) : null,
+			'inputSha1'      => is_array( $replay ) ? ( $replay['inputSha1'] ?? null ) : null,
+			'inputLength'    => is_array( $replay ) ? ( $replay['inputLength'] ?? null ) : null,
+			'oracle'         => is_array( $replay ) ? ( $replay['oracle'] ?? $oracle_metadata ) : $oracle_metadata,
+			'paths'          => array(
+				'outputDir'  => $attempt_dir,
+				'resultPath' => $attempt_dir . '/result.json',
+				'replayPath' => $attempt_dir . '/replay.json',
+			),
+		);
+		$signature = \HtmlApiFuzz\Signature::from_result( $result );
+		if ( null !== $signature ) {
+			$result['signature'] = $signature;
+		}
+		\HtmlApiFuzz\write_json_file( $attempt_dir . '/result.json', $result );
+	}
+
+	$result['seed']    = $result['seed'] ?? $current_seed; // Backfill identifying fields the result document does not carry.
+	$result['profile'] = $result['profile'] ?? $profile;
+	$result['mode']    = $result['mode'] ?? $mode;
+	$result['payloadPolicy'] = $result['payloadPolicy'] ?? $payload_policy;
+	$result['paths']   = $result['paths'] ?? array(
+		'outputDir'  => $attempt_dir,
+		'resultPath' => $attempt_dir . '/result.json',
+		'replayPath' => $attempt_dir . '/replay.json',
+	);
+	if ( ! ( $result['ok'] ?? false ) && empty( $result['signature'] ) ) { // A failure without a signature gets one computed here, and the result file is rewritten.
+		$signature = \HtmlApiFuzz\Signature::from_result( $result );
+		if ( null !== $signature ) {
+			$result['signature'] = $signature;
+		}
+		\HtmlApiFuzz\write_json_file( $attempt_dir . '/result.json', $result );
+	}
+
+	$attempt_ok = (bool) ( $result['ok'] ?? false );
+	$has_oracle_finding = is_array( $result['oracleFinding'] ?? null );
+
+	/*
+	 * Artifact retention: every attempt is regenerable from its seed, so seed
+	 * directories are kept only for failures and oracle findings, and only
+	 * until their signature has enough exemplars on disk. Everything else
+	 * lives in the SQLite store (failures and oracle findings include their
+	 * result and replay JSON there, so a pruned finding remains reproducible).
+	 */
+	$retain_artifacts = $keep_all_artifacts;
+	$retain_failure_artifacts = $keep_all_artifacts && ! $attempt_ok;
+	$retain_oracle_artifacts  = $keep_all_artifacts && $has_oracle_finding;
+	$replay           = null;
+	if ( ! $attempt_ok || $has_oracle_finding ) { // Only failures and oracle findings are retention candidates.
+		$replay_path = $attempt_dir . '/replay.json';
+		$replay      = html_api_fuzz_runner_read_json_or_null( $replay_path );
+
+		if ( ! $keep_all_artifacts ) {
+			$retention_targets = array(); // One entry per reason this attempt could be kept: failure signature and/or oracle-finding signature.
+			if ( ! $attempt_ok ) {
+				$retention_targets[] = array(
+					'hash' => $result['signature']['hash'] ?? null,
+					'kind' => 'failure',
+				);
+			}
+			if ( $has_oracle_finding ) {
+				$retention_targets[] = array(
+					'hash' => $result['oracleFinding']['signature']['hash'] ?? null,
+					'kind' => 'oracle',
+				);
+			}
+
+			if ( ! is_array( $replay ) ) {
+				// Without a replay document the files are the only reproduction.
+				$retain_failure_artifacts = ! $attempt_ok;
+				$retain_oracle_artifacts  = $has_oracle_finding;
+			} else {
+				/*
+				 * Count exemplar directories still on disk rather than rows
+				 * ever written: a restarted runner re-records seeds, and row
+				 * counting would saturate the cap without keeping anything.
+				 */
+				foreach ( $retention_targets as $target ) {
+					$signature_hash = $target['hash'];
+					if ( null === $signature_hash ) { // No signature hash to count against: always keep.
+						if ( 'oracle' === $target['kind'] ) {
+							$retain_oracle_artifacts = true;
+						} else {
+							$retain_failure_artifacts = true;
+						}
+						continue;
+					}
+					$retained_seeds = 'oracle' === $target['kind']
+						? $result_store->oracle_retained_seeds( $signature_hash )
+						: $result_store->retained_seeds( $signature_hash );
+					$retained_on_disk = 0;
+					foreach ( $retained_seeds as $retained_seed ) {
+						if ( is_dir( $output_dir . '/seed-' . $retained_seed ) ) {
+							++$retained_on_disk;
+						}
+					}
+					if ( $retained_on_disk < $max_keep_per_signature ) { // Still under the per-signature cap.
+						if ( 'oracle' === $target['kind'] ) {
+							$retain_oracle_artifacts = true;
+						} else {
+							$retain_failure_artifacts = true;
+						}
+					}
+				}
+			}
+			$retain_artifacts = $retain_failure_artifacts || $retain_oracle_artifacts;
+		}
+
+		if ( $retain_artifacts ) {
+			// Over-cap repeats of a known signature do not justify keeping
+			// their batch log; retained exemplars and re-runs do.
+			$batch_keep_log = true;
+		}
+
+		if ( is_array( $replay ) ) { // Fold the outcome into the replay document; it is written back to disk only when artifacts are kept.
+			$replay['result'] = array(
+				'ok'           => $result['ok'] ?? false,
+				'status'       => $result['status'] ?? 'unknown',
+				'failureClass' => $result['failureClass'] ?? null,
+				'signature'    => $result['signature'] ?? null,
+				'oracleFinding' => $result['oracleFinding'] ?? null,
+				'oracle'       => $result['oracle'] ?? $replay['oracle'] ?? $oracle_metadata,
+				'resultPath'   => $retain_artifacts ? $attempt_dir . '/result.json' : null,
+			);
+			$replay['oracle'] = $replay['oracle'] ?? $result['oracle'] ?? $oracle_metadata;
+			$replay['signature'] = $result['signature'] ?? null;
+			$replay['oracleFinding'] = $result['oracleFinding'] ?? null;
+			if ( $retain_artifacts ) {
+				\HtmlApiFuzz\write_json_file( $replay_path, $replay );
+			}
+		}
+	}
+
+	$summary = array(
+		'kind'          => $attempt_ok ? ( $has_oracle_finding ? 'oracle-finding' : 'attempt' ) : 'failure',
+		'ok'            => $attempt_ok,
+		'status'        => $result['status'] ?? 'unknown',
+		'failureClass'  => $result['failureClass'] ?? null,
+		'seed'          => $current_seed,
+		'profile'       => $result['profile'] ?? $profile,
+		'mode'          => $result['mode'] ?? $mode,
+		'payloadPolicy' => $result['payloadPolicy'] ?? $payload_policy,
+		'generator'     => $result['generator'] ?? null,
+		'inputSource'   => $result['inputSource'] ?? null,
+		'inputSha1'     => $result['inputSha1'] ?? null,
+		'inputLength'   => $result['inputLength'] ?? null,
+		'signature'     => $result['signature'] ?? null,
+		'oracleFinding' => $result['oracleFinding'] ?? null,
+		'oracle'        => $result['oracle'] ?? $oracle_metadata,
+		'artifactsRetained' => $retain_artifacts,
+		'failureArtifactsRetained' => $retain_failure_artifacts,
+		'oracleArtifactsRetained'  => $retain_oracle_artifacts,
+		'resultPath'    => $retain_artifacts ? $attempt_dir . '/result.json' : null,
+		'replayPath'    => $retain_artifacts ? $attempt_dir . '/replay.json' : null,
+		// Batch-executed seeds have no per-seed worker.log; point at a log
+		// that exists (isolation re-run log, else the shared batch log).
+		'logPath'       => $retain_artifacts ? ( is_file( $log_path ) ? $log_path : $batch_log ) : null,
+		'durationMs'    => $proc['durationMs'],
+		'workerCode'    => $proc['code'],
+		'workerTimedOut'=> $proc['timedOut'],
+	);
+	$attempt_id = $result_store->record_attempt( // Result and replay documents are passed only for failures and oracle findings.
+		$summary,
+		( $attempt_ok && ! $has_oracle_finding ) ? null : $result,
+		( ( $attempt_ok && ! $has_oracle_finding ) || ! is_array( $replay ) ) ? null : $replay
+	);
+	if ( ! $retain_artifacts && is_array( $replay ) && ( ! $attempt_ok || $has_oracle_finding ) ) {
+		// The stored copy outlives the pruned files; its replay command must
+		// point at this exact row, not just at a seed that a later restart may
+		// record again with different input.
+		$replay['command'] = array(
+			'program' => PHP_BINARY,
+			'args'    => array(
+				'tools/html-api-fuzz/replay.php',
+				'--store',
+				$output_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME,
+				'--id',
+				(string) $attempt_id,
+			),
+			'cwd'     => $repo_root,
+		);
+		$result_store->update_replay_for_attempt( $attempt_id, $replay );
+	}
+	if ( ! $retain_artifacts && ! $result_store->seed_artifacts_retained( $current_seed ) ) {
+		// The retained check covers earlier rows: a re-run of a previously
+		// retained seed must not delete the exemplar directory they cite.
+		\HtmlApiFuzz\remove_dir_recursive( $output_dir . '/seed-' . $current_seed );
+	}
+
+	if ( $summary['ok'] ) {
+		if ( $has_oracle_finding ) { // Counted in addition to the status bucket below.
+			++$state['oracleFindings'];
+		}
+		if ( 'unsupported' === $summary['status'] ) {
+			++$state['unsupported'];
+		} elseif ( 'oracle-parse-error' === $summary['status'] ) {
+			// Inputs the selected oracle cannot parse receive no differential
+			// coverage; track the loss per class so long runs surface how
+			// much of the input space the oracle gives up on.
+			++$state['oracleParseErrors'];
+		} elseif ( 'oracle-unsupported' === $summary['status'] ) {
+			++$state['oracleUnsupported'];
+		} elseif ( 'oracle-tolerated' === $summary['status'] ) {
+			++$state['oracleTolerated'];
+		} else {
+			++$state['successes'];
+		}
+	} else {
+		++$state['failures'];
+		if ( $stop_on_failure ) {
+			$state['stopReason'] = 'stop-on-failure';
+		}
+	}
+
+	$seed += $seed_stride; // Advance once per recorded seed, so $seed is the next batch's first seed when the batch drains.
+	++$count;
+	$state['nextSeed']  = $seed;
+	$state['updatedAt'] = gmdate( 'c' );
+	\HtmlApiFuzz\write_json_file( $state_path, $state );
+
+	// A stop-on-failure stop reason is honored at the top of the loop, after
+	// the rest of the batch has been recorded and pruned. Under
+	// --keep-all-artifacts every recorded logPath must keep existing.
+	if ( array() === $pending_batch && ! $batch_keep_log && ! $keep_all_artifacts && null !== $batch_log ) {
+		@unlink( $batch_log );
+	}
+}
+
+if ( null === $state['stopReason'] ) { // No explicit stop: the loop ended on its seed budget or its deadline.
+	$state['stopReason'] = ( 0 !== $max_seeds && $count >= $max_seeds ) ? 'max-seeds' : 'duration-elapsed';
+}
+$state['updatedAt'] = gmdate( 'c' );
+\HtmlApiFuzz\write_json_file( $state_path, $state );
+\HtmlApiFuzz\append_ndjson( $events_path, array( 'at' => gmdate( 'c' ), 'kind' => 'runner-stop', 'stopReason' => $state['stopReason'], 'nextSeed' => $seed ) );
+$result_store->close();
+echo \HtmlApiFuzz\json_encode_safe( $state ) . "\n"; // The final state document is the runner's only standard-output line.
diff --git a/tools/html-api-fuzz/start-continuous-run-tmux.sh b/tools/html-api-fuzz/start-continuous-run-tmux.sh
new file mode 100755
index 0000000000000..f7c4b78f73d9e
--- /dev/null
+++ b/tools/html-api-fuzz/start-continuous-run-tmux.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd -P)"
+
+SESSION="${SESSION:-html-api-fuzz-$(date -u +%Y%m%dT%H%M%SZ)}"
+RUN_DIR="${RUN_DIR:-artifacts/html-api-fuzz/run-$(date -u +%Y%m%dT%H%M%SZ)}"
+TRIAGE_DIR="${TRIAGE_DIR:-$RUN_DIR/triage}"
+
+LANES="${LANES:-4}"
+MIN_FREE_GB="${MIN_FREE_GB:-25}"
+MAX_INPUT_BYTES="${MAX_INPUT_BYTES:-4096}"
+PAYLOAD_POLICY="${PAYLOAD_POLICY:-}"
+MAX_MINIMIZE="${MAX_MINIMIZE:-1}"
+WATCHER_INTERVAL_SECONDS="${WATCHER_INTERVAL_SECONDS:-10}"
+ORCHESTRATOR_INTERVAL_SECONDS="${ORCHESTRATOR_INTERVAL_SECONDS:-120}"
+ORCHESTRATOR_MAX_CONCURRENT="${ORCHESTRATOR_MAX_CONCURRENT:-1}"
+ORCHESTRATOR_MODE="${ORCHESTRATOR_MODE:-classify}"
+if [[ -z "${ORCHESTRATOR_SANDBOX+x}" ]]; then
+	if [[ "$ORCHESTRATOR_MODE" == "fix" ]]; then
+		ORCHESTRATOR_SANDBOX="workspace-write"
+	else
+		ORCHESTRATOR_SANDBOX="read-only"
+	fi
+fi
+CODEX_BIN="${CODEX_BIN:-codex}"
+CODEX_MODEL="${CODEX_MODEL:-}"
+
+if ! command -v tmux >/dev/null 2>&1; then
+	echo "tmux is required." >&2
+	exit 1
+fi
+
+if ! command -v php >/dev/null 2>&1; then
+	echo "php is required." >&2
+	exit 1
+fi
+
+if tmux has-session -t "=$SESSION" 2>/dev/null; then # "=" demands an exact name, not a prefix or pattern match
+	echo "tmux session already exists: $SESSION" >&2
+	exit 1
+fi
+
+available_kb="$(df -Pk "$REPO_ROOT" | awk 'NR == 2 { print $4 }')"
+required_kb="$(( MIN_FREE_GB * 1024 * 1024 ))"
+if (( available_kb < required_kb )); then
+	echo "Refusing to start: only $(( available_kb / 1024 / 1024 )) GiB free under $REPO_ROOT; need at least ${MIN_FREE_GB} GiB." >&2
+	echo "Free space or lower MIN_FREE_GB if this is intentional." >&2
+	exit 1
+fi
+
+# Shell-quotes each argument (printf %q) and joins the results with single spaces.
+shell_join() {
+	local joined="" quoted word
+	for word in "$@"; do
+		printf -v quoted '%q' "$word"
+		joined+="${joined:+ }$quoted"
+	done
+	printf '%s' "$joined"
+}
+
+# Builds the command line for one tmux pane: announce the label and run dir, run
+# the quoted command, report its exit status, then keep the pane open in a shell.
+pane_command() {
+	local label="$1" command
+	shift
+	command="$(shell_join "$@")"
+	printf "export RUN_DIR=%q TRIAGE_DIR=%q; echo '[%s] starting'; echo '[%s] RUN_DIR='%q; %s; status=\$?; echo; echo '[%s] exited with status '\$status; exec \"\${SHELL:-/bin/sh}\" -l" \
+		"$RUN_DIR" \
+		"$TRIAGE_DIR" \
+		"$label" \
+		"$label" \
+		"$RUN_DIR" \
+		"$command" \
+		"$label"
+}
+
+launcher_command=(
+	php tools/html-api-fuzz/launcher.php
+	--lanes "$LANES"
+	--duration-seconds 0
+	--max-seeds 0
+	--output-dir "$RUN_DIR"
+)
+
+if [[ "$MAX_INPUT_BYTES" != "0" ]]; then
+	launcher_command+=( --max-input-bytes "$MAX_INPUT_BYTES" )
+fi
+
+if [[ "$PAYLOAD_POLICY" != "" ]]; then
+	launcher_command+=( --payload-policy "$PAYLOAD_POLICY" )
+fi
+
+watcher_command=(
+	php tools/html-api-fuzz/watcher.php
+	--run-dir "$RUN_DIR"
+	--state-dir "$TRIAGE_DIR"
+	--interval-seconds "$WATCHER_INTERVAL_SECONDS"
+	--max-minimize "$MAX_MINIMIZE"
+)
+
+orchestrator_command=(
+	php tools/html-api-fuzz/codex-triage-orchestrator.php
+	--triage-dir "$TRIAGE_DIR"
+	--diagnostics-dir "$RUN_DIR/diagnostics"
+	--repo-root "$REPO_ROOT"
+	--codex-bin "$CODEX_BIN"
+	--mode "$ORCHESTRATOR_MODE"
+	--sandbox "$ORCHESTRATOR_SANDBOX"
+	--max-concurrent "$ORCHESTRATOR_MAX_CONCURRENT"
+	--interval-seconds "$ORCHESTRATOR_INTERVAL_SECONDS"
+)
+
+if [[ "$CODEX_MODEL" != "" ]]; then
+	orchestrator_command+=( --model "$CODEX_MODEL" )
+fi
+
+# Maps a RUN_DIR/TRIAGE_DIR-style path to the file the panes really touch:
+# absolute overrides are used as-is, relative ones resolve against REPO_ROOT.
+repo_path() {
+	case "$1" in
+		/*) printf '%s' "$1" ;;
+		*) printf '%s' "$REPO_ROOT/$1" ;;
+	esac
+}
+
+# Polls once per second, for up to 30 seconds, until a component's state file
+# exists; otherwise reports which component stalled and aborts the script.
+wait_for_state_file() {
+	local component="$1" shown="$2" target
+	target="$(repo_path "$2")"
+	for _ in {1..30}; do
+		[[ -f "$target" ]] && return 0
+		sleep 1
+	done
+	[[ -f "$target" ]] && return 0
+	echo "$component did not create $shown within 30 seconds." >&2
+	echo "Inspect with: tmux attach -t $SESSION" >&2
+	exit 1
+}
+
+launcher_pane="$(
+	tmux new-session -d -P -F '#{pane_id}' -s "$SESSION" -n fuzz -c "$REPO_ROOT" \
+		"$(pane_command launcher "${launcher_command[@]}")"
+)"
+wait_for_state_file Launcher "$RUN_DIR/launcher-state.json"
+
+watcher_pane="$(
+	tmux split-window -P -F '#{pane_id}' -t "$launcher_pane" -h -c "$REPO_ROOT" \
+		"$(pane_command watcher "${watcher_command[@]}")"
+)"
+wait_for_state_file Watcher "$TRIAGE_DIR/state.json"
+
+tmux split-window -P -F '#{pane_id}' -t "$watcher_pane" -v -c "$REPO_ROOT" \
+	"$(pane_command orchestrator "${orchestrator_command[@]}")" >/dev/null
+
+tmux select-layout -t "$SESSION" tiled >/dev/null
+tmux set-environment -t "$SESSION" RUN_DIR "$RUN_DIR"
+tmux set-environment -t "$SESSION" TRIAGE_DIR "$TRIAGE_DIR"
+
+echo "session=$SESSION"
+echo "run_dir=$RUN_DIR"
+echo "triage_dir=$TRIAGE_DIR"
+echo "attach=tmux attach -t $SESSION"
+tmux list-panes -t "$SESSION" -F '#{pane_index}: #{pane_current_command} dead=#{pane_dead}'
diff --git a/tools/html-api-fuzz/stop.php b/tools/html-api-fuzz/stop.php
new file mode 100644
index 0000000000000..9246d46e94d0b
--- /dev/null
+++ b/tools/html-api-fuzz/stop.php
@@ -0,0 +1,390 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+/**
+ * Requests a graceful stop of a fuzz run by creating the stop file watched by
+ * the run. Each runner lane finishes its current batch and exits; the watcher
+ * performs a final scan once all lanes have stopped; the codex orchestrator
+ * stops launching and exits when its running jobs finish.
+ */
+function html_api_fuzz_stop_usage(): void {
+	echo "Usage: php tools/html-api-fuzz/stop.php [--run-dir DIR] [--artifacts-dir DIR] [--stop-file PATH] [--stop-stale-seconds N]\n";
+	echo "Without --run-dir, targets the most recently active unfinished run under artifacts/html-api-fuzz (or --artifacts-dir), unless --stop-file is passed by itself to write a known stop file directly.\n";
+}
+
+function html_api_fuzz_stop_add_stop_file( array &$stop_files, string $stop_file ): void {
+	if ( '' !== $stop_file && ! in_array( $stop_file, $stop_files, true ) ) {
+		$stop_files[] = $stop_file;
+	}
+}
+
+function html_api_fuzz_stop_path_is_absolute( string $path ): bool {
+	if ( '' !== $path && '/' === $path[0] ) {
+		return true;
+	}
+	if ( '' === $path || '\\' !== DIRECTORY_SEPARATOR ) {
+		return false;
+	}
+	// Backslash-separator platforms only: a leading backslash or an "X:/" / "X:\" prefix.
+	return '\\' === $path[0] || in_array( substr( $path, 1, 2 ), array( ':/', ':\\' ), true );
+}
+
+function html_api_fuzz_stop_add_advertised_stop_file( array &$stop_files, array &$warnings, string $stop_file, array $state ): void {
+	if ( '' === $stop_file ) {
+		$warnings[] = 'runner stopFile is empty; wrote the run-dir stop file, but the exact watched file may be unknown.';
+		return;
+	}
+
+	if ( html_api_fuzz_stop_path_is_absolute( $stop_file ) ) {
+		html_api_fuzz_stop_add_stop_file( $stop_files, $stop_file );
+		return;
+	}
+
+	if ( is_string( $state['cwd'] ?? null ) && '' !== $state['cwd'] && html_api_fuzz_stop_path_is_absolute( $state['cwd'] ) ) {
+		html_api_fuzz_stop_add_stop_file( $stop_files, rtrim( $state['cwd'], DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . $stop_file );
+		return;
+	}
+
+	// Older or malformed runner state did not record an absolute cwd; the
+	// exact relative path is unknowable from another process.
+	html_api_fuzz_stop_add_stop_file( $stop_files, $stop_file );
+	$warnings[] = 'relative runner stopFile has no recorded absolute cwd; wrote a caller-cwd candidate, but the exact watched file may be unknown.';
+}
+
+function html_api_fuzz_stop_state_is_stale( array $state, float $stale_seconds, int $fallback_mtime = 0 ): bool {
+	$updated_at = strtotime( (string) ( $state['updatedAt'] ?? '' ) );
+	if ( false === $updated_at && $fallback_mtime > 0 ) {
+		$updated_at = $fallback_mtime;
+	}
+
+	return false !== $updated_at && ( time() - $updated_at ) > $stale_seconds;
+}
+
+function html_api_fuzz_stop_runner_is_active( array $state, float $stale_seconds, int $state_mtime ): bool {
+	if ( ! array_key_exists( 'stopReason', $state ) || null !== $state['stopReason'] ) {
+		return false;
+	}
+
+	$runner_stale_seconds = max( $stale_seconds, 2.0 * ( (int) ( $state['batchBudgetMs'] ?? 0 ) ) / 1000.0 );
+
+	return ! html_api_fuzz_stop_state_is_stale( $state, $runner_stale_seconds, $state_mtime );
+}
+
+function html_api_fuzz_stop_state_looks_runner_like( array $state ): bool {
+	$kind = $state['kind'] ?? null;
+	if ( 'html-api-fuzz-runner-state' === $kind ) {
+		return true;
+	}
+	if ( 'html-api-fuzz-launcher-state' === $kind ) {
+		return false;
+	}
+
+	return array_key_exists( 'stopFile', $state ) || array_key_exists( 'stopReason', $state ) || array_key_exists( 'batchBudgetMs', $state );
+}
+
+function html_api_fuzz_stop_read_state_with_retry( string $state_path, int $attempts = 3 ) {
+	// Reads a JSON state file, retrying briefly in case a writer is mid-update.
+	// Once the final attempt fails, its exception is rethrown without delay.
+	for ( $attempt = 1; $attempt <= $attempts; ++$attempt ) {
+		try {
+			return \HtmlApiFuzz\read_json_file( $state_path );
+		} catch ( \RuntimeException $e ) {
+			if ( $attempt >= $attempts ) {
+				throw $e;
+			}
+			usleep( 50000 ); // Pause only when another attempt follows.
+		}
+	}
+
+	// Reached only when $attempts is below 1, i.e. nothing was tried.
+	return null;
+}
+
+/**
+ * Describes one candidate run directory: whether any of its runners or its
+ * launcher still looks unfinished, and how recently its state files changed.
+ * Directory mtimes are useless here — lanes write into subdirectories.
+ *
+ * @param string $path          Run directory holding launcher and/or runner state files.
+ * @param float  $stale_seconds Age beyond which an unfinished state no longer counts as active.
+ * @return array|null Keys path, active, mtime, stopFiles, warnings; null without any state file.
+ */
+function html_api_fuzz_stop_inspect_run_dir( string $path, float $stale_seconds ): ?array {
+	$default_stop_file = rtrim( $path, DIRECTORY_SEPARATOR ) . '/STOP';
+	$state_paths       = array_merge(
+		is_file( $path . '/launcher-state.json' ) ? array( $path . '/launcher-state.json' ) : array(),
+		is_file( $path . '/state.json' ) ? array( $path . '/state.json' ) : array(),
+		glob( $path . '/lane-*/state.json' ) ?: array()
+	);
+	if ( array() === $state_paths ) {
+		return null;
+	}
+
+	$active       = false;
+	$latest_mtime = 0;
+	$stop_files   = array();
+	$warnings     = array();
+	foreach ( $state_paths as $state_path ) {
+		$mtime        = filemtime( $state_path );
+		$state_mtime  = false !== $mtime ? (int) $mtime : 0;
+		$latest_mtime = max( $latest_mtime, $state_mtime );
+
+		try {
+			$state = html_api_fuzz_stop_read_state_with_retry( $state_path );
+		} catch ( \RuntimeException $e ) {
+			$state = null;
+		}
+		if ( ! is_array( $state ) ) {
+			// Mid-write or corrupt state: the advertised stop file is unknowable.
+			// Count the run as active unless the file itself has gone stale.
+			if ( 0 === $state_mtime || ( time() - $state_mtime ) <= $stale_seconds ) {
+				$active = true;
+			}
+			html_api_fuzz_stop_add_stop_file( $stop_files, $default_stop_file );
+			$warnings[] = "could not read {$state_path}; writing only the run-dir stop file for that state.";
+			continue;
+		}
+
+		$kind = $state['kind'] ?? null;
+		if ( 'html-api-fuzz-launcher-state' === $kind && false === ( $state['finished'] ?? null ) && ! html_api_fuzz_stop_state_is_stale( $state, $stale_seconds, $state_mtime ) ) {
+			$active = true;
+			html_api_fuzz_stop_add_stop_file( $stop_files, $default_stop_file );
+		}
+		if ( ! html_api_fuzz_stop_state_looks_runner_like( $state ) ) {
+			continue;
+		}
+
+		if ( 'html-api-fuzz-runner-state' !== $kind ) {
+			$warnings[] = "runner-like state {$state_path} has missing or unknown kind; treating it as runner state.";
+		}
+		$runner_active  = html_api_fuzz_stop_runner_is_active( $state, $stale_seconds, $state_mtime );
+		$runner_unknown = ! array_key_exists( 'stopReason', $state );
+		if ( array_key_exists( 'stopFile', $state ) && is_string( $state['stopFile'] ) ) {
+			html_api_fuzz_stop_add_advertised_stop_file( $stop_files, $warnings, $state['stopFile'], $state );
+		} else {
+			html_api_fuzz_stop_add_stop_file( $stop_files, $default_stop_file );
+			if ( $runner_active || $runner_unknown ) {
+				$warnings[] = 'runner stopFile is missing or malformed; wrote the run-dir stop file, but the exact watched file may be unknown.';
+			}
+		}
+		if ( $runner_active ) {
+			$active = true;
+			html_api_fuzz_stop_add_stop_file( $stop_files, $default_stop_file );
+		}
+	}
+	html_api_fuzz_stop_add_stop_file( $stop_files, $default_stop_file );
+
+	return array(
+		'path'      => $path,
+		'active'    => $active,
+		'mtime'     => $latest_mtime,
+		'stopFiles' => $stop_files,
+		'warnings'  => $warnings,
+	);
+}
+
+/**
+ * Ranks run candidates: an unfinished run beats a finished one, then the newer
+ * state-file mtime wins, and the greater path (strcmp) breaks remaining ties.
+ */
+function html_api_fuzz_stop_candidate_is_better( array $candidate, ?array $best ): bool {
+	if ( null === $best ) {
+		return true;
+	}
+	if ( $candidate['active'] !== $best['active'] ) {
+		return $candidate['active'];
+	}
+
+	// Same activity: prefer the fresher state, then fall back to path order.
+	return $candidate['mtime'] !== $best['mtime']
+		? $candidate['mtime'] > $best['mtime']
+		: strcmp( $candidate['path'], $best['path'] ) > 0;
+}
+
+/**
+ * Picks the best run directory under $artifacts_dir: the most recently active
+ * unfinished run, falling back to the most recently active run of any state.
+ * Returns null when the directory cannot be listed or holds no run state.
+ */
+function html_api_fuzz_stop_latest_run_dir( string $artifacts_dir, float $stale_seconds ): ?array {
+	$entries = @scandir( $artifacts_dir );
+	if ( false === $entries ) {
+		return null;
+	}
+
+	$winner = null;
+	foreach ( array_diff( $entries, array( '.', '..' ) ) as $entry ) {
+		$dir = $artifacts_dir . DIRECTORY_SEPARATOR . $entry;
+		if ( ! is_dir( $dir ) ) {
+			continue;
+		}
+		// Directories without any state file inspect as null and are skipped.
+		$inspected = html_api_fuzz_stop_inspect_run_dir( $dir, $stale_seconds );
+		if ( null !== $inspected && html_api_fuzz_stop_candidate_is_better( $inspected, $winner ) ) {
+			$winner = $inspected;
+		}
+	}
+
+	return $winner;
+}
+
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+if ( \HtmlApiFuzz\option_bool( $options, 'help', false ) || \HtmlApiFuzz\option_bool( $options, 'h', false ) ) {
+	html_api_fuzz_stop_usage();
+	exit( 0 );
+}
+
+$run_dir = \HtmlApiFuzz\option_string( $options, 'run-dir', $options['_'][0] ?? null );
+if ( array_key_exists( 'run-dir', $options ) && ( true === $options['run-dir'] || null === $run_dir || '' === $run_dir ) ) {
+	fwrite( STDERR, "Expected --run-dir to be a non-empty path.\n" );
+	exit( 1 );
+}
+$stop_file_override = \HtmlApiFuzz\option_string( $options, 'stop-file', null );
+if ( array_key_exists( 'stop-file', $options ) && ( true === $options['stop-file'] || null === $stop_file_override || '' === $stop_file_override ) ) {
+	fwrite( STDERR, "Expected --stop-file to be a non-empty path.\n" );
+	exit( 1 );
+}
+if ( array_key_exists( 'stop-stale-seconds', $options ) && true === $options['stop-stale-seconds'] ) {
+	fwrite( STDERR, "Expected --stop-stale-seconds to be numeric.\n" );
+	exit( 1 );
+}
+if ( array_key_exists( 'stop-stale-seconds', $options ) && ! is_numeric( $options['stop-stale-seconds'] ) ) {
+	fwrite( STDERR, "Expected --stop-stale-seconds to be numeric.\n" );
+	exit( 1 );
+}
+if ( array_key_exists( 'artifacts-dir', $options ) && ( true === $options['artifacts-dir'] || '' === $options['artifacts-dir'] ) ) {
+	fwrite( STDERR, "Expected --artifacts-dir to be a non-empty path.\n" );
+	exit( 1 );
+}
+if ( null === $run_dir && null !== $stop_file_override && array_key_exists( 'artifacts-dir', $options ) ) {
+	fwrite( STDERR, "Pass --run-dir with --artifacts-dir --stop-file, or pass only --stop-file to write a known stop file directly.\n" );
+	exit( 1 );
+}
+
+$looks_finished       = false;
+$stale_seconds        = max( 10.0, \HtmlApiFuzz\option_float( $options, 'stop-stale-seconds', 120.0 ) );
+$candidate            = null;
+$direct_stop_file_only = null === $run_dir && null !== $stop_file_override;
+if ( null === $run_dir && ! $direct_stop_file_only ) {
+	$artifacts_dir = \HtmlApiFuzz\option_string( $options, 'artifacts-dir', \HtmlApiFuzz\repo_root() . '/artifacts/html-api-fuzz' );
+	$candidate     = html_api_fuzz_stop_latest_run_dir( $artifacts_dir, $stale_seconds );
+	if ( null === $candidate ) {
+		fwrite( STDERR, "No run directory found under {$artifacts_dir}; pass --run-dir.\n" );
+		exit( 1 );
+	}
+	$run_dir        = $candidate['path'];
+	$looks_finished = ! $candidate['active'];
+	if ( $looks_finished ) {
+		fwrite( STDERR, "Warning: no unfinished run found; targeting {$run_dir}, which already looks stopped.\n" );
+	}
+}
+
+if ( null !== $run_dir && ! is_dir( $run_dir ) ) {
+	fwrite( STDERR, "Not a directory: {$run_dir}\n" );
+	exit( 1 );
+}
+
+if ( null !== $run_dir && null === $candidate ) {
+	$candidate = html_api_fuzz_stop_inspect_run_dir( $run_dir, $stale_seconds );
+	if ( null === $candidate && null === $stop_file_override ) {
+		$candidate = array(
+			'path'      => $run_dir,
+			'active'    => true,
+			'mtime'     => 0,
+			'stopFiles' => array(),
+			'warnings'  => array( 'no run state found; writing only the run-dir stop file.' ),
+		);
+	}
+}
+
+$warnings = array();
+foreach ( $candidate['warnings'] ?? array() as $warning ) {
+	if ( is_string( $warning ) ) {
+		$warnings[] = $warning;
+	}
+}
+
+// Collect every stop file to write, most specific first: the explicit
+// override, then the files advertised by the inspected run state.
+$stop_files = array();
+if ( null !== $stop_file_override ) {
+	html_api_fuzz_stop_add_stop_file( $stop_files, $stop_file_override );
+}
+foreach ( $candidate['stopFiles'] ?? array() as $stop_file ) {
+	if ( is_string( $stop_file ) ) {
+		html_api_fuzz_stop_add_stop_file( $stop_files, $stop_file );
+	}
+}
+// The run-dir stop file is always included; duplicates are skipped by the helper.
+if ( null !== $run_dir ) {
+	html_api_fuzz_stop_add_stop_file( $stop_files, rtrim( $run_dir, DIRECTORY_SEPARATOR ) . '/STOP' );
+}
+if ( array() === $stop_files ) {
+	fwrite( STDERR, "No stop file could be determined.\n" );
+	exit( 1 );
+}
+
+$write_stop_files = $stop_files;
+$primary_stop_file = $stop_files[0];
+if ( null !== $run_dir ) {
+	$run_stop_file    = rtrim( $run_dir, DIRECTORY_SEPARATOR ) . '/STOP';
+	$primary_stop_file = $run_stop_file;
+	$write_stop_files = array( $run_stop_file );
+	foreach ( $stop_files as $stop_file ) {
+		html_api_fuzz_stop_add_stop_file( $write_stop_files, $stop_file );
+	}
+}
+
+$already            = true;
+foreach ( $write_stop_files as $stop_file ) {
+	$already = $already && is_file( $stop_file );
+}
+
+$write_failures = array();
+foreach ( $write_stop_files as $stop_file ) {
+	if ( is_file( $stop_file ) ) {
+		continue;
+	}
+	try {
+		\HtmlApiFuzz\write_json_file(
+			$stop_file,
+			array(
+				'kind'        => 'html-api-fuzz-stop-request',
+				'requestedAt' => gmdate( 'c' ),
+			)
+		);
+	} catch ( \Throwable $e ) {
+		$write_failures[] = "Could not write {$stop_file}: {$e->getMessage()}";
+		continue;
+	}
+	// write_json_file does not check the file_put_contents result; the file's
+	// existence is the truth condition the runner acts on.
+	if ( ! is_file( $stop_file ) ) {
+		$write_failures[] = "Could not write {$stop_file}.";
+	}
+}
+if ( array() !== $write_failures ) {
+	foreach ( $write_failures as $failure ) {
+		fwrite( STDERR, "{$failure}\n" );
+	}
+	exit( 1 );
+}
+
+$ok = array() === $warnings;
+foreach ( $warnings as $warning ) {
+	fwrite( STDERR, "Warning: {$warning}\n" );
+}
+
+echo \HtmlApiFuzz\json_encode_safe(
+	array(
+		'ok'               => $ok,
+		'runDir'           => $run_dir,
+		'stopFile'         => $primary_stop_file,
+		'stopFiles'        => $write_stop_files,
+		'alreadyRequested' => $already,
+		'looksFinished'    => $looks_finished,
+		'warnings'         => $warnings,
+	)
+) . "\n";
+exit( $ok ? 0 : 2 );
diff --git a/tools/html-api-fuzz/tests/codex-triage-orchestrator-smoke.php b/tools/html-api-fuzz/tests/codex-triage-orchestrator-smoke.php
new file mode 100755
index 0000000000000..dd835dee19a17
--- /dev/null
+++ b/tools/html-api-fuzz/tests/codex-triage-orchestrator-smoke.php
@@ -0,0 +1,179 @@
+#!/usr/bin/env php
+<?php
+define( 'HTML_API_FUZZ_CODEX_SELF_TESTING', true );
+require_once dirname( __DIR__ ) . '/codex-triage-orchestrator.php';
+
+function html_api_fuzz_codex_test_assert( bool $condition, string $message ): void {
+	if ( ! $condition ) {
+		throw new RuntimeException( $message );
+	}
+}
+
+function html_api_fuzz_codex_test_tmpdir(): string {
+	$dir = sys_get_temp_dir() . '/html-api-fuzz-codex-test-' . bin2hex( random_bytes( 6 ) );
+	if ( ! mkdir( $dir, 0777, true ) && ! is_dir( $dir ) ) {
+		throw new RuntimeException( 'Could not create test directory: ' . $dir );
+	}
+
+	return $dir;
+}
+
+function html_api_fuzz_codex_test_expect_exception( callable $callback, string $message ): void {
+	try {
+		$callback();
+	} catch ( Throwable $error ) {
+		return;
+	}
+
+	throw new RuntimeException( $message );
+}
+
+$repo_root = dirname( __DIR__, 3 );
+$tmp       = html_api_fuzz_codex_test_tmpdir();
+$triage    = $tmp . '/triage';
+$run_dir   = $tmp . '/run';
+$diag      = $tmp . '/diagnostics';
+\HtmlApiFuzz\ensure_dir( $triage );
+\HtmlApiFuzz\ensure_dir( $run_dir );
+\HtmlApiFuzz\ensure_dir( $diag );
+
+html_api_fuzz_codex_test_expect_exception(
+	static function () use ( $triage, $repo_root ): void {
+		html_api_fuzz_codex_parse_args(
+			array(
+				'codex-triage-orchestrator.php',
+				'--triage-dir',
+				$triage,
+				'--repo-root',
+				$repo_root,
+				'--max-concurent',
+				'4',
+			)
+		);
+	},
+	'Unknown CLI options should be rejected.'
+);
+
+html_api_fuzz_codex_test_expect_exception(
+	static function () use ( $triage, $repo_root ): void {
+		html_api_fuzz_codex_parse_args(
+			array(
+				'codex-triage-orchestrator.php',
+				'--triage-dir',
+				$triage,
+				'--repo-root',
+				$repo_root,
+				'--mode',
+				'fix',
+				'--sandbox',
+				'read-only',
+			)
+		);
+	},
+	'Fix mode should reject a read-only sandbox.'
+);
+
+$candidates = html_api_fuzz_codex_load_candidate_signatures(
+	array(
+		'signatures' => array(
+			123456 => array(
+				'status'            => 'minimized',
+				'minimizeOutputDir' => $triage,
+			),
+			"bad\nhash" => array(
+				'status'            => 'minimized',
+				'minimizeOutputDir' => $triage,
+			),
+		),
+	)
+);
+html_api_fuzz_codex_test_assert( 1 === count( $candidates ), 'Numeric signature hashes should be accepted and unsafe hashes skipped.' );
+html_api_fuzz_codex_test_assert( '123456' === $candidates[0][0], 'Numeric signature hash should be cast to a string.' );
+
+$signature = array(
+	'failureClass'     => 'tree-mismatch',
+	'status'           => 'minimized',
+	'resultPath'       => '/etc/passwd',
+	'minimizeResult'   => $run_dir . '/minimize-result.json',
+	'minimizeOutputDir'=> $run_dir,
+);
+file_put_contents( $run_dir . '/minimize-result.json', "{}\n" );
+$prompt = html_api_fuzz_codex_prompt_for_signature(
+	array(
+		'triageDir'      => $triage,
+		'diagnosticsDir' => $diag,
+		'repoRoot'       => $repo_root,
+		'runDir'         => $run_dir,
+		'mode'           => 'classify',
+	),
+	'123456',
+	$signature
+);
+html_api_fuzz_codex_test_assert( false === strpos( $prompt, '/etc/passwd' ), 'Prompt should not include artifacts outside the run or triage directories.' );
+html_api_fuzz_codex_test_assert( false !== strpos( $prompt, $run_dir . '/minimize-result.json' ), 'Prompt should include expected run artifacts.' );
+
+html_api_fuzz_codex_test_expect_exception(
+	static function () use ( $triage, $diag, $repo_root, $run_dir, $signature ): void {
+		$unsafe = $signature;
+		$unsafe['failureClass'] = "tree\nmismatch";
+		html_api_fuzz_codex_prompt_for_signature(
+			array(
+				'triageDir'      => $triage,
+				'diagnosticsDir' => $diag,
+				'repoRoot'       => $repo_root,
+				'runDir'         => $run_dir,
+				'mode'           => 'classify',
+			),
+			'123456',
+			$unsafe
+		);
+	},
+	'Prompt metadata with control characters should be rejected.'
+);
+
+$signature_dir = $diag . '/123456';
+\HtmlApiFuzz\ensure_dir( $signature_dir );
+file_put_contents( $signature_dir . '/claim.json', json_encode( array( 'pid' => getmypid(), 'claimedAtUnix' => 0 ) ) . "\n" );
+$claimed = html_api_fuzz_codex_claim_signature( $diag, '123456', $signature, 1 );
+html_api_fuzz_codex_test_assert( null !== $claimed, 'Stale claims should be reclaimed even when the old PID is alive.' );
+html_api_fuzz_codex_test_assert( 1 === count( glob( $signature_dir . '/claim.stale.*.json' ) ), 'Stale claim should be archived.' );
+
+file_put_contents( $signature_dir . '/done.json', "null\n" );
+$claimed = html_api_fuzz_codex_claim_signature( $diag, '123456', $signature, 1 );
+html_api_fuzz_codex_test_assert( null !== $claimed, 'Malformed done metadata should not be treated as a successful completed job.' );
+html_api_fuzz_codex_test_assert( 1 <= count( glob( $signature_dir . '/done.failed.*.json' ) ), 'Malformed done metadata should be archived for retry.' );
+
+/*
+ * Main-loop STOP handling: with a STOP file in the run directory (resolved
+ * from the watcher state's runDir), the orchestrator must exit cleanly
+ * without launching anything.
+ */
+\HtmlApiFuzz\write_json_file(
+	$triage . '/state.json',
+	array(
+		'kind'       => 'html-api-fuzz-triage-state',
+		'runDir'     => $run_dir,
+		'signatures' => array(),
+	)
+);
+file_put_contents( $run_dir . '/STOP', "{}\n" );
+$stop_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/codex-triage-orchestrator.php',
+		'--triage-dir',
+		$triage,
+		'--diagnostics-dir',
+		$diag,
+		'--repo-root',
+		$repo_root,
+		'--codex-bin',
+		'false',
+	),
+	$repo_root,
+	30000
+);
+html_api_fuzz_codex_test_assert( 0 === $stop_proc['code'] && ! $stop_proc['timedOut'], 'Orchestrator should exit cleanly when a STOP file is present.' );
+html_api_fuzz_codex_test_assert( false !== strpos( $stop_proc['output'], 'stop requested' ), 'Orchestrator should report the stop request.' );
+html_api_fuzz_codex_test_assert( false === strpos( $stop_proc['output'], 'launched ' ), 'Orchestrator should not launch jobs after a stop request.' );
+
+echo "codex triage orchestrator smoke tests passed\n";
diff --git a/tools/html-api-fuzz/tests/generator-policy-smoke.php b/tools/html-api-fuzz/tests/generator-policy-smoke.php
new file mode 100644
index 0000000000000..2fdb84913f5af
--- /dev/null
+++ b/tools/html-api-fuzz/tests/generator-policy-smoke.php
@@ -0,0 +1,1560 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+
+function html_api_fuzz_smoke_fail( string $message ): void {
+	fprintf( STDERR, "FAIL: %s\n", $message ); // The failure text goes to stderr.
+	exit( 1 ); // Ends the script with a non-zero status.
+}
+
+function html_api_fuzz_smoke_assert( bool $condition, string $message ): void {
+	// A true condition is a no-op. Otherwise $message is handed to
+	// html_api_fuzz_smoke_fail(), which reports it and terminates the script.
+	$condition || html_api_fuzz_smoke_fail( $message );
+}
+
+function html_api_fuzz_smoke_valid_utf8( string $bytes ): bool {
+	return (bool) preg_match( '//u', $bytes ); // With /u, preg_match() yields false for malformed UTF-8 subjects.
+}
+
+function html_api_fuzz_smoke_has_non_whitespace_c0_control( string $bytes ): bool {
+	return (bool) preg_match( '/[\x01-\x08\x0b\x0e-\x1f]/', $bytes ); // Bytes 0x01-0x1F except TAB, LF, FF and CR; NUL is not matched.
+}
+
+function html_api_fuzz_smoke_note_syntax_chars( string $bytes, array &$found ): void {
+	// $found is keyed by the characters being tracked and is updated in place.
+	// Every key that occurs somewhere in $bytes is switched to true; all other
+	// entries keep their current value and their position in the array.
+	$keys_present = array_filter( array_keys( $found ), static fn ( $key ): bool => str_contains( $bytes, (string) $key ) );
+	$found        = array_replace( $found, array_fill_keys( $keys_present, true ) );
+}
+
+function html_api_fuzz_smoke_expect_invalid_argument( callable $callback, string $message ): void {
+	$rejected = false; // Flipped only by an InvalidArgumentException; any other throwable propagates.
+	try {
+		$callback();
+	} catch ( InvalidArgumentException $expected ) {
+		$rejected = true;
+	}
+	$rejected || html_api_fuzz_smoke_fail( $message );
+}
+
+function html_api_fuzz_smoke_dom_drops_bare_xlink_local_name_after_xlink(): bool {
+	if ( ! class_exists( 'Dom\\HTMLDocument' ) ) {
+		return false; // Nothing to probe without the Dom\HTMLDocument class.
+	}
+
+	$saved_error_mode = libxml_use_internal_errors( true );
+	try {
+		$svg = Dom\HTMLDocument::createFromString( '<svg xlink:href href></svg>', LIBXML_NOERROR )->getElementsByTagName( 'svg' )->item( 0 );
+		// True only when the xlink-namespaced href is present and no attribute named plain "href" is.
+		return null !== $svg && $svg->hasAttributeNS( 'http://www.w3.org/1999/xlink', 'href' ) && ! $svg->hasAttribute( 'href' );
+	} catch ( Throwable $probe_error ) {
+		return false; // Any failure while parsing or inspecting is reported as "does not drop".
+	} finally {
+		// Runs on both return paths: discard collected libxml errors, then restore the previous mode.
+		libxml_clear_errors();
+		libxml_use_internal_errors( $saved_error_mode );
+	}
+}
+
+function html_api_fuzz_smoke_dom_reparents_heading_after_mathml_text_integration_point( string $encoding = 'text/html' ): bool {
+	// One fixture, rendered by both renderers under the same mode and token/node limits.
+	$markup = '<h1><math><annotation-xml encoding="' . $encoding . '"><p>x</h1><area data-x>';
+	$mode   = \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY;
+	$limits = array( 'maxTokens' => 200, 'maxNodes' => 200 );
+
+	$wp_render  = \HtmlApiFuzz\TreeRenderer::render_wordpress( $markup, $mode, $limits );
+	$dom_render = \HtmlApiFuzz\TreeRenderer::render_dom( $markup, $mode, $limits );
+	foreach ( array( $wp_render, $dom_render ) as $render ) {
+		if ( \HtmlApiFuzz\TreeRenderer::STATUS_OK !== ( $render['status'] ?? null ) ) {
+			return false; // Without two successful renders there is nothing to compare.
+		}
+	}
+
+	$tolerances = $wp_render['domOracleLineTolerances'] ?? array();
+	$comparison = \HtmlApiFuzz\TreeRenderer::compare_trees( $wp_render['tree'], $dom_render['tree'], $tolerances );
+
+	return false === ( $comparison['ok'] ?? true ); // Only an explicit 'ok' => false yields true.
+}
+
+function html_api_fuzz_smoke_rm_tree( string $path ): void {
+	// Best-effort recursive delete; unlink/rmdir failures are silenced. Links are tested first
+	// because file_exists() is false for a dangling symlink, which would otherwise be left
+	// behind and keep its parent directory from being removed. Links are never followed.
+	if ( is_link( $path ) || is_file( $path ) ) {
+		@unlink( $path );
+		return;
+	}
+	if ( ! is_dir( $path ) ) {
+		return;
+	}
+	foreach ( array_diff( scandir( $path ) ?: array(), array( '.', '..' ) ) as $item ) {
+		html_api_fuzz_smoke_rm_tree( $path . DIRECTORY_SEPARATOR . $item );
+	}
+	@rmdir( $path );
+}
+
+$valid = null; // Set to the first generated sample whose parameters report truncation.
+for ( $truncation_seed = 1; $truncation_seed <= 64; $truncation_seed++ ) {
+	$candidate = \HtmlApiFuzz\Generator::generate(
+		$truncation_seed,
+		'balanced',
+		\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'valid-utf8',
+		64
+	);
+	html_api_fuzz_smoke_assert( 'valid-utf8' === $candidate['payloadPolicy'], 'valid-utf8 policy should be resolved.' );
+	html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $candidate['input'] ), 'valid-utf8 policy should produce valid UTF-8 bytes.' );
+	html_api_fuzz_smoke_assert( strlen( $candidate['input'] ) <= 64, 'max-input-bytes should cap generated input.' );
+	if ( true === $candidate['parameters']['truncated'] ) {
+		$valid = $candidate;
+		break; // One truncated sample is enough for the checks that follow.
+	}
+}
+html_api_fuzz_smoke_assert( null !== $valid, 'max-input-bytes smoke should exercise truncation within the seed budget.' );
+html_api_fuzz_smoke_assert( in_array( 'generator:truncated', $valid['parameters']['features'], true ), 'truncation should be recorded as a feature.' );
+html_api_fuzz_smoke_assert( 'valid-utf8' === $valid['parameters']['payloadPolicy'], 'parameters should include payload policy.' );
+html_api_fuzz_smoke_expect_invalid_argument(
+	static function (): void {
+		\HtmlApiFuzz\Generator::generate( 1, 'balanced', 'bogus-mode', 'valid-utf8' ); // 'bogus-mode' is the invalid part.
+	},
+	'invalid generator mode should throw.'
+);
+html_api_fuzz_smoke_assert( ! in_array( 'invalid-byte-heavy', \HtmlApiFuzz\Generator::payload_policies(), true ), 'invalid-byte-heavy should not be selectable for generated inputs.' );
+html_api_fuzz_smoke_assert( in_array( 'invalid-byte-heavy', \HtmlApiFuzz\Generator::payload_policy_labels(), true ), 'invalid-byte-heavy should remain a recognized replay metadata label.' );
+html_api_fuzz_smoke_expect_invalid_argument(
+	static function (): void {
+		\HtmlApiFuzz\Generator::generate( 1, 'attributes-entities', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'invalid-byte-heavy' );
+	},
+	'invalid-byte-heavy policy should be rejected for generated inputs.'
+);
+
+foreach ( \HtmlApiFuzz\Generator::payload_policies() as $payload_policy ) { // Every policy x profile x mode, seeds 1-8.
+	foreach ( \HtmlApiFuzz\Generator::profiles() as $profile ) {
+		foreach ( \HtmlApiFuzz\Generator::modes() as $mode ) {
+			for ( $seed = 1; $seed <= 8; ++$seed ) {
+				$generated = \HtmlApiFuzz\Generator::generate( $seed, $profile, $mode, $payload_policy, 4096 );
+				html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $generated['input'] ), "{$payload_policy}/{$profile}/{$mode}/{$seed} should produce valid UTF-8 bytes." );
+				html_api_fuzz_smoke_assert( ! in_array( 'payload:invalid-byte', $generated['parameters']['features'], true ), "{$payload_policy}/{$profile}/{$mode}/{$seed} should not record invalid-byte generation." );
+			}
+		}
+	}
+}
+$found_non_whitespace_c0_control = false; // Becomes true once any sample contains such a byte.
+for ( $seed = 1; $seed <= 512; ++$seed ) {
+	$generated = \HtmlApiFuzz\Generator::generate( $seed, 'balanced', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'mostly-valid', 4096 );
+	if ( html_api_fuzz_smoke_has_non_whitespace_c0_control( $generated['input'] ) ) {
+		$found_non_whitespace_c0_control = true;
+		html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $generated['input'] ), 'non-whitespace C0 control sample should still be valid UTF-8.' );
+		break; // The first hit is sufficient.
+	}
+}
+html_api_fuzz_smoke_assert( $found_non_whitespace_c0_control, 'generated valid UTF-8 payloads should retain non-whitespace C0 control coverage.' );
+
+$required_generator_features = array( // Feature labels that the generated samples are required to cover.
+	'charref:text',
+	'charref:attr',
+	'charref:rcdata',
+	'charref:text:named-semicolon',
+	'charref:text:named-missing-semicolon-legacy',
+	'charref:text:named-missing-semicolon-invalid',
+	'charref:text:numeric-valid',
+	'charref:text:numeric-invalid',
+	'charref:attr:named-semicolon',
+	'charref:attr:named-missing-semicolon-legacy',
+	'charref:attr:named-missing-semicolon-invalid',
+	'charref:attr:numeric-valid',
+	'charref:attr:numeric-invalid',
+	'charref:rcdata:named-semicolon',
+	'charref:rcdata:invalid',
+	'charref:rcdata:numeric-valid',
+	'charref:rcdata:numeric-invalid',
+	'charref:leading-zero',
+	'attr:weird-name',
+	'attr:weird-spacing',
+	'attr:malformed',
+	'ascii:syntax-char',
+	'ascii:syntax-ampersand',
+	'ascii:syntax-less-than',
+	'ascii:syntax-greater-than',
+	'ascii:syntax-double-quote',
+	'ascii:syntax-single-quote',
+	'ascii:syntax-equals',
+	'payload:short-ascii',
+	'payload:empty-ascii',
+	'payload:medium-ascii',
+	'payload:ascii-length-0',
+	'payload:ascii-length-1',
+	'payload:ascii-length-2',
+	'payload:ascii-length-3',
+	'payload:ascii-length-4',
+	'payload:ascii-length-5',
+	'payload:ascii-length-6',
+	'payload:ascii-length-7',
+	'payload:ascii-length-8',
+	'payload:ascii-length-9',
+	'payload:ascii-length-10',
+	'tag:unusual-name',
+	'tag:invalid-name',
+	'tag:alpha-invalid-name',
+	'tag:alpha-weird-name',
+	'tag:bogus-open-name',
+	'tag:weird-spacing',
+	'attr:duplicate',
+	'select',
+	'select:option',
+	'select:optgroup',
+	'select:breaker',
+	'select:nested',
+	'adoption-agency-pattern',
+	'adoption:misnested-closers',
+	'adoption:reconstruction',
+	'adoption:noahs-ark',
+	'auto-closing-chain',
+	'special-closers',
+	'foreign:breakout',
+	'foreign:annotation-xml-encoding-variant',
+	'foreign:cdata',
+	'foreign:case-mangled-name',
+	'plaintext',
+);
+/*
+ * Feature-coverage sweep: walk the listed profiles and seeds 1-128 until every required
+ * feature has been recorded by a sample that was not truncated.
+ */
+$found_generator_features     = array_fill_keys( $required_generator_features, false );
+$all_generator_features_found = false;
+foreach ( array( 'attributes-entities', 'rawtext-rcdata', 'text-fragment', 'incomplete-malformed', 'balanced', 'select', 'formatting-adoption', 'foreign-content' ) as $feature_profile ) {
+	for ( $seed = 1; $seed <= 128; ++$seed ) {
+		$generated = \HtmlApiFuzz\Generator::generate( $seed, $feature_profile, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'mostly-valid', null );
+		html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $generated['input'] ), "{$feature_profile}/{$seed} feature-coverage sample should produce valid UTF-8 bytes." );
+		$features = $generated['parameters']['features'];
+		if ( in_array( 'generator:truncated', $features, true ) || in_array( 'generator:hard-truncated', $features, true ) ) {
+			continue;
+		}
+		$found_generator_features = array_replace( $found_generator_features, array_fill_keys( array_intersect( $features, $required_generator_features ), true ) );
+		if ( ! in_array( false, $found_generator_features, true ) ) {
+			$all_generator_features_found = true;
+			break 2;
+		}
+	}
+}
+foreach ( $found_generator_features as $feature => $found ) { // Checked first so that a failure names the missing feature.
+	html_api_fuzz_smoke_assert( $found, "generated samples should cover {$feature}." );
+}
+html_api_fuzz_smoke_assert( $all_generator_features_found, 'generated samples should cover all required generator features before exhausting the smoke seed budget.' );
+
+$required_comment_forms = array( // Feature label => example markup plus a predicate over the generated input.
+	'comment:ordinary-simple'              => array(
+		'example' => '<!--comment-->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!--comment-->' );
+		},
+	),
+	'comment:empty'                        => array(
+		'example' => '<!---->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!---->' );
+		},
+	),
+	'comment:space'                        => array(
+		'example' => '<!-- -->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!-- -->' );
+		},
+	),
+	'comment:short-empty-end'              => array(
+		'example' => '<!-->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!-->' );
+		},
+	),
+	'comment:short-hyphen-end'             => array(
+		'example' => '<!--->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!--->' );
+		},
+	),
+	'comment:nested-hyphens'               => array(
+		'example' => '<!--a<!--b--c-->',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!--a<!--b--c-->' );
+		},
+	),
+	'comment:malformed-bang-ending'        => array(
+		'example' => '<!--x--!>',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!--x--!>' );
+		},
+	),
+	'comment:malformed-greater-than-ending' => array(
+		'example' => '<!--x>',
+		'matches' => static function ( string $input ): bool {
+			return str_ends_with( $input, '<!--x>' ); // Must be the tail of the input, not merely present.
+		},
+	),
+	'comment:unterminated'                 => array(
+		'example' => '<!--x',
+		'matches' => static function ( string $input ): bool {
+			return str_ends_with( $input, '<!--x' ); // Must be the tail of the input, not merely present.
+		},
+	),
+	'comment:bogus-pi'                     => array(
+		'example' => '<?target?>',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<?target?>' );
+		},
+	),
+	'comment:bogus-declaration'            => array(
+		'example' => '<!not-a-comment>',
+		'matches' => static function ( string $input ): bool {
+			return false !== strpos( $input, '<!not-a-comment>' );
+		},
+	),
+);
+$found_comment_forms = array_fill_keys( array_keys( $required_comment_forms ), false );
+for ( $seed = 1; $seed <= 1024; ++$seed ) {
+	$generated = \HtmlApiFuzz\Generator::generate( $seed, 'comments-doctype-bogus', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'mostly-valid', null );
+	html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $generated['input'] ), "comments-doctype-bogus/{$seed} comment-form sample should produce valid UTF-8 bytes." );
+	foreach ( $required_comment_forms as $feature => $form ) {
+		// A form counts only when the feature label is recorded AND its predicate matches the input.
+		if ( in_array( $feature, $generated['parameters']['features'], true ) && $form['matches']( $generated['input'] ) ) {
+			$found_comment_forms[ $feature ] = true;
+		}
+	}
+	if ( ! in_array( false, $found_comment_forms, true ) ) {
+		break; // Every form has been seen; no need to spend more seeds.
+	}
+}
+foreach ( $found_comment_forms as $feature => $found ) {
+	html_api_fuzz_smoke_assert( $found, "comments-doctype-bogus generation should cover {$feature}." );
+}
+
+$syntax_chars = array( '&', '<', '>', '"', "'", '=' ); // Each must be observed in every context listed below.
+$found_syntax_contexts = array(
+	'standalone input'        => array_fill_keys( $syntax_chars, false ),
+	'quoted attribute value'  => array_fill_keys( $syntax_chars, false ),
+	'rawtext element content' => array_fill_keys( $syntax_chars, false ),
+	'comment content'         => array_fill_keys( $syntax_chars, false ),
+);
+$found_text_fragment_lengths = array_fill_keys( range( 0, 10 ), false ); // Keyed by exact input length in bytes.
+$found_medium_text_fragment  = false; // True once any text-fragment input exceeds 10 bytes.
+for ( $seed = 1; $seed <= 512; ++$seed ) {
+	$text_fragment = \HtmlApiFuzz\Generator::generate( $seed, 'text-fragment', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'mostly-valid', null );
+	$input_length  = strlen( $text_fragment['input'] );
+	if ( $input_length <= 10 ) {
+		$found_text_fragment_lengths[ $input_length ] = true;
+	} else {
+		$found_medium_text_fragment = true;
+	}
+	html_api_fuzz_smoke_note_syntax_chars( $text_fragment['input'], $found_syntax_contexts['standalone input'] );
+
+	foreach ( array( 'rawtext-rcdata', 'attributes-entities', 'comments-doctype-bogus' ) as $profile ) { // Same seed, three more profiles.
+		$generated = \HtmlApiFuzz\Generator::generate( $seed, $profile, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'mostly-valid', null );
+		if ( preg_match_all( '~<(script|style|iframe|noembed|noframes|xmp|noscript)\b(?:[^"\'<>]|"[^"]*"|\'[^\']*\')*>(.*?)</\1>~is', $generated['input'], $matches ) ) {
+			foreach ( $matches[2] as $rawtext ) { // Group 2 is the text between the opening tag and its matching closer.
+				html_api_fuzz_smoke_note_syntax_chars( $rawtext, $found_syntax_contexts['rawtext element content'] );
+			}
+		}
+		if ( preg_match_all( '/<!--(.*?)-->/s', $generated['input'], $matches ) ) {
+			foreach ( $matches[1] as $comment ) {
+				html_api_fuzz_smoke_note_syntax_chars( $comment, $found_syntax_contexts['comment content'] );
+			}
+		}
+		if ( preg_match_all( '~\s[-A-Za-z0-9_:.]+\s*=\s*(?:"([^"]*)"|\'([^\']*)\')~', $generated['input'], $matches, PREG_SET_ORDER ) ) {
+			foreach ( $matches as $attribute ) { // Group 1 holds a double-quoted value, group 2 a single-quoted one.
+				html_api_fuzz_smoke_note_syntax_chars( ( $attribute[1] ?? '' ) . ( $attribute[2] ?? '' ), $found_syntax_contexts['quoted attribute value'] );
+			}
+		}
+	}
+}
+foreach ( $found_text_fragment_lengths as $length => $found ) {
+	html_api_fuzz_smoke_assert( $found, "text-fragment generation should cover exact {$length}-byte inputs." );
+}
+html_api_fuzz_smoke_assert( $found_medium_text_fragment, 'text-fragment generation should cover medium-sized inputs.' );
+for ( $seed = 1; $seed <= 16; ++$seed ) {
+	$stress_text_fragment = \HtmlApiFuzz\Generator::generate( $seed, 'text-fragment', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, 'stress-long', null );
+	html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $stress_text_fragment['input'] ), 'text-fragment stress-long generation should produce valid UTF-8 bytes.' );
+	html_api_fuzz_smoke_assert( strlen( $stress_text_fragment['input'] ) >= 64, 'text-fragment stress-long generation should honor the lower long-input bound.' );
+	html_api_fuzz_smoke_assert( strlen( $stress_text_fragment['input'] ) <= 1024, 'text-fragment stress-long generation should honor the upper long-input bound.' );
+	html_api_fuzz_smoke_assert( in_array( 'input:long', $stress_text_fragment['parameters']['features'], true ), 'text-fragment stress-long generation should record the long-input feature.' );
+}
+foreach ( $found_syntax_contexts as $context => $found_chars ) {
+	foreach ( $found_chars as $char => $found ) {
+		html_api_fuzz_smoke_assert( $found, "{$context} should expose syntax character {$char} in final generated HTML." );
+	}
+}
+
+$found_resource_stress = false; // True once an 'auto' run resolves to the resource-stress profile.
+$found_resource_stress_long = false; // True once an 'auto' run resolves to the stress-long payload policy.
+for ( $seed = 1; $seed <= 512; ++$seed ) {
+	$generated = \HtmlApiFuzz\Generator::generate( $seed, 'auto', 'auto', 'auto', 4096 );
+	html_api_fuzz_smoke_assert( html_api_fuzz_smoke_valid_utf8( $generated['input'] ), 'auto generation should produce valid UTF-8 bytes.' );
+	html_api_fuzz_smoke_assert( ! in_array( 'payload:invalid-byte', $generated['parameters']['features'], true ), 'auto generation should not record invalid-byte payload features.' );
+	if ( ! in_array( $generated['profile'], array( 'attributes-entities', 'incomplete-malformed' ), true ) ) { // Outside the two targeted profiles.
+		html_api_fuzz_smoke_assert( ! in_array( 'attr:malformed', $generated['parameters']['features'], true ), 'auto generation should keep malformed attributes in targeted profiles.' );
+		html_api_fuzz_smoke_assert( ! in_array( 'tag:weird-syntax', $generated['parameters']['features'], true ), 'auto generation should keep weird tag syntax in targeted profiles.' );
+	}
+	if ( 'resource-stress' === $generated['profile'] ) {
+		$found_resource_stress = true;
+	}
+	if ( 'stress-long' === $generated['payloadPolicy'] ) {
+		html_api_fuzz_smoke_assert( 'resource-stress' === $generated['profile'], 'auto stress-long payloads should stay in the resource-stress profile.' );
+		$found_resource_stress_long = true;
+	}
+}
+html_api_fuzz_smoke_assert( $found_resource_stress, 'auto generation should retain the resource-stress bucket.' );
+html_api_fuzz_smoke_assert( $found_resource_stress_long, 'resource-stress auto generation should retain stress-long payload coverage.' );
+for ( $seed = 1; $seed <= 64; ++$seed ) { // Explicit 'balanced' profile with mode and payload policy left on 'auto'.
+	$generated = \HtmlApiFuzz\Generator::generate( $seed, 'balanced', 'auto', 'auto', 4096 );
+	html_api_fuzz_smoke_assert( 'stress-long' !== $generated['payloadPolicy'], 'non-resource explicit profiles should not auto-resolve stress-long.' );
+}
+
+$tmp = tempnam( sys_get_temp_dir(), 'html-api-fuzz-policy-smoke-' );
+if ( false === $tmp ) {
+	html_api_fuzz_smoke_fail( 'Could not create temp path.' );
+}
+@unlink( $tmp ); // tempnam() created a file; remove it so the same unique name can be reused as a directory.
+\HtmlApiFuzz\ensure_dir( $tmp );
+register_shutdown_function( 'html_api_fuzz_smoke_rm_tree', $tmp ); // Clean up the scratch tree when the script ends.
+html_api_fuzz_smoke_expect_invalid_argument(
+	static function () use ( $tmp ): void {
+		\HtmlApiFuzz\Worker::run(
+			array(
+				'input-base64'   => base64_encode( '<p>x</p>' ),
+				'payload-policy' => 'valid-ut8', // Deliberately not a valid policy name.
+				'output-dir'     => $tmp . '/invalid-policy',
+			)
+		);
+	},
+	'invalid direct-input payload policy should throw.'
+);
+html_api_fuzz_smoke_expect_invalid_argument(
+	static function () use ( $tmp ): void {
+		\HtmlApiFuzz\Worker::run(
+			array(
+				'seed'           => '1',
+				'profile'        => 'balanced',
+				'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+				'payload-policy' => 'invalid-byte-heavy', // Legacy label paired with a seed, i.e. a generated input.
+				'output-dir'     => $tmp . '/generated-invalid-heavy',
+			)
+		);
+	},
+	'generated worker inputs should reject legacy invalid-byte-heavy policy.'
+);
+
+$worker_dir = $tmp . '/worker'; // Output directory whose replay.json and result.json are read back below.
+\HtmlApiFuzz\Worker::run(
+	array(
+		'seed'            => '17',
+		'profile'         => 'balanced',
+		'mode'            => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'payload-policy'  => 'valid-utf8',
+		'max-input-bytes' => '2048',
+		'output-dir'      => $worker_dir,
+	)
+);
+$worker_replay = \HtmlApiFuzz\read_json_file( $worker_dir . '/replay.json' );
+$worker_result = \HtmlApiFuzz\read_json_file( $worker_dir . '/result.json' );
+html_api_fuzz_smoke_assert( 'valid-utf8' === ( $worker_replay['payloadPolicy'] ?? null ), 'replay should persist top-level payload policy.' );
+html_api_fuzz_smoke_assert( 'valid-utf8' === ( $worker_replay['generator']['payloadPolicy'] ?? null ), 'replay generator parameters should persist payload policy.' );
+html_api_fuzz_smoke_assert( 'generated' === ( $worker_replay['inputSource'] ?? null ), 'generated replay should record generated input source.' );
+html_api_fuzz_smoke_assert( 'valid-utf8' === ( $worker_result['payloadPolicy'] ?? null ), 'result should persist payload policy.' );
+html_api_fuzz_smoke_assert( ! empty( $worker_result['generator']['features'] ?? array() ), 'result should persist non-empty generator features.' );
+html_api_fuzz_smoke_assert( ( $worker_replay['generator']['features'] ?? null ) === ( $worker_result['generator']['features'] ?? null ), 'result and replay should persist the same generator features.' );
+$git_metadata = \HtmlApiFuzz\git_metadata();
+html_api_fuzz_smoke_assert( array_key_exists( 'available', $git_metadata ), 'git metadata should report availability.' );
+html_api_fuzz_smoke_assert( array_key_exists( 'dirty', $git_metadata ), 'git metadata should report dirty state.' );
+if ( $git_metadata['available'] ?? false ) { // The commit and dirty comparisons only run when git metadata is available.
+	// NOTE(review): the pattern accepts any 7+ hex digits although the message says "full" — confirm intent.
+	html_api_fuzz_smoke_assert( 1 === preg_match( '/^[0-9a-f]{7,}$/', $git_metadata['commit'] ?? '' ), 'git metadata should include a full hex commit hash.' );
+	html_api_fuzz_smoke_assert( ( $git_metadata['commit'] ?? null ) === ( $worker_replay['repoCommit'] ?? null ), 'replay should persist the current commit hash.' );
+	html_api_fuzz_smoke_assert( ( $git_metadata['dirty'] ?? null ) === ( $worker_replay['repoDirty'] ?? null ), 'replay should persist the tracked-file dirty flag.' );
+}
+$ancestor_repo = $tmp . '/ancestor-repo'; // `git init` runs here; metadata is then requested for the child directory.
+$ancestor_child = $ancestor_repo . '/child';
+\HtmlApiFuzz\ensure_dir( $ancestor_child );
+$ancestor_init = \HtmlApiFuzz\run_git_command( array( 'init' ), 1000, $ancestor_repo );
+if ( 0 === $ancestor_init['code'] ) { // The check is skipped when `git init` did not succeed.
+	$ancestor_child_metadata = \HtmlApiFuzz\git_metadata( 1000, $ancestor_child );
+	html_api_fuzz_smoke_assert( false === ( $ancestor_child_metadata['available'] ?? null ), 'git metadata should not report an ancestor repository as the current repo.' );
+}
+$dirty_repo = $tmp . '/dirty-repo';
+\HtmlApiFuzz\ensure_dir( $dirty_repo );
+$dirty_init = \HtmlApiFuzz\run_git_command( array( 'init' ), 1000, $dirty_repo );
+if ( 0 === $dirty_init['code'] ) { // The check is skipped when `git init` did not succeed.
+	file_put_contents( $dirty_repo . '/tracked.txt', "clean\n" );
+	$dirty_add = \HtmlApiFuzz\run_git_command( array( 'add', 'tracked.txt' ), 1000, $dirty_repo );
+	$dirty_commit = \HtmlApiFuzz\run_git_command(
+		array(
+			'-c',
+			'user.email=html-api-fuzz@example.invalid',
+			'-c',
+			'user.name=HTML API Fuzz',
+			'-c',
+			'commit.gpgsign=false',
+			'commit',
+			'--no-gpg-sign',
+			'--no-verify',
+			'-m',
+			'initial',
+		),
+		1000,
+		$dirty_repo
+	);
+	html_api_fuzz_smoke_assert( 0 === $dirty_add['code'], 'temp git repo should stage the tracked dirty fixture.' );
+	html_api_fuzz_smoke_assert( 0 === $dirty_commit['code'], 'temp git repo should commit the tracked dirty fixture.' );
+	$clean_repo_metadata = \HtmlApiFuzz\git_metadata( 1000, $dirty_repo, false );
+	html_api_fuzz_smoke_assert( true === ( $clean_repo_metadata['available'] ?? null ), 'temp git repo metadata should be available.' );
+	html_api_fuzz_smoke_assert( false === ( $clean_repo_metadata['dirty'] ?? null ), 'clean tracked temp git repo should report dirty false.' );
+	file_put_contents( $dirty_repo . '/tracked.txt', "dirty\n" ); // Modify the committed file, then read metadata again.
+	$dirty_repo_metadata = \HtmlApiFuzz\git_metadata( 1000, $dirty_repo, false );
+	html_api_fuzz_smoke_assert( true === ( $dirty_repo_metadata['dirty'] ?? null ), 'modified tracked temp git repo should report dirty true.' );
+}
+$fake_git_root = $tmp . '/fake-git-root';
+$fake_git_bin = $tmp . '/fake-git-bin';
+\HtmlApiFuzz\ensure_dir( $fake_git_root );
+\HtmlApiFuzz\ensure_dir( $fake_git_bin );
+$fake_git = $fake_git_bin . '/git'; // Stub script: answers rev-parse, branch and show, but exits 2 for `diff`.
+file_put_contents(
+	$fake_git,
+	"#!/bin/sh\n" .
+	"if [ \"\$1\" = \"-C\" ]; then root=\"\$2\"; shift 2; else root=\"\$PWD\"; fi\n" .
+	"if [ \"\$1\" = \"rev-parse\" ] && [ \"\$2\" = \"--show-toplevel\" ]; then printf '%s\\n' \"\$root\"; exit 0; fi\n" .
+	"if [ \"\$1\" = \"rev-parse\" ] && [ \"\$2\" = \"HEAD\" ]; then printf '%s\\n' abcdef1234567890abcdef1234567890abcdef12; exit 0; fi\n" .
+	"if [ \"\$1\" = \"rev-parse\" ] && [ \"\$2\" = \"--short=12\" ]; then printf '%s\\n' abcdef123456; exit 0; fi\n" .
+	"if [ \"\$1\" = \"branch\" ] && [ \"\$2\" = \"--show-current\" ]; then printf '%s\\n' main; exit 0; fi\n" .
+	"if [ \"\$1\" = \"show\" ]; then printf '%s\\n' 2026-01-01T00:00:00+00:00; exit 0; fi\n" .
+	"if [ \"\$1\" = \"diff\" ]; then exit 2; fi\n" .
+	"exit 1\n"
+);
+chmod( $fake_git, 0755 );
+$old_path = getenv( 'PATH' ); // false when PATH is not set at all.
+putenv( 'PATH=' . $fake_git_bin . PATH_SEPARATOR . ( false === $old_path ? '' : $old_path ) ); // Stub directory goes first on PATH.
+$unknown_dirty_metadata = \HtmlApiFuzz\git_metadata( 1000, $fake_git_root, false );
+if ( false === $old_path ) {
+	putenv( 'PATH' ); // PATH was unset before, so unset it again.
+} else {
+	putenv( 'PATH=' . $old_path );
+}
+html_api_fuzz_smoke_assert( true === ( $unknown_dirty_metadata['available'] ?? null ), 'git metadata should remain available when only dirty detection fails.' );
+html_api_fuzz_smoke_assert( array_key_exists( 'dirty', $unknown_dirty_metadata ) && null === $unknown_dirty_metadata['dirty'], 'dirty detection failures should report dirty null.' );
+
+$replay_cli_dir = $tmp . '/replay-cli'; // Output directory for replaying the worker's replay.json via replay.php.
+$replay_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/replay.php',
+		'--replay',
+		$worker_dir . '/replay.json',
+		'--output-dir',
+		$replay_cli_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/replay-cli.log'
+);
+html_api_fuzz_smoke_assert( ! $replay_proc['timedOut'] && in_array( $replay_proc['code'], array( 0, 2 ), true ), 'replay CLI should complete.' ); // Exit codes 0 and 2 both pass.
+$replay_cli_replay = \HtmlApiFuzz\read_json_file( $replay_cli_dir . '/replay.json' );
+html_api_fuzz_smoke_assert( null === ( $replay_cli_replay['generator'] ?? null ), 'replay CLI output should not invent immediate generator metadata.' );
+html_api_fuzz_smoke_assert( 'input-file' === ( $replay_cli_replay['inputSource'] ?? null ), 'replay CLI output should record immediate input source.' );
+html_api_fuzz_smoke_assert( ( $worker_replay['generator'] ?? null ) === ( $replay_cli_replay['originalGenerator'] ?? null ), 'replay CLI output should preserve original generator metadata.' );
+html_api_fuzz_smoke_assert( ( $worker_replay['repoCommit'] ?? null ) === ( $replay_cli_replay['sourceReplay']['repoCommit'] ?? null ), 'replay CLI output should preserve source replay commit metadata.' );
+html_api_fuzz_smoke_assert( ( $worker_replay['repoDirty'] ?? null ) === ( $replay_cli_replay['sourceReplay']['repoDirty'] ?? null ), 'replay CLI output should preserve source replay dirty metadata.' );
+
+$legacy_replay = $worker_replay; // Copy of the worker replay, relabelled with the 'replay' payload policy.
+$legacy_replay['payloadPolicy'] = 'replay';
+if ( isset( $legacy_replay['generator']['payloadPolicy'] ) ) {
+	$legacy_replay['generator']['payloadPolicy'] = 'replay';
+}
+$legacy_replay_path = $tmp . '/legacy-payload-policy-replay.json';
+\HtmlApiFuzz\write_json_file( $legacy_replay_path, $legacy_replay );
+$legacy_replay_dir = $tmp . '/legacy-replay-cli';
+$legacy_replay_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/replay.php',
+		'--replay',
+		$legacy_replay_path,
+		'--output-dir',
+		$legacy_replay_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/legacy-replay-cli.log'
+);
+html_api_fuzz_smoke_assert( ! $legacy_replay_proc['timedOut'] && in_array( $legacy_replay_proc['code'], array( 0, 2 ), true ), 'legacy replay payload policy labels should not make replay fatal.' );
+$legacy_replay_cli_replay = \HtmlApiFuzz\read_json_file( $legacy_replay_dir . '/replay.json' );
+html_api_fuzz_smoke_assert( null === ( $legacy_replay_cli_replay['payloadPolicy'] ?? null ), 'legacy replay payload policy labels should be treated as unlabeled direct input.' ); // Unlike 'invalid-byte-heavy', this label is expected to be dropped.
+html_api_fuzz_smoke_assert( 'replay' === ( $legacy_replay_cli_replay['originalGenerator']['payloadPolicy'] ?? null ), 'legacy replay should preserve original generator metadata.' );
+
+$legacy_invalid_replay = $worker_replay; // Copy of the worker replay, relabelled with 'invalid-byte-heavy'.
+$legacy_invalid_replay['payloadPolicy'] = 'invalid-byte-heavy';
+if ( isset( $legacy_invalid_replay['generator']['payloadPolicy'] ) ) {
+	$legacy_invalid_replay['generator']['payloadPolicy'] = 'invalid-byte-heavy';
+}
+$legacy_invalid_replay_path = $tmp . '/legacy-invalid-payload-policy-replay.json';
+\HtmlApiFuzz\write_json_file( $legacy_invalid_replay_path, $legacy_invalid_replay );
+$legacy_invalid_replay_dir = $tmp . '/legacy-invalid-replay-cli';
+$legacy_invalid_replay_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/replay.php',
+		'--replay',
+		$legacy_invalid_replay_path,
+		'--output-dir',
+		$legacy_invalid_replay_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/legacy-invalid-replay-cli.log'
+);
+html_api_fuzz_smoke_assert( ! $legacy_invalid_replay_proc['timedOut'] && in_array( $legacy_invalid_replay_proc['code'], array( 0, 2 ), true ), 'legacy invalid-byte-heavy replay payload policy label should not make replay fatal.' );
+$legacy_invalid_replay_cli_replay = \HtmlApiFuzz\read_json_file( $legacy_invalid_replay_dir . '/replay.json' );
+html_api_fuzz_smoke_assert( 'invalid-byte-heavy' === ( $legacy_invalid_replay_cli_replay['payloadPolicy'] ?? null ), 'legacy invalid-byte-heavy replay payload policy label should be preserved as direct-input metadata.' ); // This label is expected to survive the replay.
+html_api_fuzz_smoke_assert( 'invalid-byte-heavy' === ( $legacy_invalid_replay_cli_replay['originalGenerator']['payloadPolicy'] ?? null ), 'legacy invalid-byte-heavy replay should preserve original generator metadata.' );
+
+$invalid_byte_replay_source_dir = $tmp . '/invalid-byte-replay-source'; // Its replay.json feeds the replay and minimize runs below.
+$invalid_byte_replay_source = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'    => base64_encode( '<p>' . str_repeat( 'a', 220 ) . "\xC0" . '</p>' ), // 220 filler bytes, then a lone 0xC0 byte (not valid UTF-8).
+		'profile'         => 'replay',
+		'mode'            => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'payload-policy'  => 'invalid-byte-heavy',
+		'output-dir'      => $invalid_byte_replay_source_dir,
+		'max-tokens'      => '2000',
+		'max-nodes'       => '3000',
+	)
+);
+html_api_fuzz_smoke_assert( 'encoding-mismatch' === ( $invalid_byte_replay_source['failureClass'] ?? null ), 'invalid-byte replay fixture should be a real invalid-byte failure.' );
+
+$invalid_byte_replay_cli_dir = $tmp . '/invalid-byte-replay-cli';
+$invalid_byte_replay_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/replay.php',
+		'--replay',
+		$invalid_byte_replay_source_dir . '/replay.json',
+		'--output-dir',
+		$invalid_byte_replay_cli_dir,
+		'--timeout-ms',
+		'10000',
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/invalid-byte-replay-cli.log'
+);
+html_api_fuzz_smoke_assert( ! $invalid_byte_replay_proc['timedOut'] && 2 === $invalid_byte_replay_proc['code'], 'real invalid-byte replay should complete as a replayed failure.' ); // Here only exit code 2 passes.
+$invalid_byte_replay_cli_replay = \HtmlApiFuzz\read_json_file( $invalid_byte_replay_cli_dir . '/replay.json' );
+html_api_fuzz_smoke_assert( 'invalid-byte-heavy' === ( $invalid_byte_replay_cli_replay['payloadPolicy'] ?? null ), 'real invalid-byte replay should preserve legacy payload policy metadata.' );
+
+$invalid_byte_exact_minimize_dir = $tmp . '/invalid-byte-exact-minimize'; // minimize.php run without an explicit --probe-mode.
+$invalid_byte_exact_minimize_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/minimize.php',
+		'--replay',
+		$invalid_byte_replay_source_dir . '/replay.json',
+		'--output-dir',
+		$invalid_byte_exact_minimize_dir,
+		'--max-attempts',
+		'1',
+		'--timeout-ms',
+		'10000',
+	),
+	\HtmlApiFuzz\repo_root(),
+	20000,
+	$tmp . '/invalid-byte-exact-minimize.log'
+);
+html_api_fuzz_smoke_assert( ! $invalid_byte_exact_minimize_proc['timedOut'] && 0 === $invalid_byte_exact_minimize_proc['code'], 'exact-signature invalid-byte replay should minimize.' );
+$invalid_byte_exact_minimize_result = \HtmlApiFuzz\read_json_file( $invalid_byte_exact_minimize_dir . '/minimize-result.json' );
+html_api_fuzz_smoke_assert( true === ( $invalid_byte_exact_minimize_result['ok'] ?? null ), 'exact-signature invalid-byte minimization should preserve the target signature.' );
+html_api_fuzz_smoke_assert( 'process' === ( $invalid_byte_exact_minimize_result['probeMode'] ?? null ), 'auto exact-signature minimization should use timeout-enforced process probes by default.' );
+html_api_fuzz_smoke_assert( true === ( $invalid_byte_exact_minimize_result['candidateArtifactsRetained'] ?? null ), 'auto process minimization should report retained candidate artifacts.' );
+html_api_fuzz_smoke_assert( 1 === ( $invalid_byte_exact_minimize_result['attempts'] ?? null ), 'exact-signature smoke minimization should run the requested single probe.' ); // Matches --max-attempts 1 above.
+html_api_fuzz_smoke_assert( is_array( $invalid_byte_exact_minimize_result['probeTiming'] ?? null ), 'exact-signature minimization should report probe timing.' );
+html_api_fuzz_smoke_assert( is_dir( $invalid_byte_exact_minimize_dir . '/candidates' ), 'auto process minimization should retain per-candidate artifact directories.' );
+
+/*
+ * Minimize the invalid-byte replay fixture with `--probe-mode in-process`.
+ * The result document must report in-process probing with no retained
+ * candidate artifacts, and no candidates/ directory may be left behind.
+ */
+$invalid_byte_in_process_minimize_dir = "{$tmp}/invalid-byte-in-process-minimize";
+$invalid_byte_in_process_minimize_argv = array(
+	dirname( __DIR__ ) . '/minimize.php',
+	'--replay',
+	"{$invalid_byte_replay_source_dir}/replay.json",
+	'--output-dir',
+	$invalid_byte_in_process_minimize_dir,
+	'--probe-mode',
+	'in-process',
+	'--max-attempts',
+	'1',
+	'--timeout-ms',
+	'10000',
+);
+$invalid_byte_in_process_minimize_proc = \HtmlApiFuzz\run_php_process(
+	$invalid_byte_in_process_minimize_argv,
+	\HtmlApiFuzz\repo_root(),
+	20000,
+	"{$tmp}/invalid-byte-in-process-minimize.log"
+);
+
+// The child must finish on its own (no timeout) with exit code 0.
+$invalid_byte_in_process_minimize_succeeded = ! $invalid_byte_in_process_minimize_proc['timedOut'] && 0 === $invalid_byte_in_process_minimize_proc['code'];
+html_api_fuzz_smoke_assert( $invalid_byte_in_process_minimize_succeeded, 'explicit in-process invalid-byte replay should minimize.' );
+
+$invalid_byte_in_process_minimize_result = \HtmlApiFuzz\read_json_file( "{$invalid_byte_in_process_minimize_dir}/minimize-result.json" );
+html_api_fuzz_smoke_assert( true === ( $invalid_byte_in_process_minimize_result['ok'] ?? null ), 'explicit in-process minimization should preserve the target signature.' );
+html_api_fuzz_smoke_assert( 'in-process' === ( $invalid_byte_in_process_minimize_result['probeMode'] ?? null ), 'explicit in-process minimization should use in-process probes.' );
+html_api_fuzz_smoke_assert( false === ( $invalid_byte_in_process_minimize_result['candidateArtifactsRetained'] ?? null ), 'in-process minimization should not retain candidate artifacts by default.' );
+html_api_fuzz_smoke_assert( ! is_dir( "{$invalid_byte_in_process_minimize_dir}/candidates" ), 'in-process minimization should avoid per-candidate artifact directories by default.' );
+
+/*
+ * Minimize the invalid-byte replay fixture with `--any-failure`. Besides the
+ * process-probe expectations, this run checks metadata propagation: the
+ * payload policy stays 'invalid-byte-heavy', and both minimize-result.json and
+ * the minimized replay document repeat the source replay's repoCommit and
+ * repoDirty under their `sourceReplay` key.
+ */
+$invalid_byte_minimize_dir = $tmp . '/invalid-byte-minimize';
+$invalid_byte_minimize_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/minimize.php',
+		'--replay',
+		$invalid_byte_replay_source_dir . '/replay.json',
+		'--output-dir',
+		$invalid_byte_minimize_dir,
+		'--any-failure',
+		'--max-attempts',
+		'1',
+		'--timeout-ms',
+		'10000',
+	),
+	\HtmlApiFuzz\repo_root(),
+	20000,
+	$tmp . '/invalid-byte-minimize.log'
+);
+html_api_fuzz_smoke_assert( ! $invalid_byte_minimize_proc['timedOut'] && 0 === $invalid_byte_minimize_proc['code'], 'real invalid-byte replay should remain minimizable.' );
+$invalid_byte_minimize_result = \HtmlApiFuzz\read_json_file( $invalid_byte_minimize_dir . '/minimize-result.json' );
+// The minimized replay path is taken from the result document; an empty string is passed when the key is absent.
+$invalid_byte_minimize_replay = \HtmlApiFuzz\read_json_file( $invalid_byte_minimize_result['minimizedReplay'] ?? '' );
+// Re-read the source fixture so its commit/dirty values can be compared below.
+$invalid_byte_source_replay = \HtmlApiFuzz\read_json_file( $invalid_byte_replay_source_dir . '/replay.json' );
+html_api_fuzz_smoke_assert( 'process' === ( $invalid_byte_minimize_result['probeMode'] ?? null ), 'any-failure minimization should use process probes by default.' );
+html_api_fuzz_smoke_assert( true === ( $invalid_byte_minimize_result['candidateArtifactsRetained'] ?? null ), 'process-mode minimization should report retained candidate artifacts.' );
+html_api_fuzz_smoke_assert( is_dir( $invalid_byte_minimize_dir . '/candidates' ), 'process-mode minimization should retain per-candidate artifact directories.' );
+html_api_fuzz_smoke_assert( 'invalid-byte-heavy' === ( $invalid_byte_minimize_result['payloadPolicy'] ?? null ), 'invalid-byte minimization should preserve legacy payload policy metadata.' );
+// Both sides default to null, so a key missing from both documents also compares equal.
+html_api_fuzz_smoke_assert( ( $invalid_byte_source_replay['repoCommit'] ?? null ) === ( $invalid_byte_minimize_result['sourceReplay']['repoCommit'] ?? null ), 'minimize result should preserve source replay commit metadata.' );
+html_api_fuzz_smoke_assert( ( $invalid_byte_source_replay['repoDirty'] ?? null ) === ( $invalid_byte_minimize_result['sourceReplay']['repoDirty'] ?? null ), 'minimize result should preserve source replay dirty metadata.' );
+html_api_fuzz_smoke_assert( ( $invalid_byte_source_replay['repoCommit'] ?? null ) === ( $invalid_byte_minimize_replay['sourceReplay']['repoCommit'] ?? null ), 'minimized replay should preserve source replay commit metadata.' );
+html_api_fuzz_smoke_assert( ( $invalid_byte_source_replay['repoDirty'] ?? null ) === ( $invalid_byte_minimize_replay['sourceReplay']['repoDirty'] ?? null ), 'minimized replay should preserve source replay dirty metadata.' );
+
+/*
+ * runner.php must exit non-zero when given a payload policy name it does not
+ * know ('valid-ut8' is presumably a deliberate misspelling). A child that the
+ * harness reports as timed out is not accepted as a rejection: it would also
+ * have a non-zero/absent exit code without ever having refused the option.
+ */
+$bad_runner_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/runner.php',
+		'--payload-policy',
+		'valid-ut8',
+		'--max-seeds',
+		'1',
+		'--output-dir',
+		$tmp . '/bad-runner',
+	),
+	\HtmlApiFuzz\repo_root(),
+	5000,
+	$tmp . '/bad-runner.log'
+);
+html_api_fuzz_smoke_assert( ! $bad_runner_proc['timedOut'] && 0 !== $bad_runner_proc['code'], 'runner CLI should reject invalid payload policy before starting workers.' );
+
+/*
+ * runner.php must exit non-zero when asked to generate with the legacy
+ * 'invalid-byte-heavy' payload policy. A timed-out child is not accepted as a
+ * rejection, since a killed process would also report a non-zero/absent exit
+ * code.
+ */
+$legacy_invalid_runner_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/runner.php',
+		'--payload-policy',
+		'invalid-byte-heavy',
+		'--max-seeds',
+		'1',
+		'--output-dir',
+		$tmp . '/legacy-invalid-runner',
+	),
+	\HtmlApiFuzz\repo_root(),
+	5000,
+	$tmp . '/legacy-invalid-runner.log'
+);
+html_api_fuzz_smoke_assert( ! $legacy_invalid_runner_proc['timedOut'] && 0 !== $legacy_invalid_runner_proc['code'], 'runner CLI should reject legacy invalid-byte-heavy generation policy.' );
+
+/*
+ * launcher.php must exit non-zero when asked to generate with the legacy
+ * 'invalid-byte-heavy' payload policy. A timed-out child is not accepted as a
+ * rejection, since a killed process would also report a non-zero/absent exit
+ * code.
+ */
+$legacy_invalid_launcher_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/launcher.php',
+		'--payload-policy',
+		'invalid-byte-heavy',
+		'--max-seeds',
+		'1',
+		'--duration-seconds',
+		'0',
+		'--output-dir',
+		$tmp . '/legacy-invalid-launcher',
+	),
+	\HtmlApiFuzz\repo_root(),
+	5000,
+	$tmp . '/legacy-invalid-launcher.log'
+);
+html_api_fuzz_smoke_assert( ! $legacy_invalid_launcher_proc['timedOut'] && 0 !== $legacy_invalid_launcher_proc['code'], 'launcher CLI should reject legacy invalid-byte-heavy generation policy.' );
+
+/*
+ * Run runner.php for a single seed and inspect the metadata it writes.
+ * state.json must identify itself as runner state and carry a `git` array, and
+ * the first events.ndjson record must carry one as well. When $git_metadata
+ * reports git as available, the commit recorded in both places must equal
+ * $git_metadata['commit'], and the seed's replay.json must repeat the runner
+ * state's commit and dirty values.
+ */
+$metadata_runner_dir = $tmp . '/metadata-runner';
+$metadata_runner_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/runner.php',
+		'--max-seeds',
+		'1',
+		'--duration-seconds',
+		'0',
+		// Passing seed directories are pruned by default; this run asserts on
+		// the on-disk replay document.
+		'--keep-all-artifacts',
+		'--output-dir',
+		$metadata_runner_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	15000,
+	$tmp . '/metadata-runner.log'
+);
+html_api_fuzz_smoke_assert( ! $metadata_runner_proc['timedOut'] && 0 === $metadata_runner_proc['code'], 'runner metadata smoke run should complete.' );
+$metadata_runner_state = \HtmlApiFuzz\read_json_file( $metadata_runner_dir . '/state.json' );
+html_api_fuzz_smoke_assert( 'html-api-fuzz-runner-state' === ( $metadata_runner_state['kind'] ?? null ), 'runner state should be written.' );
+html_api_fuzz_smoke_assert( is_array( $metadata_runner_state['git'] ?? null ), 'runner state should include compact git metadata.' );
+$metadata_runner_events = \HtmlApiFuzz\read_ndjson_records( $metadata_runner_dir . '/events.ndjson' );
+// Index 0 is treated as the runner start event.
+html_api_fuzz_smoke_assert( is_array( $metadata_runner_events[0]['git'] ?? null ), 'runner start event should include compact git metadata.' );
+$metadata_runner_replay = \HtmlApiFuzz\read_json_file( $metadata_runner_dir . '/seed-1/primary/replay.json' );
+// $git_metadata is not assigned in this part of the script; the value comparisons only run when it reports 'available'.
+if ( $git_metadata['available'] ?? false ) {
+	html_api_fuzz_smoke_assert( $git_metadata['commit'] === ( $metadata_runner_state['git']['commit'] ?? null ), 'runner state git metadata should match the current commit.' );
+	html_api_fuzz_smoke_assert( $git_metadata['commit'] === ( $metadata_runner_events[0]['git']['commit'] ?? null ), 'runner start event git metadata should match the current commit.' );
+	html_api_fuzz_smoke_assert( $metadata_runner_state['git']['commit'] === ( $metadata_runner_replay['repoCommit'] ?? null ), 'runner worker replay should use runner-provided git metadata.' );
+	html_api_fuzz_smoke_assert( $metadata_runner_state['git']['dirty'] === ( $metadata_runner_replay['repoDirty'] ?? null ), 'runner worker replay should use runner-provided dirty metadata.' );
+}
+
+/*
+ * Run launcher.php with one lane and one seed and inspect the metadata it
+ * writes. launcher-state.json must identify itself as launcher state and carry
+ * a `git` array, as must the first events.ndjson record. The lane's state.json
+ * must show the non-default --max-keep-per-signature value. When $git_metadata
+ * reports git as available, the recorded commit must equal
+ * $git_metadata['commit'], and the lane's replay.json must repeat the launcher
+ * state's commit and dirty values.
+ */
+$metadata_launcher_dir = $tmp . '/metadata-launcher';
+$metadata_launcher_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/launcher.php',
+		'--lanes',
+		'1',
+		'--max-seeds',
+		'1',
+		'--duration-seconds',
+		'0',
+		// Passing seed directories are pruned by default; this run asserts on
+		// the on-disk replay document.
+		'--keep-all-artifacts',
+		// Non-default value pins the launcher-to-lane flag passthrough.
+		'--max-keep-per-signature',
+		'3',
+		'--output-dir',
+		$metadata_launcher_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	20000,
+	$tmp . '/metadata-launcher.log'
+);
+html_api_fuzz_smoke_assert( ! $metadata_launcher_proc['timedOut'] && 0 === $metadata_launcher_proc['code'], 'launcher metadata smoke run should complete.' );
+$metadata_launcher_state = \HtmlApiFuzz\read_json_file( $metadata_launcher_dir . '/launcher-state.json' );
+html_api_fuzz_smoke_assert( 'html-api-fuzz-launcher-state' === ( $metadata_launcher_state['kind'] ?? null ), 'launcher state should be written.' );
+html_api_fuzz_smoke_assert( is_array( $metadata_launcher_state['git'] ?? null ), 'launcher state should include compact git metadata.' );
+$metadata_launcher_events = \HtmlApiFuzz\read_ndjson_records( $metadata_launcher_dir . '/events.ndjson' );
+// Index 0 is treated as the launcher start event.
+html_api_fuzz_smoke_assert( is_array( $metadata_launcher_events[0]['git'] ?? null ), 'launcher start event should include compact git metadata.' );
+// The single lane writes into lane-00/ under the launcher output directory.
+$metadata_launcher_lane_state = \HtmlApiFuzz\read_json_file( $metadata_launcher_dir . '/lane-00/state.json' );
+// Compared as an integer: the CLI string '3' must come back as int 3.
+html_api_fuzz_smoke_assert( 3 === ( $metadata_launcher_lane_state['maxKeepPerSignature'] ?? null ), 'launcher should pass --max-keep-per-signature through to lanes.' );
+$metadata_launcher_replay = \HtmlApiFuzz\read_json_file( $metadata_launcher_dir . '/lane-00/seed-1/primary/replay.json' );
+// $git_metadata is not assigned in this part of the script; the value comparisons only run when it reports 'available'.
+if ( $git_metadata['available'] ?? false ) {
+	html_api_fuzz_smoke_assert( $git_metadata['commit'] === ( $metadata_launcher_state['git']['commit'] ?? null ), 'launcher state git metadata should match the current commit.' );
+	html_api_fuzz_smoke_assert( $git_metadata['commit'] === ( $metadata_launcher_events[0]['git']['commit'] ?? null ), 'launcher start event git metadata should match the current commit.' );
+	html_api_fuzz_smoke_assert( $metadata_launcher_state['git']['commit'] === ( $metadata_launcher_replay['repoCommit'] ?? null ), 'launcher worker replay should use launcher-provided git metadata.' );
+	html_api_fuzz_smoke_assert( $metadata_launcher_state['git']['dirty'] === ( $metadata_launcher_replay['repoDirty'] ?? null ), 'launcher worker replay should use launcher-provided dirty metadata.' );
+}
+
+/*
+ * Run launcher.php with `--watcher --no-minimize --triage-oracle-findings` and
+ * confirm the triage flag reaches the watcher. The launcher's stdout is parsed
+ * as JSON to find the watcher's exit code and log path; the log file is then
+ * parsed as a single JSON document whose `triageOracleFindings` must be true.
+ */
+$launcher_oracle_watcher_dir = $tmp . '/launcher-oracle-watcher';
+$launcher_oracle_watcher_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/launcher.php',
+		'--lanes',
+		'1',
+		'--max-seeds',
+		'1',
+		'--duration-seconds',
+		'0',
+		'--watcher',
+		'--no-minimize',
+		'--triage-oracle-findings',
+		'--output-dir',
+		$launcher_oracle_watcher_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	30000,
+	$tmp . '/launcher-oracle-watcher.log'
+);
+html_api_fuzz_smoke_assert( ! $launcher_oracle_watcher_proc['timedOut'] && 0 === $launcher_oracle_watcher_proc['code'], 'launcher oracle watcher passthrough run should complete.' );
+$launcher_oracle_watcher = json_decode( $launcher_oracle_watcher_proc['stdout'], true );
+html_api_fuzz_smoke_assert( 0 === ( $launcher_oracle_watcher['watcherResult']['code'] ?? null ), 'launcher oracle watcher should exit cleanly.' );
+/*
+ * Validate the log path before reading it. On PHP 8, file_get_contents() throws
+ * a ValueError for an empty path, which would abort the smoke script without
+ * saying which expectation was violated.
+ */
+$launcher_oracle_watcher_log_path = $launcher_oracle_watcher['watcherResult']['logPath'] ?? null;
+$launcher_oracle_watcher_log_readable = is_string( $launcher_oracle_watcher_log_path ) && is_file( $launcher_oracle_watcher_log_path );
+html_api_fuzz_smoke_assert( $launcher_oracle_watcher_log_readable, 'launcher oracle watcher should report an existing log file.' );
+$launcher_oracle_watcher_log = $launcher_oracle_watcher_log_readable ? trim( (string) file_get_contents( $launcher_oracle_watcher_log_path ) ) : '';
+$launcher_oracle_watcher_scan = json_decode( $launcher_oracle_watcher_log, true );
+html_api_fuzz_smoke_assert( true === ( $launcher_oracle_watcher_scan['triageOracleFindings'] ?? null ), 'launcher should pass --triage-oracle-findings through to watcher.' );
+
+/*
+ * runner.php must exit non-zero for `--seed-stride 0`. A timed-out child is
+ * not accepted as a rejection, since a killed process would also report a
+ * non-zero/absent exit code without having refused the option.
+ */
+$bad_stride_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/runner.php',
+		'--seed-stride',
+		'0',
+		'--max-seeds',
+		'1',
+		'--output-dir',
+		$tmp . '/bad-stride-runner',
+	),
+	\HtmlApiFuzz\repo_root(),
+	5000,
+	$tmp . '/bad-stride-runner.log'
+);
+html_api_fuzz_smoke_assert( ! $bad_stride_proc['timedOut'] && 0 !== $bad_stride_proc['code'], 'runner CLI should reject non-positive seed strides before starting workers.' );
+
+/*
+ * Run the worker in-process on directly supplied input without a
+ * payload-policy option. Neither the returned result nor the written
+ * replay.json may carry a payloadPolicy, no generator metadata may appear, the
+ * replay must record 'input-base64' as its inputSource, and the result must
+ * carry an 'idempotent' normalize() status.
+ */
+$unlabeled_dir = "{$tmp}/unlabeled-direct";
+$unlabeled_options = array(
+	'input-base64' => base64_encode( '<p>x</p>' ),
+	'profile'      => 'replay',
+	'mode'         => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'output-dir'   => $unlabeled_dir,
+);
+$unlabeled_result = \HtmlApiFuzz\Worker::run( $unlabeled_options );
+$unlabeled_replay = \HtmlApiFuzz\read_json_file( "{$unlabeled_dir}/replay.json" );
+
+html_api_fuzz_smoke_assert( null === ( $unlabeled_result['payloadPolicy'] ?? null ), 'unlabeled direct input result should leave payloadPolicy null.' );
+html_api_fuzz_smoke_assert( null === ( $unlabeled_replay['payloadPolicy'] ?? null ), 'unlabeled direct input replay should leave payloadPolicy null.' );
+html_api_fuzz_smoke_assert( null === ( $unlabeled_result['generator'] ?? null ), 'unlabeled direct input should not invent generator metadata.' );
+html_api_fuzz_smoke_assert( 'input-base64' === ( $unlabeled_replay['inputSource'] ?? null ), 'unlabeled direct input replay should record inputSource.' );
+html_api_fuzz_smoke_assert( 'idempotent' === ( $unlabeled_result['tagProcessor']['normalize']['status'] ?? null ), 'worker result should persist normalize() idempotence metadata.' );
+
+/*
+ * Input with a duplicated attribute and a character reference missing its
+ * semicolon: tag invariants must pass and the normalize() metadata must report
+ * status 'idempotent' together with a string normalizedSha1.
+ */
+$normalize_idempotent = \HtmlApiFuzz\TagInvariants::check( '<p a=1 a=2>One&nbsp</p>', array( 'maxTokens' => 2000 ) );
+html_api_fuzz_smoke_assert( true === ( $normalize_idempotent['ok'] ?? null ), 'normalizable input should pass tag invariants.' );
+html_api_fuzz_smoke_assert( 'idempotent' === ( $normalize_idempotent['normalize']['status'] ?? null ), 'normalizable input should record an idempotent normalize() status.' );
+html_api_fuzz_smoke_assert( is_string( $normalize_idempotent['normalize']['normalizedSha1'] ?? null ), 'idempotent normalize() metadata should include the normalized hash.' );
+
+// For this input the check must still be ok overall while normalize() is recorded as 'unsupported'.
+$normalize_unsupported = \HtmlApiFuzz\TagInvariants::check( '<A><I><A>', array( 'maxTokens' => 2000 ) );
+html_api_fuzz_smoke_assert( true === ( $normalize_unsupported['ok'] ?? null ), 'unsupported normalize() input should not fail unrelated tag invariants.' );
+html_api_fuzz_smoke_assert( 'unsupported' === ( $normalize_unsupported['normalize']['status'] ?? null ), 'unsupported normalize() input should be recorded without an idempotence failure.' );
+
+/*
+ * The top-level `ok` must be true regardless of what normalize() reports. The
+ * detailed failure expectations only apply when normalize.ok is explicitly
+ * false; a missing key defaults to true and skips them. This result is
+ * consulted again further down the script.
+ */
+$normalize_not_idempotent = \HtmlApiFuzz\TagInvariants::check( '<svg xlink:href href></svg>', array( 'maxTokens' => 2000 ) );
+html_api_fuzz_smoke_assert( true === ( $normalize_not_idempotent['ok'] ?? null ), 'normalize() metadata should not fail unrelated tag invariants directly.' );
+if ( false === ( $normalize_not_idempotent['normalize']['ok'] ?? true ) ) {
+	html_api_fuzz_smoke_assert( 'normalize-not-idempotent' === ( $normalize_not_idempotent['normalize']['failure']['name'] ?? null ), 'non-idempotent normalize() input should report the normalize-not-idempotent invariant.' );
+	html_api_fuzz_smoke_assert( 'failed' === ( $normalize_not_idempotent['normalize']['status'] ?? null ), 'non-idempotent normalize() input should record failed normalize() metadata.' );
+	html_api_fuzz_smoke_assert( is_int( $normalize_not_idempotent['normalize']['firstDifference']['firstByteOffset'] ?? null ), 'non-idempotent normalize() metadata should include a first byte difference.' );
+}
+
+/*
+ * Twelve opening <span> tags against a max-tokens budget of 1: the worker
+ * result must be classified and statused as 'resource-limit', list
+ * 'tag-token-limit-exceeded' among the signature's limit failures, record
+ * 'input-base64' as the input source, and carry no generator metadata.
+ */
+$resource_dir = "{$tmp}/resource-limit";
+$resource_options = array(
+	'input-base64'   => base64_encode( str_repeat( '<span>', 12 ) ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $resource_dir,
+	'max-tokens'     => '1',
+	'max-nodes'      => '100',
+);
+$resource_result = \HtmlApiFuzz\Worker::run( $resource_options );
+$resource_limit_failures = $resource_result['signature']['facts']['limitFailures'] ?? array();
+
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $resource_result['failureClass'] ?? null ), 'token ceilings should be bucketed as resource-limit.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $resource_result['status'] ?? null ), 'token ceilings should use resource-limit status.' );
+html_api_fuzz_smoke_assert( in_array( 'tag-token-limit-exceeded', $resource_limit_failures, true ), 'resource-limit signature should include concrete limit failure names.' );
+html_api_fuzz_smoke_assert( 'input-base64' === ( $resource_result['inputSource'] ?? null ), 'provided base64 input should record inputSource.' );
+html_api_fuzz_smoke_assert( null === ( $resource_result['generator'] ?? null ), 'provided input should not invent generator metadata.' );
+
+/*
+ * The SVG XLink fixture under a max-tokens budget of 1: the token ceiling must
+ * determine the classification ('resource-limit' for both failureClass and
+ * status), the normalize() check must be recorded as 'skipped-resource-limit',
+ * and 'tag-token-limit-exceeded' must still appear in the signature facts.
+ */
+$normalize_resource_dir = "{$tmp}/normalize-resource-limit";
+$normalize_resource_options = array(
+	'input-base64'   => base64_encode( '<svg xlink:href href></svg>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $normalize_resource_dir,
+	'max-tokens'     => '1',
+	'max-nodes'      => '100',
+);
+$normalize_resource_result = \HtmlApiFuzz\Worker::run( $normalize_resource_options );
+$normalize_resource_limit_failures = $normalize_resource_result['signature']['facts']['limitFailures'] ?? array();
+
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $normalize_resource_result['failureClass'] ?? null ), 'tag token ceilings should not be masked by normalize() failures.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $normalize_resource_result['status'] ?? null ), 'tag token ceilings should retain resource-limit status when normalize() would otherwise fail.' );
+html_api_fuzz_smoke_assert( 'skipped-resource-limit' === ( $normalize_resource_result['tagProcessor']['normalize']['status'] ?? null ), 'normalize() idempotence should be skipped after tag resource limits.' );
+html_api_fuzz_smoke_assert( in_array( 'tag-token-limit-exceeded', $normalize_resource_limit_failures, true ), 'resource-limit signature should still include tag token limit failures when normalize() is skipped.' );
+
+/*
+ * A trivial paragraph against a max-nodes budget of 1: the worker result must
+ * be classified and statused as 'resource-limit', the DOM sub-result must keep
+ * its own 'node-limit-exceeded' failure class, and the signature facts must
+ * list 'dom-node-limit-exceeded'.
+ */
+$dom_resource_dir = "{$tmp}/dom-resource-limit";
+$dom_resource_options = array(
+	'input-base64'   => base64_encode( '<p>x</p>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_resource_dir,
+	'max-tokens'     => '100',
+	'max-nodes'      => '1',
+);
+$dom_resource_result = \HtmlApiFuzz\Worker::run( $dom_resource_options );
+$dom_resource_limit_failures = $dom_resource_result['signature']['facts']['limitFailures'] ?? array();
+
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $dom_resource_result['failureClass'] ?? null ), 'DOM node ceilings should be bucketed as resource-limit.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $dom_resource_result['status'] ?? null ), 'DOM node ceilings should use resource-limit status.' );
+html_api_fuzz_smoke_assert( 'node-limit-exceeded' === ( $dom_resource_result['dom']['failureClass'] ?? null ), 'DOM result should preserve the concrete node limit failure.' );
+html_api_fuzz_smoke_assert( in_array( 'dom-node-limit-exceeded', $dom_resource_limit_failures, true ), 'resource-limit signature should include DOM node limit failures.' );
+
+/*
+ * A <col> directly inside a <template>, parsed as a full document. The worker
+ * must report the run as ok but quarantined: status and failureClass
+ * 'oracle-unsupported', DOM status 'unsupported', no comparison and no fuzz
+ * signature, plus an oracle finding classified as 'oracle-limitation' of type
+ * 'dom-template-context-unsupported' whose signature hash starts with
+ * 'oracle-'. Finally the WordPress tree dump is read back to confirm it
+ * contains the <col> line.
+ */
+$dom_template_context_dir = $tmp . '/dom-template-context-sensitive-col';
+$dom_template_context_result = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'    => base64_encode( '<html><template><col>' ),
+		'profile'         => 'replay',
+		'mode'            => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+		'payload-policy'  => 'ascii-structural',
+		'output-dir'      => $dom_template_context_dir,
+		'max-tokens'      => '100',
+		'max-nodes'       => '100',
+	)
+);
+html_api_fuzz_smoke_assert( true === ( $dom_template_context_result['ok'] ?? null ), 'DOM template table-sensitive fallback should not be a failing tree mismatch.' );
+html_api_fuzz_smoke_assert( 'oracle-unsupported' === ( $dom_template_context_result['status'] ?? null ), 'DOM template table-sensitive fallback should be quarantined as oracle-unsupported.' );
+html_api_fuzz_smoke_assert( 'oracle-unsupported' === ( $dom_template_context_result['failureClass'] ?? null ), 'DOM template table-sensitive fallback should preserve the oracle-unsupported failure class.' );
+html_api_fuzz_smoke_assert( 'unsupported' === ( $dom_template_context_result['dom']['status'] ?? null ), 'DOM template table-sensitive fallback should preserve the DOM unsupported status.' );
+html_api_fuzz_smoke_assert( null === ( $dom_template_context_result['comparison'] ?? null ), 'DOM template table-sensitive fallback should not compare a lossy DOM tree.' );
+html_api_fuzz_smoke_assert( null === ( $dom_template_context_result['signature'] ?? null ), 'DOM template table-sensitive fallback should not produce a fuzz signature.' );
+html_api_fuzz_smoke_assert( 'oracle-limitation' === ( $dom_template_context_result['oracleFinding']['classification'] ?? null ), 'DOM template table-sensitive fallback should produce an oracle-limitation finding.' );
+html_api_fuzz_smoke_assert( 'dom-template-context-unsupported' === ( $dom_template_context_result['oracleFinding']['type'] ?? null ), 'DOM template table-sensitive fallback should identify the oracle finding type.' );
+html_api_fuzz_smoke_assert( str_starts_with( (string) ( $dom_template_context_result['oracleFinding']['signature']['hash'] ?? '' ), 'oracle-' ), 'DOM template table-sensitive fallback should produce an oracle finding signature.' );
+/*
+ * Only read the tree dump when the result names an existing file. On PHP 8,
+ * file_get_contents() throws a ValueError for an empty path instead of
+ * returning false, so a missing treePath would otherwise abort the script
+ * before the assertion below could report it.
+ */
+$dom_template_context_wp_tree_path = $dom_template_context_result['wordpress']['treePath'] ?? null;
+$dom_template_context_wp_tree = is_string( $dom_template_context_wp_tree_path ) && is_file( $dom_template_context_wp_tree_path ) ? file_get_contents( $dom_template_context_wp_tree_path ) : false;
+html_api_fuzz_smoke_assert( is_string( $dom_template_context_wp_tree ) && str_contains( $dom_template_context_wp_tree, "        <col>\n" ), 'DOM template context regression should exercise WordPress <col> preservation.' );
+
+/*
+ * A <col> followed by a nested <template> inside an outer <template>, parsed
+ * as a full document. The run must be ok, quarantined as 'oracle-unsupported',
+ * and must produce neither a comparison nor a fuzz signature.
+ */
+$dom_nested_template_context_dir = "{$tmp}/dom-nested-template-context-sensitive-col";
+$dom_nested_template_context_options = array(
+	'input-base64'   => base64_encode( '<body><template><col><template>x</template></template></body>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_nested_template_context_dir,
+	'max-tokens'     => '200',
+	'max-nodes'      => '200',
+);
+$dom_nested_template_context_result = \HtmlApiFuzz\Worker::run( $dom_nested_template_context_options );
+
+html_api_fuzz_smoke_assert( true === ( $dom_nested_template_context_result['ok'] ?? null ), 'Nested DOM template table-sensitive fallback should not be a failing tree mismatch.' );
+html_api_fuzz_smoke_assert( 'oracle-unsupported' === ( $dom_nested_template_context_result['status'] ?? null ), 'Nested DOM template table-sensitive fallback should be quarantined as oracle-unsupported.' );
+html_api_fuzz_smoke_assert( null === ( $dom_nested_template_context_result['comparison'] ?? null ), 'Nested DOM template table-sensitive fallback should not compare a lossy DOM tree.' );
+html_api_fuzz_smoke_assert( null === ( $dom_nested_template_context_result['signature'] ?? null ), 'Nested DOM template table-sensitive fallback should not produce a fuzz signature.' );
+
+/*
+ * When template content survives body-context fragment parsing unchanged it
+ * is rendered and its nodes are counted. The node ceiling therefore still
+ * applies inside the template and takes precedence over every other
+ * classification: with max-nodes 3 this input must fail as 'resource-limit'
+ * and the DOM sub-result must keep 'node-limit-exceeded'.
+ */
+$dom_template_context_resource_dir = "{$tmp}/dom-template-context-resource-limit";
+$dom_template_context_resource_options = array(
+	'input-base64'   => base64_encode( '<template><i>a</i><i>b</i></template>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_template_context_resource_dir,
+	'max-tokens'     => '200',
+	'max-nodes'      => '3',
+);
+$dom_template_context_resource_result = \HtmlApiFuzz\Worker::run( $dom_template_context_resource_options );
+
+html_api_fuzz_smoke_assert( false === ( $dom_template_context_resource_result['ok'] ?? null ), 'DOM template rendering should not mask DOM node ceilings.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $dom_template_context_resource_result['status'] ?? null ), 'DOM template node ceilings should use resource-limit status.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $dom_template_context_resource_result['failureClass'] ?? null ), 'DOM template node ceilings should use resource-limit failure class.' );
+html_api_fuzz_smoke_assert( 'node-limit-exceeded' === ( $dom_template_context_resource_result['dom']['failureClass'] ?? null ), 'DOM template rendering should preserve the concrete DOM node ceiling.' );
+
+/*
+ * Template content that does not round-trip must be quarantined as
+ * unsupported even when the node budget is tiny. The node count of its lossy
+ * body-context parse is bounded, which keeps the quarantine decision
+ * deterministic for resource-stress inputs: with max-nodes 3 this input must
+ * still be ok, 'oracle-unsupported', and reported as an 'oracle-limitation'.
+ */
+$dom_template_context_quarantine_dir = "{$tmp}/dom-template-context-quarantine-small-budget";
+$dom_template_context_quarantine_options = array(
+	'input-base64'   => base64_encode( '<template><col><x></x></template>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_template_context_quarantine_dir,
+	'max-tokens'     => '200',
+	'max-nodes'      => '3',
+);
+$dom_template_context_quarantine_result = \HtmlApiFuzz\Worker::run( $dom_template_context_quarantine_options );
+
+html_api_fuzz_smoke_assert( true === ( $dom_template_context_quarantine_result['ok'] ?? null ), 'Non-round-trippable DOM template content should be quarantined, not failed.' );
+html_api_fuzz_smoke_assert( 'oracle-unsupported' === ( $dom_template_context_quarantine_result['status'] ?? null ), 'Non-round-trippable DOM template content should be quarantined as oracle-unsupported.' );
+html_api_fuzz_smoke_assert( 'oracle-limitation' === ( $dom_template_context_quarantine_result['oracleFinding']['classification'] ?? null ), 'Non-round-trippable DOM template content should remain visible as an oracle limitation.' );
+
+/*
+ * A complete table inside a <template>, parsed as a full document. The tree
+ * comparison must succeed. What is expected of the overall result then depends
+ * on the normalize() check: if it did not explicitly fail, the run must be a
+ * plain pass with no signature; if it did, the run must fail with the
+ * 'normalize-invariant-failed' class.
+ */
+$dom_template_table_context_dir = "{$tmp}/dom-template-table-context";
+$dom_template_table_context_result = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'   => base64_encode( '<template><table><tr><td>x</td></tr></table></template>' ),
+		'profile'        => 'replay',
+		'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+		'payload-policy' => 'ascii-structural',
+		'output-dir'     => $dom_template_table_context_dir,
+		'max-tokens'     => '200',
+		'max-nodes'      => '200',
+	)
+);
+html_api_fuzz_smoke_assert( true === ( $dom_template_table_context_result['comparison']['ok'] ?? null ), 'DOM template table-contained content should compare cleanly.' );
+
+// A missing normalize.ok counts as "did not fail".
+$dom_template_table_context_normalize_ok = $dom_template_table_context_result['tagProcessor']['normalize']['ok'] ?? true;
+if ( false !== $dom_template_table_context_normalize_ok ) {
+	html_api_fuzz_smoke_assert( true === ( $dom_template_table_context_result['ok'] ?? null ), 'DOM template fallback should compare table-contained content.' );
+	html_api_fuzz_smoke_assert( 'passed' === ( $dom_template_table_context_result['status'] ?? null ), 'DOM template fallback should not quarantine table-contained content.' );
+	html_api_fuzz_smoke_assert( null === ( $dom_template_table_context_result['signature'] ?? null ), 'DOM template table-contained content should not produce a fuzz signature.' );
+} else {
+	html_api_fuzz_smoke_assert( false === ( $dom_template_table_context_result['ok'] ?? null ), 'DOM template table-contained content should surface normalize() failures after comparison.' );
+	html_api_fuzz_smoke_assert( 'normalize-invariant-failed' === ( $dom_template_table_context_result['failureClass'] ?? null ), 'DOM template table-contained normalize() failures should be classified separately.' );
+}
+
+/*
+ * A <td> nested in <svg> inside a <template>, parsed as a full document: a
+ * foreign-content element whose name overlaps a table element name. The tree
+ * comparison must succeed. If the normalize() check did not explicitly fail,
+ * the run must be a plain pass with no signature; otherwise it must fail with
+ * the 'normalize-invariant-failed' class.
+ */
+$dom_template_foreign_context_dir = "{$tmp}/dom-template-foreign-context";
+$dom_template_foreign_context_result = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'   => base64_encode( '<template><svg><td></td></svg></template>' ),
+		'profile'        => 'replay',
+		'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+		'payload-policy' => 'ascii-structural',
+		'output-dir'     => $dom_template_foreign_context_dir,
+		'max-tokens'     => '200',
+		'max-nodes'      => '200',
+	)
+);
+html_api_fuzz_smoke_assert( true === ( $dom_template_foreign_context_result['comparison']['ok'] ?? null ), 'DOM template foreign-content overlap should compare cleanly.' );
+
+// A missing normalize.ok counts as "did not fail".
+$dom_template_foreign_context_normalize_ok = $dom_template_foreign_context_result['tagProcessor']['normalize']['ok'] ?? true;
+if ( false !== $dom_template_foreign_context_normalize_ok ) {
+	html_api_fuzz_smoke_assert( true === ( $dom_template_foreign_context_result['ok'] ?? null ), 'DOM template fallback should compare foreign-content tag names that overlap table names.' );
+	html_api_fuzz_smoke_assert( 'passed' === ( $dom_template_foreign_context_result['status'] ?? null ), 'DOM template fallback should not quarantine foreign-content tag names that overlap table names.' );
+	html_api_fuzz_smoke_assert( null === ( $dom_template_foreign_context_result['signature'] ?? null ), 'DOM template foreign-content overlap should not produce a fuzz signature.' );
+} else {
+	html_api_fuzz_smoke_assert( false === ( $dom_template_foreign_context_result['ok'] ?? null ), 'DOM template foreign-content overlap should surface normalize() failures after comparison.' );
+	html_api_fuzz_smoke_assert( 'normalize-invariant-failed' === ( $dom_template_foreign_context_result['failureClass'] ?? null ), 'DOM template foreign-content normalize() failures should be classified separately.' );
+}
+
+/*
+ * Render '<svg xlink:href href></svg>' as a body fragment with both the
+ * WordPress renderer and the DOM oracle, then compare the two trees while
+ * passing the WordPress side's domOracleLineTolerances to compare_trees().
+ * Both renders must report STATUS_OK and the comparison must succeed. Which
+ * tolerance list is expected depends on the runtime probe stored in
+ * $dom_oracle_needs_xlink_tolerance: array( 1 ) when the probe returns true,
+ * an empty array otherwise.
+ */
+$dom_oracle_needs_xlink_tolerance = html_api_fuzz_smoke_dom_drops_bare_xlink_local_name_after_xlink();
+$dom_oracle_xlink_fragment_wp = \HtmlApiFuzz\TreeRenderer::render_wordpress(
+	'<svg xlink:href href></svg>',
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	array( 'maxTokens' => 100, 'maxNodes' => 100 )
+);
+$dom_oracle_xlink_fragment_dom = \HtmlApiFuzz\TreeRenderer::render_dom(
+	'<svg xlink:href href></svg>',
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	array( 'maxTokens' => 100, 'maxNodes' => 100 )
+);
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_fragment_wp['status'] ?? null ), 'DOM XLink fragment fixture should render with WordPress.' );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_fragment_dom['status'] ?? null ), 'DOM XLink fragment fixture should render with the DOM oracle.' );
+// The 'tree' keys are read without a fallback; the two status assertions above come first.
+$dom_oracle_xlink_fragment_comparison = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	$dom_oracle_xlink_fragment_wp['tree'],
+	$dom_oracle_xlink_fragment_dom['tree'],
+	$dom_oracle_xlink_fragment_wp['domOracleLineTolerances'] ?? array()
+);
+html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_fragment_comparison['ok'] ?? null ), 'DOM XLink oracle limitation should compare after fragment tolerance.' );
+if ( $dom_oracle_needs_xlink_tolerance ) {
+	html_api_fuzz_smoke_assert( array( 1 ) === ( $dom_oracle_xlink_fragment_wp['domOracleLineTolerances'] ?? null ), 'DOM XLink fragment tolerance should identify only the dropped WordPress attribute line.' );
+} else {
+	html_api_fuzz_smoke_assert( array() === ( $dom_oracle_xlink_fragment_wp['domOracleLineTolerances'] ?? null ), 'Fixed DOM runtimes should not record XLink oracle tolerance lines.' );
+}
+
+// Replay the XLink fixture through the full worker; expectations depend on the XLink probe and on the earlier normalize() result.
+$dom_oracle_xlink_worker_dir = $tmp . '/dom-oracle-xlink-worker';
+$xlink_worker_options        = array(
+	'input-base64'   => base64_encode( '<svg xlink:href href></svg>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_oracle_xlink_worker_dir,
+	'max-tokens'     => '100',
+	'max-nodes'      => '100',
+);
+$dom_oracle_xlink_worker_result = \HtmlApiFuzz\Worker::run( $xlink_worker_options );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_worker_result['dom']['status'] ?? null ), 'Worker should still render the DOM oracle when normalize() fails independently.' );
+html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_worker_result['comparison']['ok'] ?? null ), 'Worker should still compare DOM trees when normalize() fails independently.' );
+if ( ! $dom_oracle_needs_xlink_tolerance ) {
+	html_api_fuzz_smoke_assert( null === ( $dom_oracle_xlink_worker_result['oracleFinding'] ?? null ), 'Fixed DOM runtimes should not produce an XLink oracle finding.' );
+} else {
+	html_api_fuzz_smoke_assert( 'oracle-bug' === ( $dom_oracle_xlink_worker_result['oracleFinding']['classification'] ?? null ), 'Worker should preserve the XLink DOM tolerance as an oracle-bug finding.' );
+	html_api_fuzz_smoke_assert( 'dom-xlink-dropped-local-name-after-xlink' === ( $dom_oracle_xlink_worker_result['oracleFinding']['type'] ?? null ), 'Worker should identify the XLink oracle finding type.' );
+	html_api_fuzz_smoke_assert( 'https://github.com/lexbor/lexbor/issues/372' === ( $dom_oracle_xlink_worker_result['oracleFinding']['upstream']['issueUrl'] ?? null ), 'Worker should link the XLink oracle finding to the upstream Lexbor issue.' );
+}
+if ( false !== ( $normalize_not_idempotent['normalize']['ok'] ?? true ) ) {
+	html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_worker_result['ok'] ?? null ), 'Worker should pass the XLink fixture once normalize() is idempotent.' );
+	html_api_fuzz_smoke_assert( ( $dom_oracle_needs_xlink_tolerance ? 'oracle-tolerated' : 'passed' ) === ( $dom_oracle_xlink_worker_result['status'] ?? null ), 'Worker should keep normal DOM oracle status once normalize() is idempotent.' );
+} else {
+	html_api_fuzz_smoke_assert( false === ( $dom_oracle_xlink_worker_result['ok'] ?? null ), 'Worker should fail when normalize() is non-idempotent.' );
+	html_api_fuzz_smoke_assert( 'normalize-invariant-failed' === ( $dom_oracle_xlink_worker_result['failureClass'] ?? null ), 'Worker should classify normalize() idempotence failures separately.' );
+	html_api_fuzz_smoke_assert( 'normalize-not-idempotent' === ( $dom_oracle_xlink_worker_result['signature']['facts']['invariant'] ?? null ), 'Normalize failure signature should record the concrete invariant.' );
+	html_api_fuzz_smoke_assert( ( $normalize_not_idempotent['normalize']['normalizedSha1'] ?? null ) === ( $dom_oracle_xlink_worker_result['signature']['facts']['normalizedSha1'] ?? null ), 'Normalize failure signature should include the normalized hash.' );
+	html_api_fuzz_smoke_assert( is_int( $dom_oracle_xlink_worker_result['signature']['facts']['firstByteOffset'] ?? null ), 'Normalize failure signature should include first-difference metadata.' );
+}
+
+// MathML heading fixture: when the probe reports the known DOM reparenting behaviour, the worker must
+// tolerate it as an oracle finding and the minimizer must accept that finding as a target.
+$dom_oracle_mathml_heading_fixture         = '<h1><math><annotation-xml encoding="text/html"><p>x</h1><area data-x>';
+$dom_oracle_needs_mathml_heading_tolerance = html_api_fuzz_smoke_dom_reparents_heading_after_mathml_text_integration_point();
+$dom_oracle_mathml_heading_dir             = $tmp . '/dom-oracle-mathml-heading-worker';
+$mathml_heading_options                    = array(
+	'input-base64'   => base64_encode( $dom_oracle_mathml_heading_fixture ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_oracle_mathml_heading_dir,
+	'max-tokens'     => '200',
+	'max-nodes'      => '200',
+);
+$dom_oracle_mathml_heading_result = \HtmlApiFuzz\Worker::run( $mathml_heading_options );
+
+if ( ! $dom_oracle_needs_mathml_heading_tolerance ) {
+	html_api_fuzz_smoke_assert( null === ( $dom_oracle_mathml_heading_result['oracleFinding'] ?? null ), 'Fixed DOM runtimes should not produce a MathML heading oracle finding.' );
+} else {
+	// Worker outcome: tolerated, no failure signature, and an oracle finding that links to the upstream issue.
+	html_api_fuzz_smoke_assert( true === ( $dom_oracle_mathml_heading_result['ok'] ?? null ), 'Known MathML heading DOM oracle bug should not be a WordPress failure.' );
+	html_api_fuzz_smoke_assert( 'oracle-tolerated' === ( $dom_oracle_mathml_heading_result['status'] ?? null ), 'Known MathML heading DOM oracle bug should be reported as oracle-tolerated.' );
+	html_api_fuzz_smoke_assert( null === ( $dom_oracle_mathml_heading_result['signature'] ?? null ), 'Known MathML heading DOM oracle bug should not produce a failure signature.' );
+	html_api_fuzz_smoke_assert( 'oracle-bug' === ( $dom_oracle_mathml_heading_result['oracleFinding']['classification'] ?? null ), 'Known MathML heading DOM oracle bug should produce an oracle-bug finding.' );
+	html_api_fuzz_smoke_assert( 'dom-mathml-heading-scope-reparenting' === ( $dom_oracle_mathml_heading_result['oracleFinding']['type'] ?? null ), 'Known MathML heading DOM oracle bug should identify the oracle finding type.' );
+	html_api_fuzz_smoke_assert( 'https://github.com/lexbor/lexbor/issues/373' === ( $dom_oracle_mathml_heading_result['oracleFinding']['upstream']['issueUrl'] ?? null ), 'Known MathML heading DOM oracle bug should link to the upstream Lexbor issue.' );
+
+	// Plain replay: run minimize.php on the worker's replay.json with '--max-attempts 0' and '--any-failure'.
+	// Each flag shares a line with its value; the argument order is unchanged.
+	$mathml_minimize_script          = dirname( __DIR__ ) . '/minimize.php';
+	$dom_oracle_mathml_minimize_proc = \HtmlApiFuzz\run_php_process(
+		array(
+			$mathml_minimize_script,
+			'--replay', $dom_oracle_mathml_heading_dir . '/replay.json',
+			'--output-dir', $tmp . '/dom-oracle-mathml-heading-minimize',
+			'--max-attempts', '0',
+			'--timeout-ms', '2500',
+			'--any-failure',
+		),
+		\HtmlApiFuzz\repo_root(),
+		30000,
+		$tmp . '/dom-oracle-mathml-heading-minimize.log'
+	);
+	$dom_oracle_mathml_minimize = json_decode( $dom_oracle_mathml_minimize_proc['stdout'], true );
+	html_api_fuzz_smoke_assert( 0 === $dom_oracle_mathml_minimize_proc['code'] && ! $dom_oracle_mathml_minimize_proc['timedOut'], 'Minimizer should accept oracle finding replays as targets.' );
+	html_api_fuzz_smoke_assert( true === ( $dom_oracle_mathml_minimize['ok'] ?? null ), 'Minimizer should reproduce the MathML oracle finding target.' );
+	html_api_fuzz_smoke_assert( 'oracle-finding' === ( $dom_oracle_mathml_minimize['targetKind'] ?? null ), 'Minimizer should mark the MathML target as an oracle finding.' );
+	html_api_fuzz_smoke_assert( ( $dom_oracle_mathml_heading_result['oracleFinding']['signature']['hash'] ?? null ) === ( $dom_oracle_mathml_minimize['finalOracleHash'] ?? null ), 'Minimizer should report the final oracle finding hash.' );
+
+	// Mixed replay: copy the replay, overwrite its 'signature' with a fake failure signature, and ask the
+	// minimizer for the oracle finding explicitly through '--target-kind' and '--target-hash'.
+	$dom_oracle_mathml_mixed_replay_path         = $tmp . '/dom-oracle-mathml-heading-mixed-replay.json';
+	$dom_oracle_mathml_mixed_replay              = \HtmlApiFuzz\read_json_file( $dom_oracle_mathml_heading_dir . '/replay.json' );
+	$dom_oracle_mathml_mixed_replay['signature'] = array(
+		'hash'      => 'fake-failure-hash',
+		'familyKey' => 'fake-failure-family',
+	);
+	\HtmlApiFuzz\write_json_file( $dom_oracle_mathml_mixed_replay_path, $dom_oracle_mathml_mixed_replay );
+	$dom_oracle_mathml_mixed_minimize_proc = \HtmlApiFuzz\run_php_process(
+		array(
+			$mathml_minimize_script,
+			'--replay', $dom_oracle_mathml_mixed_replay_path,
+			'--output-dir', $tmp . '/dom-oracle-mathml-heading-mixed-minimize',
+			'--max-attempts', '0',
+			'--timeout-ms', '2500',
+			'--any-failure',
+			'--target-kind', 'oracle-finding',
+			'--target-hash', $dom_oracle_mathml_heading_result['oracleFinding']['signature']['hash'],
+		),
+		\HtmlApiFuzz\repo_root(),
+		30000,
+		$tmp . '/dom-oracle-mathml-heading-mixed-minimize.log'
+	);
+	$dom_oracle_mathml_mixed_minimize = json_decode( $dom_oracle_mathml_mixed_minimize_proc['stdout'], true );
+	html_api_fuzz_smoke_assert( 0 === $dom_oracle_mathml_mixed_minimize_proc['code'] && ! $dom_oracle_mathml_mixed_minimize_proc['timedOut'], 'Minimizer should accept explicit oracle targets on mixed replays.' );
+	html_api_fuzz_smoke_assert( true === ( $dom_oracle_mathml_mixed_minimize['ok'] ?? null ), 'Minimizer should reproduce the explicit oracle target on mixed replays.' );
+	html_api_fuzz_smoke_assert( ( $dom_oracle_mathml_heading_result['oracleFinding']['signature']['hash'] ?? null ) === ( $dom_oracle_mathml_mixed_minimize['finalOracleHash'] ?? null ), 'Mixed replay minimization should report the requested oracle finding hash.' );
+}
+
+// Same heading fixture with encoding="application/xhtml+xml"; the probe is asked about that encoding specifically.
+$dom_oracle_needs_mathml_xhtml_tolerance = html_api_fuzz_smoke_dom_reparents_heading_after_mathml_text_integration_point( 'application/xhtml+xml' );
+$dom_oracle_mathml_xhtml_dir             = $tmp . '/dom-oracle-mathml-xhtml-worker';
+$mathml_xhtml_options                    = array(
+	'input-base64'   => base64_encode( '<h1><math><annotation-xml encoding="application/xhtml+xml"><p>x</h1><area data-x>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_oracle_mathml_xhtml_dir,
+	'max-tokens'     => '200',
+	'max-nodes'      => '200',
+);
+$dom_oracle_mathml_xhtml_result = \HtmlApiFuzz\Worker::run( $mathml_xhtml_options );
+if ( ! $dom_oracle_needs_mathml_xhtml_tolerance ) {
+	html_api_fuzz_smoke_assert( null === ( $dom_oracle_mathml_xhtml_result['oracleFinding'] ?? null ), 'Fixed DOM runtimes should not produce a MathML XHTML oracle finding.' );
+} else {
+	html_api_fuzz_smoke_assert( true === ( $dom_oracle_mathml_xhtml_result['ok'] ?? null ), 'MathML application/xhtml+xml DOM oracle bug should not be a WordPress failure.' );
+	html_api_fuzz_smoke_assert( 'dom-mathml-heading-scope-reparenting' === ( $dom_oracle_mathml_xhtml_result['oracleFinding']['type'] ?? null ), 'MathML application/xhtml+xml should use the MathML heading oracle finding type.' );
+}
+
+// XLink fixture again, this time parsed as a full document by both renderers.
+$xlink_full_document_html           = '<svg xlink:href href></svg>';
+$xlink_full_document_limits         = array( 'maxTokens' => 100, 'maxNodes' => 100 );
+$dom_oracle_xlink_full_document_wp  = \HtmlApiFuzz\TreeRenderer::render_wordpress( $xlink_full_document_html, \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT, $xlink_full_document_limits );
+$dom_oracle_xlink_full_document_dom = \HtmlApiFuzz\TreeRenderer::render_dom( $xlink_full_document_html, \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT, $xlink_full_document_limits );
+$xlink_full_document_tolerances     = $dom_oracle_xlink_full_document_wp['domOracleLineTolerances'] ?? null;
+
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_full_document_wp['status'] ?? null ), 'DOM XLink full-document fixture should render with WordPress.' );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_full_document_dom['status'] ?? null ), 'DOM XLink full-document fixture should render with the DOM oracle.' );
+
+$dom_oracle_xlink_full_document_comparison = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	$dom_oracle_xlink_full_document_wp['tree'],
+	$dom_oracle_xlink_full_document_dom['tree'],
+	$xlink_full_document_tolerances ?? array()
+);
+html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_full_document_comparison['ok'] ?? null ), 'DOM XLink oracle limitation should compare after full-document tolerance.' );
+
+// Expected tolerance list: array( 4 ) while the runtime needs the tolerance, otherwise an empty array.
+html_api_fuzz_smoke_assert( ( $dom_oracle_needs_xlink_tolerance ? array( 4 ) : array() ) === $xlink_full_document_tolerances, 'DOM XLink full-document fixture should record tolerance lines only while the runtime needs them.' );
+
+// Minimized XLink fixture, stored as base64 and decoded in strict mode before both renderers see it.
+$dom_oracle_xlink_minimized_input = base64_decode( 'Pjx3cC14PjxiIHNyYz0i16oiPjxzdHJvbmcgZDw8PDw8PCI+YWFhYWFhYc6yPHN2ZyBwWDgxRzY4QndxPSJudWtyUSBhbXA7IiB4bGluazpocmVmIGhyZWYgdml0bGU+Ri1qOA==', true );
+html_api_fuzz_smoke_assert( false !== $dom_oracle_xlink_minimized_input, 'DOM XLink minimized fixture should decode.' );
+$xlink_minimized_limits         = array( 'maxTokens' => 2000, 'maxNodes' => 3000 );
+$dom_oracle_xlink_minimized_wp  = \HtmlApiFuzz\TreeRenderer::render_wordpress( $dom_oracle_xlink_minimized_input, \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT, $xlink_minimized_limits );
+$dom_oracle_xlink_minimized_dom = \HtmlApiFuzz\TreeRenderer::render_dom( $dom_oracle_xlink_minimized_input, \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT, $xlink_minimized_limits );
+$xlink_minimized_tolerances     = $dom_oracle_xlink_minimized_wp['domOracleLineTolerances'] ?? null;
+
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_minimized_wp['status'] ?? null ), 'DOM XLink minimized fixture should render with WordPress.' );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_minimized_dom['status'] ?? null ), 'DOM XLink minimized fixture should render with the DOM oracle.' );
+
+$dom_oracle_xlink_minimized_comparison = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	$dom_oracle_xlink_minimized_wp['tree'],
+	$dom_oracle_xlink_minimized_dom['tree'],
+	$xlink_minimized_tolerances ?? array()
+);
+html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_minimized_comparison['ok'] ?? null ), 'DOM XLink minimized fixture comparison should pass.' );
+
+// Only the number of tolerated lines (one) is pinned here when the runtime needs the tolerance; otherwise the list must be empty.
+$xlink_minimized_tolerances_ok = $dom_oracle_needs_xlink_tolerance ? 1 === count( $xlink_minimized_tolerances ?? array() ) : array() === $xlink_minimized_tolerances;
+html_api_fuzz_smoke_assert( $xlink_minimized_tolerances_ok, 'DOM XLink minimized fixture should record tolerance lines only while the runtime needs them.' );
+
+// Inverse attribute order: the bare attribute precedes the XLink one.
+// Both renderers must agree and no tolerance lines may be recorded, regardless of the probe result.
+$xlink_inverse_html           = '<svg href xlink:href></svg>';
+$xlink_inverse_limits         = array( 'maxTokens' => 100, 'maxNodes' => 100 );
+$dom_oracle_xlink_inverse_wp  = \HtmlApiFuzz\TreeRenderer::render_wordpress( $xlink_inverse_html, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $xlink_inverse_limits );
+$dom_oracle_xlink_inverse_dom = \HtmlApiFuzz\TreeRenderer::render_dom( $xlink_inverse_html, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $xlink_inverse_limits );
+$xlink_inverse_tolerances     = $dom_oracle_xlink_inverse_wp['domOracleLineTolerances'] ?? null;
+
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_inverse_wp['status'] ?? null ), 'Bare attribute before XLink should render with WordPress.' );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_oracle_xlink_inverse_dom['status'] ?? null ), 'Bare attribute before XLink should render with the DOM oracle.' );
+
+$dom_oracle_xlink_inverse_comparison = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	$dom_oracle_xlink_inverse_wp['tree'],
+	$dom_oracle_xlink_inverse_dom['tree'],
+	$xlink_inverse_tolerances ?? array()
+);
+
+html_api_fuzz_smoke_assert( true === ( $dom_oracle_xlink_inverse_comparison['ok'] ?? null ), 'Bare attribute before XLink should remain comparable.' );
+html_api_fuzz_smoke_assert( array() === $xlink_inverse_tolerances, 'Bare attribute before XLink should not record DOM oracle tolerance lines.' );
+
+// Node ceiling of 1 on an XLink fixture with a child element: the DOM renderer must report the
+// concrete limit failure instead of a tolerated result.
+$xlink_resource_html           = '<svg xlink:href href><pass>x</pass></svg>';
+$dom_oracle_xlink_resource_dom = \HtmlApiFuzz\TreeRenderer::render_dom( $xlink_resource_html, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, array( 'maxTokens' => 100, 'maxNodes' => 1 ) );
+html_api_fuzz_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_ERROR === ( $dom_oracle_xlink_resource_dom['status'] ?? null ), 'DOM XLink node ceiling should fail the DOM renderer.' );
+html_api_fuzz_smoke_assert( 'node-limit-exceeded' === ( $dom_oracle_xlink_resource_dom['failureClass'] ?? null ), 'DOM XLink oracle tolerance should preserve the concrete DOM node limit failure.' );
+
+// The same input and ceiling through the worker must stay a resource-limit failure without an oracle finding.
+$dom_oracle_xlink_resource_worker_dir = $tmp . '/dom-oracle-xlink-resource-worker';
+$xlink_resource_worker_options        = array(
+	'input-base64'   => base64_encode( $xlink_resource_html ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $dom_oracle_xlink_resource_worker_dir,
+	'max-tokens'     => '100',
+	'max-nodes'      => '1',
+);
+$dom_oracle_xlink_resource_worker_result = \HtmlApiFuzz\Worker::run( $xlink_resource_worker_options );
+html_api_fuzz_smoke_assert( false === ( $dom_oracle_xlink_resource_worker_result['ok'] ?? null ), 'DOM XLink node ceiling should remain a failing resource-limit result.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $dom_oracle_xlink_resource_worker_result['failureClass'] ?? null ), 'DOM XLink node ceiling should preserve the resource-limit failure class.' );
+html_api_fuzz_smoke_assert( null === ( $dom_oracle_xlink_resource_worker_result['oracleFinding'] ?? null ), 'DOM XLink tolerance lines should not create an oracle finding when DOM comparison did not succeed.' );
+
+// Full-document replay of '<p>x</p>' with a token ceiling of 3: the worker must bucket it as resource-limit.
+$wp_resource_dir     = $tmp . '/wordpress-resource-limit';
+$wp_resource_options = array(
+	'input-base64'   => base64_encode( '<p>x</p>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT,
+	'payload-policy' => 'ascii-structural',
+	'output-dir'     => $wp_resource_dir,
+	'max-tokens'     => '3',
+	'max-nodes'      => '100',
+);
+$wp_resource_result = \HtmlApiFuzz\Worker::run( $wp_resource_options );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $wp_resource_result['failureClass'] ?? null ), 'WordPress tree token ceilings should be bucketed as resource-limit.' );
+html_api_fuzz_smoke_assert( 'resource-limit' === ( $wp_resource_result['status'] ?? null ), 'WordPress tree token ceilings should use resource-limit status.' );
+html_api_fuzz_smoke_assert( 'token-limit-exceeded' === ( $wp_resource_result['wordpress']['failureClass'] ?? null ), 'WordPress result should preserve the concrete token limit failure.' );
+html_api_fuzz_smoke_assert( in_array( 'wordpress-token-limit-exceeded', $wp_resource_result['signature']['facts']['limitFailures'] ?? array(), true ), 'resource-limit signature should include WordPress token limit failures.' );
+
+// Feed one summary record built from the earlier $resource_result to watcher.php and check how it is tracked.
+// The record is appended to summary.ndjson inside a dedicated run directory.
+$resource_watcher_run_dir = $tmp . '/resource-watcher-run';
+\HtmlApiFuzz\ensure_dir( $resource_watcher_run_dir );
+$resource_summary_record = array( 'ok' => false );
+$resource_summary_keys   = array(
+	'status', 'failureClass', 'profile', 'mode', 'payloadPolicy',
+	'generator', 'inputSource', 'inputSha1', 'inputLength', 'signature',
+);
+foreach ( $resource_summary_keys as $resource_summary_key ) {
+	$resource_summary_record[ $resource_summary_key ] = $resource_result[ $resource_summary_key ] ?? null;
+}
+$resource_summary_record['resultPath'] = $resource_dir . '/result.json';
+$resource_summary_record['replayPath'] = $resource_dir . '/replay.json';
+\HtmlApiFuzz\append_ndjson( $resource_watcher_run_dir . '/summary.ndjson', $resource_summary_record );
+
+// First scan: '--no-minimize' is passed together with '--max-minimize 1'; the record must end up
+// queued in state.json without any minimizeResult.
+$resource_watcher_state_dir = $tmp . '/resource-watcher-state';
+$resource_watcher_script    = dirname( __DIR__ ) . '/watcher.php';
+$resource_watcher_proc      = \HtmlApiFuzz\run_php_process(
+	array(
+		$resource_watcher_script,
+		'--run-dir', $resource_watcher_run_dir,
+		'--state-dir', $resource_watcher_state_dir,
+		'--once',
+		'--no-minimize',
+		'--max-minimize', '1',
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/resource-watcher.log'
+);
+html_api_fuzz_smoke_assert( ! $resource_watcher_proc['timedOut'] && 0 === $resource_watcher_proc['code'], 'watcher should process resource-limit summaries.' );
+$resource_watcher_state  = \HtmlApiFuzz\read_json_file( $resource_watcher_state_dir . '/state.json' );
+$resource_watcher_hash   = $resource_result['signature']['hash'] ?? null;
+$resource_watcher_record = array();
+if ( is_string( $resource_watcher_hash ) ) {
+	$resource_watcher_record = $resource_watcher_state['signatures'][ $resource_watcher_hash ] ?? array();
+}
+html_api_fuzz_smoke_assert( 'queued' === ( $resource_watcher_record['status'] ?? null ), 'watcher should queue resource-limit signatures for minimization.' );
+html_api_fuzz_smoke_assert( ! isset( $resource_watcher_record['minimizeResult'] ), 'watcher --no-minimize should not start resource-limit minimization.' );
+
+// Second scan over the same run and state directories: the JSON printed on stdout must report
+// zero failures seen, i.e. the already-scanned record is not counted again.
+$resource_watcher_second_proc = \HtmlApiFuzz\run_php_process(
+	array(
+		$resource_watcher_script,
+		'--run-dir', $resource_watcher_run_dir,
+		'--state-dir', $resource_watcher_state_dir,
+		'--once',
+		'--no-minimize',
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$tmp . '/resource-watcher-second.log'
+);
+$resource_watcher_second = json_decode( $resource_watcher_second_proc['stdout'], true );
+html_api_fuzz_smoke_assert( ! $resource_watcher_second_proc['timedOut'] && 0 === $resource_watcher_second_proc['code'], 'watcher should process a second scan.' );
+html_api_fuzz_smoke_assert( 0 === ( $resource_watcher_second['failuresSeen'] ?? null ), 'watcher should not reread already-scanned summary records.' );
+
+// A lone 0xC0 byte after 220 'a' characters puts the first differing byte well past offset 160.
+$encoding_dir     = $tmp . '/encoding-mismatch';
+$encoding_input   = '<p>' . str_repeat( 'a', 220 ) . "\xC0" . '</p>';
+$encoding_options = array(
+	'input-base64'   => base64_encode( $encoding_input ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'invalid-byte-heavy',
+	'output-dir'     => $encoding_dir,
+	'max-tokens'     => '2000',
+	'max-nodes'      => '3000',
+);
+$encoding_result = \HtmlApiFuzz\Worker::run( $encoding_options );
+html_api_fuzz_smoke_assert( 'encoding-mismatch' === ( $encoding_result['failureClass'] ?? null ), 'invalid byte beyond hex preview should be classified as encoding-mismatch.' );
+html_api_fuzz_smoke_assert( null === ( $encoding_result['generator'] ?? null ), 'replayed invalid input should not invent generator metadata.' );
+$encoding_diff = $encoding_result['comparison']['firstDifference'] ?? array();
+html_api_fuzz_smoke_assert( ( $encoding_diff['firstByteOffset'] ?? 0 ) > 160, 'long encoding mismatch should exercise an offset beyond the leading hex preview.' );
+html_api_fuzz_smoke_assert( ( $encoding_diff['wordpressHex'] ?? null ) === ( $encoding_diff['domHex'] ?? null ), 'long encoding mismatch should show why leading hex previews alone are insufficient.' );
+html_api_fuzz_smoke_assert( false !== strpos( $encoding_diff['wordpressDiffHex'] ?? '', 'c0' ), 'long encoding mismatch should include the differing WordPress byte in the diff window.' );
+html_api_fuzz_smoke_assert( false !== strpos( $encoding_diff['domDiffHex'] ?? '', 'efbfbd' ), 'long encoding mismatch should include the differing DOM replacement bytes in the diff window.' );
+
+/*
+ * The encoding-mismatch classifier must not relabel structural differences:
+ * exercised directly because no live WordPress/DOM structural divergence is
+ * available to drive a worker-level fixture (parser fixes resolved them).
+ * Diffs carry the linesMatchAfterWordPressUtf8Scrub flag, computed by
+ * TreeRenderer::first_difference() on the full differing lines.
+ */
+$is_encoding_mismatch = new \ReflectionMethod( \HtmlApiFuzz\Worker::class, 'is_encoding_mismatch' );
+// ReflectionMethod::setAccessible() has had no effect since PHP 8.1 and is deprecated as of
+// PHP 8.5, so it is only called on older runtimes, where it is still required to invoke a
+// method that is (presumably) not public.
+if ( PHP_VERSION_ID < 80100 ) {
+	$is_encoding_mismatch->setAccessible( true );
+}
+$invalid_input = "<p>\xC0</p>";
+// Passing null as the object invokes the method statically.
+html_api_fuzz_smoke_assert(
+	false === $is_encoding_mismatch->invoke(
+		null,
+		$invalid_input,
+		array( 'linesMatchAfterWordPressUtf8Scrub' => false )
+	),
+	'invalid bytes elsewhere should not relabel structural tree mismatches as encoding-mismatch.'
+);
+html_api_fuzz_smoke_assert(
+	true === $is_encoding_mismatch->invoke(
+		null,
+		$invalid_input,
+		array( 'linesMatchAfterWordPressUtf8Scrub' => true )
+	),
+	'invalid-byte line differences explained by the UTF-8 scrub should classify as encoding-mismatch.'
+);
+html_api_fuzz_smoke_assert(
+	false === $is_encoding_mismatch->invoke(
+		null,
+		'<p>valid</p>',
+		array( 'linesMatchAfterWordPressUtf8Scrub' => true )
+	),
+	'valid UTF-8 input should never classify as encoding-mismatch.'
+);
+
+// Each case embeds a newline followed by an invalid 0xC0 byte in a different kind of scalar
+// (text, attribute value, comment) and names the tree path expected in the first difference.
+// Every case is replayed through the worker in fragment-body mode with the same limits.
+// NOTE(review): the comment case expects the path '/p/#text', as written in the fixture table.
+$newline_scalar_cases = array(
+	'text'    => array( 'input' => '<p>a' . "\n" . 'b' . "\xC0" . '</p>', 'path' => '/p/#text' ),
+	'attr'    => array( 'input' => '<p title="a' . "\n" . 'b' . "\xC0" . '"></p>', 'path' => '/p/@title' ),
+	'comment' => array( 'input' => '<p><!-- a' . "\n" . 'b' . "\xC0" . ' --></p>', 'path' => '/p/#text' ),
+);
+
+foreach ( $newline_scalar_cases as $name => $case ) {
+	// The case name doubles as the output directory suffix and as the prefix of every failure message.
+	$newline_scalar_dir     = $tmp . '/newline-scalar-' . $name;
+	$newline_scalar_options = array(
+		'input-base64'   => base64_encode( $case['input'] ),
+		'profile'        => 'replay',
+		'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'payload-policy' => 'invalid-byte-heavy',
+		'output-dir'     => $newline_scalar_dir,
+		'max-tokens'     => '2000',
+		'max-nodes'      => '3000',
+	);
+	$newline_scalar_result = \HtmlApiFuzz\Worker::run( $newline_scalar_options );
+	$newline_scalar_diff   = $newline_scalar_result['comparison']['firstDifference'] ?? array();
+
+	// The first-difference record must carry the path, bounded previews, byte lengths, hashes and the
+	// first differing offset, and must not carry the full 'wordpressLine' / 'domLine' entries.
+	html_api_fuzz_smoke_assert( 'encoding-mismatch' === ( $newline_scalar_result['failureClass'] ?? null ), $name . ' newline scalar invalid byte should be classified as encoding-mismatch.' );
+	html_api_fuzz_smoke_assert( $case['path'] === ( $newline_scalar_diff['path'] ?? null ), $name . ' newline scalar diff should preserve the logical tree path.' );
+	html_api_fuzz_smoke_assert( ! array_key_exists( 'wordpressLine', $newline_scalar_diff ), $name . ' newline scalar diff should not persist the full WordPress line.' );
+	html_api_fuzz_smoke_assert( ! array_key_exists( 'domLine', $newline_scalar_diff ), $name . ' newline scalar diff should not persist the full DOM line.' );
+	html_api_fuzz_smoke_assert( isset( $newline_scalar_diff['wordpressLinePreview'], $newline_scalar_diff['domLinePreview'] ), $name . ' newline scalar diff should persist bounded previews.' );
+	html_api_fuzz_smoke_assert( isset( $newline_scalar_diff['wordpressLineBytes'], $newline_scalar_diff['domLineBytes'] ), $name . ' newline scalar diff should persist line byte lengths.' );
+	html_api_fuzz_smoke_assert( isset( $newline_scalar_diff['wordpressLineSha1'], $newline_scalar_diff['domLineSha1'] ), $name . ' newline scalar diff should persist line hashes.' );
+	html_api_fuzz_smoke_assert( isset( $newline_scalar_diff['firstByteOffset'] ), $name . ' newline scalar diff should persist first differing byte offset.' );
+}
+
+// Invalid byte inside <template> content: the diff path must include the 'content' pseudo-node.
+$template_content_dir     = $tmp . '/template-content-path';
+$template_content_options = array(
+	'input-base64'   => base64_encode( '<template><p>a' . "\xC0" . '</p></template>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'invalid-byte-heavy',
+	'output-dir'     => $template_content_dir,
+	'max-tokens'     => '2000',
+	'max-nodes'      => '3000',
+);
+$template_content_result = \HtmlApiFuzz\Worker::run( $template_content_options );
+$template_content_diff   = $template_content_result['comparison']['firstDifference'] ?? array();
+html_api_fuzz_smoke_assert( 'encoding-mismatch' === ( $template_content_result['failureClass'] ?? null ), 'template content invalid byte should be classified as encoding-mismatch.' );
+html_api_fuzz_smoke_assert( '/template/content/p/#text' === ( $template_content_diff['path'] ?? null ), 'template content descendants should include the content pseudo-node in diff paths.' );
+
+// Attribute name containing a double quote, with an invalid byte in its value: path and normalized forms must keep the escaped name.
+$quoted_attribute_dir     = $tmp . '/quoted-attribute-name';
+$quoted_attribute_options = array(
+	'input-base64'   => base64_encode( '<p a"b="x' . "\xC0" . '"></p>' ),
+	'profile'        => 'replay',
+	'mode'           => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	'payload-policy' => 'invalid-byte-heavy',
+	'output-dir'     => $quoted_attribute_dir,
+	'max-tokens'     => '2000',
+	'max-nodes'      => '3000',
+);
+$quoted_attribute_result = \HtmlApiFuzz\Worker::run( $quoted_attribute_options );
+$quoted_attribute_diff   = $quoted_attribute_result['comparison']['firstDifference'] ?? array();
+html_api_fuzz_smoke_assert( 'encoding-mismatch' === ( $quoted_attribute_result['failureClass'] ?? null ), 'quoted attribute-name invalid byte should be classified as encoding-mismatch.' );
+html_api_fuzz_smoke_assert( '/p/@a\\"b' === ( $quoted_attribute_diff['path'] ?? null ), 'quoted attribute-name diff should preserve the attribute path.' );
+html_api_fuzz_smoke_assert( 'a\\"b="<value>"' === ( $quoted_attribute_diff['wordpressNorm'] ?? null ), 'quoted attribute-name normalization should preserve the WordPress attribute name.' );
+html_api_fuzz_smoke_assert( 'a\\"b="<value>"' === ( $quoted_attribute_diff['domNorm'] ?? null ), 'quoted attribute-name normalization should preserve the DOM attribute name.' );
+
+echo "OK\n";
diff --git a/tools/html-api-fuzz/tests/lexbor-oracle-smoke.php b/tools/html-api-fuzz/tests/lexbor-oracle-smoke.php
new file mode 100755
index 0000000000000..c8f01b73802cd
--- /dev/null
+++ b/tools/html-api-fuzz/tests/lexbor-oracle-smoke.php
@@ -0,0 +1,145 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+// Writes "FAIL: <message>" to STDERR and ends the script with exit status 1.
+function html_api_fuzz_lexbor_smoke_fail( string $message ): void {
+	fwrite( STDERR, "FAIL: {$message}\n" );
+	exit( 1 );
+}
+// Fails the smoke test with $message unless $condition is true.
+function html_api_fuzz_lexbor_smoke_assert( bool $condition, string $message ): void {
+	if ( ! $condition ) {
+		html_api_fuzz_lexbor_smoke_fail( $message );
+	}
+}
+// Without a built, executable oracle binary there is nothing to test: print SKIP and exit 0 instead of failing.
+$binary = \HtmlApiFuzz\repo_root() . '/tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle';
+if ( ! is_file( $binary ) || ! is_executable( $binary ) ) {
+	echo "SKIP lexbor-oracle-smoke: build tools/html-api-fuzz/oracles/lexbor/build/lexbor-tree-oracle first\n";
+	exit( 0 );
+}
+// Create the oracle for the Lexbor source kind and check the kind and 40-hex-digit commit it reports.
+$oracle = \HtmlApiFuzz\OracleRenderer::from_options(
+	array(
+		'dom-oracle'       => \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE,
+		'lexbor-oracle-bin' => $binary,
+	)
+);
+$metadata = $oracle->metadata();
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE === ( $metadata['kind'] ?? null ), 'Expected Lexbor source oracle metadata.' );
+html_api_fuzz_lexbor_smoke_assert( is_string( $metadata['lexborCommit'] ?? null ) && 1 === preg_match( '/^[0-9a-f]{40}$/', $metadata['lexborCommit'] ), 'Expected the resolved Lexbor commit in oracle metadata.' );
+// Token and node limits passed to every direct render call below.
+$limits = array(
+	'maxTokens' => 200,
+	'maxNodes'  => 200,
+);
+// Scratch directory that receives the Worker output of this script.
+$work_dir = sys_get_temp_dir() . '/html-api-fuzz-lexbor-oracle-' . \HtmlApiFuzz\timestamp();
+\HtmlApiFuzz\ensure_dir( $work_dir );
+// An empty fragment must parse and render as a single newline.
+$empty_fragment = $oracle->render( '', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $empty_fragment['status'] ?? null ), 'Expected Lexbor to parse an empty fragment.' );
+html_api_fuzz_lexbor_smoke_assert( "\n" === ( $empty_fragment['tree'] ?? null ), 'Expected Lexbor empty fragment rendering to match the fuzzer tree newline contract.' );
+// The same empty input must also pass a full Worker run that uses the Lexbor oracle.
+$empty_worker_result = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'      => base64_encode( '' ),
+		'mode'              => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'profile'           => 'replay',
+		'seed'              => '100',
+		'output-dir'        => $work_dir . '/empty-fragment',
+		'max-tokens'        => '200',
+		'max-nodes'         => '200',
+		'dom-oracle'        => \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE,
+		'lexbor-oracle-bin' => $binary,
+	)
+);
+html_api_fuzz_lexbor_smoke_assert( true === ( $empty_worker_result['ok'] ?? null ), 'Expected empty fragment Worker run to pass against the Lexbor source oracle.' );
+// A double quote inside a tag name must be backslash-escaped in the tree, identically to the PHP DOM renderer.
+$escaped_tag_name = $oracle->render( '<a"b></a"b>', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $escaped_tag_name['status'] ?? null ), 'Expected Lexbor to parse a quoted tag-name fixture.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $escaped_tag_name['tree'] ?? '', "<a\\\"b>\n" ), 'Expected Lexbor to escape odd tag-name bytes in tree output.' );
+$dom_escaped_tag_name = \HtmlApiFuzz\TreeRenderer::render_dom( '<a"b></a"b>', \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_escaped_tag_name['status'] ?? null ), 'Expected PHP DOM oracle to parse the quoted tag-name fixture.' );
+html_api_fuzz_lexbor_smoke_assert( $dom_escaped_tag_name['tree'] === $escaped_tag_name['tree'], 'Expected Lexbor quoted tag-name tree to match PHP DOM escaping.' );
+// Lowercase SVG element and attribute names in the input must come back in their mixed-case spelling, matching PHP DOM.
+$adjusted_svg_names = '<svg><foreignobject><div></div></foreignobject><altglyph attributename=x attributetype=XML></altglyph><lineargradient gradientunits=userSpaceOnUse></lineargradient></svg>';
+$rendered_adjusted_svg_names = $oracle->render( $adjusted_svg_names, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $rendered_adjusted_svg_names['status'] ?? null ), 'Expected Lexbor to parse adjusted SVG names fixture.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_adjusted_svg_names['tree'] ?? '', "<svg foreignObject>\n" ), 'Expected Lexbor to render adjusted SVG foreignObject casing.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_adjusted_svg_names['tree'] ?? '', "<svg altGlyph>\n" ), 'Expected Lexbor to render adjusted SVG altGlyph casing.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_adjusted_svg_names['tree'] ?? '', "<svg linearGradient>\n" ), 'Expected Lexbor to render adjusted SVG linearGradient casing.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_adjusted_svg_names['tree'] ?? '', "attributeName=\"x\"" ), 'Expected Lexbor to render adjusted SVG attributeName casing.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_adjusted_svg_names['tree'] ?? '', "attributeType=\"XML\"" ), 'Expected Lexbor to render adjusted SVG attributeType casing.' );
+$dom_adjusted_svg_names = \HtmlApiFuzz\TreeRenderer::render_dom( $adjusted_svg_names, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $dom_adjusted_svg_names['status'] ?? null ), 'Expected PHP DOM oracle to parse adjusted SVG names fixture.' );
+html_api_fuzz_lexbor_smoke_assert( $dom_adjusted_svg_names['tree'] === $rendered_adjusted_svg_names['tree'], 'Expected Lexbor adjusted SVG names tree to match PHP DOM.' );
+// Issue 372 fixture: both the bare href and the namespaced xlink:href must appear in the rendered tree.
+$issue_372 = '<svg xlink:href=qual href=plain></svg>';
+$rendered_372 = $oracle->render( $issue_372, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $rendered_372['status'] ?? null ), 'Expected Lexbor to parse issue 372 fixture.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_372['tree'] ?? '', "href=\"plain\"" ), 'Expected Lexbor to keep the bare href attribute from issue 372.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_372['tree'] ?? '', "xlink href=\"qual\"" ), 'Expected Lexbor to keep the namespaced xlink:href attribute from issue 372.' );
+// Issue 373 fixture: the "k" that follows </h2> must end up in the MathML mi element together with "x".
+$issue_373 = '<h2><math><mi>x</h2>k';
+$rendered_373 = $oracle->render( $issue_373, \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY, $limits, 'body' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $rendered_373['status'] ?? null ), 'Expected Lexbor to parse issue 373 fixture.' );
+html_api_fuzz_lexbor_smoke_assert( false !== strpos( $rendered_373['tree'] ?? '', "<math mi>\n      \"xk\"" ), 'Expected Lexbor to keep post-heading text in the MathML mi element for issue 373.' );
+// Issue 372 must also pass a full Worker run, and the result must record the oracle kind.
+$worker_result_372 = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'      => base64_encode( $issue_372 ),
+		'mode'              => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'profile'           => 'replay',
+		'seed'              => '372',
+		'output-dir'        => $work_dir . '/issue-372',
+		'max-tokens'        => '200',
+		'max-nodes'         => '200',
+		'dom-oracle'        => \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE,
+		'lexbor-oracle-bin' => $binary,
+	)
+);
+html_api_fuzz_lexbor_smoke_assert( true === ( $worker_result_372['ok'] ?? null ), 'Expected issue 372 to pass Worker against the Lexbor source oracle.' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE === ( $worker_result_372['oracle']['kind'] ?? null ), 'Expected Worker result to record Lexbor source oracle kind.' );
+// The replay.json read from that run's output directory must preserve the oracle kind, binary path and Lexbor commit.
+$worker_replay_372 = \HtmlApiFuzz\read_json_file( $work_dir . '/issue-372/replay.json' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE === ( $worker_replay_372['options']['domOracle'] ?? null ), 'Expected replay options to preserve the Lexbor source oracle kind.' );
+html_api_fuzz_lexbor_smoke_assert( $binary === ( $worker_replay_372['options']['lexborOracleBin'] ?? null ), 'Expected replay options to preserve the Lexbor source oracle binary.' );
+html_api_fuzz_lexbor_smoke_assert( ( $metadata['lexborCommit'] ?? null ) === ( $worker_replay_372['oracle']['lexborCommit'] ?? null ), 'Expected replay metadata to preserve the Lexbor source commit.' );
+// Run replay.php on that replay.json in a child process; the replayed result must still use the Lexbor oracle.
+$replay_dir = $work_dir . '/issue-372-replay';
+$proc = \HtmlApiFuzz\run_php_process(
+	array(
+		dirname( __DIR__ ) . '/replay.php',
+		'--replay',
+		$work_dir . '/issue-372/replay.json',
+		'--output-dir',
+		$replay_dir,
+	),
+	\HtmlApiFuzz\repo_root(),
+	10000,
+	$work_dir . '/replay.log'
+);
+html_api_fuzz_lexbor_smoke_assert( 0 === $proc['code'], 'Expected replay to pass while preserving the Lexbor source oracle.' );
+$replayed = \HtmlApiFuzz\read_json_file( $replay_dir . '/result.json' );
+html_api_fuzz_lexbor_smoke_assert( \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE === ( $replayed['oracle']['kind'] ?? null ), 'Expected replayed result to use the Lexbor source oracle.' );
+// Issue 373 must pass a full Worker run as well.
+$worker_result_373 = \HtmlApiFuzz\Worker::run(
+	array(
+		'input-base64'      => base64_encode( $issue_373 ),
+		'mode'              => \HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+		'profile'           => 'replay',
+		'seed'              => '373',
+		'output-dir'        => $work_dir . '/issue-373',
+		'max-tokens'        => '200',
+		'max-nodes'         => '200',
+		'dom-oracle'        => \HtmlApiFuzz\OracleRenderer::KIND_LEXBOR_SOURCE,
+		'lexbor-oracle-bin' => $binary,
+	)
+);
+html_api_fuzz_lexbor_smoke_assert( true === ( $worker_result_373['ok'] ?? null ), 'Expected issue 373 to pass Worker against the Lexbor source oracle.' );
+// Remove the scratch directory and verify that it is gone.
+\HtmlApiFuzz\remove_dir_recursive( $work_dir );
+html_api_fuzz_lexbor_smoke_assert( ! is_dir( $work_dir ), 'Expected smoke work directory cleanup.' );
+
+echo "OK lexbor-oracle-smoke\n";
diff --git a/tools/html-api-fuzz/tests/result-store-smoke.php b/tools/html-api-fuzz/tests/result-store-smoke.php
new file mode 100644
index 0000000000000..6b9f5aa9b52c7
--- /dev/null
+++ b/tools/html-api-fuzz/tests/result-store-smoke.php
@@ -0,0 +1,234 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+// Writes "FAIL: <message>" to STDERR and ends the script with exit status 1.
+function html_api_fuzz_smoke_fail( string $message ): void {
+	fwrite( STDERR, "FAIL: {$message}\n" );
+	exit( 1 );
+}
+// Fails the smoke test with $message unless $condition is true.
+function html_api_fuzz_smoke_assert( bool $condition, string $message ): void {
+	if ( ! $condition ) {
+		html_api_fuzz_smoke_fail( $message );
+	}
+}
+// Open a result store on a database file inside a new scratch directory.
+$work_dir = sys_get_temp_dir() . '/html-api-fuzz-result-store-' . \HtmlApiFuzz\timestamp();
+\HtmlApiFuzz\ensure_dir( $work_dir );
+$db_path = $work_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME;
+
+$store = new \HtmlApiFuzz\ResultStore( $db_path );
+// Oracle metadata fixtures: one for the PHP DOM oracle, one for the Lexbor source oracle.
+$php_oracle = array(
+	'kind'       => 'php-dom',
+	'phpVersion' => PHP_VERSION,
+);
+$lexbor_oracle = array(
+	'kind'          => 'lexbor-source',
+	'lexborVersion' => '2.10.0',
+	'lexborCommit'  => '481c444261a132190a3fb746d6d2f60824af3717',
+	'binary'        => '/tmp/lexbor-tree-oracle',
+);
+// First recorded attempt: a passing seed (11), stored without a result or replay document.
+$pass_summary = array(
+	'kind'              => 'attempt',
+	'ok'                => true,
+	'status'            => 'passed',
+	'failureClass'      => null,
+	'seed'              => 11,
+	'profile'           => 'document',
+	'mode'              => 'document',
+	'payloadPolicy'     => 'utf8',
+	'inputSource'       => 'generated',
+	'inputSha1'         => sha1( 'pass' ),
+	'inputLength'       => 4,
+	'signature'         => null,
+	'oracle'            => $php_oracle,
+	'artifactsRetained' => false,
+	'resultPath'        => null,
+	'replayPath'        => null,
+	'logPath'           => null,
+	'durationMs'        => 12,
+	'workerCode'        => 0,
+	'workerTimedOut'    => false,
+);
+$pass_id = $store->record_attempt( $pass_summary );
+// Second recorded attempt: a failure (seed 12) that retains its artifacts, stored with result and replay documents.
+$failure_summary = array(
+	'kind'              => 'failure',
+	'ok'                => false,
+	'status'            => 'failed',
+	'failureClass'      => 'tree-mismatch',
+	'seed'              => 12,
+	'profile'           => 'document',
+	'mode'              => 'document',
+	'payloadPolicy'     => 'utf8',
+	'inputSource'       => 'generated',
+	'inputSha1'         => sha1( 'fail' ),
+	'inputLength'       => 8,
+	'signature'         => array(
+		'hash'      => 'abc123def456',
+		'familyKey' => 'fam456789abc',
+	),
+	'oracle'            => $lexbor_oracle,
+	'artifactsRetained' => true,
+	'resultPath'        => $work_dir . '/seed-12/primary/result.json',
+	'replayPath'        => $work_dir . '/seed-12/primary/replay.json',
+	'logPath'           => null,
+	'durationMs'        => 30,
+	'workerCode'        => 2,
+	'workerTimedOut'    => false,
+);
+$failure_result = array(
+	'ok'           => false,
+	'status'       => 'failed',
+	'failureClass' => 'tree-mismatch',
+	'signature'    => array( 'hash' => 'abc123def456' ),
+);
+$failure_replay = array(
+	'kind'        => 'html-api-fuzz-replay',
+	'seed'        => 12,
+	'inputBase64' => base64_encode( '<i>fail</i>' ),
+);
+$failure_summary['failureArtifactsRetained'] = true;
+$failure_summary['oracleArtifactsRetained']  = false;
+$failure_id = $store->record_attempt( $failure_summary, $failure_result, $failure_replay );
+// Third recorded attempt: the same failure under seed 13, this time with no artifacts retained.
+$pruned_summary                             = $failure_summary;
+$pruned_summary['seed']                     = 13;
+$pruned_summary['artifactsRetained']        = false;
+$pruned_summary['failureArtifactsRetained'] = false;
+$pruned_summary['oracleArtifactsRetained']  = false;
+$pruned_summary['resultPath']               = null;
+$pruned_summary['replayPath']               = null;
+$pruned_id = $store->record_attempt( $pruned_summary, $failure_result, $failure_replay );
+// Fourth recorded attempt: seed 13 again, with a different signature and a different replay document.
+$same_seed_summary = $pruned_summary;
+$same_seed_summary['signature'] = array(
+	'hash'      => 'newseedabc123',
+	'familyKey' => 'newseedfam456',
+);
+$same_seed_result = array(
+	'ok'           => false,
+	'status'       => 'failed',
+	'failureClass' => 'tree-mismatch',
+	'signature'    => array( 'hash' => 'newseedabc123' ),
+);
+$same_seed_replay = array(
+	'kind'        => 'html-api-fuzz-replay',
+	'seed'        => 13,
+	'inputBase64' => base64_encode( '<b>new replay</b>' ),
+);
+$same_seed_id = $store->record_attempt( $same_seed_summary, $same_seed_result, $same_seed_replay );
+// Fifth recorded attempt: an oracle-tolerated finding (seed 14) that retains oracle artifacts only.
+$oracle_summary = array(
+	'kind'                     => 'oracle-finding',
+	'ok'                       => true,
+	'status'                   => 'oracle-tolerated',
+	'failureClass'             => 'oracle-tolerated',
+	'seed'                     => 14,
+	'profile'                  => 'document',
+	'mode'                     => 'document',
+	'payloadPolicy'            => 'utf8',
+	'inputSource'              => 'generated',
+	'inputSha1'                => sha1( 'oracle' ),
+	'inputLength'              => 12,
+	'signature'                => null,
+	'oracle'                   => $php_oracle,
+	'oracleFinding'            => array(
+		'classification' => 'oracle-bug',
+		'type'           => 'dom-xlink-dropped-local-name-after-xlink',
+		'suspectedOwner' => 'Lexbor/PHP DOM',
+		'signature'      => array(
+			'hash'      => 'oracle-abc123',
+			'familyKey' => 'oracle-fam123',
+		),
+	),
+	'artifactsRetained'        => true,
+	'failureArtifactsRetained' => false,
+	'oracleArtifactsRetained'  => true,
+	'resultPath'               => $work_dir . '/seed-14/primary/result.json',
+	'replayPath'               => $work_dir . '/seed-14/primary/replay.json',
+	'logPath'                  => null,
+	'durationMs'               => 18,
+	'workerCode'               => 0,
+	'workerTimedOut'           => false,
+);
+$oracle_result = array(
+	'ok'            => true,
+	'status'        => 'oracle-tolerated',
+	'oracleFinding' => $oracle_summary['oracleFinding'],
+);
+$oracle_replay = array(
+	'kind'          => 'html-api-fuzz-replay',
+	'seed'          => 14,
+	'inputBase64'   => base64_encode( '<svg></svg>' ),
+	'oracleFinding' => $oracle_summary['oracleFinding'],
+);
+$oracle_id = $store->record_attempt( $oracle_summary, $oracle_result, $oracle_replay );
+// Attempt count, retained exemplars per signature, per-seed retention flags and the highest attempt id.
+html_api_fuzz_smoke_assert( 5 === $store->count_attempts(), 'Expected five recorded attempts.' );
+html_api_fuzz_smoke_assert( array( 12 ) === $store->retained_seeds( 'abc123def456' ), 'Expected seed 12 as the retained exemplar for the signature.' );
+html_api_fuzz_smoke_assert( array() === $store->retained_seeds( 'unseen' ), 'Expected no retained exemplars for an unseen signature.' );
+html_api_fuzz_smoke_assert( array( 14 ) === $store->oracle_retained_seeds( 'oracle-abc123' ), 'Expected seed 14 as the retained exemplar for the oracle signature.' );
+html_api_fuzz_smoke_assert( $store->seed_artifacts_retained( 12 ), 'Expected seed 12 to be marked as retained.' );
+html_api_fuzz_smoke_assert( ! $store->seed_artifacts_retained( 13 ), 'Expected seed 13 not to be marked as retained.' );
+html_api_fuzz_smoke_assert( 5 === $store->max_id(), 'Expected max id of five.' );
+// Replay lookup by seed must return the newest document; lookup by attempt id must return that attempt's own document.
+$stored_replay = $store->replay_for_seed( 13 );
+html_api_fuzz_smoke_assert( is_array( $stored_replay ) && base64_encode( '<b>new replay</b>' ) === ( $stored_replay['inputBase64'] ?? null ), 'Expected seed replay lookup to return the most recent replay for compatibility.' );
+html_api_fuzz_smoke_assert( is_array( $store->replay_for_attempt_id( $pruned_id ) ) && base64_encode( '<i>fail</i>' ) === ( $store->replay_for_attempt_id( $pruned_id )['inputBase64'] ?? null ), 'Expected exact attempt replay lookup to survive same-seed reruns.' );
+html_api_fuzz_smoke_assert( is_array( $store->replay_for_attempt_id( $same_seed_id ) ) && base64_encode( '<b>new replay</b>' ) === ( $store->replay_for_attempt_id( $same_seed_id )['inputBase64'] ?? null ), 'Expected exact attempt replay lookup to retrieve the newer same-seed replay.' );
+html_api_fuzz_smoke_assert( null === $store->replay_for_seed( 11 ), 'Expected no stored replay for a passing seed.' );
+$stored_oracle_replay = $store->replay_for_seed( 14 );
+html_api_fuzz_smoke_assert( is_array( $stored_oracle_replay ) && base64_encode( '<svg></svg>' ) === ( $stored_oracle_replay['inputBase64'] ?? null ), 'Expected the oracle finding replay to be retrievable from the store.' );
+html_api_fuzz_smoke_assert( is_array( $store->replay_for_attempt_id( $oracle_id ) ) && base64_encode( '<svg></svg>' ) === ( $store->replay_for_attempt_id( $oracle_id )['inputBase64'] ?? null ), 'Expected the oracle finding replay to be retrievable by attempt id.' );
+// All failure rows first, then an incremental read that resumes after the first row's id.
+$failures = $store->failures_after( 0, $store->max_id() );
+html_api_fuzz_smoke_assert( 3 === count( $failures ), 'Expected three failure rows.' );
+html_api_fuzz_smoke_assert( 12 === ( $failures[0]['record']['seed'] ?? null ), 'Expected the first failure record to be seed 12.' );
+html_api_fuzz_smoke_assert( 'abc123def456' === ( $failures[0]['record']['signature']['hash'] ?? null ), 'Expected the failure record to carry its signature.' );
+
+$tail = $store->failures_after( $failures[0]['id'], $store->max_id() );
+html_api_fuzz_smoke_assert( 2 === count( $tail ) && 13 === ( $tail[0]['record']['seed'] ?? null ) && 13 === ( $tail[1]['record']['seed'] ?? null ), 'Expected incremental reads to resume after an offset.' );
+// The oracle finding is returned by its own query, separate from the three failure rows.
+$oracle_findings = $store->oracle_findings_after( 0, $store->max_id() );
+html_api_fuzz_smoke_assert( 1 === count( $oracle_findings ), 'Expected one oracle finding row.' );
+html_api_fuzz_smoke_assert( 14 === ( $oracle_findings[0]['record']['seed'] ?? null ), 'Expected the oracle finding record to be seed 14.' );
+html_api_fuzz_smoke_assert( 'oracle-abc123' === ( $oracle_findings[0]['record']['oracleFinding']['signature']['hash'] ?? null ), 'Expected the oracle finding record to carry its oracle signature.' );
+// Close the writing handle before the database file is reopened below.
+$store->close();
+
+// Reopen read-only as the watcher does and confirm persistence.
+$reader = new \HtmlApiFuzz\ResultStore( $db_path, true );
+html_api_fuzz_smoke_assert( 5 === $reader->count_attempts(), 'Expected attempts to persist across reopen.' );
+html_api_fuzz_smoke_assert( 3 === count( $reader->failures_after( 0, $reader->max_id() ) ), 'Expected failures to persist across reopen.' );
+html_api_fuzz_smoke_assert( 1 === count( $reader->oracle_findings_after( 0, $reader->max_id() ) ), 'Expected oracle findings to persist across reopen.' );
+$reader->close();
+
+// The grouping columns must be queryable without json_extract, so query them through a plain SQLite3 handle.
+$raw = new SQLite3( $db_path, SQLITE3_OPEN_READONLY );
+html_api_fuzz_smoke_assert( 2 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE family_key = 'fam456789abc'" ), 'Expected family_key to be stored per failure row.' );
+html_api_fuzz_smoke_assert( 1 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE oracle_family_key = 'oracle-fam123'" ), 'Expected oracle_family_key to be stored per oracle finding row.' );
+html_api_fuzz_smoke_assert( 1 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE oracle_signature_hash = 'oracle-abc123'" ), 'Expected oracle_signature_hash to be queryable.' );
+html_api_fuzz_smoke_assert( 1 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE signature_hash = 'abc123def456' AND failure_artifacts_retained = 1" ), 'Expected failure retention to use its own budget flag.' );
+html_api_fuzz_smoke_assert( 1 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE oracle_signature_hash = 'oracle-abc123' AND oracle_artifacts_retained = 1" ), 'Expected oracle retention to use its own budget flag.' );
+html_api_fuzz_smoke_assert( 1 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE seed = 11 AND oracle_kind = 'php-dom' AND oracle_version = '" . SQLite3::escapeString( PHP_VERSION ) . "'" ), 'Expected passing rows to keep PHP DOM oracle metadata in scalar columns.' );
+html_api_fuzz_smoke_assert( 3 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE oracle_kind = 'lexbor-source' AND oracle_version = '2.10.0' AND oracle_commit = '481c444261a132190a3fb746d6d2f60824af3717'" ), 'Expected Lexbor oracle metadata to be queryable for failure rows.' );
+html_api_fuzz_smoke_assert( 3 === (int) $raw->querySingle( "SELECT COUNT(*) FROM attempts WHERE oracle_binary = '/tmp/lexbor-tree-oracle'" ), 'Expected Lexbor oracle binary to be stored in a scalar column.' );
+$raw->close();
+// A database stamped with user_version 99 must still report 99 after ResultStore has opened and closed it.
+$future_db_path = $work_dir . '/future.sqlite';
+$future = new SQLite3( $future_db_path, SQLITE3_OPEN_READWRITE | SQLITE3_OPEN_CREATE );
+$future->exec( 'PRAGMA user_version = 99' );
+$future->close();
+$future_store = new \HtmlApiFuzz\ResultStore( $future_db_path );
+$future_store->close();
+$future = new SQLite3( $future_db_path, SQLITE3_OPEN_READONLY );
+html_api_fuzz_smoke_assert( 99 === (int) $future->querySingle( 'PRAGMA user_version' ), 'Opening a future schema should not downgrade user_version.' );
+$future->close();
+// Remove the scratch directory and verify that it is gone.
+\HtmlApiFuzz\remove_dir_recursive( $work_dir );
+html_api_fuzz_smoke_assert( ! is_dir( $work_dir ), 'Expected remove_dir_recursive to delete the work directory.' );
+
+echo "OK result-store-smoke\n";
diff --git a/tools/html-api-fuzz/tests/runner-retention-smoke.php b/tools/html-api-fuzz/tests/runner-retention-smoke.php
new file mode 100644
index 0000000000000..2189cf84b4c6f
--- /dev/null
+++ b/tools/html-api-fuzz/tests/runner-retention-smoke.php
@@ -0,0 +1,232 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+// Writes "FAIL: <message>" to STDERR and ends the script with exit status 1.
+function html_api_fuzz_smoke_fail( string $message ): void {
+	fwrite( STDERR, "FAIL: {$message}\n" );
+	exit( 1 );
+}
+// Fails the smoke test with $message unless $condition is true.
+function html_api_fuzz_smoke_assert( bool $condition, string $message ): void {
+	if ( ! $condition ) {
+		html_api_fuzz_smoke_fail( $message );
+	}
+}
+// CLI scripts under test, the repository root used as their working directory, and one scratch directory for all runs.
+$runner    = dirname( __DIR__ ) . '/runner.php';
+$stop_tool = dirname( __DIR__ ) . '/stop.php';
+$replay    = dirname( __DIR__ ) . '/replay.php';
+$repo_root = \HtmlApiFuzz\repo_root();
+$work_dir  = sys_get_temp_dir() . '/html-api-fuzz-runner-retention-' . \HtmlApiFuzz\timestamp();
+
+/*
+ * 1. A short bounded run: every attempt must land in results.sqlite, and
+ * seed directories may remain on disk only for retained failure exemplars.
+ */
+$run_dir = $work_dir . '/run';
+$proc    = \HtmlApiFuzz\run_php_process(
+	array(
+		$runner,
+		'--output-dir',
+		$run_dir,
+		'--max-seeds',
+		'6',
+		'--duration-seconds',
+		'0',
+		'--batch-size',
+		'3',
+		'--max-input-bytes',
+		'512',
+	),
+	$repo_root,
+	180000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected runner to exit cleanly: ' . substr( $proc['output'], -1000 ) );
+// state.json must name the seed limit as the reason the run ended.
+$state = \HtmlApiFuzz\read_json_file( $run_dir . '/state.json' );
+html_api_fuzz_smoke_assert( 'max-seeds' === ( $state['stopReason'] ?? null ), 'Expected stopReason max-seeds.' );
+// The result store must exist inside the run directory.
+$db_path = $run_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME;
+html_api_fuzz_smoke_assert( is_file( $db_path ), 'Expected results.sqlite to exist.' );
+// All six attempts have a row, and a seed directory exists exactly when its row has artifacts_retained set.
+$db = new SQLite3( $db_path, SQLITE3_OPEN_READONLY );
+html_api_fuzz_smoke_assert( 6 === (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts' ), 'Expected six recorded attempts.' );
+html_api_fuzz_smoke_assert( 0 === (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts WHERE ok = 1 AND artifacts_retained = 1' ), 'Expected no retained artifacts for passing attempts.' );
+$rows = $db->query( 'SELECT seed, artifacts_retained FROM attempts' );
+while ( false !== ( $row = $rows->fetchArray( SQLITE3_ASSOC ) ) ) {
+	html_api_fuzz_smoke_assert(
+		is_dir( $run_dir . '/seed-' . $row['seed'] ) === (bool) $row['artifacts_retained'],
+		"Expected seed {$row['seed']} directory presence to match artifacts_retained={$row['artifacts_retained']}."
+	);
+}
+$db->close();
+
+/*
+ * 2. The failure path, deterministically: --fail-unsupported turns the many
+ * unsupported fragment contexts into failures with repeating signatures, so
+ * a cap of 1 must prune repeats while archiving their replay documents.
+ */
+$cap          = 1;
+$fail_run_dir = $work_dir . '/fail-run';
+$proc         = \HtmlApiFuzz\run_php_process(
+	array(
+		$runner,
+		'--output-dir',
+		$fail_run_dir,
+		'--max-seeds',
+		'40',
+		'--duration-seconds',
+		'0',
+		'--batch-size',
+		'10',
+		'--max-input-bytes',
+		'512',
+		'--fail-unsupported',
+		'--max-keep-per-signature',
+		(string) $cap,
+	),
+	$repo_root,
+	300000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected failure-path runner to exit cleanly: ' . substr( $proc['output'], -1000 ) );
+// Inspect the result store written by the failure-path run.
+$db = new SQLite3( $fail_run_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME, SQLITE3_OPEN_READONLY );
+// Precondition guard: this run must actually exercise pruning. If generator
+// or signature changes stop producing repeated signatures here, fail loudly
+// so the test can be re-tuned instead of silently going vacuous.
+$pruned_failures = (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts WHERE ok = 0 AND artifacts_retained = 0' );
+html_api_fuzz_smoke_assert( $pruned_failures > 0, 'Expected the failure-path run to prune at least one over-cap failure; re-tune the seed range.' );
+html_api_fuzz_smoke_assert( 0 === (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts WHERE ok = 0 AND summary_json IS NULL' ), 'Expected failures to store their summary JSON.' );
+html_api_fuzz_smoke_assert( 0 === (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts WHERE ok = 0 AND artifacts_retained = 0 AND replay_json IS NULL' ), 'Expected pruned failures to archive their replay JSON.' );
+// No signature may have more retained rows than the cap.
+$max_retained_per_signature = (int) $db->querySingle(
+	'SELECT COALESCE( MAX( n ), 0 ) FROM ( SELECT COUNT(*) AS n FROM attempts WHERE artifacts_retained = 1 AND signature_hash IS NOT NULL GROUP BY signature_hash )'
+);
+html_api_fuzz_smoke_assert( $max_retained_per_signature <= $cap, 'Expected retained exemplars per signature to respect the cap.' );
+
+// Every signature with failures keeps its first exemplar on disk.
+$sig_rows = $db->query( 'SELECT signature_hash, MAX(artifacts_retained) AS retained FROM attempts WHERE ok = 0 AND signature_hash IS NOT NULL GROUP BY signature_hash' );
+while ( false !== ( $row = $sig_rows->fetchArray( SQLITE3_ASSOC ) ) ) {
+	html_api_fuzz_smoke_assert( 1 === (int) $row['retained'], "Expected signature {$row['signature_hash']} to retain its first exemplar." );
+}
+// A seed directory must exist exactly when its row has artifacts_retained set.
+$rows = $db->query( 'SELECT seed, artifacts_retained FROM attempts' );
+while ( false !== ( $row = $rows->fetchArray( SQLITE3_ASSOC ) ) ) {
+	html_api_fuzz_smoke_assert(
+		is_dir( $fail_run_dir . '/seed-' . $row['seed'] ) === (bool) $row['artifacts_retained'],
+		"Expected seed {$row['seed']} directory presence to match artifacts_retained={$row['artifacts_retained']}."
+	);
+}
+// Pick a pruned failure that carries a signature, so the hash comparison below cannot pass vacuously as null === null.
+$pruned = $db->querySingle( 'SELECT seed, signature_hash FROM attempts WHERE ok = 0 AND artifacts_retained = 0 AND signature_hash IS NOT NULL LIMIT 1', true );
+$db->close();
+html_api_fuzz_smoke_assert( is_array( $pruned ) && isset( $pruned['seed'], $pruned['signature_hash'] ), 'Expected a pruned failure with a signature to replay; re-tune the seed range.' );
+// A pruned failure must be reproducible from the store alone.
+$proc = \HtmlApiFuzz\run_php_process(
+	array(
+		$replay,
+		'--store',
+		$fail_run_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME,
+		'--seed',
+		(string) $pruned['seed'],
+		'--output-dir',
+		$work_dir . '/store-replay',
+	),
+	$repo_root,
+	60000
+);
+$replay_report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( is_array( $replay_report ), 'Expected replay --store to produce a JSON report: ' . substr( $proc['output'], -1000 ) );
+html_api_fuzz_smoke_assert( false === ( $replay_report['ok'] ?? true ), 'Expected the store replay to reproduce a failure.' );
+html_api_fuzz_smoke_assert(
+	( $replay_report['signature']['hash'] ?? null ) === $pruned['signature_hash'],
+	'Expected the store replay to reproduce the original signature.'
+);
+
+/*
+ * 3. A stop file that already exists must make the runner refuse to start
+ * (non-zero exit, clear message, no state.json), not succeed with zero seeds.
+ */
+$stop_run_dir = $work_dir . '/stop-run';
+\HtmlApiFuzz\ensure_dir( $stop_run_dir );
+file_put_contents( $stop_run_dir . '/STOP', "{}\n" );
+$proc = \HtmlApiFuzz\run_php_process(
+	array( $runner, '--output-dir', $stop_run_dir, '--max-seeds', '0', '--duration-seconds', '0' ),
+	$repo_root,
+	60000
+);
+html_api_fuzz_smoke_assert( 0 !== $proc['code'], 'Expected runner to refuse to start over a pre-existing stop file.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['output'], 'Stop file already exists' ), 'Expected a clear stale stop file message.' );
+html_api_fuzz_smoke_assert( ! is_file( $stop_run_dir . '/state.json' ), 'Expected no state to be written when refusing to start.' );
+
+/*
+ * 4. Mid-run graceful stop: an indefinite runner must finish its in-flight
+ * batch, record it, and exit with stopReason stop-requested.
+ */
+$mid_run_dir = $work_dir . '/mid-run';
+$spec        = array( 0 => array( 'pipe', 'r' ), 1 => array( 'pipe', 'w' ), 2 => array( 'pipe', 'w' ) );
+$process     = proc_open(
+	array( PHP_BINARY, $runner, '--output-dir', $mid_run_dir, '--max-seeds', '0', '--duration-seconds', '0', '--batch-size', '5', '--max-input-bytes', '512' ),
+	$spec,
+	$pipes,
+	$repo_root
+);
+html_api_fuzz_smoke_assert( is_resource( $process ), 'Expected the indefinite runner to start.' );
+fclose( $pipes[0] );
+stream_set_blocking( $pipes[1], false );
+stream_set_blocking( $pipes[2], false );
+// Poll state.json until the runner reports at least one attempt, giving up after 120 seconds.
+$deadline = microtime( true ) + 120.0;
+$progress = false;
+while ( microtime( true ) < $deadline ) {
+	$mid_state = is_file( $mid_run_dir . '/state.json' ) ? @json_decode( (string) @file_get_contents( $mid_run_dir . '/state.json' ), true ) : null;
+	$attempted = is_array( $mid_state )
+		? (int) ( $mid_state['successes'] ?? 0 ) + (int) ( $mid_state['failures'] ?? 0 ) + (int) ( $mid_state['unsupported'] ?? 0 )
+			+ (int) ( $mid_state['oracleParseErrors'] ?? 0 ) + (int) ( $mid_state['oracleUnsupported'] ?? 0 ) + (int) ( $mid_state['oracleTolerated'] ?? 0 )
+		: 0;
+	if ( $attempted > 0 ) {
+		$progress = true;
+		break;
+	}
+	usleep( 100000 );
+}
+
+// Request the stop through the stop tool to cover its run-dir path. Every check is deferred
+// until the runner is reaped: a failing assert exits at once and would orphan the child.
+$proc         = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $mid_run_dir ), $repo_root, 30000 );
+$stop_created = is_file( $mid_run_dir . '/STOP' );
+
+$deadline = microtime( true ) + ( $progress && 0 === $proc['code'] ? 120.0 : 0.0 );
+$code     = null;
+while ( microtime( true ) < $deadline ) {
+	stream_get_contents( $pipes[1] );
+	stream_get_contents( $pipes[2] );
+	$status = proc_get_status( $process );
+	if ( ! $status['running'] ) {
+		$code = $status['exitcode'];
+		break;
+	}
+	usleep( 100000 );
+}
+if ( null === $code ) {
+	proc_terminate( $process, 9 );
+}
+fclose( $pipes[1] );
+fclose( $pipes[2] );
+proc_close( $process );
+html_api_fuzz_smoke_assert( $progress, 'Expected the indefinite runner to record progress before the stop request.' );
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected stop.php to succeed: ' . substr( $proc['output'], -500 ) );
+html_api_fuzz_smoke_assert( $stop_created, 'Expected stop.php to create the stop file.' );
+html_api_fuzz_smoke_assert( null !== $code, 'Expected the runner to exit after the stop request.' );
+html_api_fuzz_smoke_assert( 0 === $code, 'Expected the stopped runner to exit cleanly.' );
+// The final state.json must name the stop request as the reason the run ended.
+$mid_state = \HtmlApiFuzz\read_json_file( $mid_run_dir . '/state.json' );
+html_api_fuzz_smoke_assert( 'stop-requested' === ( $mid_state['stopReason'] ?? null ), 'Expected stopReason stop-requested after a mid-run stop.' );
+// The result store must hold at least one recorded attempt.
+$db = new SQLite3( $mid_run_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME, SQLITE3_OPEN_READONLY );
+html_api_fuzz_smoke_assert( 0 < (int) $db->querySingle( 'SELECT COUNT(*) FROM attempts' ), 'Expected the in-flight batch to be recorded before stopping.' );
+$db->close();
+
+\HtmlApiFuzz\remove_dir_recursive( $work_dir );
+
+echo "OK runner-retention-smoke\n";
diff --git a/tools/html-api-fuzz/tests/stop-smoke.php b/tools/html-api-fuzz/tests/stop-smoke.php
new file mode 100644
index 0000000000000..54d904381edba
--- /dev/null
+++ b/tools/html-api-fuzz/tests/stop-smoke.php
@@ -0,0 +1,531 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+
+function html_api_fuzz_smoke_fail( string $message ): void {
+	fwrite( STDERR, "FAIL: {$message}\n" ); // Report the failure on stderr with a "FAIL: " prefix.
+	exit( 1 ); // Abort the script immediately with a non-zero exit status; this function never returns.
+}
+
+function html_api_fuzz_smoke_assert( bool $condition, string $message ): void {
+	if ( false === $condition ) { // A satisfied expectation falls through and returns silently.
+		html_api_fuzz_smoke_fail( $message ); // Reports $message on stderr and exits the script.
+	}
+}
+
+function html_api_fuzz_smoke_write_runner_state( string $path, array $overrides = array() ): void {
+	/*
+	 * Baseline runner state: a null stopReason and a STOP file located next to
+	 * the state file. Entries in $overrides with the same string key replace the
+	 * baseline value, because array_merge() lets the later array win.
+	 */
+	$baseline = array(
+		'kind'       => 'html-api-fuzz-runner-state',
+		'updatedAt'  => gmdate( 'c' ),
+		'stopFile'   => dirname( $path ) . '/STOP',
+		'stopReason' => null,
+	);
+	\HtmlApiFuzz\write_json_file( $path, array_merge( $baseline, $overrides ) );
+}
+
+function html_api_fuzz_smoke_touch( string $path, int $mtime ): void {
+	html_api_fuzz_smoke_assert( touch( $path, $mtime ), "Expected touch to succeed for {$path}." ); // Force the modification time to $mtime; abort the test if touch() fails.
+	clearstatcache( true, $path ); // Drop PHP's cached stat data so filemtime() below re-reads the file.
+	html_api_fuzz_smoke_assert( $mtime === (int) filemtime( $path ), "Expected mtime {$mtime} for {$path}." ); // Confirm the filesystem stored exactly the requested timestamp.
+}
+
+$stop_tool                   = dirname( __DIR__ ) . '/stop.php';
+$runner_tool                 = dirname( __DIR__ ) . '/runner.php';
+$repo_root                   = \HtmlApiFuzz\repo_root();
+$work_dir                    = sys_get_temp_dir() . '/html-api-fuzz-stop-' . \HtmlApiFuzz\timestamp();
+$repo_artifacts_dir          = $repo_root . '/artifacts';
+$repo_fuzz_artifacts_dir     = $repo_artifacts_dir . '/html-api-fuzz';
+$had_repo_artifacts_dir      = is_dir( $repo_artifacts_dir ); // Recorded before the test creates anything below the repo root.
+$had_repo_fuzz_artifacts_dir = is_dir( $repo_fuzz_artifacts_dir ); // Same, for the html-api-fuzz subdirectory.
+$repo_relative_run_dir       = $repo_fuzz_artifacts_dir . '/run-stop-smoke-' . basename( $work_dir );
+
+register_shutdown_function(
+	static function () use ( $work_dir, $repo_relative_run_dir, $repo_fuzz_artifacts_dir, $repo_artifacts_dir, $had_repo_fuzz_artifacts_dir, $had_repo_artifacts_dir ): void {
+		\HtmlApiFuzz\remove_dir_recursive( $work_dir ); // Shutdown functions also run after exit(), so failed assertions still clean up.
+		\HtmlApiFuzz\remove_dir_recursive( $repo_relative_run_dir ); // The one run directory this test creates inside the repository.
+		if ( ! $had_repo_fuzz_artifacts_dir ) {
+			@rmdir( $repo_fuzz_artifacts_dir ); // rmdir() only removes an empty directory; @ hides the warning when it is not empty.
+		}
+		if ( ! $had_repo_artifacts_dir ) {
+			@rmdir( $repo_artifacts_dir ); // Parent is removed last and only if this test was what created it.
+		}
+	}
+);
+
+/*
+ * Discovery must prefer an unfinished launcher run over a more recently
+ * touched but already finished one.
+ */
+$launcher_artifacts = $work_dir . '/launcher-discovery';
+$finished_run       = $launcher_artifacts . '/run-finished';
+\HtmlApiFuzz\ensure_dir( $finished_run );
+html_api_fuzz_smoke_write_runner_state(
+	$finished_run . '/state.json',
+	array(
+		'stopReason' => 'max-seeds',
+	)
+);
+
+$active_run = $launcher_artifacts . '/run-active';
+\HtmlApiFuzz\ensure_dir( $active_run );
+\HtmlApiFuzz\write_json_file(
+	$active_run . '/launcher-state.json',
+	array(
+		'kind'      => 'html-api-fuzz-launcher-state',
+		'finished'  => false,
+		'updatedAt' => gmdate( 'c' ),
+	)
+);
+// Make the finished run the more recently touched one.
+html_api_fuzz_smoke_touch( $finished_run . '/state.json', time() + 5 );
+
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $launcher_artifacts ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && is_array( $report ), 'Expected stop.php launcher discovery to succeed: ' . substr( $proc['output'], -500 ) );
+html_api_fuzz_smoke_assert( $active_run === ( $report['runDir'] ?? null ), 'Expected discovery to prefer the unfinished launcher run.' );
+html_api_fuzz_smoke_assert( is_file( $active_run . '/STOP' ), 'Expected the stop file in the active launcher run.' );
+html_api_fuzz_smoke_assert( ! is_file( $finished_run . '/STOP' ), 'Expected no stop file in the finished run.' );
+html_api_fuzz_smoke_assert( false === ( $report['looksFinished'] ?? null ), 'Expected the chosen launcher run not to look finished.' );
+
+// Lane runner state alone is also enough to mark a launch run unfinished.
+$lane_artifacts = $work_dir . '/lane-discovery';
+$lane_run       = $lane_artifacts . '/run-lane-active';
+\HtmlApiFuzz\ensure_dir( $lane_run . '/lane-00' );
+html_api_fuzz_smoke_write_runner_state( $lane_run . '/lane-00/state.json' );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $lane_artifacts ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $lane_run === ( $report['runDir'] ?? null ), 'Expected lane runner state to mark a launch run unfinished.' );
+html_api_fuzz_smoke_assert( is_file( $lane_run . '/STOP' ), 'Expected the stop file in the lane-active run.' );
+
+/*
+ * A standalone runner writes root state.json and may advertise a custom
+ * stopFile. A newer malformed runner state without stopReason must not be
+ * treated as unfinished.
+ */
+$standalone_artifacts = $work_dir . '/standalone-discovery';
+$missing_run          = $standalone_artifacts . '/run-missing-stop-reason';
+\HtmlApiFuzz\ensure_dir( $missing_run );
+\HtmlApiFuzz\write_json_file(
+	$missing_run . '/state.json',
+	array(
+		'kind'      => 'html-api-fuzz-runner-state',
+		'updatedAt' => gmdate( 'c' ),
+	)
+);
+html_api_fuzz_smoke_touch( $missing_run . '/state.json', time() + 10 );
+
+$standalone_run = $standalone_artifacts . '/run-standalone-active';
+$custom_stop    = $standalone_artifacts . '/custom-stop/STOP';
+\HtmlApiFuzz\ensure_dir( $standalone_run );
+html_api_fuzz_smoke_write_runner_state(
+	$standalone_run . '/state.json',
+	array(
+		'stopFile' => $custom_stop,
+	)
+);
+
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $standalone_artifacts ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && is_array( $report ), 'Expected stop.php standalone discovery to succeed: ' . substr( $proc['output'], -500 ) );
+html_api_fuzz_smoke_assert( $standalone_run === ( $report['runDir'] ?? null ), 'Expected discovery to prefer the unfinished standalone run.' );
+html_api_fuzz_smoke_assert( $standalone_run . '/STOP' === ( $report['stopFile'] ?? null ), 'Expected discovery to report the run-dir stop file as the primary stop file.' );
+html_api_fuzz_smoke_assert( in_array( $custom_stop, $report['stopFiles'] ?? array(), true ), 'Expected discovery to include the standalone runner custom stop file.' );
+html_api_fuzz_smoke_assert( is_file( $custom_stop ), 'Expected the custom stop file to be created.' );
+html_api_fuzz_smoke_assert( is_file( $standalone_run . '/STOP' ), 'Expected the run-dir stop file to be created for watcher and orchestrator paths.' );
+html_api_fuzz_smoke_assert( ! is_file( $missing_run . '/STOP' ), 'Expected no stop file in the malformed runner-state run.' );
+html_api_fuzz_smoke_assert( false === ( $report['looksFinished'] ?? null ), 'Expected the standalone run not to look finished.' );
+
+// A second invocation reports the existing request instead of failing.
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $standalone_artifacts ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && true === ( $report['alreadyRequested'] ?? null ), 'Expected a repeat stop request to be reported as already requested.' );
+
+// Explicit --run-dir inspection also honors the custom stop file.
+unlink( $custom_stop );
+unlink( $standalone_run . '/STOP' );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $standalone_run ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $standalone_run . '/STOP' === ( $report['stopFile'] ?? null ), 'Expected --run-dir to report the run-dir stop file as primary.' );
+html_api_fuzz_smoke_assert( in_array( $custom_stop, $report['stopFiles'] ?? array(), true ), 'Expected --run-dir to include the standalone runner custom stop file.' );
+html_api_fuzz_smoke_assert( is_file( $custom_stop ), 'Expected --run-dir to create the custom stop file.' );
+html_api_fuzz_smoke_assert( is_file( $standalone_run . '/STOP' ), 'Expected --run-dir to create the run-dir stop file too.' );
+
+// The README-documented relative command works from the repo root.
+$repo_relative_run_arg = 'artifacts/html-api-fuzz/' . basename( $repo_relative_run_dir );
+\HtmlApiFuzz\ensure_dir( $repo_relative_run_dir );
+\HtmlApiFuzz\write_json_file(
+	$repo_relative_run_dir . '/launcher-state.json',
+	array(
+		'kind'      => 'html-api-fuzz-launcher-state',
+		'finished'  => false,
+		'updatedAt' => gmdate( 'c' ),
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( 'tools/html-api-fuzz/stop.php', '--run-dir', $repo_relative_run_arg ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $repo_relative_run_arg === ( $report['runDir'] ?? null ), 'Expected README-style relative --run-dir to succeed: ' . substr( $proc['output'], -500 ) );
+html_api_fuzz_smoke_assert( $repo_relative_run_arg . '/STOP' === ( $report['stopFile'] ?? null ), 'Expected README-style relative --run-dir to report a relative run-dir stop file.' );
+html_api_fuzz_smoke_assert( is_file( $repo_relative_run_dir . '/STOP' ), 'Expected README-style relative --run-dir to create RUN_DIR/STOP.' );
+
+// Explicit --stop-file is added to the discovered stop files.
+unlink( $custom_stop );
+unlink( $standalone_run . '/STOP' );
+$override_stop = $standalone_artifacts . '/override-stop/STOP';
+$proc          = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $standalone_run, '--stop-file', $override_stop ), $repo_root, 30000 );
+$report        = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $standalone_run . '/STOP' === ( $report['stopFile'] ?? null ), 'Expected --run-dir --stop-file to report the run-dir stop file as primary.' );
+html_api_fuzz_smoke_assert( in_array( $override_stop, $report['stopFiles'] ?? array(), true ), 'Expected --stop-file to be included in stopFiles.' );
+html_api_fuzz_smoke_assert( is_file( $override_stop ), 'Expected --stop-file to create the override stop file.' );
+html_api_fuzz_smoke_assert( is_file( $custom_stop ), 'Expected --run-dir --stop-file to create the advertised custom stop file too.' );
+html_api_fuzz_smoke_assert( is_file( $standalone_run . '/STOP' ), 'Expected --run-dir --stop-file to create the run-dir stop file too.' );
+
+// Explicit --stop-file also works as a direct write without run discovery.
+$direct_stop = $work_dir . '/direct-stop/STOP';
+$proc        = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--stop-file', $direct_stop ), $repo_root, 30000 );
+$report      = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && is_array( $report ), 'Expected direct --stop-file to succeed without run discovery: ' . substr( $proc['output'], -500 ) );
+html_api_fuzz_smoke_assert( null === ( $report['runDir'] ?? null ), 'Expected direct --stop-file to report no run directory.' );
+html_api_fuzz_smoke_assert( is_file( $direct_stop ), 'Expected direct --stop-file to create the requested file.' );
+
+// A run directory without state can only be stopped unambiguously with --stop-file.
+$no_state_run = $work_dir . '/no-state-run';
+\HtmlApiFuzz\ensure_dir( $no_state_run );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $no_state_run ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected --run-dir without state to report warning status.' );
+$no_state_custom_stop = $work_dir . '/no-state-custom-stop/STOP';
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $no_state_run, '--stop-file', $no_state_custom_stop ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && is_file( $no_state_custom_stop ) && is_file( $no_state_run . '/STOP' ), 'Expected --run-dir --stop-file without state to write both stop files.' );
+
+// Bare and ambiguous CLI invocations fail.
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--stop-file' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'non-empty path' ), 'Expected bare --stop-file to fail with a path error.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', '--stop-file', $work_dir . '/bare-run-dir-stop/STOP' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'non-empty path' ), 'Expected bare --run-dir to fail with a path error.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--stop-stale-seconds' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'numeric' ), 'Expected bare --stop-stale-seconds to fail with a numeric error.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--stop-stale-seconds', 'nope' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'numeric' ), 'Expected non-numeric --stop-stale-seconds to fail with a numeric error.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'non-empty path' ), 'Expected bare --artifacts-dir to fail with a path error.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $launcher_artifacts, '--stop-file', $work_dir . '/ambiguous-stop/STOP' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'Pass --run-dir' ), 'Expected --artifacts-dir --stop-file without --run-dir to fail as ambiguous.' );
+$proc = \HtmlApiFuzz\run_php_process( array( $runner_tool, '--max-seeds', '1', '--stop-file=' ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'] && false !== strpos( $proc['stderr'], 'non-empty path' ), 'Expected runner --stop-file= to fail with a path error.' );
+
+// Relative advertised stop files are resolved from real runner cwd, not stop.php's cwd.
+$relative_artifacts = $work_dir . '/relative-discovery';
+$relative_run       = $relative_artifacts . '/run-relative-stop';
+$relative_cwd       = $work_dir . '/relative-cwd';
+$relative_invoke_cwd = $work_dir . '/relative-invoke-cwd';
+$relative_stop      = 'custom-relative-stop/STOP';
+\HtmlApiFuzz\ensure_dir( $relative_cwd );
+\HtmlApiFuzz\ensure_dir( $relative_invoke_cwd );
+$relative_cwd_real = realpath( $relative_cwd );
+html_api_fuzz_smoke_assert( is_string( $relative_cwd_real ), 'Expected relative runner cwd realpath.' );
+$proc = \HtmlApiFuzz\run_php_process(
+	array(
+		$runner_tool,
+		'--output-dir',
+		$relative_run,
+		'--max-seeds',
+		'1',
+		'--stop-file',
+		$relative_stop,
+	),
+	$relative_cwd,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected real runner with relative stop file to finish: ' . substr( $proc['output'], -500 ) );
+$runner_state = \HtmlApiFuzz\read_json_file( $relative_run . '/state.json' );
+html_api_fuzz_smoke_assert( is_array( $runner_state ) && $relative_cwd_real === ( $runner_state['cwd'] ?? null ), 'Expected runner state to record its cwd.' );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $relative_run ), $relative_invoke_cwd, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+$expected_relative_stop = rtrim( $relative_cwd_real, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . $relative_stop;
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && in_array( $expected_relative_stop, $report['stopFiles'] ?? array(), true ), 'Expected relative advertised stopFile to resolve from runner cwd.' );
+html_api_fuzz_smoke_assert( is_file( $expected_relative_stop ), 'Expected the runner-cwd-relative stop file to be created.' );
+html_api_fuzz_smoke_assert( ! is_file( $relative_invoke_cwd . '/' . $relative_stop ), 'Expected no stop file relative to stop.php invocation cwd.' );
+
+// On POSIX, a leading backslash is still relative to runner cwd.
+if ( '\\' !== DIRECTORY_SEPARATOR ) {
+	$backslash_run  = $work_dir . '/backslash-relative-stop';
+	$backslash_cwd  = $work_dir . '/backslash-cwd';
+	$backslash_stop = '\\custom-backslash-stop/STOP';
+	\HtmlApiFuzz\ensure_dir( $backslash_run );
+	\HtmlApiFuzz\ensure_dir( $backslash_cwd );
+	html_api_fuzz_smoke_write_runner_state(
+		$backslash_run . '/state.json',
+		array(
+			'stopFile' => $backslash_stop,
+			'cwd'      => $backslash_cwd,
+		)
+	);
+	$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $backslash_run ), $relative_invoke_cwd, 30000 );
+	$report = json_decode( trim( $proc['stdout'] ), true );
+	$expected_backslash_stop = rtrim( $backslash_cwd, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . $backslash_stop;
+	html_api_fuzz_smoke_assert( 0 === $proc['code'] && in_array( $expected_backslash_stop, $report['stopFiles'] ?? array(), true ), 'Expected POSIX leading-backslash stopFile to resolve from runner cwd.' );
+	html_api_fuzz_smoke_assert( is_file( $expected_backslash_stop ), 'Expected POSIX leading-backslash stop file to be created under runner cwd.' );
+}
+
+// Legacy active relative stopFile state without cwd warns because the watched file is ambiguous.
+$legacy_relative_run  = $work_dir . '/legacy-relative-stop';
+$legacy_relative_cwd = $work_dir . '/legacy-relative-cwd';
+$legacy_relative_stop = 'legacy-relative-caller-cwd/STOP';
+\HtmlApiFuzz\ensure_dir( $legacy_relative_run );
+\HtmlApiFuzz\ensure_dir( $legacy_relative_cwd );
+html_api_fuzz_smoke_write_runner_state(
+	$legacy_relative_run . '/state.json',
+	array(
+		'stopFile' => $legacy_relative_stop,
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $legacy_relative_run ), $legacy_relative_cwd, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected legacy relative stopFile without cwd to report warning status.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'exact watched file may be unknown' ), 'Expected legacy relative stopFile without absolute cwd to warn.' );
+html_api_fuzz_smoke_assert( is_file( $legacy_relative_run . '/STOP' ), 'Expected legacy relative stopFile warning path to write RUN_DIR/STOP.' );
+html_api_fuzz_smoke_assert( is_file( $legacy_relative_cwd . '/' . $legacy_relative_stop ), 'Expected legacy relative stopFile warning path to write a caller-cwd candidate.' );
+
+$finished_legacy_relative_run  = $work_dir . '/finished-legacy-relative-stop';
+$finished_legacy_relative_cwd = $work_dir . '/finished-legacy-relative-cwd';
+$finished_legacy_relative_stop = 'finished-legacy-relative-caller-cwd/STOP';
+\HtmlApiFuzz\ensure_dir( $finished_legacy_relative_run );
+\HtmlApiFuzz\ensure_dir( $finished_legacy_relative_cwd );
+html_api_fuzz_smoke_write_runner_state(
+	$finished_legacy_relative_run . '/state.json',
+	array(
+		'stopFile'   => $finished_legacy_relative_stop,
+		'stopReason' => 'max-seeds',
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $finished_legacy_relative_run ), $finished_legacy_relative_cwd, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected finished legacy relative stopFile without cwd to report warning status.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'exact watched file may be unknown' ), 'Expected finished legacy relative stopFile without cwd to warn.' );
+html_api_fuzz_smoke_assert( is_file( $finished_legacy_relative_run . '/STOP' ), 'Expected finished legacy relative stopFile warning path to write RUN_DIR/STOP.' );
+html_api_fuzz_smoke_assert( is_file( $finished_legacy_relative_cwd . '/' . $finished_legacy_relative_stop ), 'Expected finished legacy relative stopFile warning path to write a caller-cwd candidate.' );
+
+// Active malformed advertised stop files must not report unqualified success.
+$bad_advertised_run = $work_dir . '/bad-advertised-stop';
+\HtmlApiFuzz\ensure_dir( $bad_advertised_run );
+html_api_fuzz_smoke_write_runner_state(
+	$bad_advertised_run . '/state.json',
+	array(
+		'stopFile' => '',
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $bad_advertised_run ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && is_file( $bad_advertised_run . '/STOP' ), 'Expected RUN_DIR/STOP to be written with warning status when advertised stopFile is empty.' );
+html_api_fuzz_smoke_assert( false === ( $report['ok'] ?? null ) && false !== strpos( $proc['stderr'], 'exact watched file may be unknown' ), 'Expected empty advertised stopFile to warn.' );
+
+$unknown_kind_run = $work_dir . '/unknown-kind-runner-state';
+$unknown_kind_stop = $work_dir . '/unknown-kind-stop/STOP';
+\HtmlApiFuzz\ensure_dir( $unknown_kind_run );
+\HtmlApiFuzz\write_json_file(
+	$unknown_kind_run . '/state.json',
+	array(
+		'updatedAt'  => gmdate( 'c' ),
+		'stopFile'   => $unknown_kind_stop,
+		'stopReason' => null,
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $unknown_kind_run ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected runner-like state with missing kind to report warning status.' );
+html_api_fuzz_smoke_assert( is_file( $unknown_kind_stop ), 'Expected runner-like state with missing kind to write the advertised custom stop file.' );
+html_api_fuzz_smoke_assert( is_file( $unknown_kind_run . '/STOP' ), 'Expected runner-like state with missing kind to write RUN_DIR/STOP.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'missing or unknown kind' ), 'Expected runner-like state with missing kind to warn.' );
+
+$missing_stop_file_run = $work_dir . '/missing-stop-file';
+\HtmlApiFuzz\ensure_dir( $missing_stop_file_run );
+\HtmlApiFuzz\write_json_file(
+	$missing_stop_file_run . '/state.json',
+	array(
+		'kind'       => 'html-api-fuzz-runner-state',
+		'updatedAt'  => gmdate( 'c' ),
+		'stopReason' => null,
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $missing_stop_file_run ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected active runner state missing stopFile to report warning status.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'missing or malformed' ), 'Expected active runner state missing stopFile to warn.' );
+
+$relative_cwd_run = $work_dir . '/relative-cwd-stop';
+$relative_bad_cwd = $work_dir . '/relative-bad-cwd';
+\HtmlApiFuzz\ensure_dir( $relative_cwd_run );
+\HtmlApiFuzz\ensure_dir( $relative_bad_cwd );
+html_api_fuzz_smoke_write_runner_state(
+	$relative_cwd_run . '/state.json',
+	array(
+		'stopFile' => 'relative-cwd-caller-cwd/STOP',
+		'cwd'      => 'not-absolute',
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $relative_cwd_run ), $relative_bad_cwd, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected relative recorded cwd to report warning status.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'no recorded absolute cwd' ), 'Expected relative recorded cwd to warn.' );
+html_api_fuzz_smoke_assert( is_file( $relative_bad_cwd . '/relative-cwd-caller-cwd/STOP' ), 'Expected relative recorded cwd warning path to write a caller-cwd candidate.' );
+
+$unknown_runner_run = $work_dir . '/unknown-runner-stop-file';
+$unknown_runner_cwd = $work_dir . '/unknown-runner-cwd';
+\HtmlApiFuzz\ensure_dir( $unknown_runner_run );
+\HtmlApiFuzz\ensure_dir( $unknown_runner_cwd );
+\HtmlApiFuzz\write_json_file(
+	$unknown_runner_run . '/state.json',
+	array(
+		'kind'      => 'html-api-fuzz-runner-state',
+		'updatedAt' => gmdate( 'c' ),
+		'stopFile'  => 'unknown-runner-caller-cwd/STOP',
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $unknown_runner_run ), $unknown_runner_cwd, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected runner state without stopReason and relative stopFile to warn.' );
+
+// Unreadable state warns and still writes RUN_DIR/STOP.
+if ( '\\' !== DIRECTORY_SEPARATOR ) {
+	$unreadable_run = $work_dir . '/unreadable-state';
+	\HtmlApiFuzz\ensure_dir( $unreadable_run );
+	$unreadable_state = $unreadable_run . '/state.json';
+	html_api_fuzz_smoke_write_runner_state( $unreadable_state );
+	html_api_fuzz_smoke_assert( chmod( $unreadable_state, 0000 ), 'Expected chmod to make state unreadable.' );
+	if ( ! is_readable( $unreadable_state ) ) {
+		$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $unreadable_run ), $repo_root, 30000 );
+		chmod( $unreadable_state, 0600 );
+		$report = json_decode( trim( $proc['stdout'] ), true );
+		html_api_fuzz_smoke_assert( 2 === $proc['code'] && false === ( $report['ok'] ?? null ), 'Expected unreadable state to report warning status.' );
+		html_api_fuzz_smoke_assert( is_file( $unreadable_run . '/STOP' ), 'Expected unreadable state fallback to write RUN_DIR/STOP.' );
+	} else {
+		chmod( $unreadable_state, 0600 );
+	}
+}
+
+// Corrupt (unparseable) in-progress state warns and still writes RUN_DIR/STOP.
+$corrupt_run = $work_dir . '/corrupt-state';
+\HtmlApiFuzz\ensure_dir( $corrupt_run );
+file_put_contents( $corrupt_run . '/state.json', "{not-json\n" );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--run-dir', $corrupt_run ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 2 === $proc['code'] && is_file( $corrupt_run . '/STOP' ), 'Expected corrupt state fallback to write RUN_DIR/STOP with warning status.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'could not read' ), 'Expected corrupt state fallback to warn.' );
+
+// Stale corrupt state is not preferred during discovery.
+$stale_corrupt_artifacts = $work_dir . '/stale-corrupt-discovery';
+$stale_corrupt_run       = $stale_corrupt_artifacts . '/run-stale-corrupt';
+$recent_finished_run     = $stale_corrupt_artifacts . '/run-recent-finished';
+\HtmlApiFuzz\ensure_dir( $stale_corrupt_run );
+file_put_contents( $stale_corrupt_run . '/state.json', "{not-json\n" );
+html_api_fuzz_smoke_touch( $stale_corrupt_run . '/state.json', time() - 3600 );
+\HtmlApiFuzz\ensure_dir( $recent_finished_run );
+html_api_fuzz_smoke_write_runner_state(
+	$recent_finished_run . '/state.json',
+	array(
+		'stopReason' => 'max-seeds',
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $stale_corrupt_artifacts, '--stop-stale-seconds', '10' ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $recent_finished_run === ( $report['runDir'] ?? null ), 'Expected stale corrupt state not to be preferred during discovery.' );
+html_api_fuzz_smoke_assert( true === ( $report['looksFinished'] ?? null ), 'Expected recent finished fallback to report looksFinished.' );
+html_api_fuzz_smoke_assert( is_file( $recent_finished_run . '/STOP' ), 'Expected recent finished fallback to create STOP.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'already looks stopped' ), 'Expected recent finished fallback to warn.' );
+
+// A stale runner state does not count as unfinished and therefore warns.
+$stale_artifacts = $work_dir . '/stale-discovery';
+$stale_run       = $stale_artifacts . '/run-stale';
+$stale_custom    = $stale_artifacts . '/custom-stale-stop/STOP';
+\HtmlApiFuzz\ensure_dir( $stale_run );
+html_api_fuzz_smoke_write_runner_state(
+	$stale_run . '/state.json',
+	array(
+		'updatedAt'     => gmdate( 'c', time() - 3600 ),
+		'batchBudgetMs' => 0,
+		'stopFile'      => $stale_custom,
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $stale_artifacts, '--stop-stale-seconds', '10' ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && true === ( $report['looksFinished'] ?? null ), 'Expected a stale-only artifacts dir to be reported as looksFinished.' );
+html_api_fuzz_smoke_assert( is_file( $stale_custom ), 'Expected stale custom stop file to be created for the targeted run.' );
+html_api_fuzz_smoke_assert( is_file( $stale_run . '/STOP' ), 'Expected stale run-dir stop file to be created for watcher and orchestrator paths.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'already looks stopped' ), 'Expected a warning when only stale runs exist.' );
+
+// Missing updatedAt falls back to state file mtime for stale detection.
+$mtime_stale_artifacts = $work_dir . '/mtime-stale-discovery';
+$mtime_stale_run       = $mtime_stale_artifacts . '/run-mtime-stale';
+\HtmlApiFuzz\ensure_dir( $mtime_stale_run );
+\HtmlApiFuzz\write_json_file(
+	$mtime_stale_run . '/state.json',
+	array(
+		'kind'          => 'html-api-fuzz-runner-state',
+		'batchBudgetMs' => 0,
+		'stopReason'    => null,
+	)
+);
+html_api_fuzz_smoke_touch( $mtime_stale_run . '/state.json', time() - 3600 );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $mtime_stale_artifacts, '--stop-stale-seconds', '10' ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && true === ( $report['looksFinished'] ?? null ), 'Expected missing updatedAt to use stale file mtime.' );
+
+// A large batch budget floors the stale threshold so long-running batches still look active.
+$budget_artifacts = $work_dir . '/batch-budget-discovery';
+$budget_run       = $budget_artifacts . '/run-budget-active';
+\HtmlApiFuzz\ensure_dir( $budget_run );
+html_api_fuzz_smoke_write_runner_state(
+	$budget_run . '/state.json',
+	array(
+		'updatedAt'     => gmdate( 'c', time() - 30 ),
+		'batchBudgetMs' => 600000,
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $budget_artifacts, '--stop-stale-seconds', '10' ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && false === ( $report['looksFinished'] ?? null ), 'Expected batch budget floor to keep the old runner state active.' );
+
+// A stale launcher state does not count as unfinished and therefore warns.
+$stale_launcher_artifacts = $work_dir . '/stale-launcher-discovery';
+$stale_launcher_run       = $stale_launcher_artifacts . '/run-stale-launcher';
+\HtmlApiFuzz\ensure_dir( $stale_launcher_run );
+\HtmlApiFuzz\write_json_file(
+	$stale_launcher_run . '/launcher-state.json',
+	array(
+		'kind'      => 'html-api-fuzz-launcher-state',
+		'finished'  => false,
+		'updatedAt' => gmdate( 'c', time() - 3600 ),
+	)
+);
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $stale_launcher_artifacts, '--stop-stale-seconds', '10' ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && true === ( $report['looksFinished'] ?? null ), 'Expected a stale launcher-only artifacts dir to be reported as looksFinished.' );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'already looks stopped' ), 'Expected a warning when only stale launcher state exists.' );
+
+// Same-second active runs are ordered deterministically by the run path.
+$tie_artifacts = $work_dir . '/tie-discovery';
+$first_tie_run = $tie_artifacts . '/run-20260101T000000000001Z';
+$next_tie_run  = $tie_artifacts . '/run-20260101T000000000002Z';
+\HtmlApiFuzz\ensure_dir( $first_tie_run );
+\HtmlApiFuzz\ensure_dir( $next_tie_run );
+html_api_fuzz_smoke_write_runner_state( $first_tie_run . '/state.json' );
+html_api_fuzz_smoke_write_runner_state( $next_tie_run . '/state.json' );
+$same_mtime = time() + 20;
+html_api_fuzz_smoke_touch( $first_tie_run . '/state.json', $same_mtime );
+html_api_fuzz_smoke_touch( $next_tie_run . '/state.json', $same_mtime );
+$proc   = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $tie_artifacts ), $repo_root, 30000 );
+$report = json_decode( trim( $proc['stdout'] ), true );
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && $next_tie_run === ( $report['runDir'] ?? null ), 'Expected same-second active runs to prefer the later path.' );
+
+// An empty artifacts dir is an error, not a silent success.
+$empty_dir = $work_dir . '/empty';
+\HtmlApiFuzz\ensure_dir( $empty_dir );
+$proc = \HtmlApiFuzz\run_php_process( array( $stop_tool, '--artifacts-dir', $empty_dir ), $repo_root, 30000 );
+html_api_fuzz_smoke_assert( 0 !== $proc['code'], 'Expected stop.php to fail when no run directory exists.' );
+
+echo "OK stop-smoke\n";
diff --git a/tools/html-api-fuzz/tests/tree-renderer-normalization-smoke.php b/tools/html-api-fuzz/tests/tree-renderer-normalization-smoke.php
new file mode 100644
index 0000000000000..653551778230e
--- /dev/null
+++ b/tools/html-api-fuzz/tests/tree-renderer-normalization-smoke.php
@@ -0,0 +1,412 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+
+function html_api_fuzz_tree_normalization_fail( string $message ): void { // Reports a failed check and stops the script; never returns to the caller.
+	fwrite( STDERR, "FAIL: {$message}\n" ); // Failures go to STDERR, prefixed with "FAIL: ".
+	exit( 1 ); // Exit status 1 marks the run as failed.
+}
+
+function html_api_fuzz_tree_normalization_assert( bool $condition, string $message ): void { // Fails the run with $message unless $condition is true.
+	if ( ! $condition ) {
+		html_api_fuzz_tree_normalization_fail( $message ); // Writes "FAIL: {$message}" to STDERR and exits with status 1.
+	}
+}
+
+/** Accepts a result that is ok, or whose failure class is 'normalize-invariant-failed' while its comparison is still ok; otherwise fails with $message. */
+function html_api_fuzz_tree_normalization_assert_compares( array $result, string $message ): void {
+	$passed_outright = true === ( $result['ok'] ?? null );
+	$invariant_only  = 'normalize-invariant-failed' === ( $result['failureClass'] ?? null );
+
+	// The nested comparison flag is only read once the failure class matches.
+	$acceptable = $passed_outright || ( $invariant_only && true === ( $result['comparison']['ok'] ?? null ) );
+	if ( ! $acceptable ) {
+		html_api_fuzz_tree_normalization_fail( $message );
+	}
+}
+
+/** Best-effort recursive delete of $path; errors are suppressed and a missing path is a silent no-op. */
+function html_api_fuzz_tree_normalization_rm_tree( string $path ): void {
+	// Unlink anything that is not a real directory. Testing is_link() first also catches dangling
+	// symlinks, which file_exists() reports as missing, and keeps linked directories from being walked.
+	if ( is_link( $path ) || ! is_dir( $path ) ) {
+		@unlink( $path );
+		return;
+	}
+	foreach ( scandir( $path ) ?: array() as $item ) {
+		if ( '.' === $item || '..' === $item ) {
+			continue;
+		}
+		html_api_fuzz_tree_normalization_rm_tree( $path . DIRECTORY_SEPARATOR . $item );
+	}
+	@rmdir( $path );
+}
+
+/** Strictly decodes a base64 fixture, then returns \HtmlApiFuzz\Worker::run() for it with the 'replay' profile and output-dir "{$tmp}/{$name}". */
+function html_api_fuzz_tree_normalization_run( string $tmp, string $name, string $input_base64, string $mode ): array {
+	$decoded = base64_decode( $input_base64, true );
+	html_api_fuzz_tree_normalization_assert( false !== $decoded, "{$name} fixture should decode." );
+
+	$options = array(
+		'input-base64' => base64_encode( $decoded ),
+		'profile'      => 'replay',
+		'mode'         => $mode,
+		'output-dir'   => $tmp . '/' . $name,
+		'max-tokens'   => '2000',
+		'max-nodes'    => '3000',
+	);
+	return \HtmlApiFuzz\Worker::run( $options );
+}
+
+/*
+ * Synthetic compare_trees() cases exercise the comparison logic directly and
+ * need no DOM oracle, so they run before the Dom\HTMLDocument guard below.
+ *
+ * The comparison must keep failing on structural differences: scalar
+ * tolerance only applies when the spec substitution explains the entire
+ * differing line.
+ */
+$synthetic_mismatch = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  \"a\"\n\n", "<div>\n  \"b\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_mismatch['ok'] ?? null ), 'Structural tree mismatches should still fail.' );
+html_api_fuzz_tree_normalization_assert( is_array( $synthetic_mismatch['firstDifference'] ?? null ) && 2 === ( $synthetic_mismatch['firstDifference']['line'] ?? null ), 'Structural mismatch should report the first differing line.' );
+
+$synthetic_structure_with_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\0\"\n  \"a\"\n\n", "<div>\n  x=\"\xEF\xBF\xBD\"\n  \"b\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_structure_with_nul['ok'] ?? null ), 'Scalar tolerance must not mask structural differences on other lines.' );
+
+$synthetic_tolerated = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\0\"\n\n", "<div>\n  x=\"\xEF\xBF\xBD\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_tolerated['ok'] ?? null ), 'Scalar-only differences should be tolerated.' );
+html_api_fuzz_tree_normalization_assert( array( 1 ) === ( $synthetic_tolerated['scalarToleratedLines'] ?? null ), 'Scalar tolerance should report the tolerated line number.' );
+
+$synthetic_nul_with_agreed_cr = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\0)\\r\"\n\n", "<div>\n  x=\"\xEF\xBF\xBD)\\r\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_nul_with_agreed_cr['ok'] ?? null ), 'NUL tolerance should not rewrite an agreed escaped CR on the same line.' );
+
+$synthetic_cr_only_wordpress = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"a\\nb\"\n\n", "<div>\n  x=\"a\\rb\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_cr_only_wordpress['ok'] ?? null ), 'A DOM-side CR where WordPress holds LF is not the spec substitution and must fail.' );
+
+$synthetic_cr_before_decoded_lf = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\r\\n\"\n\n", "<div>\n  x=\"\\n\\n\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_cr_before_decoded_lf['ok'] ?? null ), 'WordPress CR+LF opposite DOM LF+LF should be tolerated as CR-to-LF plus an agreed LF.' );
+
+$synthetic_raw_crlf = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\r\\nX\"\n\n", "<div>\n  x=\"\\nX\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_raw_crlf['ok'] ?? null ), 'Raw CRLF collapsed to a single DOM LF should remain tolerated.' );
+
+$synthetic_backslash_collision = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"\\\\r\"\n\n", "<div>\n  x=\"\\\\n\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_backslash_collision['ok'] ?? null ), 'A literal backslash followed by r must not be rewritten as a CR escape.' );
+
+$synthetic_repeated_cr_lf = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	"<div>\n  x=\"" . str_repeat( '\\r\\n', 500 ) . "\"\n\n",
+	"<div>\n  x=\"" . str_repeat( '\\n\\n', 500 ) . "\"\n\n"
+);
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_repeated_cr_lf['ok'] ?? null ), 'A long run of raw CR plus decoded LF pairs should be tolerated without exhausting the matcher.' );
+
+/*
+ * WordPress preserves raw NUL/CR only in attribute values and tag/attribute
+ * names. In text, RCDATA, rawtext, and comments it applies the spec
+ * substitutions itself, so a scalar difference on those lines is a real
+ * divergence and the tolerance must not mask it.
+ */
+$synthetic_text_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  \"a\\0b\"\n\n", "<div>\n  \"a\xEF\xBF\xBDb\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_text_nul['ok'] ?? null ), 'Scalar tolerance must not apply to NUL differences on text lines.' );
+
+$synthetic_text_cr = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  \"x\\ry\"\n\n", "<div>\n  \"x\\ny\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_text_cr['ok'] ?? null ), 'Scalar tolerance must not apply to CR differences on text lines.' );
+
+$synthetic_comment_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  <!-- a\\0b -->\n\n", "<div>\n  <!-- a\xEF\xBF\xBDb -->\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_comment_nul['ok'] ?? null ), 'Scalar tolerance must not apply to NUL differences on comment lines.' );
+
+$synthetic_tag_name_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  <svg x\\0y>\n\n", "<div>\n  <svg x\xEF\xBF\xBDy>\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_tag_name_nul['ok'] ?? null ), 'Scalar tolerance should still apply to NUL differences on tag-name lines.' );
+
+$synthetic_quoted_attribute_name_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  \"a\\0\"=\"\"\n\n", "<div>\n  \"a\xEF\xBF\xBD\"=\"\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_quoted_attribute_name_nul['ok'] ?? null ), 'An attribute name that begins with a quote is still an attribute line, not a text line.' );
+
+/*
+ * The tokenizer permits `<` and `!` in attribute names, so `<div <!--a="...">`
+ * carries an attribute named `<!--a` and the renderer emits a line that
+ * begins like a comment. It is an attribute line and keeps the tolerance;
+ * real comment lines end with ` -->`, not a quoted value.
+ */
+$synthetic_comment_prefixed_attribute = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  <!--a=\"x\\0y\"\n\n", "<div>\n  <!--a=\"x\xEF\xBF\xBDy\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_comment_prefixed_attribute['ok'] ?? null ), 'An attribute name that begins with a comment opener is still an attribute line.' );
+
+/*
+ * Line classification must hold on lines far past the PCRE JIT stack
+ * comfort zone (~8KB with backtracking quantifiers): the generator's
+ * stress payloads produce long attribute values, and the scalar matcher
+ * itself budgets a million steps. Classification failing on length must
+ * not silently revoke an otherwise-legitimate tolerance.
+ */
+$synthetic_long_value = str_repeat( 'a', 9000 );
+$synthetic_long_attribute_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  x=\"{$synthetic_long_value}\\0\"\n\n", "<div>\n  x=\"{$synthetic_long_value}\xEF\xBF\xBD\"\n\n" );
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_long_attribute_nul['ok'] ?? null ), 'Scalar tolerance should survive attribute values longer than the PCRE JIT stack allows.' );
+
+$synthetic_long_repeated_cr_lf = \HtmlApiFuzz\TreeRenderer::compare_trees(
+	"<div>\n  x=\"" . str_repeat( '\\r\\n', 4096 ) . "\"\n\n",
+	"<div>\n  x=\"" . str_repeat( '\\n\\n', 4096 ) . "\"\n\n"
+);
+html_api_fuzz_tree_normalization_assert( true === ( $synthetic_long_repeated_cr_lf['ok'] ?? null ), 'A CR plus decoded LF run crossing the JIT stack boundary should stay tolerated.' );
+
+$synthetic_long_text_nul = \HtmlApiFuzz\TreeRenderer::compare_trees( "<div>\n  \"{$synthetic_long_value}\\0\"\n\n", "<div>\n  \"{$synthetic_long_value}\xEF\xBF\xBD\"\n\n" );
+html_api_fuzz_tree_normalization_assert( false === ( $synthetic_long_text_nul['ok'] ?? null ), 'Long text lines must stay ineligible for scalar tolerance.' );
+
+$synthetic_long_norm = \HtmlApiFuzz\TreeRenderer::normalize_tree_line( "  x=\"{$synthetic_long_value}\\0\"" );
+html_api_fuzz_tree_normalization_assert( 'x="<value>"' === $synthetic_long_norm, 'Line normalization should mask long attribute values rather than fail on them.' );
+
+$adjusted_svg_names = array(
+	'altGlyph',
+	'altGlyphDef',
+	'altGlyphItem',
+	'animateColor',
+	'animateMotion',
+	'animateTransform',
+	'clipPath',
+	'feBlend',
+	'feColorMatrix',
+	'feComponentTransfer',
+	'feComposite',
+	'feConvolveMatrix',
+	'feDiffuseLighting',
+	'feDisplacementMap',
+	'feDistantLight',
+	'feDropShadow',
+	'feFlood',
+	'feFuncA',
+	'feFuncB',
+	'feFuncG',
+	'feFuncR',
+	'feGaussianBlur',
+	'feImage',
+	'feMerge',
+	'feMergeNode',
+	'feMorphology',
+	'feOffset',
+	'fePointLight',
+	'feSpecularLighting',
+	'feSpotLight',
+	'feTile',
+	'feTurbulence',
+	'foreignObject',
+	'glyphRef',
+	'linearGradient',
+	'radialGradient',
+	'textPath',
+);
+foreach ( $adjusted_svg_names as $svg_name ) {
+	html_api_fuzz_tree_normalization_assert(
+		"<svg {$svg_name}>" === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( "<svg {$svg_name}>" ),
+		"Adjusted SVG name {$svg_name} should not normalize to a custom element."
+	);
+	$lower_svg_name = strtolower( $svg_name );
+	html_api_fuzz_tree_normalization_assert(
+		"<svg {$lower_svg_name}>" === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( "<svg {$lower_svg_name}>" ),
+		"Lowercase SVG oracle name {$lower_svg_name} should not normalize to a custom element."
+	);
+}
+foreach ( array( 'bgsound', 'isindex', 'keygen', 'selectedcontent' ) as $html_name ) {
+	html_api_fuzz_tree_normalization_assert(
+		"<{$html_name}>" === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( "<{$html_name}>" ),
+		"Known HTML name {$html_name} should not normalize to a custom element."
+	);
+}
+foreach ( array( 'menclose', 'mprescripts', 'mstack', 'apply', 'csymbol', 'not', 'prsubset' ) as $mathml_name ) {
+	html_api_fuzz_tree_normalization_assert(
+		"<math {$mathml_name}>" === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( "<math {$mathml_name}>" ),
+		"Known MathML name {$mathml_name} should not normalize to a custom element."
+	);
+}
+html_api_fuzz_tree_normalization_assert(
+	'<custom-element>' === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( '<x-widget>' ),
+	'Unknown HTML custom element names should still normalize to the custom-element bucket.'
+);
+html_api_fuzz_tree_normalization_assert(
+	'<custom-element>' === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( '<svg x-widget>' ),
+	'Unknown SVG names should still normalize to the custom-element bucket.'
+);
+html_api_fuzz_tree_normalization_assert(
+	'<custom-element>' === \HtmlApiFuzz\TreeRenderer::normalize_tree_line( '<math x-widget>' ),
+	'Unknown MathML names should still normalize to the custom-element bucket.'
+);
+
+if ( ! class_exists( 'Dom\\HTMLDocument' ) ) {
+	echo "tree renderer normalization oracle smoke tests skipped: Dom\\HTMLDocument unavailable\n";
+	exit( 0 );
+}
+
+$tmp = tempnam( sys_get_temp_dir(), 'html-api-fuzz-tree-normalization-' );
+if ( false === $tmp ) {
+	html_api_fuzz_tree_normalization_fail( 'Could not create temp path.' );
+}
+@unlink( $tmp );
+\HtmlApiFuzz\ensure_dir( $tmp );
+register_shutdown_function( 'html_api_fuzz_tree_normalization_rm_tree', $tmp );
+
+$presumptuous_tag = \HtmlApiFuzz\TreeRenderer::render_wordpress(
+	'</>',
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	array(
+		'maxTokens' => 100,
+		'maxNodes'  => 100,
+	)
+);
+html_api_fuzz_tree_normalization_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $presumptuous_tag['status'] ?? null ), 'Presumptuous tag closers should be ignored by the WordPress renderer.' );
+html_api_fuzz_tree_normalization_assert( "\n" === ( $presumptuous_tag['tree'] ?? null ), 'Ignored presumptuous tag closers should not render tree nodes.' );
+
+$presumptuous_tag_text = \HtmlApiFuzz\TreeRenderer::render_wordpress(
+	'a</>b',
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY,
+	array(
+		'maxTokens' => 100,
+		'maxNodes'  => 100,
+	)
+);
+html_api_fuzz_tree_normalization_assert( \HtmlApiFuzz\TreeRenderer::STATUS_OK === ( $presumptuous_tag_text['status'] ?? null ), 'Presumptuous tag closers between text should not fail the WordPress renderer.' );
+html_api_fuzz_tree_normalization_assert( "\"ab\"\n\n" === ( $presumptuous_tag_text['tree'] ?? null ), 'Ignored presumptuous tag closers should not split adjacent text nodes.' );
+
+$presumptuous_tag_full_document = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'presumptuous-tag-full-document',
+	base64_encode( '<html><head></head><body></body></html></>' ),
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $presumptuous_tag_full_document, 'Full-document presumptuous tag closers after HTML should be ignored by the worker.' );
+html_api_fuzz_tree_normalization_assert( true === ( $presumptuous_tag_full_document['comparison']['ok'] ?? null ), 'Full-document presumptuous tag closer comparison should pass.' );
+
+/*
+ * WordPress preserves raw NUL and CR bytes that spec-following parsers
+ * substitute during input preprocessing. Trees render those bytes raw, and
+ * the comparison tolerates a line only when the exact spec substitution
+ * (NUL to U+FFFD, CR/CRLF to LF) explains the whole difference, reporting
+ * the tolerated line numbers instead of silently scrubbing both sides.
+ */
+$nul_attribute_value = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'nul-attribute-value',
+	'PCEgcD48L3A+PGh0bWwgaWQ9AD4=',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $nul_attribute_value, 'NUL attribute values should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $nul_attribute_value['comparison']['ok'] ?? null ), 'NUL attribute value comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $nul_attribute_value['comparison']['scalarToleratedLines'] ), 'NUL attribute value comparison should report tolerated lines.' );
+$nul_attribute_value_tree = is_file( $nul_attribute_value['wordpress']['treePath'] ?? '' ) ? file_get_contents( $nul_attribute_value['wordpress']['treePath'] ) : false; // An empty path makes file_get_contents() throw ValueError on PHP 8 instead of returning false.
+html_api_fuzz_tree_normalization_assert( false !== $nul_attribute_value_tree, 'NUL attribute value WordPress tree should be written.' );
+html_api_fuzz_tree_normalization_assert( false !== strpos( $nul_attribute_value_tree, 'id="\\0"' ), 'NUL attribute values should render raw as escaped NUL.' );
+html_api_fuzz_tree_normalization_assert( false === strpos( $nul_attribute_value_tree, "\xEF\xBF\xBD" ), 'NUL attribute values should not be scrubbed to U+FFFD in the WordPress tree.' );
+
+$nul_attribute_name = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'nul-attribute-name',
+	'PGh0bWwKN0Z5AG10ND4=',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $nul_attribute_name, 'NUL attribute names should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $nul_attribute_name['comparison']['ok'] ?? null ), 'NUL attribute name comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $nul_attribute_name['comparison']['scalarToleratedLines'] ), 'NUL attribute name comparison should report tolerated lines.' );
+$nul_attribute_name_tree = is_file( $nul_attribute_name['wordpress']['treePath'] ?? '' ) ? file_get_contents( $nul_attribute_name['wordpress']['treePath'] ) : false; // An empty path makes file_get_contents() throw ValueError on PHP 8 instead of returning false.
+html_api_fuzz_tree_normalization_assert( false !== $nul_attribute_name_tree, 'NUL attribute name WordPress tree should be written.' );
+html_api_fuzz_tree_normalization_assert( false !== strpos( $nul_attribute_name_tree, '7fy\\0mt4=""' ), 'NUL attribute names should render raw as escaped NUL.' );
+
+$comment_prefixed_attribute_name = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'comment-prefixed-attribute-name',
+	base64_encode( "<div <!--a=\"x\0y\">k</div>" ),
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $comment_prefixed_attribute_name, 'Attribute names beginning with a comment opener should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $comment_prefixed_attribute_name['comparison']['ok'] ?? null ), 'Comment-opener attribute name comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $comment_prefixed_attribute_name['comparison']['scalarToleratedLines'] ), 'Comment-opener attribute name comparison should report tolerated lines.' );
+
+$foreign_tag_name = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'foreign-tag-name',
+	'PHN0cm9uZyBz16oiPjxzdmcgPjxnPjx0aXRsZT7wn5mCPFBiKQAsRTMmI3hmZmZkOzwvPg==',
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY
+);
+html_api_fuzz_tree_normalization_assert_compares( $foreign_tag_name, 'NUL foreign-content tag names should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $foreign_tag_name['comparison']['ok'] ?? null ), 'NUL foreign-content tag name comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $foreign_tag_name['comparison']['scalarToleratedLines'] ), 'NUL foreign-content tag name comparison should report tolerated lines.' );
+
+$cr_attribute_value = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'cr-attribute-value',
+	'PCE+PGh0bWwgfUlnLXBlXWo6dXMyYzA9Ig0iPmE=',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $cr_attribute_value, 'CR attribute values should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $cr_attribute_value['comparison']['ok'] ?? null ), 'CR attribute value comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $cr_attribute_value['comparison']['scalarToleratedLines'] ), 'CR attribute value comparison should report tolerated lines.' );
+$cr_attribute_value_tree = is_file( $cr_attribute_value['wordpress']['treePath'] ?? '' ) ? file_get_contents( $cr_attribute_value['wordpress']['treePath'] ) : false; // An empty path makes file_get_contents() throw ValueError on PHP 8 instead of returning false.
+html_api_fuzz_tree_normalization_assert( false !== $cr_attribute_value_tree, 'CR attribute value WordPress tree should be written.' );
+html_api_fuzz_tree_normalization_assert( false !== strpos( $cr_attribute_value_tree, "}ig-pe]j:us2c0=\"\\r\"" ), 'CR attribute values should render raw as escaped CR.' );
+
+/*
+ * A decoded CR (from a character reference such as `&#13;`) survives input
+ * preprocessing identically on both sides, so it appears as `\r` in both
+ * trees. NUL tolerance on the same line must not rewrite that agreed `\r`:
+ * CR may only map to LF where the DOM side actually holds the normalized LF.
+ *
+ * Fuzzer signature 1d48d2e9a6bc: `><title\t...=\0)&#13;">(</title>`.
+ */
+$nul_with_agreed_cr = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'nul-with-agreed-cr',
+	'Pjx0aXRsZQk3UiV8Sjl1V0hofVU9ACkmIzEzOyI+KDwvdGl0bGU+',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $nul_with_agreed_cr, 'NUL beside an agreed decoded CR should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $nul_with_agreed_cr['comparison']['ok'] ?? null ), 'NUL beside an agreed decoded CR comparison should pass.' );
+html_api_fuzz_tree_normalization_assert( ! empty( $nul_with_agreed_cr['comparison']['scalarToleratedLines'] ), 'NUL beside an agreed decoded CR should report tolerated lines.' );
+
+/*
+ * A raw CR immediately followed by a decoded `&#10;` renders as `\r\n` in
+ * the WordPress tree while the DOM holds `\n\n`: input preprocessing maps
+ * the lone CR to LF before the character reference decodes to a second LF.
+ * The CR-to-LF substitution must bind per occurrence; the pair-collapse
+ * rule for raw CRLF must not consume a decoded LF.
+ */
+$raw_cr_decoded_lf = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'raw-cr-decoded-lf',
+	'PGRpdiBhPSINJiMxMDt4Ij4=',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $raw_cr_decoded_lf, 'Raw CR before a decoded LF should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $raw_cr_decoded_lf['comparison']['ok'] ?? null ), 'Raw CR before a decoded LF comparison should pass.' );
+
+$nul_raw_cr_decoded_lf = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'nul-raw-cr-decoded-lf',
+	'PGRpdiBhPSIADSYjMTA7eCI+',
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert_compares( $nul_raw_cr_decoded_lf, 'NUL plus raw CR before a decoded LF should compare with scalar tolerance.' );
+html_api_fuzz_tree_normalization_assert( true === ( $nul_raw_cr_decoded_lf['comparison']['ok'] ?? null ), 'NUL plus raw CR before a decoded LF comparison should pass.' );
+
+/*
+ * An invalid UTF-8 byte (here raw 0x82 in a tag name) makes the trees
+ * differ by exactly the wp_scrub_utf8() substitution; the worker must
+ * classify that as encoding-mismatch, not tree-mismatch. Classification
+ * rests on the linesMatchAfterWordPressUtf8Scrub flag, which
+ * first_difference() computes on full lines.
+ */
+$invalid_utf8_tag_name = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'invalid-utf8-tag-name',
+	base64_encode( "<body><sma\x82>x" ),
+	\HtmlApiFuzz\Generator::MODE_FULL_DOCUMENT
+);
+html_api_fuzz_tree_normalization_assert( 'failed' === ( $invalid_utf8_tag_name['status'] ?? null ), 'Invalid UTF-8 in a tag name should fail the comparison.' );
+html_api_fuzz_tree_normalization_assert( 'encoding-mismatch' === ( $invalid_utf8_tag_name['failureClass'] ?? null ), 'Invalid UTF-8 in a tag name should classify as encoding-mismatch.' );
+
+/*
+ * NUL attributes whose scrubbed name sorts differently from the raw name
+ * must align with the DOM oracle ordering: sorting uses scrubbed names.
+ */
+$nul_attribute_sort = html_api_fuzz_tree_normalization_run(
+	$tmp,
+	'nul-attribute-sort',
+	base64_encode( "<p \0z=\"\" j=\"\">x</p>" ),
+	\HtmlApiFuzz\Generator::MODE_FRAGMENT_BODY
+);
+html_api_fuzz_tree_normalization_assert_compares( $nul_attribute_sort, 'NUL attribute names should sort by scrubbed name on both sides.' );
+html_api_fuzz_tree_normalization_assert( true === ( $nul_attribute_sort['comparison']['ok'] ?? null ), 'NUL attribute sort comparison should pass.' );
+
+echo "tree renderer normalization smoke tests passed\n";
diff --git a/tools/html-api-fuzz/tests/watcher-sqlite-smoke.php b/tools/html-api-fuzz/tests/watcher-sqlite-smoke.php
new file mode 100644
index 0000000000000..6c7ba541d5e7a
--- /dev/null
+++ b/tools/html-api-fuzz/tests/watcher-sqlite-smoke.php
@@ -0,0 +1,206 @@
+#!/usr/bin/env php
+<?php
+require_once dirname( __DIR__ ) . '/lib/autoload.php';
+
+function html_api_fuzz_smoke_fail( string $message ): void { // Reports a failed check and stops the script; never returns to the caller.
+	fwrite( STDERR, "FAIL: {$message}\n" ); // Failures go to STDERR, prefixed with "FAIL: ".
+	exit( 1 ); // Exit status 1 marks the run as failed.
+}
+
+function html_api_fuzz_smoke_assert( bool $condition, string $message ): void { // Fails the run with $message unless $condition is true.
+	if ( ! $condition ) {
+		html_api_fuzz_smoke_fail( $message ); // Writes "FAIL: {$message}" to STDERR and exits with status 1.
+	}
+}
+
+$watcher   = dirname( __DIR__ ) . '/watcher.php';
+$repo_root = \HtmlApiFuzz\repo_root();
+$work_dir  = sys_get_temp_dir() . '/html-api-fuzz-watcher-sqlite-' . \HtmlApiFuzz\timestamp();
+$run_dir   = $work_dir . '/run';
+$lane_dir  = $run_dir . '/lane-00';
+\HtmlApiFuzz\ensure_dir( $lane_dir );
+
+// Fabricate a lane store, as a runner lane would write it, holding one failure and one tolerated oracle finding.
+$store   = new \HtmlApiFuzz\ResultStore( $lane_dir . '/' . \HtmlApiFuzz\ResultStore::FILENAME );
+$summary = array(
+	'kind'              => 'failure',
+	'ok'                => false,
+	'status'            => 'failed',
+	'failureClass'      => 'tree-mismatch',
+	'seed'              => 7,
+	'profile'           => 'document',
+	'mode'              => 'document',
+	'payloadPolicy'     => 'utf8',
+	'generator'         => array( 'features' => array( 'tables' ) ),
+	'inputSource'       => 'generated',
+	'inputSha1'         => sha1( 'watcher' ),
+	'inputLength'       => 16,
+	'signature'         => array( 'hash' => 'feedfacecafe' ),
+	'artifactsRetained' => false,
+	'resultPath'        => null,
+	'replayPath'        => null,
+	'logPath'           => null,
+	'durationMs'        => 21,
+	'workerCode'        => 2,
+	'workerTimedOut'    => false,
+);
+$store->record_attempt( $summary, array( 'ok' => false ), array( 'seed' => 7 ) );
+$oracle_summary = array(
+	'kind'              => 'oracle-finding',
+	'ok'                => true,
+	'status'            => 'oracle-tolerated',
+	'failureClass'      => 'oracle-tolerated',
+	'seed'              => 8,
+	'profile'           => 'document',
+	'mode'              => 'document',
+	'payloadPolicy'     => 'utf8',
+	'generator'         => array( 'features' => array( 'foreign-content' ) ),
+	'inputSource'       => 'generated',
+	'inputSha1'         => sha1( 'oracle-watcher' ),
+	'inputLength'       => 12,
+	'signature'         => null,
+	'oracleFinding'     => array(
+		'classification' => 'oracle-bug',
+		'type'           => 'dom-xlink-dropped-local-name-after-xlink',
+		'suspectedOwner' => 'Lexbor/PHP DOM',
+		'upstream'       => array(
+			'issueUrl' => 'https://github.com/lexbor/lexbor/issues/372',
+		),
+		'signature'      => array(
+			'hash'      => 'oracle-feedface',
+			'familyKey' => 'oracle-family',
+		),
+	),
+	'artifactsRetained' => false,
+	'resultPath'        => null,
+	'replayPath'        => null,
+	'logPath'           => null,
+	'durationMs'        => 22,
+	'workerCode'        => 0,
+	'workerTimedOut'    => false,
+);
+$store->record_attempt( $oracle_summary, array( 'ok' => true, 'oracleFinding' => $oracle_summary['oracleFinding'] ), array( 'seed' => 8 ) );
+$store->close();
+
+// A stopped runner state lets the watcher treat its STOP-file scan as final.
+\HtmlApiFuzz\write_json_file(
+	$lane_dir . '/state.json',
+	array(
+		'kind'       => 'html-api-fuzz-runner-state',
+		'stopReason' => 'stop-requested',
+	)
+);
+file_put_contents( $run_dir . '/STOP', "{}\n" );
+
+// A second lane with a corrupt store (e.g. truncated by a crashed lane, or a
+// schemaless file from a lane killed mid-initialization) must not kill the
+// watcher or block ingestion from healthy lanes.
+$corrupt_lane = $run_dir . '/lane-01';
+\HtmlApiFuzz\ensure_dir( $corrupt_lane );
+file_put_contents( $corrupt_lane . '/' . \HtmlApiFuzz\ResultStore::FILENAME, 'this is not a sqlite database' );
+\HtmlApiFuzz\write_json_file(
+	$corrupt_lane . '/state.json',
+	array(
+		'kind'       => 'html-api-fuzz-runner-state',
+		'stopReason' => 'stop-requested',
+	)
+);
+
+// A crashed lane never records a stop reason; once its state goes stale it
+// must be presumed dead instead of blocking the watcher's exit forever.
+$dead_lane = $run_dir . '/lane-02';
+\HtmlApiFuzz\ensure_dir( $dead_lane );
+\HtmlApiFuzz\write_json_file(
+	$dead_lane . '/state.json',
+	array(
+		'kind'       => 'html-api-fuzz-runner-state',
+		'stopReason' => null,
+		'updatedAt'  => gmdate( 'c', time() - 3600 ),
+	)
+);
+
+$triage_dir = $run_dir . '/triage';
+$proc       = \HtmlApiFuzz\run_php_process(
+	array(
+		$watcher,
+		'--run-dir',
+		$run_dir,
+		'--state-dir',
+		$triage_dir,
+		'--no-minimize',
+		'--stop-stale-seconds',
+		'10',
+		// No --once: exiting depends on STOP-file handling.
+	),
+	$repo_root,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'] && ! $proc['timedOut'], 'Expected watcher to exit via the stop request: ' . substr( $proc['output'], -1000 ) );
+html_api_fuzz_smoke_assert( false !== strpos( $proc['stderr'], 'presuming dead runner' ), 'Expected the stale lane to be presumed dead.' );
+
+$triage_state = \HtmlApiFuzz\read_json_file( $triage_dir . '/state.json' );
+html_api_fuzz_smoke_assert( isset( $triage_state['signatures']['feedfacecafe'] ), 'Expected the sqlite failure signature to be triaged.' );
+html_api_fuzz_smoke_assert( 1 === (int) ( $triage_state['signatures']['feedfacecafe']['seenCount'] ?? 0 ), 'Expected the failure to be seen exactly once.' );
+html_api_fuzz_smoke_assert( ! isset( $triage_state['signatures']['oracle-feedface'] ), 'Expected oracle findings to be ignored without --triage-oracle-findings.' );
+
+// A later opt-in scan must still ingest oracle findings even though the
+// default failure offset already advanced past the row.
+$proc = \HtmlApiFuzz\run_php_process(
+	array( $watcher, '--run-dir', $run_dir, '--state-dir', $triage_dir, '--no-minimize', '--triage-oracle-findings' ),
+	$repo_root,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected second watcher pass to exit cleanly.' );
+$second_scan = json_decode( $proc['stdout'], true );
+html_api_fuzz_smoke_assert( 0 === ( $second_scan['failuresSeen'] ?? null ), 'Expected the second watcher pass not to reread failures.' );
+html_api_fuzz_smoke_assert( 1 === ( $second_scan['oracleFindingsSeen'] ?? null ), 'Expected the second watcher pass to ingest the oracle finding.' );
+$triage_state = \HtmlApiFuzz\read_json_file( $triage_dir . '/state.json' );
+html_api_fuzz_smoke_assert( 1 === (int) ( $triage_state['signatures']['feedfacecafe']['seenCount'] ?? 0 ), 'Expected no duplicate ingestion across scans.' );
+html_api_fuzz_smoke_assert( 1 === (int) ( $triage_state['signatures']['oracle-feedface']['seenCount'] ?? 0 ), 'Expected the oracle finding to be seen exactly once.' );
+html_api_fuzz_smoke_assert( 'oracle-finding' === ( $triage_state['signatures']['oracle-feedface']['triageKind'] ?? null ), 'Expected oracle finding triage kind.' );
+html_api_fuzz_smoke_assert( 'Lexbor/PHP DOM' === ( $triage_state['signatures']['oracle-feedface']['suspectedOwner'] ?? null ), 'Expected oracle finding suspected owner to be preserved.' );
+
+// A third scan with oracle triage enabled must not re-ingest the same finding.
+$proc = \HtmlApiFuzz\run_php_process(
+	array( $watcher, '--run-dir', $run_dir, '--state-dir', $triage_dir, '--no-minimize', '--triage-oracle-findings' ),
+	$repo_root,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected third watcher pass to exit cleanly.' );
+$third_scan = json_decode( $proc['stdout'], true );
+html_api_fuzz_smoke_assert( 0 === ( $third_scan['oracleFindingsSeen'] ?? null ), 'Expected no duplicate oracle ingestion across scans.' );
+
+$summary_run_dir = $work_dir . '/summary-run';
+\HtmlApiFuzz\ensure_dir( $summary_run_dir );
+$summary_state_dir = $work_dir . '/summary-triage';
+$summary_record = $oracle_summary;
+$summary_record['resultPath'] = null;
+$summary_record['replayPath'] = null;
+$summary_record['oracleFinding']['signature']['hash'] = 'oracle-summary-one';
+\HtmlApiFuzz\append_ndjson( $summary_run_dir . '/summary.ndjson', $summary_record );
+$proc = \HtmlApiFuzz\run_php_process(
+	array( $watcher, '--run-dir', $summary_run_dir, '--state-dir', $summary_state_dir, '--no-minimize', '--triage-oracle-findings', '--once' ),
+	$repo_root,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected summary watcher pass to exit cleanly.' );
+$summary_scan = json_decode( $proc['stdout'], true );
+html_api_fuzz_smoke_assert( 1 === ( $summary_scan['oracleFindingsSeen'] ?? null ), 'Expected first pathless summary oracle finding to be ingested.' );
+
+$summary_record['seed'] = 9;
+$summary_record['oracleFinding']['signature']['hash'] = 'oracle-summary-two';
+\HtmlApiFuzz\append_ndjson( $summary_run_dir . '/summary.ndjson', $summary_record );
+$proc = \HtmlApiFuzz\run_php_process(
+	array( $watcher, '--run-dir', $summary_run_dir, '--state-dir', $summary_state_dir, '--no-minimize', '--triage-oracle-findings', '--once' ),
+	$repo_root,
+	30000
+);
+html_api_fuzz_smoke_assert( 0 === $proc['code'], 'Expected second summary watcher pass to exit cleanly.' );
+$summary_scan = json_decode( $proc['stdout'], true );
+html_api_fuzz_smoke_assert( 1 === ( $summary_scan['oracleFindingsSeen'] ?? null ), 'Expected appended pathless summary oracle finding to avoid fallback-key collision.' );
+$summary_state = \HtmlApiFuzz\read_json_file( $summary_state_dir . '/state.json' );
+html_api_fuzz_smoke_assert( isset( $summary_state['signatures']['oracle-summary-one'], $summary_state['signatures']['oracle-summary-two'] ), 'Expected both pathless summary oracle signatures to be triaged.' );
+
+\HtmlApiFuzz\remove_dir_recursive( $work_dir );
+
+echo "OK watcher-sqlite-smoke\n";
diff --git a/tools/html-api-fuzz/watcher.php b/tools/html-api-fuzz/watcher.php
new file mode 100755
index 0000000000000..4a8eec2586f71
--- /dev/null
+++ b/tools/html-api-fuzz/watcher.php
@@ -0,0 +1,620 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+/**
+ * Prints the watcher's command-line synopsis and option notes to standard output.
+ */
+function html_api_fuzz_watcher_usage(): void {
+	// One entry per output line; each is terminated with a newline below.
+	$help_lines = array(
+		'Usage: php tools/html-api-fuzz/watcher.php --run-dir DIR [--state-dir DIR] [--once] [--interval-seconds N] [--stop-stale-seconds N] [--triage-oracle-findings]',
+		'When RUN_DIR/STOP exists, the watcher exits after a final scan once every runner reports a stop reason.',
+		"--stop-stale-seconds (default 120) presumes a non-reporting runner dead; per lane it is floored at twice the lane's advertised batch budget (timeout-ms x batch-size).",
+		'--triage-oracle-findings scans oracleFinding rows in addition to ok = 0 failures.',
+	);
+
+	echo implode( "\n", $help_lines ) . "\n";
+}
+
+/**
+ * Loads the watcher's persisted triage state, or builds a fresh one.
+ *
+ * Anything other than an array coming back from read_json_file() is treated
+ * as "no saved state". A saved state is returned with every missing top-level
+ * key back-filled from the defaults, and with the `signatures` and
+ * `seenAttempts` collections forced to arrays, so that later code can iterate
+ * and index them without first checking that they exist.
+ *
+ * @param string $path Path to the watcher's state.json.
+ * @return array Triage state containing at least the default keys.
+ */
+function html_api_fuzz_watcher_load_state( string $path ): array {
+	$now      = gmdate( 'c' );
+	$defaults = array(
+		'schemaVersion'        => 1,
+		'kind'                 => 'html-api-fuzz-triage-state',
+		'createdAt'            => $now,
+		'updatedAt'            => $now,
+		'signatures'           => array(),
+		'seenAttempts'         => array(),
+		'summaryOffsets'       => array(),
+		'sqliteOffsets'        => array(),
+		'oracleSummaryOffsets' => array(),
+		'oracleSqliteOffsets'  => array(),
+	);
+
+	$state = \HtmlApiFuzz\read_json_file( $path );
+	if ( ! is_array( $state ) ) {
+		return $defaults;
+	}
+
+	// Array union keeps every saved value and only adds the keys that are absent.
+	$state += $defaults;
+	foreach ( array( 'signatures', 'seenAttempts' ) as $collection ) {
+		if ( ! is_array( $state[ $collection ] ) ) {
+			$state[ $collection ] = array();
+		}
+	}
+
+	return $state;
+}
+
+/**
+ * Maps a signature hash to its per-signature directory under the state directory.
+ *
+ * Every run of characters outside `[a-zA-Z0-9._-]` collapses to a single
+ * underscore. A name that would be empty or consist only of dots (`''`, `.`,
+ * `..`) gets an underscore prefix, because it would otherwise resolve to the
+ * `signatures` directory itself or to its parent. All other hashes map to the
+ * same directory as before.
+ *
+ * @param string $state_dir Watcher state directory.
+ * @param string $hash      Signature hash.
+ * @return string Directory path; the directory is not created here.
+ */
+function html_api_fuzz_watcher_signature_dir( string $state_dir, string $hash ): string {
+	// preg_replace() returns null on failure; the cast turns that into an empty name.
+	$name = (string) preg_replace( '/[^a-zA-Z0-9._-]+/', '_', $hash );
+	if ( '' === trim( $name, '.' ) ) {
+		$name = '_' . $name;
+	}
+
+	return $state_dir . '/signatures/' . $name;
+}
+
+/**
+ * Adds one to the tally stored under a key, starting from zero when the key is new.
+ *
+ * A null or empty key is ignored and leaves the tallies untouched.
+ *
+ * @param array       $counts Tallies keyed by name; modified in place.
+ * @param string|null $key    Name to count.
+ */
+function html_api_fuzz_watcher_increment_count( array &$counts, ?string $key ): void {
+	$countable = is_string( $key ) && '' !== $key;
+	if ( $countable ) {
+		$previous       = isset( $counts[ $key ] ) ? (int) $counts[ $key ] : 0;
+		$counts[ $key ] = $previous + 1;
+	}
+}
+
+/**
+ * Renders the triage state as a Markdown status report.
+ *
+ * The report opens with a title, the last update time and the number of
+ * signatures, followed by one `##` section per signature. Bullets with a
+ * fallback value are always printed; the others appear only when the record
+ * holds a non-empty value for them.
+ *
+ * @param array $state Triage state; `updatedAt` and `signatures` are read.
+ * @return string Markdown text ending in a newline.
+ */
+function html_api_fuzz_watcher_status_markdown( array $state ): string {
+	$out   = array();
+	$out[] = '# HTML API Fuzz Triage';
+	$out[] = '';
+	$out[] = '- Updated: ' . ( $state['updatedAt'] ?? gmdate( 'c' ) );
+	$out[] = '- Signatures: ' . count( $state['signatures'] ?? array() );
+	$out[] = '';
+
+	// Bullets in output order: label, record field, fallback (null marks an optional bullet).
+	$bullets = array(
+		array( 'Class', 'failureClass', 'unknown' ),
+		array( 'Kind', 'triageKind', null ),
+		array( 'Oracle finding', 'oracleFindingType', null ),
+		array( 'Suspected owner', 'suspectedOwner', null ),
+		array( 'Upstream', 'upstreamIssueUrl', null ),
+		array( 'Status', 'status', 'unknown' ),
+		array( 'First seen', 'firstSeenAt', 'unknown' ),
+		array( 'Last seen', 'lastSeenAt', 'unknown' ),
+		array( 'Seen count', 'seenCount', 0 ),
+		array( 'Replay', 'replayPath', null ),
+		array( 'Minimized', 'minimizeResult', null ),
+	);
+
+	foreach ( $state['signatures'] ?? array() as $hash => $record ) {
+		$out[] = '## ' . $hash;
+		$out[] = '';
+		foreach ( $bullets as list( $label, $field, $fallback ) ) {
+			if ( null !== $fallback ) {
+				$out[] = '- ' . $label . ': ' . ( $record[ $field ] ?? $fallback );
+			} elseif ( ! empty( $record[ $field ] ) ) {
+				$out[] = '- ' . $label . ': ' . $record[ $field ];
+			}
+		}
+		$out[] = '';
+	}
+
+	return implode( "\n", $out ) . "\n";
+}
+
+/**
+ * Folds one failure (or oracle-finding) summary into the triage state.
+ *
+ * Creates the signature record on first sight, otherwise updates it: bumps
+ * the seen count and last-seen time, tallies profile / payload policy /
+ * generator features, keeps up to eight example attempts, and adopts a replay
+ * path when the record has none yet. The updated record is also written to
+ * `failure.json` in the signature's directory.
+ *
+ * Hardening over a plain create-or-update:
+ * - A missing, empty or non-scalar hash is rejected instead of being used as
+ *   an array key and directory name.
+ * - An existing record is back-filled with any missing default fields, and
+ *   its tally maps and example list are forced to arrays, so a partial
+ *   record cannot pass null to an array-typed parameter or to count().
+ * - A record parked as `missing-replay` goes back to `queued` once a replay
+ *   path arrives, so it becomes eligible for minimization again.
+ *
+ * @param array  $summary   Summary row; `signature.hash` identifies the signature.
+ * @param string $state_dir Watcher state directory.
+ * @param array  $state     Triage state; modified in place.
+ * @return bool True when this call created the signature record.
+ */
+function html_api_fuzz_watcher_record_failure( array $summary, string $state_dir, array &$state ): bool {
+	$hash = $summary['signature']['hash'] ?? null;
+	if ( ( ! is_string( $hash ) && ! is_int( $hash ) ) || '' === $hash ) {
+		return false;
+	}
+
+	$now      = gmdate( 'c' );
+	$new      = ! isset( $state['signatures'][ $hash ] );
+	$existing = $new ? array() : $state['signatures'][ $hash ];
+	if ( ! is_array( $existing ) ) {
+		$existing = array();
+	}
+
+	// Array union: stored values win, absent fields take these defaults.
+	$state['signatures'][ $hash ] = $existing + array(
+		'hash'                => $hash,
+		'status'              => 'new',
+		'triageKind'          => $summary['triageKind'] ?? ( ( $summary['ok'] ?? false ) ? 'oracle-finding' : 'failure' ),
+		'failureClass'        => $summary['failureClass'] ?? 'unknown',
+		'oracleFindingType'   => $summary['oracleFindingType'] ?? null,
+		'classification'      => $summary['classification'] ?? null,
+		'suspectedOwner'      => $summary['suspectedOwner'] ?? null,
+		'upstreamIssueUrl'    => $summary['upstreamIssueUrl'] ?? null,
+		'firstSeenAt'         => $now,
+		'lastSeenAt'          => $now,
+		'seenCount'           => 0,
+		'replayPath'          => $summary['replayPath'] ?? null,
+		'resultPath'          => $summary['resultPath'] ?? null,
+		'profileCounts'       => array(),
+		'payloadPolicyCounts' => array(),
+		'featureCounts'       => array(),
+		'examples'            => array(),
+	);
+
+	$record = &$state['signatures'][ $hash ];
+	foreach ( array( 'profileCounts', 'payloadPolicyCounts', 'featureCounts', 'examples' ) as $list_field ) {
+		if ( ! is_array( $record[ $list_field ] ) ) {
+			$record[ $list_field ] = array();
+		}
+	}
+
+	$record['seenCount']  = (int) $record['seenCount'] + 1;
+	$record['lastSeenAt'] = $now;
+	html_api_fuzz_watcher_increment_count( $record['profileCounts'], $summary['profile'] ?? null );
+	html_api_fuzz_watcher_increment_count( $record['payloadPolicyCounts'], $summary['payloadPolicy'] ?? null );
+	foreach ( $summary['generator']['features'] ?? array() as $feature ) {
+		// Non-string features are passed as null, which the tally helper ignores.
+		html_api_fuzz_watcher_increment_count( $record['featureCounts'], is_string( $feature ) ? $feature : null );
+	}
+
+	if ( empty( $record['replayPath'] ) && ! empty( $summary['replayPath'] ) ) {
+		$record['replayPath'] = $summary['replayPath'];
+		if ( 'missing-replay' === ( $record['status'] ?? null ) ) {
+			// The replay that minimization lacked has arrived; let the next queue pass pick it up.
+			$record['status'] = 'queued';
+		}
+	}
+
+	// Keep a bounded sample of concrete attempts for this signature.
+	if ( count( $record['examples'] ) < 8 ) {
+		$record['examples'][] = array(
+			'seed'              => $summary['seed'] ?? null,
+			'profile'           => $summary['profile'] ?? null,
+			'mode'              => $summary['mode'] ?? null,
+			'payloadPolicy'     => $summary['payloadPolicy'] ?? null,
+			'inputSource'       => $summary['inputSource'] ?? null,
+			'features'          => $summary['generator']['features'] ?? array(),
+			'inputSha1'         => $summary['inputSha1'] ?? null,
+			'oracleFindingType' => $summary['oracleFindingType'] ?? null,
+			'classification'    => $summary['classification'] ?? null,
+			'suspectedOwner'    => $summary['suspectedOwner'] ?? null,
+			'upstreamIssueUrl'  => $summary['upstreamIssueUrl'] ?? null,
+			'resultPath'        => $summary['resultPath'] ?? null,
+			'replayPath'        => $summary['replayPath'] ?? null,
+			'logPath'           => $summary['logPath'] ?? null,
+			'seenAt'            => $now,
+		);
+	}
+	// Drop the reference so later writes to $record cannot alias the state entry.
+	unset( $record );
+
+	$signature_dir = html_api_fuzz_watcher_signature_dir( $state_dir, (string) $hash );
+	\HtmlApiFuzz\ensure_dir( $signature_dir );
+	\HtmlApiFuzz\write_json_file( $signature_dir . '/failure.json', $state['signatures'][ $hash ] );
+
+	return $new;
+}
+
+/**
+ * Lists the summary.ndjson files of a run.
+ *
+ * Looks for one file directly in the run directory and one in each of its
+ * immediate children, skipping `.git`, `.triage-watcher` and `triage`. When
+ * the directory can be listed, the result is sorted and de-duplicated.
+ *
+ * @param string $run_dir Run directory.
+ * @return array List of summary file paths.
+ */
+function html_api_fuzz_watcher_summary_paths( string $run_dir ): array {
+	$base  = rtrim( $run_dir, DIRECTORY_SEPARATOR );
+	$found = array();
+
+	$top_level = $base . '/summary.ndjson';
+	if ( is_file( $top_level ) ) {
+		$found[] = $top_level;
+	}
+
+	$entries = @scandir( $run_dir );
+	if ( false !== $entries ) {
+		$skipped = array( '.', '..', '.git', '.triage-watcher', 'triage' );
+		foreach ( $entries as $entry ) {
+			if ( in_array( $entry, $skipped, true ) ) {
+				continue;
+			}
+
+			$candidate = $base . DIRECTORY_SEPARATOR . $entry . '/summary.ndjson';
+			if ( is_file( $candidate ) ) {
+				$found[] = $candidate;
+			}
+		}
+
+		sort( $found );
+		$found = array_values( array_unique( $found ) );
+	}
+
+	return $found;
+}
+
+/**
+ * Reads the NDJSON records appended to a summary file since a byte offset.
+ *
+ * Each returned entry holds the decoded record, the byte offset at which its
+ * line starts, and a line counter that is relative to the starting offset
+ * (not to the beginning of the file).
+ *
+ * Only lines that decode to a JSON object/array are returned; blank lines
+ * and other complete lines are skipped for good. A last line that has no
+ * terminating newline and does not decode is treated as a write still in
+ * progress: the returned offset stays at the start of that line, so the next
+ * call re-reads it once the writer has finished instead of resuming in the
+ * middle of the record and losing it.
+ *
+ * @param string $summary_path Path to a summary.ndjson file.
+ * @param int    $offset       Byte offset to resume from; a negative or
+ *                             past-end-of-file value restarts at 0.
+ * @return array `records` (list of `line` / `offset` / `record` entries) and
+ *               `offset` (where to resume next time). When the file cannot be
+ *               opened, no records and the offset as given.
+ */
+function html_api_fuzz_watcher_read_summary_records( string $summary_path, int $offset ): array {
+	$records = array();
+	$handle  = @fopen( $summary_path, 'rb' );
+	if ( false === $handle ) {
+		return array(
+			'records' => $records,
+			'offset'  => $offset,
+		);
+	}
+
+	// Size the open handle rather than the path: filesize() answers from PHP's
+	// stat cache, which can be stale in a long-running process.
+	$stat = fstat( $handle );
+	$size = is_array( $stat ) ? (int) $stat['size'] : 0;
+	if ( $offset < 0 || $offset > $size ) {
+		// The file shrank or was replaced; start over.
+		$offset = 0;
+	}
+	if ( $offset > 0 ) {
+		fseek( $handle, $offset );
+	}
+
+	$line_no     = 0;
+	$next_offset = $offset;
+	while ( false !== ( $line_offset = ftell( $handle ) ) && false !== ( $raw = fgets( $handle ) ) ) {
+		++$line_no;
+		$terminated = "\n" === substr( $raw, -1 );
+		$line       = trim( $raw );
+
+		if ( '' !== $line ) {
+			$record = json_decode( $line, true );
+			if ( JSON_ERROR_NONE !== json_last_error() ) {
+				if ( ! $terminated ) {
+					// Torn trailing record: leave the offset before it and retry later.
+					break;
+				}
+			} elseif ( is_array( $record ) ) {
+				$records[] = array(
+					'line'   => $line_no,
+					'offset' => $line_offset,
+					'record' => $record,
+				);
+			}
+		}
+
+		$next_offset = $line_offset + strlen( $raw );
+	}
+
+	fclose( $handle );
+
+	return array(
+		'records' => $records,
+		'offset'  => $next_offset,
+	);
+}
+
+/**
+ * Builds the key under which an attempt is remembered as already seen.
+ *
+ * A non-empty result path is preferred, then a non-empty replay path; a
+ * record with neither is identified by the caller-supplied fallback key.
+ *
+ * @param array  $record       Attempt record.
+ * @param string $fallback_key Key to use for a record that carries no path.
+ * @return string Attempt key.
+ */
+function html_api_fuzz_watcher_attempt_key( array $record, string $fallback_key ): string {
+	$path_fields = array(
+		'resultPath' => 'result:',
+		'replayPath' => 'replay:',
+	);
+
+	foreach ( $path_fields as $field => $prefix ) {
+		if ( ! empty( $record[ $field ] ) ) {
+			return $prefix . $record[ $field ];
+		}
+	}
+
+	return $fallback_key;
+}
+
+/**
+ * Lists the per-lane result stores (ResultStore::FILENAME) of a run: one
+ * directly in the run directory for a standalone runner, and one per
+ * immediate child directory for launcher lanes.
+ *
+ * `.git`, `.triage-watcher` and `triage` are never searched. When the run
+ * directory can be listed, the result is sorted and de-duplicated.
+ *
+ * @param string $run_dir Run directory.
+ * @return array List of store file paths.
+ */
+function html_api_fuzz_watcher_sqlite_paths( string $run_dir ): array {
+	$base   = rtrim( $run_dir, DIRECTORY_SEPARATOR );
+	$suffix = '/' . \HtmlApiFuzz\ResultStore::FILENAME;
+	$stores = array();
+
+	if ( is_file( $base . $suffix ) ) {
+		$stores[] = $base . $suffix;
+	}
+
+	$entries = @scandir( $run_dir );
+	if ( false !== $entries ) {
+		$skipped = array( '.', '..', '.git', '.triage-watcher', 'triage' );
+		foreach ( $entries as $entry ) {
+			if ( in_array( $entry, $skipped, true ) ) {
+				continue;
+			}
+
+			$candidate = $base . DIRECTORY_SEPARATOR . $entry . $suffix;
+			if ( is_file( $candidate ) ) {
+				$stores[] = $candidate;
+			}
+		}
+
+		sort( $stores );
+		$stores = array_values( array_unique( $stores ) );
+	}
+
+	return $stores;
+}
+
+/**
+ * True when every runner under the run directory has recorded a stop reason;
+ * gates the watcher's graceful exit after a stop request so the final scan
+ * covers everything the runners wrote.
+ *
+ * A runner whose state has not been touched for $stale_seconds is presumed
+ * dead (crashed lanes never record a stop reason and must not block the exit
+ * forever). Staleness is judged from the state's `updatedAt`; when that is
+ * missing or unparseable, the state file's modification time is used instead,
+ * so such a lane can still age out. With no runner state at all the run has
+ * not started; the watcher keeps scanning rather than racing a launcher that
+ * is still spawning lanes.
+ *
+ * @param string $run_dir       Run directory; its own state.json and those of
+ *                              its immediate children are inspected.
+ * @param float  $stale_seconds Minimum silence before a lane is presumed dead.
+ * @return bool True when at least one runner state exists and none is live.
+ */
+function html_api_fuzz_watcher_runners_stopped( string $run_dir, float $stale_seconds ): bool {
+	$state_paths = array();
+	$direct = rtrim( $run_dir, DIRECTORY_SEPARATOR ) . '/state.json';
+	if ( is_file( $direct ) ) {
+		$state_paths[] = $direct;
+	}
+	$items = @scandir( $run_dir );
+	if ( false !== $items ) {
+		foreach ( $items as $item ) {
+			if ( '.' === $item || '..' === $item || in_array( $item, array( '.git', '.triage-watcher', 'triage' ), true ) ) {
+				continue;
+			}
+			$state_path = rtrim( $run_dir, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR . $item . '/state.json';
+			if ( is_file( $state_path ) ) {
+				$state_paths[] = $state_path;
+			}
+		}
+	}
+
+	$found_runner_state = false;
+	foreach ( $state_paths as $state_path ) {
+		try {
+			$runner_state = \HtmlApiFuzz\read_json_file( $state_path );
+		} catch ( \RuntimeException $e ) {
+			// Mid-write; the runner is alive.
+			return false;
+		}
+		// Files that are not runner state (e.g. another tool's state.json) are ignored.
+		if ( ! is_array( $runner_state ) || 'html-api-fuzz-runner-state' !== ( $runner_state['kind'] ?? null ) ) {
+			continue;
+		}
+		$found_runner_state = true;
+		if ( null !== ( $runner_state['stopReason'] ?? null ) ) {
+			continue;
+		}
+		/*
+		 * A live lane is legitimately silent for up to one batch worker run
+		 * (its advertised batchBudgetMs), so the staleness threshold is
+		 * floored at twice that budget — otherwise large --timeout-ms or
+		 * --batch-size values would get a live lane presumed dead and the
+		 * "final scan covers everything" contract silently broken.
+		 */
+		$lane_stale_seconds = max( $stale_seconds, 2.0 * ( (int) ( $runner_state['batchBudgetMs'] ?? 0 ) ) / 1000.0 );
+		$updated_at = strtotime( (string) ( $runner_state['updatedAt'] ?? '' ) );
+		if ( false === $updated_at ) {
+			// No usable timestamp in the state: fall back to when the file was last written.
+			clearstatcache( true, $state_path );
+			$updated_at = @filemtime( $state_path );
+		}
+		if ( false !== $updated_at && ( time() - $updated_at ) > $lane_stale_seconds ) {
+			fwrite( STDERR, '[' . gmdate( 'c' ) . "] watcher: presuming dead runner (state stale): {$state_path}\n" );
+			continue;
+		}
+		// A runner with no stop reason and fresh (or undatable) state is still live.
+		return false;
+	}
+
+	return $found_runner_state;
+}
+
+/**
+ * Runs minimize.php for one signature and records the outcome on its state record.
+ *
+ * The signature's status moves to `minimizing`, then to `minimized` when the
+ * result file reports `ok`, otherwise to `minimize-failed`. A signature whose
+ * replay path is absent or is not an existing file is marked
+ * `missing-replay` and nothing is run. Each run increments
+ * `minimizeAttempts` and writes the record to the signature's failure.json.
+ *
+ * Hardening:
+ * - An unknown hash is a no-op rather than creating a status-only record.
+ * - A non-string replay path counts as missing instead of reaching is_file().
+ * - A result file that cannot be read is treated as a failed minimization
+ *   instead of letting the exception end the watcher.
+ *
+ * @param string $hash      Signature hash.
+ * @param string $state_dir Watcher state directory.
+ * @param array  $state     Triage state; modified in place.
+ * @param array  $options   CLI options; timeout-ms, max-attempts, probe-mode,
+ *                          keep-candidate-artifacts, any-failure and
+ *                          minimize-timeout-ms are read.
+ */
+function html_api_fuzz_watcher_minimize( string $hash, string $state_dir, array &$state, array $options ): void {
+	$record = $state['signatures'][ $hash ] ?? null;
+	if ( ! is_array( $record ) ) {
+		return;
+	}
+
+	$replay_path = $record['replayPath'] ?? null;
+	if ( ! is_string( $replay_path ) || '' === $replay_path || ! is_file( $replay_path ) ) {
+		$state['signatures'][ $hash ]['status'] = 'missing-replay';
+		return;
+	}
+
+	$signature_dir = html_api_fuzz_watcher_signature_dir( $state_dir, $hash );
+	$output_dir    = $signature_dir . '/minimize-' . \HtmlApiFuzz\timestamp();
+	$state['signatures'][ $hash ]['status']            = 'minimizing';
+	$state['signatures'][ $hash ]['minimizeStartedAt'] = gmdate( 'c' );
+	$state['signatures'][ $hash ]['minimizeAttempts']  = (int) ( $state['signatures'][ $hash ]['minimizeAttempts'] ?? 0 ) + 1;
+
+	$args = array(
+		__DIR__ . '/minimize.php',
+		'--replay',
+		$replay_path,
+		'--output-dir',
+		$output_dir,
+		'--timeout-ms',
+		(string) \HtmlApiFuzz\option_int( $options, 'timeout-ms', 2500 ),
+		'--max-attempts',
+		(string) \HtmlApiFuzz\option_int( $options, 'max-attempts', 600 ),
+	);
+	// --probe-mode is forwarded only when the caller set it explicitly.
+	if ( array_key_exists( 'probe-mode', $options ) ) {
+		$args[] = '--probe-mode';
+		$args[] = \HtmlApiFuzz\option_string( $options, 'probe-mode', 'auto' );
+	}
+	if ( \HtmlApiFuzz\option_bool( $options, 'keep-candidate-artifacts', false ) ) {
+		$args[] = '--keep-candidate-artifacts';
+	}
+	if ( \HtmlApiFuzz\option_bool( $options, 'any-failure', false ) ) {
+		$args[] = '--any-failure';
+	}
+	// Oracle findings are minimized against their own signature hash.
+	if ( 'oracle-finding' === ( $record['triageKind'] ?? null ) ) {
+		$args[] = '--target-kind';
+		$args[] = 'oracle-finding';
+		$args[] = '--target-hash';
+		$args[] = $hash;
+	}
+
+	$proc = \HtmlApiFuzz\run_php_process( $args, \HtmlApiFuzz\repo_root(), \HtmlApiFuzz\option_int( $options, 'minimize-timeout-ms', 300000 ), $signature_dir . '/minimize.log' );
+	$result_path = $output_dir . '/minimize-result.json';
+	try {
+		$result = \HtmlApiFuzz\read_json_file( $result_path );
+	} catch ( \RuntimeException $e ) {
+		// E.g. a result file cut short by the timeout: count the run as failed.
+		fwrite( STDERR, '[' . gmdate( 'c' ) . "] watcher: could not read {$result_path}: {$e->getMessage()}\n" );
+		$result = null;
+	}
+
+	$state['signatures'][ $hash ]['minimizeFinishedAt'] = gmdate( 'c' );
+	$state['signatures'][ $hash ]['minimizeProcess']    = array(
+		'code'       => $proc['code'],
+		'timedOut'   => $proc['timedOut'],
+		'durationMs' => $proc['durationMs'],
+		'logPath'    => $proc['logPath'],
+	);
+	$state['signatures'][ $hash ]['minimizeResult']    = $result_path;
+	$state['signatures'][ $hash ]['minimizeOutputDir'] = $output_dir;
+	$state['signatures'][ $hash ]['status']            = ( is_array( $result ) && ( $result['ok'] ?? false ) ) ? 'minimized' : 'minimize-failed';
+	\HtmlApiFuzz\write_json_file( $signature_dir . '/failure.json', $state['signatures'][ $hash ] );
+}
+
+/**
+ * Triages one scanned record if it is a failure that has not been seen before.
+ *
+ * Records whose `ok` is truthy or absent are ignored. A failure without a
+ * signature gets one derived from the record when possible. The attempt is
+ * then remembered under its attempt key, counted, and folded into the triage
+ * state; the hash is reported when that created a new signature.
+ *
+ * @param array  $record        Scanned record.
+ * @param string $fallback_key  Attempt key for a record that carries no path.
+ * @param string $state_dir     Watcher state directory.
+ * @param array  $state         Triage state; modified in place.
+ * @param array  $new_hashes    Receives the hash of each newly created signature.
+ * @param int    $failures_seen Incremented for each attempt not seen before.
+ */
+function html_api_fuzz_watcher_process_failure_record( array $record, string $fallback_key, string $state_dir, array &$state, array &$new_hashes, int &$failures_seen ): void {
+	$succeeded = $record['ok'] ?? true;
+	if ( $succeeded ) {
+		return;
+	}
+
+	if ( empty( $record['signature'] ) ) {
+		$derived = \HtmlApiFuzz\Signature::from_result( $record );
+		if ( null !== $derived ) {
+			$record['signature'] = $derived;
+		}
+	}
+
+	$attempt_key = html_api_fuzz_watcher_attempt_key( $record, $fallback_key );
+	if ( ! isset( $state['seenAttempts'][ $attempt_key ] ) ) {
+		$state['seenAttempts'][ $attempt_key ] = gmdate( 'c' );
+		++$failures_seen;
+
+		$created = html_api_fuzz_watcher_record_failure( $record, $state_dir, $state );
+		if ( $created ) {
+			$new_hashes[] = $record['signature']['hash'];
+		}
+	}
+}
+
+/**
+ * Triages the oracle finding attached to a scanned record, if it has one.
+ *
+ * Records without an `oracleFinding` array carrying a non-empty
+ * `signature.hash` are ignored. The attempt key is the record's regular
+ * attempt key suffixed with `:oracle:` and the finding's hash. An attempt
+ * not seen before is remembered, counted, and folded into the triage state
+ * as an `oracle-finding` summary built from the record plus the finding's
+ * signature, classification, type, suspected owner and upstream issue URL.
+ *
+ * @param array  $record               Scanned record.
+ * @param string $fallback_key         Attempt key for a record that carries no path.
+ * @param string $state_dir            Watcher state directory.
+ * @param array  $state                Triage state; modified in place.
+ * @param array  $new_hashes           Receives the hash of each newly created signature.
+ * @param int    $oracle_findings_seen Incremented for each finding not seen before.
+ */
+function html_api_fuzz_watcher_process_oracle_finding_record( array $record, string $fallback_key, string $state_dir, array &$state, array &$new_hashes, int &$oracle_findings_seen ): void {
+	$finding = $record['oracleFinding'] ?? null;
+	$usable  = is_array( $finding ) && ! empty( $finding['signature']['hash'] );
+	if ( ! $usable ) {
+		return;
+	}
+
+	$finding_hash = $finding['signature']['hash'];
+	$attempt_key  = html_api_fuzz_watcher_attempt_key( $record, $fallback_key ) . ':oracle:' . $finding_hash;
+	if ( isset( $state['seenAttempts'][ $attempt_key ] ) ) {
+		return;
+	}
+
+	// The finding's fields override whatever the record itself says about the attempt.
+	$summary = array_replace(
+		$record,
+		array(
+			'triageKind'        => 'oracle-finding',
+			'signature'         => $finding['signature'],
+			'failureClass'      => $finding['classification'] ?? 'oracle-finding',
+			'classification'    => $finding['classification'] ?? null,
+			'oracleFindingType' => $finding['type'] ?? null,
+			'suspectedOwner'    => $finding['suspectedOwner'] ?? null,
+			'upstreamIssueUrl'  => $finding['upstream']['issueUrl'] ?? null,
+		)
+	);
+
+	$state['seenAttempts'][ $attempt_key ] = gmdate( 'c' );
+	++$oracle_findings_seen;
+
+	$created = html_api_fuzz_watcher_record_failure( $summary, $state_dir, $state );
+	if ( $created ) {
+		$new_hashes[] = $finding_hash;
+	}
+}
+
+/**
+ * Performs one full triage pass over a run directory.
+ *
+ * In order:
+ * 1. Read the new rows of every summary.ndjson and triage the failures.
+ * 2. With --triage-oracle-findings, re-read the summaries from their own set
+ *    of offsets and triage the oracle findings.
+ * 3. Do the same for every lane result store, resuming from the last row id
+ *    handled; a store that cannot be read is logged and retried next scan.
+ * 4. Persist the state.
+ * 5. With --no-minimize, mark this scan's new signatures `queued`; otherwise
+ *    minimize `new` / `queued` signatures plus `minimize-failed` ones still
+ *    under --max-minimize-retries, at most --max-minimize per scan, saving
+ *    the state after each.
+ * 6. Persist the state again and rewrite STATUS.md.
+ *
+ * @param string $run_dir    Run directory to scan.
+ * @param string $state_dir  Watcher state directory.
+ * @param string $state_path Path of the watcher's state.json.
+ * @param array  $state      Triage state; modified in place.
+ * @param array  $options    CLI options.
+ * @return array Counters for this scan: summaryFiles, sqliteStores,
+ *               failuresSeen, oracleFindingsSeen, triageOracleFindings,
+ *               newSignatures.
+ */
+function html_api_fuzz_watcher_scan_once( string $run_dir, string $state_dir, string $state_path, array &$state, array $options ): array {
+	$summary_paths = html_api_fuzz_watcher_summary_paths( $run_dir );
+
+	// Guarantee each offset map is an array before it is indexed.
+	foreach ( array( 'summaryOffsets', 'sqliteOffsets', 'oracleSummaryOffsets', 'oracleSqliteOffsets' ) as $offset_map ) {
+		if ( ! is_array( $state[ $offset_map ] ?? null ) ) {
+			$state[ $offset_map ] = array();
+		}
+	}
+
+	$fresh_hashes  = array();
+	$failure_total = 0;
+	$oracle_total  = 0;
+	$with_oracle   = \HtmlApiFuzz\option_bool( $options, 'triage-oracle-findings', false );
+
+	// Pass 1: failures appended to the summary files.
+	foreach ( $summary_paths as $summary_path ) {
+		$resume_at = (int) ( $state['summaryOffsets'][ $summary_path ] ?? 0 );
+		$chunk     = html_api_fuzz_watcher_read_summary_records( $summary_path, $resume_at );
+		$state['summaryOffsets'][ $summary_path ] = $chunk['offset'];
+
+		foreach ( $chunk['records'] as $entry ) {
+			$fallback_key = 'summary:' . $summary_path . ':offset:' . ( $entry['offset'] ?? $entry['line'] );
+			html_api_fuzz_watcher_process_failure_record( $entry['record'], $fallback_key, $state_dir, $state, $fresh_hashes, $failure_total );
+		}
+	}
+
+	// Pass 2: oracle findings, tracked with a separate set of summary offsets.
+	if ( $with_oracle ) {
+		foreach ( $summary_paths as $summary_path ) {
+			$resume_at = (int) ( $state['oracleSummaryOffsets'][ $summary_path ] ?? 0 );
+			$chunk     = html_api_fuzz_watcher_read_summary_records( $summary_path, $resume_at );
+			$state['oracleSummaryOffsets'][ $summary_path ] = $chunk['offset'];
+
+			foreach ( $chunk['records'] as $entry ) {
+				$fallback_key = 'summary:' . $summary_path . ':offset:' . ( $entry['offset'] ?? $entry['line'] );
+				html_api_fuzz_watcher_process_oracle_finding_record( $entry['record'], $fallback_key, $state_dir, $state, $fresh_hashes, $oracle_total );
+			}
+		}
+	}
+
+	// Pass 3: lane result stores.
+	$sqlite_paths = html_api_fuzz_watcher_sqlite_paths( $run_dir );
+	foreach ( $sqlite_paths as $store_path ) {
+		/*
+		 * Lane stores fail transiently: a lane may not have committed its
+		 * schema yet (SQLite opens lazily, so that surfaces on the first
+		 * query, not in the constructor), and busy/corrupt stores throw on
+		 * read. None of those may kill the long-lived watcher — log, skip,
+		 * and retry on the next scan. Offsets only advance on success.
+		 */
+		$store      = null;
+		$key_prefix = 'sqlite:' . $store_path . ':';
+		try {
+			$store   = new \HtmlApiFuzz\ResultStore( $store_path, true );
+			$last_id = (int) ( $state['sqliteOffsets'][ $store_path ] ?? 0 );
+			$max_id  = $store->max_id();
+			if ( $last_id > $max_id ) {
+				// The store shrank: it was recreated. Re-read from the start;
+				// the seenAttempts keys dedupe anything genuinely re-seen.
+				// (A recreated store that already grew past the stale offset
+				// is not detected; recreating a lane store without wiping the
+				// watcher state is unsupported.)
+				$last_id = 0;
+			}
+			if ( $max_id > $last_id ) {
+				foreach ( $store->failures_after( $last_id, $max_id ) as $row ) {
+					html_api_fuzz_watcher_process_failure_record( $row['record'], $key_prefix . $row['id'], $state_dir, $state, $fresh_hashes, $failure_total );
+				}
+			}
+			$state['sqliteOffsets'][ $store_path ] = $max_id;
+
+			if ( $with_oracle ) {
+				$last_oracle_id = (int) ( $state['oracleSqliteOffsets'][ $store_path ] ?? 0 );
+				if ( $last_oracle_id > $max_id ) {
+					$last_oracle_id = 0;
+				}
+				if ( $max_id > $last_oracle_id ) {
+					foreach ( $store->oracle_findings_after( $last_oracle_id, $max_id ) as $row ) {
+						html_api_fuzz_watcher_process_oracle_finding_record( $row['record'], $key_prefix . $row['id'], $state_dir, $state, $fresh_hashes, $oracle_total );
+					}
+				}
+				$state['oracleSqliteOffsets'][ $store_path ] = $max_id;
+			}
+		} catch ( \Throwable $e ) {
+			fwrite( STDERR, '[' . gmdate( 'c' ) . "] watcher: could not read {$store_path}: {$e->getMessage()}\n" );
+		} finally {
+			if ( null !== $store ) {
+				try {
+					$store->close();
+				} catch ( \Throwable $e ) {
+					// Already unusable; nothing to release.
+				}
+			}
+		}
+	}
+
+	// Save what was scanned before the minimization phase starts.
+	$state['updatedAt'] = gmdate( 'c' );
+	\HtmlApiFuzz\write_json_file( $state_path, $state );
+
+	if ( \HtmlApiFuzz\option_bool( $options, 'no-minimize', false ) ) {
+		foreach ( array_unique( $fresh_hashes ) as $hash ) {
+			$state['signatures'][ $hash ]['status'] = 'queued';
+		}
+	} else {
+		$retry_cap = \HtmlApiFuzz\option_int( $options, 'max-minimize-retries', 3 );
+		$queue     = array();
+		foreach ( $state['signatures'] as $hash => $record ) {
+			$status  = $record['status'] ?? 'new';
+			$waiting = in_array( $status, array( 'new', 'queued' ), true );
+			// Re-queue failed minimizations on later scans, up to the cap:
+			// transient timeouts and load spikes should not strand a finding.
+			$retryable = 'minimize-failed' === $status && (int) ( $record['minimizeAttempts'] ?? 0 ) < $retry_cap;
+			if ( $waiting || $retryable ) {
+				$queue[] = $hash;
+			}
+		}
+
+		$budget  = \HtmlApiFuzz\option_int( $options, 'max-minimize', count( $queue ) );
+		$started = 0;
+		foreach ( array_unique( $queue ) as $hash ) {
+			if ( $started >= $budget ) {
+				// Over this scan's budget: leave it queued for a later scan.
+				$state['signatures'][ $hash ]['status'] = 'queued';
+				continue;
+			}
+			html_api_fuzz_watcher_minimize( $hash, $state_dir, $state, $options );
+			++$started;
+			$state['updatedAt'] = gmdate( 'c' );
+			\HtmlApiFuzz\write_json_file( $state_path, $state );
+		}
+	}
+
+	$state['updatedAt'] = gmdate( 'c' );
+	\HtmlApiFuzz\write_json_file( $state_path, $state );
+	file_put_contents( $state_dir . '/STATUS.md', html_api_fuzz_watcher_status_markdown( $state ) );
+
+	return array(
+		'summaryFiles'         => count( $summary_paths ),
+		'sqliteStores'         => count( $sqlite_paths ),
+		'failuresSeen'         => $failure_total,
+		'oracleFindingsSeen'   => $oracle_total,
+		'triageOracleFindings' => $with_oracle,
+		'newSignatures'        => count( array_unique( $fresh_hashes ) ),
+	);
+}
+
+/*
+ * Entry point: resolve the run and state directories, load the triage state,
+ * then scan until --once is given or a requested stop has been honoured.
+ * One JSON line describing each scan is printed to standard output.
+ */
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+// The run directory may be given as --run-dir or as the first positional argument.
+$run_dir = \HtmlApiFuzz\option_string( $options, 'run-dir', $options['_'][0] ?? null );
+if ( null === $run_dir || \HtmlApiFuzz\option_bool( $options, 'help', false ) ) {
+	html_api_fuzz_watcher_usage();
+	// A missing run directory is a usage error; an explicit --help is not.
+	exit( null === $run_dir ? 1 : 0 );
+}
+
+$state_dir  = \HtmlApiFuzz\option_string( $options, 'state-dir', rtrim( $run_dir, DIRECTORY_SEPARATOR ) . '/.triage-watcher' );
+$state_path = $state_dir . '/state.json';
+\HtmlApiFuzz\ensure_dir( $state_dir );
+$state = html_api_fuzz_watcher_load_state( $state_path );
+$state['runDir'] = $run_dir;
+$state['stateDir'] = $state_dir;
+
+// The scan interval is at least 1 second and the staleness threshold at least 10.
+$interval      = max( 1.0, \HtmlApiFuzz\option_float( $options, 'interval-seconds', 10.0 ) );
+$stop_file     = rtrim( $run_dir, DIRECTORY_SEPARATOR ) . '/STOP';
+$stale_seconds = max( 10.0, \HtmlApiFuzz\option_float( $options, 'stop-stale-seconds', 120.0 ) );
+
+do {
+	/*
+	 * Graceful stop: once a stop is requested and every runner has recorded a
+	 * stop reason (or gone stale), this scan is the final one — nothing
+	 * writes after it.
+	 */
+	$stop_requested = is_file( $stop_file );
+	$final_scan     = $stop_requested && html_api_fuzz_watcher_runners_stopped( $run_dir, $stale_seconds );
+	$scan = html_api_fuzz_watcher_scan_once( $run_dir, $state_dir, $state_path, $state, $options );
+	echo \HtmlApiFuzz\json_encode_safe(
+		array_merge(
+			array(
+				'ok'          => true,
+				'at'          => gmdate( 'c' ),
+				'finalScan'   => $final_scan,
+				'stopPending' => $stop_requested && ! $final_scan,
+			),
+			$scan
+		)
+	) . "\n";
+	if ( $final_scan || \HtmlApiFuzz\option_bool( $options, 'once', false ) ) {
+		break;
+	}
+
+	/*
+	 * The interval is never below one second, and usleep() with a value
+	 * above 1,000,000 is not supported on every operating system. Sleep the
+	 * whole seconds with sleep() and only the remainder with usleep().
+	 */
+	$whole_seconds = (int) floor( $interval );
+	$micro_seconds = (int) round( ( $interval - $whole_seconds ) * 1000000 );
+	if ( $micro_seconds >= 1000000 ) {
+		// Rounding pushed the remainder up to a full second.
+		++$whole_seconds;
+		$micro_seconds = 0;
+	}
+	sleep( $whole_seconds );
+	if ( $micro_seconds > 0 ) {
+		usleep( $micro_seconds );
+	}
+} while ( true );
diff --git a/tools/html-api-fuzz/worker.php b/tools/html-api-fuzz/worker.php
new file mode 100755
index 0000000000000..5b6c69b93152e
--- /dev/null
+++ b/tools/html-api-fuzz/worker.php
@@ -0,0 +1,103 @@
+#!/usr/bin/env php
+<?php
+require_once __DIR__ . '/lib/autoload.php';
+
+$options = \HtmlApiFuzz\parse_cli_options( $argv );
+
+/**
+ * Builds the result recorded for a worker run that ended in a throwable.
+ *
+ * The result carries the throwable's class and message, the seed / profile /
+ * mode / payload policy / input source taken from the options, and the
+ * oracle's metadata (or the oracle kind plus the error raised while
+ * resolving it). With an output directory, the artifact paths and a
+ * signature are added and the result is written to `result.json` there.
+ *
+ * Writing is best-effort: the directory is created first, since the run may
+ * have failed before creating it, and a failure to write is noted on the
+ * returned result as `resultWriteError` rather than thrown, so that
+ * reporting one fatal cannot itself abort the caller.
+ *
+ * @param array       $options    CLI options of the failed run.
+ * @param Throwable   $e          What the run threw.
+ * @param string|null $output_dir Where to write result.json; null to skip writing.
+ * @return array The fallback result.
+ */
+function html_api_fuzz_worker_fatal_result( array $options, Throwable $e, ?string $output_dir ): array {
+	$fallback = array(
+		'schemaVersion'  => 1,
+		'kind'           => 'html-api-fuzz-worker-result',
+		'createdAt'      => gmdate( 'c' ),
+		'ok'             => false,
+		'status'         => 'worker-fatal',
+		'failureClass'   => 'fatal-error',
+		'failureSnippet' => $e->getMessage(),
+		'throwable'      => get_class( $e ),
+		'seed'           => \HtmlApiFuzz\option_int( $options, 'seed', 1 ),
+		'profile'        => \HtmlApiFuzz\option_string( $options, 'profile', 'auto' ),
+		'mode'           => \HtmlApiFuzz\option_string( $options, 'mode', 'auto' ),
+		'payloadPolicy'  => \HtmlApiFuzz\option_string( $options, 'payload-policy', null ),
+		'inputSource'    => \HtmlApiFuzz\option_string( $options, 'input-file', null ) ? 'input-file' : ( \HtmlApiFuzz\option_string( $options, 'input-base64', null ) ? 'input-base64' : 'generated' ),
+	);
+	// Resolving the oracle may be what failed in the first place; never let it throw again here.
+	try {
+		$fallback['oracle'] = \HtmlApiFuzz\OracleRenderer::from_options( $options )->metadata();
+	} catch ( Throwable $oracle_error ) {
+		$fallback['oracle'] = array(
+			'kind'  => \HtmlApiFuzz\option_string( $options, 'dom-oracle', \HtmlApiFuzz\OracleRenderer::KIND_PHP_DOM ),
+			'error' => $oracle_error->getMessage(),
+		);
+	}
+
+	if ( null !== $output_dir ) {
+		$result_path       = $output_dir . DIRECTORY_SEPARATOR . 'result.json';
+		$fallback['paths'] = array(
+			'outputDir'  => $output_dir,
+			'resultPath' => $result_path,
+			'replayPath' => $output_dir . DIRECTORY_SEPARATOR . 'replay.json',
+		);
+		$signature = \HtmlApiFuzz\Signature::from_result( $fallback );
+		if ( null !== $signature ) {
+			$fallback['signature'] = $signature;
+		}
+
+		try {
+			\HtmlApiFuzz\ensure_dir( $output_dir );
+			\HtmlApiFuzz\write_json_file( $result_path, $fallback );
+		} catch ( Throwable $write_error ) {
+			$fallback['resultWriteError'] = $write_error->getMessage();
+		}
+	}
+
+	return $fallback;
+}
+
+$batch_count = \HtmlApiFuzz\option_int( $options, 'batch-count', 1 );
+
+if ( $batch_count <= 1 ) {
+	/*
+	 * Single mode: run once and print the result as JSON. The exit code is 0
+	 * for an ok result and 2 for a failed one; a throwable is turned into a
+	 * fallback result on standard error with exit code 1.
+	 */
+	try {
+		$result = \HtmlApiFuzz\Worker::run( $options );
+		echo \HtmlApiFuzz\json_encode_safe( $result ) . "\n";
+		exit( ( $result['ok'] ?? false ) ? 0 : 2 );
+	} catch ( Throwable $e ) {
+		$fallback = html_api_fuzz_worker_fatal_result( $options, $e, \HtmlApiFuzz\option_string( $options, 'output-dir', null ) );
+		fwrite( STDERR, \HtmlApiFuzz\json_encode_safe( $fallback ) . "\n" );
+		exit( 1 );
+	}
+}
+
+/*
+ * Batch mode: --output-dir is the run directory; each seed writes its
+ * artifacts to seed-N/primary as the runner lays them out. A throwable
+ * for one seed is recorded and the batch continues; only a process-level
+ * fatal (which kills this loop) leaves seeds without results, and the
+ * runner re-runs those individually.
+ */
+$run_dir     = \HtmlApiFuzz\option_string( $options, 'output-dir', getcwd() . DIRECTORY_SEPARATOR . 'html-api-fuzz-batch' );
+$start_seed  = \HtmlApiFuzz\option_int( $options, 'seed', 1 );
+$seed_stride = max( 1, \HtmlApiFuzz\option_int( $options, 'seed-stride', 1 ) );
+$summaries   = array();
+
+for ( $index = 0; $index < $batch_count; ++$index ) {
+	$seed     = $start_seed + ( $index * $seed_stride );
+	$seed_dir = implode( DIRECTORY_SEPARATOR, array( $run_dir, 'seed-' . $seed, 'primary' ) );
+
+	// Per-seed options: the batch's options with this seed and directory, run as a single attempt.
+	$seed_opts               = $options;
+	$seed_opts['seed']       = (string) $seed;
+	$seed_opts['output-dir'] = $seed_dir;
+	unset( $seed_opts['batch-count'] );
+
+	try {
+		$result = \HtmlApiFuzz\Worker::run( $seed_opts );
+	} catch ( Throwable $e ) {
+		$result = html_api_fuzz_worker_fatal_result( $seed_opts, $e, $seed_dir );
+	}
+
+	$summaries[] = array(
+		'seed'         => $seed,
+		'ok'           => $result['ok'] ?? false,
+		'status'       => $result['status'] ?? 'unknown',
+		'failureClass' => $result['failureClass'] ?? null,
+	);
+}
+
+// The batch exits 0 whatever the per-seed outcomes; those are in the printed summary.
+$batch_report = array(
+	'kind'    => 'html-api-fuzz-worker-batch',
+	'count'   => $batch_count,
+	'results' => $summaries,
+);
+echo \HtmlApiFuzz\json_encode_safe( $batch_report ) . "\n";
+exit( 0 );