diff --git a/src/wp-includes/html-api/class-wp-html-decoder.php b/src/wp-includes/html-api/class-wp-html-decoder.php
index d902f4b7cabc4..9f33056de0c14 100644
--- a/src/wp-includes/html-api/class-wp-html-decoder.php
+++ b/src/wp-includes/html-api/class-wp-html-decoder.php
@@ -195,6 +195,8 @@ public static function decode( $context, $text ): string {
 	 *     7    === $token_length; // `&notin;`
 	 *
 	 * @since 6.6.0
+	 * @since 7.1.0 Detects ambiguous followers of semicolon-less references
+	 *              by ASCII classification only, independent of the locale.
 	 *
 	 * @global WP_Token_Map $html5_named_character_references Mappings for HTML5 named character references.
 	 *
@@ -377,14 +379,20 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
 		 * At this point though there's a match for an entry in the named
 		 * character reference table but the match doesn't end in `;`.
 		 * It may be allowed if it's followed by something unambiguous.
+		 *
+		 * Only an ASCII alphanumeric or U+003D EQUALS SIGN is ambiguous.
+		 * `ctype_alnum()` must be avoided here: its classification of
+		 * bytes 0x80 and above depends on the process locale, but only
+		 * these specific ASCII characters prevent decoding.
+		 *
+		 * @see https://html.spec.whatwg.org/#named-character-reference-state
 		 */
+		$follower           = $after_name < $length ? $text[ $after_name ] : '';
 		$ambiguous_follower = (
-			$after_name < $length &&
-			$name_at < $length &&
-			(
-				ctype_alnum( $text[ $after_name ] ) ||
-				'=' === $text[ $after_name ]
-			)
+			( 'a' <= $follower && 'z' >= $follower ) ||
+			( 'A' <= $follower && 'Z' >= $follower ) ||
+			( '0' <= $follower && '9' >= $follower ) ||
+			'=' === $follower
 		);
 
 		// It's non-ambiguous, safe to leave it in.
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 35d91fad3129c..c46151f05d9be 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -5254,6 +5254,8 @@ public function get_namespace(): string {
 	 *     $processor->get_tag() === null;
 	 *
 	 * @since 6.4.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 	 */
@@ -5315,6 +5317,8 @@ public function has_self_closing_flag(): bool {
 	 * of the document without matching a token.
 	 *
 	 * @since 6.6.0 Subclassed for the HTML Processor.
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of the matched token.
 	 */
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 77c1a471db5b1..b73c837af0077 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1174,7 +1174,13 @@ public function paused_at_incomplete_token(): bool {
 	 *     }
 	 *     // Outputs: "free <egg> lang-en "
 	 *
+	 * Class names from the input document already carry the tokenizer's
+	 * U+FFFD replacement of NULL bytes through `get_attribute()`; values
+	 * supplied through the API are returned verbatim, as `Element.classList`
+	 * does in the DOM.
+	 *
 	 * @since 6.4.0
+	 * @since 7.1.0 No longer replaces NULL bytes in API-supplied class values.
 	 *
 	 * @return Generator<int, non-empty-string>
 	 */
@@ -1208,7 +1214,7 @@ public function class_list() {
 				return;
 			}
 
-			$name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) );
+			$name = substr( $class, $at, $length );
 			if ( $is_quirks ) {
 				$name = strtolower( $name );
 			}
@@ -2231,9 +2237,16 @@ private function parse_next_attribute(): bool {
 		 * > case-insensitive match for each other.
 		 *     - HTML 5 spec
 		 *
+		 * The tokenizer would have replaced U+0000 NULL bytes in attribute
+		 * names with U+FFFD, so a name spelled with a NULL byte duplicates one
+		 * spelled with U+FFFD in its place. Only the comparable name used for
+		 * lookup receives the replacement; the raw span in the document remains
+		 * untouched.
+		 *
 		 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
+		 * @see https://html.spec.whatwg.org/#attribute-name-state
 		 */
-		$comparable_name = strtolower( $attribute_name );
+		$comparable_name = strtolower( str_replace( "\x00", "\u{FFFD}", $attribute_name ) );
 
 		// If an attribute is listed many times, only use the first declaration and ignore the rest.
 		if ( ! isset( $this->attributes[ $comparable_name ] ) ) {
@@ -2359,13 +2372,7 @@ private function class_name_updates_to_attributes_updates(): void {
 		}
 
 		if ( false === $existing_class && isset( $this->attributes['class'] ) ) {
-			$existing_class = WP_HTML_Decoder::decode_attribute(
-				substr(
-					$this->html,
-					$this->attributes['class']->value_starts_at,
-					$this->attributes['class']->value_length
-				)
-			);
+			$existing_class = $this->get_decoded_source_attribute_value( $this->attributes['class'] );
 		}
 
 		if ( false === $existing_class ) {
@@ -2771,6 +2778,11 @@ private function get_enqueued_attribute_value( string $comparable_name ) {
 	 *     $p->get_attribute( 'class' ) === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 Applies input-stream preprocessing: newlines in the source value
+	 *              are normalized before character references decode and NULL
+	 *              bytes are replaced with U+FFFD, as browsers do. Attributes
+	 *              whose source name contains a NULL byte are addressed by the
+	 *              name with U+FFFD in its place, as in the DOM.
 	 *
 	 * @param string $name Name of attribute whose value is requested.
 	 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
@@ -2793,7 +2805,7 @@ public function get_attribute( $name ) {
 		 * attribute values. If any exist, those enqueued class changes must first be flushed out
 		 * into an attribute value update.
 		 */
-		if ( 'class' === $name ) {
+		if ( 'class' === $comparable ) {
 			$this->class_name_updates_to_attributes_updates();
 		}
 
@@ -2824,9 +2836,58 @@ public function get_attribute( $name ) {
 			return true;
 		}
 
+		return $this->get_decoded_source_attribute_value( $attribute );
+	}
+
+	/**
+	 * Returns the decoded value of an attribute found in the input document.
+	 *
+	 * The Tag Processor defers the HTML input-stream preprocessing and the
+	 * tokenizer's replacements while scanning; they must be applied when
+	 * reading a value out of the document: newlines are normalized before
+	 * character references decode, and U+0000 NULL bytes are replaced
+	 * with U+FFFD. The replacements operate on bytes: a NULL byte inside an
+	 * invalid UTF-8 sequence is replaced on its own, whereas a browser decodes
+	 * the byte stream into characters first and may produce a different result.
+	 *
+	 * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream
+	 * @see https://html.spec.whatwg.org/#attribute-value-(double-quoted)-state
+	 *
+	 * @since 7.1.0
+	 *
+	 * @param WP_HTML_Attribute_Token $attribute Attribute token from the input document.
+	 * @return string Decoded attribute value.
+	 */
+	private function get_decoded_source_attribute_value( WP_HTML_Attribute_Token $attribute ): string {
 		$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );
 
-		return WP_HTML_Decoder::decode_attribute( $raw_value );
+		/*
+		 * Newline normalization is part of preprocessing the input stream
+		 * and precedes character reference decoding: `&#13;` decodes into
+		 * a carriage return which must be preserved. The check avoids
+		 * scanning the value again when it contains no carriage return;
+		 * most values contain none.
+		 */
+		if ( false !== strpos( $raw_value, "\r" ) ) {
+			$raw_value = str_replace( "\r\n", "\n", $raw_value );
+			$raw_value = str_replace( "\r", "\n", $raw_value );
+		}
+
+		$decoded_value = WP_HTML_Decoder::decode_attribute( $raw_value );
+
+		/*
+		 * The tokenizer replaces U+0000 NULL bytes as it consumes input:
+		 * character references see the raw NULL byte — an unambiguous
+		 * follower for references without a terminating semicolon — and
+		 * no character reference decodes into NULL, so the replacement
+		 * applies equivalently after decoding, where it cannot disturb
+		 * how references parse.
+		 */
+		if ( false !== strpos( $decoded_value, "\x00" ) ) {
+			$decoded_value = str_replace( "\x00", "\u{FFFD}", $decoded_value );
+		}
+
+		return $decoded_value;
 	}
 
 	/**
@@ -2849,6 +2910,10 @@ public function get_attribute( $name ) {
 	 *     $p->get_attribute_names_with_prefix( 'data-' ) === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 NULL bytes in source attribute names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply. The prefix
+	 *              is matched verbatim against these replaced names; a prefix
+	 *              containing a NULL byte matches nothing.
 	 *
 	 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive
 	 *
@@ -2898,6 +2963,8 @@ public function get_namespace(): string {
 	 *     $p->get_tag() === null;
 	 *
 	 * @since 6.2.0
+	 * @since 7.1.0 NULL bytes in the source tag name are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
 	 */
@@ -2906,7 +2973,15 @@ public function get_tag(): ?string {
 			return null;
 		}
 
-		$tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+		/*
+		 * The tokenizer would have replaced U+0000 NULL bytes in the tag
+		 * name with U+FFFD; this is deferred to this read boundary. The
+		 * replacement never applies to internal identification, which
+		 * compares raw bytes (`scr\x00ipt` is not SCRIPT in browsers either).
+		 *
+		 * @see https://html.spec.whatwg.org/#tag-name-state
+		 */
+		$tag_name = str_replace( "\x00", "\u{FFFD}", substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length ) );
 
 		if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
 			return strtoupper( $tag_name );
@@ -2927,6 +3002,8 @@ public function get_tag(): ?string {
 	 * account the current parsing context, whether HTML, SVG, or MathML.
 	 *
 	 * @since 6.7.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of current tag name.
 	 */
@@ -3425,6 +3502,8 @@ public function get_token_type(): ?string {
 	 * of the document without matching a token.
 	 *
 	 * @since 6.5.0
+	 * @since 7.1.0 NULL bytes in source tag names are returned as U+FFFD,
+	 *              matching the tokenizer replacement browsers apply.
 	 *
 	 * @return string|null Name of the matched token.
 	 */
@@ -3782,6 +3861,12 @@ public function get_modifiable_text(): string {
 	 *     // Renders as “Eggs &amp; Milk” in a browser, encoded as `<p>Eggs &amp;amp; Milk</p>`.
 	 *     $processor->set_modifiable_text( 'Eggs &amp; Milk' );
 	 *
+	 * Note: unlike attribute values set through `set_attribute()`, which read
+	 * back verbatim, text set through this method currently reads back through
+	 * `get_modifiable_text()` with newlines normalized and NULL bytes handled
+	 * as if the text had come from the input document. In the DOM, API-supplied
+	 * text round-trips verbatim; this asymmetry is a known limitation.
+	 *
 	 * @since 6.7.0
 	 * @since 6.9.0 Escapes all character references instead of trying to avoid double-escaping.
 	 *
@@ -4770,14 +4855,37 @@ private function matches(): bool {
 		}
 
 		// Does the tag name match the requested tag name in a case-insensitive manner?
-		if (
-			isset( $this->sought_tag_name ) &&
-			(
-				strlen( $this->sought_tag_name ) !== $this->tag_name_length ||
-				0 !== substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true )
-			)
-		) {
-			return false;
+		if ( isset( $this->sought_tag_name ) ) {
+			$tag_name_matches = (
+				strlen( $this->sought_tag_name ) === $this->tag_name_length &&
+				0 === substr_compare( $this->html, $this->sought_tag_name, $this->tag_name_starts_at, $this->tag_name_length, true )
+			);
+
+			/*
+			 * Names are matched in the same alphabet `get_tag()` exposes,
+			 * where U+0000 NULL bytes appear as U+FFFD: a sought name
+			 * containing U+FFFD matches source names with NULL bytes in
+			 * its place, and a sought name containing a NULL byte matches
+			 * nothing, since no exposed name contains one. The byte
+			 * comparison above already agrees for names without NULL
+			 * bytes, so this only resolves the rare disagreements.
+			 */
+			if ( $tag_name_matches ) {
+				$tag_name_matches = false === strpos( $this->sought_tag_name, "\x00" );
+			} elseif ( false !== strpos( $this->sought_tag_name, "\u{FFFD}" ) ) {
+				$raw_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+				if ( false !== strpos( $raw_name, "\x00" ) ) {
+					$exposed_name     = str_replace( "\x00", "\u{FFFD}", $raw_name );
+					$tag_name_matches = (
+						strlen( $this->sought_tag_name ) === strlen( $exposed_name ) &&
+						0 === substr_compare( $exposed_name, $this->sought_tag_name, 0, strlen( $exposed_name ), true )
+					);
+				}
+			}
+
+			if ( ! $tag_name_matches ) {
+				return false;
+			}
 		}
 
 		if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) {
diff --git a/tests/phpunit/tests/html-api/wpHtmlDecoder.php b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
index 97954f4eb3e30..9527739edd23b 100644
--- a/tests/phpunit/tests/html-api/wpHtmlDecoder.php
+++ b/tests/phpunit/tests/html-api/wpHtmlDecoder.php
@@ -61,6 +61,84 @@ static function ( int $errno, string $errstr ) use ( &$errors ) {
 		$this->assertSame( "&\x00b", $decoded, 'Should have decoded the text without changing it.' );
 	}
 
+	/**
+	 * Ensures that numeric character references for U+0000 decode to U+FFFD
+	 * while raw NULL bytes pass through the decoder untransformed.
+	 *
+	 * The tokenizer, not the decoder, is responsible for replacing raw NULL
+	 * bytes; in the Tag Processor that responsibility falls on the methods
+	 * which read values out of the input document.
+	 *
+	 * @ticket 65372
+	 *
+	 * @dataProvider data_null_code_points
+	 *
+	 * @param string $raw_value     Raw attribute value.
+	 * @param string $decoded_value The expected decoded attribute value.
+	 */
+	public function test_null_code_points_in_attribute_values( string $raw_value, string $decoded_value ) {
+		$this->assertSame(
+			$decoded_value,
+			WP_HTML_Decoder::decode_attribute( $raw_value ),
+			'Improperly decoded raw attribute value.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_null_code_points() {
+		return array(
+			'Decimal zero'                 => array( 'a&#0;b', "a\u{FFFD}b" ),
+			'Hexadecimal zero'             => array( 'a&#x0;b', "a\u{FFFD}b" ),
+			'Multiple zeros'               => array( 'a&#0000;b', "a\u{FFFD}b" ),
+			'Raw NULL byte passes through' => array( "a\x00b", "a\x00b" ),
+		);
+	}
+
+	/**
+	 * Ensures that the ambiguous-follower check for character references
+	 * lacking a terminating semicolon treats only ASCII alphanumerics and
+	 * the equals sign as ambiguous, regardless of the process locale.
+	 *
+	 * `ctype_alnum()` may classify bytes 0x80 and above as alphanumeric,
+	 * depending on the process locale, wrongly suppressing decodes whose
+	 * follower is a non-ASCII byte, such as U+FFFD from NULL-byte replacement.
+	 *
+	 * @ticket 65372
+	 *
+	 * @see https://html.spec.whatwg.org/#named-character-reference-state
+	 *
+	 * @dataProvider data_semicolon_less_references_with_followers
+	 *
+	 * @param string $raw_value     Raw attribute value.
+	 * @param string $decoded_value The expected decoded attribute value.
+	 */
+	public function test_semicolon_less_reference_followers( string $raw_value, string $decoded_value ) {
+		$this->assertSame(
+			$decoded_value,
+			WP_HTML_Decoder::decode_attribute( $raw_value ),
+			'Improperly decoded raw attribute value.'
+		);
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_semicolon_less_references_with_followers() {
+		return array(
+			'U+FFFD follower decodes'            => array( "x&amp\u{FFFD};y", "x&\u{FFFD};y" ),
+			'Non-ASCII follower decodes'         => array( "x&amp\u{E9}y", "x&\u{E9}y" ),
+			'ASCII letter follower is ambiguous' => array( 'x&ampzy', 'x&ampzy' ),
+			'ASCII digit follower is ambiguous'  => array( 'x&amp1y', 'x&amp1y' ),
+			'Equals sign follower is ambiguous'  => array( 'x&amp=y', 'x&amp=y' ),
+		);
+	}
+
 	/**
 	 * Ensures proper detection of attribute prefixes ignoring ASCII case.
 	 *
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php
new file mode 100644
index 0000000000000..1180fa7110c88
--- /dev/null
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-input-preprocessing.php
@@ -0,0 +1,428 @@
+<?php
+/**
+ * Unit tests covering WP_HTML_Tag_Processor input-stream preprocessing
+ * at its read boundaries.
+ *
+ * The HTML specification's "preprocessing the input stream" step (newline
+ * normalization) and the tokenizer's U+0000 NULL replacements are deferred
+ * by the Tag Processor while scanning and must be applied wherever parsed
+ * values are read out of the input document.
+ *
+ * @package WordPress
+ * @subpackage HTML-API
+ *
+ * @since 7.1.0
+ *
+ * @group html-api
+ *
+ * @coversDefaultClass WP_HTML_Tag_Processor
+ */
+class Tests_HtmlApi_WpHtmlTagProcessor_InputPreprocessing extends WP_UnitTestCase {
+	/**
+	 * Ensures that `get_attribute()` applies input-stream preprocessing and
+	 * tokenizer replacements to attribute values found in the input document.
+	 *
+	 * Newlines are normalized (CRLF → LF, CR → LF) before character references
+	 * decode, so `&#13;` still produces a real carriage return. Raw U+0000 NULL
+	 * bytes and `&#0;` both read back as U+FFFD. Browser-verified.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 *
+	 * @dataProvider data_attribute_values_with_preprocessing
+	 *
+	 * @param string $html     HTML containing a tag with attribute `a`.
+	 * @param string $expected Expected attribute value after preprocessing and decoding.
+	 */
+	public function test_get_attribute_applies_input_preprocessing( string $html, string $expected ) {
+		$processor = new WP_HTML_Tag_Processor( $html );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( $expected, $processor->get_attribute( 'a' ) );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_attribute_values_with_preprocessing() {
+		return array(
+			'Raw CR'                      => array( "<div a='x\ry'>", "x\ny" ),
+			'Raw CRLF'                    => array( "<div a='x\r\ny'>", "x\ny" ),
+			'Raw CR then CRLF'            => array( "<div a='x\r\r\ny'>", "x\n\ny" ),
+			'Double-quoted raw CR'        => array( "<div a=\"x\ry\">", "x\ny" ),
+			'NULL byte'                   => array( "<div a='x\x00y'>", "x\u{FFFD}y" ),
+			'NULL byte unquoted'          => array( "<div a=x\x00y>", "x\u{FFFD}y" ),
+			'Encoded CR is preserved'     => array( "<div a='x&#13;y'>", "x\ry" ),
+			'Encoded NULL becomes U+FFFD' => array( "<div a='x&#0;y'>", "x\u{FFFD}y" ),
+			'Raw CR before encoded CR'    => array( "<div a='x\r&#13;y'>", "x\n\ry" ),
+			'Raw CR and NULL byte'        => array( "<div a='x\r\x00y'>", "x\n\u{FFFD}y" ),
+			'Named reference before NULL' => array( "<div a='x&amp\x00;y'>", "x&\u{FFFD};y" ),
+			'Named reference before CR'   => array( "<div a='x&amp\ry'>", "x&\ny" ),
+		);
+	}
+
+	/**
+	 * Ensures that values enqueued through `set_attribute()` are returned verbatim.
+	 *
+	 * Input-stream preprocessing applies only to the input document. API-supplied
+	 * values are plaintext, equivalent to DOM `setAttribute()`, which performs
+	 * no replacements. Browser-verified.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 *
+	 * @dataProvider data_enqueued_attribute_values
+	 *
+	 * @param string $value Plaintext attribute value to set and expect back unchanged.
+	 */
+	public function test_get_attribute_returns_enqueued_values_verbatim( string $value ) {
+		$processor = new WP_HTML_Tag_Processor( '<div a="original">' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( 'a', $value ), 'Should have enqueued the attribute update.' );
+		$this->assertSame( $value, $processor->get_attribute( 'a' ) );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_enqueued_attribute_values() {
+		return array(
+			'Carriage return' => array( "x\ry" ),
+			'CRLF'            => array( "x\r\ny" ),
+			'NULL byte'       => array( "x\x00y" ),
+		);
+	}
+
+	/**
+	 * Ensures the existing class attribute value is preprocessed when enqueued
+	 * class updates are flushed into an attribute update.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::add_class
+	 *
+	 * @dataProvider data_class_updates_with_preprocessing
+	 *
+	 * @param string $html          HTML containing a tag with a class attribute.
+	 * @param string $expected_html Expected document after adding a class.
+	 */
+	public function test_class_updates_apply_input_preprocessing_to_existing_value( string $html, string $expected_html ) {
+		$processor = new WP_HTML_Tag_Processor( $html );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->add_class( 'added' ), 'Should have enqueued the class addition.' );
+		$this->assertSame( $expected_html, $processor->get_updated_html() );
+	}
+
+	/**
+	 * Data provider.
+	 *
+	 * @return array[]
+	 */
+	public static function data_class_updates_with_preprocessing() {
+		return array(
+			'Raw CR'                      => array( "<div class='a\rb'>", "<div class=\"a\nb added\">" ),
+			'Raw CRLF'                    => array( "<div class='a\r\nb'>", "<div class=\"a\nb added\">" ),
+			'NULL byte'                   => array( "<div class='a\x00b'>", "<div class=\"a\u{FFFD}b added\">" ),
+			'Named reference before NULL' => array( "<div class='&not\x00x'>", "<div class=\"\u{AC}\u{FFFD}x added\">" ),
+		);
+	}
+
+	/**
+	 * Ensures attribute names containing NULL bytes are exposed with U+FFFD and
+	 * are addressable only by their replaced name, as browsers expose them.
+	 *
+	 * Browser-verified: `getAttribute("da\u{FFFD}ta")` finds the attribute
+	 * parsed from `da\x00ta`; `getAttribute("da\x00ta")` does not.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 * @covers ::get_attribute_names_with_prefix
+	 */
+	public function test_attribute_names_replace_null_bytes() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ) );
+		$this->assertSame( '1', $processor->get_attribute( "da\u{FFFD}ta" ), 'Should have found the attribute by its replaced name.' );
+		$this->assertNull( $processor->get_attribute( "da\x00ta" ), 'Should not have found the attribute by its raw source name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<div DA\x00TA='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ), 'Should have lowercased the name around the replacement character.' );
+	}
+
+	/**
+	 * Ensures attribute names which collapse to the same name after NULL-byte
+	 * replacement are duplicates of one attribute: the first in document order
+	 * provides the value and removal removes every collapsed copy.
+	 *
+	 * Browser-verified: `<div da\x00ta="1" da\u{FFFD}ta="2">` produces a single
+	 * attribute `da\u{FFFD}ta` with value "1".
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 * @covers ::remove_attribute
+	 */
+	public function test_attribute_names_collapsing_after_null_replacement_are_duplicates() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1' da\u{FFFD}ta='2'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( '' ) );
+		$this->assertSame( '1', $processor->get_attribute( "da\u{FFFD}ta" ), 'First duplicate should provide the value.' );
+
+		$this->assertTrue( $processor->remove_attribute( "da\u{FFFD}ta" ), 'Should have removed the attribute.' );
+		$this->assertSame( '<div  >', $processor->get_updated_html(), 'Should have removed all duplicates of the attribute.' );
+	}
+
+	/**
+	 * Ensures setting an attribute by its U+FFFD-replaced name updates the
+	 * source attribute whose raw name contains a NULL byte instead of adding
+	 * a second attribute.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::set_attribute
+	 */
+	public function test_set_attribute_updates_attribute_with_null_byte_in_source_name() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='old'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( "da\u{FFFD}ta", 'new' ), 'Should have set the attribute.' );
+		$this->assertSame( "<div da\u{FFFD}ta=\"new\">", $processor->get_updated_html() );
+	}
+
+	/**
+	 * Ensures tag names containing NULL bytes are exposed with U+FFFD,
+	 * matching the tokenizer's tag-name-state replacement in browsers.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 * @covers ::get_token_name
+	 */
+	public function test_get_tag_replaces_null_bytes() {
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>x</di\x00v>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag opener.' );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_tag() );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_token_name() );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the text node.' );
+		$this->assertSame( 'x', $processor->get_modifiable_text() );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag closer.' );
+		$this->assertTrue( $processor->is_tag_closer(), 'Should have matched the tag closer.' );
+		$this->assertSame( "DI\u{FFFD}V", $processor->get_tag() );
+	}
+
+	/**
+	 * Ensures NULL bytes in tag names do not affect special-element detection:
+	 * `<scr\x00ipt>` is not SCRIPT and does not switch into rawtext parsing,
+	 * in browsers or here. Internal identification uses raw bytes.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 */
+	public function test_null_byte_in_tag_name_does_not_select_rawtext_parsing() {
+		$processor = new WP_HTML_Tag_Processor( "<scr\x00ipt><b></b></scr\x00ipt>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the tag opener.' );
+		$this->assertSame( "SCR\u{FFFD}IPT", $processor->get_tag() );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the B tag, not raw text.' );
+		$this->assertSame( 'B', $processor->get_tag() );
+	}
+
+	/**
+	 * Ensures NULL bytes cannot appear in PI-lookalike comment tag names,
+	 * whose targets are restricted to ASCII name characters.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_tag
+	 */
+	public function test_pi_lookalike_target_stops_before_null_byte() {
+		$processor = new WP_HTML_Tag_Processor( "<?px\x00rest ?>" );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the comment.' );
+		$this->assertSame( WP_HTML_Tag_Processor::COMMENT_AS_PI_NODE_LOOKALIKE, $processor->get_comment_type() );
+		$this->assertSame( 'px', $processor->get_tag() );
+	}
+
+	/**
+	 * Ensures tag-name queries match in the same replaced alphabet that
+	 * `get_tag()` exposes: a sought name containing U+FFFD matches source
+	 * names whose raw bytes contain NULL in its place, a sought name
+	 * containing a raw NULL byte matches nothing, and the value returned
+	 * by `get_tag()` round-trips into a successful query.
+	 *
+	 * This is also how WP_HTML_Processor::next_tag() matches, since it
+	 * compares sought names against the token name.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::next_tag
+	 */
+	public function test_tag_name_queries_match_replaced_names() {
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag( "DI\u{FFFD}V" ), 'Should have matched the tag by its replaced name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$tag_name  = $processor->get_tag();
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertTrue( $processor->next_tag( array( 'tag_name' => $tag_name ) ), 'The name returned by get_tag() should match in a query.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\x00v>" );
+		$this->assertFalse( $processor->next_tag( "DI\x00V" ), 'Should not have matched the tag by its raw source name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<di\u{FFFD}v>" );
+		$this->assertTrue( $processor->next_tag( "DI\u{FFFD}V" ), 'Should have matched a raw U+FFFD name.' );
+
+		$processor = WP_HTML_Processor::create_full_parser( "<body><di\x00v>" );
+		$this->assertTrue( $processor->next_tag( array( 'tag_name' => "DI\u{FFFD}V" ) ), 'The HTML Processor should match the replaced name.' );
+
+		$processor = WP_HTML_Processor::create_full_parser( "<body><di\x00v>" );
+		$this->assertFalse( $processor->next_tag( array( 'tag_name' => "DI\x00V" ) ), 'The HTML Processor should not match the raw source name.' );
+	}
+
+	/**
+	 * Ensures class_list does not replace NULL bytes in API-supplied values.
+	 *
+	 * Browser-verified: `setAttribute('class', "a\x00b")` then reading
+	 * `classList` yields the token "a\x00b" with the NULL byte preserved;
+	 * U+0000 replacement happens only in the tokenizer, and values from the
+	 * input document already receive it through `get_attribute()`.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::class_list
+	 * @covers ::has_class
+	 */
+	public function test_class_list_preserves_null_bytes_in_enqueued_values() {
+		$processor = new WP_HTML_Tag_Processor( '<div>' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->set_attribute( 'class', "a\x00b c\u{FFFD}d" ), 'Should have set the class attribute.' );
+		$this->assertSame( array( "a\x00b", "c\u{FFFD}d" ), iterator_to_array( $processor->class_list(), false ), 'Should have preserved the NULL byte in the API-supplied class.' );
+		$this->assertTrue( $processor->has_class( "a\x00b" ) );
+	}
+
+	/**
+	 * Ensures the class helpers operate on the replaced source value:
+	 * a class containing a NULL byte in the document is exposed, matched,
+	 * and queried by its U+FFFD spelling only.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::class_list
+	 * @covers ::has_class
+	 * @covers ::next_tag
+	 */
+	public function test_class_helpers_use_replaced_source_values() {
+		$processor = new WP_HTML_Tag_Processor( "<div class='a\x00b'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "a\u{FFFD}b" ), iterator_to_array( $processor->class_list(), false ), 'Should have exposed the replaced class name.' );
+		$this->assertTrue( $processor->has_class( "a\u{FFFD}b" ), 'Should have matched the replaced class name.' );
+		$this->assertFalse( $processor->has_class( "a\x00b" ), 'Should not have matched the raw source class name.' );
+
+		$processor = new WP_HTML_Tag_Processor( "<div class='a\x00b'>" );
+		$this->assertTrue( $processor->next_tag( array( 'class_name' => "a\u{FFFD}b" ) ), 'Should have matched a class_name query by the replaced name.' );
+	}
+
+	/**
+	 * Ensures boolean attributes whose names contain NULL bytes are
+	 * addressable by their replaced name.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 */
+	public function test_boolean_attribute_with_null_byte_in_name() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->get_attribute( "da\u{FFFD}ta" ), 'Should have reported the boolean attribute by its replaced name.' );
+	}
+
+	/**
+	 * Ensures attribute-name prefixes are matched verbatim against the
+	 * replaced names: a prefix spelled with U+FFFD matches, and a prefix
+	 * containing a raw NULL byte matches nothing.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute_names_with_prefix
+	 */
+	public function test_attribute_name_prefixes_match_replaced_names() {
+		$processor = new WP_HTML_Tag_Processor( "<div da\x00ta='1'>" );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertSame( array( "da\u{FFFD}ta" ), $processor->get_attribute_names_with_prefix( "da\u{FFFD}" ), 'A replaced-name prefix should match.' );
+		$this->assertSame( array(), $processor->get_attribute_names_with_prefix( "da\x00" ), 'A raw NULL prefix should match nothing.' );
+	}
+
+	/**
+	 * Ensures the replaced tag names flow through HTML Processor tree
+	 * construction: an end tag spelled with U+FFFD closes an element
+	 * whose start tag was spelled with a raw NULL byte, as in browsers,
+	 * where both spellings tokenize to the same name.
+	 *
+	 * @ticket 65372
+	 */
+	public function test_html_processor_matches_end_tags_across_null_byte_spellings() {
+		$this->assertSame(
+			"<di\u{FFFD}v>x</di\u{FFFD}v>y",
+			WP_HTML_Processor::normalize( "<di\x00v>x</di\u{FFFD}v>y" ),
+			'The U+FFFD-spelled end tag should have closed the NULL-spelled element.'
+		);
+
+		$processor = WP_HTML_Processor::create_full_parser( "<body><di\x00v>x</di\u{FFFD}v>y" );
+		$this->assertTrue( $processor->next_tag( array( 'tag_name' => "DI\u{FFFD}V" ) ), 'Should have found the element by its replaced name.' );
+		$this->assertSame( array( 'HTML', 'BODY', "DI\u{FFFD}V" ), $processor->get_breadcrumbs(), 'Should have built breadcrumbs from replaced names.' );
+	}
+
+	/**
+	 * Ensures pending class updates are flushed for any case spelling of
+	 * the "class" attribute name, since attribute names are matched
+	 * ASCII-case-insensitively.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_attribute
+	 */
+	public function test_get_attribute_flushes_class_updates_case_insensitively() {
+		$processor = new WP_HTML_Tag_Processor( '<div class="a">' );
+
+		$this->assertTrue( $processor->next_tag(), 'Should have found the tag.' );
+		$this->assertTrue( $processor->add_class( 'b' ), 'Should have enqueued the class addition.' );
+		$this->assertSame( 'a b', $processor->get_attribute( 'CLASS' ), 'Should have included pending class updates for an uppercase lookup.' );
+	}
+
+	/**
+	 * Ensures numeric character references for U+0000 decode to U+FFFD in text.
+	 *
+	 * @ticket 65372
+	 *
+	 * @covers ::get_modifiable_text
+	 */
+	public function test_encoded_null_in_text_node_decodes_to_replacement_character() {
+		$processor = new WP_HTML_Tag_Processor( 'a&#0;b' );
+
+		$this->assertTrue( $processor->next_token(), 'Should have found the text node.' );
+		$this->assertSame( "a\u{FFFD}b", $processor->get_modifiable_text() );
+	}
+}