diff --git a/ext/html-api-rust/.gitignore b/ext/html-api-rust/.gitignore new file mode 100644 index 0000000000000..2e4f288caf9a1 --- /dev/null +++ b/ext/html-api-rust/.gitignore @@ -0,0 +1,32 @@ +/target/ +/wasm/dist/ +/autom4te.cache/ +/build/ +/modules/ +/.libs/ +/Makefile +/Makefile.fragments +/Makefile.objects +/Makefile.global +/acinclude.m4 +/aclocal.m4 +/config.cache +/config.guess +/config.h +/config.h.in +/config.log +/config.nice +/config.status +/config.sub +/configure +/configure~ +/install-sh +/libtool +/ltmain.sh +/missing +/mkinstalldirs +/run-tests.php +/tmp-php.ini +/*.dep +/*.la +/*.lo diff --git a/ext/html-api-rust/Cargo.lock b/ext/html-api-rust/Cargo.lock new file mode 100644 index 0000000000000..13b8d48675c6a --- /dev/null +++ b/ext/html-api-rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "wp-html-api-rust-core" +version = "0.1.0" diff --git a/ext/html-api-rust/Cargo.toml b/ext/html-api-rust/Cargo.toml new file mode 100644 index 0000000000000..f5ba622fc6e8e --- /dev/null +++ b/ext/html-api-rust/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "wp-html-api-rust-core" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "wp_html_api_rust_core" +crate-type = ["staticlib", "cdylib"] + +[profile.release] +lto = true +codegen-units = 1 +panic = "abort" diff --git a/ext/html-api-rust/README.md b/ext/html-api-rust/README.md new file mode 100644 index 0000000000000..457d8c7cd1512 --- /dev/null +++ b/ext/html-api-rust/README.md @@ -0,0 +1,72 @@ +# WordPress HTML API Rust Extension + +This directory contains the native Rust core and PHP extension shim for the +incremental Rust implementation of the WordPress HTML API. 
+ +Build locally with: + +```sh +cd ext/html-api-rust +sh build.sh +``` + +Smoke-test the built extension with: + +```sh +php -d extension="$(pwd)/modules/wp_html_api_rust.so" \ + -r 'var_dump(wp_html_api_rust_version(), wp_html_api_rust_scan_next_tag("

<p>Hi</p>

"));' +``` + +Build the WebAssembly module and JavaScript API wrapper with: + +```sh +cd ext/html-api-rust +npm --prefix wasm run build +npm --prefix wasm run test:all +``` + +Regenerate the Rust HTML5 named-character-reference table with: + +```sh +node scripts/generate-html5-named-character-references.mjs +``` + +The JavaScript wrapper is an ES module in `wasm/wp-html-api-rust.js`. It directly +exports `loadWasm()`, `createHtmlApi()`, `WP_HTML_Span`, +`WP_HTML_Text_Replacement`, `WP_HTML_Attribute_Token`, `WP_HTML_Token`, +`WP_HTML_Stack_Event`, `WP_HTML_Active_Formatting_Elements`, +`WP_HTML_Open_Elements`, `WP_HTML_Processor_State`, +`WP_HTML_Unsupported_Exception`, and `WP_HTML_Doctype_Info`. + +Call `loadWasm()` to instantiate the WASM module and receive the WASM-bound API: +`WP_HTML_Decoder`, `WP_HTML_Tag_Processor`, `WP_HTML_Processor`, +`scanNextTag()`, `version()`, the support classes, and the raw `wasm` exports. +The tag processor methods call the same Rust core used by the PHP extension. The +JavaScript API surface mirrors the public WordPress HTML API classes with +JavaScript naming, including processor factory/static helpers, constants, +bookmark methods, doctype parsing, serialization helpers, and inherited +tag-processor methods. + +```js +import { loadWasm } from "./wasm/wp-html-api-rust.js"; + +const { WP_HTML_Processor } = await loadWasm(); +const processor = WP_HTML_Processor.create_fragment("

<p>Hello</p>

"); +processor.next_tag("p"); +console.log(processor.get_breadcrumbs()); +processor.destroy(); +``` + +`loadWasm()` accepts the default bundled WASM URL, a path or URL string, +`URL`, `Request`, `Response`, `Blob`, `ArrayBuffer`, typed array/DataView, +`WebAssembly.Module`, `WebAssembly.Instance`, raw `WebAssembly.Exports`, or an +instantiated source object returned by `WebAssembly.instantiate()`. + +The processor layer adds JavaScript-side open-element stack tracking for HTML +breadcrumbs, breadcrumb queries, void-element handling, namespaces, scoped end +tags, frameset handling, implied closures, foster parenting, and adoption-agency +reconstruction. The html5lib tree-construction harness runs with zero +unsupported cases, aside from the known WordPress duplicate shell-attribute +skips. `WP_HTML_Unsupported_Exception` remains part of the public API for +guarded parser states, such as tentative encoding detection from unsupported +META tags. diff --git a/ext/html-api-rust/build-wasm.sh b/ext/html-api-rust/build-wasm.sh new file mode 100755 index 0000000000000..0a863dbdf750e --- /dev/null +++ b/ext/html-api-rust/build-wasm.sh @@ -0,0 +1,12 @@ +#!/bin/sh +set -eu + +cd "$(dirname "$0")" + +cargo build --target wasm32-unknown-unknown --release --lib + +mkdir -p wasm/dist +wasm_file="target/wasm32-unknown-unknown/release/wp_html_api_rust_core.wasm" +cp "$wasm_file" wasm/dist/wp_html_api_rust_core.wasm + +echo "WASM build complete: ext/html-api-rust/wasm/dist/wp_html_api_rust_core.wasm" diff --git a/ext/html-api-rust/build.sh b/ext/html-api-rust/build.sh new file mode 100644 index 0000000000000..4de3764dabfe8 --- /dev/null +++ b/ext/html-api-rust/build.sh @@ -0,0 +1,9 @@ +#!/bin/sh +set -eu + +PHP_CONFIG_BIN="${PHP_CONFIG:-php-config}" + +cargo build --release +phpize +./configure --enable-wp-html-api-rust --with-php-config="${PHP_CONFIG_BIN}" +make diff --git a/ext/html-api-rust/config.m4 b/ext/html-api-rust/config.m4 new file mode 100644 index 
0000000000000..911511934b22c --- /dev/null +++ b/ext/html-api-rust/config.m4 @@ -0,0 +1,13 @@ +PHP_ARG_ENABLE( + [wp-html-api-rust], + [whether to enable the WordPress HTML API Rust extension], + [AS_HELP_STRING([--enable-wp-html-api-rust], [Enable WordPress HTML API Rust extension])], + [no] +) + +if test "$PHP_WP_HTML_API_RUST" != "no"; then + PHP_SUBST(WP_HTML_API_RUST_SHARED_LIBADD) + PHP_ADD_LIBRARY_WITH_PATH(wp_html_api_rust_core, $abs_srcdir/target/release, WP_HTML_API_RUST_SHARED_LIBADD) + + PHP_NEW_EXTENSION([wp_html_api_rust], [wp_html_api_rust.c], [$ext_shared]) +fi diff --git a/ext/html-api-rust/configure.ac b/ext/html-api-rust/configure.ac new file mode 100644 index 0000000000000..be7065b786d96 --- /dev/null +++ b/ext/html-api-rust/configure.ac @@ -0,0 +1,200 @@ +dnl This file becomes configure.ac for self-contained extensions. + +dnl Include external macro definitions before the AC_INIT to also remove +dnl comments starting with # and empty newlines from the included files. +m4_include([build/ax_check_compile_flag.m4]) +m4_include([build/ax_gcc_func_attribute.m4]) +m4_include([build/libtool.m4]) +m4_include([build/php_cxx_compile_stdcxx.m4]) +m4_include([build/php.m4]) +m4_include([build/pkg.m4]) + +AC_PREREQ([2.68]) +AC_INIT +AC_CONFIG_SRCDIR([config.m4]) +AC_CONFIG_AUX_DIR([build]) +AC_PRESERVE_HELP_ORDER + +PHP_CONFIG_NICE([config.nice]) + +AC_DEFUN([PHP_EXT_BUILDDIR],[.])dnl +AC_DEFUN([PHP_EXT_DIR],[""])dnl +AC_DEFUN([PHP_EXT_SRCDIR],[$abs_srcdir])dnl +AC_DEFUN([PHP_ALWAYS_SHARED],[ + ext_output="yes, shared" + ext_shared=yes + test "[$]$1" = "no" && $1=yes +])dnl + +PHP_INIT_BUILD_SYSTEM + +PKG_PROG_PKG_CONFIG +AC_PROG_CC([cc gcc]) +PHP_DETECT_ICC +PHP_DETECT_SUNCC + +dnl Support systems with system libraries in e.g. /usr/lib64. 
+PHP_ARG_WITH([libdir], + [for system library directory], + [AS_HELP_STRING([--with-libdir=NAME], + [Look for libraries in .../NAME rather than .../lib])], + [lib], + [no]) + +PHP_RUNPATH_SWITCH +PHP_SHLIB_SUFFIX_NAMES + +dnl Find php-config script. +PHP_ARG_WITH([php-config],, + [AS_HELP_STRING([--with-php-config=PATH], + [Path to php-config [php-config]])], + [php-config], + [no]) + +dnl For BC. +PHP_CONFIG=$PHP_PHP_CONFIG +prefix=$($PHP_CONFIG --prefix 2>/dev/null) +phpincludedir=$($PHP_CONFIG --include-dir 2>/dev/null) +INCLUDES=$($PHP_CONFIG --includes 2>/dev/null) +EXTENSION_DIR=$($PHP_CONFIG --extension-dir 2>/dev/null) +PHP_EXECUTABLE=$($PHP_CONFIG --php-binary 2>/dev/null) + +AS_VAR_IF([prefix],, + [AC_MSG_ERROR([Cannot find php-config. Please use --with-php-config=PATH])]) + +AC_MSG_CHECKING([for PHP prefix]) +AC_MSG_RESULT([$prefix]) +AC_MSG_CHECKING([for PHP includes]) +AC_MSG_RESULT([$INCLUDES]) +AC_MSG_CHECKING([for PHP extension directory]) +AC_MSG_RESULT([$EXTENSION_DIR]) +AC_MSG_CHECKING([for PHP installed headers prefix]) +AC_MSG_RESULT([$phpincludedir]) + +dnl Checks for PHP_DEBUG / ZEND_DEBUG / ZTS. +AC_MSG_CHECKING([if debugging is enabled]) +old_CPPFLAGS=$CPPFLAGS +CPPFLAGS="-I$phpincludedir" +AC_EGREP_CPP([php_debug_is_enabled], [ +#include
<main/php_config.h> +#if ZEND_DEBUG +php_debug_is_enabled +#endif +], + [PHP_DEBUG=yes], + [PHP_DEBUG=no]) +CPPFLAGS=$old_CPPFLAGS +AC_MSG_RESULT([$PHP_DEBUG]) + +AC_MSG_CHECKING([if PHP is built with thread safety (ZTS)]) +old_CPPFLAGS=$CPPFLAGS +CPPFLAGS="-I$phpincludedir" +AC_EGREP_CPP([php_zts_is_enabled], [ +#include <main/php_config.h>
+#ifdef ZTS +php_zts_is_enabled +#endif +], + [PHP_THREAD_SAFETY=yes], + [PHP_THREAD_SAFETY=no]) +CPPFLAGS=$old_CPPFLAGS +AC_MSG_RESULT([$PHP_THREAD_SAFETY]) + +dnl Discard optimization flags when debugging is enabled. +AS_VAR_IF([PHP_DEBUG], [yes], [ + PHP_DEBUG=1 + ZEND_DEBUG=yes + PHP_REMOVE_OPTIMIZATION_FLAGS + dnl Add -O0 only if GCC or ICC is used. + if test "$GCC" = "yes" || test "$ICC" = "yes"; then + CFLAGS="$CFLAGS -O0" + CXXFLAGS="$CXXFLAGS -g -O0" + fi + if test "$SUNCC" = "yes"; then + if test -n "$auto_cflags"; then + CFLAGS="-g" + CXXFLAGS="-g" + else + CFLAGS="$CFLAGS -g" + CXXFLAGS="$CFLAGS -g" + fi + fi +], [ + PHP_DEBUG=0 + ZEND_DEBUG=no +]) + +dnl Always shared. +PHP_BUILD_SHARED + +PHP_HELP_SEPARATOR([Extension:]) +PHP_CONFIGURE_PART([Configuring extension]) + +sinclude(config.m4) + +enable_static=no +enable_shared=yes + +PHP_HELP_SEPARATOR([Libtool:]) +PHP_CONFIGURE_PART([Configuring libtool]) + +dnl Only allow AC_PROG_CXX and AC_PROG_CXXCPP if they are explicitly called (by +dnl PHP_REQUIRE_CXX). Otherwise AC_PROG_LIBTOOL fails if there is no working C++ +dnl compiler. 
+AC_PROVIDE_IFELSE([PHP_REQUIRE_CXX], [], [ + undefine([AC_PROG_CXX]) + AC_DEFUN([AC_PROG_CXX], []) + undefine([AC_PROG_CXXCPP]) + AC_DEFUN([AC_PROG_CXXCPP], [php_prog_cxxcpp=disabled]) +]) +AC_PROG_LIBTOOL + +all_targets='$(PHP_MODULES) $(PHP_ZEND_EX)' +install_targets="install-modules install-headers" +CPPFLAGS="$CPPFLAGS -DHAVE_CONFIG_H" +CFLAGS_CLEAN='$(CFLAGS) -D_GNU_SOURCE' +CXXFLAGS_CLEAN='$(CXXFLAGS)' + +AS_VAR_IF([prefix], [NONE], [prefix=/usr/local]) +AS_VAR_IF([exec_prefix], [NONE], [exec_prefix='$(prefix)']) + +AS_VAR_IF([cross_compiling], [yes], + [AC_CHECK_PROGS([BUILD_CC], [gcc clang c99 c89 cc cl], [none]) + AC_MSG_CHECKING([for native build C compiler]) + AC_MSG_RESULT([$BUILD_CC])], + [BUILD_CC=$CC]) + +PHP_SUBST([PHP_MODULES]) +PHP_SUBST([PHP_ZEND_EX]) +PHP_SUBST([all_targets]) +PHP_SUBST([install_targets]) +PHP_SUBST([prefix]) +PHP_SUBST([exec_prefix]) +PHP_SUBST([libdir]) +PHP_SUBST([phpincludedir]) +PHP_SUBST([CC]) +PHP_SUBST([CFLAGS]) +PHP_SUBST([CFLAGS_CLEAN]) +PHP_SUBST([CPP]) +PHP_SUBST([CPPFLAGS]) +PHP_SUBST([CXX]) +PHP_SUBST([CXXFLAGS]) +PHP_SUBST([CXXFLAGS_CLEAN]) +PHP_SUBST([EXTENSION_DIR]) +PHP_SUBST([PHP_EXECUTABLE]) +PHP_SUBST([EXTRA_LDFLAGS]) +PHP_SUBST([EXTRA_LIBS]) +PHP_SUBST([INCLUDES]) +PHP_SUBST([LDFLAGS]) +PHP_SUBST([LIBTOOL]) +PHP_SUBST([SHELL]) +PHP_SUBST([INSTALL_HEADERS]) +PHP_SUBST([BUILD_CC]) + +PHP_CONFIGURE_PART([Generating files]) + +AC_CONFIG_HEADERS([config.h]) + +AC_CONFIG_COMMANDS_PRE([PHP_PATCH_CONFIG_HEADERS([config.h.in])]) + +AC_OUTPUT diff --git a/ext/html-api-rust/php_wp_html_api_rust.h b/ext/html-api-rust/php_wp_html_api_rust.h new file mode 100644 index 0000000000000..bd56b4b28fe4f --- /dev/null +++ b/ext/html-api-rust/php_wp_html_api_rust.h @@ -0,0 +1,9 @@ +#ifndef PHP_WP_HTML_API_RUST_H +#define PHP_WP_HTML_API_RUST_H + +extern zend_module_entry wp_html_api_rust_module_entry; +#define phpext_wp_html_api_rust_ptr &wp_html_api_rust_module_entry + +#define PHP_WP_HTML_API_RUST_VERSION "0.1.0" + +#endif 
diff --git a/ext/html-api-rust/scripts/generate-html5-named-character-references.mjs b/ext/html-api-rust/scripts/generate-html5-named-character-references.mjs new file mode 100644 index 0000000000000..c22b277d95088 --- /dev/null +++ b/ext/html-api-rust/scripts/generate-html5-named-character-references.mjs @@ -0,0 +1,55 @@ +#!/usr/bin/env node + +import { readFile, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const scriptDir = path.dirname(fileURLToPath(import.meta.url)); +const entitiesPath = path.resolve(scriptDir, "../../../tests/phpunit/data/html5-entities/entities.json"); +const outputPath = path.resolve(scriptDir, "../src/html5_named_character_references.rs"); + +const entities = JSON.parse(await readFile(entitiesPath, "utf8")); + +function byteStringLiteral(value) { + for (const byte of Buffer.from(value, "utf8")) { + if (byte < 0x20 || byte > 0x7e) { + throw new Error(`Expected ASCII entity name, got ${JSON.stringify(value)}`); + } + } + + return `b"${value.replaceAll("\\", "\\\\").replaceAll('"', '\\"')}"`; +} + +function unicodeEscape(codePoint) { + return `\\u{${codePoint.toString(16).toUpperCase()}}`; +} + +function characterReferenceExpression(characters) { + const codePoints = Array.from(characters, (character) => character.codePointAt(0)); + + if (codePoints.length === 1) { + return `CharacterReference::Scalar('${unicodeEscape(codePoints[0])}')`; + } + + return `CharacterReference::Text("${codePoints.map(unicodeEscape).join("")}")`; +} + +const entries = Object.entries(entities).map(([reference, { characters }]) => { + if (!reference.startsWith("&")) { + throw new Error(`Expected entity reference to start with &: ${reference}`); + } + + return ` (${byteStringLiteral(reference.slice(1))}, ${characterReferenceExpression(characters)}),`; +}); + +const output = `// Generated by scripts/generate-html5-named-character-references.mjs from +// tests/phpunit/data/html5-entities/entities.json. 
Do not edit manually. + +use super::CharacterReference; + +pub(super) const NAMED_CHARACTER_REFERENCES: &[(&[u8], CharacterReference)] = &[ +${entries.join("\n")} +]; +`; + +await writeFile(outputPath, output); diff --git a/ext/html-api-rust/src/html5_named_character_references.rs b/ext/html-api-rust/src/html5_named_character_references.rs new file mode 100644 index 0000000000000..2d3c8df8fdb2d --- /dev/null +++ b/ext/html-api-rust/src/html5_named_character_references.rs @@ -0,0 +1,2238 @@ +// Generated by scripts/generate-html5-named-character-references.mjs from +// tests/phpunit/data/html5-entities/entities.json. Do not edit manually. + +use super::CharacterReference; + +pub(super) const NAMED_CHARACTER_REFERENCES: &[(&[u8], CharacterReference)] = &[ + (b"AElig", CharacterReference::Scalar('\u{C6}')), + (b"AElig;", CharacterReference::Scalar('\u{C6}')), + (b"AMP", CharacterReference::Scalar('\u{26}')), + (b"AMP;", CharacterReference::Scalar('\u{26}')), + (b"Aacute", CharacterReference::Scalar('\u{C1}')), + (b"Aacute;", CharacterReference::Scalar('\u{C1}')), + (b"Abreve;", CharacterReference::Scalar('\u{102}')), + (b"Acirc", CharacterReference::Scalar('\u{C2}')), + (b"Acirc;", CharacterReference::Scalar('\u{C2}')), + (b"Acy;", CharacterReference::Scalar('\u{410}')), + (b"Afr;", CharacterReference::Scalar('\u{1D504}')), + (b"Agrave", CharacterReference::Scalar('\u{C0}')), + (b"Agrave;", CharacterReference::Scalar('\u{C0}')), + (b"Alpha;", CharacterReference::Scalar('\u{391}')), + (b"Amacr;", CharacterReference::Scalar('\u{100}')), + (b"And;", CharacterReference::Scalar('\u{2A53}')), + (b"Aogon;", CharacterReference::Scalar('\u{104}')), + (b"Aopf;", CharacterReference::Scalar('\u{1D538}')), + (b"ApplyFunction;", CharacterReference::Scalar('\u{2061}')), + (b"Aring", CharacterReference::Scalar('\u{C5}')), + (b"Aring;", CharacterReference::Scalar('\u{C5}')), + (b"Ascr;", CharacterReference::Scalar('\u{1D49C}')), + (b"Assign;", CharacterReference::Scalar('\u{2254}')), 
+ (b"Atilde", CharacterReference::Scalar('\u{C3}')), + (b"Atilde;", CharacterReference::Scalar('\u{C3}')), + (b"Auml", CharacterReference::Scalar('\u{C4}')), + (b"Auml;", CharacterReference::Scalar('\u{C4}')), + (b"Backslash;", CharacterReference::Scalar('\u{2216}')), + (b"Barv;", CharacterReference::Scalar('\u{2AE7}')), + (b"Barwed;", CharacterReference::Scalar('\u{2306}')), + (b"Bcy;", CharacterReference::Scalar('\u{411}')), + (b"Because;", CharacterReference::Scalar('\u{2235}')), + (b"Bernoullis;", CharacterReference::Scalar('\u{212C}')), + (b"Beta;", CharacterReference::Scalar('\u{392}')), + (b"Bfr;", CharacterReference::Scalar('\u{1D505}')), + (b"Bopf;", CharacterReference::Scalar('\u{1D539}')), + (b"Breve;", CharacterReference::Scalar('\u{2D8}')), + (b"Bscr;", CharacterReference::Scalar('\u{212C}')), + (b"Bumpeq;", CharacterReference::Scalar('\u{224E}')), + (b"CHcy;", CharacterReference::Scalar('\u{427}')), + (b"COPY", CharacterReference::Scalar('\u{A9}')), + (b"COPY;", CharacterReference::Scalar('\u{A9}')), + (b"Cacute;", CharacterReference::Scalar('\u{106}')), + (b"Cap;", CharacterReference::Scalar('\u{22D2}')), + (b"CapitalDifferentialD;", CharacterReference::Scalar('\u{2145}')), + (b"Cayleys;", CharacterReference::Scalar('\u{212D}')), + (b"Ccaron;", CharacterReference::Scalar('\u{10C}')), + (b"Ccedil", CharacterReference::Scalar('\u{C7}')), + (b"Ccedil;", CharacterReference::Scalar('\u{C7}')), + (b"Ccirc;", CharacterReference::Scalar('\u{108}')), + (b"Cconint;", CharacterReference::Scalar('\u{2230}')), + (b"Cdot;", CharacterReference::Scalar('\u{10A}')), + (b"Cedilla;", CharacterReference::Scalar('\u{B8}')), + (b"CenterDot;", CharacterReference::Scalar('\u{B7}')), + (b"Cfr;", CharacterReference::Scalar('\u{212D}')), + (b"Chi;", CharacterReference::Scalar('\u{3A7}')), + (b"CircleDot;", CharacterReference::Scalar('\u{2299}')), + (b"CircleMinus;", CharacterReference::Scalar('\u{2296}')), + (b"CirclePlus;", CharacterReference::Scalar('\u{2295}')), + 
(b"CircleTimes;", CharacterReference::Scalar('\u{2297}')), + (b"ClockwiseContourIntegral;", CharacterReference::Scalar('\u{2232}')), + (b"CloseCurlyDoubleQuote;", CharacterReference::Scalar('\u{201D}')), + (b"CloseCurlyQuote;", CharacterReference::Scalar('\u{2019}')), + (b"Colon;", CharacterReference::Scalar('\u{2237}')), + (b"Colone;", CharacterReference::Scalar('\u{2A74}')), + (b"Congruent;", CharacterReference::Scalar('\u{2261}')), + (b"Conint;", CharacterReference::Scalar('\u{222F}')), + (b"ContourIntegral;", CharacterReference::Scalar('\u{222E}')), + (b"Copf;", CharacterReference::Scalar('\u{2102}')), + (b"Coproduct;", CharacterReference::Scalar('\u{2210}')), + (b"CounterClockwiseContourIntegral;", CharacterReference::Scalar('\u{2233}')), + (b"Cross;", CharacterReference::Scalar('\u{2A2F}')), + (b"Cscr;", CharacterReference::Scalar('\u{1D49E}')), + (b"Cup;", CharacterReference::Scalar('\u{22D3}')), + (b"CupCap;", CharacterReference::Scalar('\u{224D}')), + (b"DD;", CharacterReference::Scalar('\u{2145}')), + (b"DDotrahd;", CharacterReference::Scalar('\u{2911}')), + (b"DJcy;", CharacterReference::Scalar('\u{402}')), + (b"DScy;", CharacterReference::Scalar('\u{405}')), + (b"DZcy;", CharacterReference::Scalar('\u{40F}')), + (b"Dagger;", CharacterReference::Scalar('\u{2021}')), + (b"Darr;", CharacterReference::Scalar('\u{21A1}')), + (b"Dashv;", CharacterReference::Scalar('\u{2AE4}')), + (b"Dcaron;", CharacterReference::Scalar('\u{10E}')), + (b"Dcy;", CharacterReference::Scalar('\u{414}')), + (b"Del;", CharacterReference::Scalar('\u{2207}')), + (b"Delta;", CharacterReference::Scalar('\u{394}')), + (b"Dfr;", CharacterReference::Scalar('\u{1D507}')), + (b"DiacriticalAcute;", CharacterReference::Scalar('\u{B4}')), + (b"DiacriticalDot;", CharacterReference::Scalar('\u{2D9}')), + (b"DiacriticalDoubleAcute;", CharacterReference::Scalar('\u{2DD}')), + (b"DiacriticalGrave;", CharacterReference::Scalar('\u{60}')), + (b"DiacriticalTilde;", 
CharacterReference::Scalar('\u{2DC}')), + (b"Diamond;", CharacterReference::Scalar('\u{22C4}')), + (b"DifferentialD;", CharacterReference::Scalar('\u{2146}')), + (b"Dopf;", CharacterReference::Scalar('\u{1D53B}')), + (b"Dot;", CharacterReference::Scalar('\u{A8}')), + (b"DotDot;", CharacterReference::Scalar('\u{20DC}')), + (b"DotEqual;", CharacterReference::Scalar('\u{2250}')), + (b"DoubleContourIntegral;", CharacterReference::Scalar('\u{222F}')), + (b"DoubleDot;", CharacterReference::Scalar('\u{A8}')), + (b"DoubleDownArrow;", CharacterReference::Scalar('\u{21D3}')), + (b"DoubleLeftArrow;", CharacterReference::Scalar('\u{21D0}')), + (b"DoubleLeftRightArrow;", CharacterReference::Scalar('\u{21D4}')), + (b"DoubleLeftTee;", CharacterReference::Scalar('\u{2AE4}')), + (b"DoubleLongLeftArrow;", CharacterReference::Scalar('\u{27F8}')), + (b"DoubleLongLeftRightArrow;", CharacterReference::Scalar('\u{27FA}')), + (b"DoubleLongRightArrow;", CharacterReference::Scalar('\u{27F9}')), + (b"DoubleRightArrow;", CharacterReference::Scalar('\u{21D2}')), + (b"DoubleRightTee;", CharacterReference::Scalar('\u{22A8}')), + (b"DoubleUpArrow;", CharacterReference::Scalar('\u{21D1}')), + (b"DoubleUpDownArrow;", CharacterReference::Scalar('\u{21D5}')), + (b"DoubleVerticalBar;", CharacterReference::Scalar('\u{2225}')), + (b"DownArrow;", CharacterReference::Scalar('\u{2193}')), + (b"DownArrowBar;", CharacterReference::Scalar('\u{2913}')), + (b"DownArrowUpArrow;", CharacterReference::Scalar('\u{21F5}')), + (b"DownBreve;", CharacterReference::Scalar('\u{311}')), + (b"DownLeftRightVector;", CharacterReference::Scalar('\u{2950}')), + (b"DownLeftTeeVector;", CharacterReference::Scalar('\u{295E}')), + (b"DownLeftVector;", CharacterReference::Scalar('\u{21BD}')), + (b"DownLeftVectorBar;", CharacterReference::Scalar('\u{2956}')), + (b"DownRightTeeVector;", CharacterReference::Scalar('\u{295F}')), + (b"DownRightVector;", CharacterReference::Scalar('\u{21C1}')), + (b"DownRightVectorBar;", 
CharacterReference::Scalar('\u{2957}')), + (b"DownTee;", CharacterReference::Scalar('\u{22A4}')), + (b"DownTeeArrow;", CharacterReference::Scalar('\u{21A7}')), + (b"Downarrow;", CharacterReference::Scalar('\u{21D3}')), + (b"Dscr;", CharacterReference::Scalar('\u{1D49F}')), + (b"Dstrok;", CharacterReference::Scalar('\u{110}')), + (b"ENG;", CharacterReference::Scalar('\u{14A}')), + (b"ETH", CharacterReference::Scalar('\u{D0}')), + (b"ETH;", CharacterReference::Scalar('\u{D0}')), + (b"Eacute", CharacterReference::Scalar('\u{C9}')), + (b"Eacute;", CharacterReference::Scalar('\u{C9}')), + (b"Ecaron;", CharacterReference::Scalar('\u{11A}')), + (b"Ecirc", CharacterReference::Scalar('\u{CA}')), + (b"Ecirc;", CharacterReference::Scalar('\u{CA}')), + (b"Ecy;", CharacterReference::Scalar('\u{42D}')), + (b"Edot;", CharacterReference::Scalar('\u{116}')), + (b"Efr;", CharacterReference::Scalar('\u{1D508}')), + (b"Egrave", CharacterReference::Scalar('\u{C8}')), + (b"Egrave;", CharacterReference::Scalar('\u{C8}')), + (b"Element;", CharacterReference::Scalar('\u{2208}')), + (b"Emacr;", CharacterReference::Scalar('\u{112}')), + (b"EmptySmallSquare;", CharacterReference::Scalar('\u{25FB}')), + (b"EmptyVerySmallSquare;", CharacterReference::Scalar('\u{25AB}')), + (b"Eogon;", CharacterReference::Scalar('\u{118}')), + (b"Eopf;", CharacterReference::Scalar('\u{1D53C}')), + (b"Epsilon;", CharacterReference::Scalar('\u{395}')), + (b"Equal;", CharacterReference::Scalar('\u{2A75}')), + (b"EqualTilde;", CharacterReference::Scalar('\u{2242}')), + (b"Equilibrium;", CharacterReference::Scalar('\u{21CC}')), + (b"Escr;", CharacterReference::Scalar('\u{2130}')), + (b"Esim;", CharacterReference::Scalar('\u{2A73}')), + (b"Eta;", CharacterReference::Scalar('\u{397}')), + (b"Euml", CharacterReference::Scalar('\u{CB}')), + (b"Euml;", CharacterReference::Scalar('\u{CB}')), + (b"Exists;", CharacterReference::Scalar('\u{2203}')), + (b"ExponentialE;", CharacterReference::Scalar('\u{2147}')), + (b"Fcy;", 
CharacterReference::Scalar('\u{424}')), + (b"Ffr;", CharacterReference::Scalar('\u{1D509}')), + (b"FilledSmallSquare;", CharacterReference::Scalar('\u{25FC}')), + (b"FilledVerySmallSquare;", CharacterReference::Scalar('\u{25AA}')), + (b"Fopf;", CharacterReference::Scalar('\u{1D53D}')), + (b"ForAll;", CharacterReference::Scalar('\u{2200}')), + (b"Fouriertrf;", CharacterReference::Scalar('\u{2131}')), + (b"Fscr;", CharacterReference::Scalar('\u{2131}')), + (b"GJcy;", CharacterReference::Scalar('\u{403}')), + (b"GT", CharacterReference::Scalar('\u{3E}')), + (b"GT;", CharacterReference::Scalar('\u{3E}')), + (b"Gamma;", CharacterReference::Scalar('\u{393}')), + (b"Gammad;", CharacterReference::Scalar('\u{3DC}')), + (b"Gbreve;", CharacterReference::Scalar('\u{11E}')), + (b"Gcedil;", CharacterReference::Scalar('\u{122}')), + (b"Gcirc;", CharacterReference::Scalar('\u{11C}')), + (b"Gcy;", CharacterReference::Scalar('\u{413}')), + (b"Gdot;", CharacterReference::Scalar('\u{120}')), + (b"Gfr;", CharacterReference::Scalar('\u{1D50A}')), + (b"Gg;", CharacterReference::Scalar('\u{22D9}')), + (b"Gopf;", CharacterReference::Scalar('\u{1D53E}')), + (b"GreaterEqual;", CharacterReference::Scalar('\u{2265}')), + (b"GreaterEqualLess;", CharacterReference::Scalar('\u{22DB}')), + (b"GreaterFullEqual;", CharacterReference::Scalar('\u{2267}')), + (b"GreaterGreater;", CharacterReference::Scalar('\u{2AA2}')), + (b"GreaterLess;", CharacterReference::Scalar('\u{2277}')), + (b"GreaterSlantEqual;", CharacterReference::Scalar('\u{2A7E}')), + (b"GreaterTilde;", CharacterReference::Scalar('\u{2273}')), + (b"Gscr;", CharacterReference::Scalar('\u{1D4A2}')), + (b"Gt;", CharacterReference::Scalar('\u{226B}')), + (b"HARDcy;", CharacterReference::Scalar('\u{42A}')), + (b"Hacek;", CharacterReference::Scalar('\u{2C7}')), + (b"Hat;", CharacterReference::Scalar('\u{5E}')), + (b"Hcirc;", CharacterReference::Scalar('\u{124}')), + (b"Hfr;", CharacterReference::Scalar('\u{210C}')), + (b"HilbertSpace;", 
CharacterReference::Scalar('\u{210B}')), + (b"Hopf;", CharacterReference::Scalar('\u{210D}')), + (b"HorizontalLine;", CharacterReference::Scalar('\u{2500}')), + (b"Hscr;", CharacterReference::Scalar('\u{210B}')), + (b"Hstrok;", CharacterReference::Scalar('\u{126}')), + (b"HumpDownHump;", CharacterReference::Scalar('\u{224E}')), + (b"HumpEqual;", CharacterReference::Scalar('\u{224F}')), + (b"IEcy;", CharacterReference::Scalar('\u{415}')), + (b"IJlig;", CharacterReference::Scalar('\u{132}')), + (b"IOcy;", CharacterReference::Scalar('\u{401}')), + (b"Iacute", CharacterReference::Scalar('\u{CD}')), + (b"Iacute;", CharacterReference::Scalar('\u{CD}')), + (b"Icirc", CharacterReference::Scalar('\u{CE}')), + (b"Icirc;", CharacterReference::Scalar('\u{CE}')), + (b"Icy;", CharacterReference::Scalar('\u{418}')), + (b"Idot;", CharacterReference::Scalar('\u{130}')), + (b"Ifr;", CharacterReference::Scalar('\u{2111}')), + (b"Igrave", CharacterReference::Scalar('\u{CC}')), + (b"Igrave;", CharacterReference::Scalar('\u{CC}')), + (b"Im;", CharacterReference::Scalar('\u{2111}')), + (b"Imacr;", CharacterReference::Scalar('\u{12A}')), + (b"ImaginaryI;", CharacterReference::Scalar('\u{2148}')), + (b"Implies;", CharacterReference::Scalar('\u{21D2}')), + (b"Int;", CharacterReference::Scalar('\u{222C}')), + (b"Integral;", CharacterReference::Scalar('\u{222B}')), + (b"Intersection;", CharacterReference::Scalar('\u{22C2}')), + (b"InvisibleComma;", CharacterReference::Scalar('\u{2063}')), + (b"InvisibleTimes;", CharacterReference::Scalar('\u{2062}')), + (b"Iogon;", CharacterReference::Scalar('\u{12E}')), + (b"Iopf;", CharacterReference::Scalar('\u{1D540}')), + (b"Iota;", CharacterReference::Scalar('\u{399}')), + (b"Iscr;", CharacterReference::Scalar('\u{2110}')), + (b"Itilde;", CharacterReference::Scalar('\u{128}')), + (b"Iukcy;", CharacterReference::Scalar('\u{406}')), + (b"Iuml", CharacterReference::Scalar('\u{CF}')), + (b"Iuml;", CharacterReference::Scalar('\u{CF}')), + (b"Jcirc;", 
CharacterReference::Scalar('\u{134}')), + (b"Jcy;", CharacterReference::Scalar('\u{419}')), + (b"Jfr;", CharacterReference::Scalar('\u{1D50D}')), + (b"Jopf;", CharacterReference::Scalar('\u{1D541}')), + (b"Jscr;", CharacterReference::Scalar('\u{1D4A5}')), + (b"Jsercy;", CharacterReference::Scalar('\u{408}')), + (b"Jukcy;", CharacterReference::Scalar('\u{404}')), + (b"KHcy;", CharacterReference::Scalar('\u{425}')), + (b"KJcy;", CharacterReference::Scalar('\u{40C}')), + (b"Kappa;", CharacterReference::Scalar('\u{39A}')), + (b"Kcedil;", CharacterReference::Scalar('\u{136}')), + (b"Kcy;", CharacterReference::Scalar('\u{41A}')), + (b"Kfr;", CharacterReference::Scalar('\u{1D50E}')), + (b"Kopf;", CharacterReference::Scalar('\u{1D542}')), + (b"Kscr;", CharacterReference::Scalar('\u{1D4A6}')), + (b"LJcy;", CharacterReference::Scalar('\u{409}')), + (b"LT", CharacterReference::Scalar('\u{3C}')), + (b"LT;", CharacterReference::Scalar('\u{3C}')), + (b"Lacute;", CharacterReference::Scalar('\u{139}')), + (b"Lambda;", CharacterReference::Scalar('\u{39B}')), + (b"Lang;", CharacterReference::Scalar('\u{27EA}')), + (b"Laplacetrf;", CharacterReference::Scalar('\u{2112}')), + (b"Larr;", CharacterReference::Scalar('\u{219E}')), + (b"Lcaron;", CharacterReference::Scalar('\u{13D}')), + (b"Lcedil;", CharacterReference::Scalar('\u{13B}')), + (b"Lcy;", CharacterReference::Scalar('\u{41B}')), + (b"LeftAngleBracket;", CharacterReference::Scalar('\u{27E8}')), + (b"LeftArrow;", CharacterReference::Scalar('\u{2190}')), + (b"LeftArrowBar;", CharacterReference::Scalar('\u{21E4}')), + (b"LeftArrowRightArrow;", CharacterReference::Scalar('\u{21C6}')), + (b"LeftCeiling;", CharacterReference::Scalar('\u{2308}')), + (b"LeftDoubleBracket;", CharacterReference::Scalar('\u{27E6}')), + (b"LeftDownTeeVector;", CharacterReference::Scalar('\u{2961}')), + (b"LeftDownVector;", CharacterReference::Scalar('\u{21C3}')), + (b"LeftDownVectorBar;", CharacterReference::Scalar('\u{2959}')), + (b"LeftFloor;", 
CharacterReference::Scalar('\u{230A}')), + (b"LeftRightArrow;", CharacterReference::Scalar('\u{2194}')), + (b"LeftRightVector;", CharacterReference::Scalar('\u{294E}')), + (b"LeftTee;", CharacterReference::Scalar('\u{22A3}')), + (b"LeftTeeArrow;", CharacterReference::Scalar('\u{21A4}')), + (b"LeftTeeVector;", CharacterReference::Scalar('\u{295A}')), + (b"LeftTriangle;", CharacterReference::Scalar('\u{22B2}')), + (b"LeftTriangleBar;", CharacterReference::Scalar('\u{29CF}')), + (b"LeftTriangleEqual;", CharacterReference::Scalar('\u{22B4}')), + (b"LeftUpDownVector;", CharacterReference::Scalar('\u{2951}')), + (b"LeftUpTeeVector;", CharacterReference::Scalar('\u{2960}')), + (b"LeftUpVector;", CharacterReference::Scalar('\u{21BF}')), + (b"LeftUpVectorBar;", CharacterReference::Scalar('\u{2958}')), + (b"LeftVector;", CharacterReference::Scalar('\u{21BC}')), + (b"LeftVectorBar;", CharacterReference::Scalar('\u{2952}')), + (b"Leftarrow;", CharacterReference::Scalar('\u{21D0}')), + (b"Leftrightarrow;", CharacterReference::Scalar('\u{21D4}')), + (b"LessEqualGreater;", CharacterReference::Scalar('\u{22DA}')), + (b"LessFullEqual;", CharacterReference::Scalar('\u{2266}')), + (b"LessGreater;", CharacterReference::Scalar('\u{2276}')), + (b"LessLess;", CharacterReference::Scalar('\u{2AA1}')), + (b"LessSlantEqual;", CharacterReference::Scalar('\u{2A7D}')), + (b"LessTilde;", CharacterReference::Scalar('\u{2272}')), + (b"Lfr;", CharacterReference::Scalar('\u{1D50F}')), + (b"Ll;", CharacterReference::Scalar('\u{22D8}')), + (b"Lleftarrow;", CharacterReference::Scalar('\u{21DA}')), + (b"Lmidot;", CharacterReference::Scalar('\u{13F}')), + (b"LongLeftArrow;", CharacterReference::Scalar('\u{27F5}')), + (b"LongLeftRightArrow;", CharacterReference::Scalar('\u{27F7}')), + (b"LongRightArrow;", CharacterReference::Scalar('\u{27F6}')), + (b"Longleftarrow;", CharacterReference::Scalar('\u{27F8}')), + (b"Longleftrightarrow;", CharacterReference::Scalar('\u{27FA}')), + (b"Longrightarrow;", 
CharacterReference::Scalar('\u{27F9}')), + (b"Lopf;", CharacterReference::Scalar('\u{1D543}')), + (b"LowerLeftArrow;", CharacterReference::Scalar('\u{2199}')), + (b"LowerRightArrow;", CharacterReference::Scalar('\u{2198}')), + (b"Lscr;", CharacterReference::Scalar('\u{2112}')), + (b"Lsh;", CharacterReference::Scalar('\u{21B0}')), + (b"Lstrok;", CharacterReference::Scalar('\u{141}')), + (b"Lt;", CharacterReference::Scalar('\u{226A}')), + (b"Map;", CharacterReference::Scalar('\u{2905}')), + (b"Mcy;", CharacterReference::Scalar('\u{41C}')), + (b"MediumSpace;", CharacterReference::Scalar('\u{205F}')), + (b"Mellintrf;", CharacterReference::Scalar('\u{2133}')), + (b"Mfr;", CharacterReference::Scalar('\u{1D510}')), + (b"MinusPlus;", CharacterReference::Scalar('\u{2213}')), + (b"Mopf;", CharacterReference::Scalar('\u{1D544}')), + (b"Mscr;", CharacterReference::Scalar('\u{2133}')), + (b"Mu;", CharacterReference::Scalar('\u{39C}')), + (b"NJcy;", CharacterReference::Scalar('\u{40A}')), + (b"Nacute;", CharacterReference::Scalar('\u{143}')), + (b"Ncaron;", CharacterReference::Scalar('\u{147}')), + (b"Ncedil;", CharacterReference::Scalar('\u{145}')), + (b"Ncy;", CharacterReference::Scalar('\u{41D}')), + (b"NegativeMediumSpace;", CharacterReference::Scalar('\u{200B}')), + (b"NegativeThickSpace;", CharacterReference::Scalar('\u{200B}')), + (b"NegativeThinSpace;", CharacterReference::Scalar('\u{200B}')), + (b"NegativeVeryThinSpace;", CharacterReference::Scalar('\u{200B}')), + (b"NestedGreaterGreater;", CharacterReference::Scalar('\u{226B}')), + (b"NestedLessLess;", CharacterReference::Scalar('\u{226A}')), + (b"NewLine;", CharacterReference::Scalar('\u{A}')), + (b"Nfr;", CharacterReference::Scalar('\u{1D511}')), + (b"NoBreak;", CharacterReference::Scalar('\u{2060}')), + (b"NonBreakingSpace;", CharacterReference::Scalar('\u{A0}')), + (b"Nopf;", CharacterReference::Scalar('\u{2115}')), + (b"Not;", CharacterReference::Scalar('\u{2AEC}')), + (b"NotCongruent;", 
CharacterReference::Scalar('\u{2262}')), + (b"NotCupCap;", CharacterReference::Scalar('\u{226D}')), + (b"NotDoubleVerticalBar;", CharacterReference::Scalar('\u{2226}')), + (b"NotElement;", CharacterReference::Scalar('\u{2209}')), + (b"NotEqual;", CharacterReference::Scalar('\u{2260}')), + (b"NotEqualTilde;", CharacterReference::Text("\u{2242}\u{338}")), + (b"NotExists;", CharacterReference::Scalar('\u{2204}')), + (b"NotGreater;", CharacterReference::Scalar('\u{226F}')), + (b"NotGreaterEqual;", CharacterReference::Scalar('\u{2271}')), + (b"NotGreaterFullEqual;", CharacterReference::Text("\u{2267}\u{338}")), + (b"NotGreaterGreater;", CharacterReference::Text("\u{226B}\u{338}")), + (b"NotGreaterLess;", CharacterReference::Scalar('\u{2279}')), + (b"NotGreaterSlantEqual;", CharacterReference::Text("\u{2A7E}\u{338}")), + (b"NotGreaterTilde;", CharacterReference::Scalar('\u{2275}')), + (b"NotHumpDownHump;", CharacterReference::Text("\u{224E}\u{338}")), + (b"NotHumpEqual;", CharacterReference::Text("\u{224F}\u{338}")), + (b"NotLeftTriangle;", CharacterReference::Scalar('\u{22EA}')), + (b"NotLeftTriangleBar;", CharacterReference::Text("\u{29CF}\u{338}")), + (b"NotLeftTriangleEqual;", CharacterReference::Scalar('\u{22EC}')), + (b"NotLess;", CharacterReference::Scalar('\u{226E}')), + (b"NotLessEqual;", CharacterReference::Scalar('\u{2270}')), + (b"NotLessGreater;", CharacterReference::Scalar('\u{2278}')), + (b"NotLessLess;", CharacterReference::Text("\u{226A}\u{338}")), + (b"NotLessSlantEqual;", CharacterReference::Text("\u{2A7D}\u{338}")), + (b"NotLessTilde;", CharacterReference::Scalar('\u{2274}')), + (b"NotNestedGreaterGreater;", CharacterReference::Text("\u{2AA2}\u{338}")), + (b"NotNestedLessLess;", CharacterReference::Text("\u{2AA1}\u{338}")), + (b"NotPrecedes;", CharacterReference::Scalar('\u{2280}')), + (b"NotPrecedesEqual;", CharacterReference::Text("\u{2AAF}\u{338}")), + (b"NotPrecedesSlantEqual;", CharacterReference::Scalar('\u{22E0}')), + (b"NotReverseElement;", 
CharacterReference::Scalar('\u{220C}')), + (b"NotRightTriangle;", CharacterReference::Scalar('\u{22EB}')), + (b"NotRightTriangleBar;", CharacterReference::Text("\u{29D0}\u{338}")), + (b"NotRightTriangleEqual;", CharacterReference::Scalar('\u{22ED}')), + (b"NotSquareSubset;", CharacterReference::Text("\u{228F}\u{338}")), + (b"NotSquareSubsetEqual;", CharacterReference::Scalar('\u{22E2}')), + (b"NotSquareSuperset;", CharacterReference::Text("\u{2290}\u{338}")), + (b"NotSquareSupersetEqual;", CharacterReference::Scalar('\u{22E3}')), + (b"NotSubset;", CharacterReference::Text("\u{2282}\u{20D2}")), + (b"NotSubsetEqual;", CharacterReference::Scalar('\u{2288}')), + (b"NotSucceeds;", CharacterReference::Scalar('\u{2281}')), + (b"NotSucceedsEqual;", CharacterReference::Text("\u{2AB0}\u{338}")), + (b"NotSucceedsSlantEqual;", CharacterReference::Scalar('\u{22E1}')), + (b"NotSucceedsTilde;", CharacterReference::Text("\u{227F}\u{338}")), + (b"NotSuperset;", CharacterReference::Text("\u{2283}\u{20D2}")), + (b"NotSupersetEqual;", CharacterReference::Scalar('\u{2289}')), + (b"NotTilde;", CharacterReference::Scalar('\u{2241}')), + (b"NotTildeEqual;", CharacterReference::Scalar('\u{2244}')), + (b"NotTildeFullEqual;", CharacterReference::Scalar('\u{2247}')), + (b"NotTildeTilde;", CharacterReference::Scalar('\u{2249}')), + (b"NotVerticalBar;", CharacterReference::Scalar('\u{2224}')), + (b"Nscr;", CharacterReference::Scalar('\u{1D4A9}')), + (b"Ntilde", CharacterReference::Scalar('\u{D1}')), + (b"Ntilde;", CharacterReference::Scalar('\u{D1}')), + (b"Nu;", CharacterReference::Scalar('\u{39D}')), + (b"OElig;", CharacterReference::Scalar('\u{152}')), + (b"Oacute", CharacterReference::Scalar('\u{D3}')), + (b"Oacute;", CharacterReference::Scalar('\u{D3}')), + (b"Ocirc", CharacterReference::Scalar('\u{D4}')), + (b"Ocirc;", CharacterReference::Scalar('\u{D4}')), + (b"Ocy;", CharacterReference::Scalar('\u{41E}')), + (b"Odblac;", CharacterReference::Scalar('\u{150}')), + (b"Ofr;", 
CharacterReference::Scalar('\u{1D512}')), + (b"Ograve", CharacterReference::Scalar('\u{D2}')), + (b"Ograve;", CharacterReference::Scalar('\u{D2}')), + (b"Omacr;", CharacterReference::Scalar('\u{14C}')), + (b"Omega;", CharacterReference::Scalar('\u{3A9}')), + (b"Omicron;", CharacterReference::Scalar('\u{39F}')), + (b"Oopf;", CharacterReference::Scalar('\u{1D546}')), + (b"OpenCurlyDoubleQuote;", CharacterReference::Scalar('\u{201C}')), + (b"OpenCurlyQuote;", CharacterReference::Scalar('\u{2018}')), + (b"Or;", CharacterReference::Scalar('\u{2A54}')), + (b"Oscr;", CharacterReference::Scalar('\u{1D4AA}')), + (b"Oslash", CharacterReference::Scalar('\u{D8}')), + (b"Oslash;", CharacterReference::Scalar('\u{D8}')), + (b"Otilde", CharacterReference::Scalar('\u{D5}')), + (b"Otilde;", CharacterReference::Scalar('\u{D5}')), + (b"Otimes;", CharacterReference::Scalar('\u{2A37}')), + (b"Ouml", CharacterReference::Scalar('\u{D6}')), + (b"Ouml;", CharacterReference::Scalar('\u{D6}')), + (b"OverBar;", CharacterReference::Scalar('\u{203E}')), + (b"OverBrace;", CharacterReference::Scalar('\u{23DE}')), + (b"OverBracket;", CharacterReference::Scalar('\u{23B4}')), + (b"OverParenthesis;", CharacterReference::Scalar('\u{23DC}')), + (b"PartialD;", CharacterReference::Scalar('\u{2202}')), + (b"Pcy;", CharacterReference::Scalar('\u{41F}')), + (b"Pfr;", CharacterReference::Scalar('\u{1D513}')), + (b"Phi;", CharacterReference::Scalar('\u{3A6}')), + (b"Pi;", CharacterReference::Scalar('\u{3A0}')), + (b"PlusMinus;", CharacterReference::Scalar('\u{B1}')), + (b"Poincareplane;", CharacterReference::Scalar('\u{210C}')), + (b"Popf;", CharacterReference::Scalar('\u{2119}')), + (b"Pr;", CharacterReference::Scalar('\u{2ABB}')), + (b"Precedes;", CharacterReference::Scalar('\u{227A}')), + (b"PrecedesEqual;", CharacterReference::Scalar('\u{2AAF}')), + (b"PrecedesSlantEqual;", CharacterReference::Scalar('\u{227C}')), + (b"PrecedesTilde;", CharacterReference::Scalar('\u{227E}')), + (b"Prime;", 
CharacterReference::Scalar('\u{2033}')), + (b"Product;", CharacterReference::Scalar('\u{220F}')), + (b"Proportion;", CharacterReference::Scalar('\u{2237}')), + (b"Proportional;", CharacterReference::Scalar('\u{221D}')), + (b"Pscr;", CharacterReference::Scalar('\u{1D4AB}')), + (b"Psi;", CharacterReference::Scalar('\u{3A8}')), + (b"QUOT", CharacterReference::Scalar('\u{22}')), + (b"QUOT;", CharacterReference::Scalar('\u{22}')), + (b"Qfr;", CharacterReference::Scalar('\u{1D514}')), + (b"Qopf;", CharacterReference::Scalar('\u{211A}')), + (b"Qscr;", CharacterReference::Scalar('\u{1D4AC}')), + (b"RBarr;", CharacterReference::Scalar('\u{2910}')), + (b"REG", CharacterReference::Scalar('\u{AE}')), + (b"REG;", CharacterReference::Scalar('\u{AE}')), + (b"Racute;", CharacterReference::Scalar('\u{154}')), + (b"Rang;", CharacterReference::Scalar('\u{27EB}')), + (b"Rarr;", CharacterReference::Scalar('\u{21A0}')), + (b"Rarrtl;", CharacterReference::Scalar('\u{2916}')), + (b"Rcaron;", CharacterReference::Scalar('\u{158}')), + (b"Rcedil;", CharacterReference::Scalar('\u{156}')), + (b"Rcy;", CharacterReference::Scalar('\u{420}')), + (b"Re;", CharacterReference::Scalar('\u{211C}')), + (b"ReverseElement;", CharacterReference::Scalar('\u{220B}')), + (b"ReverseEquilibrium;", CharacterReference::Scalar('\u{21CB}')), + (b"ReverseUpEquilibrium;", CharacterReference::Scalar('\u{296F}')), + (b"Rfr;", CharacterReference::Scalar('\u{211C}')), + (b"Rho;", CharacterReference::Scalar('\u{3A1}')), + (b"RightAngleBracket;", CharacterReference::Scalar('\u{27E9}')), + (b"RightArrow;", CharacterReference::Scalar('\u{2192}')), + (b"RightArrowBar;", CharacterReference::Scalar('\u{21E5}')), + (b"RightArrowLeftArrow;", CharacterReference::Scalar('\u{21C4}')), + (b"RightCeiling;", CharacterReference::Scalar('\u{2309}')), + (b"RightDoubleBracket;", CharacterReference::Scalar('\u{27E7}')), + (b"RightDownTeeVector;", CharacterReference::Scalar('\u{295D}')), + (b"RightDownVector;", 
CharacterReference::Scalar('\u{21C2}')), + (b"RightDownVectorBar;", CharacterReference::Scalar('\u{2955}')), + (b"RightFloor;", CharacterReference::Scalar('\u{230B}')), + (b"RightTee;", CharacterReference::Scalar('\u{22A2}')), + (b"RightTeeArrow;", CharacterReference::Scalar('\u{21A6}')), + (b"RightTeeVector;", CharacterReference::Scalar('\u{295B}')), + (b"RightTriangle;", CharacterReference::Scalar('\u{22B3}')), + (b"RightTriangleBar;", CharacterReference::Scalar('\u{29D0}')), + (b"RightTriangleEqual;", CharacterReference::Scalar('\u{22B5}')), + (b"RightUpDownVector;", CharacterReference::Scalar('\u{294F}')), + (b"RightUpTeeVector;", CharacterReference::Scalar('\u{295C}')), + (b"RightUpVector;", CharacterReference::Scalar('\u{21BE}')), + (b"RightUpVectorBar;", CharacterReference::Scalar('\u{2954}')), + (b"RightVector;", CharacterReference::Scalar('\u{21C0}')), + (b"RightVectorBar;", CharacterReference::Scalar('\u{2953}')), + (b"Rightarrow;", CharacterReference::Scalar('\u{21D2}')), + (b"Ropf;", CharacterReference::Scalar('\u{211D}')), + (b"RoundImplies;", CharacterReference::Scalar('\u{2970}')), + (b"Rrightarrow;", CharacterReference::Scalar('\u{21DB}')), + (b"Rscr;", CharacterReference::Scalar('\u{211B}')), + (b"Rsh;", CharacterReference::Scalar('\u{21B1}')), + (b"RuleDelayed;", CharacterReference::Scalar('\u{29F4}')), + (b"SHCHcy;", CharacterReference::Scalar('\u{429}')), + (b"SHcy;", CharacterReference::Scalar('\u{428}')), + (b"SOFTcy;", CharacterReference::Scalar('\u{42C}')), + (b"Sacute;", CharacterReference::Scalar('\u{15A}')), + (b"Sc;", CharacterReference::Scalar('\u{2ABC}')), + (b"Scaron;", CharacterReference::Scalar('\u{160}')), + (b"Scedil;", CharacterReference::Scalar('\u{15E}')), + (b"Scirc;", CharacterReference::Scalar('\u{15C}')), + (b"Scy;", CharacterReference::Scalar('\u{421}')), + (b"Sfr;", CharacterReference::Scalar('\u{1D516}')), + (b"ShortDownArrow;", CharacterReference::Scalar('\u{2193}')), + (b"ShortLeftArrow;", 
CharacterReference::Scalar('\u{2190}')), + (b"ShortRightArrow;", CharacterReference::Scalar('\u{2192}')), + (b"ShortUpArrow;", CharacterReference::Scalar('\u{2191}')), + (b"Sigma;", CharacterReference::Scalar('\u{3A3}')), + (b"SmallCircle;", CharacterReference::Scalar('\u{2218}')), + (b"Sopf;", CharacterReference::Scalar('\u{1D54A}')), + (b"Sqrt;", CharacterReference::Scalar('\u{221A}')), + (b"Square;", CharacterReference::Scalar('\u{25A1}')), + (b"SquareIntersection;", CharacterReference::Scalar('\u{2293}')), + (b"SquareSubset;", CharacterReference::Scalar('\u{228F}')), + (b"SquareSubsetEqual;", CharacterReference::Scalar('\u{2291}')), + (b"SquareSuperset;", CharacterReference::Scalar('\u{2290}')), + (b"SquareSupersetEqual;", CharacterReference::Scalar('\u{2292}')), + (b"SquareUnion;", CharacterReference::Scalar('\u{2294}')), + (b"Sscr;", CharacterReference::Scalar('\u{1D4AE}')), + (b"Star;", CharacterReference::Scalar('\u{22C6}')), + (b"Sub;", CharacterReference::Scalar('\u{22D0}')), + (b"Subset;", CharacterReference::Scalar('\u{22D0}')), + (b"SubsetEqual;", CharacterReference::Scalar('\u{2286}')), + (b"Succeeds;", CharacterReference::Scalar('\u{227B}')), + (b"SucceedsEqual;", CharacterReference::Scalar('\u{2AB0}')), + (b"SucceedsSlantEqual;", CharacterReference::Scalar('\u{227D}')), + (b"SucceedsTilde;", CharacterReference::Scalar('\u{227F}')), + (b"SuchThat;", CharacterReference::Scalar('\u{220B}')), + (b"Sum;", CharacterReference::Scalar('\u{2211}')), + (b"Sup;", CharacterReference::Scalar('\u{22D1}')), + (b"Superset;", CharacterReference::Scalar('\u{2283}')), + (b"SupersetEqual;", CharacterReference::Scalar('\u{2287}')), + (b"Supset;", CharacterReference::Scalar('\u{22D1}')), + (b"THORN", CharacterReference::Scalar('\u{DE}')), + (b"THORN;", CharacterReference::Scalar('\u{DE}')), + (b"TRADE;", CharacterReference::Scalar('\u{2122}')), + (b"TSHcy;", CharacterReference::Scalar('\u{40B}')), + (b"TScy;", CharacterReference::Scalar('\u{426}')), + (b"Tab;", 
CharacterReference::Scalar('\u{9}')), + (b"Tau;", CharacterReference::Scalar('\u{3A4}')), + (b"Tcaron;", CharacterReference::Scalar('\u{164}')), + (b"Tcedil;", CharacterReference::Scalar('\u{162}')), + (b"Tcy;", CharacterReference::Scalar('\u{422}')), + (b"Tfr;", CharacterReference::Scalar('\u{1D517}')), + (b"Therefore;", CharacterReference::Scalar('\u{2234}')), + (b"Theta;", CharacterReference::Scalar('\u{398}')), + (b"ThickSpace;", CharacterReference::Text("\u{205F}\u{200A}")), + (b"ThinSpace;", CharacterReference::Scalar('\u{2009}')), + (b"Tilde;", CharacterReference::Scalar('\u{223C}')), + (b"TildeEqual;", CharacterReference::Scalar('\u{2243}')), + (b"TildeFullEqual;", CharacterReference::Scalar('\u{2245}')), + (b"TildeTilde;", CharacterReference::Scalar('\u{2248}')), + (b"Topf;", CharacterReference::Scalar('\u{1D54B}')), + (b"TripleDot;", CharacterReference::Scalar('\u{20DB}')), + (b"Tscr;", CharacterReference::Scalar('\u{1D4AF}')), + (b"Tstrok;", CharacterReference::Scalar('\u{166}')), + (b"Uacute", CharacterReference::Scalar('\u{DA}')), + (b"Uacute;", CharacterReference::Scalar('\u{DA}')), + (b"Uarr;", CharacterReference::Scalar('\u{219F}')), + (b"Uarrocir;", CharacterReference::Scalar('\u{2949}')), + (b"Ubrcy;", CharacterReference::Scalar('\u{40E}')), + (b"Ubreve;", CharacterReference::Scalar('\u{16C}')), + (b"Ucirc", CharacterReference::Scalar('\u{DB}')), + (b"Ucirc;", CharacterReference::Scalar('\u{DB}')), + (b"Ucy;", CharacterReference::Scalar('\u{423}')), + (b"Udblac;", CharacterReference::Scalar('\u{170}')), + (b"Ufr;", CharacterReference::Scalar('\u{1D518}')), + (b"Ugrave", CharacterReference::Scalar('\u{D9}')), + (b"Ugrave;", CharacterReference::Scalar('\u{D9}')), + (b"Umacr;", CharacterReference::Scalar('\u{16A}')), + (b"UnderBar;", CharacterReference::Scalar('\u{5F}')), + (b"UnderBrace;", CharacterReference::Scalar('\u{23DF}')), + (b"UnderBracket;", CharacterReference::Scalar('\u{23B5}')), + (b"UnderParenthesis;", 
CharacterReference::Scalar('\u{23DD}')), + (b"Union;", CharacterReference::Scalar('\u{22C3}')), + (b"UnionPlus;", CharacterReference::Scalar('\u{228E}')), + (b"Uogon;", CharacterReference::Scalar('\u{172}')), + (b"Uopf;", CharacterReference::Scalar('\u{1D54C}')), + (b"UpArrow;", CharacterReference::Scalar('\u{2191}')), + (b"UpArrowBar;", CharacterReference::Scalar('\u{2912}')), + (b"UpArrowDownArrow;", CharacterReference::Scalar('\u{21C5}')), + (b"UpDownArrow;", CharacterReference::Scalar('\u{2195}')), + (b"UpEquilibrium;", CharacterReference::Scalar('\u{296E}')), + (b"UpTee;", CharacterReference::Scalar('\u{22A5}')), + (b"UpTeeArrow;", CharacterReference::Scalar('\u{21A5}')), + (b"Uparrow;", CharacterReference::Scalar('\u{21D1}')), + (b"Updownarrow;", CharacterReference::Scalar('\u{21D5}')), + (b"UpperLeftArrow;", CharacterReference::Scalar('\u{2196}')), + (b"UpperRightArrow;", CharacterReference::Scalar('\u{2197}')), + (b"Upsi;", CharacterReference::Scalar('\u{3D2}')), + (b"Upsilon;", CharacterReference::Scalar('\u{3A5}')), + (b"Uring;", CharacterReference::Scalar('\u{16E}')), + (b"Uscr;", CharacterReference::Scalar('\u{1D4B0}')), + (b"Utilde;", CharacterReference::Scalar('\u{168}')), + (b"Uuml", CharacterReference::Scalar('\u{DC}')), + (b"Uuml;", CharacterReference::Scalar('\u{DC}')), + (b"VDash;", CharacterReference::Scalar('\u{22AB}')), + (b"Vbar;", CharacterReference::Scalar('\u{2AEB}')), + (b"Vcy;", CharacterReference::Scalar('\u{412}')), + (b"Vdash;", CharacterReference::Scalar('\u{22A9}')), + (b"Vdashl;", CharacterReference::Scalar('\u{2AE6}')), + (b"Vee;", CharacterReference::Scalar('\u{22C1}')), + (b"Verbar;", CharacterReference::Scalar('\u{2016}')), + (b"Vert;", CharacterReference::Scalar('\u{2016}')), + (b"VerticalBar;", CharacterReference::Scalar('\u{2223}')), + (b"VerticalLine;", CharacterReference::Scalar('\u{7C}')), + (b"VerticalSeparator;", CharacterReference::Scalar('\u{2758}')), + (b"VerticalTilde;", CharacterReference::Scalar('\u{2240}')), + 
(b"VeryThinSpace;", CharacterReference::Scalar('\u{200A}')), + (b"Vfr;", CharacterReference::Scalar('\u{1D519}')), + (b"Vopf;", CharacterReference::Scalar('\u{1D54D}')), + (b"Vscr;", CharacterReference::Scalar('\u{1D4B1}')), + (b"Vvdash;", CharacterReference::Scalar('\u{22AA}')), + (b"Wcirc;", CharacterReference::Scalar('\u{174}')), + (b"Wedge;", CharacterReference::Scalar('\u{22C0}')), + (b"Wfr;", CharacterReference::Scalar('\u{1D51A}')), + (b"Wopf;", CharacterReference::Scalar('\u{1D54E}')), + (b"Wscr;", CharacterReference::Scalar('\u{1D4B2}')), + (b"Xfr;", CharacterReference::Scalar('\u{1D51B}')), + (b"Xi;", CharacterReference::Scalar('\u{39E}')), + (b"Xopf;", CharacterReference::Scalar('\u{1D54F}')), + (b"Xscr;", CharacterReference::Scalar('\u{1D4B3}')), + (b"YAcy;", CharacterReference::Scalar('\u{42F}')), + (b"YIcy;", CharacterReference::Scalar('\u{407}')), + (b"YUcy;", CharacterReference::Scalar('\u{42E}')), + (b"Yacute", CharacterReference::Scalar('\u{DD}')), + (b"Yacute;", CharacterReference::Scalar('\u{DD}')), + (b"Ycirc;", CharacterReference::Scalar('\u{176}')), + (b"Ycy;", CharacterReference::Scalar('\u{42B}')), + (b"Yfr;", CharacterReference::Scalar('\u{1D51C}')), + (b"Yopf;", CharacterReference::Scalar('\u{1D550}')), + (b"Yscr;", CharacterReference::Scalar('\u{1D4B4}')), + (b"Yuml;", CharacterReference::Scalar('\u{178}')), + (b"ZHcy;", CharacterReference::Scalar('\u{416}')), + (b"Zacute;", CharacterReference::Scalar('\u{179}')), + (b"Zcaron;", CharacterReference::Scalar('\u{17D}')), + (b"Zcy;", CharacterReference::Scalar('\u{417}')), + (b"Zdot;", CharacterReference::Scalar('\u{17B}')), + (b"ZeroWidthSpace;", CharacterReference::Scalar('\u{200B}')), + (b"Zeta;", CharacterReference::Scalar('\u{396}')), + (b"Zfr;", CharacterReference::Scalar('\u{2128}')), + (b"Zopf;", CharacterReference::Scalar('\u{2124}')), + (b"Zscr;", CharacterReference::Scalar('\u{1D4B5}')), + (b"aacute", CharacterReference::Scalar('\u{E1}')), + (b"aacute;", 
CharacterReference::Scalar('\u{E1}')), + (b"abreve;", CharacterReference::Scalar('\u{103}')), + (b"ac;", CharacterReference::Scalar('\u{223E}')), + (b"acE;", CharacterReference::Text("\u{223E}\u{333}")), + (b"acd;", CharacterReference::Scalar('\u{223F}')), + (b"acirc", CharacterReference::Scalar('\u{E2}')), + (b"acirc;", CharacterReference::Scalar('\u{E2}')), + (b"acute", CharacterReference::Scalar('\u{B4}')), + (b"acute;", CharacterReference::Scalar('\u{B4}')), + (b"acy;", CharacterReference::Scalar('\u{430}')), + (b"aelig", CharacterReference::Scalar('\u{E6}')), + (b"aelig;", CharacterReference::Scalar('\u{E6}')), + (b"af;", CharacterReference::Scalar('\u{2061}')), + (b"afr;", CharacterReference::Scalar('\u{1D51E}')), + (b"agrave", CharacterReference::Scalar('\u{E0}')), + (b"agrave;", CharacterReference::Scalar('\u{E0}')), + (b"alefsym;", CharacterReference::Scalar('\u{2135}')), + (b"aleph;", CharacterReference::Scalar('\u{2135}')), + (b"alpha;", CharacterReference::Scalar('\u{3B1}')), + (b"amacr;", CharacterReference::Scalar('\u{101}')), + (b"amalg;", CharacterReference::Scalar('\u{2A3F}')), + (b"amp", CharacterReference::Scalar('\u{26}')), + (b"amp;", CharacterReference::Scalar('\u{26}')), + (b"and;", CharacterReference::Scalar('\u{2227}')), + (b"andand;", CharacterReference::Scalar('\u{2A55}')), + (b"andd;", CharacterReference::Scalar('\u{2A5C}')), + (b"andslope;", CharacterReference::Scalar('\u{2A58}')), + (b"andv;", CharacterReference::Scalar('\u{2A5A}')), + (b"ang;", CharacterReference::Scalar('\u{2220}')), + (b"ange;", CharacterReference::Scalar('\u{29A4}')), + (b"angle;", CharacterReference::Scalar('\u{2220}')), + (b"angmsd;", CharacterReference::Scalar('\u{2221}')), + (b"angmsdaa;", CharacterReference::Scalar('\u{29A8}')), + (b"angmsdab;", CharacterReference::Scalar('\u{29A9}')), + (b"angmsdac;", CharacterReference::Scalar('\u{29AA}')), + (b"angmsdad;", CharacterReference::Scalar('\u{29AB}')), + (b"angmsdae;", CharacterReference::Scalar('\u{29AC}')), + 
(b"angmsdaf;", CharacterReference::Scalar('\u{29AD}')), + (b"angmsdag;", CharacterReference::Scalar('\u{29AE}')), + (b"angmsdah;", CharacterReference::Scalar('\u{29AF}')), + (b"angrt;", CharacterReference::Scalar('\u{221F}')), + (b"angrtvb;", CharacterReference::Scalar('\u{22BE}')), + (b"angrtvbd;", CharacterReference::Scalar('\u{299D}')), + (b"angsph;", CharacterReference::Scalar('\u{2222}')), + (b"angst;", CharacterReference::Scalar('\u{C5}')), + (b"angzarr;", CharacterReference::Scalar('\u{237C}')), + (b"aogon;", CharacterReference::Scalar('\u{105}')), + (b"aopf;", CharacterReference::Scalar('\u{1D552}')), + (b"ap;", CharacterReference::Scalar('\u{2248}')), + (b"apE;", CharacterReference::Scalar('\u{2A70}')), + (b"apacir;", CharacterReference::Scalar('\u{2A6F}')), + (b"ape;", CharacterReference::Scalar('\u{224A}')), + (b"apid;", CharacterReference::Scalar('\u{224B}')), + (b"apos;", CharacterReference::Scalar('\u{27}')), + (b"approx;", CharacterReference::Scalar('\u{2248}')), + (b"approxeq;", CharacterReference::Scalar('\u{224A}')), + (b"aring", CharacterReference::Scalar('\u{E5}')), + (b"aring;", CharacterReference::Scalar('\u{E5}')), + (b"ascr;", CharacterReference::Scalar('\u{1D4B6}')), + (b"ast;", CharacterReference::Scalar('\u{2A}')), + (b"asymp;", CharacterReference::Scalar('\u{2248}')), + (b"asympeq;", CharacterReference::Scalar('\u{224D}')), + (b"atilde", CharacterReference::Scalar('\u{E3}')), + (b"atilde;", CharacterReference::Scalar('\u{E3}')), + (b"auml", CharacterReference::Scalar('\u{E4}')), + (b"auml;", CharacterReference::Scalar('\u{E4}')), + (b"awconint;", CharacterReference::Scalar('\u{2233}')), + (b"awint;", CharacterReference::Scalar('\u{2A11}')), + (b"bNot;", CharacterReference::Scalar('\u{2AED}')), + (b"backcong;", CharacterReference::Scalar('\u{224C}')), + (b"backepsilon;", CharacterReference::Scalar('\u{3F6}')), + (b"backprime;", CharacterReference::Scalar('\u{2035}')), + (b"backsim;", CharacterReference::Scalar('\u{223D}')), + 
(b"backsimeq;", CharacterReference::Scalar('\u{22CD}')), + (b"barvee;", CharacterReference::Scalar('\u{22BD}')), + (b"barwed;", CharacterReference::Scalar('\u{2305}')), + (b"barwedge;", CharacterReference::Scalar('\u{2305}')), + (b"bbrk;", CharacterReference::Scalar('\u{23B5}')), + (b"bbrktbrk;", CharacterReference::Scalar('\u{23B6}')), + (b"bcong;", CharacterReference::Scalar('\u{224C}')), + (b"bcy;", CharacterReference::Scalar('\u{431}')), + (b"bdquo;", CharacterReference::Scalar('\u{201E}')), + (b"becaus;", CharacterReference::Scalar('\u{2235}')), + (b"because;", CharacterReference::Scalar('\u{2235}')), + (b"bemptyv;", CharacterReference::Scalar('\u{29B0}')), + (b"bepsi;", CharacterReference::Scalar('\u{3F6}')), + (b"bernou;", CharacterReference::Scalar('\u{212C}')), + (b"beta;", CharacterReference::Scalar('\u{3B2}')), + (b"beth;", CharacterReference::Scalar('\u{2136}')), + (b"between;", CharacterReference::Scalar('\u{226C}')), + (b"bfr;", CharacterReference::Scalar('\u{1D51F}')), + (b"bigcap;", CharacterReference::Scalar('\u{22C2}')), + (b"bigcirc;", CharacterReference::Scalar('\u{25EF}')), + (b"bigcup;", CharacterReference::Scalar('\u{22C3}')), + (b"bigodot;", CharacterReference::Scalar('\u{2A00}')), + (b"bigoplus;", CharacterReference::Scalar('\u{2A01}')), + (b"bigotimes;", CharacterReference::Scalar('\u{2A02}')), + (b"bigsqcup;", CharacterReference::Scalar('\u{2A06}')), + (b"bigstar;", CharacterReference::Scalar('\u{2605}')), + (b"bigtriangledown;", CharacterReference::Scalar('\u{25BD}')), + (b"bigtriangleup;", CharacterReference::Scalar('\u{25B3}')), + (b"biguplus;", CharacterReference::Scalar('\u{2A04}')), + (b"bigvee;", CharacterReference::Scalar('\u{22C1}')), + (b"bigwedge;", CharacterReference::Scalar('\u{22C0}')), + (b"bkarow;", CharacterReference::Scalar('\u{290D}')), + (b"blacklozenge;", CharacterReference::Scalar('\u{29EB}')), + (b"blacksquare;", CharacterReference::Scalar('\u{25AA}')), + (b"blacktriangle;", CharacterReference::Scalar('\u{25B4}')), 
+ (b"blacktriangledown;", CharacterReference::Scalar('\u{25BE}')), + (b"blacktriangleleft;", CharacterReference::Scalar('\u{25C2}')), + (b"blacktriangleright;", CharacterReference::Scalar('\u{25B8}')), + (b"blank;", CharacterReference::Scalar('\u{2423}')), + (b"blk12;", CharacterReference::Scalar('\u{2592}')), + (b"blk14;", CharacterReference::Scalar('\u{2591}')), + (b"blk34;", CharacterReference::Scalar('\u{2593}')), + (b"block;", CharacterReference::Scalar('\u{2588}')), + (b"bne;", CharacterReference::Text("\u{3D}\u{20E5}")), + (b"bnequiv;", CharacterReference::Text("\u{2261}\u{20E5}")), + (b"bnot;", CharacterReference::Scalar('\u{2310}')), + (b"bopf;", CharacterReference::Scalar('\u{1D553}')), + (b"bot;", CharacterReference::Scalar('\u{22A5}')), + (b"bottom;", CharacterReference::Scalar('\u{22A5}')), + (b"bowtie;", CharacterReference::Scalar('\u{22C8}')), + (b"boxDL;", CharacterReference::Scalar('\u{2557}')), + (b"boxDR;", CharacterReference::Scalar('\u{2554}')), + (b"boxDl;", CharacterReference::Scalar('\u{2556}')), + (b"boxDr;", CharacterReference::Scalar('\u{2553}')), + (b"boxH;", CharacterReference::Scalar('\u{2550}')), + (b"boxHD;", CharacterReference::Scalar('\u{2566}')), + (b"boxHU;", CharacterReference::Scalar('\u{2569}')), + (b"boxHd;", CharacterReference::Scalar('\u{2564}')), + (b"boxHu;", CharacterReference::Scalar('\u{2567}')), + (b"boxUL;", CharacterReference::Scalar('\u{255D}')), + (b"boxUR;", CharacterReference::Scalar('\u{255A}')), + (b"boxUl;", CharacterReference::Scalar('\u{255C}')), + (b"boxUr;", CharacterReference::Scalar('\u{2559}')), + (b"boxV;", CharacterReference::Scalar('\u{2551}')), + (b"boxVH;", CharacterReference::Scalar('\u{256C}')), + (b"boxVL;", CharacterReference::Scalar('\u{2563}')), + (b"boxVR;", CharacterReference::Scalar('\u{2560}')), + (b"boxVh;", CharacterReference::Scalar('\u{256B}')), + (b"boxVl;", CharacterReference::Scalar('\u{2562}')), + (b"boxVr;", CharacterReference::Scalar('\u{255F}')), + (b"boxbox;", 
CharacterReference::Scalar('\u{29C9}')), + (b"boxdL;", CharacterReference::Scalar('\u{2555}')), + (b"boxdR;", CharacterReference::Scalar('\u{2552}')), + (b"boxdl;", CharacterReference::Scalar('\u{2510}')), + (b"boxdr;", CharacterReference::Scalar('\u{250C}')), + (b"boxh;", CharacterReference::Scalar('\u{2500}')), + (b"boxhD;", CharacterReference::Scalar('\u{2565}')), + (b"boxhU;", CharacterReference::Scalar('\u{2568}')), + (b"boxhd;", CharacterReference::Scalar('\u{252C}')), + (b"boxhu;", CharacterReference::Scalar('\u{2534}')), + (b"boxminus;", CharacterReference::Scalar('\u{229F}')), + (b"boxplus;", CharacterReference::Scalar('\u{229E}')), + (b"boxtimes;", CharacterReference::Scalar('\u{22A0}')), + (b"boxuL;", CharacterReference::Scalar('\u{255B}')), + (b"boxuR;", CharacterReference::Scalar('\u{2558}')), + (b"boxul;", CharacterReference::Scalar('\u{2518}')), + (b"boxur;", CharacterReference::Scalar('\u{2514}')), + (b"boxv;", CharacterReference::Scalar('\u{2502}')), + (b"boxvH;", CharacterReference::Scalar('\u{256A}')), + (b"boxvL;", CharacterReference::Scalar('\u{2561}')), + (b"boxvR;", CharacterReference::Scalar('\u{255E}')), + (b"boxvh;", CharacterReference::Scalar('\u{253C}')), + (b"boxvl;", CharacterReference::Scalar('\u{2524}')), + (b"boxvr;", CharacterReference::Scalar('\u{251C}')), + (b"bprime;", CharacterReference::Scalar('\u{2035}')), + (b"breve;", CharacterReference::Scalar('\u{2D8}')), + (b"brvbar", CharacterReference::Scalar('\u{A6}')), + (b"brvbar;", CharacterReference::Scalar('\u{A6}')), + (b"bscr;", CharacterReference::Scalar('\u{1D4B7}')), + (b"bsemi;", CharacterReference::Scalar('\u{204F}')), + (b"bsim;", CharacterReference::Scalar('\u{223D}')), + (b"bsime;", CharacterReference::Scalar('\u{22CD}')), + (b"bsol;", CharacterReference::Scalar('\u{5C}')), + (b"bsolb;", CharacterReference::Scalar('\u{29C5}')), + (b"bsolhsub;", CharacterReference::Scalar('\u{27C8}')), + (b"bull;", CharacterReference::Scalar('\u{2022}')), + (b"bullet;", 
CharacterReference::Scalar('\u{2022}')), + (b"bump;", CharacterReference::Scalar('\u{224E}')), + (b"bumpE;", CharacterReference::Scalar('\u{2AAE}')), + (b"bumpe;", CharacterReference::Scalar('\u{224F}')), + (b"bumpeq;", CharacterReference::Scalar('\u{224F}')), + (b"cacute;", CharacterReference::Scalar('\u{107}')), + (b"cap;", CharacterReference::Scalar('\u{2229}')), + (b"capand;", CharacterReference::Scalar('\u{2A44}')), + (b"capbrcup;", CharacterReference::Scalar('\u{2A49}')), + (b"capcap;", CharacterReference::Scalar('\u{2A4B}')), + (b"capcup;", CharacterReference::Scalar('\u{2A47}')), + (b"capdot;", CharacterReference::Scalar('\u{2A40}')), + (b"caps;", CharacterReference::Text("\u{2229}\u{FE00}")), + (b"caret;", CharacterReference::Scalar('\u{2041}')), + (b"caron;", CharacterReference::Scalar('\u{2C7}')), + (b"ccaps;", CharacterReference::Scalar('\u{2A4D}')), + (b"ccaron;", CharacterReference::Scalar('\u{10D}')), + (b"ccedil", CharacterReference::Scalar('\u{E7}')), + (b"ccedil;", CharacterReference::Scalar('\u{E7}')), + (b"ccirc;", CharacterReference::Scalar('\u{109}')), + (b"ccups;", CharacterReference::Scalar('\u{2A4C}')), + (b"ccupssm;", CharacterReference::Scalar('\u{2A50}')), + (b"cdot;", CharacterReference::Scalar('\u{10B}')), + (b"cedil", CharacterReference::Scalar('\u{B8}')), + (b"cedil;", CharacterReference::Scalar('\u{B8}')), + (b"cemptyv;", CharacterReference::Scalar('\u{29B2}')), + (b"cent", CharacterReference::Scalar('\u{A2}')), + (b"cent;", CharacterReference::Scalar('\u{A2}')), + (b"centerdot;", CharacterReference::Scalar('\u{B7}')), + (b"cfr;", CharacterReference::Scalar('\u{1D520}')), + (b"chcy;", CharacterReference::Scalar('\u{447}')), + (b"check;", CharacterReference::Scalar('\u{2713}')), + (b"checkmark;", CharacterReference::Scalar('\u{2713}')), + (b"chi;", CharacterReference::Scalar('\u{3C7}')), + (b"cir;", CharacterReference::Scalar('\u{25CB}')), + (b"cirE;", CharacterReference::Scalar('\u{29C3}')), + (b"circ;", 
CharacterReference::Scalar('\u{2C6}')), + (b"circeq;", CharacterReference::Scalar('\u{2257}')), + (b"circlearrowleft;", CharacterReference::Scalar('\u{21BA}')), + (b"circlearrowright;", CharacterReference::Scalar('\u{21BB}')), + (b"circledR;", CharacterReference::Scalar('\u{AE}')), + (b"circledS;", CharacterReference::Scalar('\u{24C8}')), + (b"circledast;", CharacterReference::Scalar('\u{229B}')), + (b"circledcirc;", CharacterReference::Scalar('\u{229A}')), + (b"circleddash;", CharacterReference::Scalar('\u{229D}')), + (b"cire;", CharacterReference::Scalar('\u{2257}')), + (b"cirfnint;", CharacterReference::Scalar('\u{2A10}')), + (b"cirmid;", CharacterReference::Scalar('\u{2AEF}')), + (b"cirscir;", CharacterReference::Scalar('\u{29C2}')), + (b"clubs;", CharacterReference::Scalar('\u{2663}')), + (b"clubsuit;", CharacterReference::Scalar('\u{2663}')), + (b"colon;", CharacterReference::Scalar('\u{3A}')), + (b"colone;", CharacterReference::Scalar('\u{2254}')), + (b"coloneq;", CharacterReference::Scalar('\u{2254}')), + (b"comma;", CharacterReference::Scalar('\u{2C}')), + (b"commat;", CharacterReference::Scalar('\u{40}')), + (b"comp;", CharacterReference::Scalar('\u{2201}')), + (b"compfn;", CharacterReference::Scalar('\u{2218}')), + (b"complement;", CharacterReference::Scalar('\u{2201}')), + (b"complexes;", CharacterReference::Scalar('\u{2102}')), + (b"cong;", CharacterReference::Scalar('\u{2245}')), + (b"congdot;", CharacterReference::Scalar('\u{2A6D}')), + (b"conint;", CharacterReference::Scalar('\u{222E}')), + (b"copf;", CharacterReference::Scalar('\u{1D554}')), + (b"coprod;", CharacterReference::Scalar('\u{2210}')), + (b"copy", CharacterReference::Scalar('\u{A9}')), + (b"copy;", CharacterReference::Scalar('\u{A9}')), + (b"copysr;", CharacterReference::Scalar('\u{2117}')), + (b"crarr;", CharacterReference::Scalar('\u{21B5}')), + (b"cross;", CharacterReference::Scalar('\u{2717}')), + (b"cscr;", CharacterReference::Scalar('\u{1D4B8}')), + (b"csub;", 
CharacterReference::Scalar('\u{2ACF}')), + (b"csube;", CharacterReference::Scalar('\u{2AD1}')), + (b"csup;", CharacterReference::Scalar('\u{2AD0}')), + (b"csupe;", CharacterReference::Scalar('\u{2AD2}')), + (b"ctdot;", CharacterReference::Scalar('\u{22EF}')), + (b"cudarrl;", CharacterReference::Scalar('\u{2938}')), + (b"cudarrr;", CharacterReference::Scalar('\u{2935}')), + (b"cuepr;", CharacterReference::Scalar('\u{22DE}')), + (b"cuesc;", CharacterReference::Scalar('\u{22DF}')), + (b"cularr;", CharacterReference::Scalar('\u{21B6}')), + (b"cularrp;", CharacterReference::Scalar('\u{293D}')), + (b"cup;", CharacterReference::Scalar('\u{222A}')), + (b"cupbrcap;", CharacterReference::Scalar('\u{2A48}')), + (b"cupcap;", CharacterReference::Scalar('\u{2A46}')), + (b"cupcup;", CharacterReference::Scalar('\u{2A4A}')), + (b"cupdot;", CharacterReference::Scalar('\u{228D}')), + (b"cupor;", CharacterReference::Scalar('\u{2A45}')), + (b"cups;", CharacterReference::Text("\u{222A}\u{FE00}")), + (b"curarr;", CharacterReference::Scalar('\u{21B7}')), + (b"curarrm;", CharacterReference::Scalar('\u{293C}')), + (b"curlyeqprec;", CharacterReference::Scalar('\u{22DE}')), + (b"curlyeqsucc;", CharacterReference::Scalar('\u{22DF}')), + (b"curlyvee;", CharacterReference::Scalar('\u{22CE}')), + (b"curlywedge;", CharacterReference::Scalar('\u{22CF}')), + (b"curren", CharacterReference::Scalar('\u{A4}')), + (b"curren;", CharacterReference::Scalar('\u{A4}')), + (b"curvearrowleft;", CharacterReference::Scalar('\u{21B6}')), + (b"curvearrowright;", CharacterReference::Scalar('\u{21B7}')), + (b"cuvee;", CharacterReference::Scalar('\u{22CE}')), + (b"cuwed;", CharacterReference::Scalar('\u{22CF}')), + (b"cwconint;", CharacterReference::Scalar('\u{2232}')), + (b"cwint;", CharacterReference::Scalar('\u{2231}')), + (b"cylcty;", CharacterReference::Scalar('\u{232D}')), + (b"dArr;", CharacterReference::Scalar('\u{21D3}')), + (b"dHar;", CharacterReference::Scalar('\u{2965}')), + (b"dagger;", 
CharacterReference::Scalar('\u{2020}')), + (b"daleth;", CharacterReference::Scalar('\u{2138}')), + (b"darr;", CharacterReference::Scalar('\u{2193}')), + (b"dash;", CharacterReference::Scalar('\u{2010}')), + (b"dashv;", CharacterReference::Scalar('\u{22A3}')), + (b"dbkarow;", CharacterReference::Scalar('\u{290F}')), + (b"dblac;", CharacterReference::Scalar('\u{2DD}')), + (b"dcaron;", CharacterReference::Scalar('\u{10F}')), + (b"dcy;", CharacterReference::Scalar('\u{434}')), + (b"dd;", CharacterReference::Scalar('\u{2146}')), + (b"ddagger;", CharacterReference::Scalar('\u{2021}')), + (b"ddarr;", CharacterReference::Scalar('\u{21CA}')), + (b"ddotseq;", CharacterReference::Scalar('\u{2A77}')), + (b"deg", CharacterReference::Scalar('\u{B0}')), + (b"deg;", CharacterReference::Scalar('\u{B0}')), + (b"delta;", CharacterReference::Scalar('\u{3B4}')), + (b"demptyv;", CharacterReference::Scalar('\u{29B1}')), + (b"dfisht;", CharacterReference::Scalar('\u{297F}')), + (b"dfr;", CharacterReference::Scalar('\u{1D521}')), + (b"dharl;", CharacterReference::Scalar('\u{21C3}')), + (b"dharr;", CharacterReference::Scalar('\u{21C2}')), + (b"diam;", CharacterReference::Scalar('\u{22C4}')), + (b"diamond;", CharacterReference::Scalar('\u{22C4}')), + (b"diamondsuit;", CharacterReference::Scalar('\u{2666}')), + (b"diams;", CharacterReference::Scalar('\u{2666}')), + (b"die;", CharacterReference::Scalar('\u{A8}')), + (b"digamma;", CharacterReference::Scalar('\u{3DD}')), + (b"disin;", CharacterReference::Scalar('\u{22F2}')), + (b"div;", CharacterReference::Scalar('\u{F7}')), + (b"divide", CharacterReference::Scalar('\u{F7}')), + (b"divide;", CharacterReference::Scalar('\u{F7}')), + (b"divideontimes;", CharacterReference::Scalar('\u{22C7}')), + (b"divonx;", CharacterReference::Scalar('\u{22C7}')), + (b"djcy;", CharacterReference::Scalar('\u{452}')), + (b"dlcorn;", CharacterReference::Scalar('\u{231E}')), + (b"dlcrop;", CharacterReference::Scalar('\u{230D}')), + (b"dollar;", 
CharacterReference::Scalar('\u{24}')), + (b"dopf;", CharacterReference::Scalar('\u{1D555}')), + (b"dot;", CharacterReference::Scalar('\u{2D9}')), + (b"doteq;", CharacterReference::Scalar('\u{2250}')), + (b"doteqdot;", CharacterReference::Scalar('\u{2251}')), + (b"dotminus;", CharacterReference::Scalar('\u{2238}')), + (b"dotplus;", CharacterReference::Scalar('\u{2214}')), + (b"dotsquare;", CharacterReference::Scalar('\u{22A1}')), + (b"doublebarwedge;", CharacterReference::Scalar('\u{2306}')), + (b"downarrow;", CharacterReference::Scalar('\u{2193}')), + (b"downdownarrows;", CharacterReference::Scalar('\u{21CA}')), + (b"downharpoonleft;", CharacterReference::Scalar('\u{21C3}')), + (b"downharpoonright;", CharacterReference::Scalar('\u{21C2}')), + (b"drbkarow;", CharacterReference::Scalar('\u{2910}')), + (b"drcorn;", CharacterReference::Scalar('\u{231F}')), + (b"drcrop;", CharacterReference::Scalar('\u{230C}')), + (b"dscr;", CharacterReference::Scalar('\u{1D4B9}')), + (b"dscy;", CharacterReference::Scalar('\u{455}')), + (b"dsol;", CharacterReference::Scalar('\u{29F6}')), + (b"dstrok;", CharacterReference::Scalar('\u{111}')), + (b"dtdot;", CharacterReference::Scalar('\u{22F1}')), + (b"dtri;", CharacterReference::Scalar('\u{25BF}')), + (b"dtrif;", CharacterReference::Scalar('\u{25BE}')), + (b"duarr;", CharacterReference::Scalar('\u{21F5}')), + (b"duhar;", CharacterReference::Scalar('\u{296F}')), + (b"dwangle;", CharacterReference::Scalar('\u{29A6}')), + (b"dzcy;", CharacterReference::Scalar('\u{45F}')), + (b"dzigrarr;", CharacterReference::Scalar('\u{27FF}')), + (b"eDDot;", CharacterReference::Scalar('\u{2A77}')), + (b"eDot;", CharacterReference::Scalar('\u{2251}')), + (b"eacute", CharacterReference::Scalar('\u{E9}')), + (b"eacute;", CharacterReference::Scalar('\u{E9}')), + (b"easter;", CharacterReference::Scalar('\u{2A6E}')), + (b"ecaron;", CharacterReference::Scalar('\u{11B}')), + (b"ecir;", CharacterReference::Scalar('\u{2256}')), + (b"ecirc", 
CharacterReference::Scalar('\u{EA}')), + (b"ecirc;", CharacterReference::Scalar('\u{EA}')), + (b"ecolon;", CharacterReference::Scalar('\u{2255}')), + (b"ecy;", CharacterReference::Scalar('\u{44D}')), + (b"edot;", CharacterReference::Scalar('\u{117}')), + (b"ee;", CharacterReference::Scalar('\u{2147}')), + (b"efDot;", CharacterReference::Scalar('\u{2252}')), + (b"efr;", CharacterReference::Scalar('\u{1D522}')), + (b"eg;", CharacterReference::Scalar('\u{2A9A}')), + (b"egrave", CharacterReference::Scalar('\u{E8}')), + (b"egrave;", CharacterReference::Scalar('\u{E8}')), + (b"egs;", CharacterReference::Scalar('\u{2A96}')), + (b"egsdot;", CharacterReference::Scalar('\u{2A98}')), + (b"el;", CharacterReference::Scalar('\u{2A99}')), + (b"elinters;", CharacterReference::Scalar('\u{23E7}')), + (b"ell;", CharacterReference::Scalar('\u{2113}')), + (b"els;", CharacterReference::Scalar('\u{2A95}')), + (b"elsdot;", CharacterReference::Scalar('\u{2A97}')), + (b"emacr;", CharacterReference::Scalar('\u{113}')), + (b"empty;", CharacterReference::Scalar('\u{2205}')), + (b"emptyset;", CharacterReference::Scalar('\u{2205}')), + (b"emptyv;", CharacterReference::Scalar('\u{2205}')), + (b"emsp13;", CharacterReference::Scalar('\u{2004}')), + (b"emsp14;", CharacterReference::Scalar('\u{2005}')), + (b"emsp;", CharacterReference::Scalar('\u{2003}')), + (b"eng;", CharacterReference::Scalar('\u{14B}')), + (b"ensp;", CharacterReference::Scalar('\u{2002}')), + (b"eogon;", CharacterReference::Scalar('\u{119}')), + (b"eopf;", CharacterReference::Scalar('\u{1D556}')), + (b"epar;", CharacterReference::Scalar('\u{22D5}')), + (b"eparsl;", CharacterReference::Scalar('\u{29E3}')), + (b"eplus;", CharacterReference::Scalar('\u{2A71}')), + (b"epsi;", CharacterReference::Scalar('\u{3B5}')), + (b"epsilon;", CharacterReference::Scalar('\u{3B5}')), + (b"epsiv;", CharacterReference::Scalar('\u{3F5}')), + (b"eqcirc;", CharacterReference::Scalar('\u{2256}')), + (b"eqcolon;", CharacterReference::Scalar('\u{2255}')), 
+ (b"eqsim;", CharacterReference::Scalar('\u{2242}')), + (b"eqslantgtr;", CharacterReference::Scalar('\u{2A96}')), + (b"eqslantless;", CharacterReference::Scalar('\u{2A95}')), + (b"equals;", CharacterReference::Scalar('\u{3D}')), + (b"equest;", CharacterReference::Scalar('\u{225F}')), + (b"equiv;", CharacterReference::Scalar('\u{2261}')), + (b"equivDD;", CharacterReference::Scalar('\u{2A78}')), + (b"eqvparsl;", CharacterReference::Scalar('\u{29E5}')), + (b"erDot;", CharacterReference::Scalar('\u{2253}')), + (b"erarr;", CharacterReference::Scalar('\u{2971}')), + (b"escr;", CharacterReference::Scalar('\u{212F}')), + (b"esdot;", CharacterReference::Scalar('\u{2250}')), + (b"esim;", CharacterReference::Scalar('\u{2242}')), + (b"eta;", CharacterReference::Scalar('\u{3B7}')), + (b"eth", CharacterReference::Scalar('\u{F0}')), + (b"eth;", CharacterReference::Scalar('\u{F0}')), + (b"euml", CharacterReference::Scalar('\u{EB}')), + (b"euml;", CharacterReference::Scalar('\u{EB}')), + (b"euro;", CharacterReference::Scalar('\u{20AC}')), + (b"excl;", CharacterReference::Scalar('\u{21}')), + (b"exist;", CharacterReference::Scalar('\u{2203}')), + (b"expectation;", CharacterReference::Scalar('\u{2130}')), + (b"exponentiale;", CharacterReference::Scalar('\u{2147}')), + (b"fallingdotseq;", CharacterReference::Scalar('\u{2252}')), + (b"fcy;", CharacterReference::Scalar('\u{444}')), + (b"female;", CharacterReference::Scalar('\u{2640}')), + (b"ffilig;", CharacterReference::Scalar('\u{FB03}')), + (b"fflig;", CharacterReference::Scalar('\u{FB00}')), + (b"ffllig;", CharacterReference::Scalar('\u{FB04}')), + (b"ffr;", CharacterReference::Scalar('\u{1D523}')), + (b"filig;", CharacterReference::Scalar('\u{FB01}')), + (b"fjlig;", CharacterReference::Text("\u{66}\u{6A}")), + (b"flat;", CharacterReference::Scalar('\u{266D}')), + (b"fllig;", CharacterReference::Scalar('\u{FB02}')), + (b"fltns;", CharacterReference::Scalar('\u{25B1}')), + (b"fnof;", CharacterReference::Scalar('\u{192}')), + 
(b"fopf;", CharacterReference::Scalar('\u{1D557}')), + (b"forall;", CharacterReference::Scalar('\u{2200}')), + (b"fork;", CharacterReference::Scalar('\u{22D4}')), + (b"forkv;", CharacterReference::Scalar('\u{2AD9}')), + (b"fpartint;", CharacterReference::Scalar('\u{2A0D}')), + (b"frac12", CharacterReference::Scalar('\u{BD}')), + (b"frac12;", CharacterReference::Scalar('\u{BD}')), + (b"frac13;", CharacterReference::Scalar('\u{2153}')), + (b"frac14", CharacterReference::Scalar('\u{BC}')), + (b"frac14;", CharacterReference::Scalar('\u{BC}')), + (b"frac15;", CharacterReference::Scalar('\u{2155}')), + (b"frac16;", CharacterReference::Scalar('\u{2159}')), + (b"frac18;", CharacterReference::Scalar('\u{215B}')), + (b"frac23;", CharacterReference::Scalar('\u{2154}')), + (b"frac25;", CharacterReference::Scalar('\u{2156}')), + (b"frac34", CharacterReference::Scalar('\u{BE}')), + (b"frac34;", CharacterReference::Scalar('\u{BE}')), + (b"frac35;", CharacterReference::Scalar('\u{2157}')), + (b"frac38;", CharacterReference::Scalar('\u{215C}')), + (b"frac45;", CharacterReference::Scalar('\u{2158}')), + (b"frac56;", CharacterReference::Scalar('\u{215A}')), + (b"frac58;", CharacterReference::Scalar('\u{215D}')), + (b"frac78;", CharacterReference::Scalar('\u{215E}')), + (b"frasl;", CharacterReference::Scalar('\u{2044}')), + (b"frown;", CharacterReference::Scalar('\u{2322}')), + (b"fscr;", CharacterReference::Scalar('\u{1D4BB}')), + (b"gE;", CharacterReference::Scalar('\u{2267}')), + (b"gEl;", CharacterReference::Scalar('\u{2A8C}')), + (b"gacute;", CharacterReference::Scalar('\u{1F5}')), + (b"gamma;", CharacterReference::Scalar('\u{3B3}')), + (b"gammad;", CharacterReference::Scalar('\u{3DD}')), + (b"gap;", CharacterReference::Scalar('\u{2A86}')), + (b"gbreve;", CharacterReference::Scalar('\u{11F}')), + (b"gcirc;", CharacterReference::Scalar('\u{11D}')), + (b"gcy;", CharacterReference::Scalar('\u{433}')), + (b"gdot;", CharacterReference::Scalar('\u{121}')), + (b"ge;", 
CharacterReference::Scalar('\u{2265}')), + (b"gel;", CharacterReference::Scalar('\u{22DB}')), + (b"geq;", CharacterReference::Scalar('\u{2265}')), + (b"geqq;", CharacterReference::Scalar('\u{2267}')), + (b"geqslant;", CharacterReference::Scalar('\u{2A7E}')), + (b"ges;", CharacterReference::Scalar('\u{2A7E}')), + (b"gescc;", CharacterReference::Scalar('\u{2AA9}')), + (b"gesdot;", CharacterReference::Scalar('\u{2A80}')), + (b"gesdoto;", CharacterReference::Scalar('\u{2A82}')), + (b"gesdotol;", CharacterReference::Scalar('\u{2A84}')), + (b"gesl;", CharacterReference::Text("\u{22DB}\u{FE00}")), + (b"gesles;", CharacterReference::Scalar('\u{2A94}')), + (b"gfr;", CharacterReference::Scalar('\u{1D524}')), + (b"gg;", CharacterReference::Scalar('\u{226B}')), + (b"ggg;", CharacterReference::Scalar('\u{22D9}')), + (b"gimel;", CharacterReference::Scalar('\u{2137}')), + (b"gjcy;", CharacterReference::Scalar('\u{453}')), + (b"gl;", CharacterReference::Scalar('\u{2277}')), + (b"glE;", CharacterReference::Scalar('\u{2A92}')), + (b"gla;", CharacterReference::Scalar('\u{2AA5}')), + (b"glj;", CharacterReference::Scalar('\u{2AA4}')), + (b"gnE;", CharacterReference::Scalar('\u{2269}')), + (b"gnap;", CharacterReference::Scalar('\u{2A8A}')), + (b"gnapprox;", CharacterReference::Scalar('\u{2A8A}')), + (b"gne;", CharacterReference::Scalar('\u{2A88}')), + (b"gneq;", CharacterReference::Scalar('\u{2A88}')), + (b"gneqq;", CharacterReference::Scalar('\u{2269}')), + (b"gnsim;", CharacterReference::Scalar('\u{22E7}')), + (b"gopf;", CharacterReference::Scalar('\u{1D558}')), + (b"grave;", CharacterReference::Scalar('\u{60}')), + (b"gscr;", CharacterReference::Scalar('\u{210A}')), + (b"gsim;", CharacterReference::Scalar('\u{2273}')), + (b"gsime;", CharacterReference::Scalar('\u{2A8E}')), + (b"gsiml;", CharacterReference::Scalar('\u{2A90}')), + (b"gt", CharacterReference::Scalar('\u{3E}')), + (b"gt;", CharacterReference::Scalar('\u{3E}')), + (b"gtcc;", CharacterReference::Scalar('\u{2AA7}')), + 
(b"gtcir;", CharacterReference::Scalar('\u{2A7A}')), + (b"gtdot;", CharacterReference::Scalar('\u{22D7}')), + (b"gtlPar;", CharacterReference::Scalar('\u{2995}')), + (b"gtquest;", CharacterReference::Scalar('\u{2A7C}')), + (b"gtrapprox;", CharacterReference::Scalar('\u{2A86}')), + (b"gtrarr;", CharacterReference::Scalar('\u{2978}')), + (b"gtrdot;", CharacterReference::Scalar('\u{22D7}')), + (b"gtreqless;", CharacterReference::Scalar('\u{22DB}')), + (b"gtreqqless;", CharacterReference::Scalar('\u{2A8C}')), + (b"gtrless;", CharacterReference::Scalar('\u{2277}')), + (b"gtrsim;", CharacterReference::Scalar('\u{2273}')), + (b"gvertneqq;", CharacterReference::Text("\u{2269}\u{FE00}")), + (b"gvnE;", CharacterReference::Text("\u{2269}\u{FE00}")), + (b"hArr;", CharacterReference::Scalar('\u{21D4}')), + (b"hairsp;", CharacterReference::Scalar('\u{200A}')), + (b"half;", CharacterReference::Scalar('\u{BD}')), + (b"hamilt;", CharacterReference::Scalar('\u{210B}')), + (b"hardcy;", CharacterReference::Scalar('\u{44A}')), + (b"harr;", CharacterReference::Scalar('\u{2194}')), + (b"harrcir;", CharacterReference::Scalar('\u{2948}')), + (b"harrw;", CharacterReference::Scalar('\u{21AD}')), + (b"hbar;", CharacterReference::Scalar('\u{210F}')), + (b"hcirc;", CharacterReference::Scalar('\u{125}')), + (b"hearts;", CharacterReference::Scalar('\u{2665}')), + (b"heartsuit;", CharacterReference::Scalar('\u{2665}')), + (b"hellip;", CharacterReference::Scalar('\u{2026}')), + (b"hercon;", CharacterReference::Scalar('\u{22B9}')), + (b"hfr;", CharacterReference::Scalar('\u{1D525}')), + (b"hksearow;", CharacterReference::Scalar('\u{2925}')), + (b"hkswarow;", CharacterReference::Scalar('\u{2926}')), + (b"hoarr;", CharacterReference::Scalar('\u{21FF}')), + (b"homtht;", CharacterReference::Scalar('\u{223B}')), + (b"hookleftarrow;", CharacterReference::Scalar('\u{21A9}')), + (b"hookrightarrow;", CharacterReference::Scalar('\u{21AA}')), + (b"hopf;", CharacterReference::Scalar('\u{1D559}')), + 
(b"horbar;", CharacterReference::Scalar('\u{2015}')), + (b"hscr;", CharacterReference::Scalar('\u{1D4BD}')), + (b"hslash;", CharacterReference::Scalar('\u{210F}')), + (b"hstrok;", CharacterReference::Scalar('\u{127}')), + (b"hybull;", CharacterReference::Scalar('\u{2043}')), + (b"hyphen;", CharacterReference::Scalar('\u{2010}')), + (b"iacute", CharacterReference::Scalar('\u{ED}')), + (b"iacute;", CharacterReference::Scalar('\u{ED}')), + (b"ic;", CharacterReference::Scalar('\u{2063}')), + (b"icirc", CharacterReference::Scalar('\u{EE}')), + (b"icirc;", CharacterReference::Scalar('\u{EE}')), + (b"icy;", CharacterReference::Scalar('\u{438}')), + (b"iecy;", CharacterReference::Scalar('\u{435}')), + (b"iexcl", CharacterReference::Scalar('\u{A1}')), + (b"iexcl;", CharacterReference::Scalar('\u{A1}')), + (b"iff;", CharacterReference::Scalar('\u{21D4}')), + (b"ifr;", CharacterReference::Scalar('\u{1D526}')), + (b"igrave", CharacterReference::Scalar('\u{EC}')), + (b"igrave;", CharacterReference::Scalar('\u{EC}')), + (b"ii;", CharacterReference::Scalar('\u{2148}')), + (b"iiiint;", CharacterReference::Scalar('\u{2A0C}')), + (b"iiint;", CharacterReference::Scalar('\u{222D}')), + (b"iinfin;", CharacterReference::Scalar('\u{29DC}')), + (b"iiota;", CharacterReference::Scalar('\u{2129}')), + (b"ijlig;", CharacterReference::Scalar('\u{133}')), + (b"imacr;", CharacterReference::Scalar('\u{12B}')), + (b"image;", CharacterReference::Scalar('\u{2111}')), + (b"imagline;", CharacterReference::Scalar('\u{2110}')), + (b"imagpart;", CharacterReference::Scalar('\u{2111}')), + (b"imath;", CharacterReference::Scalar('\u{131}')), + (b"imof;", CharacterReference::Scalar('\u{22B7}')), + (b"imped;", CharacterReference::Scalar('\u{1B5}')), + (b"in;", CharacterReference::Scalar('\u{2208}')), + (b"incare;", CharacterReference::Scalar('\u{2105}')), + (b"infin;", CharacterReference::Scalar('\u{221E}')), + (b"infintie;", CharacterReference::Scalar('\u{29DD}')), + (b"inodot;", 
CharacterReference::Scalar('\u{131}')), + (b"int;", CharacterReference::Scalar('\u{222B}')), + (b"intcal;", CharacterReference::Scalar('\u{22BA}')), + (b"integers;", CharacterReference::Scalar('\u{2124}')), + (b"intercal;", CharacterReference::Scalar('\u{22BA}')), + (b"intlarhk;", CharacterReference::Scalar('\u{2A17}')), + (b"intprod;", CharacterReference::Scalar('\u{2A3C}')), + (b"iocy;", CharacterReference::Scalar('\u{451}')), + (b"iogon;", CharacterReference::Scalar('\u{12F}')), + (b"iopf;", CharacterReference::Scalar('\u{1D55A}')), + (b"iota;", CharacterReference::Scalar('\u{3B9}')), + (b"iprod;", CharacterReference::Scalar('\u{2A3C}')), + (b"iquest", CharacterReference::Scalar('\u{BF}')), + (b"iquest;", CharacterReference::Scalar('\u{BF}')), + (b"iscr;", CharacterReference::Scalar('\u{1D4BE}')), + (b"isin;", CharacterReference::Scalar('\u{2208}')), + (b"isinE;", CharacterReference::Scalar('\u{22F9}')), + (b"isindot;", CharacterReference::Scalar('\u{22F5}')), + (b"isins;", CharacterReference::Scalar('\u{22F4}')), + (b"isinsv;", CharacterReference::Scalar('\u{22F3}')), + (b"isinv;", CharacterReference::Scalar('\u{2208}')), + (b"it;", CharacterReference::Scalar('\u{2062}')), + (b"itilde;", CharacterReference::Scalar('\u{129}')), + (b"iukcy;", CharacterReference::Scalar('\u{456}')), + (b"iuml", CharacterReference::Scalar('\u{EF}')), + (b"iuml;", CharacterReference::Scalar('\u{EF}')), + (b"jcirc;", CharacterReference::Scalar('\u{135}')), + (b"jcy;", CharacterReference::Scalar('\u{439}')), + (b"jfr;", CharacterReference::Scalar('\u{1D527}')), + (b"jmath;", CharacterReference::Scalar('\u{237}')), + (b"jopf;", CharacterReference::Scalar('\u{1D55B}')), + (b"jscr;", CharacterReference::Scalar('\u{1D4BF}')), + (b"jsercy;", CharacterReference::Scalar('\u{458}')), + (b"jukcy;", CharacterReference::Scalar('\u{454}')), + (b"kappa;", CharacterReference::Scalar('\u{3BA}')), + (b"kappav;", CharacterReference::Scalar('\u{3F0}')), + (b"kcedil;", 
CharacterReference::Scalar('\u{137}')), + (b"kcy;", CharacterReference::Scalar('\u{43A}')), + (b"kfr;", CharacterReference::Scalar('\u{1D528}')), + (b"kgreen;", CharacterReference::Scalar('\u{138}')), + (b"khcy;", CharacterReference::Scalar('\u{445}')), + (b"kjcy;", CharacterReference::Scalar('\u{45C}')), + (b"kopf;", CharacterReference::Scalar('\u{1D55C}')), + (b"kscr;", CharacterReference::Scalar('\u{1D4C0}')), + (b"lAarr;", CharacterReference::Scalar('\u{21DA}')), + (b"lArr;", CharacterReference::Scalar('\u{21D0}')), + (b"lAtail;", CharacterReference::Scalar('\u{291B}')), + (b"lBarr;", CharacterReference::Scalar('\u{290E}')), + (b"lE;", CharacterReference::Scalar('\u{2266}')), + (b"lEg;", CharacterReference::Scalar('\u{2A8B}')), + (b"lHar;", CharacterReference::Scalar('\u{2962}')), + (b"lacute;", CharacterReference::Scalar('\u{13A}')), + (b"laemptyv;", CharacterReference::Scalar('\u{29B4}')), + (b"lagran;", CharacterReference::Scalar('\u{2112}')), + (b"lambda;", CharacterReference::Scalar('\u{3BB}')), + (b"lang;", CharacterReference::Scalar('\u{27E8}')), + (b"langd;", CharacterReference::Scalar('\u{2991}')), + (b"langle;", CharacterReference::Scalar('\u{27E8}')), + (b"lap;", CharacterReference::Scalar('\u{2A85}')), + (b"laquo", CharacterReference::Scalar('\u{AB}')), + (b"laquo;", CharacterReference::Scalar('\u{AB}')), + (b"larr;", CharacterReference::Scalar('\u{2190}')), + (b"larrb;", CharacterReference::Scalar('\u{21E4}')), + (b"larrbfs;", CharacterReference::Scalar('\u{291F}')), + (b"larrfs;", CharacterReference::Scalar('\u{291D}')), + (b"larrhk;", CharacterReference::Scalar('\u{21A9}')), + (b"larrlp;", CharacterReference::Scalar('\u{21AB}')), + (b"larrpl;", CharacterReference::Scalar('\u{2939}')), + (b"larrsim;", CharacterReference::Scalar('\u{2973}')), + (b"larrtl;", CharacterReference::Scalar('\u{21A2}')), + (b"lat;", CharacterReference::Scalar('\u{2AAB}')), + (b"latail;", CharacterReference::Scalar('\u{2919}')), + (b"late;", 
CharacterReference::Scalar('\u{2AAD}')), + (b"lates;", CharacterReference::Text("\u{2AAD}\u{FE00}")), + (b"lbarr;", CharacterReference::Scalar('\u{290C}')), + (b"lbbrk;", CharacterReference::Scalar('\u{2772}')), + (b"lbrace;", CharacterReference::Scalar('\u{7B}')), + (b"lbrack;", CharacterReference::Scalar('\u{5B}')), + (b"lbrke;", CharacterReference::Scalar('\u{298B}')), + (b"lbrksld;", CharacterReference::Scalar('\u{298F}')), + (b"lbrkslu;", CharacterReference::Scalar('\u{298D}')), + (b"lcaron;", CharacterReference::Scalar('\u{13E}')), + (b"lcedil;", CharacterReference::Scalar('\u{13C}')), + (b"lceil;", CharacterReference::Scalar('\u{2308}')), + (b"lcub;", CharacterReference::Scalar('\u{7B}')), + (b"lcy;", CharacterReference::Scalar('\u{43B}')), + (b"ldca;", CharacterReference::Scalar('\u{2936}')), + (b"ldquo;", CharacterReference::Scalar('\u{201C}')), + (b"ldquor;", CharacterReference::Scalar('\u{201E}')), + (b"ldrdhar;", CharacterReference::Scalar('\u{2967}')), + (b"ldrushar;", CharacterReference::Scalar('\u{294B}')), + (b"ldsh;", CharacterReference::Scalar('\u{21B2}')), + (b"le;", CharacterReference::Scalar('\u{2264}')), + (b"leftarrow;", CharacterReference::Scalar('\u{2190}')), + (b"leftarrowtail;", CharacterReference::Scalar('\u{21A2}')), + (b"leftharpoondown;", CharacterReference::Scalar('\u{21BD}')), + (b"leftharpoonup;", CharacterReference::Scalar('\u{21BC}')), + (b"leftleftarrows;", CharacterReference::Scalar('\u{21C7}')), + (b"leftrightarrow;", CharacterReference::Scalar('\u{2194}')), + (b"leftrightarrows;", CharacterReference::Scalar('\u{21C6}')), + (b"leftrightharpoons;", CharacterReference::Scalar('\u{21CB}')), + (b"leftrightsquigarrow;", CharacterReference::Scalar('\u{21AD}')), + (b"leftthreetimes;", CharacterReference::Scalar('\u{22CB}')), + (b"leg;", CharacterReference::Scalar('\u{22DA}')), + (b"leq;", CharacterReference::Scalar('\u{2264}')), + (b"leqq;", CharacterReference::Scalar('\u{2266}')), + (b"leqslant;", 
CharacterReference::Scalar('\u{2A7D}')), + (b"les;", CharacterReference::Scalar('\u{2A7D}')), + (b"lescc;", CharacterReference::Scalar('\u{2AA8}')), + (b"lesdot;", CharacterReference::Scalar('\u{2A7F}')), + (b"lesdoto;", CharacterReference::Scalar('\u{2A81}')), + (b"lesdotor;", CharacterReference::Scalar('\u{2A83}')), + (b"lesg;", CharacterReference::Text("\u{22DA}\u{FE00}")), + (b"lesges;", CharacterReference::Scalar('\u{2A93}')), + (b"lessapprox;", CharacterReference::Scalar('\u{2A85}')), + (b"lessdot;", CharacterReference::Scalar('\u{22D6}')), + (b"lesseqgtr;", CharacterReference::Scalar('\u{22DA}')), + (b"lesseqqgtr;", CharacterReference::Scalar('\u{2A8B}')), + (b"lessgtr;", CharacterReference::Scalar('\u{2276}')), + (b"lesssim;", CharacterReference::Scalar('\u{2272}')), + (b"lfisht;", CharacterReference::Scalar('\u{297C}')), + (b"lfloor;", CharacterReference::Scalar('\u{230A}')), + (b"lfr;", CharacterReference::Scalar('\u{1D529}')), + (b"lg;", CharacterReference::Scalar('\u{2276}')), + (b"lgE;", CharacterReference::Scalar('\u{2A91}')), + (b"lhard;", CharacterReference::Scalar('\u{21BD}')), + (b"lharu;", CharacterReference::Scalar('\u{21BC}')), + (b"lharul;", CharacterReference::Scalar('\u{296A}')), + (b"lhblk;", CharacterReference::Scalar('\u{2584}')), + (b"ljcy;", CharacterReference::Scalar('\u{459}')), + (b"ll;", CharacterReference::Scalar('\u{226A}')), + (b"llarr;", CharacterReference::Scalar('\u{21C7}')), + (b"llcorner;", CharacterReference::Scalar('\u{231E}')), + (b"llhard;", CharacterReference::Scalar('\u{296B}')), + (b"lltri;", CharacterReference::Scalar('\u{25FA}')), + (b"lmidot;", CharacterReference::Scalar('\u{140}')), + (b"lmoust;", CharacterReference::Scalar('\u{23B0}')), + (b"lmoustache;", CharacterReference::Scalar('\u{23B0}')), + (b"lnE;", CharacterReference::Scalar('\u{2268}')), + (b"lnap;", CharacterReference::Scalar('\u{2A89}')), + (b"lnapprox;", CharacterReference::Scalar('\u{2A89}')), + (b"lne;", CharacterReference::Scalar('\u{2A87}')), + 
(b"lneq;", CharacterReference::Scalar('\u{2A87}')), + (b"lneqq;", CharacterReference::Scalar('\u{2268}')), + (b"lnsim;", CharacterReference::Scalar('\u{22E6}')), + (b"loang;", CharacterReference::Scalar('\u{27EC}')), + (b"loarr;", CharacterReference::Scalar('\u{21FD}')), + (b"lobrk;", CharacterReference::Scalar('\u{27E6}')), + (b"longleftarrow;", CharacterReference::Scalar('\u{27F5}')), + (b"longleftrightarrow;", CharacterReference::Scalar('\u{27F7}')), + (b"longmapsto;", CharacterReference::Scalar('\u{27FC}')), + (b"longrightarrow;", CharacterReference::Scalar('\u{27F6}')), + (b"looparrowleft;", CharacterReference::Scalar('\u{21AB}')), + (b"looparrowright;", CharacterReference::Scalar('\u{21AC}')), + (b"lopar;", CharacterReference::Scalar('\u{2985}')), + (b"lopf;", CharacterReference::Scalar('\u{1D55D}')), + (b"loplus;", CharacterReference::Scalar('\u{2A2D}')), + (b"lotimes;", CharacterReference::Scalar('\u{2A34}')), + (b"lowast;", CharacterReference::Scalar('\u{2217}')), + (b"lowbar;", CharacterReference::Scalar('\u{5F}')), + (b"loz;", CharacterReference::Scalar('\u{25CA}')), + (b"lozenge;", CharacterReference::Scalar('\u{25CA}')), + (b"lozf;", CharacterReference::Scalar('\u{29EB}')), + (b"lpar;", CharacterReference::Scalar('\u{28}')), + (b"lparlt;", CharacterReference::Scalar('\u{2993}')), + (b"lrarr;", CharacterReference::Scalar('\u{21C6}')), + (b"lrcorner;", CharacterReference::Scalar('\u{231F}')), + (b"lrhar;", CharacterReference::Scalar('\u{21CB}')), + (b"lrhard;", CharacterReference::Scalar('\u{296D}')), + (b"lrm;", CharacterReference::Scalar('\u{200E}')), + (b"lrtri;", CharacterReference::Scalar('\u{22BF}')), + (b"lsaquo;", CharacterReference::Scalar('\u{2039}')), + (b"lscr;", CharacterReference::Scalar('\u{1D4C1}')), + (b"lsh;", CharacterReference::Scalar('\u{21B0}')), + (b"lsim;", CharacterReference::Scalar('\u{2272}')), + (b"lsime;", CharacterReference::Scalar('\u{2A8D}')), + (b"lsimg;", CharacterReference::Scalar('\u{2A8F}')), + (b"lsqb;", 
CharacterReference::Scalar('\u{5B}')), + (b"lsquo;", CharacterReference::Scalar('\u{2018}')), + (b"lsquor;", CharacterReference::Scalar('\u{201A}')), + (b"lstrok;", CharacterReference::Scalar('\u{142}')), + (b"lt", CharacterReference::Scalar('\u{3C}')), + (b"lt;", CharacterReference::Scalar('\u{3C}')), + (b"ltcc;", CharacterReference::Scalar('\u{2AA6}')), + (b"ltcir;", CharacterReference::Scalar('\u{2A79}')), + (b"ltdot;", CharacterReference::Scalar('\u{22D6}')), + (b"lthree;", CharacterReference::Scalar('\u{22CB}')), + (b"ltimes;", CharacterReference::Scalar('\u{22C9}')), + (b"ltlarr;", CharacterReference::Scalar('\u{2976}')), + (b"ltquest;", CharacterReference::Scalar('\u{2A7B}')), + (b"ltrPar;", CharacterReference::Scalar('\u{2996}')), + (b"ltri;", CharacterReference::Scalar('\u{25C3}')), + (b"ltrie;", CharacterReference::Scalar('\u{22B4}')), + (b"ltrif;", CharacterReference::Scalar('\u{25C2}')), + (b"lurdshar;", CharacterReference::Scalar('\u{294A}')), + (b"luruhar;", CharacterReference::Scalar('\u{2966}')), + (b"lvertneqq;", CharacterReference::Text("\u{2268}\u{FE00}")), + (b"lvnE;", CharacterReference::Text("\u{2268}\u{FE00}")), + (b"mDDot;", CharacterReference::Scalar('\u{223A}')), + (b"macr", CharacterReference::Scalar('\u{AF}')), + (b"macr;", CharacterReference::Scalar('\u{AF}')), + (b"male;", CharacterReference::Scalar('\u{2642}')), + (b"malt;", CharacterReference::Scalar('\u{2720}')), + (b"maltese;", CharacterReference::Scalar('\u{2720}')), + (b"map;", CharacterReference::Scalar('\u{21A6}')), + (b"mapsto;", CharacterReference::Scalar('\u{21A6}')), + (b"mapstodown;", CharacterReference::Scalar('\u{21A7}')), + (b"mapstoleft;", CharacterReference::Scalar('\u{21A4}')), + (b"mapstoup;", CharacterReference::Scalar('\u{21A5}')), + (b"marker;", CharacterReference::Scalar('\u{25AE}')), + (b"mcomma;", CharacterReference::Scalar('\u{2A29}')), + (b"mcy;", CharacterReference::Scalar('\u{43C}')), + (b"mdash;", CharacterReference::Scalar('\u{2014}')), + 
(b"measuredangle;", CharacterReference::Scalar('\u{2221}')), + (b"mfr;", CharacterReference::Scalar('\u{1D52A}')), + (b"mho;", CharacterReference::Scalar('\u{2127}')), + (b"micro", CharacterReference::Scalar('\u{B5}')), + (b"micro;", CharacterReference::Scalar('\u{B5}')), + (b"mid;", CharacterReference::Scalar('\u{2223}')), + (b"midast;", CharacterReference::Scalar('\u{2A}')), + (b"midcir;", CharacterReference::Scalar('\u{2AF0}')), + (b"middot", CharacterReference::Scalar('\u{B7}')), + (b"middot;", CharacterReference::Scalar('\u{B7}')), + (b"minus;", CharacterReference::Scalar('\u{2212}')), + (b"minusb;", CharacterReference::Scalar('\u{229F}')), + (b"minusd;", CharacterReference::Scalar('\u{2238}')), + (b"minusdu;", CharacterReference::Scalar('\u{2A2A}')), + (b"mlcp;", CharacterReference::Scalar('\u{2ADB}')), + (b"mldr;", CharacterReference::Scalar('\u{2026}')), + (b"mnplus;", CharacterReference::Scalar('\u{2213}')), + (b"models;", CharacterReference::Scalar('\u{22A7}')), + (b"mopf;", CharacterReference::Scalar('\u{1D55E}')), + (b"mp;", CharacterReference::Scalar('\u{2213}')), + (b"mscr;", CharacterReference::Scalar('\u{1D4C2}')), + (b"mstpos;", CharacterReference::Scalar('\u{223E}')), + (b"mu;", CharacterReference::Scalar('\u{3BC}')), + (b"multimap;", CharacterReference::Scalar('\u{22B8}')), + (b"mumap;", CharacterReference::Scalar('\u{22B8}')), + (b"nGg;", CharacterReference::Text("\u{22D9}\u{338}")), + (b"nGt;", CharacterReference::Text("\u{226B}\u{20D2}")), + (b"nGtv;", CharacterReference::Text("\u{226B}\u{338}")), + (b"nLeftarrow;", CharacterReference::Scalar('\u{21CD}')), + (b"nLeftrightarrow;", CharacterReference::Scalar('\u{21CE}')), + (b"nLl;", CharacterReference::Text("\u{22D8}\u{338}")), + (b"nLt;", CharacterReference::Text("\u{226A}\u{20D2}")), + (b"nLtv;", CharacterReference::Text("\u{226A}\u{338}")), + (b"nRightarrow;", CharacterReference::Scalar('\u{21CF}')), + (b"nVDash;", CharacterReference::Scalar('\u{22AF}')), + (b"nVdash;", 
CharacterReference::Scalar('\u{22AE}')), + (b"nabla;", CharacterReference::Scalar('\u{2207}')), + (b"nacute;", CharacterReference::Scalar('\u{144}')), + (b"nang;", CharacterReference::Text("\u{2220}\u{20D2}")), + (b"nap;", CharacterReference::Scalar('\u{2249}')), + (b"napE;", CharacterReference::Text("\u{2A70}\u{338}")), + (b"napid;", CharacterReference::Text("\u{224B}\u{338}")), + (b"napos;", CharacterReference::Scalar('\u{149}')), + (b"napprox;", CharacterReference::Scalar('\u{2249}')), + (b"natur;", CharacterReference::Scalar('\u{266E}')), + (b"natural;", CharacterReference::Scalar('\u{266E}')), + (b"naturals;", CharacterReference::Scalar('\u{2115}')), + (b"nbsp", CharacterReference::Scalar('\u{A0}')), + (b"nbsp;", CharacterReference::Scalar('\u{A0}')), + (b"nbump;", CharacterReference::Text("\u{224E}\u{338}")), + (b"nbumpe;", CharacterReference::Text("\u{224F}\u{338}")), + (b"ncap;", CharacterReference::Scalar('\u{2A43}')), + (b"ncaron;", CharacterReference::Scalar('\u{148}')), + (b"ncedil;", CharacterReference::Scalar('\u{146}')), + (b"ncong;", CharacterReference::Scalar('\u{2247}')), + (b"ncongdot;", CharacterReference::Text("\u{2A6D}\u{338}")), + (b"ncup;", CharacterReference::Scalar('\u{2A42}')), + (b"ncy;", CharacterReference::Scalar('\u{43D}')), + (b"ndash;", CharacterReference::Scalar('\u{2013}')), + (b"ne;", CharacterReference::Scalar('\u{2260}')), + (b"neArr;", CharacterReference::Scalar('\u{21D7}')), + (b"nearhk;", CharacterReference::Scalar('\u{2924}')), + (b"nearr;", CharacterReference::Scalar('\u{2197}')), + (b"nearrow;", CharacterReference::Scalar('\u{2197}')), + (b"nedot;", CharacterReference::Text("\u{2250}\u{338}")), + (b"nequiv;", CharacterReference::Scalar('\u{2262}')), + (b"nesear;", CharacterReference::Scalar('\u{2928}')), + (b"nesim;", CharacterReference::Text("\u{2242}\u{338}")), + (b"nexist;", CharacterReference::Scalar('\u{2204}')), + (b"nexists;", CharacterReference::Scalar('\u{2204}')), + (b"nfr;", 
CharacterReference::Scalar('\u{1D52B}')), + (b"ngE;", CharacterReference::Text("\u{2267}\u{338}")), + (b"nge;", CharacterReference::Scalar('\u{2271}')), + (b"ngeq;", CharacterReference::Scalar('\u{2271}')), + (b"ngeqq;", CharacterReference::Text("\u{2267}\u{338}")), + (b"ngeqslant;", CharacterReference::Text("\u{2A7E}\u{338}")), + (b"nges;", CharacterReference::Text("\u{2A7E}\u{338}")), + (b"ngsim;", CharacterReference::Scalar('\u{2275}')), + (b"ngt;", CharacterReference::Scalar('\u{226F}')), + (b"ngtr;", CharacterReference::Scalar('\u{226F}')), + (b"nhArr;", CharacterReference::Scalar('\u{21CE}')), + (b"nharr;", CharacterReference::Scalar('\u{21AE}')), + (b"nhpar;", CharacterReference::Scalar('\u{2AF2}')), + (b"ni;", CharacterReference::Scalar('\u{220B}')), + (b"nis;", CharacterReference::Scalar('\u{22FC}')), + (b"nisd;", CharacterReference::Scalar('\u{22FA}')), + (b"niv;", CharacterReference::Scalar('\u{220B}')), + (b"njcy;", CharacterReference::Scalar('\u{45A}')), + (b"nlArr;", CharacterReference::Scalar('\u{21CD}')), + (b"nlE;", CharacterReference::Text("\u{2266}\u{338}")), + (b"nlarr;", CharacterReference::Scalar('\u{219A}')), + (b"nldr;", CharacterReference::Scalar('\u{2025}')), + (b"nle;", CharacterReference::Scalar('\u{2270}')), + (b"nleftarrow;", CharacterReference::Scalar('\u{219A}')), + (b"nleftrightarrow;", CharacterReference::Scalar('\u{21AE}')), + (b"nleq;", CharacterReference::Scalar('\u{2270}')), + (b"nleqq;", CharacterReference::Text("\u{2266}\u{338}")), + (b"nleqslant;", CharacterReference::Text("\u{2A7D}\u{338}")), + (b"nles;", CharacterReference::Text("\u{2A7D}\u{338}")), + (b"nless;", CharacterReference::Scalar('\u{226E}')), + (b"nlsim;", CharacterReference::Scalar('\u{2274}')), + (b"nlt;", CharacterReference::Scalar('\u{226E}')), + (b"nltri;", CharacterReference::Scalar('\u{22EA}')), + (b"nltrie;", CharacterReference::Scalar('\u{22EC}')), + (b"nmid;", CharacterReference::Scalar('\u{2224}')), + (b"nopf;", 
CharacterReference::Scalar('\u{1D55F}')), + (b"not", CharacterReference::Scalar('\u{AC}')), + (b"not;", CharacterReference::Scalar('\u{AC}')), + (b"notin;", CharacterReference::Scalar('\u{2209}')), + (b"notinE;", CharacterReference::Text("\u{22F9}\u{338}")), + (b"notindot;", CharacterReference::Text("\u{22F5}\u{338}")), + (b"notinva;", CharacterReference::Scalar('\u{2209}')), + (b"notinvb;", CharacterReference::Scalar('\u{22F7}')), + (b"notinvc;", CharacterReference::Scalar('\u{22F6}')), + (b"notni;", CharacterReference::Scalar('\u{220C}')), + (b"notniva;", CharacterReference::Scalar('\u{220C}')), + (b"notnivb;", CharacterReference::Scalar('\u{22FE}')), + (b"notnivc;", CharacterReference::Scalar('\u{22FD}')), + (b"npar;", CharacterReference::Scalar('\u{2226}')), + (b"nparallel;", CharacterReference::Scalar('\u{2226}')), + (b"nparsl;", CharacterReference::Text("\u{2AFD}\u{20E5}")), + (b"npart;", CharacterReference::Text("\u{2202}\u{338}")), + (b"npolint;", CharacterReference::Scalar('\u{2A14}')), + (b"npr;", CharacterReference::Scalar('\u{2280}')), + (b"nprcue;", CharacterReference::Scalar('\u{22E0}')), + (b"npre;", CharacterReference::Text("\u{2AAF}\u{338}")), + (b"nprec;", CharacterReference::Scalar('\u{2280}')), + (b"npreceq;", CharacterReference::Text("\u{2AAF}\u{338}")), + (b"nrArr;", CharacterReference::Scalar('\u{21CF}')), + (b"nrarr;", CharacterReference::Scalar('\u{219B}')), + (b"nrarrc;", CharacterReference::Text("\u{2933}\u{338}")), + (b"nrarrw;", CharacterReference::Text("\u{219D}\u{338}")), + (b"nrightarrow;", CharacterReference::Scalar('\u{219B}')), + (b"nrtri;", CharacterReference::Scalar('\u{22EB}')), + (b"nrtrie;", CharacterReference::Scalar('\u{22ED}')), + (b"nsc;", CharacterReference::Scalar('\u{2281}')), + (b"nsccue;", CharacterReference::Scalar('\u{22E1}')), + (b"nsce;", CharacterReference::Text("\u{2AB0}\u{338}")), + (b"nscr;", CharacterReference::Scalar('\u{1D4C3}')), + (b"nshortmid;", CharacterReference::Scalar('\u{2224}')), + 
(b"nshortparallel;", CharacterReference::Scalar('\u{2226}')), + (b"nsim;", CharacterReference::Scalar('\u{2241}')), + (b"nsime;", CharacterReference::Scalar('\u{2244}')), + (b"nsimeq;", CharacterReference::Scalar('\u{2244}')), + (b"nsmid;", CharacterReference::Scalar('\u{2224}')), + (b"nspar;", CharacterReference::Scalar('\u{2226}')), + (b"nsqsube;", CharacterReference::Scalar('\u{22E2}')), + (b"nsqsupe;", CharacterReference::Scalar('\u{22E3}')), + (b"nsub;", CharacterReference::Scalar('\u{2284}')), + (b"nsubE;", CharacterReference::Text("\u{2AC5}\u{338}")), + (b"nsube;", CharacterReference::Scalar('\u{2288}')), + (b"nsubset;", CharacterReference::Text("\u{2282}\u{20D2}")), + (b"nsubseteq;", CharacterReference::Scalar('\u{2288}')), + (b"nsubseteqq;", CharacterReference::Text("\u{2AC5}\u{338}")), + (b"nsucc;", CharacterReference::Scalar('\u{2281}')), + (b"nsucceq;", CharacterReference::Text("\u{2AB0}\u{338}")), + (b"nsup;", CharacterReference::Scalar('\u{2285}')), + (b"nsupE;", CharacterReference::Text("\u{2AC6}\u{338}")), + (b"nsupe;", CharacterReference::Scalar('\u{2289}')), + (b"nsupset;", CharacterReference::Text("\u{2283}\u{20D2}")), + (b"nsupseteq;", CharacterReference::Scalar('\u{2289}')), + (b"nsupseteqq;", CharacterReference::Text("\u{2AC6}\u{338}")), + (b"ntgl;", CharacterReference::Scalar('\u{2279}')), + (b"ntilde", CharacterReference::Scalar('\u{F1}')), + (b"ntilde;", CharacterReference::Scalar('\u{F1}')), + (b"ntlg;", CharacterReference::Scalar('\u{2278}')), + (b"ntriangleleft;", CharacterReference::Scalar('\u{22EA}')), + (b"ntrianglelefteq;", CharacterReference::Scalar('\u{22EC}')), + (b"ntriangleright;", CharacterReference::Scalar('\u{22EB}')), + (b"ntrianglerighteq;", CharacterReference::Scalar('\u{22ED}')), + (b"nu;", CharacterReference::Scalar('\u{3BD}')), + (b"num;", CharacterReference::Scalar('\u{23}')), + (b"numero;", CharacterReference::Scalar('\u{2116}')), + (b"numsp;", CharacterReference::Scalar('\u{2007}')), + (b"nvDash;", 
CharacterReference::Scalar('\u{22AD}')), + (b"nvHarr;", CharacterReference::Scalar('\u{2904}')), + (b"nvap;", CharacterReference::Text("\u{224D}\u{20D2}")), + (b"nvdash;", CharacterReference::Scalar('\u{22AC}')), + (b"nvge;", CharacterReference::Text("\u{2265}\u{20D2}")), + (b"nvgt;", CharacterReference::Text("\u{3E}\u{20D2}")), + (b"nvinfin;", CharacterReference::Scalar('\u{29DE}')), + (b"nvlArr;", CharacterReference::Scalar('\u{2902}')), + (b"nvle;", CharacterReference::Text("\u{2264}\u{20D2}")), + (b"nvlt;", CharacterReference::Text("\u{3C}\u{20D2}")), + (b"nvltrie;", CharacterReference::Text("\u{22B4}\u{20D2}")), + (b"nvrArr;", CharacterReference::Scalar('\u{2903}')), + (b"nvrtrie;", CharacterReference::Text("\u{22B5}\u{20D2}")), + (b"nvsim;", CharacterReference::Text("\u{223C}\u{20D2}")), + (b"nwArr;", CharacterReference::Scalar('\u{21D6}')), + (b"nwarhk;", CharacterReference::Scalar('\u{2923}')), + (b"nwarr;", CharacterReference::Scalar('\u{2196}')), + (b"nwarrow;", CharacterReference::Scalar('\u{2196}')), + (b"nwnear;", CharacterReference::Scalar('\u{2927}')), + (b"oS;", CharacterReference::Scalar('\u{24C8}')), + (b"oacute", CharacterReference::Scalar('\u{F3}')), + (b"oacute;", CharacterReference::Scalar('\u{F3}')), + (b"oast;", CharacterReference::Scalar('\u{229B}')), + (b"ocir;", CharacterReference::Scalar('\u{229A}')), + (b"ocirc", CharacterReference::Scalar('\u{F4}')), + (b"ocirc;", CharacterReference::Scalar('\u{F4}')), + (b"ocy;", CharacterReference::Scalar('\u{43E}')), + (b"odash;", CharacterReference::Scalar('\u{229D}')), + (b"odblac;", CharacterReference::Scalar('\u{151}')), + (b"odiv;", CharacterReference::Scalar('\u{2A38}')), + (b"odot;", CharacterReference::Scalar('\u{2299}')), + (b"odsold;", CharacterReference::Scalar('\u{29BC}')), + (b"oelig;", CharacterReference::Scalar('\u{153}')), + (b"ofcir;", CharacterReference::Scalar('\u{29BF}')), + (b"ofr;", CharacterReference::Scalar('\u{1D52C}')), + (b"ogon;", CharacterReference::Scalar('\u{2DB}')), + 
(b"ograve", CharacterReference::Scalar('\u{F2}')), + (b"ograve;", CharacterReference::Scalar('\u{F2}')), + (b"ogt;", CharacterReference::Scalar('\u{29C1}')), + (b"ohbar;", CharacterReference::Scalar('\u{29B5}')), + (b"ohm;", CharacterReference::Scalar('\u{3A9}')), + (b"oint;", CharacterReference::Scalar('\u{222E}')), + (b"olarr;", CharacterReference::Scalar('\u{21BA}')), + (b"olcir;", CharacterReference::Scalar('\u{29BE}')), + (b"olcross;", CharacterReference::Scalar('\u{29BB}')), + (b"oline;", CharacterReference::Scalar('\u{203E}')), + (b"olt;", CharacterReference::Scalar('\u{29C0}')), + (b"omacr;", CharacterReference::Scalar('\u{14D}')), + (b"omega;", CharacterReference::Scalar('\u{3C9}')), + (b"omicron;", CharacterReference::Scalar('\u{3BF}')), + (b"omid;", CharacterReference::Scalar('\u{29B6}')), + (b"ominus;", CharacterReference::Scalar('\u{2296}')), + (b"oopf;", CharacterReference::Scalar('\u{1D560}')), + (b"opar;", CharacterReference::Scalar('\u{29B7}')), + (b"operp;", CharacterReference::Scalar('\u{29B9}')), + (b"oplus;", CharacterReference::Scalar('\u{2295}')), + (b"or;", CharacterReference::Scalar('\u{2228}')), + (b"orarr;", CharacterReference::Scalar('\u{21BB}')), + (b"ord;", CharacterReference::Scalar('\u{2A5D}')), + (b"order;", CharacterReference::Scalar('\u{2134}')), + (b"orderof;", CharacterReference::Scalar('\u{2134}')), + (b"ordf", CharacterReference::Scalar('\u{AA}')), + (b"ordf;", CharacterReference::Scalar('\u{AA}')), + (b"ordm", CharacterReference::Scalar('\u{BA}')), + (b"ordm;", CharacterReference::Scalar('\u{BA}')), + (b"origof;", CharacterReference::Scalar('\u{22B6}')), + (b"oror;", CharacterReference::Scalar('\u{2A56}')), + (b"orslope;", CharacterReference::Scalar('\u{2A57}')), + (b"orv;", CharacterReference::Scalar('\u{2A5B}')), + (b"oscr;", CharacterReference::Scalar('\u{2134}')), + (b"oslash", CharacterReference::Scalar('\u{F8}')), + (b"oslash;", CharacterReference::Scalar('\u{F8}')), + (b"osol;", CharacterReference::Scalar('\u{2298}')), 
+ (b"otilde", CharacterReference::Scalar('\u{F5}')), + (b"otilde;", CharacterReference::Scalar('\u{F5}')), + (b"otimes;", CharacterReference::Scalar('\u{2297}')), + (b"otimesas;", CharacterReference::Scalar('\u{2A36}')), + (b"ouml", CharacterReference::Scalar('\u{F6}')), + (b"ouml;", CharacterReference::Scalar('\u{F6}')), + (b"ovbar;", CharacterReference::Scalar('\u{233D}')), + (b"par;", CharacterReference::Scalar('\u{2225}')), + (b"para", CharacterReference::Scalar('\u{B6}')), + (b"para;", CharacterReference::Scalar('\u{B6}')), + (b"parallel;", CharacterReference::Scalar('\u{2225}')), + (b"parsim;", CharacterReference::Scalar('\u{2AF3}')), + (b"parsl;", CharacterReference::Scalar('\u{2AFD}')), + (b"part;", CharacterReference::Scalar('\u{2202}')), + (b"pcy;", CharacterReference::Scalar('\u{43F}')), + (b"percnt;", CharacterReference::Scalar('\u{25}')), + (b"period;", CharacterReference::Scalar('\u{2E}')), + (b"permil;", CharacterReference::Scalar('\u{2030}')), + (b"perp;", CharacterReference::Scalar('\u{22A5}')), + (b"pertenk;", CharacterReference::Scalar('\u{2031}')), + (b"pfr;", CharacterReference::Scalar('\u{1D52D}')), + (b"phi;", CharacterReference::Scalar('\u{3C6}')), + (b"phiv;", CharacterReference::Scalar('\u{3D5}')), + (b"phmmat;", CharacterReference::Scalar('\u{2133}')), + (b"phone;", CharacterReference::Scalar('\u{260E}')), + (b"pi;", CharacterReference::Scalar('\u{3C0}')), + (b"pitchfork;", CharacterReference::Scalar('\u{22D4}')), + (b"piv;", CharacterReference::Scalar('\u{3D6}')), + (b"planck;", CharacterReference::Scalar('\u{210F}')), + (b"planckh;", CharacterReference::Scalar('\u{210E}')), + (b"plankv;", CharacterReference::Scalar('\u{210F}')), + (b"plus;", CharacterReference::Scalar('\u{2B}')), + (b"plusacir;", CharacterReference::Scalar('\u{2A23}')), + (b"plusb;", CharacterReference::Scalar('\u{229E}')), + (b"pluscir;", CharacterReference::Scalar('\u{2A22}')), + (b"plusdo;", CharacterReference::Scalar('\u{2214}')), + (b"plusdu;", 
CharacterReference::Scalar('\u{2A25}')), + (b"pluse;", CharacterReference::Scalar('\u{2A72}')), + (b"plusmn", CharacterReference::Scalar('\u{B1}')), + (b"plusmn;", CharacterReference::Scalar('\u{B1}')), + (b"plussim;", CharacterReference::Scalar('\u{2A26}')), + (b"plustwo;", CharacterReference::Scalar('\u{2A27}')), + (b"pm;", CharacterReference::Scalar('\u{B1}')), + (b"pointint;", CharacterReference::Scalar('\u{2A15}')), + (b"popf;", CharacterReference::Scalar('\u{1D561}')), + (b"pound", CharacterReference::Scalar('\u{A3}')), + (b"pound;", CharacterReference::Scalar('\u{A3}')), + (b"pr;", CharacterReference::Scalar('\u{227A}')), + (b"prE;", CharacterReference::Scalar('\u{2AB3}')), + (b"prap;", CharacterReference::Scalar('\u{2AB7}')), + (b"prcue;", CharacterReference::Scalar('\u{227C}')), + (b"pre;", CharacterReference::Scalar('\u{2AAF}')), + (b"prec;", CharacterReference::Scalar('\u{227A}')), + (b"precapprox;", CharacterReference::Scalar('\u{2AB7}')), + (b"preccurlyeq;", CharacterReference::Scalar('\u{227C}')), + (b"preceq;", CharacterReference::Scalar('\u{2AAF}')), + (b"precnapprox;", CharacterReference::Scalar('\u{2AB9}')), + (b"precneqq;", CharacterReference::Scalar('\u{2AB5}')), + (b"precnsim;", CharacterReference::Scalar('\u{22E8}')), + (b"precsim;", CharacterReference::Scalar('\u{227E}')), + (b"prime;", CharacterReference::Scalar('\u{2032}')), + (b"primes;", CharacterReference::Scalar('\u{2119}')), + (b"prnE;", CharacterReference::Scalar('\u{2AB5}')), + (b"prnap;", CharacterReference::Scalar('\u{2AB9}')), + (b"prnsim;", CharacterReference::Scalar('\u{22E8}')), + (b"prod;", CharacterReference::Scalar('\u{220F}')), + (b"profalar;", CharacterReference::Scalar('\u{232E}')), + (b"profline;", CharacterReference::Scalar('\u{2312}')), + (b"profsurf;", CharacterReference::Scalar('\u{2313}')), + (b"prop;", CharacterReference::Scalar('\u{221D}')), + (b"propto;", CharacterReference::Scalar('\u{221D}')), + (b"prsim;", CharacterReference::Scalar('\u{227E}')), + 
(b"prurel;", CharacterReference::Scalar('\u{22B0}')), + (b"pscr;", CharacterReference::Scalar('\u{1D4C5}')), + (b"psi;", CharacterReference::Scalar('\u{3C8}')), + (b"puncsp;", CharacterReference::Scalar('\u{2008}')), + (b"qfr;", CharacterReference::Scalar('\u{1D52E}')), + (b"qint;", CharacterReference::Scalar('\u{2A0C}')), + (b"qopf;", CharacterReference::Scalar('\u{1D562}')), + (b"qprime;", CharacterReference::Scalar('\u{2057}')), + (b"qscr;", CharacterReference::Scalar('\u{1D4C6}')), + (b"quaternions;", CharacterReference::Scalar('\u{210D}')), + (b"quatint;", CharacterReference::Scalar('\u{2A16}')), + (b"quest;", CharacterReference::Scalar('\u{3F}')), + (b"questeq;", CharacterReference::Scalar('\u{225F}')), + (b"quot", CharacterReference::Scalar('\u{22}')), + (b"quot;", CharacterReference::Scalar('\u{22}')), + (b"rAarr;", CharacterReference::Scalar('\u{21DB}')), + (b"rArr;", CharacterReference::Scalar('\u{21D2}')), + (b"rAtail;", CharacterReference::Scalar('\u{291C}')), + (b"rBarr;", CharacterReference::Scalar('\u{290F}')), + (b"rHar;", CharacterReference::Scalar('\u{2964}')), + (b"race;", CharacterReference::Text("\u{223D}\u{331}")), + (b"racute;", CharacterReference::Scalar('\u{155}')), + (b"radic;", CharacterReference::Scalar('\u{221A}')), + (b"raemptyv;", CharacterReference::Scalar('\u{29B3}')), + (b"rang;", CharacterReference::Scalar('\u{27E9}')), + (b"rangd;", CharacterReference::Scalar('\u{2992}')), + (b"range;", CharacterReference::Scalar('\u{29A5}')), + (b"rangle;", CharacterReference::Scalar('\u{27E9}')), + (b"raquo", CharacterReference::Scalar('\u{BB}')), + (b"raquo;", CharacterReference::Scalar('\u{BB}')), + (b"rarr;", CharacterReference::Scalar('\u{2192}')), + (b"rarrap;", CharacterReference::Scalar('\u{2975}')), + (b"rarrb;", CharacterReference::Scalar('\u{21E5}')), + (b"rarrbfs;", CharacterReference::Scalar('\u{2920}')), + (b"rarrc;", CharacterReference::Scalar('\u{2933}')), + (b"rarrfs;", CharacterReference::Scalar('\u{291E}')), + (b"rarrhk;", 
CharacterReference::Scalar('\u{21AA}')), + (b"rarrlp;", CharacterReference::Scalar('\u{21AC}')), + (b"rarrpl;", CharacterReference::Scalar('\u{2945}')), + (b"rarrsim;", CharacterReference::Scalar('\u{2974}')), + (b"rarrtl;", CharacterReference::Scalar('\u{21A3}')), + (b"rarrw;", CharacterReference::Scalar('\u{219D}')), + (b"ratail;", CharacterReference::Scalar('\u{291A}')), + (b"ratio;", CharacterReference::Scalar('\u{2236}')), + (b"rationals;", CharacterReference::Scalar('\u{211A}')), + (b"rbarr;", CharacterReference::Scalar('\u{290D}')), + (b"rbbrk;", CharacterReference::Scalar('\u{2773}')), + (b"rbrace;", CharacterReference::Scalar('\u{7D}')), + (b"rbrack;", CharacterReference::Scalar('\u{5D}')), + (b"rbrke;", CharacterReference::Scalar('\u{298C}')), + (b"rbrksld;", CharacterReference::Scalar('\u{298E}')), + (b"rbrkslu;", CharacterReference::Scalar('\u{2990}')), + (b"rcaron;", CharacterReference::Scalar('\u{159}')), + (b"rcedil;", CharacterReference::Scalar('\u{157}')), + (b"rceil;", CharacterReference::Scalar('\u{2309}')), + (b"rcub;", CharacterReference::Scalar('\u{7D}')), + (b"rcy;", CharacterReference::Scalar('\u{440}')), + (b"rdca;", CharacterReference::Scalar('\u{2937}')), + (b"rdldhar;", CharacterReference::Scalar('\u{2969}')), + (b"rdquo;", CharacterReference::Scalar('\u{201D}')), + (b"rdquor;", CharacterReference::Scalar('\u{201D}')), + (b"rdsh;", CharacterReference::Scalar('\u{21B3}')), + (b"real;", CharacterReference::Scalar('\u{211C}')), + (b"realine;", CharacterReference::Scalar('\u{211B}')), + (b"realpart;", CharacterReference::Scalar('\u{211C}')), + (b"reals;", CharacterReference::Scalar('\u{211D}')), + (b"rect;", CharacterReference::Scalar('\u{25AD}')), + (b"reg", CharacterReference::Scalar('\u{AE}')), + (b"reg;", CharacterReference::Scalar('\u{AE}')), + (b"rfisht;", CharacterReference::Scalar('\u{297D}')), + (b"rfloor;", CharacterReference::Scalar('\u{230B}')), + (b"rfr;", CharacterReference::Scalar('\u{1D52F}')), + (b"rhard;", 
CharacterReference::Scalar('\u{21C1}')), + (b"rharu;", CharacterReference::Scalar('\u{21C0}')), + (b"rharul;", CharacterReference::Scalar('\u{296C}')), + (b"rho;", CharacterReference::Scalar('\u{3C1}')), + (b"rhov;", CharacterReference::Scalar('\u{3F1}')), + (b"rightarrow;", CharacterReference::Scalar('\u{2192}')), + (b"rightarrowtail;", CharacterReference::Scalar('\u{21A3}')), + (b"rightharpoondown;", CharacterReference::Scalar('\u{21C1}')), + (b"rightharpoonup;", CharacterReference::Scalar('\u{21C0}')), + (b"rightleftarrows;", CharacterReference::Scalar('\u{21C4}')), + (b"rightleftharpoons;", CharacterReference::Scalar('\u{21CC}')), + (b"rightrightarrows;", CharacterReference::Scalar('\u{21C9}')), + (b"rightsquigarrow;", CharacterReference::Scalar('\u{219D}')), + (b"rightthreetimes;", CharacterReference::Scalar('\u{22CC}')), + (b"ring;", CharacterReference::Scalar('\u{2DA}')), + (b"risingdotseq;", CharacterReference::Scalar('\u{2253}')), + (b"rlarr;", CharacterReference::Scalar('\u{21C4}')), + (b"rlhar;", CharacterReference::Scalar('\u{21CC}')), + (b"rlm;", CharacterReference::Scalar('\u{200F}')), + (b"rmoust;", CharacterReference::Scalar('\u{23B1}')), + (b"rmoustache;", CharacterReference::Scalar('\u{23B1}')), + (b"rnmid;", CharacterReference::Scalar('\u{2AEE}')), + (b"roang;", CharacterReference::Scalar('\u{27ED}')), + (b"roarr;", CharacterReference::Scalar('\u{21FE}')), + (b"robrk;", CharacterReference::Scalar('\u{27E7}')), + (b"ropar;", CharacterReference::Scalar('\u{2986}')), + (b"ropf;", CharacterReference::Scalar('\u{1D563}')), + (b"roplus;", CharacterReference::Scalar('\u{2A2E}')), + (b"rotimes;", CharacterReference::Scalar('\u{2A35}')), + (b"rpar;", CharacterReference::Scalar('\u{29}')), + (b"rpargt;", CharacterReference::Scalar('\u{2994}')), + (b"rppolint;", CharacterReference::Scalar('\u{2A12}')), + (b"rrarr;", CharacterReference::Scalar('\u{21C9}')), + (b"rsaquo;", CharacterReference::Scalar('\u{203A}')), + (b"rscr;", 
CharacterReference::Scalar('\u{1D4C7}')), + (b"rsh;", CharacterReference::Scalar('\u{21B1}')), + (b"rsqb;", CharacterReference::Scalar('\u{5D}')), + (b"rsquo;", CharacterReference::Scalar('\u{2019}')), + (b"rsquor;", CharacterReference::Scalar('\u{2019}')), + (b"rthree;", CharacterReference::Scalar('\u{22CC}')), + (b"rtimes;", CharacterReference::Scalar('\u{22CA}')), + (b"rtri;", CharacterReference::Scalar('\u{25B9}')), + (b"rtrie;", CharacterReference::Scalar('\u{22B5}')), + (b"rtrif;", CharacterReference::Scalar('\u{25B8}')), + (b"rtriltri;", CharacterReference::Scalar('\u{29CE}')), + (b"ruluhar;", CharacterReference::Scalar('\u{2968}')), + (b"rx;", CharacterReference::Scalar('\u{211E}')), + (b"sacute;", CharacterReference::Scalar('\u{15B}')), + (b"sbquo;", CharacterReference::Scalar('\u{201A}')), + (b"sc;", CharacterReference::Scalar('\u{227B}')), + (b"scE;", CharacterReference::Scalar('\u{2AB4}')), + (b"scap;", CharacterReference::Scalar('\u{2AB8}')), + (b"scaron;", CharacterReference::Scalar('\u{161}')), + (b"sccue;", CharacterReference::Scalar('\u{227D}')), + (b"sce;", CharacterReference::Scalar('\u{2AB0}')), + (b"scedil;", CharacterReference::Scalar('\u{15F}')), + (b"scirc;", CharacterReference::Scalar('\u{15D}')), + (b"scnE;", CharacterReference::Scalar('\u{2AB6}')), + (b"scnap;", CharacterReference::Scalar('\u{2ABA}')), + (b"scnsim;", CharacterReference::Scalar('\u{22E9}')), + (b"scpolint;", CharacterReference::Scalar('\u{2A13}')), + (b"scsim;", CharacterReference::Scalar('\u{227F}')), + (b"scy;", CharacterReference::Scalar('\u{441}')), + (b"sdot;", CharacterReference::Scalar('\u{22C5}')), + (b"sdotb;", CharacterReference::Scalar('\u{22A1}')), + (b"sdote;", CharacterReference::Scalar('\u{2A66}')), + (b"seArr;", CharacterReference::Scalar('\u{21D8}')), + (b"searhk;", CharacterReference::Scalar('\u{2925}')), + (b"searr;", CharacterReference::Scalar('\u{2198}')), + (b"searrow;", CharacterReference::Scalar('\u{2198}')), + (b"sect", 
CharacterReference::Scalar('\u{A7}')), + (b"sect;", CharacterReference::Scalar('\u{A7}')), + (b"semi;", CharacterReference::Scalar('\u{3B}')), + (b"seswar;", CharacterReference::Scalar('\u{2929}')), + (b"setminus;", CharacterReference::Scalar('\u{2216}')), + (b"setmn;", CharacterReference::Scalar('\u{2216}')), + (b"sext;", CharacterReference::Scalar('\u{2736}')), + (b"sfr;", CharacterReference::Scalar('\u{1D530}')), + (b"sfrown;", CharacterReference::Scalar('\u{2322}')), + (b"sharp;", CharacterReference::Scalar('\u{266F}')), + (b"shchcy;", CharacterReference::Scalar('\u{449}')), + (b"shcy;", CharacterReference::Scalar('\u{448}')), + (b"shortmid;", CharacterReference::Scalar('\u{2223}')), + (b"shortparallel;", CharacterReference::Scalar('\u{2225}')), + (b"shy", CharacterReference::Scalar('\u{AD}')), + (b"shy;", CharacterReference::Scalar('\u{AD}')), + (b"sigma;", CharacterReference::Scalar('\u{3C3}')), + (b"sigmaf;", CharacterReference::Scalar('\u{3C2}')), + (b"sigmav;", CharacterReference::Scalar('\u{3C2}')), + (b"sim;", CharacterReference::Scalar('\u{223C}')), + (b"simdot;", CharacterReference::Scalar('\u{2A6A}')), + (b"sime;", CharacterReference::Scalar('\u{2243}')), + (b"simeq;", CharacterReference::Scalar('\u{2243}')), + (b"simg;", CharacterReference::Scalar('\u{2A9E}')), + (b"simgE;", CharacterReference::Scalar('\u{2AA0}')), + (b"siml;", CharacterReference::Scalar('\u{2A9D}')), + (b"simlE;", CharacterReference::Scalar('\u{2A9F}')), + (b"simne;", CharacterReference::Scalar('\u{2246}')), + (b"simplus;", CharacterReference::Scalar('\u{2A24}')), + (b"simrarr;", CharacterReference::Scalar('\u{2972}')), + (b"slarr;", CharacterReference::Scalar('\u{2190}')), + (b"smallsetminus;", CharacterReference::Scalar('\u{2216}')), + (b"smashp;", CharacterReference::Scalar('\u{2A33}')), + (b"smeparsl;", CharacterReference::Scalar('\u{29E4}')), + (b"smid;", CharacterReference::Scalar('\u{2223}')), + (b"smile;", CharacterReference::Scalar('\u{2323}')), + (b"smt;", 
CharacterReference::Scalar('\u{2AAA}')), + (b"smte;", CharacterReference::Scalar('\u{2AAC}')), + (b"smtes;", CharacterReference::Text("\u{2AAC}\u{FE00}")), + (b"softcy;", CharacterReference::Scalar('\u{44C}')), + (b"sol;", CharacterReference::Scalar('\u{2F}')), + (b"solb;", CharacterReference::Scalar('\u{29C4}')), + (b"solbar;", CharacterReference::Scalar('\u{233F}')), + (b"sopf;", CharacterReference::Scalar('\u{1D564}')), + (b"spades;", CharacterReference::Scalar('\u{2660}')), + (b"spadesuit;", CharacterReference::Scalar('\u{2660}')), + (b"spar;", CharacterReference::Scalar('\u{2225}')), + (b"sqcap;", CharacterReference::Scalar('\u{2293}')), + (b"sqcaps;", CharacterReference::Text("\u{2293}\u{FE00}")), + (b"sqcup;", CharacterReference::Scalar('\u{2294}')), + (b"sqcups;", CharacterReference::Text("\u{2294}\u{FE00}")), + (b"sqsub;", CharacterReference::Scalar('\u{228F}')), + (b"sqsube;", CharacterReference::Scalar('\u{2291}')), + (b"sqsubset;", CharacterReference::Scalar('\u{228F}')), + (b"sqsubseteq;", CharacterReference::Scalar('\u{2291}')), + (b"sqsup;", CharacterReference::Scalar('\u{2290}')), + (b"sqsupe;", CharacterReference::Scalar('\u{2292}')), + (b"sqsupset;", CharacterReference::Scalar('\u{2290}')), + (b"sqsupseteq;", CharacterReference::Scalar('\u{2292}')), + (b"squ;", CharacterReference::Scalar('\u{25A1}')), + (b"square;", CharacterReference::Scalar('\u{25A1}')), + (b"squarf;", CharacterReference::Scalar('\u{25AA}')), + (b"squf;", CharacterReference::Scalar('\u{25AA}')), + (b"srarr;", CharacterReference::Scalar('\u{2192}')), + (b"sscr;", CharacterReference::Scalar('\u{1D4C8}')), + (b"ssetmn;", CharacterReference::Scalar('\u{2216}')), + (b"ssmile;", CharacterReference::Scalar('\u{2323}')), + (b"sstarf;", CharacterReference::Scalar('\u{22C6}')), + (b"star;", CharacterReference::Scalar('\u{2606}')), + (b"starf;", CharacterReference::Scalar('\u{2605}')), + (b"straightepsilon;", CharacterReference::Scalar('\u{3F5}')), + (b"straightphi;", 
CharacterReference::Scalar('\u{3D5}')), + (b"strns;", CharacterReference::Scalar('\u{AF}')), + (b"sub;", CharacterReference::Scalar('\u{2282}')), + (b"subE;", CharacterReference::Scalar('\u{2AC5}')), + (b"subdot;", CharacterReference::Scalar('\u{2ABD}')), + (b"sube;", CharacterReference::Scalar('\u{2286}')), + (b"subedot;", CharacterReference::Scalar('\u{2AC3}')), + (b"submult;", CharacterReference::Scalar('\u{2AC1}')), + (b"subnE;", CharacterReference::Scalar('\u{2ACB}')), + (b"subne;", CharacterReference::Scalar('\u{228A}')), + (b"subplus;", CharacterReference::Scalar('\u{2ABF}')), + (b"subrarr;", CharacterReference::Scalar('\u{2979}')), + (b"subset;", CharacterReference::Scalar('\u{2282}')), + (b"subseteq;", CharacterReference::Scalar('\u{2286}')), + (b"subseteqq;", CharacterReference::Scalar('\u{2AC5}')), + (b"subsetneq;", CharacterReference::Scalar('\u{228A}')), + (b"subsetneqq;", CharacterReference::Scalar('\u{2ACB}')), + (b"subsim;", CharacterReference::Scalar('\u{2AC7}')), + (b"subsub;", CharacterReference::Scalar('\u{2AD5}')), + (b"subsup;", CharacterReference::Scalar('\u{2AD3}')), + (b"succ;", CharacterReference::Scalar('\u{227B}')), + (b"succapprox;", CharacterReference::Scalar('\u{2AB8}')), + (b"succcurlyeq;", CharacterReference::Scalar('\u{227D}')), + (b"succeq;", CharacterReference::Scalar('\u{2AB0}')), + (b"succnapprox;", CharacterReference::Scalar('\u{2ABA}')), + (b"succneqq;", CharacterReference::Scalar('\u{2AB6}')), + (b"succnsim;", CharacterReference::Scalar('\u{22E9}')), + (b"succsim;", CharacterReference::Scalar('\u{227F}')), + (b"sum;", CharacterReference::Scalar('\u{2211}')), + (b"sung;", CharacterReference::Scalar('\u{266A}')), + (b"sup1", CharacterReference::Scalar('\u{B9}')), + (b"sup1;", CharacterReference::Scalar('\u{B9}')), + (b"sup2", CharacterReference::Scalar('\u{B2}')), + (b"sup2;", CharacterReference::Scalar('\u{B2}')), + (b"sup3", CharacterReference::Scalar('\u{B3}')), + (b"sup3;", CharacterReference::Scalar('\u{B3}')), + 
(b"sup;", CharacterReference::Scalar('\u{2283}')), + (b"supE;", CharacterReference::Scalar('\u{2AC6}')), + (b"supdot;", CharacterReference::Scalar('\u{2ABE}')), + (b"supdsub;", CharacterReference::Scalar('\u{2AD8}')), + (b"supe;", CharacterReference::Scalar('\u{2287}')), + (b"supedot;", CharacterReference::Scalar('\u{2AC4}')), + (b"suphsol;", CharacterReference::Scalar('\u{27C9}')), + (b"suphsub;", CharacterReference::Scalar('\u{2AD7}')), + (b"suplarr;", CharacterReference::Scalar('\u{297B}')), + (b"supmult;", CharacterReference::Scalar('\u{2AC2}')), + (b"supnE;", CharacterReference::Scalar('\u{2ACC}')), + (b"supne;", CharacterReference::Scalar('\u{228B}')), + (b"supplus;", CharacterReference::Scalar('\u{2AC0}')), + (b"supset;", CharacterReference::Scalar('\u{2283}')), + (b"supseteq;", CharacterReference::Scalar('\u{2287}')), + (b"supseteqq;", CharacterReference::Scalar('\u{2AC6}')), + (b"supsetneq;", CharacterReference::Scalar('\u{228B}')), + (b"supsetneqq;", CharacterReference::Scalar('\u{2ACC}')), + (b"supsim;", CharacterReference::Scalar('\u{2AC8}')), + (b"supsub;", CharacterReference::Scalar('\u{2AD4}')), + (b"supsup;", CharacterReference::Scalar('\u{2AD6}')), + (b"swArr;", CharacterReference::Scalar('\u{21D9}')), + (b"swarhk;", CharacterReference::Scalar('\u{2926}')), + (b"swarr;", CharacterReference::Scalar('\u{2199}')), + (b"swarrow;", CharacterReference::Scalar('\u{2199}')), + (b"swnwar;", CharacterReference::Scalar('\u{292A}')), + (b"szlig", CharacterReference::Scalar('\u{DF}')), + (b"szlig;", CharacterReference::Scalar('\u{DF}')), + (b"target;", CharacterReference::Scalar('\u{2316}')), + (b"tau;", CharacterReference::Scalar('\u{3C4}')), + (b"tbrk;", CharacterReference::Scalar('\u{23B4}')), + (b"tcaron;", CharacterReference::Scalar('\u{165}')), + (b"tcedil;", CharacterReference::Scalar('\u{163}')), + (b"tcy;", CharacterReference::Scalar('\u{442}')), + (b"tdot;", CharacterReference::Scalar('\u{20DB}')), + (b"telrec;", 
CharacterReference::Scalar('\u{2315}')), + (b"tfr;", CharacterReference::Scalar('\u{1D531}')), + (b"there4;", CharacterReference::Scalar('\u{2234}')), + (b"therefore;", CharacterReference::Scalar('\u{2234}')), + (b"theta;", CharacterReference::Scalar('\u{3B8}')), + (b"thetasym;", CharacterReference::Scalar('\u{3D1}')), + (b"thetav;", CharacterReference::Scalar('\u{3D1}')), + (b"thickapprox;", CharacterReference::Scalar('\u{2248}')), + (b"thicksim;", CharacterReference::Scalar('\u{223C}')), + (b"thinsp;", CharacterReference::Scalar('\u{2009}')), + (b"thkap;", CharacterReference::Scalar('\u{2248}')), + (b"thksim;", CharacterReference::Scalar('\u{223C}')), + (b"thorn", CharacterReference::Scalar('\u{FE}')), + (b"thorn;", CharacterReference::Scalar('\u{FE}')), + (b"tilde;", CharacterReference::Scalar('\u{2DC}')), + (b"times", CharacterReference::Scalar('\u{D7}')), + (b"times;", CharacterReference::Scalar('\u{D7}')), + (b"timesb;", CharacterReference::Scalar('\u{22A0}')), + (b"timesbar;", CharacterReference::Scalar('\u{2A31}')), + (b"timesd;", CharacterReference::Scalar('\u{2A30}')), + (b"tint;", CharacterReference::Scalar('\u{222D}')), + (b"toea;", CharacterReference::Scalar('\u{2928}')), + (b"top;", CharacterReference::Scalar('\u{22A4}')), + (b"topbot;", CharacterReference::Scalar('\u{2336}')), + (b"topcir;", CharacterReference::Scalar('\u{2AF1}')), + (b"topf;", CharacterReference::Scalar('\u{1D565}')), + (b"topfork;", CharacterReference::Scalar('\u{2ADA}')), + (b"tosa;", CharacterReference::Scalar('\u{2929}')), + (b"tprime;", CharacterReference::Scalar('\u{2034}')), + (b"trade;", CharacterReference::Scalar('\u{2122}')), + (b"triangle;", CharacterReference::Scalar('\u{25B5}')), + (b"triangledown;", CharacterReference::Scalar('\u{25BF}')), + (b"triangleleft;", CharacterReference::Scalar('\u{25C3}')), + (b"trianglelefteq;", CharacterReference::Scalar('\u{22B4}')), + (b"triangleq;", CharacterReference::Scalar('\u{225C}')), + (b"triangleright;", 
CharacterReference::Scalar('\u{25B9}')), + (b"trianglerighteq;", CharacterReference::Scalar('\u{22B5}')), + (b"tridot;", CharacterReference::Scalar('\u{25EC}')), + (b"trie;", CharacterReference::Scalar('\u{225C}')), + (b"triminus;", CharacterReference::Scalar('\u{2A3A}')), + (b"triplus;", CharacterReference::Scalar('\u{2A39}')), + (b"trisb;", CharacterReference::Scalar('\u{29CD}')), + (b"tritime;", CharacterReference::Scalar('\u{2A3B}')), + (b"trpezium;", CharacterReference::Scalar('\u{23E2}')), + (b"tscr;", CharacterReference::Scalar('\u{1D4C9}')), + (b"tscy;", CharacterReference::Scalar('\u{446}')), + (b"tshcy;", CharacterReference::Scalar('\u{45B}')), + (b"tstrok;", CharacterReference::Scalar('\u{167}')), + (b"twixt;", CharacterReference::Scalar('\u{226C}')), + (b"twoheadleftarrow;", CharacterReference::Scalar('\u{219E}')), + (b"twoheadrightarrow;", CharacterReference::Scalar('\u{21A0}')), + (b"uArr;", CharacterReference::Scalar('\u{21D1}')), + (b"uHar;", CharacterReference::Scalar('\u{2963}')), + (b"uacute", CharacterReference::Scalar('\u{FA}')), + (b"uacute;", CharacterReference::Scalar('\u{FA}')), + (b"uarr;", CharacterReference::Scalar('\u{2191}')), + (b"ubrcy;", CharacterReference::Scalar('\u{45E}')), + (b"ubreve;", CharacterReference::Scalar('\u{16D}')), + (b"ucirc", CharacterReference::Scalar('\u{FB}')), + (b"ucirc;", CharacterReference::Scalar('\u{FB}')), + (b"ucy;", CharacterReference::Scalar('\u{443}')), + (b"udarr;", CharacterReference::Scalar('\u{21C5}')), + (b"udblac;", CharacterReference::Scalar('\u{171}')), + (b"udhar;", CharacterReference::Scalar('\u{296E}')), + (b"ufisht;", CharacterReference::Scalar('\u{297E}')), + (b"ufr;", CharacterReference::Scalar('\u{1D532}')), + (b"ugrave", CharacterReference::Scalar('\u{F9}')), + (b"ugrave;", CharacterReference::Scalar('\u{F9}')), + (b"uharl;", CharacterReference::Scalar('\u{21BF}')), + (b"uharr;", CharacterReference::Scalar('\u{21BE}')), + (b"uhblk;", CharacterReference::Scalar('\u{2580}')), + 
(b"ulcorn;", CharacterReference::Scalar('\u{231C}')), + (b"ulcorner;", CharacterReference::Scalar('\u{231C}')), + (b"ulcrop;", CharacterReference::Scalar('\u{230F}')), + (b"ultri;", CharacterReference::Scalar('\u{25F8}')), + (b"umacr;", CharacterReference::Scalar('\u{16B}')), + (b"uml", CharacterReference::Scalar('\u{A8}')), + (b"uml;", CharacterReference::Scalar('\u{A8}')), + (b"uogon;", CharacterReference::Scalar('\u{173}')), + (b"uopf;", CharacterReference::Scalar('\u{1D566}')), + (b"uparrow;", CharacterReference::Scalar('\u{2191}')), + (b"updownarrow;", CharacterReference::Scalar('\u{2195}')), + (b"upharpoonleft;", CharacterReference::Scalar('\u{21BF}')), + (b"upharpoonright;", CharacterReference::Scalar('\u{21BE}')), + (b"uplus;", CharacterReference::Scalar('\u{228E}')), + (b"upsi;", CharacterReference::Scalar('\u{3C5}')), + (b"upsih;", CharacterReference::Scalar('\u{3D2}')), + (b"upsilon;", CharacterReference::Scalar('\u{3C5}')), + (b"upuparrows;", CharacterReference::Scalar('\u{21C8}')), + (b"urcorn;", CharacterReference::Scalar('\u{231D}')), + (b"urcorner;", CharacterReference::Scalar('\u{231D}')), + (b"urcrop;", CharacterReference::Scalar('\u{230E}')), + (b"uring;", CharacterReference::Scalar('\u{16F}')), + (b"urtri;", CharacterReference::Scalar('\u{25F9}')), + (b"uscr;", CharacterReference::Scalar('\u{1D4CA}')), + (b"utdot;", CharacterReference::Scalar('\u{22F0}')), + (b"utilde;", CharacterReference::Scalar('\u{169}')), + (b"utri;", CharacterReference::Scalar('\u{25B5}')), + (b"utrif;", CharacterReference::Scalar('\u{25B4}')), + (b"uuarr;", CharacterReference::Scalar('\u{21C8}')), + (b"uuml", CharacterReference::Scalar('\u{FC}')), + (b"uuml;", CharacterReference::Scalar('\u{FC}')), + (b"uwangle;", CharacterReference::Scalar('\u{29A7}')), + (b"vArr;", CharacterReference::Scalar('\u{21D5}')), + (b"vBar;", CharacterReference::Scalar('\u{2AE8}')), + (b"vBarv;", CharacterReference::Scalar('\u{2AE9}')), + (b"vDash;", CharacterReference::Scalar('\u{22A8}')), + 
(b"vangrt;", CharacterReference::Scalar('\u{299C}')), + (b"varepsilon;", CharacterReference::Scalar('\u{3F5}')), + (b"varkappa;", CharacterReference::Scalar('\u{3F0}')), + (b"varnothing;", CharacterReference::Scalar('\u{2205}')), + (b"varphi;", CharacterReference::Scalar('\u{3D5}')), + (b"varpi;", CharacterReference::Scalar('\u{3D6}')), + (b"varpropto;", CharacterReference::Scalar('\u{221D}')), + (b"varr;", CharacterReference::Scalar('\u{2195}')), + (b"varrho;", CharacterReference::Scalar('\u{3F1}')), + (b"varsigma;", CharacterReference::Scalar('\u{3C2}')), + (b"varsubsetneq;", CharacterReference::Text("\u{228A}\u{FE00}")), + (b"varsubsetneqq;", CharacterReference::Text("\u{2ACB}\u{FE00}")), + (b"varsupsetneq;", CharacterReference::Text("\u{228B}\u{FE00}")), + (b"varsupsetneqq;", CharacterReference::Text("\u{2ACC}\u{FE00}")), + (b"vartheta;", CharacterReference::Scalar('\u{3D1}')), + (b"vartriangleleft;", CharacterReference::Scalar('\u{22B2}')), + (b"vartriangleright;", CharacterReference::Scalar('\u{22B3}')), + (b"vcy;", CharacterReference::Scalar('\u{432}')), + (b"vdash;", CharacterReference::Scalar('\u{22A2}')), + (b"vee;", CharacterReference::Scalar('\u{2228}')), + (b"veebar;", CharacterReference::Scalar('\u{22BB}')), + (b"veeeq;", CharacterReference::Scalar('\u{225A}')), + (b"vellip;", CharacterReference::Scalar('\u{22EE}')), + (b"verbar;", CharacterReference::Scalar('\u{7C}')), + (b"vert;", CharacterReference::Scalar('\u{7C}')), + (b"vfr;", CharacterReference::Scalar('\u{1D533}')), + (b"vltri;", CharacterReference::Scalar('\u{22B2}')), + (b"vnsub;", CharacterReference::Text("\u{2282}\u{20D2}")), + (b"vnsup;", CharacterReference::Text("\u{2283}\u{20D2}")), + (b"vopf;", CharacterReference::Scalar('\u{1D567}')), + (b"vprop;", CharacterReference::Scalar('\u{221D}')), + (b"vrtri;", CharacterReference::Scalar('\u{22B3}')), + (b"vscr;", CharacterReference::Scalar('\u{1D4CB}')), + (b"vsubnE;", CharacterReference::Text("\u{2ACB}\u{FE00}")), + (b"vsubne;", 
CharacterReference::Text("\u{228A}\u{FE00}")), + (b"vsupnE;", CharacterReference::Text("\u{2ACC}\u{FE00}")), + (b"vsupne;", CharacterReference::Text("\u{228B}\u{FE00}")), + (b"vzigzag;", CharacterReference::Scalar('\u{299A}')), + (b"wcirc;", CharacterReference::Scalar('\u{175}')), + (b"wedbar;", CharacterReference::Scalar('\u{2A5F}')), + (b"wedge;", CharacterReference::Scalar('\u{2227}')), + (b"wedgeq;", CharacterReference::Scalar('\u{2259}')), + (b"weierp;", CharacterReference::Scalar('\u{2118}')), + (b"wfr;", CharacterReference::Scalar('\u{1D534}')), + (b"wopf;", CharacterReference::Scalar('\u{1D568}')), + (b"wp;", CharacterReference::Scalar('\u{2118}')), + (b"wr;", CharacterReference::Scalar('\u{2240}')), + (b"wreath;", CharacterReference::Scalar('\u{2240}')), + (b"wscr;", CharacterReference::Scalar('\u{1D4CC}')), + (b"xcap;", CharacterReference::Scalar('\u{22C2}')), + (b"xcirc;", CharacterReference::Scalar('\u{25EF}')), + (b"xcup;", CharacterReference::Scalar('\u{22C3}')), + (b"xdtri;", CharacterReference::Scalar('\u{25BD}')), + (b"xfr;", CharacterReference::Scalar('\u{1D535}')), + (b"xhArr;", CharacterReference::Scalar('\u{27FA}')), + (b"xharr;", CharacterReference::Scalar('\u{27F7}')), + (b"xi;", CharacterReference::Scalar('\u{3BE}')), + (b"xlArr;", CharacterReference::Scalar('\u{27F8}')), + (b"xlarr;", CharacterReference::Scalar('\u{27F5}')), + (b"xmap;", CharacterReference::Scalar('\u{27FC}')), + (b"xnis;", CharacterReference::Scalar('\u{22FB}')), + (b"xodot;", CharacterReference::Scalar('\u{2A00}')), + (b"xopf;", CharacterReference::Scalar('\u{1D569}')), + (b"xoplus;", CharacterReference::Scalar('\u{2A01}')), + (b"xotime;", CharacterReference::Scalar('\u{2A02}')), + (b"xrArr;", CharacterReference::Scalar('\u{27F9}')), + (b"xrarr;", CharacterReference::Scalar('\u{27F6}')), + (b"xscr;", CharacterReference::Scalar('\u{1D4CD}')), + (b"xsqcup;", CharacterReference::Scalar('\u{2A06}')), + (b"xuplus;", CharacterReference::Scalar('\u{2A04}')), + (b"xutri;", 
CharacterReference::Scalar('\u{25B3}')), + (b"xvee;", CharacterReference::Scalar('\u{22C1}')), + (b"xwedge;", CharacterReference::Scalar('\u{22C0}')), + (b"yacute", CharacterReference::Scalar('\u{FD}')), + (b"yacute;", CharacterReference::Scalar('\u{FD}')), + (b"yacy;", CharacterReference::Scalar('\u{44F}')), + (b"ycirc;", CharacterReference::Scalar('\u{177}')), + (b"ycy;", CharacterReference::Scalar('\u{44B}')), + (b"yen", CharacterReference::Scalar('\u{A5}')), + (b"yen;", CharacterReference::Scalar('\u{A5}')), + (b"yfr;", CharacterReference::Scalar('\u{1D536}')), + (b"yicy;", CharacterReference::Scalar('\u{457}')), + (b"yopf;", CharacterReference::Scalar('\u{1D56A}')), + (b"yscr;", CharacterReference::Scalar('\u{1D4CE}')), + (b"yucy;", CharacterReference::Scalar('\u{44E}')), + (b"yuml", CharacterReference::Scalar('\u{FF}')), + (b"yuml;", CharacterReference::Scalar('\u{FF}')), + (b"zacute;", CharacterReference::Scalar('\u{17A}')), + (b"zcaron;", CharacterReference::Scalar('\u{17E}')), + (b"zcy;", CharacterReference::Scalar('\u{437}')), + (b"zdot;", CharacterReference::Scalar('\u{17C}')), + (b"zeetrf;", CharacterReference::Scalar('\u{2128}')), + (b"zeta;", CharacterReference::Scalar('\u{3B6}')), + (b"zfr;", CharacterReference::Scalar('\u{1D537}')), + (b"zhcy;", CharacterReference::Scalar('\u{436}')), + (b"zigrarr;", CharacterReference::Scalar('\u{21DD}')), + (b"zopf;", CharacterReference::Scalar('\u{1D56B}')), + (b"zscr;", CharacterReference::Scalar('\u{1D4CF}')), + (b"zwj;", CharacterReference::Scalar('\u{200D}')), + (b"zwnj;", CharacterReference::Scalar('\u{200C}')), +]; diff --git a/ext/html-api-rust/src/lib.rs b/ext/html-api-rust/src/lib.rs new file mode 100644 index 0000000000000..1abc707c0e70b --- /dev/null +++ b/ext/html-api-rust/src/lib.rs @@ -0,0 +1,4059 @@ +use std::ffi::c_char; +use std::ptr; +use std::slice; + +mod html5_named_character_references; + +static VERSION: &[u8] = b"0.1.0\0"; + +const TOKEN_TYPE_TAG: u8 = 1; +const TOKEN_TYPE_TEXT: u8 = 2; 
+const TOKEN_TYPE_COMMENT: u8 = 3; +const TOKEN_TYPE_DOCTYPE: u8 = 4; +const TOKEN_TYPE_CDATA: u8 = 5; +const TOKEN_TYPE_PRESUMPTUOUS_TAG: u8 = 6; +const TOKEN_TYPE_FUNKY_COMMENT: u8 = 7; + +const NAMESPACE_HTML: u8 = 0; +const NAMESPACE_FOREIGN: u8 = 1; + +const COMMENT_TYPE_NONE: u8 = 0; +const COMMENT_TYPE_ABRUPTLY_CLOSED: u8 = 1; +const COMMENT_TYPE_CDATA_LOOKALIKE: u8 = 2; +const COMMENT_TYPE_HTML: u8 = 3; +const COMMENT_TYPE_PI_LOOKALIKE: u8 = 4; +const COMMENT_TYPE_INVALID: u8 = 5; + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub struct TagScan { + pub tag_start: usize, + pub tag_end: usize, + pub name_start: usize, + pub name_len: usize, + pub is_closing: bool, + pub has_self_closing_flag: bool, + pub token_end: usize, + pub token_type: u8, +} + +#[repr(C)] +#[derive(Clone, Copy, Debug, Default)] +pub struct ByteSlice { + pub ptr: *const u8, + pub len: usize, +} + +pub struct TagProcessor { + html: Vec, + offset: usize, + current: Option, + scratch: Vec, + paused_at_incomplete: bool, + inserted_attributes: Vec>, + parsing_namespace: u8, +} + +#[no_mangle] +pub extern "C" fn wp_html_api_rust_alloc(len: usize) -> *mut u8 { + let mut buffer = Vec::::with_capacity(len); + let ptr = buffer.as_mut_ptr(); + std::mem::forget(buffer); + ptr +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_dealloc(ptr: *mut u8, len: usize) { + if !ptr.is_null() { + drop(Vec::from_raw_parts(ptr, 0, len)); + } +} + +#[derive(Clone, Copy, Debug)] +struct AttributeSpan { + name_start: usize, + full_end: usize, + value: Option<(usize, usize)>, +} + +#[no_mangle] +pub extern "C" fn wp_html_api_rust_core_version() -> *const c_char { + VERSION.as_ptr().cast() +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_decoder_decode( + context: u8, + text: *const u8, + text_len: usize, + out_ptr: *mut u8, + out_capacity: usize, + out_len: *mut usize, +) -> bool { + if (text.is_null() && text_len > 0) || out_ptr.is_null() || out_len.is_null() { + return 
false; + } + + let text = if text_len == 0 { + &[][..] + } else { + slice::from_raw_parts(text, text_len) + }; + let decoded = decode_html_text(decode_context_from_byte(context), text); + write_output_buffer(&decoded, out_ptr, out_capacity, out_len) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_decoder_read_character_reference( + context: u8, + text: *const u8, + text_len: usize, + at: usize, + out_ptr: *mut u8, + out_capacity: usize, + out_len: *mut usize, + match_len: *mut usize, +) -> bool { + if (text.is_null() && text_len > 0) + || out_ptr.is_null() + || out_len.is_null() + || match_len.is_null() + { + return false; + } + + let text = if text_len == 0 { + &[][..] + } else { + slice::from_raw_parts(text, text_len) + }; + + if at >= text.len() { + return false; + } + + let Some((decoded, consumed)) = + decode_character_reference(decode_context_from_byte(context), &text[at..]) + else { + return false; + }; + + let mut output = Vec::with_capacity(consumed); + decoded.append_to(&mut output); + if !write_output_buffer(&output, out_ptr, out_capacity, out_len) { + return false; + } + + ptr::write(match_len, consumed); + true +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_decoder_attribute_starts_with( + haystack: *const u8, + haystack_len: usize, + search_text: *const u8, + search_text_len: usize, + ascii_case_insensitive: bool, +) -> bool { + if (haystack.is_null() && haystack_len > 0) || (search_text.is_null() && search_text_len > 0) { + return false; + } + + let haystack = if haystack_len == 0 { + &[][..] + } else { + slice::from_raw_parts(haystack, haystack_len) + }; + let search_text = if search_text_len == 0 { + &[][..] 
+ } else { + slice::from_raw_parts(search_text, search_text_len) + }; + + attribute_starts_with(haystack, search_text, ascii_case_insensitive) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_decoder_code_point_to_utf8_bytes( + code_point: u32, + out_ptr: *mut u8, + out_capacity: usize, + out_len: *mut usize, +) -> bool { + if out_ptr.is_null() || out_len.is_null() { + return false; + } + + let character = char::from_u32(code_point).unwrap_or('\u{FFFD}'); + let mut buffer = [0; 4]; + write_output_buffer( + character.encode_utf8(&mut buffer).as_bytes(), + out_ptr, + out_capacity, + out_len, + ) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_scan_next_tag( + html: *const u8, + len: usize, + offset: usize, + out: *mut TagScan, +) -> bool { + if html.is_null() || out.is_null() { + return false; + } + + let html = slice::from_raw_parts(html, len); + + match scan_next_tag(html, offset) { + Some(scan) => { + ptr::write(out, scan); + true + } + None => false, + } +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_new( + html: *const u8, + len: usize, +) -> *mut TagProcessor { + if html.is_null() && len > 0 { + return ptr::null_mut(); + } + + let html = if len == 0 { + Vec::new() + } else { + slice::from_raw_parts(html, len).to_vec() + }; + + Box::into_raw(Box::new(TagProcessor { + html, + offset: 0, + current: None, + scratch: Vec::new(), + paused_at_incomplete: false, + inserted_attributes: Vec::new(), + parsing_namespace: NAMESPACE_HTML, + })) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_free(processor: *mut TagProcessor) { + if !processor.is_null() { + drop(Box::from_raw(processor)); + } +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_next_tag( + processor: *mut TagProcessor, + query: *const u8, + query_len: usize, + visit_closers: bool, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + let query = if query.is_null() { + 
None + } else { + Some(slice::from_raw_parts(query, query_len)) + }; + + processor.paused_at_incomplete = false; + processor.inserted_attributes.clear(); + + loop { + let scan = match scan_next_token_in_namespace( + &processor.html, + processor.offset, + processor.parsing_namespace, + ) { + ScanResult::Token(scan) => scan, + ScanResult::Incomplete => { + processor.paused_at_incomplete = true; + return false; + } + ScanResult::None => { + return false; + } + }; + + processor.offset = scan.token_end; + + if scan.token_type != TOKEN_TYPE_TAG { + continue; + } + + if scan.is_closing && !visit_closers { + continue; + } + + if let Some(query) = query { + let tag_name = &processor.html[scan.name_start..scan.name_start + scan.name_len]; + if tag_name.len() != query.len() || !eq_ignore_ascii_case(tag_name, query) { + continue; + } + } + + processor.current = Some(scan); + return true; + } +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_next_token( + processor: *mut TagProcessor, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + processor.paused_at_incomplete = false; + processor.inserted_attributes.clear(); + + match scan_next_token_in_namespace( + &processor.html, + processor.offset, + processor.parsing_namespace, + ) { + ScanResult::Token(scan) => { + processor.offset = scan.token_end; + processor.current = Some(scan); + true + } + ScanResult::Incomplete => { + processor.paused_at_incomplete = true; + false + } + ScanResult::None => false, + } +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_seek( + processor: *mut TagProcessor, + offset: usize, +) { + let Some(processor) = processor.as_mut() else { + return; + }; + + processor.offset = offset.min(processor.html.len()); + processor.current = None; + processor.paused_at_incomplete = false; + processor.inserted_attributes.clear(); +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_set_namespace( + processor: 
*mut TagProcessor, + namespace: u8, +) { + let Some(processor) = processor.as_mut() else { + return; + }; + + processor.parsing_namespace = if namespace == NAMESPACE_FOREIGN { + NAMESPACE_FOREIGN + } else { + NAMESPACE_HTML + }; +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_apply_lexical_update( + processor: *mut TagProcessor, + start: usize, + length: usize, + replacement: *const u8, + replacement_len: usize, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if replacement.is_null() && replacement_len > 0 { + return false; + } + + let Some(end) = start.checked_add(length) else { + return false; + }; + + if end > processor.html.len() { + return false; + } + + let replacement = if replacement_len == 0 { + &[] + } else { + slice::from_raw_parts(replacement, replacement_len) + }; + + processor.replace_range_preserving_cursor_at_inserted_start(start, end, replacement); + true +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_current_span( + processor: *const TagProcessor, + start: *mut usize, + length: *mut usize, +) -> bool { + let Some(processor) = processor.as_ref() else { + return false; + }; + + let Some(scan) = processor.current else { + return false; + }; + + if start.is_null() || length.is_null() { + return false; + } + + ptr::write(start, scan.tag_start); + ptr::write(length, scan.token_end - scan.tag_start); + true +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_current_token_type( + processor: *const TagProcessor, +) -> u8 { + let Some(processor) = processor.as_ref() else { + return 0; + }; + + processor.current.map(|scan| scan.token_type).unwrap_or(0) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_paused_at_incomplete( + processor: *const TagProcessor, +) -> bool { + let Some(processor) = processor.as_ref() else { + return false; + }; + + processor.paused_at_incomplete +} + +#[no_mangle] +pub unsafe extern 
"C" fn wp_html_api_rust_tag_processor_subdivide_text_appropriately( + processor: *mut TagProcessor, +) -> u8 { + let Some(processor) = processor.as_mut() else { + return 0; + }; + + processor.subdivide_current_text() +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_get_modifiable_text( + processor: *mut TagProcessor, + out: *mut ByteSlice, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + let Some(scan) = processor.current else { + return false; + }; + + if out.is_null() { + return false; + } + + let Some(text) = processor.current_modifiable_text(scan) else { + return false; + }; + + processor.scratch = text; + ptr::write( + out, + ByteSlice { + ptr: processor.scratch.as_ptr(), + len: processor.scratch.len(), + }, + ); + true +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_set_modifiable_text( + processor: *mut TagProcessor, + text: *const u8, + text_len: usize, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if text.is_null() && text_len > 0 { + return false; + } + + let replacement = if text_len == 0 { + &[] + } else { + slice::from_raw_parts(text, text_len) + }; + + processor.set_modifiable_text(replacement) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_current_comment_type( + processor: *const TagProcessor, +) -> u8 { + let Some(processor) = processor.as_ref() else { + return COMMENT_TYPE_NONE; + }; + + let Some(scan) = processor.current else { + return COMMENT_TYPE_NONE; + }; + + processor.comment_type(scan) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_script_content_type( + processor: *const TagProcessor, +) -> u8 { + let Some(processor) = processor.as_ref() else { + return 0; + }; + + let Some(scan) = processor.current else { + return 0; + }; + + if processor.parsing_namespace != NAMESPACE_HTML || scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return 0; + } 
+
+    let tag_name = &processor.html[scan.name_start..scan.name_start + scan.name_len];
+    if !eq_ignore_ascii_case(tag_name, b"SCRIPT") {
+        return 0;
+    }
+
+    match processor.script_content_type(scan) {
+        ScriptContentType::JavaScript => 1,
+        ScriptContentType::Json => 2,
+        ScriptContentType::Other => 0,
+    }
+}
+
+/// Writes the current token's tag name into `out` as a slice that points
+/// directly into the processor's HTML buffer (no copy is made).
+///
+/// * For a tag token the slice covers `name_start .. name_start + name_len`.
+/// * For a comment token the slice covers the span reported by
+///   `pi_target_span`; when that helper reports nothing the call fails.
+///
+/// Returns `false` when `processor` or `out` is null, when there is no
+/// current token, or when the token is neither of the kinds above. `out` is
+/// left untouched on failure.
+///
+/// NOTE(review): the slice borrows `processor.html`; presumably any call that
+/// edits the document invalidates it -- confirm against the callers.
+#[no_mangle]
+pub unsafe extern "C" fn wp_html_api_rust_tag_processor_get_tag(
+    processor: *const TagProcessor,
+    out: *mut ByteSlice,
+) -> bool {
+    let Some(processor) = processor.as_ref() else {
+        return false;
+    };
+
+    let Some(scan) = processor.current else {
+        return false;
+    };
+
+    // `ptr::write` requires a valid destination; reject a null out-pointer
+    // the same way the other out-parameter entry points do.
+    if out.is_null() {
+        return false;
+    }
+
+    if scan.token_type == TOKEN_TYPE_COMMENT {
+        let Some((target_start, target_end)) = pi_target_span(&processor.html, scan) else {
+            return false;
+        };
+
+        ptr::write(
+            out,
+            ByteSlice {
+                ptr: processor.html.as_ptr().add(target_start),
+                len: target_end - target_start,
+            },
+        );
+        return true;
+    }
+
+    if scan.token_type != TOKEN_TYPE_TAG {
+        return false;
+    }
+
+    ptr::write(
+        out,
+        ByteSlice {
+            ptr: processor.html.as_ptr().add(scan.name_start),
+            len: scan.name_len,
+        },
+    );
+
+    true
+}
+
+/// Reports whether the current token is a tag closer.
+///
+/// Returns `false` for a null processor, when there is no current token, or
+/// when the token is not a tag. A closing token whose name compares equal to
+/// `BR` is also reported as *not* a closer.
+///
+/// NOTE(review): presumably this mirrors HTML parsing `</br>` as `<br>` --
+/// confirm.
+#[no_mangle]
+pub unsafe extern "C" fn wp_html_api_rust_tag_processor_is_tag_closer(
+    processor: *const TagProcessor,
+) -> bool {
+    let Some(processor) = processor.as_ref() else {
+        return false;
+    };
+
+    let Some(scan) = processor.current else {
+        return false;
+    };
+
+    if scan.token_type != TOKEN_TYPE_TAG {
+        return false;
+    }
+
+    scan.is_closing
+        && !eq_ignore_ascii_case(&processor.html[scan.name_start..scan.name_start + scan.name_len], b"BR")
+}
+
+/// Returns the `has_self_closing_flag` recorded for the current tag token.
+///
+/// Yields `false` for a null processor, when there is no current token, or
+/// when the current token is not a tag.
+#[no_mangle]
+pub unsafe extern "C" fn wp_html_api_rust_tag_processor_has_self_closing_flag(
+    processor: *const TagProcessor,
+) -> bool {
+    let Some(processor) = processor.as_ref() else {
+        return false;
+    };
+
+    processor
+        .current
+        .filter(|scan| scan.token_type == TOKEN_TYPE_TAG)
+        .map(|scan| scan.has_self_closing_flag)
+        .unwrap_or(false)
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn
wp_html_api_rust_tag_processor_get_attribute( + processor: *mut TagProcessor, + name: *const u8, + name_len: usize, + out: *mut ByteSlice, +) -> u8 { + let Some(processor) = processor.as_mut() else { + return 0; + }; + + if name.is_null() || out.is_null() { + return 0; + } + + let name = slice::from_raw_parts(name, name_len); + match processor.get_attribute(name) { + AttributeValue::Missing => 0, + AttributeValue::Boolean => 1, + AttributeValue::String => { + ptr::write( + out, + ByteSlice { + ptr: processor.scratch.as_ptr(), + len: processor.scratch.len(), + }, + ); + 2 + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_get_attribute_names_with_prefix( + processor: *mut TagProcessor, + prefix: *const u8, + prefix_len: usize, + out: *mut ByteSlice, +) -> u8 { + let Some(processor) = processor.as_mut() else { + return 0; + }; + + if prefix.is_null() || out.is_null() { + return 0; + } + + let prefix = slice::from_raw_parts(prefix, prefix_len); + if !processor.get_attribute_names_with_prefix(prefix) { + return 0; + } + + ptr::write( + out, + ByteSlice { + ptr: processor.scratch.as_ptr(), + len: processor.scratch.len(), + }, + ); + 1 +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_set_attribute( + processor: *mut TagProcessor, + name: *const u8, + name_len: usize, + value: *const u8, + value_len: usize, + value_kind: u8, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if name.is_null() { + return false; + } + + let name = slice::from_raw_parts(name, name_len); + let value = if value.is_null() { + &[][..] 
+ } else { + slice::from_raw_parts(value, value_len) + }; + + processor.set_attribute(name, value, value_kind) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_remove_attribute( + processor: *mut TagProcessor, + name: *const u8, + name_len: usize, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if name.is_null() { + return false; + } + + let name = slice::from_raw_parts(name, name_len); + processor.remove_attribute(name) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_add_class( + processor: *mut TagProcessor, + class_name: *const u8, + class_name_len: usize, + quirks_mode: bool, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if class_name.is_null() { + return false; + } + + let class_name = slice::from_raw_parts(class_name, class_name_len); + processor.add_class(class_name, quirks_mode) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_remove_class( + processor: *mut TagProcessor, + class_name: *const u8, + class_name_len: usize, + quirks_mode: bool, +) -> bool { + let Some(processor) = processor.as_mut() else { + return false; + }; + + if class_name.is_null() { + return false; + } + + let class_name = slice::from_raw_parts(class_name, class_name_len); + processor.remove_class(class_name, quirks_mode) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_has_class( + processor: *mut TagProcessor, + class_name: *const u8, + class_name_len: usize, + quirks_mode: bool, +) -> u8 { + let Some(processor) = processor.as_mut() else { + return 0; + }; + + if class_name.is_null() { + return 0; + } + + let class_name = slice::from_raw_parts(class_name, class_name_len); + processor.has_class(class_name, quirks_mode) +} + +#[no_mangle] +pub unsafe extern "C" fn wp_html_api_rust_tag_processor_class_list( + processor: *mut TagProcessor, + out: *mut ByteSlice, + quirks_mode: bool, +) -> u8 { + let 
Some(processor) = processor.as_mut() else {
+        return 0;
+    };
+
+    if out.is_null() || !processor.class_list(quirks_mode) {
+        return 0;
+    }
+
+    ptr::write(
+        out,
+        ByteSlice {
+            ptr: processor.scratch.as_ptr(),
+            len: processor.scratch.len(),
+        },
+    );
+    1
+}
+
+/// Writes a slice covering the processor's entire HTML buffer into `out`.
+///
+/// The slice points at `processor.html` itself; nothing is copied. Returns
+/// `false` (leaving `out` untouched) when `processor` or `out` is null.
+///
+/// NOTE(review): presumably the slice is invalidated by any later call that
+/// edits the document -- confirm against the callers.
+#[no_mangle]
+pub unsafe extern "C" fn wp_html_api_rust_tag_processor_get_html(
+    processor: *const TagProcessor,
+    out: *mut ByteSlice,
+) -> bool {
+    let Some(processor) = processor.as_ref() else {
+        return false;
+    };
+
+    // `ptr::write` requires a valid destination; reject a null out-pointer
+    // the same way the other out-parameter entry points do.
+    if out.is_null() {
+        return false;
+    }
+
+    ptr::write(
+        out,
+        ByteSlice {
+            ptr: processor.html.as_ptr(),
+            len: processor.html.len(),
+        },
+    );
+
+    true
+}
+
+enum AttributeValue {
+    Missing,
+    Boolean,
+    String,
+}
+
+struct ClassEntry {
+    name: Vec,
+    comparable: Vec,
+}
+
+impl TagProcessor {
+    fn current_modifiable_text(&self, scan: TagScan) -> Option> {
+        match scan.token_type {
+            TOKEN_TYPE_TEXT => {
+                let mut raw = &self.html[scan.tag_start..scan.token_end];
+                if self.text_follows_pre_or_listing(scan.tag_start) {
+                    raw = strip_initial_newline(raw);
+                }
+                let null_transform = if self.parsing_namespace == NAMESPACE_HTML {
+                    NullTransform::Remove
+                } else {
+                    NullTransform::Replace
+                };
+                Some(transform_text(raw, true, null_transform))
+            }
+            TOKEN_TYPE_CDATA if scan.token_end >= scan.tag_start + 9 => {
+                let token = &self.html[scan.tag_start..scan.token_end];
+                let text_end = if token.ends_with(b"]]>") {
+                    scan.token_end - 3
+                } else {
+                    scan.token_end
+                };
+                Some(transform_text(
+                    &self.html[scan.tag_start + 9..text_end],
+                    false,
+                    NullTransform::Replace,
+                ))
+            }
+            TOKEN_TYPE_DOCTYPE if scan.token_end > scan.tag_start + 9 => {
+                Some(self.html[scan.tag_start + 9..scan.token_end - 1].to_vec())
+            }
+            TOKEN_TYPE_COMMENT => self.comment_modifiable_text(scan),
+            TOKEN_TYPE_FUNKY_COMMENT => {
+                let text_end = if self.html.get(scan.token_end.saturating_sub(1)) == Some(&b'>') {
+                    scan.token_end - 1
+                } else {
+                    scan.token_end
+                };
+                Some(transform_text(
+                    &self.html[scan.tag_start + 2..text_end],
+                    false,
+
NullTransform::Replace, + )) + } + TOKEN_TYPE_TAG if scan.token_end > scan.tag_end => { + let inner = &self.html[scan.tag_end..scan.token_end]; + let tag_name = &self.html[scan.name_start..scan.name_start + scan.name_len]; + let relative = if find_special_closer(&self.html[..scan.token_end], scan.tag_end, tag_name).is_some() { + find_last_subslice(inner, b" None, + } + } + + fn comment_modifiable_text(&self, scan: TagScan) -> Option> { + let token = &self.html[scan.tag_start..scan.token_end]; + + if token.starts_with(b"") { + scan.token_end.saturating_sub(4) + } else if token.ends_with(b"-->") { + scan.token_end.saturating_sub(3) + } else if token.ends_with(b">") { + scan.token_end.saturating_sub(1) + } else { + scan.token_end + }; + if end < body_start { + end = body_start; + } + return Some(transform_text( + &self.html[body_start..end], + false, + NullTransform::Replace, + )); + } + + if starts_with_ignore_ascii_case(token, b"") { + return Some(transform_text( + &self.html[scan.tag_start + 9..scan.token_end - 3], + false, + NullTransform::Replace, + )); + } + } + + if token.starts_with(b"") { + if let Some((_target_start, target_end)) = pi_target_span(&self.html, scan) { + let text_end = scan.token_end.saturating_sub(2); + return Some(transform_text( + &self.html[target_end..text_end], + false, + NullTransform::Replace, + )); + } + } + + let text_end = if self.html.get(scan.token_end.saturating_sub(1)) == Some(&b'>') { + scan.token_end - 1 + } else { + scan.token_end + }; + Some(transform_text( + &self.html[scan.tag_start + 2..text_end], + false, + NullTransform::Replace, + )) + } + + fn comment_type(&self, scan: TagScan) -> u8 { + if scan.token_type == TOKEN_TYPE_FUNKY_COMMENT { + return COMMENT_TYPE_INVALID; + } + + if scan.token_type != TOKEN_TYPE_COMMENT { + return COMMENT_TYPE_NONE; + } + + let token = &self.html[scan.tag_start..scan.token_end]; + if token.starts_with(b"") || token.ends_with(b"") || token.ends_with(b"") { + return 
COMMENT_TYPE_ABRUPTLY_CLOSED; + } + return COMMENT_TYPE_HTML; + } + + if starts_with_ignore_ascii_case(token, b"") { + return COMMENT_TYPE_CDATA_LOOKALIKE; + } + return COMMENT_TYPE_INVALID; + } + + if token.starts_with(b"") { + return COMMENT_TYPE_PI_LOOKALIKE; + } + return COMMENT_TYPE_INVALID; + } + + COMMENT_TYPE_INVALID + } + + fn text_follows_pre_or_listing(&self, text_start: usize) -> bool { + if text_start == 0 || self.html[text_start - 1] != b'>' { + return false; + } + + let Some(tag_start) = self.html[..text_start].iter().rposition(|&byte| byte == b'<') else { + return false; + }; + + let ScanResult::Token(scan) = scan_next_token_in_namespace(&self.html, tag_start, self.parsing_namespace) else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing || scan.tag_end != text_start { + return false; + } + + matches_ignore_ascii_case( + &self.html[scan.name_start..scan.name_start + scan.name_len], + &[&b"PRE"[..], &b"LISTING"[..]], + ) + } + + fn subdivide_current_text(&mut self) -> u8 { + const TEXT_IS_GENERIC: u8 = 0; + const TEXT_IS_NULL_SEQUENCE: u8 = 1; + const TEXT_IS_WHITESPACE: u8 = 2; + + let Some(scan) = self.current else { + return TEXT_IS_GENERIC; + }; + + if scan.token_type != TOKEN_TYPE_TEXT || scan.tag_start >= scan.token_end { + return TEXT_IS_GENERIC; + } + + let mut at = scan.tag_start; + while at < scan.token_end && self.html[at] == 0 { + at += 1; + } + + if at > scan.tag_start { + self.truncate_current_text(at); + return TEXT_IS_NULL_SEQUENCE; + } + + while at < scan.token_end { + while at < scan.token_end && is_html_whitespace(self.html[at]) { + at += 1; + } + + if at < scan.token_end && self.html[at] == b'&' { + if let Some((decoded, consumed)) = + decode_character_reference(DecodeContext::Data, &self.html[at..scan.token_end]) + { + if decoded.is_html_whitespace() { + at += consumed; + continue; + } + } + } + + break; + } + + if at > scan.tag_start { + self.truncate_current_text(at); + return 
TEXT_IS_WHITESPACE; + } + + TEXT_IS_GENERIC + } + + fn truncate_current_text(&mut self, end: usize) { + if let Some(scan) = self.current.as_mut() { + scan.tag_end = end; + scan.token_end = end; + } + self.offset = end; + } + + fn set_modifiable_text(&mut self, plaintext: &[u8]) -> bool { + let Some(scan) = self.current else { + return false; + }; + + match scan.token_type { + TOKEN_TYPE_TEXT => { + if self.parsing_namespace != NAMESPACE_HTML { + return false; + } + let replacement = escape_html_text(plaintext); + self.replace_range(scan.tag_start, scan.token_end, &replacement); + true + } + TOKEN_TYPE_COMMENT => { + if self.comment_type(scan) != COMMENT_TYPE_HTML { + return false; + } + if find_subslice(plaintext, b"-->").is_some() + || find_subslice(plaintext, b"--!>").is_some() + { + return false; + } + let Some((start, end)) = self.comment_body_span(scan) else { + return false; + }; + self.replace_range(start, end, plaintext); + true + } + TOKEN_TYPE_TAG => self.set_atomic_modifiable_text(scan, plaintext), + _ => false, + } + } + + fn set_atomic_modifiable_text(&mut self, scan: TagScan, plaintext: &[u8]) -> bool { + if self.parsing_namespace != NAMESPACE_HTML || scan.is_closing || scan.token_end <= scan.tag_end { + return false; + } + + let Some((start, end)) = self.atomic_text_span(scan) else { + return false; + }; + + let tag_name = &self.html[scan.name_start..scan.name_start + scan.name_len]; + let replacement = if eq_ignore_ascii_case(tag_name, b"SCRIPT") { + let script_type = self.script_content_type(scan); + match script_type { + ScriptContentType::JavaScript | ScriptContentType::Json => { + escape_script_text(plaintext) + } + ScriptContentType::Other => { + if find_case_insensitive_script_tag(plaintext).is_some() { + return false; + } + plaintext.to_vec() + } + } + } else if eq_ignore_ascii_case(tag_name, b"STYLE") { + escape_rawtext_closer(plaintext, b"style", b"\\3c\\2f") + } else if eq_ignore_ascii_case(tag_name, b"TEXTAREA") { + let normalized = 
normalize_newlines(plaintext); + let mut escaped = escape_rcdata_closer(&normalized, b"textarea"); + if matches!(escaped.first(), Some(b'\n')) { + let mut with_extra_newline = Vec::with_capacity(escaped.len() + 1); + with_extra_newline.push(b'\n'); + with_extra_newline.extend_from_slice(&escaped); + escaped = with_extra_newline; + } + escaped + } else if eq_ignore_ascii_case(tag_name, b"TITLE") { + escape_rcdata_closer(plaintext, b"title") + } else { + return false; + }; + + self.replace_atomic_text_range(start, end, &replacement); + true + } + + fn atomic_text_span(&self, scan: TagScan) -> Option<(usize, usize)> { + if scan.token_type != TOKEN_TYPE_TAG || scan.token_end <= scan.tag_end { + return None; + } + + let tag_name = &self.html[scan.name_start..scan.name_start + scan.name_len]; + if find_special_closer(&self.html[..scan.token_end], scan.tag_end, tag_name).is_some() { + let inner = &self.html[scan.tag_end..scan.token_end]; + return find_last_subslice(inner, b" Option<(usize, usize)> { + if scan.token_type != TOKEN_TYPE_COMMENT { + return None; + } + + let token = &self.html[scan.tag_start..scan.token_end]; + if !token.starts_with(b"") { + scan.token_end.saturating_sub(4) + } else if token.ends_with(b"-->") { + scan.token_end.saturating_sub(3) + } else { + scan.token_end.saturating_sub(1) + }; + + if body_end < body_start { + body_end = body_start; + } + + Some((body_start, body_end)) + } + + fn script_content_type(&self, scan: TagScan) -> ScriptContentType { + let type_attr = self.find_attribute(scan, b"type"); + let language_attr = self.find_attribute(scan, b"language"); + + if let Some(attribute) = type_attr { + let type_string = match attribute.value { + None => return ScriptContentType::JavaScript, + Some((start, end)) => { + let decoded = decode_html_attribute(&self.html[start..end]); + let trimmed = trim_ascii_whitespace(&decoded); + if trimmed.is_empty() { + return ScriptContentType::JavaScript; + } + ascii_lowercase_vec(trimmed) + } + }; + + return 
classify_script_type_string(&type_string); + } + + let Some(attribute) = language_attr else { + return ScriptContentType::JavaScript; + }; + + let language = match attribute.value { + None => return ScriptContentType::JavaScript, + Some((start, end)) => decode_html_attribute(&self.html[start..end]), + }; + + if language.is_empty() { + return ScriptContentType::JavaScript; + } + + let mut type_string = Vec::with_capacity(b"text/".len() + language.len()); + type_string.extend_from_slice(b"text/"); + type_string.extend(ascii_lowercase_vec(&language)); + classify_script_type_string(&type_string) + } + + fn get_attribute(&mut self, wanted_name: &[u8]) -> AttributeValue { + let Some(scan) = self.current else { + return AttributeValue::Missing; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return AttributeValue::Missing; + } + + let Some(attribute) = self.find_attribute(scan, wanted_name) else { + return AttributeValue::Missing; + }; + + let Some((value_start, value_end)) = attribute.value else { + return AttributeValue::Boolean; + }; + + self.scratch = decode_html_attribute(&self.html[value_start..value_end]); + AttributeValue::String + } + + fn set_attribute(&mut self, name: &[u8], value: &[u8], value_kind: u8) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing || !is_valid_attribute_name(name) { + return false; + } + + if value_kind == 0 { + return self.remove_attribute(name); + } + + let comparable_name = ascii_lowercase_vec(name); + let replacement = serialize_attribute(name, value, value_kind); + + if let Some(attribute) = self.find_attribute(scan, name) { + self.replace_range(attribute.name_start, attribute.full_end, &replacement); + return true; + } + + let mut inserted = Vec::with_capacity(replacement.len() + 1); + inserted.push(b' '); + inserted.extend_from_slice(&replacement); + + let insertion_point = scan.name_start + scan.name_len; + 
self.replace_range(insertion_point, insertion_point, &inserted); + if !self + .inserted_attributes + .iter() + .any(|inserted_name| inserted_name == &comparable_name) + { + self.inserted_attributes.push(comparable_name); + } + true + } + + fn remove_attribute(&mut self, name: &[u8]) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return false; + } + + let comparable_name = ascii_lowercase_vec(name); + let remove_inserted_space = self + .inserted_attributes + .iter() + .any(|inserted_name| inserted_name == &comparable_name); + let mut removed = false; + while let Some(attribute) = self.current.and_then(|current| self.find_attribute(current, name)) { + let removal_start = if remove_inserted_space + && attribute.name_start > 0 + && is_html_whitespace(self.html[attribute.name_start - 1]) + { + attribute.name_start - 1 + } else { + attribute.name_start + }; + self.replace_range(removal_start, attribute.full_end, &[]); + removed = true; + } + + if removed { + self.inserted_attributes + .retain(|inserted_name| inserted_name != &comparable_name); + } + + removed + } + + fn add_class(&mut self, class_name: &[u8], quirks_mode: bool) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return false; + } + + let comparable_class_name = comparable_class_bytes(class_name, quirks_mode); + if self + .current_raw_class_entries(quirks_mode) + .iter() + .any(|class| class.comparable.as_slice() == comparable_class_name.as_slice()) + { + return true; + } + + match self.get_attribute(b"class") { + AttributeValue::String => { + let mut value = self.scratch.clone(); + trim_html_whitespace_in_place(&mut value); + if !value.is_empty() { + value.push(b' '); + } + value.extend_from_slice(class_name); + self.set_attribute(b"class", &value, 2) + } + AttributeValue::Boolean | AttributeValue::Missing => { + 
self.set_attribute(b"class", class_name, 2) + } + } + } + + fn remove_class(&mut self, class_name: &[u8], quirks_mode: bool) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return false; + } + + let comparable_class_name = comparable_class_bytes(class_name, quirks_mode); + let entries = self.current_raw_class_entries(quirks_mode); + let classes: Vec> = entries + .iter() + .filter(|class| class.comparable.as_slice() != comparable_class_name.as_slice()) + .map(|class| class.name.clone()) + .collect(); + + if classes.len() == entries.len() { + return true; + } + + if classes.is_empty() { + let _ = self.remove_attribute(b"class"); + return true; + } + + let value = join_classes(&classes); + self.set_attribute(b"class", &value, 2) + } + + fn has_class(&mut self, class_name: &[u8], quirks_mode: bool) -> u8 { + let Some(scan) = self.current else { + return 0; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return 0; + } + + let comparable_class_name = comparable_class_bytes(class_name, quirks_mode); + + if self + .current_public_class_entries(quirks_mode) + .into_iter() + .any(|class| class.comparable.as_slice() == comparable_class_name.as_slice()) + { + 2 + } else { + 1 + } + } + + fn class_list(&mut self, quirks_mode: bool) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return false; + } + + let classes = self.current_public_class_entries(quirks_mode); + self.scratch.clear(); + for class in classes { + if !self.scratch.is_empty() { + self.scratch.push(0x1f); + } + self.scratch.extend_from_slice(if quirks_mode { + &class.comparable + } else { + &class.name + }); + } + + true + } + + fn current_raw_class_entries(&mut self, quirks_mode: bool) -> Vec { + let value = match self.get_attribute(b"class") { + AttributeValue::String => self.scratch.clone(), + AttributeValue::Boolean | 
AttributeValue::Missing => Vec::new(), + }; + + let mut classes = Vec::new(); + for class in value.split(|byte| is_html_whitespace(*byte)) { + if class.is_empty() { + continue; + } + + let name = class.to_vec(); + let comparable = comparable_class_bytes(&name, quirks_mode); + if classes + .iter() + .any(|seen: &ClassEntry| seen.comparable.as_slice() == comparable.as_slice()) + { + continue; + } + classes.push(ClassEntry { name, comparable }); + } + + classes + } + + fn current_public_class_entries(&mut self, quirks_mode: bool) -> Vec { + let value = match self.get_attribute(b"class") { + AttributeValue::String => self.scratch.clone(), + AttributeValue::Boolean | AttributeValue::Missing => Vec::new(), + }; + + let mut classes = Vec::new(); + for class in value.split(|byte| is_html_whitespace(*byte)) { + if class.is_empty() { + continue; + } + + let name = normalize_class_bytes(class); + let comparable = comparable_class_bytes(&name, quirks_mode); + if classes + .iter() + .any(|seen: &ClassEntry| seen.comparable.as_slice() == comparable.as_slice()) + { + continue; + } + classes.push(ClassEntry { name, comparable }); + } + + classes + } + + fn get_attribute_names_with_prefix(&mut self, prefix: &[u8]) -> bool { + let Some(scan) = self.current else { + return false; + }; + + if scan.token_type != TOKEN_TYPE_TAG || scan.is_closing { + return false; + } + + self.scratch.clear(); + let mut at = scan.name_start + scan.name_len; + let mut end = scan.tag_end.saturating_sub(1); + let comparable_prefix = comparable_attribute_name(prefix); + let mut seen_attribute_names: Vec> = Vec::new(); + + if tag_ends_with_syntactic_self_closing_flag( + &self.html, + scan.name_start + scan.name_len, + scan.tag_end, + ) { + end = end.saturating_sub(1); + } + + while at < end { + while at < end && (is_html_whitespace(self.html[at]) || self.html[at] == b'/') { + at += 1; + } + + if at >= end { + break; + } + + let name_start = at; + while at < end && !is_attribute_name_delimiter(self.html[at]) 
{ + at += 1; + } + + if name_start == at { + at += 1; + continue; + } + + let name_end = at; + let comparable_name = comparable_attribute_name(&self.html[name_start..name_end]); + if comparable_name.starts_with(&comparable_prefix) + && !seen_attribute_names + .iter() + .any(|seen| seen.as_slice() == comparable_name.as_slice()) + { + if !self.scratch.is_empty() { + self.scratch.push(0); + } + self.scratch.extend_from_slice(&comparable_name); + seen_attribute_names.push(comparable_name); + } + + while at < end && is_html_whitespace(self.html[at]) { + at += 1; + } + + if at < end && self.html[at] == b'=' { + at += 1; + while at < end && is_html_whitespace(self.html[at]) { + at += 1; + } + + if at < end && (self.html[at] == b'\'' || self.html[at] == b'"') { + let quote = self.html[at]; + at += 1; + while at < end && self.html[at] != quote { + at += 1; + } + if at < end { + at += 1; + } + } else { + while at < end && !is_html_whitespace(self.html[at]) { + at += 1; + } + } + } + } + + true + } + + fn find_attribute(&self, scan: TagScan, wanted_name: &[u8]) -> Option { + let mut at = scan.name_start + scan.name_len; + let mut end = scan.tag_end.saturating_sub(1); + let comparable_wanted_name = comparable_attribute_name(wanted_name); + + if tag_ends_with_syntactic_self_closing_flag( + &self.html, + scan.name_start + scan.name_len, + scan.tag_end, + ) { + end = end.saturating_sub(1); + } + + while at < end { + while at < end && (is_html_whitespace(self.html[at]) || self.html[at] == b'/') { + at += 1; + } + + if at >= end { + break; + } + + let name_start = at; + while at < end && !is_attribute_name_delimiter(self.html[at]) { + at += 1; + } + + if name_start == at { + at += 1; + continue; + } + + let name_end = at; + let mut full_end = name_end; + while at < end && is_html_whitespace(self.html[at]) { + at += 1; + } + + let mut value = None; + if at < end && self.html[at] == b'=' { + at += 1; + while at < end && is_html_whitespace(self.html[at]) { + at += 1; + } + + if at < 
end && (self.html[at] == b'\'' || self.html[at] == b'"') { + let quote = self.html[at]; + at += 1; + let value_start = at; + while at < end && self.html[at] != quote { + at += 1; + } + value = Some((value_start, at)); + if at < end { + at += 1; + } + } else { + let value_start = at; + while at < end && !is_html_whitespace(self.html[at]) { + at += 1; + } + value = Some((value_start, at)); + } + full_end = at; + } + + if comparable_attribute_name(&self.html[name_start..name_end]) == comparable_wanted_name { + return Some(AttributeSpan { + name_start, + full_end, + value, + }); + } + } + + None + } + + fn replace_range(&mut self, start: usize, end: usize, replacement: &[u8]) { + self.replace_range_internal(start, end, replacement, true); + } + + fn replace_range_preserving_cursor_at_inserted_start( + &mut self, + start: usize, + end: usize, + replacement: &[u8], + ) { + self.replace_range_internal(start, end, replacement, false); + } + + fn replace_atomic_text_range(&mut self, start: usize, end: usize, replacement: &[u8]) { + let old_len = end - start; + let new_len = replacement.len(); + self.html.splice(start..end, replacement.iter().copied()); + + let delta = new_len as isize - old_len as isize; + if delta == 0 { + return; + } + + if let Some(scan) = self.current.as_mut() { + if should_shift_point(scan.token_end, end, old_len, true) { + scan.token_end = scan.token_end.saturating_add_signed(delta); + } + scan.has_self_closing_flag = scan.tag_end >= 2 && self.html[scan.tag_end - 2] == b'/'; + } + + if should_shift_point(self.offset, end, old_len, true) { + self.offset = self.offset.saturating_add_signed(delta); + } + } + + fn replace_range_internal( + &mut self, + start: usize, + end: usize, + replacement: &[u8], + shift_points_at_zero_width_end: bool, + ) { + let old_len = end - start; + let new_len = replacement.len(); + self.html.splice(start..end, replacement.iter().copied()); + + let delta = new_len as isize - old_len as isize; + if delta == 0 { + return; + } + 
+ if let Some(scan) = self.current.as_mut() { + if should_shift_point(scan.tag_end, end, old_len, shift_points_at_zero_width_end) { + scan.tag_end = scan.tag_end.saturating_add_signed(delta); + } + if should_shift_point(scan.token_end, end, old_len, shift_points_at_zero_width_end) { + scan.token_end = scan.token_end.saturating_add_signed(delta); + } + scan.has_self_closing_flag = scan.tag_end >= 2 && self.html[scan.tag_end - 2] == b'/'; + } + + if should_shift_point(self.offset, end, old_len, shift_points_at_zero_width_end) { + self.offset = self.offset.saturating_add_signed(delta); + } + } +} + +fn should_shift_point( + point: usize, + edit_end: usize, + old_len: usize, + shift_points_at_zero_width_end: bool, +) -> bool { + point > edit_end || (point == edit_end && (old_len > 0 || shift_points_at_zero_width_end)) +} + +fn trim_html_whitespace_in_place(value: &mut Vec) { + let start = value + .iter() + .position(|&byte| !is_html_whitespace(byte)) + .unwrap_or(value.len()); + let end = value + .iter() + .rposition(|&byte| !is_html_whitespace(byte)) + .map(|index| index + 1) + .unwrap_or(start); + + if start > 0 || end < value.len() { + value.copy_within(start..end, 0); + value.truncate(end - start); + } +} + +#[derive(Clone, Copy)] +enum NullTransform { + Remove, + Replace, +} + +#[derive(Clone, Copy, Eq, PartialEq)] +enum DecodeContext { + Data, + Attribute, +} + +#[derive(Clone, Copy)] +enum CharacterReference { + Scalar(char), + Text(&'static str), +} + +impl CharacterReference { + fn append_to(self, output: &mut Vec) { + match self { + CharacterReference::Scalar(value) => { + let mut buffer = [0; 4]; + output.extend_from_slice(value.encode_utf8(&mut buffer).as_bytes()); + } + CharacterReference::Text(value) => output.extend_from_slice(value.as_bytes()), + } + } + + fn is_line_feed(self) -> bool { + matches!(self, CharacterReference::Scalar('\n')) + } + + fn is_null(self) -> bool { + matches!(self, CharacterReference::Scalar('\0')) + } + + fn 
is_html_whitespace(self) -> bool { + matches!( + self, + CharacterReference::Scalar(' ') + | CharacterReference::Scalar('\t') + | CharacterReference::Scalar('\n') + | CharacterReference::Scalar('\u{000C}') + | CharacterReference::Scalar('\r') + ) + } +} + +#[derive(Clone, Copy)] +enum ScriptContentType { + JavaScript, + Json, + Other, +} + +fn classify_script_type_string(type_string: &[u8]) -> ScriptContentType { + match type_string { + b"application/ecmascript" + | b"application/javascript" + | b"application/x-ecmascript" + | b"application/x-javascript" + | b"text/ecmascript" + | b"text/javascript" + | b"text/javascript1.0" + | b"text/javascript1.1" + | b"text/javascript1.2" + | b"text/javascript1.3" + | b"text/javascript1.4" + | b"text/javascript1.5" + | b"text/jscript" + | b"text/livescript" + | b"text/x-ecmascript" + | b"text/x-javascript" + | b"module" => ScriptContentType::JavaScript, + b"importmap" | b"speculationrules" | b"application/json" | b"text/json" => { + ScriptContentType::Json + } + _ => ScriptContentType::Other, + } +} + +fn transform_text(input: &[u8], decode_entities: bool, null_transform: NullTransform) -> Vec { + let mut output = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if input[at] == b'\r' { + output.push(b'\n'); + at += if input.get(at + 1) == Some(&b'\n') { 2 } else { 1 }; + continue; + } + + if input[at] == 0 { + match null_transform { + NullTransform::Remove => {} + NullTransform::Replace => output.extend_from_slice("\u{FFFD}".as_bytes()), + } + at += 1; + continue; + } + + if decode_entities && input[at] == b'&' { + if let Some((decoded, consumed)) = + decode_character_reference(DecodeContext::Data, &input[at..]) + { + if decoded.is_null() { + match null_transform { + NullTransform::Remove => {} + NullTransform::Replace => output.extend_from_slice("\u{FFFD}".as_bytes()), + } + } else { + decoded.append_to(&mut output); + } + at += consumed; + continue; + } + } + + output.push(input[at]); + at += 
1; + } + + output +} + +fn strip_initial_newline(input: &[u8]) -> &[u8] { + if input.starts_with(b"\r\n") { + return &input[2..]; + } + + if input.starts_with(b"\r") { + return &input[1..]; + } + + if input.starts_with(b"\n") { + return &input[1..]; + } + + if let Some((decoded, consumed)) = decode_character_reference(DecodeContext::Data, input) { + if !decoded.is_line_feed() { + return input; + } + + return &input[consumed..]; + } + + input +} + +fn normalize_newlines(input: &[u8]) -> Vec { + let mut normalized = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if input[at] == b'\r' { + normalized.push(b'\n'); + at += if input.get(at + 1) == Some(&b'\n') { 2 } else { 1 }; + continue; + } + + normalized.push(input[at]); + at += 1; + } + + normalized +} + +fn pi_target_span(html: &[u8], scan: TagScan) -> Option<(usize, usize)> { + if scan.token_type != TOKEN_TYPE_COMMENT { + return None; + } + + if scan.tag_start + 3 > scan.token_end || !html[scan.tag_start..scan.token_end].starts_with(b"= scan.token_end || !is_html_whitespace(html[target_end]) { + return None; + } + + Some((target_start, target_end)) +} + +fn is_pi_target_char(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b':' | b'.') +} + +fn escape_html_text(input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + for &byte in input { + match byte { + b'<' => output.extend_from_slice(b"<"), + b'>' => output.extend_from_slice(b">"), + b'&' => output.extend_from_slice(b"&"), + b'"' => output.extend_from_slice(b"""), + b'\'' => output.extend_from_slice(b"'"), + _ => output.push(byte), + } + } + output +} + +fn escape_script_text(input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if let Some((script_start, escape_at)) = script_tag_match_at(input, at) { + output.extend_from_slice(&input[at..escape_at]); + let escaped = if 
input[escape_at].is_ascii_uppercase() { + b"\\u0053" + } else { + b"\\u0073" + }; + output.extend_from_slice(escaped); + at = escape_at + 1; + if script_start == at { + at += 1; + } + continue; + } + + output.push(input[at]); + at += 1; + } + + output +} + +fn find_case_insensitive_script_tag(input: &[u8]) -> Option { + let mut at = 0; + while at < input.len() { + if script_tag_match_at(input, at).is_some() { + return Some(at); + } + at += 1; + } + None +} + +fn script_tag_match_at(input: &[u8], at: usize) -> Option<(usize, usize)> { + if at >= input.len() || input[at] != b'<' { + return None; + } + + let (name_start, escape_at) = if at + 1 < input.len() && input[at + 1] == b'/' { + (at + 2, at + 2) + } else { + (at + 1, at + 1) + }; + + let name_end = name_start + b"script".len(); + if name_end > input.len() || !eq_ignore_ascii_case(&input[name_start..name_end], b"script") { + return None; + } + + if name_end < input.len() && !is_tag_name_delimiter(input[name_end]) { + return None; + } + + Some((at, escape_at)) +} + +fn escape_rawtext_closer(input: &[u8], tag_name: &[u8], prefix: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if starts_with_rawtext_closer(input, at, tag_name) { + output.extend_from_slice(prefix); + output.extend_from_slice(&input[at + 2..at + 2 + tag_name.len()]); + at += 2 + tag_name.len(); + continue; + } + + output.push(input[at]); + at += 1; + } + + output +} + +fn escape_rcdata_closer(input: &[u8], tag_name: &[u8]) -> Vec { + escape_rawtext_closer(input, tag_name, b"</") +} + +fn starts_with_rawtext_closer(input: &[u8], at: usize, tag_name: &[u8]) -> bool { + if at + 2 + tag_name.len() > input.len() || input[at] != b'<' || input[at + 1] != b'/' { + return false; + } + + let name_start = at + 2; + let name_end = name_start + tag_name.len(); + if !eq_ignore_ascii_case(&input[name_start..name_end], tag_name) { + return false; + } + + name_end == input.len() || 
is_tag_name_delimiter(input[name_end]) +} + +fn trim_ascii_whitespace(value: &[u8]) -> &[u8] { + let start = value + .iter() + .position(|&byte| !is_html_whitespace(byte)) + .unwrap_or(value.len()); + let end = value + .iter() + .rposition(|&byte| !is_html_whitespace(byte)) + .map(|index| index + 1) + .unwrap_or(start); + &value[start..end] +} + +fn scan_next_tag(html: &[u8], offset: usize) -> Option { + let mut at = offset.min(html.len()); + + loop { + match scan_next_token(html, at) { + ScanResult::Token(scan) if scan.token_type == TOKEN_TYPE_TAG => return Some(scan), + ScanResult::Token(scan) => at = scan.token_end, + ScanResult::Incomplete | ScanResult::None => return None, + } + } +} + +enum ScanResult { + Token(TagScan), + Incomplete, + None, +} + +fn scan_next_token(html: &[u8], offset: usize) -> ScanResult { + scan_next_token_in_namespace(html, offset, NAMESPACE_HTML) +} + +fn scan_next_token_in_namespace(html: &[u8], offset: usize, namespace: u8) -> ScanResult { + let len = html.len(); + let at = offset.min(len); + + if at >= len { + return ScanResult::None; + } + + let Some(tag_start) = find_next_token_start(html, at) else { + return ScanResult::Token(text_scan(at, len)); + }; + + if tag_start > at { + return ScanResult::Token(text_scan(at, tag_start)); + } + + if tag_start + 1 >= len { + return ScanResult::Token(text_scan(tag_start, len)); + } + + if starts_with_ignore_ascii_case(&html[tag_start..], b"") { + return ScanResult::Token(non_tag_scan(tag_start, tag_start + 5, TOKEN_TYPE_COMMENT)); + } + + if tag_start + 5 < html.len() && html[tag_start..].starts_with(b"") { + return ScanResult::Token(non_tag_scan(tag_start, tag_start + 6, TOKEN_TYPE_COMMENT)); + } + + let Some(token_end) = find_comment_end(html, tag_start + 4) else { + return ScanResult::Incomplete; + }; + + ScanResult::Token(non_tag_scan(tag_start, token_end, TOKEN_TYPE_COMMENT)) +} + +fn find_comment_end(html: &[u8], offset: usize) -> Option { + let mut at = offset; + + while at + 2 < 
html.len() { + let relative = find_subslice(&html[at..], b"--")?; + let dash_start = at + relative; + let after_dashes = dash_start + 2; + + if after_dashes < html.len() && html[after_dashes] == b'>' { + return Some(after_dashes + 1); + } + + if after_dashes + 1 < html.len() + && html[after_dashes] == b'!' + && html[after_dashes + 1] == b'>' + { + return Some(after_dashes + 2); + } + + at = dash_start + 1; + } + + None +} + +fn scan_cdata(html: &[u8], tag_start: usize, namespace: u8) -> ScanResult { + if namespace == NAMESPACE_HTML { + return scan_markup_declaration(html, tag_start, TOKEN_TYPE_COMMENT); + } + + let Some(relative_end) = find_subslice(&html[tag_start + 9..], b"]]>") else { + return ScanResult::Token(non_tag_scan(tag_start, html.len(), TOKEN_TYPE_CDATA)); + }; + + let token_end = tag_start + 9 + relative_end + 3; + ScanResult::Token(non_tag_scan(tag_start, token_end, TOKEN_TYPE_CDATA)) +} + +fn scan_markup_declaration(html: &[u8], tag_start: usize, token_type: u8) -> ScanResult { + let Some(relative_end) = html[tag_start + 2..].iter().position(|&byte| byte == b'>') else { + return match token_type { + TOKEN_TYPE_COMMENT | TOKEN_TYPE_FUNKY_COMMENT => { + ScanResult::Token(non_tag_scan(tag_start, html.len(), token_type)) + } + _ => ScanResult::Incomplete, + }; + }; + + let token_end = tag_start + 2 + relative_end + 1; + ScanResult::Token(non_tag_scan(tag_start, token_end, token_type)) +} + +fn non_tag_scan(start: usize, end: usize, token_type: u8) -> TagScan { + TagScan { + tag_start: start, + tag_end: end, + name_start: start, + name_len: 0, + is_closing: false, + has_self_closing_flag: false, + token_end: end, + token_type, + } +} + +fn is_special_atomic_tag(tag_name: &[u8]) -> bool { + matches_ignore_ascii_case( + tag_name, + &[ + &b"IFRAME"[..], + &b"NOEMBED"[..], + &b"NOFRAMES"[..], + &b"SCRIPT"[..], + &b"STYLE"[..], + &b"TEXTAREA"[..], + &b"TITLE"[..], + &b"XMP"[..], + ], + ) +} + +fn should_consume_unclosed_atomic_tag_at_eof(tag_name: &[u8]) -> 
bool { + is_special_atomic_tag(tag_name) +} + +fn find_special_closer(html: &[u8], offset: usize, tag_name: &[u8]) -> Option { + if eq_ignore_ascii_case(tag_name, b"SCRIPT") { + return find_script_closer(html, offset); + } + + let mut at = offset; + + while at + 3 + tag_name.len() <= html.len() { + let relative = find_subslice(&html[at..], b" usize { + if eq_ignore_ascii_case(tag_name, b"SCRIPT") { + return unclosed_script_text_end(html, text_start, token_end); + } + + let mut at = text_start; + + while at + 2 + tag_name.len() <= token_end { + let Some(relative) = find_subslice(&html[at..token_end], b" usize { + let mut at = text_start; + let mut escaped = false; + let mut double_escaped = false; + + while at < token_end { + if html[at..token_end].starts_with(b"") { + at += 5; + continue; + } + + if html[at..token_end].starts_with(b"") { + escaped = false; + double_escaped = false; + at += 3; + continue; + } + + if starts_with_ignore_ascii_case(&html[at..token_end], b" Option { + let mut at = offset; + let mut escaped = false; + let mut double_escaped = false; + + while at < html.len() { + if html[at..].starts_with(b"") { + at += 5; + continue; + } + + if html[at..].starts_with(b"") { + escaped = false; + double_escaped = false; + at += 3; + continue; + } + + if starts_with_ignore_ascii_case(&html[at..], b" Option { + if needle.is_empty() { + return Some(0); + } + + haystack + .windows(needle.len()) + .position(|candidate| candidate == needle) +} + +fn find_last_subslice(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() { + return Some(haystack.len()); + } + + haystack + .windows(needle.len()) + .rposition(|candidate| candidate == needle) +} + +fn matches_ignore_ascii_case(value: &[u8], candidates: &[&[u8]]) -> bool { + candidates + .iter() + .any(|candidate| value.len() == candidate.len() && eq_ignore_ascii_case(value, candidate)) +} + +fn is_tag_name_delimiter(byte: u8) -> bool { + matches!(byte, b' ' | b'\t' | b'\n' | b'\x0c' | b'\r' | b'/' | 
b'>') +} + +fn is_attribute_name_delimiter(byte: u8) -> bool { + matches!(byte, b' ' | b'\t' | b'\n' | b'\x0c' | b'\r' | b'/' | b'>' | b'=') +} + +fn is_html_whitespace(byte: u8) -> bool { + matches!(byte, b' ' | b'\t' | b'\n' | b'\x0c' | b'\r') +} + +fn is_valid_attribute_name(name: &[u8]) -> bool { + if name.is_empty() { + return false; + } + + name.iter().all(|&byte| { + byte > 0x1f + && !matches!( + byte, + b' ' | b'\t' | b'\n' | b'\x0c' | b'\r' | b'"' | b'\'' | b'>' | b'&' | b'<' | b'/' | b'=' + ) + }) +} + +fn serialize_attribute(name: &[u8], value: &[u8], value_kind: u8) -> Vec { + let mut output = Vec::new(); + output.extend_from_slice(name); + + if value_kind == 1 { + return output; + } + + output.extend_from_slice(b"=\""); + output.extend_from_slice(&encode_html_attribute(value)); + output.push(b'"'); + output +} + +fn encode_html_attribute(input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + + for &byte in input { + match byte { + b'&' => output.extend_from_slice(b"&"), + b'"' => output.extend_from_slice(b"""), + b'\'' => output.extend_from_slice(b"'"), + b'<' => output.extend_from_slice(b"<"), + b'>' => output.extend_from_slice(b">"), + _ => output.push(byte), + } + } + + output +} + +fn join_classes(classes: &[Vec]) -> Vec { + let mut output = Vec::new(); + + for class in classes { + if !output.is_empty() { + output.push(b' '); + } + output.extend_from_slice(class); + } + + output +} + +fn normalize_class_bytes(class_name: &[u8]) -> Vec { + let mut output = Vec::with_capacity(class_name.len()); + + for &byte in class_name { + if byte == 0 { + output.extend_from_slice("\u{fffd}".as_bytes()); + } else { + output.push(byte); + } + } + + output +} + +fn comparable_class_bytes(class_name: &[u8], quirks_mode: bool) -> Vec { + if quirks_mode { + ascii_lowercase_vec(class_name) + } else { + class_name.to_vec() + } +} + +fn comparable_attribute_name(name: &[u8]) -> Vec { + let mut output = Vec::with_capacity(name.len()); + + for &byte in 
name { + if byte == 0 { + output.extend_from_slice("\u{fffd}".as_bytes()); + } else { + output.push(byte.to_ascii_lowercase()); + } + } + + output +} + +fn ascii_lowercase_vec(value: &[u8]) -> Vec { + value.iter().map(u8::to_ascii_lowercase).collect() +} + +fn find_tag_end(html: &[u8], offset: usize) -> Option { + let mut at = offset; + let mut quote = None; + let mut after_equals = false; + + while at < html.len() { + let byte = html[at]; + + match quote { + Some(quote_byte) if byte == quote_byte => quote = None, + Some(_) => {} + None if after_equals && (byte == b'\'' || byte == b'"') => { + quote = Some(byte); + after_equals = false; + } + None if byte == b'=' => after_equals = true, + None if is_html_whitespace(byte) => {} + None if byte == b'/' => {} + None if byte == b'>' => return Some(at + 1), + None => after_equals = false, + } + + at += 1; + } + + None +} + +fn eq_ignore_ascii_case(left: &[u8], right: &[u8]) -> bool { + left.len() == right.len() + && left.iter() + .zip(right.iter()) + .all(|(&left, &right)| left.eq_ignore_ascii_case(&right)) +} + +fn starts_with_ignore_ascii_case(value: &[u8], prefix: &[u8]) -> bool { + value.len() >= prefix.len() && eq_ignore_ascii_case(&value[..prefix.len()], prefix) +} + +fn decode_html_attribute(input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if input[at] != b'&' { + output.push(input[at]); + at += 1; + continue; + } + + let Some((decoded, consumed)) = + decode_character_reference(DecodeContext::Attribute, &input[at..]) + else { + output.push(input[at]); + at += 1; + continue; + }; + + decoded.append_to(&mut output); + at += consumed; + } + + output +} + +fn decode_html_text(context: DecodeContext, input: &[u8]) -> Vec { + let mut output = Vec::with_capacity(input.len()); + let mut at = 0; + + while at < input.len() { + if input[at] != b'&' { + output.push(input[at]); + at += 1; + continue; + } + + let Some((decoded, consumed)) = 
decode_character_reference(context, &input[at..]) else { + output.push(input[at]); + at += 1; + continue; + }; + + decoded.append_to(&mut output); + at += consumed; + } + + output +} + +fn decode_context_from_byte(context: u8) -> DecodeContext { + if context == 1 { + DecodeContext::Attribute + } else { + DecodeContext::Data + } +} + +unsafe fn write_output_buffer( + output: &[u8], + out_ptr: *mut u8, + out_capacity: usize, + out_len: *mut usize, +) -> bool { + if output.len() > out_capacity { + return false; + } + + if !output.is_empty() { + ptr::copy_nonoverlapping(output.as_ptr(), out_ptr, output.len()); + } + ptr::write(out_len, output.len()); + true +} + +fn attribute_starts_with( + haystack: &[u8], + search_text: &[u8], + ascii_case_insensitive: bool, +) -> bool { + let mut search_at = 0; + let mut haystack_at = 0; + + while search_at < search_text.len() && haystack_at < haystack.len() { + if haystack[haystack_at] == b'&' { + if let Some((decoded, consumed)) = + decode_character_reference(DecodeContext::Attribute, &haystack[haystack_at..]) + { + let mut decoded_bytes = Vec::new(); + decoded.append_to(&mut decoded_bytes); + if !slice_starts_with( + &search_text[search_at..], + &decoded_bytes, + ascii_case_insensitive, + ) { + return false; + } + + haystack_at += consumed; + search_at += decoded_bytes.len(); + continue; + } + } + + if !byte_eq( + haystack[haystack_at], + search_text[search_at], + ascii_case_insensitive, + ) { + return false; + } + + haystack_at += 1; + search_at += 1; + } + + true +} + +fn slice_starts_with(value: &[u8], prefix: &[u8], ascii_case_insensitive: bool) -> bool { + value.len() >= prefix.len() + && value + .iter() + .zip(prefix.iter()) + .all(|(&left, &right)| byte_eq(left, right, ascii_case_insensitive)) +} + +fn byte_eq(left: u8, right: u8, ascii_case_insensitive: bool) -> bool { + if ascii_case_insensitive { + left.eq_ignore_ascii_case(&right) + } else { + left == right + } +} + +fn decode_character_reference( + context: 
DecodeContext, + input: &[u8], +) -> Option<(CharacterReference, usize)> { + if input.len() < 3 || input[0] != b'&' { + return None; + } + + if input[1] == b'#' { + let mut at = 2; + let radix = if at < input.len() && (input[at] == b'x' || input[at] == b'X') { + at += 1; + 16 + } else { + 10 + }; + let max_digits = if radix == 16 { 6 } else { 7 }; + + let digits_start = at; + while at < input.len() && input[at] == b'0' { + at += 1; + } + let zero_count = at - digits_start; + let significant_digits_start = at; + while at < input.len() + && if radix == 16 { + input[at].is_ascii_hexdigit() + } else { + input[at].is_ascii_digit() + } + { + at += 1; + } + let digit_count = at - significant_digits_start; + + if 0 == zero_count && 0 == digit_count { + return None; + } + + let consumed = if at < input.len() && input[at] == b';' { at + 1 } else { at }; + if 0 == digit_count || digit_count > max_digits { + return Some((CharacterReference::Scalar('\u{FFFD}'), consumed)); + } + + let digits = std::str::from_utf8(&input[significant_digits_start..at]).ok()?; + let value = u32::from_str_radix(digits, radix).ok()?; + return Some(( + CharacterReference::Scalar(character_reference_code_point(value)), + consumed, + )); + } + + let (decoded, name_len) = named_character_reference(&input[1..])?; + let after_name = 1 + name_len; + let has_semicolon = input[after_name - 1] == b';'; + + if has_semicolon { + return Some((decoded, after_name)); + } + + let ambiguous_follower = after_name < input.len() + && (input[after_name].is_ascii_alphanumeric() || input[after_name] == b'='); + + if DecodeContext::Attribute == context && ambiguous_follower { + return None; + } + + Some((decoded, after_name)) +} + +fn character_reference_code_point(code_point: u32) -> char { + let code_point = match code_point { + 0x80 => 0x20AC, + 0x82 => 0x201A, + 0x83 => 0x0192, + 0x84 => 0x201E, + 0x85 => 0x2026, + 0x86 => 0x2020, + 0x87 => 0x2021, + 0x88 => 0x02C6, + 0x89 => 0x2030, + 0x8A => 0x0160, + 0x8B => 0x2039, 
+ 0x8C => 0x0152, + 0x8E => 0x017D, + 0x91 => 0x2018, + 0x92 => 0x2019, + 0x93 => 0x201C, + 0x94 => 0x201D, + 0x95 => 0x2022, + 0x96 => 0x2013, + 0x97 => 0x2014, + 0x98 => 0x02DC, + 0x99 => 0x2122, + 0x9A => 0x0161, + 0x9B => 0x203A, + 0x9C => 0x0153, + 0x9E => 0x017E, + 0x9F => 0x0178, + other => other, + }; + + char::from_u32(code_point).unwrap_or('\u{FFFD}') +} + +fn named_character_reference(input: &[u8]) -> Option<(CharacterReference, usize)> { + let mut best = None; + for (name, decoded) in html5_named_character_references::NAMED_CHARACTER_REFERENCES { + if input.starts_with(name) && best.map(|(_, len)| name.len() > len).unwrap_or(true) { + best = Some((*decoded, name.len())); + } + } + + best +} + +#[cfg(test)] +mod tests { + use super::{ + find_script_closer, scan_next_tag, scan_next_token, scan_next_token_in_namespace, + AttributeValue, ScanResult, TagProcessor, TagScan, COMMENT_TYPE_INVALID, NAMESPACE_FOREIGN, + NAMESPACE_HTML, TOKEN_TYPE_COMMENT, TOKEN_TYPE_FUNKY_COMMENT, TOKEN_TYPE_TAG, + TOKEN_TYPE_TEXT, + }; + use std::ptr; + + #[test] + fn scans_basic_start_tag() { + assert_eq!( + scan_next_tag(b"one
two", 0).unwrap(), + TagScan { + tag_start: 4, + tag_end: 19, + name_start: 5, + name_len: 3, + is_closing: false, + has_self_closing_flag: false, + token_end: 19, + token_type: TOKEN_TYPE_TAG, + } + ); + } + + #[test] + fn scans_basic_closing_tag() { + assert_eq!( + scan_next_tag(b"

text

", 3).unwrap(), + TagScan { + tag_start: 7, + tag_end: 11, + name_start: 9, + name_len: 1, + is_closing: true, + has_self_closing_flag: false, + token_end: 11, + token_type: TOKEN_TYPE_TAG, + } + ); + } + + #[test] + fn skips_non_tag_less_than_sequences() { + let scan = scan_next_tag(b"1 < 2 ", 0).unwrap(); + + assert_eq!(scan.tag_start, 23); + assert_eq!(scan.name_start, 24); + assert_eq!(scan.name_len, 4); + } + + #[test] + fn ignores_gt_inside_quoted_attributes() { + let scan = scan_next_tag(br#"
ok
"#, 0).unwrap(); + + assert_eq!(scan.tag_end, 19); + } + + #[test] + fn reports_incomplete_tag_as_not_found() { + assert!(scan_next_tag(br#"
X"; + + assert!(find_script_closer(html, b"".to_vec(), + offset: 0, + current: None, + scratch: Vec::new(), + paused_at_incomplete: false, + inserted_attributes: Vec::new(), + parsing_namespace: NAMESPACE_HTML, + }; + + assert!(unsafe { + super::wp_html_api_rust_tag_processor_next_tag( + &mut processor, + b"script".as_ptr(), + b"script".len(), + false, + ) + }); + assert!(processor.set_modifiable_text(b"different text")); + + let scan = processor.current.unwrap(); + assert_eq!(scan.tag_end, b"
"; +const scriptScan = scanNextTag(scriptScanHtml); +assert.equal(scriptScan.tag_name, "SCRIPT"); +assert.equal(scanNextTag(scriptScanHtml, scriptScan.token_end).tag_name, "DIV"); + +const tags = new WP_HTML_Tag_Processor('
Hi
'); +assert.equal(tags.next_tag({ tag_name: "span" }), true); +assert.equal(tags.get_tag(), "SPAN"); +assert.equal(tags.get_attribute("data-id"), "7"); +assert.equal(tags.set_attribute("data-id", "8"), true); +assert.equal(tags.add_class("active"), true); +assert.equal(tags.has_class("active"), true); +assert.deepEqual(tags.class_list(), ["active"]); +assert.equal(tags.get_updated_html(), '
Hi
'); +tags.destroy(); + +const coercedAttributeTags = new WP_HTML_Tag_Processor('
'); +assert.equal(coercedAttributeTags.next_tag("div"), true); +assert.equal(coercedAttributeTags.get_attribute(true), "one"); +assert.equal(coercedAttributeTags.get_attribute(false), null); +assert.equal(coercedAttributeTags.get_attribute(null), null); +assert.deepEqual(coercedAttributeTags.get_attribute_names_with_prefix(true), ["1"]); +assert.deepEqual(coercedAttributeTags.get_attribute_names_with_prefix(false), ["1", "class", "data-", "data-1"]); +assert.deepEqual(coercedAttributeTags.get_attribute_names_with_prefix(null), ["1", "class", "data-", "data-1"]); +assert.equal(coercedAttributeTags.has_class(true), true); +assert.equal(coercedAttributeTags.has_class(false), false); +assert.equal(coercedAttributeTags.has_class(null), false); +assert.equal(coercedAttributeTags.add_class(true), true); +assert.equal(coercedAttributeTags.add_class(null), true); +assert.equal(coercedAttributeTags.remove_class(false), true); +assert.equal(coercedAttributeTags.remove_class(null), true); +assert.equal(coercedAttributeTags.set_attribute(false, "v"), false); +assert.equal(coercedAttributeTags.set_attribute(null, "v"), false); +assert.equal(coercedAttributeTags.set_attribute("data-num", 123), true); +assert.equal(coercedAttributeTags.get_attribute("data-num"), "123"); +assert.equal(coercedAttributeTags.set_attribute("data-round", 1.23456789012345), true); +assert.equal(coercedAttributeTags.get_attribute("data-round"), "1.2345678901235"); +assert.equal(coercedAttributeTags.set_attribute("data-safe-int", 100000000000000), true); +assert.equal(coercedAttributeTags.get_attribute("data-safe-int"), "100000000000000"); +assert.equal(coercedAttributeTags.set_attribute("data-small", 1e-5), true); +assert.equal(coercedAttributeTags.get_attribute("data-small"), "1.0E-5"); +assert.equal(coercedAttributeTags.set_attribute("data-large", 1e20), true); +assert.equal(coercedAttributeTags.get_attribute("data-large"), "1.0E+20"); 
+assert.equal(coercedAttributeTags.set_attribute("data-negative-zero", -0), true); +assert.equal(coercedAttributeTags.get_attribute("data-negative-zero"), "-0"); +assert.equal(coercedAttributeTags.set_attribute("data-nan", NaN), true); +assert.equal(coercedAttributeTags.get_attribute("data-nan"), "NAN"); +assert.equal(coercedAttributeTags.set_attribute("data-inf", Infinity), true); +assert.equal(coercedAttributeTags.get_attribute("data-inf"), "INF"); +assert.equal(coercedAttributeTags.set_attribute("data-null", null), false); +assert.equal(coercedAttributeTags.get_attribute("data-null"), null); +assert.equal(coercedAttributeTags.remove_attribute(null), false); +assert.throws( + () => coercedAttributeTags.get_attribute({ name: "class" }), + TypeError, +); +assert.throws( + () => coercedAttributeTags.set_attribute("data-object", {}), + TypeError, +); +coercedAttributeTags.destroy(); + +const invalidAttributeNameTags = new WP_HTML_Tag_Processor("
"); +assert.equal(invalidAttributeNameTags.next_tag("div"), true); +for (const name of [ + "", + "too late", + 'too"late', + "too&late", + "too'late", + "too/late", + "toolate", + "shut\0down", + "shut\u001Fdown", + "shut\uFDD0down", + "shut\uFFFEdown", + "shut\uFFFFdown", + "shut\u{1FFFE}down", + "shut\u{10FFFF}down", +]) { + assert.equal(invalidAttributeNameTags.set_attribute(name, true), false, `Should reject ${JSON.stringify(name)}`); +} +assert.equal(invalidAttributeNameTags.get_updated_html(), "
"); +invalidAttributeNameTags.destroy(); + +const unicodeAttributeNameTags = new WP_HTML_Tag_Processor("
"); +assert.equal(unicodeAttributeNameTags.next_tag("div"), true); +assert.equal(unicodeAttributeNameTags.set_attribute("data-\u00E9", "ok"), true); +assert.equal(unicodeAttributeNameTags.get_attribute("data-\u00E9"), "ok"); +unicodeAttributeNameTags.destroy(); + +const escapedAttributeValueTags = new WP_HTML_Tag_Processor("
"); +assert.equal(escapedAttributeValueTags.next_tag("div"), true); +assert.equal( + escapedAttributeValueTags.set_attribute("test", "\" onclick=\"alert('1');\">"), + true, +); +assert.equal( + escapedAttributeValueTags.get_updated_html(), + '
', +); +escapedAttributeValueTags.destroy(); + +const removedBooleanAttributeTags = new WP_HTML_Tag_Processor(''); +assert.equal(removedBooleanAttributeTags.next_tag("input"), true); +assert.equal(removedBooleanAttributeTags.set_attribute("checked", false), true); +assert.equal(removedBooleanAttributeTags.get_attribute("checked"), null); +assert.equal(removedBooleanAttributeTags.get_updated_html(), ''); +removedBooleanAttributeTags.destroy(); + +const missingFalseAttributeTags = new WP_HTML_Tag_Processor(''); +assert.equal(missingFalseAttributeTags.next_tag("input"), true); +assert.equal(missingFalseAttributeTags.set_attribute("checked", false), false); +assert.equal(missingFalseAttributeTags.get_updated_html(), ''); +missingFalseAttributeTags.destroy(); + +const unselectedAttributePrefixTags = new WP_HTML_Tag_Processor('
Test
'); +assert.equal(unselectedAttributePrefixTags.get_attribute_names_with_prefix("data-"), null); +unselectedAttributePrefixTags.destroy(); + +const missingAttributePrefixTags = new WP_HTML_Tag_Processor('
Test
'); +assert.equal(missingAttributePrefixTags.next_tag("p"), false); +assert.equal(missingAttributePrefixTags.get_attribute_names_with_prefix("data-"), null); +missingAttributePrefixTags.destroy(); + +const closingAttributePrefixTags = new WP_HTML_Tag_Processor('
Test
'); +assert.equal(closingAttributePrefixTags.next_tag("div"), true); +assert.equal(closingAttributePrefixTags.next_tag({ tag_closers: "visit" }), true); +assert.equal(closingAttributePrefixTags.get_attribute_names_with_prefix("data-"), null); +closingAttributePrefixTags.destroy(); + +const emptyAttributePrefixTags = new WP_HTML_Tag_Processor("
Test
"); +assert.equal(emptyAttributePrefixTags.next_tag("div"), true); +assert.deepEqual(emptyAttributePrefixTags.get_attribute_names_with_prefix("data-"), []); +emptyAttributePrefixTags.destroy(); + +const mixedCaseAttributePrefixTags = new WP_HTML_Tag_Processor('
Test
'); +assert.equal(mixedCaseAttributePrefixTags.next_tag(), true); +assert.deepEqual(mixedCaseAttributePrefixTags.get_attribute_names_with_prefix("data-"), ["data-enabled", "data-test-id"]); +mixedCaseAttributePrefixTags.destroy(); + +const addedAttributePrefixTags = new WP_HTML_Tag_Processor('
Test
'); +assert.equal(addedAttributePrefixTags.next_tag(), true); +assert.equal(addedAttributePrefixTags.set_attribute("data-test-id", "14"), true); +assert.equal(addedAttributePrefixTags.get_updated_html(), '
Test
'); +assert.deepEqual(addedAttributePrefixTags.get_attribute_names_with_prefix("data-"), ["data-test-id", "data-foo"]); +addedAttributePrefixTags.destroy(); + +const duplicateAttributeNameTags = new WP_HTML_Tag_Processor("
"); +assert.equal(duplicateAttributeNameTags.next_tag("div"), true); +assert.deepEqual(duplicateAttributeNameTags.get_attribute_names_with_prefix("data-"), ["data-x", "data-y"]); +assert.equal(duplicateAttributeNameTags.get_attribute("data-x"), "1"); +duplicateAttributeNameTags.destroy(); + +const noAttributePrefixMatches = new WP_HTML_Tag_Processor("
"); +assert.equal(noAttributePrefixMatches.next_tag("div"), true); +assert.deepEqual(noAttributePrefixMatches.get_attribute_names_with_prefix("data-"), []); +assert.deepEqual(noAttributePrefixMatches.get_attribute_names_with_prefix(""), ["id"]); +noAttributePrefixMatches.destroy(); + +const decodedClassQueryTags = new WP_HTML_Tag_Processor('
'); +assert.equal(decodedClassQueryTags.next_tag({ class_name: "" }), true); +assert.equal(decodedClassQueryTags.get_tag(), "DIV"); +assert.deepEqual(decodedClassQueryTags.class_list(), ["∉-class", "", "#"]); +decodedClassQueryTags.destroy(); + +const nonStringClassQueryTags = new WP_HTML_Tag_Processor('
'); +assert.equal(nonStringClassQueryTags.next_tag({ class_name: null }), true); +assert.equal(nonStringClassQueryTags.get_tag(), "DIV"); +nonStringClassQueryTags.destroy(); + +const nonStringTagQueryTags = new WP_HTML_Tag_Processor("
"); +assert.equal(nonStringTagQueryTags.next_tag({ tag_name: 1, match_offset: "2" }), true); +assert.equal(nonStringTagQueryTags.get_attribute("one"), true); +assert.equal(nonStringTagQueryTags.get_attribute("two"), null); +nonStringTagQueryTags.destroy(); + +const nonVisitCloserQueryTags = new WP_HTML_Tag_Processor("
"); +assert.equal(nonVisitCloserQueryTags.next_tag("div"), true); +assert.equal(nonVisitCloserQueryTags.next_tag({ tag_name: "div", tag_closers: {} }), false); +nonVisitCloserQueryTags.destroy(); + +const duplicateDecodedClassList = new WP_HTML_Tag_Processor('
'); +assert.equal(duplicateDecodedClassList.next_tag("div"), true); +assert.deepEqual(duplicateDecodedClassList.class_list(), ["one"]); +duplicateDecodedClassList.destroy(); + +const addClassBeforeSetClassAttribute = new WP_HTML_Tag_Processor('
'); +assert.equal(addClassBeforeSetClassAttribute.next_tag("div"), true); +assert.equal(addClassBeforeSetClassAttribute.add_class("add_class"), true); +assert.equal(addClassBeforeSetClassAttribute.set_attribute("class", "set_attribute"), true); +assert.equal(addClassBeforeSetClassAttribute.get_attribute("class"), "set_attribute"); +assert.equal(addClassBeforeSetClassAttribute.get_updated_html(), '
'); +addClassBeforeSetClassAttribute.destroy(); + +const addClassAfterSetClassAttribute = new WP_HTML_Tag_Processor('
'); +assert.equal(addClassAfterSetClassAttribute.next_tag("div"), true); +assert.equal(addClassAfterSetClassAttribute.set_attribute("class", "set_attribute"), true); +assert.equal(addClassAfterSetClassAttribute.add_class("add_class"), true); +assert.equal(addClassAfterSetClassAttribute.get_attribute("class"), "set_attribute add_class"); +assert.equal(addClassAfterSetClassAttribute.get_updated_html(), '
'); +addClassAfterSetClassAttribute.destroy(); + +const addClassAfterBooleanClassAttribute = new WP_HTML_Tag_Processor('
'); +assert.equal(addClassAfterBooleanClassAttribute.next_tag("div"), true); +assert.equal(addClassAfterBooleanClassAttribute.set_attribute("class", true), true); +assert.equal(addClassAfterBooleanClassAttribute.add_class("add_class"), true); +assert.equal(addClassAfterBooleanClassAttribute.get_attribute("class"), "add_class"); +assert.equal(addClassAfterBooleanClassAttribute.get_updated_html(), '
'); +addClassAfterBooleanClassAttribute.destroy(); + +const addEmptyClassName = new WP_HTML_Tag_Processor("
"); +assert.equal(addEmptyClassName.next_tag("div"), true); +assert.equal(addEmptyClassName.add_class(null), true); +assert.equal(addEmptyClassName.add_class(""), true); +assert.equal(addEmptyClassName.get_updated_html(), "
"); +addEmptyClassName.destroy(); + +const addEmptyClassNameWithExistingClass = new WP_HTML_Tag_Processor('
'); +assert.equal(addEmptyClassNameWithExistingClass.next_tag("div"), true); +assert.equal(addEmptyClassNameWithExistingClass.add_class(null), true); +assert.equal(addEmptyClassNameWithExistingClass.get_updated_html(), '
'); +addEmptyClassNameWithExistingClass.destroy(); + +const addFalseClassName = new WP_HTML_Tag_Processor("
"); +assert.equal(addFalseClassName.next_tag("div"), true); +assert.equal(addFalseClassName.add_class(false), true); +assert.equal(addFalseClassName.get_updated_html(), '
'); +addFalseClassName.destroy(); + +const addNumericClassName = new WP_HTML_Tag_Processor('
'); +assert.equal(addNumericClassName.next_tag("div"), true); +assert.equal(addNumericClassName.add_class(1), true); +assert.equal(addNumericClassName.add_class("1"), true); +assert.equal(addNumericClassName.add_class(1.5), true); +assert.equal(addNumericClassName.get_updated_html(), '
'); +addNumericClassName.destroy(); + +for (const attributeName of ["CLASS", "Class"]) { + const pendingAddClassAttribute = new WP_HTML_Tag_Processor('
'); + assert.equal(pendingAddClassAttribute.next_tag("div"), true); + assert.equal(pendingAddClassAttribute.add_class("two"), true); + assert.equal(pendingAddClassAttribute.get_attribute(attributeName), "one"); + assert.equal(pendingAddClassAttribute.get_updated_html(), '
'); + pendingAddClassAttribute.destroy(); +} + +for (const attributeName of ["CLASS", "Class"]) { + const pendingRemoveClassAttribute = new WP_HTML_Tag_Processor('
'); + assert.equal(pendingRemoveClassAttribute.next_tag("div"), true); + assert.equal(pendingRemoveClassAttribute.remove_class("two"), true); + assert.equal(pendingRemoveClassAttribute.get_attribute(attributeName), "one two"); + assert.equal(pendingRemoveClassAttribute.get_attribute("class"), "one"); + pendingRemoveClassAttribute.destroy(); +} + +const pendingSetThenAddClassAttribute = new WP_HTML_Tag_Processor('
'); +assert.equal(pendingSetThenAddClassAttribute.next_tag("div"), true); +assert.equal(pendingSetThenAddClassAttribute.set_attribute("class", "set"), true); +assert.equal(pendingSetThenAddClassAttribute.add_class("two"), true); +assert.equal(pendingSetThenAddClassAttribute.get_attribute("CLASS"), "set"); +assert.equal(pendingSetThenAddClassAttribute.get_attribute("class"), "set two"); +pendingSetThenAddClassAttribute.destroy(); + +const pendingClassPrefixAttributes = new WP_HTML_Tag_Processor("
"); +assert.equal(pendingClassPrefixAttributes.next_tag("div"), true); +assert.equal(pendingClassPrefixAttributes.add_class("two"), true); +assert.deepEqual(pendingClassPrefixAttributes.get_attribute_names_with_prefix("cl"), []); +assert.equal(pendingClassPrefixAttributes.get_attribute("class"), "two"); +pendingClassPrefixAttributes.destroy(); + +const pendingEmptyThenClassName = new WP_HTML_Tag_Processor('
'); +assert.equal(pendingEmptyThenClassName.next_tag("div"), true); +assert.equal(pendingEmptyThenClassName.add_class(null), true); +assert.equal(pendingEmptyThenClassName.add_class("two"), true); +assert.equal(pendingEmptyThenClassName.get_attribute("CLASS"), "one"); +assert.equal(pendingEmptyThenClassName.get_attribute("class"), "one two"); +pendingEmptyThenClassName.destroy(); + +const removeNumericClassName = new WP_HTML_Tag_Processor('
'); +assert.equal(removeNumericClassName.next_tag("div"), true); +assert.equal(removeNumericClassName.remove_class(false), true); +assert.equal(removeNumericClassName.remove_class(1), true); +assert.equal(removeNumericClassName.remove_class("1"), true); +assert.equal(removeNumericClassName.remove_class(1.5), true); +assert.equal(removeNumericClassName.get_updated_html(), '
'); +assert.equal(removeNumericClassName.remove_class("1.5"), true); +assert.equal(removeNumericClassName.remove_class("01"), true); +assert.equal(removeNumericClassName.get_updated_html(), '
'); +removeNumericClassName.destroy(); + +const stagedAttributeUpdates = new WP_HTML_Tag_Processor( + '
Test
', +); +assert.equal(stagedAttributeUpdates.next_tag(), true); +assert.equal(stagedAttributeUpdates.remove_attribute("id"), true); +assert.equal(stagedAttributeUpdates.next_tag(), true); +assert.equal(stagedAttributeUpdates.set_attribute("id", "div-id-1"), true); +assert.equal(stagedAttributeUpdates.add_class("new_class_1"), true); +assert.equal( + stagedAttributeUpdates.get_updated_html(), + '
Test
', +); +assert.equal(stagedAttributeUpdates.toString(), stagedAttributeUpdates.get_updated_html()); +assert.equal(stagedAttributeUpdates.set_attribute("id", "div-id-2"), true); +assert.equal(stagedAttributeUpdates.add_class("new_class_2"), true); +assert.equal( + stagedAttributeUpdates.get_updated_html(), + '
Test
', +); +assert.equal(stagedAttributeUpdates.next_tag(), true); +assert.equal(stagedAttributeUpdates.remove_attribute("id"), true); +assert.equal( + stagedAttributeUpdates.get_updated_html(), + '
Test
', +); +stagedAttributeUpdates.destroy(); + +const rawClassNameUpdates = new WP_HTML_Tag_Processor('
'); +assert.equal(rawClassNameUpdates.next_tag("div"), true); +assert.equal(rawClassNameUpdates.has_class("x\0y"), false); +assert.equal(rawClassNameUpdates.add_class("x\0y"), true); +assert.equal(rawClassNameUpdates.get_updated_html(), '
'); +assert.deepEqual(rawClassNameUpdates.class_list(), ["x\uFFFDy"]); +assert.equal(rawClassNameUpdates.has_class("x\0y"), false); +assert.equal(rawClassNameUpdates.remove_class("x\0y"), true); +assert.equal(rawClassNameUpdates.get_updated_html(), '
'); +rawClassNameUpdates.destroy(); + +const tagMatchOffset = new WP_HTML_Tag_Processor("
"); +assert.equal(tagMatchOffset.next_tag({ tag_name: "div", match_offset: 2 }), true); +assert.equal(tagMatchOffset.get_attribute("two"), true); +tagMatchOffset.destroy(); + +for (const invalidHtml of [null, 123]) { + const invalidTags = new WP_HTML_Tag_Processor(invalidHtml); + assert.equal(invalidTags.get_updated_html(), ""); + assert.equal(invalidTags.next_token(), false); + invalidTags.destroy(); +} + +const text = new WP_HTML_Tag_Processor(" \0

Hi

"); +assert.equal(text.get_modifiable_text(), ""); +assert.equal(text.get_qualified_attribute_name("data-id"), null); +assert.equal(text.next_token(), true); +assert.equal(text.get_token_type(), "#text"); +assert.equal(text.get_qualified_attribute_name("data-id"), null); +assert.equal(text.subdivide_text_appropriately(), true); +assert.equal(text.text_node_classification, WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE); +text.destroy(); + +const bareLessThanText = new WP_HTML_Tag_Processor("<"); +assert.equal(bareLessThanText.next_token(), true); +assert.equal(bareLessThanText.get_token_type(), "#text"); +assert.equal(bareLessThanText.get_modifiable_text(), "<"); +assert.equal(bareLessThanText.paused_at_incomplete_token(), false); +assert.equal(bareLessThanText.next_token(), false); +bareLessThanText.destroy(); + +const nonText = new WP_HTML_Tag_Processor("
"); +assert.equal(nonText.next_tag("div"), true); +assert.equal(nonText.get_qualified_attribute_name("DATA-ID"), "DATA-ID"); +assert.equal(nonText.get_qualified_attribute_name(123), "123"); +assert.equal(nonText.get_qualified_attribute_name(false), ""); +assert.equal(nonText.get_qualified_attribute_name(null), null); +assert.throws( + () => nonText.get_qualified_attribute_name([]), + TypeError, +); +assert.equal(nonText.get_modifiable_text(), ""); +assert.equal(nonText.next_tag({ tag_name: "div", tag_closers: "visit" }), true); +assert.equal(nonText.is_tag_closer(), true); +assert.equal(nonText.get_qualified_attribute_name("DATA-ID"), "DATA-ID"); +assert.equal(nonText.get_attribute_names_with_prefix([]), null); +assert.equal(nonText.has_class("active"), false); +assert.throws( + () => nonText.has_class([]), + TypeError, +); +assert.deepEqual(nonText.class_list(), []); +assert.equal(nonText.set_attribute("id", "x"), false); +assert.equal(nonText.set_attribute([], []), false); +assert.equal(nonText.remove_attribute([]), false); +assert.equal(nonText.add_class("active"), false); +assert.equal(nonText.add_class([]), false); +assert.equal(nonText.remove_class([]), false); +assert.throws( + () => nonText.set_modifiable_text(null), + TypeError, +); +assert.equal(nonText.get_updated_html(), "
"); +nonText.destroy(); + +const tagProcessorBrEndTag = new WP_HTML_Tag_Processor("
"); +assert.equal(tagProcessorBrEndTag.next_tag({ tag_name: "br", tag_closers: "visit" }), true); +assert.equal(tagProcessorBrEndTag.is_tag_closer(), false); +assert.equal(tagProcessorBrEndTag.get_attribute_names_with_prefix([]), null); +assert.equal(tagProcessorBrEndTag.has_class("x"), false); +assert.throws( + () => tagProcessorBrEndTag.has_class([]), + TypeError, +); +assert.deepEqual(tagProcessorBrEndTag.class_list(), []); +assert.equal(tagProcessorBrEndTag.set_attribute([], []), false); +assert.equal(tagProcessorBrEndTag.remove_attribute([]), false); +assert.equal(tagProcessorBrEndTag.add_class([]), false); +assert.equal(tagProcessorBrEndTag.remove_class([]), false); +tagProcessorBrEndTag.destroy(); + +const completedClosingTag = new WP_HTML_Tag_Processor("
"); +assert.equal(completedClosingTag.next_tag({ tag_name: "div", tag_closers: "visit" }), true); +assert.equal(completedClosingTag.is_tag_closer(), true); +assert.equal(completedClosingTag.next_tag({ tag_name: "div", tag_closers: "visit" }), false); +assert.equal(completedClosingTag.get_token_type(), null); +assert.equal(completedClosingTag.is_tag_closer(), false); +completedClosingTag.destroy(); + +const completedSelfClosingTag = new WP_HTML_Tag_Processor(""); +assert.equal(completedSelfClosingTag.next_tag("img"), true); +assert.equal(completedSelfClosingTag.has_self_closing_flag(), true); +assert.equal(completedSelfClosingTag.next_tag(), false); +assert.equal(completedSelfClosingTag.get_token_type(), null); +assert.equal(completedSelfClosingTag.has_self_closing_flag(), false); +completedSelfClosingTag.destroy(); + +for (const [completedTextHtml, completedTextAdvance, expectedText] of [ + ["text", (processor) => processor.next_token(), "text"], + ["", (processor) => processor.next_token(), "comment"], + ["", (processor) => processor.next_tag("script"), "abc"], +]) { + const completedTextProcessor = new WP_HTML_Tag_Processor(completedTextHtml); + assert.equal(completedTextAdvance(completedTextProcessor), true); + assert.equal(completedTextProcessor.get_modifiable_text(), expectedText); + assert.equal(completedTextProcessor.next_token(), false); + assert.equal(completedTextProcessor.get_token_type(), null); + assert.equal(completedTextProcessor.get_modifiable_text(), ""); + completedTextProcessor.destroy(); +} + +const coercedModifiableText = new WP_HTML_Tag_Processor("abc"); +assert.equal(coercedModifiableText.next_token(), true); +assert.equal(coercedModifiableText.set_modifiable_text(123), true); +assert.equal(coercedModifiableText.get_updated_html(), "123"); +assert.equal(coercedModifiableText.set_modifiable_text(false), true); +assert.equal(coercedModifiableText.get_updated_html(), ""); +assert.throws( + () => coercedModifiableText.set_modifiable_text(null), + 
TypeError, +); +assert.throws( + () => coercedModifiableText.set_modifiable_text({ text: "object" }), + TypeError, +); +coercedModifiableText.destroy(); + +const svgQualifiedNames = new WP_HTML_Tag_Processor(''); +assert.equal(svgQualifiedNames.change_parsing_namespace(true), false); +assert.throws( + () => svgQualifiedNames.change_parsing_namespace(null), + TypeError, +); +assert.equal(svgQualifiedNames.change_parsing_namespace("svg"), true); +assert.equal(svgQualifiedNames.next_tag("foreignobject"), true); +assert.equal(svgQualifiedNames.get_namespace(), "svg"); +assert.equal(svgQualifiedNames.get_qualified_tag_name(), "foreignObject"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name("attributeName"), "attributeName"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name("xlink:href"), "xlink href"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name("viewbox"), "viewBox"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name("DATA-ID"), "DATA-ID"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name(123), "123"); +assert.equal(svgQualifiedNames.get_qualified_attribute_name(null), null); +svgQualifiedNames.destroy(); + +const mathQualifiedNames = new WP_HTML_Tag_Processor(""); +assert.equal(mathQualifiedNames.change_parsing_namespace("math"), true); +assert.equal(mathQualifiedNames.next_tag("mi"), true); +assert.equal(mathQualifiedNames.get_namespace(), "math"); +assert.equal(mathQualifiedNames.get_qualified_tag_name(), "mi"); +assert.equal(mathQualifiedNames.get_qualified_attribute_name("definitionurl"), "definitionURL"); +assert.equal(mathQualifiedNames.get_qualified_attribute_name("xlink:title"), "xlink title"); +assert.equal(mathQualifiedNames.get_qualified_attribute_name("viewBox"), "viewBox"); +mathQualifiedNames.destroy(); + +const textarea = new WP_HTML_Tag_Processor(""); +assert.equal(textarea.next_token(), true); +assert.equal(textarea.get_modifiable_text(), "One"); 
+assert.equal(textarea.set_modifiable_text("Two"), true); +assert.equal(textarea.get_updated_html(), ""); +textarea.destroy(); + +for (const [name, html, advanceTokenCount, replacement, expectedHtml] of [ + ["Text node (start)", "Text", 1, "Blubber", "Blubber"], + ["Text node (middle)", "Bold move", 2, "yo", "yo"], + ["Text node (end)", "of a dog", 2, "of a cat", "of a cat"], + [ + "Encoded text node", + "
birds and dogs
", + 2, + " & ", + "
<birds> & <dogs>
", + ], + [ + "SCRIPT tag", + "beforeafter", + 2, + 'const img = " &
";', + 'beforeafter', + ], + [ + "STYLE tag", + "", + 1, + 'p::before { content: " & "; }', + '', + ], + [ + "TEXTAREA tag", + "ab", + 2, + "so it ", + "ab", + ], + [ + "TEXTAREA (escape)", + "ab", + 2, + "but it does for ", + "ab", + ], + [ + "TEXTAREA (escape+attrs)", + "ab", + 2, + 'but it does for ', + 'ab', + ], + [ + "TITLE tag", + "ahas no need to escapeb", + 2, + "so it ", + "aso it <doesn't>b", + ], + [ + "TITLE (escape)", + "ahas no need to escapeb", + 2, + "but it does for ", + "abut it does for </title>b", + ], + [ + "TITLE (escape+attrs)", + "ahas no need to escapeb", + 2, + 'but it does for ', + 'abut it does for </title not an="attribute">b', + ], +]) { + const modifiableText = new WP_HTML_Tag_Processor(html); + for (let i = 0; i < advanceTokenCount; i++) { + assert.equal(modifiableText.next_token(), true, name); + } + assert.equal(modifiableText.set_modifiable_text(replacement), true, name); + assert.equal(modifiableText.get_updated_html(), expectedHtml, name); + modifiableText.destroy(); +} + +for (const [name, html, invalidUpdate] of [ + ["Comment with -->", "", "Comments end in -->"], + ["Comment with --!>", "", "Invalid but legitimate comments end in --!>"], + [ + "Non-JS SCRIPT with ', + "", WP_HTML_Processor.COMMENT_AS_HTML_COMMENT, " A comment. ", " A comment. ", null], + ["", WP_HTML_Processor.COMMENT_AS_ABRUPTLY_CLOSED_COMMENT, "", "", null], + ["", WP_HTML_Processor.COMMENT_AS_INVALID_HTML, " Bang opener ", " Bang opener ", null], + ["", WP_HTML_Processor.COMMENT_AS_INVALID_HTML, " Question opener ", "? Question opener ", null], + ["", WP_HTML_Processor.COMMENT_AS_CDATA_LOOKALIKE, " cdata body ", "[CDATA[ cdata body ]]", null], + ["", WP_HTML_Processor.COMMENT_AS_PI_NODE_LOOKALIKE, " Instruction body. ", "?pi-target Instruction body. 
?", "pi-target"], + ["", WP_HTML_Processor.COMMENT_AS_PI_NODE_LOOKALIKE, " const HTML_COMMENT = true; ", "?php const HTML_COMMENT = true; ?", "php"], +]) { + const processorComment = WP_HTML_Processor.create_fragment(html); + assert.equal(processorComment.next_token(), true); + assert.equal(processorComment.get_token_name(), "#comment"); + assert.equal(processorComment.get_comment_type(), expectedType); + assert.equal(processorComment.get_modifiable_text(), expectedText); + assert.equal(processorComment.get_full_comment_text(), expectedFullText); + assert.equal(processorComment.get_tag(), expectedTag); + processorComment.destroy(); +} + +for (const [html, expectedText] of [ + ["", "#"], + ["", "# foo"], + ["", "• bar"], +]) { + const processorFunkyCommentCase = WP_HTML_Processor.create_fragment(html); + assert.equal(processorFunkyCommentCase.next_token(), true); + assert.equal(processorFunkyCommentCase.get_token_name(), "#funky-comment"); + assert.equal(processorFunkyCommentCase.get_modifiable_text(), expectedText); + processorFunkyCommentCase.destroy(); +} + +const incompleteComment = new WP_HTML_Tag_Processor("FOO', 3], + ['
', 4], + ['

What

', 5], +]) { + const nextNodeDepthProcessor = WP_HTML_Processor.create_fragment(html); + assert.equal(nextNodeDepthProcessor.next_tag({ class_name: "target" }), true); + assert.equal(nextNodeDepthProcessor.next_token(), true); + assert.equal(nextNodeDepthProcessor.get_current_depth(), expectedDepth); + nextNodeDepthProcessor.destroy(); +} + +const completedProcessor = WP_HTML_Processor.create_fragment('
Test
'); +assert.equal(completedProcessor.next_tag(), true); +assert.equal(completedProcessor.get_tag(), "DIV"); +assert.equal(completedProcessor.next_tag(), false); +assert.equal(completedProcessor.get_tag(), null); +completedProcessor.destroy(); + +const processorMatchOffsetWithoutBreadcrumbs = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorMatchOffsetWithoutBreadcrumbs.next_tag({ tag_name: "div", match_offset: 2 }), true); +assert.equal(processorMatchOffsetWithoutBreadcrumbs.get_attribute("one"), true); +assert.equal(processorMatchOffsetWithoutBreadcrumbs.get_attribute("two"), null); +processorMatchOffsetWithoutBreadcrumbs.destroy(); + +const processorNonStringClassQuery = WP_HTML_Processor.create_fragment('
'); +assert.equal(processorNonStringClassQuery.next_tag({ class_name: {} }), true); +assert.equal(processorNonStringClassQuery.get_tag(), "DIV"); +processorNonStringClassQuery.destroy(); + +const processorNumericTagName = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorNumericTagName.next_tag({ tag_name: 1 }), false); +assert.equal(processorNumericTagName.get_tag(), null); +processorNumericTagName.destroy(); + +const processorBooleanTagName = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorBooleanTagName.next_tag({ tag_name: true }), false); +assert.equal(processorBooleanTagName.get_tag(), null); +processorBooleanTagName.destroy(); + +const processorObjectTagName = WP_HTML_Processor.create_fragment("
"); +assert.throws( + () => processorObjectTagName.next_tag({ tag_name: {} }), + TypeError, +); +processorObjectTagName.destroy(); + +const processorBreadcrumbMatchOffset = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorBreadcrumbMatchOffset.next_tag({ breadcrumbs: ["DIV", "SPAN"], match_offset: "2nd" }), true); +assert.equal(processorBreadcrumbMatchOffset.get_attribute("one"), null); +assert.equal(processorBreadcrumbMatchOffset.get_attribute("two"), true); +processorBreadcrumbMatchOffset.destroy(); + +const processorObjectMatchOffset = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorObjectMatchOffset.next_tag({ breadcrumbs: ["DIV", "SPAN"], match_offset: {} }), true); +assert.equal(processorObjectMatchOffset.get_attribute("one"), true); +assert.equal(processorObjectMatchOffset.get_attribute("two"), null); +processorObjectMatchOffset.destroy(); + +const processorArrayMatchOffset = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorArrayMatchOffset.next_tag({ breadcrumbs: ["DIV", "SPAN"], match_offset: [2] }), true); +assert.equal(processorArrayMatchOffset.get_attribute("one"), true); +assert.equal(processorArrayMatchOffset.get_attribute("two"), null); +processorArrayMatchOffset.destroy(); + +const processorNullBreadcrumb = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorNullBreadcrumb.next_tag({ breadcrumbs: [null] }), false); +processorNullBreadcrumb.destroy(); + +const processorBreadcrumbIgnoresTagName = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorBreadcrumbIgnoresTagName.next_tag({ tag_name: "span", breadcrumbs: ["DIV"] }), true); +assert.equal(processorBreadcrumbIgnoresTagName.get_tag(), "DIV"); +processorBreadcrumbIgnoresTagName.destroy(); + +const processorZeroBreadcrumbMatchOffset = WP_HTML_Processor.create_fragment("
"); +assert.equal(processorZeroBreadcrumbMatchOffset.next_tag({ breadcrumbs: ["DIV", "SPAN"], match_offset: 0 }), false); +assert.equal(processorZeroBreadcrumbMatchOffset.get_tag(), null); +processorZeroBreadcrumbMatchOffset.destroy(); + +for (const [html, expectedBreadcrumbs] of [ + ["

", ["HTML", "BODY", "ARTICLE"]], + ["
  • ", ["HTML", "BODY", "LI", "BLOCKQUOTE", "LI"]], + ["
  • ", ["HTML", "BODY", "LI"]], + ["
    ", ["HTML", "BODY", "DT"]], + ["

    !

  • ', +); +assert.equal(semanticButtonProcessor.next_tag("BUTTON"), true); +assert.equal(semanticButtonProcessor.get_attribute("one"), true); +assert.deepEqual(semanticButtonProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "BUTTON"]); +assert.equal(semanticButtonProcessor.next_tag("BUTTON"), true); +assert.equal(semanticButtonProcessor.get_attribute("two"), true); +assert.deepEqual(semanticButtonProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "BUTTON"]); +assert.equal(semanticButtonProcessor.next_tag("BUTTON"), true); +assert.equal(semanticButtonProcessor.get_attribute("three"), true); +assert.deepEqual(semanticButtonProcessor.get_breadcrumbs(), ["HTML", "BODY", "BUTTON"]); +semanticButtonProcessor.destroy(); + +const imageNamespaceProcessor = WP_HTML_Processor.create_fragment(""); +assert.equal(imageNamespaceProcessor.next_tag(), true); +assert.equal(imageNamespaceProcessor.get_tag(), "IMG"); +assert.equal(imageNamespaceProcessor.get_namespace(), "html"); +assert.equal(imageNamespaceProcessor.expects_closer(), false); +assert.deepEqual(imageNamespaceProcessor.get_breadcrumbs(), ["HTML", "BODY", "IMG"]); +assert.equal(imageNamespaceProcessor.next_tag("svg"), true); +assert.equal(imageNamespaceProcessor.next_tag(), true); +assert.equal(imageNamespaceProcessor.get_tag(), "IMAGE"); +assert.equal(imageNamespaceProcessor.get_namespace(), "svg"); +assert.equal(imageNamespaceProcessor.expects_closer(), false); +assert.deepEqual(imageNamespaceProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "IMAGE"]); +imageNamespaceProcessor.destroy(); + +const processorQualifiedNames = WP_HTML_Processor.create_fragment( + "", +); +assert.equal(processorQualifiedNames.next_tag("foreignobject"), true); +assert.equal(processorQualifiedNames.get_namespace(), "svg"); +assert.equal(processorQualifiedNames.get_qualified_tag_name(), "foreignObject"); +assert.equal(processorQualifiedNames.get_qualified_attribute_name("xlink:href"), "xlink href"); 
+assert.equal(processorQualifiedNames.get_qualified_attribute_name("viewbox"), "viewBox"); +assert.equal(processorQualifiedNames.next_tag("mi"), true); +assert.equal(processorQualifiedNames.get_namespace(), "math"); +assert.equal(processorQualifiedNames.get_qualified_tag_name(), "mi"); +assert.equal(processorQualifiedNames.get_qualified_attribute_name("definitionurl"), "definitionURL"); +assert.equal(processorQualifiedNames.get_qualified_attribute_name("xlink:title"), "xlink title"); +processorQualifiedNames.destroy(); + +const processorManualNamespace = WP_HTML_Processor.create_fragment(""); +assert.equal(processorManualNamespace.get_namespace(), "html"); +assert.equal(processorManualNamespace.change_parsing_namespace("svg"), true); +assert.equal(processorManualNamespace.get_namespace(), "svg"); +assert.equal(processorManualNamespace.change_parsing_namespace("invalid"), false); +assert.throws( + () => processorManualNamespace.change_parsing_namespace(null), + TypeError, +); +assert.equal(processorManualNamespace.get_namespace(), "svg"); +assert.equal(processorManualNamespace.next_tag("rect"), true); +assert.equal(processorManualNamespace.get_namespace(), "svg"); +assert.equal(processorManualNamespace.get_qualified_tag_name(), "rect"); +assert.equal(processorManualNamespace.has_self_closing_flag(), true); +assert.equal(processorManualNamespace.expects_closer(), false); +processorManualNamespace.destroy(); + +const processorCoercedModifiableText = WP_HTML_Processor.create_fragment("abc"); +assert.equal(processorCoercedModifiableText.next_token(), true); +assert.equal(processorCoercedModifiableText.get_token_type(), "#text"); +assert.equal(processorCoercedModifiableText.set_modifiable_text(456), true); +assert.equal(processorCoercedModifiableText.get_updated_html(), "456"); +assert.throws( + () => processorCoercedModifiableText.set_modifiable_text(null), + TypeError, +); +processorCoercedModifiableText.destroy(); + +assert.equal(WP_HTML_Processor.PROCESS_NEXT_NODE, 
"process-next-node"); +assert.equal(WP_HTML_Processor.REPROCESS_CURRENT_NODE, "reprocess-current-node"); +assert.equal(WP_HTML_Processor.PROCESS_CURRENT_NODE, "process-current-node"); +assert.equal(WP_HTML_Processor.ERROR_UNSUPPORTED, "unsupported"); +assert.equal(WP_HTML_Processor.ERROR_EXCEEDED_MAX_BOOKMARKS, "exceeded-max-bookmarks"); +assert.equal(WP_HTML_Processor.MAX_BOOKMARKS, 10000); +const directFullParser = new WP_HTML_Processor("

    Direct"); +assert.equal(directFullParser.next_tag(), true); +assert.equal(directFullParser.get_tag(), "HTML"); +assert.deepEqual(directFullParser.get_breadcrumbs(), ["HTML"]); +directFullParser.destroy(); +const directUnlockedFullParser = new WP_HTML_Processor( + "

    Direct", + WP_HTML_Processor.CONSTRUCTOR_UNLOCK_CODE, +); +assert.equal(directUnlockedFullParser.next_tag(), true); +assert.equal(directUnlockedFullParser.get_tag(), "HTML"); +assert.deepEqual(directUnlockedFullParser.get_breadcrumbs(), ["HTML"]); +directUnlockedFullParser.destroy(); +const directNullProcessor = new WP_HTML_Processor(null); +assert.equal(directNullProcessor.next_tag(), false); +directNullProcessor.destroy(); +assert.equal(WP_HTML_Processor.create_fragment(null), null); +assert.equal(WP_HTML_Processor.create_fragment("", "", "ISO-8859-1"), null); +assert.equal(WP_HTML_Processor.create_fragment("", ""), null); +assert.equal(WP_HTML_Processor.create_fragment("", "
    "), null); +assert.equal(WP_HTML_Processor.create_fragment("", null), null); +assert.equal(WP_HTML_Processor.create_fragment("", "", {}), null); +const emptyTextareaFragment = WP_HTML_Processor.create_fragment("", "", false], + ["

    ", true], + ["

    ", true], +]) { + const currentTokenExpectationsProcessor = WP_HTML_Processor.create_fragment(html); + if (html !== "") { + assert.equal(currentTokenExpectationsProcessor.next_token(), true); + } + assert.equal(currentTokenExpectationsProcessor.expects_closer(), expected); + currentTokenExpectationsProcessor.destroy(); +} + +for (const html of [ + "", + '', + "", + '', + '', + '', +]) { + const supportedMetaProcessor = new WP_HTML_Processor(html, { fullParser: true }); + assert.equal(supportedMetaProcessor.next_tag("meta"), true, html); + assert.equal(supportedMetaProcessor.get_last_error(), null, html); + supportedMetaProcessor.destroy(); +} + +for (const [html, message] of [ + ['', "Cannot yet process META tags with charset to determine encoding."], + ['', "Cannot yet process META tags with charset to determine encoding."], + ['', "Cannot yet process META tags with http-equiv Content-Type to determine encoding."], + ['', "Cannot yet process META tags with http-equiv Content-Type to determine encoding."], +]) { + const supportedMetaProcessor = WP_HTML_Processor.create_full_parser(html); + assert.equal(supportedMetaProcessor.next_tag("meta"), true); + assert.equal(supportedMetaProcessor.get_last_error(), null); + supportedMetaProcessor.destroy(); + + const unsupportedMetaProcessor = new WP_HTML_Processor(html, { fullParser: true }); + assert.equal(unsupportedMetaProcessor.next_tag("meta"), false); + assert.equal(unsupportedMetaProcessor.get_last_error(), WP_HTML_Processor.ERROR_UNSUPPORTED); + const exception = unsupportedMetaProcessor.get_unsupported_exception(); + assert.ok(exception instanceof WP_HTML_Unsupported_Exception); + assert.ok(exception instanceof Error); + const tokenAt = html.indexOf("'); +assert.equal(fragmentMetaProcessor.next_tag("meta"), true); +assert.equal(fragmentMetaProcessor.get_last_error(), null); +fragmentMetaProcessor.destroy(); + +const plaintextProcessor = WP_HTML_Processor.create_fragment("

    raw <b>markup</b>"); +assert.equal(plaintextProcessor.next_tag("plaintext"), true); +assert.equal(plaintextProcessor.get_last_error(), null); +assert.deepEqual(plaintextProcessor.get_breadcrumbs(), ["HTML", "BODY", "PLAINTEXT"]); +assert.equal(plaintextProcessor.next_token(), true); +assert.equal(plaintextProcessor.get_token_type(), "#text"); +assert.equal(plaintextProcessor.get_modifiable_text(), "raw <b>markup</b>"); +assert.equal(plaintextProcessor.serialize_token(), "raw <b>markup</b>"); +assert.equal(plaintextProcessor.set_modifiable_text("updated <\0"), true); +assert.equal(plaintextProcessor.get_modifiable_text(), "updated <\uFFFD"); +assert.equal(plaintextProcessor.get_updated_html(), "<plaintext>updated <\uFFFD"); +plaintextProcessor.destroy(); + +assert.equal( + WP_HTML_Processor.normalize("<plaintext>raw <b>markup</b>"), + "<plaintext>raw <b>markup</b></plaintext>", +); + +const incompleteStepProcessor = WP_HTML_Processor.create_fragment("<div"); +assert.equal(incompleteStepProcessor.next_token(), false); +assert.equal(incompleteStepProcessor.paused_at_incomplete_token(), true); +assert.equal(incompleteStepProcessor.step(WP_HTML_Processor.PROCESS_CURRENT_NODE), false); +assert.equal(incompleteStepProcessor.step(WP_HTML_Processor.REPROCESS_CURRENT_NODE), false); +incompleteStepProcessor.destroy(); + +const fullParserIncompleteTagProcessor = WP_HTML_Processor.create_full_parser("<div"); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); +assert.equal(fullParserIncompleteTagProcessor.get_tag(), "HTML"); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); +assert.equal(fullParserIncompleteTagProcessor.get_tag(), "HEAD"); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); +assert.equal(fullParserIncompleteTagProcessor.get_tag(), "HEAD"); +assert.equal(fullParserIncompleteTagProcessor.is_tag_closer(), true); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); 
+assert.equal(fullParserIncompleteTagProcessor.get_tag(), "BODY"); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); +assert.equal(fullParserIncompleteTagProcessor.get_tag(), "BODY"); +assert.equal(fullParserIncompleteTagProcessor.is_tag_closer(), true); +assert.equal(fullParserIncompleteTagProcessor.next_token(), true); +assert.equal(fullParserIncompleteTagProcessor.get_tag(), "HTML"); +assert.equal(fullParserIncompleteTagProcessor.is_tag_closer(), true); +assert.equal(fullParserIncompleteTagProcessor.next_token(), false); +assert.equal(fullParserIncompleteTagProcessor.paused_at_incomplete_token(), false); +assert.deepEqual(fullParserIncompleteTagProcessor.get_breadcrumbs(), []); +fullParserIncompleteTagProcessor.destroy(); + +const fullParserIncompleteAfterDoctypeProcessor = WP_HTML_Processor.create_full_parser("<!DOCTYPE html><div"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_token_type(), "#doctype"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "HTML"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "HEAD"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "HEAD"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.is_tag_closer(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "BODY"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "BODY"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.is_tag_closer(), true); 
+assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.get_tag(), "HTML"); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.is_tag_closer(), true); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.next_token(), false); +assert.equal(fullParserIncompleteAfterDoctypeProcessor.paused_at_incomplete_token(), false); +assert.deepEqual(fullParserIncompleteAfterDoctypeProcessor.get_breadcrumbs(), []); +fullParserIncompleteAfterDoctypeProcessor.destroy(); + +const fullParserIncompleteEndTagProcessor = WP_HTML_Processor.create_full_parser("</b test"); +while (fullParserIncompleteEndTagProcessor.next_token()) {} +assert.equal(fullParserIncompleteEndTagProcessor.paused_at_incomplete_token(), false); +assert.equal(fullParserIncompleteEndTagProcessor.get_last_error(), null); +assert.deepEqual(fullParserIncompleteEndTagProcessor.get_breadcrumbs(), []); +fullParserIncompleteEndTagProcessor.destroy(); + +const fullParserIncompleteQuotedAttributeProcessor = WP_HTML_Processor.create_full_parser( + '<html><body><img src="" border="0" alt="><div>A</div></body></html>', +); +while (fullParserIncompleteQuotedAttributeProcessor.next_token()) {} +assert.equal(fullParserIncompleteQuotedAttributeProcessor.paused_at_incomplete_token(), false); +assert.equal(fullParserIncompleteQuotedAttributeProcessor.get_last_error(), null); +assert.deepEqual(fullParserIncompleteQuotedAttributeProcessor.get_breadcrumbs(), []); +fullParserIncompleteQuotedAttributeProcessor.destroy(); + +const fullParserIncompleteRawtextProcessor = WP_HTML_Processor.create_full_parser('<script type="data"><!-- foo-'); +assert.equal(fullParserIncompleteRawtextProcessor.next_tag("script"), true); +assert.equal(fullParserIncompleteRawtextProcessor.get_modifiable_text(), "<!-- foo-"); +assert.deepEqual(fullParserIncompleteRawtextProcessor.get_breadcrumbs(), ["HTML", "HEAD", "SCRIPT"]); +while 
(fullParserIncompleteRawtextProcessor.next_token()) {} +assert.equal(fullParserIncompleteRawtextProcessor.paused_at_incomplete_token(), false); +assert.equal(fullParserIncompleteRawtextProcessor.get_last_error(), null); +fullParserIncompleteRawtextProcessor.destroy(); + +const fullParserUnclosedTextareaProcessor = WP_HTML_Processor.create_full_parser("<textarea>test</div>test"); +assert.equal(fullParserUnclosedTextareaProcessor.next_tag("textarea"), true); +assert.equal(fullParserUnclosedTextareaProcessor.get_modifiable_text(), "test</div>test"); +assert.deepEqual(fullParserUnclosedTextareaProcessor.get_breadcrumbs(), ["HTML", "BODY", "TEXTAREA"]); +while (fullParserUnclosedTextareaProcessor.next_token()) {} +assert.equal(fullParserUnclosedTextareaProcessor.paused_at_incomplete_token(), false); +assert.equal(fullParserUnclosedTextareaProcessor.get_last_error(), null); +fullParserUnclosedTextareaProcessor.destroy(); + +const fragmentDoctypeProcessor = WP_HTML_Processor.create_fragment("<!doctype html><p>x"); +assert.equal(fragmentDoctypeProcessor.next_token(), true); +assert.equal(fragmentDoctypeProcessor.get_tag(), "P"); +assert.deepEqual(fragmentDoctypeProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]); +fragmentDoctypeProcessor.destroy(); +assert.equal(WP_HTML_Processor.normalize("<!doctype html><p>x"), "<p>x</p>"); + +const processorBookmarkLimit = WP_HTML_Processor.create_fragment("<div>"); +assert.equal(processorBookmarkLimit.next_tag("div"), true); +for (let i = 0; i <= WP_HTML_Tag_Processor.MAX_BOOKMARKS; i += 1) { + assert.equal(processorBookmarkLimit.set_bookmark(`processor-${i}`), true); +} +processorBookmarkLimit.destroy(); + +const completedProcessorBookmark = WP_HTML_Processor.create_fragment("<div>"); +assert.equal(completedProcessorBookmark.next_tag("div"), true); +assert.equal(completedProcessorBookmark.next_tag(), false); +assert.equal(completedProcessorBookmark.set_bookmark("after-complete"), false); 
+assert.equal(completedProcessorBookmark.has_bookmark("after-complete"), false); +completedProcessorBookmark.destroy(); + +const incompleteProcessorBookmark = WP_HTML_Processor.create_fragment("<div"); +assert.equal(incompleteProcessorBookmark.next_tag(), false); +assert.equal(incompleteProcessorBookmark.paused_at_incomplete_token(), true); +assert.equal(incompleteProcessorBookmark.set_bookmark("after-incomplete"), false); +assert.equal(incompleteProcessorBookmark.has_bookmark("after-incomplete"), false); +incompleteProcessorBookmark.destroy(); + +const processorBookmarkRelease = WP_HTML_Processor.create_fragment("<div><span>"); +assert.equal(processorBookmarkRelease.next_tag("div"), true); +assert.equal(processorBookmarkRelease.set_bookmark("mark"), true); +assert.equal(processorBookmarkRelease.has_bookmark("mark"), true); +assert.equal(processorBookmarkRelease.release_bookmark("mark"), true); +assert.equal(processorBookmarkRelease.has_bookmark("mark"), false); +assert.equal(processorBookmarkRelease.seek("mark"), false); +processorBookmarkRelease.destroy(); + +const processorBookmarkScalarNames = WP_HTML_Processor.create_fragment("<div></div><span></span>"); +assert.equal(processorBookmarkScalarNames.next_tag("div"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(1), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("1"), true); +assert.equal(processorBookmarkScalarNames.seek("1"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark(true), true); +assert.equal(processorBookmarkScalarNames.has_bookmark(1), false); +assert.equal(processorBookmarkScalarNames.set_bookmark(false), true); +assert.equal(processorBookmarkScalarNames.has_bookmark(""), true); +assert.equal(processorBookmarkScalarNames.has_bookmark(0), false); +assert.equal(processorBookmarkScalarNames.release_bookmark(""), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(null), true); +assert.equal(processorBookmarkScalarNames.has_bookmark(false), 
true); +assert.equal(processorBookmarkScalarNames.set_bookmark(["x"]), true); +assert.equal(processorBookmarkScalarNames.has_bookmark([]), true); +assert.equal(processorBookmarkScalarNames.seek(["different"]), true); +assert.equal(processorBookmarkScalarNames.get_tag(), "DIV"); +assert.equal(processorBookmarkScalarNames.release_bookmark([]), true); +assert.equal(processorBookmarkScalarNames.has_bookmark(["x"]), false); +assert.equal(processorBookmarkScalarNames.set_bookmark(NaN), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("NAN"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("NAN"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(Infinity), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("INF"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("INF"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(1e-5), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("1.0E-5"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("1.0E-5"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(1e20), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("1.0E+20"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("1.0E+20"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(100000000000000), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("100000000000000"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("100000000000000"), true); +assert.equal(processorBookmarkScalarNames.set_bookmark(-0), true); +assert.equal(processorBookmarkScalarNames.has_bookmark("-0"), true); +assert.equal(processorBookmarkScalarNames.release_bookmark("-0"), true); +assert.throws( + () => processorBookmarkScalarNames.set_bookmark({}), + TypeError, +); +processorBookmarkScalarNames.destroy(); + +for (const [parserName, createProcessor] of [ + ["fragment", (html) => 
WP_HTML_Processor.create_fragment(html)], + ["full parser", (html) => WP_HTML_Processor.create_full_parser(html)], +]) { + const seekSameLocationProcessor = createProcessor("<div><span>"); + assert.notEqual(seekSameLocationProcessor, null, parserName); + assert.equal(seekSameLocationProcessor.next_tag("div"), true, parserName); + assert.equal(seekSameLocationProcessor.set_bookmark("mark"), true, parserName); + assert.equal(seekSameLocationProcessor.has_bookmark("mark"), true, parserName); + assert.equal(seekSameLocationProcessor.seek("mark"), true, parserName); + assert.equal(seekSameLocationProcessor.get_tag(), "DIV", parserName); + assert.deepEqual(seekSameLocationProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"], parserName); + assert.equal(seekSameLocationProcessor.next_tag(), true, parserName); + assert.equal(seekSameLocationProcessor.get_tag(), "SPAN", parserName); + assert.deepEqual(seekSameLocationProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "SPAN"], parserName); + seekSameLocationProcessor.destroy(); + + const seekForwardProcessor = createProcessor("<div one></div><span two></span><a three>"); + assert.notEqual(seekForwardProcessor, null, parserName); + assert.equal(seekForwardProcessor.next_tag("div"), true, parserName); + assert.equal(seekForwardProcessor.set_bookmark("one"), true, parserName); + assert.equal(seekForwardProcessor.has_bookmark("one"), true, parserName); + assert.equal(seekForwardProcessor.next_tag("span"), true, parserName); + assert.equal(seekForwardProcessor.get_attribute("two"), true, parserName); + assert.equal(seekForwardProcessor.set_bookmark("two"), true, parserName); + assert.equal(seekForwardProcessor.has_bookmark("two"), true, parserName); + assert.equal(seekForwardProcessor.seek("one"), true, parserName); + assert.equal(seekForwardProcessor.get_tag(), "DIV", parserName); + assert.equal(seekForwardProcessor.seek("two"), true, parserName); + assert.equal(seekForwardProcessor.get_tag(), "SPAN", parserName); + 
assert.equal(seekForwardProcessor.get_attribute("two"), true, parserName); + assert.equal(seekForwardProcessor.next_tag(), true, parserName); + assert.equal(seekForwardProcessor.get_tag(), "A", parserName); + assert.equal(seekForwardProcessor.get_attribute("three"), true, parserName); + seekForwardProcessor.destroy(); +} + +for (const html of [ + "<i>".repeat(WP_HTML_Processor.MAX_BOOKMARKS + 1), + "<table><td>".repeat(Math.ceil(WP_HTML_Processor.MAX_BOOKMARKS / 4) + 1), +]) { + const deepNestingProcessor = WP_HTML_Processor.create_fragment(html); + while (deepNestingProcessor.next_token()) { + } + assert.equal(deepNestingProcessor.get_last_error(), WP_HTML_Processor.ERROR_EXCEEDED_MAX_BOOKMARKS); + deepNestingProcessor.destroy(); +} + +const processorSeekBreadcrumbs = WP_HTML_Processor.create_fragment("<div><img></div><div><hr></div>"); +assert.equal(processorSeekBreadcrumbs.next_tag("img"), true); +assert.deepEqual(processorSeekBreadcrumbs.get_breadcrumbs(), ["HTML", "BODY", "DIV", "IMG"]); +assert.equal(processorSeekBreadcrumbs.set_bookmark("first"), true); +assert.equal(processorSeekBreadcrumbs.next_tag("hr"), true); +assert.deepEqual(processorSeekBreadcrumbs.get_breadcrumbs(), ["HTML", "BODY", "DIV", "HR"]); +assert.equal(processorSeekBreadcrumbs.seek("first"), true); +assert.equal(processorSeekBreadcrumbs.get_tag(), "IMG"); +assert.deepEqual(processorSeekBreadcrumbs.get_breadcrumbs(), ["HTML", "BODY", "DIV", "IMG"]); +processorSeekBreadcrumbs.destroy(); + +const processorSeekNamespace = WP_HTML_Processor.create_fragment("<custom-element /><svg><rect />"); +assert.equal(processorSeekNamespace.next_tag("custom-element"), true); +assert.equal(processorSeekNamespace.has_self_closing_flag(), true); +assert.equal(processorSeekNamespace.expects_closer(), true); +assert.equal(processorSeekNamespace.set_bookmark("custom"), true); +assert.equal(processorSeekNamespace.next_tag("rect"), true); +assert.equal(processorSeekNamespace.get_namespace(), "svg"); 
+assert.equal(processorSeekNamespace.has_self_closing_flag(), true); +assert.equal(processorSeekNamespace.expects_closer(), false); +assert.equal(processorSeekNamespace.seek("custom"), true); +assert.equal(processorSeekNamespace.get_tag(), "CUSTOM-ELEMENT"); +assert.equal(processorSeekNamespace.get_namespace(), "html"); +assert.equal(processorSeekNamespace.has_self_closing_flag(), true); +assert.equal(processorSeekNamespace.expects_closer(), true); +assert.equal(processorSeekNamespace.next_tag("rect"), true); +assert.deepEqual(processorSeekNamespace.get_breadcrumbs(), ["HTML", "BODY", "CUSTOM-ELEMENT", "SVG", "RECT"]); +processorSeekNamespace.destroy(); + +assert.equal(WP_HTML_Processor.normalize("<A><I><A>"), "<a><i></i></a><i><a></a></i>"); +const reconstructedFormattingProcessor = WP_HTML_Processor.create_fragment('<p><em class="tone">One<p>Two'); +assert.equal(reconstructedFormattingProcessor.next_tag("em"), true); +assert.equal(reconstructedFormattingProcessor.get_attribute("class"), "tone"); +assert.equal(reconstructedFormattingProcessor.next_tag("em"), true); +assert.equal(reconstructedFormattingProcessor.is_virtual(), true); +assert.equal(reconstructedFormattingProcessor.get_attribute("class"), "tone"); +assert.equal(reconstructedFormattingProcessor.has_class("tone"), true); +assert.deepEqual(reconstructedFormattingProcessor.class_list(), ["tone"]); +assert.deepEqual(reconstructedFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "P", "EM"]); +reconstructedFormattingProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize('<p><em class="tone">One<p>Two'), + '<p><em class="tone">One</em></p><p><em class="tone">Two</em></p>', +); + +const repeatedFormattingProcessor = WP_HTML_Processor.create_full_parser("<p><b><b><b><b><p>x"); +while (repeatedFormattingProcessor.next_token()) { + if ( + repeatedFormattingProcessor.get_token_type() === "#text" && + repeatedFormattingProcessor.get_modifiable_text() === "x" + ) { + break; + } +} 
+assert.deepEqual(repeatedFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "P", "B", "B", "B", "#text"]); +repeatedFormattingProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<p><b><b><b><b><p>x"), + "<p><b><b><b><b></b></b></b></b></p><p><b><b><b>x</b></b></b></p>", +); + +assert.equal( + WP_HTML_Processor.normalize("<nobr>1<nobr>2"), + "<nobr>1</nobr><nobr>2</nobr>", +); +assert.equal( + WP_HTML_Processor.normalize("<b>1<nobr></b><i><nobr>2</i>"), + "<b>1<nobr></nobr></b><nobr><i></i></nobr><i><nobr>2</nobr></i>", +); + +assert.equal( + WP_HTML_Processor.normalize("<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y"), + [ + '<p><b id="a"><b id="a"><b id="a"><b><object><b id="a"><b id="a">X</b></b></object></b></b></b></b></p>', + '<p><b id="a"><b id="a"><b id="a"><b>Y</b></b></b></b></p>', + ].join(""), +); + +const staleFormattingCloserProcessor = WP_HTML_Processor.create_full_parser("<p id=a><b><p id=b></b>TEST"); +while ( + staleFormattingCloserProcessor.next_token() && + ( + staleFormattingCloserProcessor.get_token_type() !== "#text" || + staleFormattingCloserProcessor.get_modifiable_text() !== "TEST" + ) +) {} +assert.equal(staleFormattingCloserProcessor.get_modifiable_text(), "TEST"); +assert.deepEqual(staleFormattingCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "P", "#text"]); +staleFormattingCloserProcessor.destroy(); + +const nestedStaleFormattingCloserProcessor = WP_HTML_Processor.create_full_parser("<b id=a><p><b id=b></p></b>TEST"); +while ( + nestedStaleFormattingCloserProcessor.next_token() && + ( + nestedStaleFormattingCloserProcessor.get_token_type() !== "#text" || + nestedStaleFormattingCloserProcessor.get_modifiable_text() !== "TEST" + ) +) {} +assert.equal(nestedStaleFormattingCloserProcessor.get_modifiable_text(), "TEST"); +assert.deepEqual(nestedStaleFormattingCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "#text"]); +nestedStaleFormattingCloserProcessor.destroy(); + +const 
reconstructedAfterParagraphCloseProcessor = WP_HTML_Processor.create_full_parser("<p><b></p>text"); +while ( + reconstructedAfterParagraphCloseProcessor.next_token() && + ( + reconstructedAfterParagraphCloseProcessor.get_token_type() !== "#text" || + reconstructedAfterParagraphCloseProcessor.get_modifiable_text() !== "text" + ) +) {} +assert.equal(reconstructedAfterParagraphCloseProcessor.get_modifiable_text(), "text"); +assert.deepEqual(reconstructedAfterParagraphCloseProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "#text"]); +reconstructedAfterParagraphCloseProcessor.destroy(); + +const nestedFormattingCloseProcessor = WP_HTML_Processor.create_full_parser("<b><b></b>X</b>"); +while ( + nestedFormattingCloseProcessor.next_token() && + ( + nestedFormattingCloseProcessor.get_token_type() !== "#text" || + nestedFormattingCloseProcessor.get_modifiable_text() !== "X" + ) +) {} +assert.equal(nestedFormattingCloseProcessor.get_modifiable_text(), "X"); +assert.deepEqual(nestedFormattingCloseProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "#text"]); +while (nestedFormattingCloseProcessor.next_token()) {} +assert.equal(nestedFormattingCloseProcessor.get_last_error(), null); +assert.equal(nestedFormattingCloseProcessor.get_unsupported_exception(), null); +nestedFormattingCloseProcessor.destroy(); +assert.equal(WP_HTML_Processor.normalize("<b><b></b>X</b>"), "<b><b></b>X</b>"); + +const marqueeReconstructsFormattingProcessor = WP_HTML_Processor.create_full_parser("<p><b><div><marquee></p></b></div>X"); +assert.equal(marqueeReconstructsFormattingProcessor.next_tag("marquee"), true); +assert.deepEqual(marqueeReconstructsFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "B", "MARQUEE"]); +while ( + marqueeReconstructsFormattingProcessor.next_token() && + ( + marqueeReconstructsFormattingProcessor.get_token_type() !== "#text" || + marqueeReconstructsFormattingProcessor.get_modifiable_text() !== "X" + ) +) {} 
+assert.equal(marqueeReconstructsFormattingProcessor.get_modifiable_text(), "X"); +assert.deepEqual(marqueeReconstructsFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "B", "MARQUEE", "#text"]); +while (marqueeReconstructsFormattingProcessor.next_token()) {} +assert.equal(marqueeReconstructsFormattingProcessor.get_last_error(), null); +assert.equal(marqueeReconstructsFormattingProcessor.get_unsupported_exception(), null); +marqueeReconstructsFormattingProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<p><b><div><marquee></p></b></div>X"), + "<p><b></b></p><div><b><marquee><p></p>X</marquee></b></div>", +); + +const menuitemReconstructsFormattingProcessor = WP_HTML_Processor.create_full_parser("<!DOCTYPE html><p><b></p><menuitem>"); +assert.equal(menuitemReconstructsFormattingProcessor.next_tag("menuitem"), true); +assert.deepEqual(menuitemReconstructsFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "MENUITEM"]); +menuitemReconstructsFormattingProcessor.destroy(); + +const closedFormattingProcessor = WP_HTML_Processor.create_fragment("<b>one</b><p>two"); +assert.equal(closedFormattingProcessor.next_tag("b"), true); +assert.equal(closedFormattingProcessor.next_tag({ tag_name: "b", tag_closers: "visit" }), true); +assert.equal(closedFormattingProcessor.is_tag_closer(), true); +assert.deepEqual(closedFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(closedFormattingProcessor.next_tag("p"), true); +assert.deepEqual(closedFormattingProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]); +closedFormattingProcessor.destroy(); + +assert.equal( + WP_HTML_Processor.normalize("<b><i></b><p>x"), + "<b><i></i></b><p><i>x</i></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><button>foo</b>bar"), + "<b></b><button><b>foo</b>bar</button>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><button></b></button></b>"), + "<b></b><button><b></b></button>", +); +assert.equal( + 
WP_HTML_Processor.normalize("<i><menu>Foo</i>"), + "<i></i><menu><i>Foo</i></menu>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><p></a></p>"), + "<a></a><p><a></a></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<a>1<p>2</a>3</p>"), + "<a>1</a><p><a>2</a>3</p>", +); +assert.equal( + WP_HTML_Processor.normalize("<a>1<button>2</a>3</button>"), + "<a>1</a><button><a>2</a>3</button>", +); +assert.equal( + WP_HTML_Processor.normalize("<a>1<div>2<div>3</a>4</div>5</div>"), + "<a>1</a><div><a>2</a><div><a>3</a>4</div>5</div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><div><p></a>"), + "<a></a><div><a></a><p><a></a></p></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><p>text"), + "<a><p>text</p></a>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><p></b>TEST"), + "<b></b><p><b></b>TEST</p>", +); +assert.equal( + WP_HTML_Processor.normalize("<b>1<i>2<p>3</b>4"), + "<b>1<i>2</i></b><i><p><b>3</b>4</p></i>", +); +assert.equal( + WP_HTML_Processor.normalize("<i>A<b>B<p></i>C</b>D"), + "<i>A<b>B</b></i><b></b><p><b><i></i>C</b>D</p>", +); +assert.equal( + WP_HTML_Processor.normalize("<html><body>\n<p><font size=\"7\">First paragraph.</p>\n<p>Second paragraph.</p></font>\n<b><p><i>Bold and Italic</b> Italic</p>"), + "\n<p><font size=\"7\">First paragraph.</font></p><font size=\"7\">\n<p>Second paragraph.</p></font>\n<b></b><p><b><i>Bold and Italic</i></b><i> Italic</i></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<DIV> abc <B> def <I> ghi <P> jkl </B> mno"), + "<div> abc <b> def <i> ghi </i></b><i><p><b> jkl </b> mno</p></i></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<div><a><b><u><i><code><div></a>"), + "<div><a><b><u><i><code></code></i></u></b></a><u><i><code><div><a></a></div></code></i></u></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><b><big><em><strong><div>X</a>"), + "<a><b><big><em><strong></strong></em></big></b></a><big><em><strong><div><a>X</a></div></strong></em></big>", +); 
+assert.equal( + WP_HTML_Processor.normalize("<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>"), + '<a><b></b></a><b><div id="1"><a></a><div id="2"><a></a><div id="3"><a></a><div id="4"><a></a><div id="5"><a></a><div id="6"><a></a><div id="7"><a></a><div id="8"><a><div id="9">A</div></a></div></div></div></div></div></div></div></div></b>', +); +assert.equal( + WP_HTML_Processor.normalize("<font><p>hello<b>cruel</font>world"), + "<font></font><p><font>hello<b>cruel</b></font><b>world</b></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<font><p><i>x</i>y</font>z</p>"), + "<font></font><p><font><i>x</i>y</font>z</p>", +); +assert.equal( + WP_HTML_Processor.normalize("<font></p><p><meta><title></title></font>"), + "<font><p></p></font><p><font><meta><title></title></font></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<b>A<cite>B<div>C</b>D"), + "<b>A<cite>B</cite></b><div><b>C</b>D</div>", +); +assert.equal( + WP_HTML_Processor.normalize("<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST"), + "<cite><b><cite><i><cite><i><cite><i></i></cite></i></cite></i></cite></b><i><i><div><b>X</b>TEST</div></i></i></cite>", +); +assert.equal( + WP_HTML_Processor.normalize("<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f"), + "<i>a<b>b</b></i><b></b><div><b><i>c<a>d</a></i><a>e</a></b><a>f</a></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><p>X<a>Y</a>Z</p></a>"), + "<a></a><p><a>X</a><a>Y</a>Z</p>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><p><a></a></p></a>"), + "<a></a><p><a></a><a></a></p>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><div><style></style><address><a>"), + "<a></a><div><a><style></style></a><address><a></a><a></a></address></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><div supported><a unsupported></div></a>"), + "<a></a><div supported><a></a><a unsupported></a></div>", +); +assert.equal( + 
WP_HTML_Processor.normalize("<a><center><title></title><a>"), + "<a></a><center><a><title></title></a><a></a></center>", +); +assert.equal( + WP_HTML_Processor.normalize("<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa"), + '<a href="a">aa<marquee>aa<a href="b">bb</a></marquee>aa</a>', +); +assert.equal( + WP_HTML_Processor.normalize("<a><li><style></style><title></title></a>"), + "<a></a><li><a><style></style><title></title></a></li>", +); + +assert.equal( + WP_HTML_Processor.normalize("<a><b>1<a>2"), + "<a><b>1</b></a><b><a>2</a></b>", +); +assert.equal( + WP_HTML_Processor.normalize("<a X>0<b>1<a Y>2"), + "<a x>0<b>1</b></a><b><a y>2</a></b>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><strong>Click <span supported><a unsupported><big>Here</big></a></strong></a>"), + "<a><strong>Click <span supported></span></strong></a><strong><a unsupported><big>Here</big></a></strong>", +); + +assert.equal(WP_HTML_Processor.normalize("</b><p>x"), "<p>x</p>"); +assert.equal(WP_HTML_Processor.normalize("<b></b></b><p>x"), "<b></b><p>x</p>"); +assert.equal(WP_HTML_Processor.normalize("<b>Test</i>Test"), "<b>TestTest</b>"); + +assert.equal( + WP_HTML_Processor.normalize("<b><div></b><p>x"), + "<b></b><div><b></b><p>x</p></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><em><foo><foo><aside></b>"), + "<b><em><foo><foo></foo></foo></em></b><em><aside><b></b></aside></em>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><em><foo><foo><aside></b></em>"), + "<b><em><foo><foo></foo></foo></em></b><em></em><aside><em><b></b></em></aside>", +); +assert.equal( + WP_HTML_Processor.normalize("<b><em><foo><foo><foo><aside></b></em>"), + "<b><em><foo><foo><foo></foo></foo></foo></em></b><aside><b></b></aside>", +); +assert.equal( + WP_HTML_Processor.normalize("<b>a<div></div><div></b>y"), + "<b>a<div></div></b><div><b></b>y</div>", +); +assert.equal( + WP_HTML_Processor.normalize("<a><div></a><p>x"), + "<a></a><div><p>x</p></div>", +); +assert.equal( + 
WP_HTML_Processor.normalize("<label><a><div>Hello<div>World</div></a></label> "), + "<label><a></a><div><a>Hello<div>World</div></a> </div></label>", +); +assert.equal( + WP_HTML_Processor.normalize("<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3"), + "<b><nobr>1</nobr><nobr></nobr></b><nobr><i></i></nobr><i><nobr>2</nobr><nobr></nobr></i><nobr>3</nobr>", +); +assert.equal( + WP_HTML_Processor.normalize("<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3"), + "<b><nobr>1</nobr></b><div><b><nobr></nobr><nobr></nobr></b><nobr><i></i></nobr><i><nobr>2</nobr><nobr></nobr></i><nobr>3</nobr></div>", +); +assert.equal( + WP_HTML_Processor.normalize("<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>"), + "<b><nobr>1</nobr><nobr><ins></ins></nobr></b><nobr><i></i></nobr><i><nobr></nobr></i>", +); + +assert.equal( + WP_HTML_Processor.normalize("<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3"), + "<b><nobr>1<nobr><i></i></nobr><i><nobr>2</nobr><nobr></nobr></i><nobr>3</nobr><table></table></nobr></b>", +); + +const fullParserText = WP_HTML_Processor.create_full_parser("text"); +assert.equal(fullParserText.next_tag("body"), true); +assert.equal(fullParserText.get_tag(), "BODY"); +assert.equal(fullParserText.is_virtual(), true); +assert.equal(fullParserText.is_tag_closer(), false); +assert.deepEqual(fullParserText.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(fullParserText.set_bookmark("body"), false); +assert.equal(fullParserText.next_token(), true); +assert.equal(fullParserText.get_token_name(), "#text"); +assert.deepEqual(fullParserText.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(fullParserText.set_bookmark("text"), true); +fullParserText.destroy(); + +const fullParserDoctype = WP_HTML_Processor.create_full_parser("<!doctype html><p>Hi</p>"); +assert.equal(fullParserDoctype.next_token(), true); +assert.equal(fullParserDoctype.get_token_type(), "#doctype"); 
+assert.equal(fullParserDoctype.next_tag("p"), true); +assert.equal(fullParserDoctype.get_tag(), "P"); +assert.deepEqual(fullParserDoctype.get_breadcrumbs(), ["HTML", "BODY", "P"]); +fullParserDoctype.destroy(); + +const fullParserExplicitHtmlEof = WP_HTML_Processor.create_full_parser("<html><!DOCTYPE html>"); +const fullParserExplicitHtmlEofTokens = []; +while (fullParserExplicitHtmlEof.next_token()) { + fullParserExplicitHtmlEofTokens.push( + fullParserExplicitHtmlEof.get_token_type() === "#tag" + ? `${fullParserExplicitHtmlEof.is_virtual() ? "V" : "R"}${fullParserExplicitHtmlEof.is_tag_closer() ? "-" : "+"}${fullParserExplicitHtmlEof.get_tag()}:${fullParserExplicitHtmlEof.get_breadcrumbs().join("/")}` + : fullParserExplicitHtmlEof.get_token_name(), + ); +} +assert.deepEqual(fullParserExplicitHtmlEofTokens, [ + "R+HTML:HTML", + "V+HEAD:HTML/HEAD", + "V-HEAD:HTML", + "V+BODY:HTML/BODY", + "V-BODY:HTML", + "V-HTML:", +]); +fullParserExplicitHtmlEof.destroy(); + +const fullParserHeadNoscriptBreakout = WP_HTML_Processor.create_full_parser("<head><noscript></br><!--foo--></noscript>"); +assert.equal(fullParserHeadNoscriptBreakout.next_tag("noscript"), true); +assert.deepEqual(fullParserHeadNoscriptBreakout.get_breadcrumbs(), ["HTML", "HEAD", "NOSCRIPT"]); +assert.equal(fullParserHeadNoscriptBreakout.next_tag("br"), true); +assert.deepEqual(fullParserHeadNoscriptBreakout.get_breadcrumbs(), ["HTML", "BODY", "BR"]); +assert.equal(fullParserHeadNoscriptBreakout.next_token(), true); +assert.equal(fullParserHeadNoscriptBreakout.get_token_type(), "#comment"); +assert.deepEqual(fullParserHeadNoscriptBreakout.get_breadcrumbs(), ["HTML", "BODY", "#comment"]); +fullParserHeadNoscriptBreakout.destroy(); + +const fullParserNestedHeadNoscript = WP_HTML_Processor.create_full_parser('<head><noscript><noscript class="foo"><!--foo--></noscript>'); +assert.equal(fullParserNestedHeadNoscript.next_tag("noscript"), true); +assert.deepEqual(fullParserNestedHeadNoscript.get_breadcrumbs(), 
["HTML", "HEAD", "NOSCRIPT"]); +assert.equal(fullParserNestedHeadNoscript.next_token(), true); +assert.equal(fullParserNestedHeadNoscript.get_token_type(), "#comment"); +assert.deepEqual(fullParserNestedHeadNoscript.get_breadcrumbs(), ["HTML", "HEAD", "NOSCRIPT", "#comment"]); +assert.equal(fullParserNestedHeadNoscript.next_tag("noscript"), false); +assert.equal(fullParserNestedHeadNoscript.get_last_error(), null); +fullParserNestedHeadNoscript.destroy(); + +const fullParserOpenHeadNoscript = WP_HTML_Processor.create_full_parser("<head><noscript>"); +assert.equal(fullParserOpenHeadNoscript.next_tag("noscript"), true); +assert.deepEqual(fullParserOpenHeadNoscript.get_breadcrumbs(), ["HTML", "HEAD", "NOSCRIPT"]); +assert.equal(fullParserOpenHeadNoscript.next_tag("body"), true); +assert.deepEqual(fullParserOpenHeadNoscript.get_breadcrumbs(), ["HTML", "BODY"]); +fullParserOpenHeadNoscript.destroy(); + +const fullParserHeadContentAfterHead = WP_HTML_Processor.create_full_parser( + "<head></head><!-- --><style></style><!-- --><script></script>", +); +const fullParserHeadContentAfterHeadTokens = []; +while (fullParserHeadContentAfterHead.next_token()) { + fullParserHeadContentAfterHeadTokens.push( + `${fullParserHeadContentAfterHead.is_virtual() ? "V" : "R"}${fullParserHeadContentAfterHead.is_tag_closer() ? 
"-" : "+"}${fullParserHeadContentAfterHead.get_token_name()}:${fullParserHeadContentAfterHead.get_breadcrumbs().join("/")}`, + ); +} +assert.deepEqual(fullParserHeadContentAfterHeadTokens, [ + "V+HTML:HTML", + "R+HEAD:HTML/HEAD", + "R-HEAD:HTML", + "R+#comment:HTML/#comment", + "R+STYLE:HTML/HEAD/STYLE", + "R+#comment:HTML/#comment", + "R+SCRIPT:HTML/HEAD/SCRIPT", + "V+BODY:HTML/BODY", + "V-BODY:HTML", + "V-HTML:", +]); +fullParserHeadContentAfterHead.destroy(); + +const fullParserHeadTemplateText = WP_HTML_Processor.create_full_parser("<template>Hello</template>"); +assert.equal(fullParserHeadTemplateText.next_tag("template"), true); +assert.deepEqual(fullParserHeadTemplateText.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE"]); +assert.equal(fullParserHeadTemplateText.next_token(), true); +assert.equal(fullParserHeadTemplateText.get_token_type(), "#text"); +assert.deepEqual(fullParserHeadTemplateText.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "#text"]); +assert.equal(fullParserHeadTemplateText.next_tag("body"), true); +assert.deepEqual(fullParserHeadTemplateText.get_breadcrumbs(), ["HTML", "BODY"]); +fullParserHeadTemplateText.destroy(); + +const fullParserOpenHeadTemplate = WP_HTML_Processor.create_full_parser("<template><div>"); +assert.equal(fullParserOpenHeadTemplate.next_tag("div"), true); +assert.deepEqual(fullParserOpenHeadTemplate.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "DIV"]); +assert.equal(fullParserOpenHeadTemplate.next_tag("body"), true); +assert.deepEqual(fullParserOpenHeadTemplate.get_breadcrumbs(), ["HTML", "BODY"]); +fullParserOpenHeadTemplate.destroy(); + +const fullParserTemplateNestedAnchorTable = WP_HTML_Processor.create_full_parser("<template><a><table><a>"); +const fullParserTemplateNestedAnchorTableStarts = []; +while (fullParserTemplateNestedAnchorTable.next_token()) { + if ( + fullParserTemplateNestedAnchorTable.get_token_type() === "#tag" && + !fullParserTemplateNestedAnchorTable.is_tag_closer() && + ["A", 
"TABLE"].includes(fullParserTemplateNestedAnchorTable.get_tag()) + ) { + fullParserTemplateNestedAnchorTableStarts.push([ + fullParserTemplateNestedAnchorTable.get_tag(), + fullParserTemplateNestedAnchorTable.get_breadcrumbs(), + ]); + } +} +assert.equal(fullParserTemplateNestedAnchorTable.get_last_error(), null); +assert.deepEqual(fullParserTemplateNestedAnchorTableStarts, [ + ["A", ["HTML", "HEAD", "TEMPLATE", "A"]], + ["A", ["HTML", "HEAD", "TEMPLATE", "A", "A"]], + ["TABLE", ["HTML", "HEAD", "TEMPLATE", "A", "TABLE"]], +]); +fullParserTemplateNestedAnchorTable.destroy(); + +const fullParserExplicitHeadTemplate = WP_HTML_Processor.create_full_parser("<head><template><div></div></template></head>"); +assert.equal(fullParserExplicitHeadTemplate.next_tag("div"), true); +assert.deepEqual(fullParserExplicitHeadTemplate.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "DIV"]); +assert.equal(fullParserExplicitHeadTemplate.next_tag("body"), true); +assert.deepEqual(fullParserExplicitHeadTemplate.get_breadcrumbs(), ["HTML", "BODY"]); +fullParserExplicitHeadTemplate.destroy(); + +const fullParserTemplateAfterHead = WP_HTML_Processor.create_full_parser("<head></head><template>Foo</template>"); +assert.equal(fullParserTemplateAfterHead.next_tag("template"), true); +assert.deepEqual(fullParserTemplateAfterHead.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE"]); +assert.equal(fullParserTemplateAfterHead.next_token(), true); +assert.equal(fullParserTemplateAfterHead.get_token_type(), "#text"); +assert.equal(fullParserTemplateAfterHead.get_modifiable_text(), "Foo"); +assert.deepEqual(fullParserTemplateAfterHead.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "#text"]); +assert.equal(fullParserTemplateAfterHead.next_tag("body"), true); +assert.deepEqual(fullParserTemplateAfterHead.get_breadcrumbs(), ["HTML", "BODY"]); +fullParserTemplateAfterHead.destroy(); + +const fullParserLinkAfterHead = WP_HTML_Processor.create_full_parser("<head><meta></head><link><p>"); 
+assert.equal(fullParserLinkAfterHead.next_tag("link"), true); +assert.deepEqual(fullParserLinkAfterHead.get_breadcrumbs(), ["HTML", "HEAD", "LINK"]); +assert.equal(fullParserLinkAfterHead.next_tag("p"), true); +assert.deepEqual(fullParserLinkAfterHead.get_breadcrumbs(), ["HTML", "BODY", "P"]); +fullParserLinkAfterHead.destroy(); + +const fullParserTemplateAfterBody = WP_HTML_Processor.create_full_parser("<body></body><template>"); +assert.equal(fullParserTemplateAfterBody.next_tag("template"), true); +assert.deepEqual(fullParserTemplateAfterBody.get_breadcrumbs(), ["HTML", "BODY", "TEMPLATE"]); +fullParserTemplateAfterBody.destroy(); + +const fullParserBodyTemplateOuterCloser = WP_HTML_Processor.create_full_parser("<div><template></div>Hello"); +assert.equal(fullParserBodyTemplateOuterCloser.next_tag("template"), true); +assert.deepEqual(fullParserBodyTemplateOuterCloser.get_breadcrumbs(), ["HTML", "BODY", "DIV", "TEMPLATE"]); +assert.equal(fullParserBodyTemplateOuterCloser.next_token(), true); +assert.equal(fullParserBodyTemplateOuterCloser.get_token_type(), "#text"); +assert.equal(fullParserBodyTemplateOuterCloser.get_modifiable_text(), "Hello"); +assert.deepEqual(fullParserBodyTemplateOuterCloser.get_breadcrumbs(), ["HTML", "BODY", "DIV", "TEMPLATE", "#text"]); +fullParserBodyTemplateOuterCloser.destroy(); + +const fullParserTemplateFrames = WP_HTML_Processor.create_full_parser("<template><frame></frame></frameset><frame></frame></template>"); +const fullParserTemplateFrameTags = []; +while (fullParserTemplateFrames.next_token()) { + if (fullParserTemplateFrames.get_token_type() === "#tag") { + fullParserTemplateFrameTags.push(fullParserTemplateFrames.get_tag()); + } +} +assert.equal(fullParserTemplateFrames.get_last_error(), null); +assert.equal(fullParserTemplateFrameTags.includes("FRAME"), false); +assert.equal(fullParserTemplateFrameTags.includes("FRAMESET"), false); +fullParserTemplateFrames.destroy(); + +const fullParserTemplateIgnoredFrameset = 
WP_HTML_Processor.create_full_parser( + "<template><div><frameset><span></span></div><span></span></template>", +); +const fullParserTemplateIgnoredFramesetSpans = []; +while (fullParserTemplateIgnoredFrameset.next_token()) { + if ( + fullParserTemplateIgnoredFrameset.get_token_type() === "#tag" && + !fullParserTemplateIgnoredFrameset.is_tag_closer() && + fullParserTemplateIgnoredFrameset.get_tag() === "SPAN" + ) { + fullParserTemplateIgnoredFramesetSpans.push(fullParserTemplateIgnoredFrameset.get_breadcrumbs()); + } +} +assert.equal(fullParserTemplateIgnoredFrameset.get_last_error(), null); +assert.deepEqual(fullParserTemplateIgnoredFramesetSpans, [ + ["HTML", "HEAD", "TEMPLATE", "DIV", "SPAN"], + ["HTML", "HEAD", "TEMPLATE", "SPAN"], +]); +fullParserTemplateIgnoredFrameset.destroy(); + +const fullParserTemplateIgnoresHtmlStart = WP_HTML_Processor.create_full_parser( + "<html a=b><template><div><html b=c><span></template>", +); +assert.equal(fullParserTemplateIgnoresHtmlStart.next_tag("span"), true); +assert.deepEqual(fullParserTemplateIgnoresHtmlStart.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "DIV", "SPAN"]); +fullParserTemplateIgnoresHtmlStart.destroy(); + +const fullParserTemplateIgnoresBodyStart = WP_HTML_Processor.create_full_parser("<template><body><span></template>"); +assert.equal(fullParserTemplateIgnoresBodyStart.next_tag("span"), true); +assert.deepEqual(fullParserTemplateIgnoresBodyStart.get_breadcrumbs(), ["HTML", "HEAD", "TEMPLATE", "SPAN"]); +fullParserTemplateIgnoresBodyStart.destroy(); + +const fullParserNestedTemplateFormatting = WP_HTML_Processor.create_full_parser( + "<body><template><template><b><template></template></template>text</template>", +); +assert.equal(fullParserNestedTemplateFormatting.next_token(), true); +while ( + fullParserNestedTemplateFormatting.get_token_type() !== "#text" && + fullParserNestedTemplateFormatting.next_token() +) { +} +assert.equal(fullParserNestedTemplateFormatting.get_token_type(), "#text"); 
+assert.equal(fullParserNestedTemplateFormatting.get_modifiable_text(), "text"); +assert.deepEqual(fullParserNestedTemplateFormatting.get_breadcrumbs(), ["HTML", "BODY", "TEMPLATE", "#text"]); +fullParserNestedTemplateFormatting.destroy(); + +const fullParserTemplateCellAfterRow = WP_HTML_Processor.create_full_parser("<body><template><tr></tr><td></td></template>"); +assert.equal(fullParserTemplateCellAfterRow.next_tag("td"), true); +assert.deepEqual(fullParserTemplateCellAfterRow.get_breadcrumbs(), ["HTML", "BODY", "TEMPLATE", "TR", "TD"]); +fullParserTemplateCellAfterRow.destroy(); + +const fullParserTemplateSkipsTableWrappers = WP_HTML_Processor.create_full_parser( + "<body><template><td></td><tbody><td></td></template>", +); +const fullParserTemplateSkipsTableWrapperCells = []; +while (fullParserTemplateSkipsTableWrappers.next_tag("td")) { + fullParserTemplateSkipsTableWrapperCells.push(fullParserTemplateSkipsTableWrappers.get_breadcrumbs()); +} +assert.deepEqual(fullParserTemplateSkipsTableWrapperCells, [ + ["HTML", "BODY", "TEMPLATE", "TD"], + ["HTML", "BODY", "TEMPLATE", "TD"], +]); +fullParserTemplateSkipsTableWrappers.destroy(); + +const fullParserTemplateIgnoresBadTableRows = WP_HTML_Processor.create_full_parser("<body><template><div><tr></tr></div></template>"); +assert.equal(fullParserTemplateIgnoresBadTableRows.next_tag("tr"), false); +assert.equal(fullParserTemplateIgnoresBadTableRows.get_last_error(), null); +fullParserTemplateIgnoresBadTableRows.destroy(); + +const fullParserTemplateIgnoresAfterCol = WP_HTML_Processor.create_full_parser("<body><template><col><div>"); +assert.equal(fullParserTemplateIgnoresAfterCol.next_tag("col"), true); +assert.deepEqual(fullParserTemplateIgnoresAfterCol.get_breadcrumbs(), ["HTML", "BODY", "TEMPLATE", "COL"]); +assert.equal(fullParserTemplateIgnoresAfterCol.next_tag("div"), false); +assert.equal(fullParserTemplateIgnoresAfterCol.get_last_error(), null); +fullParserTemplateIgnoresAfterCol.destroy(); + +const 
fullParserTemplateIgnoresTextAfterCol = WP_HTML_Processor.create_full_parser("<body><template><col>Hello"); +assert.equal(fullParserTemplateIgnoresTextAfterCol.next_tag("col"), true); +assert.deepEqual(fullParserTemplateIgnoresTextAfterCol.get_breadcrumbs(), ["HTML", "BODY", "TEMPLATE", "COL"]); +assert.equal(fullParserTemplateIgnoresTextAfterCol.next_token(), true); +assert.equal(fullParserTemplateIgnoresTextAfterCol.get_token_name(), "TEMPLATE"); +assert.equal(fullParserTemplateIgnoresTextAfterCol.is_tag_closer(), true); +fullParserTemplateIgnoresTextAfterCol.destroy(); + +assert.equal( + buildFullParserHtml5libTree("<body><template><thead></thead><template><tr></tr></template><tr></tr><tfoot></tfoot></template>"), + "<html>\n <head>\n <body>\n <template>\n content\n <thead>\n <template>\n content\n <tr>\n <tbody>\n <tr>\n <tfoot>\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<body><template></div><div>Foo</div><template></template><tr></tr>"), + "<html>\n <head>\n <body>\n <template>\n content\n <div>\n \"Foo\"\n <template>\n content\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<body><template><script>var i = 1;</script><td></td></template>"), + "<html>\n <head>\n <body>\n <template>\n content\n <script>\n \"var i = 1;\"\n <td>\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<body><template><thead></thead><caption></caption><tbody></tbody></template>"), + "<html>\n <head>\n <body>\n <template>\n content\n <thead>\n <caption>\n <tbody>\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<template><template><tbody><select>"), + "<html>\n <head>\n <template>\n content\n <template>\n content\n <tbody>\n <select>\n <body>\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<body><table><tr><td><select><template>Foo</template><caption>A</table>"), + "<html>\n <head>\n <body>\n <table>\n <tbody>\n <tr>\n <td>\n <select>\n <template>\n content\n \"Foo\"\n <caption>\n \"A\"\n\n", +); + +assert.equal( + 
buildFullParserHtml5libTree("<template><td></template><body><span>Foo"), + "<html>\n <head>\n <template>\n content\n <td>\n <body>\n <span>\n \"Foo\"\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<head></head><template>"), + "<html>\n <head>\n <template>\n content\n <body>\n\n", +); + +assert.equal( + buildFullParserHtml5libTree("<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>"), + '<!DOCTYPE html>\n<html>\n <head>\n <body>\n t1="1"\n\n', +); + +assert.equal( + buildFullParserHtml5libTree("<!DOCTYPE html><html a=1><body><html b=2>"), + '<!DOCTYPE html>\n<html>\n a="1"\n <head>\n <body>\n\n', +); + +const fullParserExplicitShell = WP_HTML_Processor.create_full_parser( + "<html><head><title>Title</title></head><body><p>One<footer>Two</footer><ul><li>A<li>B</ul></body></html>", +); +const fullParserExplicitShellTokens = []; +while (fullParserExplicitShell.next_token()) { + fullParserExplicitShellTokens.push( + fullParserExplicitShell.get_token_type() === "#tag" + ? `${fullParserExplicitShell.is_tag_closer() ? "-" : "+"}${fullParserExplicitShell.get_tag()}` + : fullParserExplicitShell.get_token_name(), + ); +} +assert.deepEqual(fullParserExplicitShellTokens, [ + "+HTML", + "+HEAD", + "+TITLE", + "-HEAD", + "+BODY", + "+P", + "#text", + "-P", + "+FOOTER", + "#text", + "-FOOTER", + "+UL", + "+LI", + "#text", + "-LI", + "+LI", + "#text", + "-LI", + "-UL", + "-BODY", + "-HTML", +]); +fullParserExplicitShell.destroy(); + +const fullParserFrameset = WP_HTML_Processor.create_full_parser("<frameset><frame></frameset>"); +const fullParserFramesetTokens = []; +while (fullParserFrameset.next_token()) { + fullParserFramesetTokens.push( + fullParserFrameset.get_token_type() === "#tag" + ? `${fullParserFrameset.is_tag_closer() ? 
"-" : "+"}${fullParserFrameset.get_tag()}:${fullParserFrameset.get_breadcrumbs().join("/")}` + : fullParserFrameset.get_token_name(), + ); +} +assert.deepEqual(fullParserFramesetTokens, [ + "+HTML:HTML", + "+HEAD:HTML/HEAD", + "-HEAD:HTML", + "+FRAMESET:HTML/FRAMESET", + "+FRAME:HTML/FRAMESET/FRAME", + "-FRAMESET:HTML", + "-HTML:", +]); +fullParserFrameset.destroy(); + +const fullParserFramesetAfterNull = WP_HTML_Processor.create_full_parser("<html>\0<frameset></frameset>"); +const fullParserFramesetAfterNullTags = []; +while (fullParserFramesetAfterNull.next_token()) { + if (fullParserFramesetAfterNull.get_token_type() === "#tag") { + fullParserFramesetAfterNullTags.push( + `${fullParserFramesetAfterNull.is_tag_closer() ? "-" : "+"}${fullParserFramesetAfterNull.get_tag()}`, + ); + } +} +assert.deepEqual(fullParserFramesetAfterNullTags, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]); +assert.equal(fullParserFramesetAfterNull.get_last_error(), null); +assert.equal(fullParserFramesetAfterNull.get_unsupported_exception(), null); +fullParserFramesetAfterNull.destroy(); + +const fullParserFramesetAfterHiddenInput = WP_HTML_Processor.create_full_parser('<input type="hidden"><frameset>'); +const fullParserFramesetAfterHiddenInputTokens = []; +while (fullParserFramesetAfterHiddenInput.next_token()) { + if (fullParserFramesetAfterHiddenInput.get_token_type() === "#tag") { + fullParserFramesetAfterHiddenInputTokens.push( + `${fullParserFramesetAfterHiddenInput.is_tag_closer() ? 
"-" : "+"}${fullParserFramesetAfterHiddenInput.get_tag()}`, + ); + } +} +assert.deepEqual(fullParserFramesetAfterHiddenInputTokens, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]); +assert.equal(fullParserFramesetAfterHiddenInput.get_last_error(), null); +assert.equal(fullParserFramesetAfterHiddenInput.get_unsupported_exception(), null); +fullParserFramesetAfterHiddenInput.destroy(); + +for (const html of ["<param><frameset></frameset>", "<source> <frameset></frameset>", "<track><frameset></frameset>"]) { + const fullParserFramesetAfterIgnoredStart = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterIgnoredStartTags = []; + while (fullParserFramesetAfterIgnoredStart.next_token()) { + if (fullParserFramesetAfterIgnoredStart.get_token_type() === "#tag") { + fullParserFramesetAfterIgnoredStartTags.push( + `${fullParserFramesetAfterIgnoredStart.is_tag_closer() ? "-" : "+"}${fullParserFramesetAfterIgnoredStart.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterIgnoredStartTags, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]); + assert.equal(fullParserFramesetAfterIgnoredStart.get_last_error(), null); + assert.equal(fullParserFramesetAfterIgnoredStart.get_unsupported_exception(), null); + fullParserFramesetAfterIgnoredStart.destroy(); +} + +const fullParserFramesetAfterIgnoredFrameNoise = WP_HTML_Processor.create_full_parser( + "<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>", +); +const fullParserFramesetAfterIgnoredFrameNoiseTokens = []; +while (fullParserFramesetAfterIgnoredFrameNoise.next_token()) { + if (fullParserFramesetAfterIgnoredFrameNoise.get_token_type() === "#tag") { + const tagPrefix = fullParserFramesetAfterIgnoredFrameNoise.is_tag_closer() ? 
"-" : "+"; + fullParserFramesetAfterIgnoredFrameNoiseTokens.push( + `${tagPrefix}${fullParserFramesetAfterIgnoredFrameNoise.get_tag()}:${ + fullParserFramesetAfterIgnoredFrameNoise.get_breadcrumbs().join("/") + }`, + ); + } +} +assert.deepEqual(fullParserFramesetAfterIgnoredFrameNoiseTokens, [ + "+HTML:HTML", + "+HEAD:HTML/HEAD", + "-HEAD:HTML", + "+FRAMESET:HTML/FRAMESET", + "+FRAME:HTML/FRAMESET/FRAME", + "+FRAMESET:HTML/FRAMESET/FRAMESET", + "+FRAME:HTML/FRAMESET/FRAMESET/FRAME", + "-FRAMESET:HTML/FRAMESET", + "+NOFRAMES:HTML/FRAMESET/NOFRAMES", + "-FRAMESET:HTML", + "-HTML:", +]); +assert.equal(fullParserFramesetAfterIgnoredFrameNoise.get_last_error(), null); +assert.equal(fullParserFramesetAfterIgnoredFrameNoise.get_unsupported_exception(), null); +fullParserFramesetAfterIgnoredFrameNoise.destroy(); + +for (const html of [ + "<svg></svg><frameset><frame>", + "<math></math><frameset><frame>", + "<svg>\0 </svg><frameset><frame>", + "<svg><path></path></svg><frameset><frame>", +]) { + const fullParserFramesetAfterEmptyForeign = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterEmptyForeignTags = []; + while (fullParserFramesetAfterEmptyForeign.next_token()) { + if (fullParserFramesetAfterEmptyForeign.get_token_type() === "#tag") { + fullParserFramesetAfterEmptyForeignTags.push( + `${fullParserFramesetAfterEmptyForeign.is_tag_closer() ? 
"-" : "+"}${fullParserFramesetAfterEmptyForeign.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterEmptyForeignTags, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "+FRAME", "-FRAMESET", "-HTML"]); + assert.equal(fullParserFramesetAfterEmptyForeign.get_last_error(), null); + assert.equal(fullParserFramesetAfterEmptyForeign.get_unsupported_exception(), null); + fullParserFramesetAfterEmptyForeign.destroy(); +} + +for (const html of ["<svg>\0</svg><frameset>", "<svg> </svg><frameset>"]) { + const fullParserFramesetAfterEmptyForeign = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterEmptyForeignTags = []; + while (fullParserFramesetAfterEmptyForeign.next_token()) { + if (fullParserFramesetAfterEmptyForeign.get_token_type() === "#tag") { + fullParserFramesetAfterEmptyForeignTags.push( + `${fullParserFramesetAfterEmptyForeign.is_tag_closer() ? "-" : "+"}${fullParserFramesetAfterEmptyForeign.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterEmptyForeignTags, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]); + assert.equal(fullParserFramesetAfterEmptyForeign.get_last_error(), null); + assert.equal(fullParserFramesetAfterEmptyForeign.get_unsupported_exception(), null); + fullParserFramesetAfterEmptyForeign.destroy(); +} + +for (const [html, expectedTags] of [ + ["<div><frameset>", ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]], + ["<svg><p><frameset>", ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]], + ["<svg><foreignObject><div> <frameset><frame>", ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "+FRAME", "-FRAMESET", "-HTML"]], +]) { + const fullParserFramesetAfterIgnoredOpenChain = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterIgnoredOpenChainTags = []; + while (fullParserFramesetAfterIgnoredOpenChain.next_token()) { + if (fullParserFramesetAfterIgnoredOpenChain.get_token_type() === "#tag") { + fullParserFramesetAfterIgnoredOpenChainTags.push( + 
`${fullParserFramesetAfterIgnoredOpenChain.is_tag_closer() ? "-" : "+"}${fullParserFramesetAfterIgnoredOpenChain.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterIgnoredOpenChainTags, expectedTags); + assert.equal(fullParserFramesetAfterIgnoredOpenChain.get_last_error(), null); + assert.equal(fullParserFramesetAfterIgnoredOpenChain.get_unsupported_exception(), null); + fullParserFramesetAfterIgnoredOpenChain.destroy(); +} + +for (const html of ["</html><frameset></frameset>", "</body> <frameset></frameset>"]) { + const fullParserFramesetAfterIgnoredCloser = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterIgnoredCloserTags = []; + while (fullParserFramesetAfterIgnoredCloser.next_token()) { + if (fullParserFramesetAfterIgnoredCloser.get_token_type() === "#tag") { + fullParserFramesetAfterIgnoredCloserTags.push( + `${fullParserFramesetAfterIgnoredCloser.is_tag_closer() ? "-" : "+"}${fullParserFramesetAfterIgnoredCloser.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterIgnoredCloserTags, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "-FRAMESET", "-HTML"]); + assert.equal(fullParserFramesetAfterIgnoredCloser.get_last_error(), null); + assert.equal(fullParserFramesetAfterIgnoredCloser.get_unsupported_exception(), null); + fullParserFramesetAfterIgnoredCloser.destroy(); +} + +for (const html of ["<p><frameset><frame>", "<p> <frameset><frame>"]) { + const fullParserFramesetAfterParagraph = WP_HTML_Processor.create_full_parser(html); + const fullParserFramesetAfterParagraphTokens = []; + while (fullParserFramesetAfterParagraph.next_token()) { + if (fullParserFramesetAfterParagraph.get_token_type() === "#tag") { + fullParserFramesetAfterParagraphTokens.push( + `${fullParserFramesetAfterParagraph.is_tag_closer() ? 
"-" : "+"}${fullParserFramesetAfterParagraph.get_tag()}`, + ); + } + } + assert.deepEqual(fullParserFramesetAfterParagraphTokens, ["+HTML", "+HEAD", "-HEAD", "+FRAMESET", "+FRAME", "-FRAMESET", "-HTML"]); + assert.equal(fullParserFramesetAfterParagraph.get_last_error(), null); + assert.equal(fullParserFramesetAfterParagraph.get_unsupported_exception(), null); + fullParserFramesetAfterParagraph.destroy(); +} + +const fullParserFramesetNoframes = WP_HTML_Processor.create_full_parser("<frameset><noframes>x</noframes><frame></frameset>"); +const fullParserFramesetNoframesTokens = []; +while (fullParserFramesetNoframes.next_token()) { + fullParserFramesetNoframesTokens.push( + fullParserFramesetNoframes.get_token_type() === "#tag" + ? `${fullParserFramesetNoframes.is_tag_closer() ? "-" : "+"}${fullParserFramesetNoframes.get_tag()}:${fullParserFramesetNoframes.get_breadcrumbs().join("/")}` + : fullParserFramesetNoframes.get_token_name(), + ); +} +assert.deepEqual(fullParserFramesetNoframesTokens, [ + "+HTML:HTML", + "+HEAD:HTML/HEAD", + "-HEAD:HTML", + "+FRAMESET:HTML/FRAMESET", + "+NOFRAMES:HTML/FRAMESET/NOFRAMES", + "+FRAME:HTML/FRAMESET/FRAME", + "-FRAMESET:HTML", + "-HTML:", +]); +fullParserFramesetNoframes.destroy(); + +for (const [html, expectedTextTokens] of [ + ["<frameset>text", []], + ["<frameset>\nfoo", [["\n", ["HTML", "FRAMESET", "#text"]]]], + ["<frameset></frameset> te st", [[" ", ["HTML", "#text"]], [" ", ["HTML", "#text"]]]], + ["<frameset></frameset></html>text", []], +]) { + const framesetTextProcessor = WP_HTML_Processor.create_full_parser(html); + const framesetTextTokens = []; + while (framesetTextProcessor.next_token()) { + if (framesetTextProcessor.get_token_type() === "#text") { + framesetTextTokens.push([ + framesetTextProcessor.get_modifiable_text(), + framesetTextProcessor.get_breadcrumbs(), + ]); + } + } + assert.equal(framesetTextProcessor.get_last_error(), null); + assert.equal(framesetTextProcessor.get_unsupported_exception(), null); + 
assert.deepEqual(framesetTextTokens, expectedTextTokens); + framesetTextProcessor.destroy(); +} + +for (const html of [ + "<body><frameset></frameset><p>x</p>", + "text<frameset></frameset><p>x</p>", + '<input type="text"><frameset></frameset><p>x</p>', + "<svg>x</svg><frameset></frameset><p>x</p>", +]) { + const ignoredFramesetProcessor = WP_HTML_Processor.create_full_parser(html); + const visitedFramesetTags = []; + while (ignoredFramesetProcessor.next_token()) { + if (ignoredFramesetProcessor.get_token_type() === "#tag") { + visitedFramesetTags.push(ignoredFramesetProcessor.get_tag()); + } + } + assert.equal(ignoredFramesetProcessor.get_last_error(), null); + assert.equal(visitedFramesetTags.includes("FRAMESET"), false); + assert.equal(visitedFramesetTags.includes("P"), true); + ignoredFramesetProcessor.destroy(); +} + +for (const [html, expectedTokens] of [ + [ + "<svg>\0<frameset>", + [ + "+HTML:html", + "+HEAD:html", + "-HEAD:html", + "+BODY:html", + "+SVG:svg", + "#text:svg", + "+FRAMESET:svg", + "-FRAMESET:svg", + "-SVG:svg", + "-BODY:html", + "-HTML:html", + ], + ], + [ + "<svg>\0 <frameset>", + [ + "+HTML:html", + "+HEAD:html", + "-HEAD:html", + "+BODY:html", + "+SVG:svg", + "#text:svg", + "#text:svg", + "+FRAMESET:svg", + "-FRAMESET:svg", + "-SVG:svg", + "-BODY:html", + "-HTML:html", + ], + ], +]) { + const fullParserOpenSvgFramesetProcessor = WP_HTML_Processor.create_full_parser(html); + const visitedOpenSvgFramesetTokens = []; + while (fullParserOpenSvgFramesetProcessor.next_token()) { + visitedOpenSvgFramesetTokens.push( + fullParserOpenSvgFramesetProcessor.get_token_type() === "#tag" + ? `${fullParserOpenSvgFramesetProcessor.is_tag_closer() ? 
"-" : "+"}${fullParserOpenSvgFramesetProcessor.get_tag()}:${fullParserOpenSvgFramesetProcessor.get_namespace()}` + : `${fullParserOpenSvgFramesetProcessor.get_token_type()}:${fullParserOpenSvgFramesetProcessor.get_namespace()}`, + ); + } + assert.deepEqual(visitedOpenSvgFramesetTokens, expectedTokens); + assert.equal(fullParserOpenSvgFramesetProcessor.get_last_error(), null); + assert.equal(fullParserOpenSvgFramesetProcessor.get_unsupported_exception(), null); + fullParserOpenSvgFramesetProcessor.destroy(); +} + +const fullParserCommentAfterBody = WP_HTML_Processor.create_full_parser("<html><body></body><!--outside-->"); +while ( + fullParserCommentAfterBody.next_token() && + fullParserCommentAfterBody.get_token_type() !== "#comment" +) { +} +assert.equal(fullParserCommentAfterBody.get_token_type(), "#comment"); +assert.deepEqual(fullParserCommentAfterBody.get_breadcrumbs(), ["HTML", "#comment"]); +assert.equal(fullParserCommentAfterBody.get_last_error(), null); +assert.equal(fullParserCommentAfterBody.get_unsupported_exception(), null); +fullParserCommentAfterBody.destroy(); + +const fullParserCommentAfterHtml = WP_HTML_Processor.create_full_parser("<html><body></body></html><!--outside-->"); +while ( + fullParserCommentAfterHtml.next_token() && + fullParserCommentAfterHtml.get_token_type() !== "#comment" +) { +} +assert.equal(fullParserCommentAfterHtml.get_token_type(), "#comment"); +assert.deepEqual(fullParserCommentAfterHtml.get_breadcrumbs(), ["#comment"]); +assert.equal(fullParserCommentAfterHtml.get_last_error(), null); +assert.equal(fullParserCommentAfterHtml.get_unsupported_exception(), null); +fullParserCommentAfterHtml.destroy(); + +const fullParserCommentAfterFramesetHtml = WP_HTML_Processor.create_full_parser("<html><frameset></frameset></html><!--outside-->"); +while ( + fullParserCommentAfterFramesetHtml.next_token() && + fullParserCommentAfterFramesetHtml.get_token_type() !== "#comment" +) { +} 
+assert.equal(fullParserCommentAfterFramesetHtml.get_token_type(), "#comment"); // The scan loop just above stops on the first #comment token.
+assert.deepEqual(fullParserCommentAfterFramesetHtml.get_breadcrumbs(), ["#comment"]); // After </html> the comment has no ancestors in its breadcrumbs.
+assert.equal(fullParserCommentAfterFramesetHtml.get_last_error(), null); // NOTE(review): if assert is the legacy (non-strict) node:assert, equal(x, null) also passes for undefined; confirm the import.
+assert.equal(fullParserCommentAfterFramesetHtml.get_unsupported_exception(), null);
+fullParserCommentAfterFramesetHtml.destroy();
+
+const fullParserCommentAfterFramesetNoframes = WP_HTML_Processor.create_full_parser( // Same check with a NOFRAMES element between </html> and the comment.
+  "<html><frameset></frameset></html><noframes>fallback</noframes><!--outside-->",
+);
+while ( // Advance to the first #comment token; the loop body is intentionally empty.
+  fullParserCommentAfterFramesetNoframes.next_token() &&
+  fullParserCommentAfterFramesetNoframes.get_token_type() !== "#comment"
+) {
+}
+assert.equal(fullParserCommentAfterFramesetNoframes.get_token_type(), "#comment");
+assert.deepEqual(fullParserCommentAfterFramesetNoframes.get_breadcrumbs(), ["#comment"]);
+assert.equal(fullParserCommentAfterFramesetNoframes.get_last_error(), null);
+assert.equal(fullParserCommentAfterFramesetNoframes.get_unsupported_exception(), null);
+fullParserCommentAfterFramesetNoframes.destroy();
+
+const fullParserDelayedFramesetComment = WP_HTML_Processor.create_full_parser( // One comment before and one after a post-</html> NOFRAMES element.
+  "<html><frameset></frameset></html><!--before--><noframes>fallback</noframes><!--after-->",
+);
+const delayedFramesetCommentTokens = []; // Collects [kind, text, breadcrumbs] triples in the order the processor reports them.
+while (fullParserDelayedFramesetComment.next_token()) {
+  if (fullParserDelayedFramesetComment.get_token_type() === "#tag" && fullParserDelayedFramesetComment.get_tag() === "NOFRAMES") {
+    delayedFramesetCommentTokens.push([
+      "noframes",
+      fullParserDelayedFramesetComment.get_modifiable_text(),
+      fullParserDelayedFramesetComment.get_breadcrumbs(),
+    ]);
+  } else if (fullParserDelayedFramesetComment.get_token_type() === "#comment") {
+    delayedFramesetCommentTokens.push([
+      "comment",
+      fullParserDelayedFramesetComment.get_full_comment_text(),
+      fullParserDelayedFramesetComment.get_breadcrumbs(),
+    ]);
+  }
+}
+assert.deepEqual(delayedFramesetCommentTokens, [ // NOFRAMES is expected ahead of the before comment even though that comment comes first in the markup.
+  ["noframes", "fallback", ["HTML", "NOFRAMES"]],
+  ["comment", "before", ["#comment"]],
+  ["comment", "after", ["#comment"]],
+]);
+assert.equal(fullParserDelayedFramesetComment.get_last_error(), null);
+assert.equal(fullParserDelayedFramesetComment.get_unsupported_exception(), null);
+fullParserDelayedFramesetComment.destroy();
+
+const noQuirksClasses = WP_HTML_Processor.create_full_parser('<!DOCTYPE html><span class="UPPER">'); // No-quirks mode: class names are matched case-sensitively.
+assert.equal(noQuirksClasses.next_tag("span"), true);
+assert.equal(noQuirksClasses.compat_mode, WP_HTML_Tag_Processor.NO_QUIRKS_MODE);
+assert.equal(noQuirksClasses.has_class("upper"), false);
+assert.equal(noQuirksClasses.has_class("UPPER"), true);
+assert.equal(noQuirksClasses.add_class("upper"), true);
+assert.equal(noQuirksClasses.get_updated_html(), '<!DOCTYPE html><span class="UPPER upper">'); // upper is appended because it differs from UPPER here.
+noQuirksClasses.destroy();
+
+const noQuirksClassList = WP_HTML_Processor.create_full_parser("<!DOCTYPE html><span class='A A a B b \u00C9 \u0045\u0301 \u00C9 é'>"); // Exact duplicates collapse; case variants and precomposed/decomposed forms stay distinct.
+assert.equal(noQuirksClassList.next_tag("span"), true);
+assert.deepEqual(noQuirksClassList.class_list(), ["A", "a", "B", "b", "É", "E\u0301", "é"]);
+noQuirksClassList.destroy();
+
+const quirksClasses = WP_HTML_Processor.create_full_parser('<span class="UPPER">'); // No DOCTYPE, so quirks mode: class names match case-insensitively.
+assert.equal(quirksClasses.next_tag("span"), true);
+assert.equal(quirksClasses.compat_mode, WP_HTML_Tag_Processor.QUIRKS_MODE);
+assert.equal(quirksClasses.has_class("upper"), true);
+assert.equal(quirksClasses.has_class("UPPER"), true);
+assert.equal(quirksClasses.add_class("upper"), true);
+assert.equal(quirksClasses.get_updated_html(), '<span class="UPPER">'); // Adding upper changes nothing because UPPER already matches.
+assert.equal(quirksClasses.remove_class("upPer"), true);
+assert.equal(quirksClasses.get_updated_html(), "<span >"); // Removing the only class drops the whole attribute and leaves the separating space.
+quirksClasses.destroy();
+
+for (const [className, expectedHtml] of [ // add_class() in quirks mode; each row is [argument, expected markup]. Presumably mirrors PHP array-key coercion; confirm against the PHP implementation.
+  [null, '<span class="0 1 ONE ">'],
+  [false, '<span class="0 1 ONE">'],
+  [1, '<span class="0 1 ONE">'],
+  ["1", '<span class="0 1 ONE">'],
+  [1.5, '<span class="0 1 ONE">'],
+  ["1.5", '<span class="0 1 ONE 1.5">'],
+  ["01", '<span class="0 1 ONE 01">'],
+  ["one", '<span class="0 1 ONE">'],
+]) {
+  const quirksAddClassKey = WP_HTML_Processor.create_full_parser('<span class="0 1 ONE">'); // Fresh processor per row so rows cannot influence each other.
+  assert.equal(quirksAddClassKey.next_tag("span"), true);
+  assert.equal(quirksAddClassKey.add_class(className), true);
+  assert.equal(quirksAddClassKey.get_updated_html(), expectedHtml);
+  quirksAddClassKey.destroy();
+}
+
+for (const [className, expectedHtml] of [ // remove_class() counterpart of the table above; each row is [argument, expected markup].
+  [null, '<span class="0 1 ONE">'],
+  [false, '<span class="1 ONE">'],
+  [1, '<span class="0 ONE">'],
+  ["1", '<span class="0 ONE">'],
+  [1.5, '<span class="0 ONE">'],
+  ["1.5", '<span class="0 1 ONE">'],
+  ["01", '<span class="0 1 ONE">'],
+  ["one", '<span class="0 1">'],
+  ["ONE", '<span class="0 1">'],
+]) {
+  const quirksRemoveClassKey = WP_HTML_Processor.create_full_parser('<span class="0 1 ONE">'); // Fresh processor per row so rows cannot influence each other.
+  assert.equal(quirksRemoveClassKey.next_tag("span"), true);
+  assert.equal(quirksRemoveClassKey.remove_class(className), true);
+  assert.equal(quirksRemoveClassKey.get_updated_html(), expectedHtml);
+  quirksRemoveClassKey.destroy();
+}
+
+const quirksClassList = WP_HTML_Processor.create_full_parser("<span class='A A a B b \u00C9 \u0045\u0301 \u00C9 é \u0065\u0301'>"); // Quirks mode: ASCII letters come back lowercased and de-duplicated; É and é stay distinct.
+assert.equal(quirksClassList.next_tag("span"), true);
+assert.deepEqual(quirksClassList.class_list(), ["a", "b", "É", "e\u0301", "é"]);
+quirksClassList.destroy();
+
+const noQuirksParagraphTable = WP_HTML_Processor.create_full_parser("<!DOCTYPE html><p><table>"); // No-quirks mode: TABLE ends up directly under BODY, not inside the open P.
+assert.equal(noQuirksParagraphTable.next_tag("table"), true);
+assert.deepEqual(noQuirksParagraphTable.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]);
+noQuirksParagraphTable.destroy();
+
+const quirksParagraphTable = WP_HTML_Processor.create_full_parser('<!DOCTYPE html PUBLIC "html"><p><table>'); // Quirks-mode counterpart, triggered by a PUBLIC doctype.
+assert.equal(quirksParagraphTable.next_tag("table"), true);
+assert.equal(quirksParagraphTable.compat_mode, WP_HTML_Tag_Processor.QUIRKS_MODE);
+assert.deepEqual(quirksParagraphTable.get_breadcrumbs(), ["HTML", "BODY", "P", "TABLE"]); // In quirks mode the TABLE stays nested inside the open P.
+quirksParagraphTable.destroy();
+
+const stepProcessor = WP_HTML_Processor.create_fragment("<div>Step</div>"); // step() walks opener, text, closer; PROCESS_CURRENT_NODE re-processes without advancing.
+assert.equal(stepProcessor.step(), true);
+assert.equal(stepProcessor.get_tag(), "DIV");
+assert.equal(stepProcessor.step(WP_HTML_Processor.PROCESS_CURRENT_NODE), true);
+assert.equal(stepProcessor.get_tag(), "DIV"); // Still on the DIV opener after re-processing the current node.
+assert.equal(stepProcessor.step(), true);
+assert.equal(stepProcessor.get_token_type(), "#text");
+assert.equal(stepProcessor.step(), true);
+assert.equal(stepProcessor.get_tag(), "DIV");
+assert.equal(stepProcessor.is_tag_closer(), true);
+stepProcessor.destroy();
+
+const processorVisitClosersAlias = WP_HTML_Processor.create_fragment("<div></div>"); // The visit_closers: true query option makes next_tag() stop on closing tags.
+assert.equal(processorVisitClosersAlias.next_tag({ tag_name: "div" }), true);
+assert.equal(processorVisitClosersAlias.is_tag_closer(), false);
+assert.equal(processorVisitClosersAlias.next_tag({ tag_name: "div", visit_closers: true }), true);
+assert.equal(processorVisitClosersAlias.is_tag_closer(), true);
+processorVisitClosersAlias.destroy();
+
+const virtualPOpenerProcessor = WP_HTML_Processor.create_fragment("</p>"); // A lone </p> is reported as a virtual P opener followed by a virtual P closer.
+assert.equal(virtualPOpenerProcessor.next_token(), true);
+assert.equal(virtualPOpenerProcessor.get_tag(), "P");
+assert.equal(virtualPOpenerProcessor.is_virtual(), true);
+assert.equal(virtualPOpenerProcessor.is_tag_closer(), false);
+assert.deepEqual(virtualPOpenerProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+assert.equal(virtualPOpenerProcessor.get_current_depth(), 3); // Depth equals the number of breadcrumb entries asserted above.
+assert.equal(virtualPOpenerProcessor.next_token(), true);
+assert.equal(virtualPOpenerProcessor.get_tag(), "P");
+assert.equal(virtualPOpenerProcessor.is_virtual(), true);
+assert.equal(virtualPOpenerProcessor.is_tag_closer(), true);
+assert.deepEqual(virtualPOpenerProcessor.get_breadcrumbs(), ["HTML", "BODY"]);
+virtualPOpenerProcessor.destroy();
+
+const virtualHeadingCloserProcessor = WP_HTML_Processor.create_fragment("<h1><h2>"); // Opening H2 inside H1 yields a virtual H1 closer before the real H2 opener.
+assert.equal(virtualHeadingCloserProcessor.next_token(), true);
+assert.equal(virtualHeadingCloserProcessor.get_tag(), "H1");
+assert.equal(virtualHeadingCloserProcessor.next_token(), true);
+assert.equal(virtualHeadingCloserProcessor.get_tag(), "H1");
+assert.equal(virtualHeadingCloserProcessor.is_virtual(), true);
+assert.equal(virtualHeadingCloserProcessor.is_tag_closer(), true);
+assert.equal(virtualHeadingCloserProcessor.get_modifiable_text(), ""); // The virtual closer carries no modifiable text.
+assert.deepEqual(virtualHeadingCloserProcessor.get_breadcrumbs(), ["HTML", "BODY"]);
+assert.equal(virtualHeadingCloserProcessor.next_token(), true);
+assert.equal(virtualHeadingCloserProcessor.get_tag(), "H2");
+assert.equal(virtualHeadingCloserProcessor.is_virtual(), false);
+assert.deepEqual(virtualHeadingCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "H2"]);
+virtualHeadingCloserProcessor.destroy();
+
+const virtualAnchorCloserProcessor = WP_HTML_Processor.create_fragment("<a><span><a>"); // A second A inside A/SPAN yields virtual closers for SPAN and then A.
+assert.equal(virtualAnchorCloserProcessor.next_token(), true);
+assert.equal(virtualAnchorCloserProcessor.get_tag(), "A");
+assert.equal(virtualAnchorCloserProcessor.next_token(), true);
+assert.equal(virtualAnchorCloserProcessor.get_tag(), "SPAN");
+assert.equal(virtualAnchorCloserProcessor.next_token(), true);
+assert.equal(virtualAnchorCloserProcessor.get_tag(), "SPAN"); // Virtual SPAN closer.
+assert.equal(virtualAnchorCloserProcessor.is_virtual(), true);
+assert.equal(virtualAnchorCloserProcessor.is_tag_closer(), true);
+assert.deepEqual(virtualAnchorCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "A"]);
+assert.equal(virtualAnchorCloserProcessor.next_token(), true);
+assert.equal(virtualAnchorCloserProcessor.get_tag(), "A"); // Virtual closer for the first A.
+assert.equal(virtualAnchorCloserProcessor.is_virtual(), true);
+assert.equal(virtualAnchorCloserProcessor.is_tag_closer(), true);
+assert.deepEqual(virtualAnchorCloserProcessor.get_breadcrumbs(), ["HTML", "BODY"]);
+assert.equal(virtualAnchorCloserProcessor.next_token(), true); // Next token is the real (non-virtual) second A opener, now directly under BODY.
+assert.equal(virtualAnchorCloserProcessor.get_tag(), "A");
+assert.equal(virtualAnchorCloserProcessor.is_virtual(), false);
+assert.deepEqual(virtualAnchorCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "A"]);
+virtualAnchorCloserProcessor.destroy();
+
+const nestedHeadingProcessor = WP_HTML_Processor.create_fragment("<h2><span>Major<h4 target>"); // With a SPAN in between, the H4 nests inside H2/SPAN.
+assert.equal(nestedHeadingProcessor.next_tag("h4"), true);
+assert.deepEqual(nestedHeadingProcessor.get_breadcrumbs(), ["HTML", "BODY", "H2", "SPAN", "H4"]);
+assert.equal(nestedHeadingProcessor.get_attribute("target"), true); // A valueless attribute reads back as boolean true.
+nestedHeadingProcessor.destroy();
+
+const nestedProcessor = WP_HTML_Processor.create_fragment("<div><span><figure><img></figure></span></div>"); // A breadcrumbs query matches on the trailing ancestor chain FIGURE then IMG.
+assert.equal(nestedProcessor.next_tag({ breadcrumbs: ["FIGURE", "IMG"] }), true);
+assert.equal(nestedProcessor.get_tag(), "IMG");
+assert.equal(nestedProcessor.expects_closer(), false); // IMG is reported as not expecting a closing tag.
+assert.deepEqual(nestedProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "SPAN", "FIGURE", "IMG"]);
+nestedProcessor.destroy();
+
+const voidProcessor = WP_HTML_Processor.create_fragment("<img><div>"); // The preceding IMG does not remain in the breadcrumbs of the following DIV.
+assert.equal(voidProcessor.next_tag("div"), true);
+assert.deepEqual(voidProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"]);
+voidProcessor.destroy();
+
+const paragraphProcessor = WP_HTML_Processor.create_fragment("<p><p target>"); // match_offset: 2 selects the second P, which is a sibling of the first, not a child.
+assert.equal(paragraphProcessor.next_tag({ breadcrumbs: ["P"], match_offset: 2 }), true);
+assert.deepEqual(paragraphProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+assert.equal(paragraphProcessor.get_attribute("target"), true);
+paragraphProcessor.destroy();
+
+const listingClosesParagraphProcessor = WP_HTML_Processor.create_full_parser("<!doctype html><p>foo<listing>bar<p>baz"); // LISTING closes the open P and lands directly under BODY.
+assert.equal(listingClosesParagraphProcessor.next_tag("listing"), true);
+assert.deepEqual(listingClosesParagraphProcessor.get_breadcrumbs(), ["HTML", "BODY", "LISTING"]);
+assert.equal(listingClosesParagraphProcessor.next_tag("p"), true); // The P after the LISTING text nests inside the LISTING.
+assert.deepEqual(listingClosesParagraphProcessor.get_breadcrumbs(), ["HTML", "BODY", "LISTING", "P"]);
+listingClosesParagraphProcessor.destroy();
+
+const articleProcessor = WP_HTML_Processor.create_fragment("<p><p><article target>"); // ARTICLE is reported directly under BODY, with no open P above it.
+assert.equal(articleProcessor.next_tag("article"), true);
+assert.deepEqual(articleProcessor.get_breadcrumbs(), ["HTML", "BODY", "ARTICLE"]);
+assert.equal(articleProcessor.get_attribute("target"), true);
+articleProcessor.destroy();
+
+const buttonProcessor = WP_HTML_Processor.create_fragment("<div><button one><p><span><button two>Two</button></span></p></div><button three>"); // A BUTTON opener closes an already open BUTTON instead of nesting inside it.
+assert.equal(buttonProcessor.next_tag("button"), true);
+assert.deepEqual(buttonProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "BUTTON"]);
+assert.equal(buttonProcessor.get_attribute("one"), true);
+assert.equal(buttonProcessor.next_tag("button"), true);
+assert.deepEqual(buttonProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "BUTTON"]); // Button two replaces button one at the same depth; P and SPAN are gone.
+assert.equal(buttonProcessor.get_attribute("two"), true);
+assert.equal(buttonProcessor.next_tag("button"), true);
+assert.deepEqual(buttonProcessor.get_breadcrumbs(), ["HTML", "BODY", "BUTTON"]); // Button three follows </div>, so it sits directly under BODY.
+assert.equal(buttonProcessor.get_attribute("three"), true);
+buttonProcessor.destroy();
+
+const selectMenuitemProcessor = WP_HTML_Processor.create_full_parser("<!DOCTYPE html><select><menuitem></select>"); // MENUITEM inside SELECT is never reported as a tag, and no error is recorded.
+assert.equal(selectMenuitemProcessor.next_tag("select"), true);
+assert.equal(selectMenuitemProcessor.next_tag("menuitem"), false);
+assert.equal(selectMenuitemProcessor.get_last_error(), null);
+selectMenuitemProcessor.destroy();
+
+const listBoundaryProcessor = WP_HTML_Processor.create_fragment("<li><li><blockquote><li target>"); // BLOCKQUOTE stops the implied LI close, so the third LI nests under LI/BLOCKQUOTE.
+assert.equal(listBoundaryProcessor.next_tag({ breadcrumbs: ["LI"], match_offset: 3 }), true);
+assert.deepEqual(listBoundaryProcessor.get_breadcrumbs(), ["HTML", "BODY", "LI", "BLOCKQUOTE", "LI"]);
+assert.equal(listBoundaryProcessor.get_attribute("target"), true); // Confirms the matched LI is the one carrying the target attribute.
+listBoundaryProcessor.destroy();
+
+const listImpliedProcessor = WP_HTML_Processor.create_fragment("<li><li><div><li target>"); // Unlike BLOCKQUOTE, a DIV does not stop the implied close: the third LI sits directly under BODY.
+assert.equal(listImpliedProcessor.next_tag({ breadcrumbs: ["LI"], match_offset: 3 }), true);
+assert.deepEqual(listImpliedProcessor.get_breadcrumbs(), ["HTML", "BODY", "LI"]);
+assert.equal(listImpliedProcessor.get_attribute("target"), true);
+listImpliedProcessor.destroy();
+
+const listPInButtonScopeProcessor = WP_HTML_Processor.create_fragment("<li><li><p><button><p><li target>"); // Inside BUTTON the inner P is closed but BUTTON and the outer P stay open, so the new LI nests under LI/P/BUTTON.
+assert.equal(listPInButtonScopeProcessor.next_tag({ breadcrumbs: ["LI"], match_offset: 3 }), true);
+assert.deepEqual(listPInButtonScopeProcessor.get_breadcrumbs(), ["HTML", "BODY", "LI", "P", "BUTTON", "LI"]);
+assert.equal(listPInButtonScopeProcessor.get_attribute("target"), true);
+listPInButtonScopeProcessor.destroy();
+
+const ddPInButtonScopeProcessor = WP_HTML_Processor.create_fragment("<dd><dd><p><button><p><dd target>"); // Same button-scope check for DD.
+assert.equal(ddPInButtonScopeProcessor.next_tag({ breadcrumbs: ["DD"], match_offset: 3 }), true);
+assert.deepEqual(ddPInButtonScopeProcessor.get_breadcrumbs(), ["HTML", "BODY", "DD", "P", "BUTTON", "DD"]);
+assert.equal(ddPInButtonScopeProcessor.get_attribute("target"), true);
+ddPInButtonScopeProcessor.destroy();
+
+const dtPInButtonScopeProcessor = WP_HTML_Processor.create_fragment("<dt><dt><p><button><p><dt target>"); // Same button-scope check for DT.
+assert.equal(dtPInButtonScopeProcessor.next_tag({ breadcrumbs: ["DT"], match_offset: 3 }), true);
+assert.deepEqual(dtPInButtonScopeProcessor.get_breadcrumbs(), ["HTML", "BODY", "DT", "P", "BUTTON", "DT"]);
+assert.equal(dtPInButtonScopeProcessor.get_attribute("target"), true);
+dtPInButtonScopeProcessor.destroy();
+
+const unexpectedListCloserProcessor = WP_HTML_Processor.create_fragment("<ul><li><ul></li><li target>a</li></ul></li></ul>"); // A stray </li> inside the nested UL must not close the outer LI.
+assert.equal(unexpectedListCloserProcessor.next_tag({ breadcrumbs: ["LI"], match_offset: 2 }), true);
+assert.deepEqual(unexpectedListCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "UL", "LI", "UL", "LI"]); // The second LI is still inside the nested UL.
+assert.equal(unexpectedListCloserProcessor.get_attribute("target"), true);
+unexpectedListCloserProcessor.destroy();
+
+const rubyImpliedEndTagsProcessor = WP_HTML_Processor.create_full_parser("<html><ruby>a<rb>b<rt></ruby></html>"); // RT closes the open RB, so RT is a direct child of RUBY.
+assert.equal(rubyImpliedEndTagsProcessor.next_tag("rt"), true);
+assert.deepEqual(rubyImpliedEndTagsProcessor.get_breadcrumbs(), ["HTML", "BODY", "RUBY", "RT"]);
+rubyImpliedEndTagsProcessor.destroy();
+
+const rubyRtcChildrenProcessor = WP_HTML_Processor.create_full_parser("<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>"); // RT does not close RTC: both RT elements are siblings inside RTC.
+assert.equal(rubyRtcChildrenProcessor.next_tag("rt"), true);
+assert.deepEqual(rubyRtcChildrenProcessor.get_breadcrumbs(), ["HTML", "BODY", "RUBY", "RTC", "RT"]);
+assert.equal(rubyRtcChildrenProcessor.next_tag("rt"), true);
+assert.deepEqual(rubyRtcChildrenProcessor.get_breadcrumbs(), ["HTML", "BODY", "RUBY", "RTC", "RT"]);
+rubyRtcChildrenProcessor.destroy();
+
+const hrProcessor = WP_HTML_Processor.create_fragment("<p><hr>"); // HR closes the open P and expects no closing tag.
+assert.equal(hrProcessor.next_tag("hr"), true);
+assert.deepEqual(hrProcessor.get_breadcrumbs(), ["HTML", "BODY", "HR"]);
+assert.equal(hrProcessor.expects_closer(), false);
+hrProcessor.destroy();
+
+const brEndTagProcessor = WP_HTML_Processor.create_fragment('</br id="an-opener" html>'); // A </br> end tag is reported as a BR opener with no readable attributes or classes.
+assert.equal(brEndTagProcessor.next_tag(), true);
+assert.equal(brEndTagProcessor.get_tag(), "BR");
+assert.equal(brEndTagProcessor.is_tag_closer(), false);
+assert.equal(brEndTagProcessor.has_class("html"), false);
+assert.deepEqual(brEndTagProcessor.class_list(), []);
+assert.equal(brEndTagProcessor.get_attribute_names_with_prefix(""), null); // Attribute names come back as null, not as an empty list.
+assert.deepEqual(brEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "BR"]);
+brEndTagProcessor.destroy();
+
+const directFormCloserProcessor = WP_HTML_Processor.create_fragment("<form></form><p>x"); // Baseline: </form> directly closes an empty FORM.
+assert.equal(directFormCloserProcessor.next_tag("form"), true); // First the FORM opener, then its closer via the tag_closers: visit option.
+assert.equal(directFormCloserProcessor.is_tag_closer(), false);
+assert.equal(directFormCloserProcessor.next_tag({ tag_name: "form", tag_closers: "visit" }), true);
+assert.equal(directFormCloserProcessor.is_tag_closer(), true);
+assert.deepEqual(directFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY"]);
+assert.equal(directFormCloserProcessor.next_tag("p"), true);
+assert.deepEqual(directFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+directFormCloserProcessor.destroy();
+
+const impliedFormCloserProcessor = WP_HTML_Processor.create_fragment("<form><p></form><span>x"); // </form> with an open P: a virtual P closer comes first, then the real FORM closer.
+assert.equal(impliedFormCloserProcessor.next_tag("p"), true);
+assert.equal(impliedFormCloserProcessor.next_token(), true);
+assert.equal(impliedFormCloserProcessor.get_tag(), "P");
+assert.equal(impliedFormCloserProcessor.is_virtual(), true);
+assert.equal(impliedFormCloserProcessor.is_tag_closer(), true);
+assert.deepEqual(impliedFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "FORM"]);
+assert.equal(impliedFormCloserProcessor.next_token(), true);
+assert.equal(impliedFormCloserProcessor.get_tag(), "FORM");
+assert.equal(impliedFormCloserProcessor.is_virtual(), false);
+assert.equal(impliedFormCloserProcessor.is_tag_closer(), true);
+assert.deepEqual(impliedFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY"]);
+assert.equal(impliedFormCloserProcessor.next_tag("span"), true);
+assert.deepEqual(impliedFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "SPAN"]);
+impliedFormCloserProcessor.destroy();
+
+for (const html of [ // </form> while a DIV or BUTTON child is still open: the closer is visited but the breadcrumbs keep FORM.
+  "<form><div></form><p>x",
+  "<form><button></form><p>x",
+]) {
+  const detachedFormCloserProcessor = WP_HTML_Processor.create_fragment(html);
+  assert.equal(detachedFormCloserProcessor.next_tag("form"), true);
+  assert.equal(detachedFormCloserProcessor.is_tag_closer(), false);
+  assert.equal(detachedFormCloserProcessor.next_tag(), true);
+  const descendantTag = detachedFormCloserProcessor.get_tag(); // DIV or BUTTON, depending on the row.
+  assert.equal(detachedFormCloserProcessor.next_tag({ tag_name: "form", tag_closers: "visit" }), true);
+  assert.equal(detachedFormCloserProcessor.is_tag_closer(), true);
+  assert.deepEqual(detachedFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "FORM", descendantTag]); // FORM and its child are still in the breadcrumbs at the closer.
+  assert.equal(detachedFormCloserProcessor.next_tag("p"), true);
+  assert.deepEqual(detachedFormCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "FORM", descendantTag, "P"]);
+  while (detachedFormCloserProcessor.next_token()) { // Drain the remaining tokens so any late parse error surfaces below.
+  }
+  assert.equal(detachedFormCloserProcessor.get_last_error(), null);
+  detachedFormCloserProcessor.destroy();
+  assert.notEqual(WP_HTML_Processor.normalize(html), null); // normalize() must also accept the same input.
+}
+
+const detachedFormPointerProcessor = WP_HTML_Processor.create_fragment("<form><div></form><form><span>"); // After such a closer, a second FORM opener is accepted and nests under FORM/DIV.
+assert.equal(detachedFormPointerProcessor.next_tag("form"), true);
+assert.equal(detachedFormPointerProcessor.next_tag("div"), true);
+assert.equal(detachedFormPointerProcessor.next_tag({ tag_name: "form", tag_closers: "visit" }), true);
+assert.equal(detachedFormPointerProcessor.is_tag_closer(), true);
+assert.deepEqual(detachedFormPointerProcessor.get_breadcrumbs(), ["HTML", "BODY", "FORM", "DIV"]);
+assert.equal(detachedFormPointerProcessor.next_tag("form"), true);
+assert.equal(detachedFormPointerProcessor.is_tag_closer(), false);
+assert.deepEqual(detachedFormPointerProcessor.get_breadcrumbs(), ["HTML", "BODY", "FORM", "DIV", "FORM"]);
+detachedFormPointerProcessor.destroy();
+
+assert.equal( // Malformed form-in-table input: the trailing second FORM is absent from the normalized output.
+  WP_HTML_Processor.normalize('<form id><table te"><script></script><td srce" ID/></form><form claslicate>'),
+  '<form id><table te"><script></script><tbody><tr><td srce" id></td></tr></tbody></table></form>',
+);
+assert.equal( // Same shape with an open CAPTION: the second FORM is again absent from the output.
+  WP_HTML_Processor.normalize("<form><table><caption></form><form >"),
+  "<form><table><caption></caption></table></form>",
+);
+
+const selectOptionProcessor = WP_HTML_Processor.create_fragment("<select><option>one<option>two</select>"); // A new OPTION closes the previous one, so both are siblings under SELECT.
+assert.equal(selectOptionProcessor.next_tag({ breadcrumbs: ["SELECT", "OPTION"], match_offset: 2 }), true);
+assert.deepEqual(selectOptionProcessor.get_breadcrumbs(), ["HTML", "BODY", "SELECT", "OPTION"]);
+selectOptionProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<select><option>one<option>two</select>"),
+  "<select><option>one</option><option>two</option></select>",
+);
+
+const selectOptgroupProcessor = WP_HTML_Processor.create_fragment("<select><optgroup><option>one<optgroup><option>two</select>"); // A new OPTGROUP closes the open OPTION and OPTGROUP.
+assert.equal(selectOptgroupProcessor.next_tag({ breadcrumbs: ["SELECT", "OPTGROUP"], match_offset: 2 }), true);
+assert.deepEqual(selectOptgroupProcessor.get_breadcrumbs(), ["HTML", "BODY", "SELECT", "OPTGROUP"]);
+selectOptgroupProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<select><optgroup><option>one<optgroup><option>two</select>"),
+  "<select><optgroup><option>one</option></optgroup><optgroup><option>two</option></optgroup></select>",
+);
+
+const selectHrProcessor = WP_HTML_Processor.create_fragment("<select><optgroup><option>one<hr><option>two</select>"); // HR inside SELECT closes the open OPTION and OPTGROUP and becomes a child of SELECT.
+assert.equal(selectHrProcessor.next_tag("hr"), true);
+assert.deepEqual(selectHrProcessor.get_breadcrumbs(), ["HTML", "BODY", "SELECT", "HR"]);
+selectHrProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<select><optgroup><option>one<hr><option>two</select>"),
+  "<select><optgroup><option>one</option></optgroup><hr><option>two</option></select>",
+);
+
+const selectDoctypeProcessor = WP_HTML_Processor.create_fragment("<select><!doctype html><option>one"); // A DOCTYPE inside SELECT is dropped from the normalized output.
+assert.equal(selectDoctypeProcessor.next_tag("option"), true);
+assert.deepEqual(selectDoctypeProcessor.get_breadcrumbs(), ["HTML", "BODY", "SELECT", "OPTION"]);
+selectDoctypeProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<select><!doctype html><option>one"),
+  "<select><option>one</option></select>",
+);
+
+const selectInputProcessor = WP_HTML_Processor.create_fragment("<select><option>one<input><p>after"); // INPUT inside SELECT closes the SELECT and lands directly under BODY.
+assert.equal(selectInputProcessor.next_tag("input"), true);
+assert.deepEqual(selectInputProcessor.get_breadcrumbs(), ["HTML", "BODY", "INPUT"]);
+assert.equal(selectInputProcessor.next_tag("p"), true);
+assert.deepEqual(selectInputProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+selectInputProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<select><option>one<input><p>after"),
+  "<select><option>one</option></select><input><p>after</p>",
+);
+
+const selectTextareaProcessor = WP_HTML_Processor.create_fragment("<select><option>one<textarea>after</textarea><p>end"); // TEXTAREA behaves like INPUT here: it ends up directly under BODY.
+assert.equal(selectTextareaProcessor.next_tag("textarea"), true);
+assert.deepEqual(selectTextareaProcessor.get_breadcrumbs(), ["HTML", "BODY", "TEXTAREA"]);
+assert.equal(selectTextareaProcessor.next_tag("p"), true);
+assert.deepEqual(selectTextareaProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+selectTextareaProcessor.destroy();
+
+const selectFragmentIncompleteTextareaProcessor = WP_HTML_Processor.create_fragment("<textarea><option>", "<select>"); // SELECT fragment context: the unclosed TEXTAREA opener is skipped and OPTION is still found.
+assert.equal(selectFragmentIncompleteTextareaProcessor.next_tag("option"), true);
+assert.deepEqual(selectFragmentIncompleteTextareaProcessor.get_breadcrumbs(), ["HTML", "SELECT", "OPTION"]);
+assert.equal(selectFragmentIncompleteTextareaProcessor.paused_at_incomplete_token(), false); // The unterminated TEXTAREA does not leave the processor paused.
+selectFragmentIncompleteTextareaProcessor.destroy();
+
+const selectInTableProcessor = WP_HTML_Processor.create_fragment("<table><select><option>one<tr><td>cell"); // SELECT inside TABLE is reported under BODY, outside the table; TR closes it.
+assert.equal(selectInTableProcessor.next_tag("select"), true);
+assert.deepEqual(selectInTableProcessor.get_breadcrumbs(), ["HTML", "BODY", "SELECT"]);
+assert.equal(selectInTableProcessor.next_tag("td"), true);
+assert.deepEqual(selectInTableProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]);
+assert.equal(selectInTableProcessor.get_last_error(), null);
+selectInTableProcessor.destroy();
+assert.equal( // normalize() serializes the SELECT ahead of the TABLE it was written inside.
+  WP_HTML_Processor.normalize("<table><select><option>one<tr><td>cell"),
+  "<select><option>one</option></select><table><tbody><tr><td>cell</td></tr></tbody></table>",
+);
+assert.equal(
+  WP_HTML_Processor.normalize("<table><select><option>one</table><p>after"),
+  "<select><option>one</option></select><table></table><p>after</p>",
+);
+assert.equal(
+  WP_HTML_Processor.normalize("<table><select><option>3</select></table>"),
+  "<select><option>3</option></select><table></table>",
+);
+
+const bareColProcessor = WP_HTML_Processor.create_fragment("<table><col><tr><td>cell"); // A bare COL gets an implied COLGROUP; the following TR gets an implied TBODY.
+assert.equal(bareColProcessor.next_tag("col"), true);
+assert.deepEqual(bareColProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "COLGROUP", "COL"]);
+assert.equal(bareColProcessor.next_tag("tr"), true);
+assert.deepEqual(bareColProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]);
+bareColProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table><col><tr><td>cell"),
+  "<table><colgroup><col></colgroup><tbody><tr><td>cell</td></tr></tbody></table>",
+);
+
+const trailingColProcessor = WP_HTML_Processor.create_fragment("<table><col></table><col>"); // A COL after </table> is never reported and is absent from the normalized output.
+assert.equal(trailingColProcessor.next_tag("col"), true);
+assert.deepEqual(trailingColProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "COLGROUP", "COL"]);
+assert.equal(trailingColProcessor.next_tag("col"), false);
+trailingColProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table><col></table><col>"),
+  "<table><colgroup><col></colgroup></table>",
+);
+
+const colgroupProcessor = WP_HTML_Processor.create_fragment("<table><colgroup><tbody><tr><td>cell"); // TBODY closes the open COLGROUP.
+assert.equal(colgroupProcessor.next_tag("tbody"), true);
+assert.deepEqual(colgroupProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY"]);
+colgroupProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table><colgroup><tbody><tr><td>cell"),
+  "<table><colgroup></colgroup><tbody><tr><td>cell</td></tr></tbody></table>",
+);
+
+const tableCaptionProcessor = WP_HTML_Processor.create_fragment("<table><caption><p>cap<tr><td>cell"); // TR closes the open P and CAPTION and gets an implied TBODY.
+assert.equal(tableCaptionProcessor.next_tag("tr"), true);
+assert.deepEqual(tableCaptionProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]);
+tableCaptionProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table><caption><p>cap<tr><td>cell"),
+  "<table><caption><p>cap</p></caption><tbody><tr><td>cell</td></tr></tbody></table>",
+);
+
+const tableCaptionEndProcessor = WP_HTML_Processor.create_fragment("<table><caption>cap</table><p>after"); // </table> closes the open CAPTION and the TABLE; the P follows under BODY.
+assert.equal(tableCaptionEndProcessor.next_tag("p"), true);
+assert.deepEqual(tableCaptionEndProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]);
+tableCaptionEndProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table><caption>cap</table><p>after"),
+  "<table><caption>cap</caption></table><p>after</p>",
+);
+
+const tableTextProcessor = WP_HTML_Processor.create_fragment("<table>text<tr><td>cell"); // Text directly inside TABLE is reported first, under BODY rather than TABLE.
+assert.equal(tableTextProcessor.next_token(), true);
+assert.equal(tableTextProcessor.get_token_type(), "#text");
+assert.equal(tableTextProcessor.get_modifiable_text(), "text");
+assert.deepEqual(tableTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]);
+assert.equal(tableTextProcessor.next_tag("td"), true);
+assert.deepEqual(tableTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]);
+assert.equal(tableTextProcessor.get_last_error(), null);
+tableTextProcessor.destroy();
+assert.equal(
+  WP_HTML_Processor.normalize("<table>text<tr><td>cell"),
+  "text<table><tbody><tr><td>cell</td></tr></tbody></table>",
+);
+
+const tableEndParagraphProcessor = WP_HTML_Processor.create_full_parser("<p><table></p>"); // Quirks-mode input with a </p> while TABLE is open; must parse to the end without error.
+while (tableEndParagraphProcessor.next_token()) { // Drain every token; only the error state is checked afterwards.
+}
+assert.equal(tableEndParagraphProcessor.get_last_error(), null);
+assert.equal(tableEndParagraphProcessor.get_unsupported_exception(), null); +tableEndParagraphProcessor.destroy(); +const tableEndParagraphSerializer = WP_HTML_Processor.create_full_parser("<p><table></p>"); +assert.equal( + tableEndParagraphSerializer.serialize(), + "<html><head></head><body><p><p></p><table></table></p></body></html>", +); +tableEndParagraphSerializer.destroy(); +assert.equal(WP_HTML_Processor.normalize("<p><table></p>"), null); +/* "</br>" inside a table row: serialize() shows a <br> element placed before the table. */ +const tablePresumptuousBrProcessor = WP_HTML_Processor.create_full_parser("<table><tr></br></table>"); +assert.equal( + tablePresumptuousBrProcessor.serialize(), + "<html><head></head><body><br><table><tbody><tr></tr></tbody></table></body></html>", +); +tablePresumptuousBrProcessor.destroy(); +/* <plaintext> inside a table: PLAINTEXT is reported under BODY, the remaining input ("<td>") is its text, and TABLE is visited afterwards. */ +const tablePlaintextProcessor = WP_HTML_Processor.create_full_parser("<table><plaintext><td>"); +assert.equal(tablePlaintextProcessor.next_tag("plaintext"), true); +assert.deepEqual(tablePlaintextProcessor.get_breadcrumbs(), ["HTML", "BODY", "PLAINTEXT"]); +assert.equal(tablePlaintextProcessor.next_token(), true); +assert.equal(tablePlaintextProcessor.get_token_type(), "#text"); +assert.equal(tablePlaintextProcessor.get_modifiable_text(), "<td>"); +assert.equal(tablePlaintextProcessor.next_tag("table"), true); +assert.deepEqual(tablePlaintextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tablePlaintextProcessor.get_last_error(), null); +tablePlaintextProcessor.destroy(); +/* <style> in a table row followed by text: the loop requires the "abc" text (breadcrumbs HTML > BODY > #text) to have been seen before the STYLE opener, whose text is "</script>" and whose breadcrumbs end in TR > STYLE. Only sawTableStyle is asserted after the loop. */ +const tableStyleProcessor = WP_HTML_Processor.create_full_parser("<table><tr><style></script></style>abc"); +let sawTableStyleFosteredText = false; +let sawTableStyle = false; +while (tableStyleProcessor.next_token()) { + if (tableStyleProcessor.get_token_type() === "#text" && tableStyleProcessor.get_modifiable_text() === "abc") { + assert.deepEqual(tableStyleProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); + sawTableStyleFosteredText = true; + } + if ( + tableStyleProcessor.get_token_type() === "#tag" && + 
!tableStyleProcessor.is_tag_closer() && + tableStyleProcessor.get_tag() === "STYLE" + ) { + assert.equal(sawTableStyleFosteredText, true); + assert.deepEqual(tableStyleProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "STYLE"]); + assert.equal(tableStyleProcessor.get_modifiable_text(), "</script>"); + sawTableStyle = true; + } +} +assert.equal(sawTableStyle, true); +assert.equal(tableStyleProcessor.get_last_error(), null); +tableStyleProcessor.destroy(); +/* Form controls in a table: the first INPUT match and the DIV are reported under BODY and come before TABLE; the INPUT whose type is "hidden" is matched afterwards as a child of TABLE. */ +const tableFormFosterProcessor = WP_HTML_Processor.create_full_parser( + "<table><form><input type=hidden><input></form><div></div></table>", +); +assert.equal(tableFormFosterProcessor.next_tag("input"), true); +assert.deepEqual(tableFormFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "INPUT"]); +assert.equal(tableFormFosterProcessor.next_tag("div"), true); +assert.deepEqual(tableFormFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"]); +assert.equal(tableFormFosterProcessor.next_tag("table"), true); +assert.deepEqual(tableFormFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableFormFosterProcessor.next_tag("input"), true); +assert.equal(tableFormFosterProcessor.get_attribute("type"), "hidden"); +assert.deepEqual(tableFormFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "INPUT"]); +assert.equal(tableFormFosterProcessor.get_last_error(), null); +tableFormFosterProcessor.destroy(); +/* <a> written directly inside a table that is nested in a div: A is reported under BODY > DIV and its text is "foo". */ +const tableAnchorFosterProcessor = WP_HTML_Processor.create_full_parser( + "<!doctype html><div><table><a>foo</a> <tr><td>bar</td></tr></table></div>", +); +assert.equal(tableAnchorFosterProcessor.next_tag("a"), true); +assert.deepEqual(tableAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "A"]); +assert.equal(tableAnchorFosterProcessor.next_token(), true); +assert.equal(tableAnchorFosterProcessor.get_token_type(), "#text"); +assert.equal(tableAnchorFosterProcessor.get_modifiable_text(), "foo"); 
+assert.deepEqual(tableAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "A", "#text"]); +assert.equal(tableAnchorFosterProcessor.next_tag("table"), true); +assert.deepEqual(tableAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "TABLE"]); +assert.equal(tableAnchorFosterProcessor.get_last_error(), null); +tableAnchorFosterProcessor.destroy(); +/* "<kbd><table></kbd><col><select><tr>": SELECT is visited first (under KBD), then TABLE, then COL under a COLGROUP that is absent from the input, then TR under TBODY. */ +const tableColSelectProcessor = WP_HTML_Processor.create_full_parser("<kbd><table></kbd><col><select><tr>"); +assert.equal(tableColSelectProcessor.next_tag("select"), true); +assert.deepEqual(tableColSelectProcessor.get_breadcrumbs(), ["HTML", "BODY", "KBD", "SELECT"]); +assert.equal(tableColSelectProcessor.next_tag("table"), true); +assert.deepEqual(tableColSelectProcessor.get_breadcrumbs(), ["HTML", "BODY", "KBD", "TABLE"]); +assert.equal(tableColSelectProcessor.next_tag("col"), true); +assert.deepEqual(tableColSelectProcessor.get_breadcrumbs(), ["HTML", "BODY", "KBD", "TABLE", "COLGROUP", "COL"]); +assert.equal(tableColSelectProcessor.next_tag("tr"), true); +assert.deepEqual(tableColSelectProcessor.get_breadcrumbs(), ["HTML", "BODY", "KBD", "TABLE", "TBODY", "TR"]); +assert.equal(tableColSelectProcessor.get_last_error(), null); +tableColSelectProcessor.destroy(); +/* Fragment created with "<tbody>" as the second argument: A is reported inside TR > TD and is visited before the nested TABLE; the nested TR is under that TABLE > TBODY. */ +const tableFragmentAnchorFosterProcessor = WP_HTML_Processor.create_fragment( + "<td><table><tbody><a><tr>", + "<tbody>", +); +assert.equal(tableFragmentAnchorFosterProcessor.next_tag("a"), true); +assert.deepEqual(tableFragmentAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "TBODY", "TR", "TD", "A"]); +assert.equal(tableFragmentAnchorFosterProcessor.next_tag("table"), true); +assert.deepEqual(tableFragmentAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "TBODY", "TR", "TD", "TABLE"]); +assert.equal(tableFragmentAnchorFosterProcessor.next_tag("tr"), true); +assert.deepEqual(tableFragmentAnchorFosterProcessor.get_breadcrumbs(), ["HTML", "TBODY", "TR", "TD", "TABLE", "TBODY", "TR"]); 
+assert.equal(tableFragmentAnchorFosterProcessor.get_last_error(), null); +tableFragmentAnchorFosterProcessor.destroy(); +/* Nested table containing " <meta>": META is visited under the outer TD before the inner TABLE, and the single-space text is reported inside the inner TABLE. */ +const nestedTableMetaProcessor = WP_HTML_Processor.create_full_parser( + "<!doctype html><table>X<tr><td><table> <meta></table></table>", +); +assert.equal(nestedTableMetaProcessor.next_tag("meta"), true); +assert.deepEqual(nestedTableMetaProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "META"]); +assert.equal(nestedTableMetaProcessor.next_tag("table"), true); +assert.deepEqual(nestedTableMetaProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "TABLE"]); +assert.equal(nestedTableMetaProcessor.next_token(), true); +assert.equal(nestedTableMetaProcessor.get_token_type(), "#text"); +assert.equal(nestedTableMetaProcessor.get_modifiable_text(), " "); +assert.deepEqual(nestedTableMetaProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "TABLE", "#text"]); +assert.equal(nestedTableMetaProcessor.get_last_error(), null); +nestedTableMetaProcessor.destroy(); +/* "<table>A<td>B</td>C</table>": the texts "A" and "C" are reported under BODY before TABLE is visited, while "B" stays inside TD. */ +const tableCellFosterTextProcessor = WP_HTML_Processor.create_full_parser("<table>A<td>B</td>C</table>"); +assert.equal(tableCellFosterTextProcessor.next_tag("body"), true); +assert.equal(tableCellFosterTextProcessor.next_token(), true); +assert.equal(tableCellFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableCellFosterTextProcessor.get_modifiable_text(), "A"); +assert.deepEqual(tableCellFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(tableCellFosterTextProcessor.next_token(), true); +assert.equal(tableCellFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableCellFosterTextProcessor.get_modifiable_text(), "C"); +assert.deepEqual(tableCellFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(tableCellFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableCellFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", 
"TABLE"]); +assert.equal(tableCellFosterTextProcessor.next_tag("td"), true); +assert.deepEqual(tableCellFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableCellFosterTextProcessor.next_token(), true); +assert.equal(tableCellFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableCellFosterTextProcessor.get_modifiable_text(), "B"); +assert.deepEqual(tableCellFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "#text"]); +assert.equal(tableCellFosterTextProcessor.get_last_error(), null); +tableCellFosterTextProcessor.destroy(); +/* "A<table><tr> B</tr> B</table>": five separate text tokens ("A", " ", "B", " ", "B") are all reported under BODY before TABLE is visited. */ +const tableRowFosterTextProcessor = WP_HTML_Processor.create_full_parser("A<table><tr> B</tr> B</table>"); +assert.equal(tableRowFosterTextProcessor.next_tag("body"), true); +for (const text of ["A", " ", "B", " ", "B"]) { + assert.equal(tableRowFosterTextProcessor.next_token(), true); + assert.equal(tableRowFosterTextProcessor.get_token_type(), "#text"); + assert.equal(tableRowFosterTextProcessor.get_modifiable_text(), text); + assert.deepEqual(tableRowFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +} +assert.equal(tableRowFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableRowFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableRowFosterTextProcessor.next_tag("tr"), true); +assert.deepEqual(tableRowFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(tableRowFosterTextProcessor.get_last_error(), null); +tableRowFosterTextProcessor.destroy(); +/* The same input with a stray </em> before "C": the texts "A", " ", "B", "C" are reported under BODY, and a single-space text is reported under TABLE > TBODY after TR. */ +const tableRowIgnoredEndFosterTextProcessor = WP_HTML_Processor.create_full_parser("A<table><tr> B</tr> </em>C</table>"); +assert.equal(tableRowIgnoredEndFosterTextProcessor.next_tag("body"), true); +for (const text of ["A", " ", "B", "C"]) { + assert.equal(tableRowIgnoredEndFosterTextProcessor.next_token(), true); + assert.equal(tableRowIgnoredEndFosterTextProcessor.get_token_type(), 
"#text"); + assert.equal(tableRowIgnoredEndFosterTextProcessor.get_modifiable_text(), text); + assert.deepEqual(tableRowIgnoredEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +} +assert.equal(tableRowIgnoredEndFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableRowIgnoredEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableRowIgnoredEndFosterTextProcessor.next_tag("tr"), true); +assert.deepEqual(tableRowIgnoredEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(tableRowIgnoredEndFosterTextProcessor.next_token(), true); +assert.equal(tableRowIgnoredEndFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableRowIgnoredEndFosterTextProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableRowIgnoredEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "#text"]); +assert.equal(tableRowIgnoredEndFosterTextProcessor.get_last_error(), null); +tableRowIgnoredEndFosterTextProcessor.destroy(); +/* "<table><td></tbody>A": the text "A" is reported under BODY before TABLE is visited, and TD is under TABLE > TBODY > TR. */ +const tableCellEndFosterTextProcessor = WP_HTML_Processor.create_full_parser("<table><td></tbody>A"); +assert.equal(tableCellEndFosterTextProcessor.next_tag("body"), true); +assert.equal(tableCellEndFosterTextProcessor.next_token(), true); +assert.equal(tableCellEndFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableCellEndFosterTextProcessor.get_modifiable_text(), "A"); +assert.deepEqual(tableCellEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(tableCellEndFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableCellEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableCellEndFosterTextProcessor.next_tag("td"), true); +assert.deepEqual(tableCellEndFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableCellEndFosterTextProcessor.get_last_error(), null); 
+tableCellEndFosterTextProcessor.destroy(); +/* <b> opened directly inside a table: after the first B match, two further B matches are virtual and sit under BODY; the first holds the text "bbb" (visited before TABLE) and the second holds "ccc" (visited after TD). */ +const tableFormattingFosterTextProcessor = WP_HTML_Processor.create_full_parser("<table><b><tr><td>aaa</td></tr>bbb</table>ccc"); +assert.equal(tableFormattingFosterTextProcessor.next_tag("b"), true); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "B"]); +assert.equal(tableFormattingFosterTextProcessor.next_tag("b"), true); +assert.equal(tableFormattingFosterTextProcessor.is_virtual(), true); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "B"]); +assert.equal(tableFormattingFosterTextProcessor.next_token(), true); +assert.equal(tableFormattingFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableFormattingFosterTextProcessor.get_modifiable_text(), "bbb"); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "#text"]); +assert.equal(tableFormattingFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableFormattingFosterTextProcessor.next_tag("td"), true); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableFormattingFosterTextProcessor.next_tag("b"), true); +assert.equal(tableFormattingFosterTextProcessor.is_virtual(), true); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "B"]); +assert.equal(tableFormattingFosterTextProcessor.next_token(), true); +assert.equal(tableFormattingFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableFormattingFosterTextProcessor.get_modifiable_text(), "ccc"); +assert.deepEqual(tableFormattingFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "B", "#text"]); +assert.equal(tableFormattingFosterTextProcessor.get_last_error(), null); +tableFormattingFosterTextProcessor.destroy(); +/* "<table><a>1<td>2</td>3</table>": A and its text "1" are under BODY, a second, virtual A under BODY holds "3", and both are visited before TABLE and TD. */ +const 
tableAnchorFosterTextProcessor = WP_HTML_Processor.create_full_parser("<table><a>1<td>2</td>3</table>"); +assert.equal(tableAnchorFosterTextProcessor.next_tag("a"), true); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "A"]); +assert.equal(tableAnchorFosterTextProcessor.next_token(), true); +assert.equal(tableAnchorFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableAnchorFosterTextProcessor.get_modifiable_text(), "1"); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "A", "#text"]); +assert.equal(tableAnchorFosterTextProcessor.next_tag("a"), true); +assert.equal(tableAnchorFosterTextProcessor.is_virtual(), true); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "A"]); +assert.equal(tableAnchorFosterTextProcessor.next_token(), true); +assert.equal(tableAnchorFosterTextProcessor.get_token_type(), "#text"); +assert.equal(tableAnchorFosterTextProcessor.get_modifiable_text(), "3"); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "A", "#text"]); +assert.equal(tableAnchorFosterTextProcessor.next_tag("table"), true); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableAnchorFosterTextProcessor.next_tag("td"), true); +assert.deepEqual(tableAnchorFosterTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableAnchorFosterTextProcessor.get_last_error(), null); +tableAnchorFosterTextProcessor.destroy(); +/* Token-by-token walk of a table that opens <center> and <font> directly inside it; each step below pins the token type, tag name, virtual flag and breadcrumbs in visiting order. */ +const tableCenterFontFosterProcessor = WP_HTML_Processor.create_full_parser( + "<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>", +); +assert.equal(tableCenterFontFosterProcessor.next_tag("center"), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "CENTER"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); 
+assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "CENTER", "#text"]); +assert.equal(tableCenterFontFosterProcessor.next_tag("font"), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "CENTER", "FONT"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), "a"); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "CENTER", "FONT", "#text"]);/* Next token: a virtual FONT tag whose breadcrumbs are back at HTML > BODY > CENTER. */ +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "FONT"); +assert.equal(tableCenterFontFosterProcessor.is_virtual(), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "CENTER"]);/* Next token: a CENTER tag reported with breadcrumbs HTML > BODY > TABLE. */ +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "CENTER"); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]);/* Next token: another virtual FONT tag, this time directly under BODY; the IMG token follows it. */ +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "FONT"); +assert.equal(tableCenterFontFosterProcessor.is_virtual(), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "FONT"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "IMG"); 
+assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "FONT", "IMG"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "FONT", "#text"]);/* Next: a third virtual FONT token and then the TABLE token, both reported with breadcrumbs HTML > BODY > TABLE. */ +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "FONT"); +assert.equal(tableCenterFontFosterProcessor.is_virtual(), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "TABLE"); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]);/* Single-space text tokens are then reported inside TABLE and inside TD. */ +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "#text"]); +assert.equal(tableCenterFontFosterProcessor.next_tag("td"), true); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "#text"]); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); 
+assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#tag"); +assert.equal(tableCenterFontFosterProcessor.get_tag(), "TD"); +assert.equal(tableCenterFontFosterProcessor.next_token(), true); +assert.equal(tableCenterFontFosterProcessor.get_token_type(), "#text"); +assert.equal(tableCenterFontFosterProcessor.get_modifiable_text(), " "); +assert.deepEqual(tableCenterFontFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "#text"]); +assert.equal(tableCenterFontFosterProcessor.get_last_error(), null); +tableCenterFontFosterProcessor.destroy(); +/* Text inside <colgroup>: "foo" is the first token and is reported under BODY, while a single-space text stays inside COLGROUP; the normalize() expectation has the same split. */ +const colgroupTextProcessor = WP_HTML_Processor.create_fragment("<table><colgroup> foo</colgroup></table>"); +assert.equal(colgroupTextProcessor.next_token(), true); +assert.equal(colgroupTextProcessor.get_token_type(), "#text"); +assert.equal(colgroupTextProcessor.get_modifiable_text(), "foo"); +assert.deepEqual(colgroupTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(colgroupTextProcessor.next_tag("colgroup"), true); +assert.deepEqual(colgroupTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "COLGROUP"]); +assert.equal(colgroupTextProcessor.next_token(), true); +assert.equal(colgroupTextProcessor.get_token_type(), "#text"); +assert.equal(colgroupTextProcessor.get_modifiable_text(), " "); +assert.deepEqual(colgroupTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "COLGROUP", "#text"]); +assert.equal(colgroupTextProcessor.get_last_error(), null); +colgroupTextProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><colgroup> foo</colgroup></table>"), + "foo<table><colgroup> </colgroup></table>", +); +/* Whitespace-only text directly inside <table> is reported as a #text child of TABLE, right after the TABLE token. */ +const tableWhitespaceProcessor = WP_HTML_Processor.create_fragment("<table> \n <tr><td>cell"); +assert.equal(tableWhitespaceProcessor.next_token(), true); +assert.equal(tableWhitespaceProcessor.get_tag(), "TABLE"); +assert.equal(tableWhitespaceProcessor.next_token(), true); +assert.equal(tableWhitespaceProcessor.get_token_name(), 
"#text"); +assert.deepEqual(tableWhitespaceProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "#text"]); +assert.equal(tableWhitespaceProcessor.next_tag("td"), true); +assert.equal(tableWhitespaceProcessor.get_last_error(), null); +tableWhitespaceProcessor.destroy(); +/* A "\0" character written directly inside <table>: TR is still reached under TABLE > TBODY and no error is recorded. */ +const tableNullTextProcessor = WP_HTML_Processor.create_fragment("<table>\0<tr><td>cell"); +assert.equal(tableNullTextProcessor.next_tag("tr"), true); +assert.deepEqual(tableNullTextProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(tableNullTextProcessor.get_last_error(), null); +tableNullTextProcessor.destroy(); +/* A doctype inside a table: TD is still reached under TABLE > TBODY > TR, and the doctype is absent from the normalize() output. */ +const tableDoctypeProcessor = WP_HTML_Processor.create_fragment("<table><!doctype html><tr><td>cell"); +assert.equal(tableDoctypeProcessor.next_tag("td"), true); +assert.deepEqual(tableDoctypeProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +tableDoctypeProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><!doctype html><tr><td>cell"), + "<table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* A <div> written directly inside <table> is emitted (empty) ahead of the table by normalize(). */ +assert.equal( + WP_HTML_Processor.normalize("<table><div><tr><td>cell"), + "<div></div><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* An <input> with no type attribute inside a table: INPUT is reported under BODY and precedes the table in the normalize() output. */ +const tableInputFosterProcessor = WP_HTML_Processor.create_fragment("<table><input><tr><td>cell"); +assert.equal(tableInputFosterProcessor.next_tag("input"), true); +assert.deepEqual(tableInputFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "INPUT"]); +assert.equal(tableInputFosterProcessor.next_tag("td"), true); +assert.deepEqual(tableInputFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(tableInputFosterProcessor.get_last_error(), null); +tableInputFosterProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><input><tr><td>cell"), + "<input><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* A <div> written inside <tbody> or inside <tr> is likewise emitted ahead of the table by normalize(). */ +assert.equal( + WP_HTML_Processor.normalize("<table><tbody><div><tr><td>cell"), + 
"<div></div><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +assert.equal( + WP_HTML_Processor.normalize("<table><tr><div><td>cell"), + "<div></div><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* <svg> written after <colgroup>: SVG (namespace "svg") is reported under BODY and visited before TABLE; normalize() emits it ahead of the table. */ +const colgroupForeignFosterProcessor = WP_HTML_Processor.create_fragment("<table><colgroup><svg><g>cell</g>"); +assert.equal(colgroupForeignFosterProcessor.next_tag("svg"), true); +assert.equal(colgroupForeignFosterProcessor.get_namespace(), "svg"); +assert.deepEqual(colgroupForeignFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG"]); +assert.equal(colgroupForeignFosterProcessor.next_tag("table"), true); +assert.deepEqual(colgroupForeignFosterProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(colgroupForeignFosterProcessor.get_last_error(), null); +colgroupForeignFosterProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><colgroup><svg><g>cell</g>"), + "<svg><g>cell</g></svg><table><colgroup></colgroup></table>", +); +/* The trailing text "Foo" is the first token after BODY and is reported under BODY, before TABLE is visited; the walk then reaches the svg-namespace SVG element. */ +const tableForeignCellFosterParentingProcessor = WP_HTML_Processor.create_full_parser( + "<body><table><tr><td><svg><td><foreignObject><span></td>Foo", +); +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("body"), true); +assert.equal(tableForeignCellFosterParentingProcessor.next_token(), true); +assert.equal(tableForeignCellFosterParentingProcessor.get_token_type(), "#text"); +assert.equal(tableForeignCellFosterParentingProcessor.get_modifiable_text(), "Foo"); +assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "#text"]); +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("table"), true); +assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("svg"), true); +assert.equal(tableForeignCellFosterParentingProcessor.get_namespace(), "svg"); 
+assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG"]);/* Inside the SVG, the "td" and "foreignObject" tags are reported in the svg namespace; SPAN inside foreignObject is reported in the html namespace. */ +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("td"), true); +assert.equal(tableForeignCellFosterParentingProcessor.get_namespace(), "svg"); +assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG", "TD"]); +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("foreignobject"), true); +assert.equal(tableForeignCellFosterParentingProcessor.get_namespace(), "svg"); +assert.equal(tableForeignCellFosterParentingProcessor.get_qualified_tag_name(), "foreignObject"); +assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG", "TD", "FOREIGNOBJECT"]); +assert.equal(tableForeignCellFosterParentingProcessor.next_tag("span"), true); +assert.equal(tableForeignCellFosterParentingProcessor.get_namespace(), "html"); +assert.deepEqual(tableForeignCellFosterParentingProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG", "TD", "FOREIGNOBJECT", "SPAN"]); +assert.equal(tableForeignCellFosterParentingProcessor.get_last_error(), null); +tableForeignCellFosterParentingProcessor.destroy(); +/* <input type=hidden> inside a table is reported as a direct child of TABLE. */ +const tableHiddenInputProcessor = WP_HTML_Processor.create_fragment("<table><input type=hidden><tr><td>cell"); +assert.equal(tableHiddenInputProcessor.next_tag("input"), true); +assert.equal(tableHiddenInputProcessor.get_attribute("type"), "hidden"); +assert.deepEqual(tableHiddenInputProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "INPUT"]); +assert.equal(tableHiddenInputProcessor.next_tag("td"), true); +assert.equal(tableHiddenInputProcessor.get_last_error(), null); +tableHiddenInputProcessor.destroy(); +/* Fragment "<a>" created with "<table>" as the second argument (presumably the context element - confirm). */ +const tableFragmentStandaloneAnchorProcessor = WP_HTML_Processor.create_fragment("<a>", "<table>"); 
+assert.equal(tableFragmentStandaloneAnchorProcessor.serialize(), "<a></a>"); +assert.equal(tableFragmentStandaloneAnchorProcessor.get_last_error(), null); +tableFragmentStandaloneAnchorProcessor.destroy(); +/* Each of these inputs, parsed as a fragment with "<tbody>" as the second argument, must serialize to just "<a></a>" with no error recorded. */ +for (const html of ["<caption><a>", "<col><a>", "<tbody><a>", "<tfoot><a>", "<thead><a>", "</table><a>"]) { + const tableSectionFragmentStandaloneAnchorProcessor = WP_HTML_Processor.create_fragment(html, "<tbody>"); + assert.equal(tableSectionFragmentStandaloneAnchorProcessor.serialize(), "<a></a>"); + assert.equal(tableSectionFragmentStandaloneAnchorProcessor.get_last_error(), null); + tableSectionFragmentStandaloneAnchorProcessor.destroy(); +} +/* <form> inside a table followed by a comment: the real FORM opener is a child of TABLE; the very next token is a virtual FORM closer, for which attribute listing and comment type are null and set_attribute() / set_bookmark() return false; the comment is then a child of TABLE. */ +const tableFormProcessor = WP_HTML_Processor.create_fragment("<table><form><!--comment-->"); +assert.equal(tableFormProcessor.next_tag("form"), true); +assert.equal(tableFormProcessor.get_tag(), "FORM"); +assert.equal(tableFormProcessor.is_virtual(), false); +assert.deepEqual(tableFormProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "FORM"]); +assert.equal(tableFormProcessor.next_token(), true); +assert.equal(tableFormProcessor.get_token_name(), "FORM"); +assert.equal(tableFormProcessor.get_token_type(), "#tag"); +assert.equal(tableFormProcessor.is_virtual(), true); +assert.equal(tableFormProcessor.is_tag_closer(), true); +assert.equal(tableFormProcessor.get_attribute_names_with_prefix(""), null); +assert.equal(tableFormProcessor.set_attribute("id", "ignored"), false); +assert.equal(tableFormProcessor.set_bookmark("virtual-form"), false); +assert.equal(tableFormProcessor.get_comment_type(), null); +assert.deepEqual(tableFormProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableFormProcessor.next_token(), true); +assert.equal(tableFormProcessor.get_token_name(), "#comment"); +assert.deepEqual(tableFormProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "#comment"]); +tableFormProcessor.destroy(); +/* "<table><td>cell": the token walk that follows expects virtual TBODY and TR openers between the real TABLE and TD tokens. */ +const tableCellProcessor = WP_HTML_Processor.create_fragment("<table><td>cell"); 
+assert.equal(tableCellProcessor.next_token(), true); +assert.equal(tableCellProcessor.get_tag(), "TABLE"); +assert.equal(tableCellProcessor.is_virtual(), false); +assert.deepEqual(tableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(tableCellProcessor.next_token(), true); +assert.equal(tableCellProcessor.get_tag(), "TBODY"); +assert.equal(tableCellProcessor.is_virtual(), true); +assert.equal(tableCellProcessor.is_tag_closer(), false); +assert.equal(tableCellProcessor.set_bookmark("tbody"), false); +assert.deepEqual(tableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY"]); +assert.equal(tableCellProcessor.next_token(), true); +assert.equal(tableCellProcessor.get_tag(), "TR"); +assert.equal(tableCellProcessor.is_virtual(), true); +assert.equal(tableCellProcessor.is_tag_closer(), false); +assert.deepEqual(tableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(tableCellProcessor.next_token(), true); +assert.equal(tableCellProcessor.get_tag(), "TD"); +assert.equal(tableCellProcessor.is_virtual(), false); +assert.deepEqual(tableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +tableCellProcessor.destroy(); +/* A </strong> closer with no opener inside a row: TD is still reported under TABLE > TBODY > TR, and the closer is absent from the normalize() output. */ +const tableRowUnmatchedFormattingCloserProcessor = WP_HTML_Processor.create_fragment("<table><tr></strong><td>cell"); +assert.equal(tableRowUnmatchedFormattingCloserProcessor.next_tag("td"), true); +assert.deepEqual( + tableRowUnmatchedFormattingCloserProcessor.get_breadcrumbs(), + ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"], +); +tableRowUnmatchedFormattingCloserProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><tr></strong><td>cell"), + "<table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* A </blink> closer with no opener inside a row: TD placement and the normalize() output are the same as in the </strong> case. */ +const tableRowUnmatchedEndTagProcessor = WP_HTML_Processor.create_fragment("<table><tr></blink><td>cell"); +assert.equal(tableRowUnmatchedEndTagProcessor.next_tag("td"), true); +assert.deepEqual( + 
tableRowUnmatchedEndTagProcessor.get_breadcrumbs(), + ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"], +); +tableRowUnmatchedEndTagProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><tr></blink><td>cell"), + "<table><tbody><tr><td>cell</td></tr></tbody></table>", +); +/* A run of end tags written right after <table>: the following TD is still reported under TABLE > TBODY > TR. */ +const ignoredTableEndTagsProcessor = WP_HTML_Processor.create_fragment( + "<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>", +); +assert.equal(ignoredTableEndTagsProcessor.next_tag("td"), true); +assert.deepEqual(ignoredTableEndTagsProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +ignoredTableEndTagsProcessor.destroy(); +/* End tags for body/caption/col/colgroup/html written inside a cell: the token right after TD is the "foo" text, still a child of TD. */ +const ignoredTableCellEndTagsProcessor = WP_HTML_Processor.create_fragment( + "<table><td></body></caption></col></colgroup></html>foo", +); +assert.equal(ignoredTableCellEndTagsProcessor.next_tag("td"), true); +assert.equal(ignoredTableCellEndTagsProcessor.next_token(), true); +assert.equal(ignoredTableCellEndTagsProcessor.get_token_name(), "#text"); +assert.equal(ignoredTableCellEndTagsProcessor.get_modifiable_text(), "foo"); +assert.deepEqual(ignoredTableCellEndTagsProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "#text"]); +ignoredTableCellEndTagsProcessor.destroy(); +/* Two adjacent <td> tags with no closer between them: after the first cell's text, a virtual TD closer is reported before the second TD. */ +const adjacentTableCellProcessor = WP_HTML_Processor.create_fragment("<table><td>a<td>b"); +assert.equal(adjacentTableCellProcessor.next_tag("td"), true); +assert.deepEqual(adjacentTableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(adjacentTableCellProcessor.next_token(), true); +assert.equal(adjacentTableCellProcessor.get_token_name(), "#text"); +assert.equal(adjacentTableCellProcessor.next_token(), true); +assert.equal(adjacentTableCellProcessor.get_tag(), "TD"); +assert.equal(adjacentTableCellProcessor.is_virtual(), true); +assert.equal(adjacentTableCellProcessor.is_tag_closer(), true); 
+assert.deepEqual(adjacentTableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(adjacentTableCellProcessor.next_token(), true); +assert.equal(adjacentTableCellProcessor.get_tag(), "TD"); +assert.equal(adjacentTableCellProcessor.is_virtual(), false); +assert.equal(adjacentTableCellProcessor.is_tag_closer(), false); +assert.deepEqual(adjacentTableCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +adjacentTableCellProcessor.destroy(); +/* A new <tr> while a cell is open: virtual TD and TR closers are expected before the real TR opener. */ +const adjacentTableRowProcessor = WP_HTML_Processor.create_fragment("<table><tr><td>a<tr><td>b"); +assert.equal(adjacentTableRowProcessor.next_tag("td"), true); +assert.deepEqual(adjacentTableRowProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(adjacentTableRowProcessor.next_token(), true); +assert.equal(adjacentTableRowProcessor.get_token_name(), "#text"); +assert.equal(adjacentTableRowProcessor.next_token(), true); +assert.equal(adjacentTableRowProcessor.get_tag(), "TD"); +assert.equal(adjacentTableRowProcessor.is_virtual(), true); +assert.equal(adjacentTableRowProcessor.is_tag_closer(), true); +assert.deepEqual(adjacentTableRowProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +assert.equal(adjacentTableRowProcessor.next_token(), true); +assert.equal(adjacentTableRowProcessor.get_tag(), "TR"); +assert.equal(adjacentTableRowProcessor.is_virtual(), true); +assert.equal(adjacentTableRowProcessor.is_tag_closer(), true); +assert.deepEqual(adjacentTableRowProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY"]); +assert.equal(adjacentTableRowProcessor.next_token(), true); +assert.equal(adjacentTableRowProcessor.get_tag(), "TR"); +assert.equal(adjacentTableRowProcessor.is_virtual(), false); +assert.equal(adjacentTableRowProcessor.is_tag_closer(), false); +assert.deepEqual(adjacentTableRowProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR"]); +adjacentTableRowProcessor.destroy(); + 
+const adjacentTableSectionProcessor = WP_HTML_Processor.create_fragment("<table><tbody><tr><td>a<tbody><tr><td>b"); +assert.equal(adjacentTableSectionProcessor.next_tag("td"), true); +assert.deepEqual(adjacentTableSectionProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); +assert.equal(adjacentTableSectionProcessor.next_token(), true); +assert.equal(adjacentTableSectionProcessor.get_token_name(), "#text"); +assert.equal(adjacentTableSectionProcessor.next_token(), true); +assert.equal(adjacentTableSectionProcessor.get_tag(), "TD"); +assert.equal(adjacentTableSectionProcessor.is_virtual(), true); +assert.equal(adjacentTableSectionProcessor.is_tag_closer(), true); +assert.equal(adjacentTableSectionProcessor.next_token(), true); +assert.equal(adjacentTableSectionProcessor.get_tag(), "TR"); +assert.equal(adjacentTableSectionProcessor.is_virtual(), true); +assert.equal(adjacentTableSectionProcessor.is_tag_closer(), true); +assert.equal(adjacentTableSectionProcessor.next_token(), true); +assert.equal(adjacentTableSectionProcessor.get_tag(), "TBODY"); +assert.equal(adjacentTableSectionProcessor.is_virtual(), true); +assert.equal(adjacentTableSectionProcessor.is_tag_closer(), true); +assert.deepEqual(adjacentTableSectionProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(adjacentTableSectionProcessor.next_token(), true); +assert.equal(adjacentTableSectionProcessor.get_tag(), "TBODY"); +assert.equal(adjacentTableSectionProcessor.is_virtual(), false); +assert.equal(adjacentTableSectionProcessor.is_tag_closer(), false); +assert.deepEqual(adjacentTableSectionProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY"]); +adjacentTableSectionProcessor.destroy(); +/* </table> with a cell open: virtual TD, TR and TBODY closers are expected, then the real TABLE closer, after which <p> opens directly under BODY. */ +const tableEndTagProcessor = WP_HTML_Processor.create_fragment("<table><tbody><tr><td>a</table><p>b"); +assert.equal(tableEndTagProcessor.next_tag("td"), true); +assert.deepEqual(tableEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD"]); 
+assert.equal(tableEndTagProcessor.next_token(), true); +assert.equal(tableEndTagProcessor.get_token_name(), "#text"); +assert.equal(tableEndTagProcessor.next_token(), true); +assert.equal(tableEndTagProcessor.get_tag(), "TD"); +assert.equal(tableEndTagProcessor.is_virtual(), true); +assert.equal(tableEndTagProcessor.is_tag_closer(), true); +assert.equal(tableEndTagProcessor.next_token(), true); +assert.equal(tableEndTagProcessor.get_tag(), "TR"); +assert.equal(tableEndTagProcessor.is_virtual(), true); +assert.equal(tableEndTagProcessor.is_tag_closer(), true); +assert.equal(tableEndTagProcessor.next_token(), true); +assert.equal(tableEndTagProcessor.get_tag(), "TBODY"); +assert.equal(tableEndTagProcessor.is_virtual(), true); +assert.equal(tableEndTagProcessor.is_tag_closer(), true); +assert.equal(tableEndTagProcessor.next_token(), true); +assert.equal(tableEndTagProcessor.get_tag(), "TABLE"); +assert.equal(tableEndTagProcessor.is_virtual(), false); +assert.equal(tableEndTagProcessor.is_tag_closer(), true); +assert.deepEqual(tableEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(tableEndTagProcessor.next_tag("p"), true); +assert.deepEqual(tableEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]); +tableEndTagProcessor.destroy(); +/* A <table> start tag inside an open table section: virtual TBODY and TABLE closers are expected, then a new real TABLE opener under BODY. The normalize() checks below pin the same behavior, plus the nested-in-cell case. */ +const nestedTableStartProcessor = WP_HTML_Processor.create_fragment("<table><tbody><table><tr><td>cell"); +assert.equal(nestedTableStartProcessor.next_token(), true); +assert.equal(nestedTableStartProcessor.get_tag(), "TABLE"); +assert.equal(nestedTableStartProcessor.is_tag_closer(), false); +assert.deepEqual(nestedTableStartProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(nestedTableStartProcessor.next_token(), true); +assert.equal(nestedTableStartProcessor.get_tag(), "TBODY"); +assert.equal(nestedTableStartProcessor.is_tag_closer(), false); +assert.deepEqual(nestedTableStartProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY"]); +assert.equal(nestedTableStartProcessor.next_token(), 
true); +assert.equal(nestedTableStartProcessor.get_tag(), "TBODY"); +assert.equal(nestedTableStartProcessor.is_virtual(), true); +assert.equal(nestedTableStartProcessor.is_tag_closer(), true); +assert.deepEqual(nestedTableStartProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +assert.equal(nestedTableStartProcessor.next_token(), true); +assert.equal(nestedTableStartProcessor.get_tag(), "TABLE"); +assert.equal(nestedTableStartProcessor.is_virtual(), true); +assert.equal(nestedTableStartProcessor.is_tag_closer(), true); +assert.deepEqual(nestedTableStartProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(nestedTableStartProcessor.next_token(), true); +assert.equal(nestedTableStartProcessor.get_tag(), "TABLE"); +assert.equal(nestedTableStartProcessor.is_virtual(), false); +assert.equal(nestedTableStartProcessor.is_tag_closer(), false); +assert.deepEqual(nestedTableStartProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE"]); +nestedTableStartProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<table><tbody><table><tr><td>cell"), + "<table><tbody></tbody></table><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +assert.equal( + WP_HTML_Processor.normalize("<table><tr><table><tr><td>cell"), + "<table><tbody><tr></tr></tbody></table><table><tbody><tr><td>cell</td></tr></tbody></table>", +); +assert.equal( + WP_HTML_Processor.normalize("<table><td><table><td>cell"), + "<table><tbody><tr><td><table><tbody><tr><td>cell</td></tr></tbody></table></td></tr></tbody></table>", +); +/* An unmatched </button> inside <div> is expected to yield no token of its own: the sequence is DIV opener, text, DIV closer. */ +const unexpectedCloserProcessor = WP_HTML_Processor.create_fragment("<div>Test</button></div>"); +assert.equal(unexpectedCloserProcessor.next_token(), true); +assert.equal(unexpectedCloserProcessor.get_tag(), "DIV"); +assert.equal(unexpectedCloserProcessor.is_tag_closer(), false); +assert.equal(unexpectedCloserProcessor.next_token(), true); +assert.equal(unexpectedCloserProcessor.get_token_type(), "#text"); 
+assert.equal(unexpectedCloserProcessor.next_token(), true); +assert.equal(unexpectedCloserProcessor.get_tag(), "DIV"); +assert.equal(unexpectedCloserProcessor.is_tag_closer(), true); +unexpectedCloserProcessor.destroy(); +/* At end of input, the still-open elements are expected to be reported as virtual closers, innermost first, before next_token() returns false. */ +const eofCloserProcessor = WP_HTML_Processor.create_fragment("<div><p><span>"); +assert.equal(eofCloserProcessor.next_tag("span"), true); +assert.deepEqual(eofCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "P", "SPAN"]); +assert.equal(eofCloserProcessor.next_token(), true); +assert.equal(eofCloserProcessor.get_tag(), "SPAN"); +assert.equal(eofCloserProcessor.is_virtual(), true); +assert.equal(eofCloserProcessor.is_tag_closer(), true); +assert.deepEqual(eofCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "P"]); +assert.equal(eofCloserProcessor.next_token(), true); +assert.equal(eofCloserProcessor.get_tag(), "P"); +assert.equal(eofCloserProcessor.is_virtual(), true); +assert.equal(eofCloserProcessor.is_tag_closer(), true); +assert.deepEqual(eofCloserProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"]); +assert.equal(eofCloserProcessor.next_token(), true); +assert.equal(eofCloserProcessor.get_tag(), "DIV"); +assert.equal(eofCloserProcessor.is_virtual(), true); +assert.equal(eofCloserProcessor.is_tag_closer(), true); +assert.deepEqual(eofCloserProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(eofCloserProcessor.next_token(), false); +assert.equal(eofCloserProcessor.get_tag(), null); +eofCloserProcessor.destroy(); +/* </span> arriving while a <p> is open inside the SPAN: the later <div target> is expected to still be a child of SPAN. */ +const specialEndTagProcessor = WP_HTML_Processor.create_fragment("<div><span><p></span><div target>"); +assert.equal(specialEndTagProcessor.next_tag("p"), true); +assert.deepEqual(specialEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "SPAN", "P"]); +assert.equal(specialEndTagProcessor.next_tag("div"), true); +assert.deepEqual(specialEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "SPAN", "DIV"]); +assert.equal(specialEndTagProcessor.get_attribute("target"), true); 
+specialEndTagProcessor.destroy(); +/* </span> arriving while a <code> is open inside the SPAN: the SPAN closer is visited and the later <div target> lands under the outer DIV. */ +const nonSpecialEndTagProcessor = WP_HTML_Processor.create_fragment("<div><span><code></span><div target>"); +assert.equal(nonSpecialEndTagProcessor.next_tag("code"), true); +assert.deepEqual(nonSpecialEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "SPAN", "CODE"]); +assert.equal(nonSpecialEndTagProcessor.next_tag({ tag_name: "span", tag_closers: "visit" }), true); +assert.equal(nonSpecialEndTagProcessor.is_tag_closer(), true); +assert.deepEqual(nonSpecialEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"]); +assert.equal(nonSpecialEndTagProcessor.next_tag("div"), true); +assert.deepEqual(nonSpecialEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "DIV"]); +assert.equal(nonSpecialEndTagProcessor.get_attribute("target"), true); +nonSpecialEndTagProcessor.destroy(); +/* </div> with a <p> still open inside it: the DIV closer is visited and the following <span target> opens directly under BODY. */ +const modeledScopedEndTagProcessor = WP_HTML_Processor.create_fragment("<div><p></div><span target>"); +assert.equal(modeledScopedEndTagProcessor.next_tag("p"), true); +assert.deepEqual(modeledScopedEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV", "P"]); +assert.equal(modeledScopedEndTagProcessor.next_tag({ tag_name: "div", tag_closers: "visit" }), true); +assert.equal(modeledScopedEndTagProcessor.is_tag_closer(), true); +assert.deepEqual(modeledScopedEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(modeledScopedEndTagProcessor.next_tag("span"), true); +assert.deepEqual(modeledScopedEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "SPAN"]); +assert.equal(modeledScopedEndTagProcessor.get_attribute("target"), true); +modeledScopedEndTagProcessor.destroy(); +/* Elements inside <svg> report the "svg" namespace; the self-closing <image /> expects no closer, <rect> does, and <p> after </svg> is back in "html". */ +const svgProcessor = WP_HTML_Processor.create_fragment("<svg><image /><rect></rect></svg><p>"); +assert.equal(svgProcessor.next_tag("image"), true); +assert.equal(svgProcessor.get_namespace(), "svg"); +assert.equal(svgProcessor.expects_closer(), false); +assert.deepEqual(svgProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "IMAGE"]); 
+assert.equal(svgProcessor.next_tag("rect"), true); +assert.equal(svgProcessor.get_namespace(), "svg"); +assert.equal(svgProcessor.expects_closer(), true); +assert.deepEqual(svgProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "RECT"]); +assert.equal(svgProcessor.next_tag("p"), true); +assert.equal(svgProcessor.get_namespace(), "html"); +assert.deepEqual(svgProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]); +svgProcessor.destroy(); +/* <img> inside <svg>: a virtual SVG closer is expected first, then an "html"-namespace IMG directly under BODY. The normalize() checks below cover further start tags inside <svg>, including <font> with and without a color attribute. */ +const foreignHtmlBreakoutProcessor = WP_HTML_Processor.create_fragment("<svg><img>text"); +assert.equal(foreignHtmlBreakoutProcessor.next_token(), true); +assert.equal(foreignHtmlBreakoutProcessor.get_tag(), "SVG"); +assert.equal(foreignHtmlBreakoutProcessor.get_namespace(), "svg"); +assert.equal(foreignHtmlBreakoutProcessor.next_token(), true); +assert.equal(foreignHtmlBreakoutProcessor.get_tag(), "SVG"); +assert.equal(foreignHtmlBreakoutProcessor.is_virtual(), true); +assert.equal(foreignHtmlBreakoutProcessor.is_tag_closer(), true); +assert.deepEqual(foreignHtmlBreakoutProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(foreignHtmlBreakoutProcessor.next_token(), true); +assert.equal(foreignHtmlBreakoutProcessor.get_tag(), "IMG"); +assert.equal(foreignHtmlBreakoutProcessor.get_namespace(), "html"); +assert.equal(foreignHtmlBreakoutProcessor.expects_closer(), false); +assert.deepEqual(foreignHtmlBreakoutProcessor.get_breadcrumbs(), ["HTML", "BODY", "IMG"]); +foreignHtmlBreakoutProcessor.destroy(); +assert.equal(WP_HTML_Processor.normalize("<svg><img>text"), "<svg></svg><img>text"); +assert.equal(WP_HTML_Processor.normalize("<svg><span>text"), "<svg></svg><span>text</span>"); +assert.equal( + WP_HTML_Processor.normalize("<svg><foreignObject><svg><img>text"), + "<svg><foreignObject><svg></svg><img>text</foreignObject></svg>", +); +assert.equal(WP_HTML_Processor.normalize("<svg><font color=red>text"), '<svg></svg><font color="red">text</font>'); +assert.equal(WP_HTML_Processor.normalize("<svg><font>text"), 
"<svg><font>text</font></svg>"); +/* </p> inside <svg>: a virtual SVG closer is expected, followed by a virtual P opener and a virtual P closer under BODY. */ +const foreignEndTagBreakoutProcessor = WP_HTML_Processor.create_fragment("<svg></p><span>x"); +assert.equal(foreignEndTagBreakoutProcessor.next_token(), true); +assert.equal(foreignEndTagBreakoutProcessor.get_tag(), "SVG"); +assert.equal(foreignEndTagBreakoutProcessor.get_namespace(), "svg"); +assert.equal(foreignEndTagBreakoutProcessor.next_token(), true); +assert.equal(foreignEndTagBreakoutProcessor.get_tag(), "SVG"); +assert.equal(foreignEndTagBreakoutProcessor.is_virtual(), true); +assert.equal(foreignEndTagBreakoutProcessor.is_tag_closer(), true); +assert.deepEqual(foreignEndTagBreakoutProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +assert.equal(foreignEndTagBreakoutProcessor.next_token(), true); +assert.equal(foreignEndTagBreakoutProcessor.get_tag(), "P"); +assert.equal(foreignEndTagBreakoutProcessor.is_virtual(), true); +assert.equal(foreignEndTagBreakoutProcessor.is_tag_closer(), false); +assert.deepEqual(foreignEndTagBreakoutProcessor.get_breadcrumbs(), ["HTML", "BODY", "P"]); +assert.equal(foreignEndTagBreakoutProcessor.next_token(), true); +assert.equal(foreignEndTagBreakoutProcessor.get_tag(), "P"); +assert.equal(foreignEndTagBreakoutProcessor.is_virtual(), true); +assert.equal(foreignEndTagBreakoutProcessor.is_tag_closer(), true); +assert.deepEqual(foreignEndTagBreakoutProcessor.get_breadcrumbs(), ["HTML", "BODY"]); +foreignEndTagBreakoutProcessor.destroy(); +assert.equal(WP_HTML_Processor.normalize("<svg></p><span>x"), "<svg></svg><p></p><span>x</span>"); +assert.equal(WP_HTML_Processor.normalize("<svg><g></p><span>x"), "<svg><g></g></svg><p></p><span>x</span>"); +/* Qualified tag and attribute names reported for SVG elements, starting with a self-closing <svg />. */ +const qualifiedSvgProcessor = WP_HTML_Processor.create_fragment('<svg /><svg><lineargradient gradientunits="userSpaceOnUse"></lineargradient></svg>'); +assert.equal(qualifiedSvgProcessor.next_tag("svg"), true); +assert.equal(qualifiedSvgProcessor.get_namespace(), "svg"); +assert.equal(qualifiedSvgProcessor.get_qualified_tag_name(), "svg"); 
+assert.equal(qualifiedSvgProcessor.serialize_token(), "<svg />"); +assert.equal(qualifiedSvgProcessor.next_tag("lineargradient"), true); +assert.equal(qualifiedSvgProcessor.get_namespace(), "svg"); +assert.equal(qualifiedSvgProcessor.get_qualified_tag_name(), "linearGradient"); +assert.equal(qualifiedSvgProcessor.get_qualified_attribute_name("gradientunits"), "gradientUnits"); +qualifiedSvgProcessor.destroy(); +/* On an SVG element, xlink:, xml: and xmlns: prefixed attribute names are reported with a space in place of the colon; names with another prefix are returned as given. */ +const qualifiedForeignAttributeProcessor = WP_HTML_Processor.create_fragment( + '<svg><use xlink:href="#icon" xml:lang="en" xmlns:xlink="http://www.w3.org/1999/xlink" custom:attr="v"></use></svg>', +); +assert.equal(qualifiedForeignAttributeProcessor.next_tag("use"), true); +assert.equal(qualifiedForeignAttributeProcessor.get_namespace(), "svg"); +assert.equal(qualifiedForeignAttributeProcessor.get_qualified_attribute_name("xlink:href"), "xlink href"); +assert.equal(qualifiedForeignAttributeProcessor.get_qualified_attribute_name("xml:lang"), "xml lang"); +assert.equal(qualifiedForeignAttributeProcessor.get_qualified_attribute_name("xmlns:xlink"), "xmlns xlink"); +assert.equal(qualifiedForeignAttributeProcessor.get_qualified_attribute_name("custom:attr"), "custom:attr"); +assert.equal(qualifiedForeignAttributeProcessor.get_qualified_attribute_name("custom:Attr"), "custom:Attr"); +qualifiedForeignAttributeProcessor.destroy(); +/* A <div> inside <foreignObject> is expected to be in the "html" namespace while keeping SVG and FOREIGNOBJECT in its breadcrumbs. */ +const foreignObjectProcessor = WP_HTML_Processor.create_fragment("<svg><foreignObject><div></div></foreignObject></svg>"); +assert.equal(foreignObjectProcessor.next_tag("div"), true); +assert.equal(foreignObjectProcessor.get_namespace(), "html"); +assert.deepEqual(foreignObjectProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "FOREIGNOBJECT", "DIV"]); +foreignObjectProcessor.destroy(); +/* <image /> is reported as an "html" IMG outside <math> and inside a MathML <mo>, but as a "math" IMAGE directly under <math>. */ +const mathProcessor = WP_HTML_Processor.create_fragment("<mo><image /></mo><math><image /><mo><image /></mo></math>"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "MO"); 
+assert.equal(mathProcessor.get_namespace(), "html"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "IMG"); +assert.equal(mathProcessor.get_namespace(), "html"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.is_tag_closer(), true); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "MATH"); +assert.equal(mathProcessor.get_namespace(), "math"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "IMAGE"); +assert.equal(mathProcessor.get_namespace(), "math"); +assert.equal(mathProcessor.get_qualified_tag_name(), "image"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "MO"); +assert.equal(mathProcessor.get_namespace(), "math"); +assert.equal(mathProcessor.next_token(), true); +assert.equal(mathProcessor.get_tag(), "IMG"); +assert.equal(mathProcessor.get_namespace(), "html"); +mathProcessor.destroy(); +/* On a MathML element, the attribute name "definitionurl" is reported as "definitionURL". */ +const mathQualifiedProcessor = WP_HTML_Processor.create_fragment('<math><mi definitionurl="x"></mi></math>'); +assert.equal(mathQualifiedProcessor.next_tag("mi"), true); +assert.equal(mathQualifiedProcessor.get_namespace(), "math"); +assert.equal(mathQualifiedProcessor.get_qualified_attribute_name("definitionurl"), "definitionURL"); +mathQualifiedProcessor.destroy(); +/* On a MathML element, "xlink:show" is reported as "xlink show", while "viewbox" and "viewBox" are returned as given. */ +const mathQualifiedForeignAttributeProcessor = WP_HTML_Processor.create_fragment( + '<math><mi definitionurl="x" xlink:show="new" viewbox="raw"></mi></math>', +); +assert.equal(mathQualifiedForeignAttributeProcessor.next_tag("mi"), true); +assert.equal(mathQualifiedForeignAttributeProcessor.get_namespace(), "math"); +assert.equal(mathQualifiedForeignAttributeProcessor.get_qualified_attribute_name("definitionurl"), "definitionURL"); +assert.equal(mathQualifiedForeignAttributeProcessor.get_qualified_attribute_name("xlink:show"), "xlink show"); 
+assert.equal(mathQualifiedForeignAttributeProcessor.get_qualified_attribute_name("viewbox"), "viewbox"); +assert.equal(mathQualifiedForeignAttributeProcessor.get_qualified_attribute_name("viewBox"), "viewBox"); +mathQualifiedForeignAttributeProcessor.destroy(); +/* After </mi>, the following <mn> is expected to remain a "math" element directly under MATH; the normalize() checks pin the serialized form. */ +const mathIntegrationEndTagProcessor = WP_HTML_Processor.create_fragment("<math><mi>x</mi><mn>1</mn></math>"); +assert.equal(mathIntegrationEndTagProcessor.next_tag("mn"), true); +assert.equal(mathIntegrationEndTagProcessor.get_namespace(), "math"); +assert.deepEqual(mathIntegrationEndTagProcessor.get_breadcrumbs(), ["HTML", "BODY", "MATH", "MN"]); +mathIntegrationEndTagProcessor.destroy(); +assert.equal( + WP_HTML_Processor.normalize("<math><mi>x</mi><mn>1</mn></math>"), + "<math><mi>x</mi><mn>1</mn></math>", +); +assert.equal( + WP_HTML_Processor.normalize("<math><mo><image /></mo><mn>1</mn></math>"), + "<math><mo><img></mo><mn>1</mn></math>", +); +/* Fragment created with "<math><ms>" as the second argument: <mglyph> and <malignmark> are "math" elements, while a nested <ms> is "html". */ +const mathTextIntegrationFragmentProcessor = WP_HTML_Processor.create_fragment( + "<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X", + "<math><ms>", +); +assert.equal(mathTextIntegrationFragmentProcessor.next_tag("mglyph"), true); +assert.equal(mathTextIntegrationFragmentProcessor.get_namespace(), "math"); +assert.deepEqual(mathTextIntegrationFragmentProcessor.get_breadcrumbs(), ["HTML", "MS", "MGLYPH"]); +assert.equal(mathTextIntegrationFragmentProcessor.next_tag("malignmark"), true); +assert.equal(mathTextIntegrationFragmentProcessor.get_namespace(), "math"); +assert.deepEqual(mathTextIntegrationFragmentProcessor.get_breadcrumbs(), ["HTML", "MS", "MALIGNMARK"]); +assert.equal(mathTextIntegrationFragmentProcessor.next_tag("ms"), true); +assert.equal(mathTextIntegrationFragmentProcessor.get_namespace(), "html"); +assert.deepEqual(mathTextIntegrationFragmentProcessor.get_breadcrumbs(), ["HTML", "MS", "MS"]); +mathTextIntegrationFragmentProcessor.destroy(); +/* Fragment created with "<svg><path>" as the second argument: <nobr> is "html" and keeps SVG and PATH in its breadcrumbs, as does its text. */ +const foreignFragmentBreakoutProcessor = WP_HTML_Processor.create_fragment("<nobr>X", 
"<svg><path>"); +assert.equal(foreignFragmentBreakoutProcessor.next_tag("nobr"), true); +assert.equal(foreignFragmentBreakoutProcessor.get_namespace(), "html"); +assert.deepEqual(foreignFragmentBreakoutProcessor.get_breadcrumbs(), ["HTML", "SVG", "PATH", "NOBR"]); +assert.equal(foreignFragmentBreakoutProcessor.next_token(), true); +assert.equal(foreignFragmentBreakoutProcessor.get_token_type(), "#text"); +assert.deepEqual(foreignFragmentBreakoutProcessor.get_breadcrumbs(), ["HTML", "SVG", "PATH", "NOBR", "#text"]); +foreignFragmentBreakoutProcessor.destroy(); +/* Fragments created with "<svg>" as the second argument: where a first HTML-namespace tag is listed it must be found, and the later <foo> must still be an "svg" element directly under SVG. The input is passed as the assert message. */ +for (const [html, firstHtmlTag] of [ + ["</p><foo>", "P"], + ["</br><foo>", "BR"], + ["<body><foo>", null], + ["<p></p><foo>", "P"], +]) { + const svgFragmentNamespaceProcessor = WP_HTML_Processor.create_fragment(html, "<svg>"); + if (firstHtmlTag !== null) { + assert.equal(svgFragmentNamespaceProcessor.next_tag(firstHtmlTag), true, html); + assert.equal(svgFragmentNamespaceProcessor.get_namespace(), "html", html); + } + assert.equal(svgFragmentNamespaceProcessor.next_tag("foo"), true, html); + assert.equal(svgFragmentNamespaceProcessor.get_namespace(), "svg", html); + assert.deepEqual(svgFragmentNamespaceProcessor.get_breadcrumbs(), ["HTML", "SVG", "FOO"], html); + svgFragmentNamespaceProcessor.destroy(); +} +/* Fragment created with "<math><annotation-xml>" as the second argument: <figure> is expected in the "math" namespace. */ +const mathAnnotationXmlFragmentProcessor = WP_HTML_Processor.create_fragment("<figure></figure>", "<math><annotation-xml>"); +assert.equal(mathAnnotationXmlFragmentProcessor.next_tag("figure"), true); +assert.equal(mathAnnotationXmlFragmentProcessor.get_namespace(), "math"); +assert.deepEqual(mathAnnotationXmlFragmentProcessor.get_breadcrumbs(), ["HTML", "ANNOTATION-XML", "FIGURE"]); +mathAnnotationXmlFragmentProcessor.destroy(); +/* Same fragment arguments with <div> as content: the DIV is expected in the "html" namespace. */ +const mathAnnotationXmlBreakoutProcessor = WP_HTML_Processor.create_fragment("<div></div>", "<math><annotation-xml>"); +assert.equal(mathAnnotationXmlBreakoutProcessor.next_tag("div"), true); +assert.equal(mathAnnotationXmlBreakoutProcessor.get_namespace(), "html"); 
+assert.deepEqual(mathAnnotationXmlBreakoutProcessor.get_breadcrumbs(), ["HTML", "MATH", "ANNOTATION-XML", "DIV"]); +mathAnnotationXmlBreakoutProcessor.destroy(); +/* With encoding="text/html" on the annotation-xml in the second argument, <div> is "html" and sits directly under ANNOTATION-XML. */ +const mathAnnotationXmlHtmlIntegrationProcessor = WP_HTML_Processor.create_fragment( + "<div></div>", + '<math><annotation-xml encoding="text/html">', +); +assert.equal(mathAnnotationXmlHtmlIntegrationProcessor.next_tag("div"), true); +assert.equal(mathAnnotationXmlHtmlIntegrationProcessor.get_namespace(), "html"); +assert.deepEqual(mathAnnotationXmlHtmlIntegrationProcessor.get_breadcrumbs(), ["HTML", "ANNOTATION-XML", "DIV"]); +mathAnnotationXmlHtmlIntegrationProcessor.destroy(); +/* <tr> and <td> inside <svg> are expected to be plain "svg" elements nested under SVG, with no TBODY in the breadcrumbs. */ +const svgTableNameProcessor = WP_HTML_Processor.create_fragment("<svg><tr><td>cell"); +assert.equal(svgTableNameProcessor.next_tag("tr"), true); +assert.equal(svgTableNameProcessor.get_namespace(), "svg"); +assert.deepEqual(svgTableNameProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "TR"]); +assert.equal(svgTableNameProcessor.next_tag("td"), true); +assert.equal(svgTableNameProcessor.get_namespace(), "svg"); +assert.deepEqual(svgTableNameProcessor.get_breadcrumbs(), ["HTML", "BODY", "SVG", "TR", "TD"]); +svgTableNameProcessor.destroy(); +assert.equal(WP_HTML_Processor.normalize("<svg><tr><td>cell"), "<svg><tr><td>cell</td></tr></svg>"); +/* Same expectation when the <svg> itself is inside a real table cell; the last normalize() check in this group expects a DOCTYPE inside <svg> to be dropped. */ +const svgTableNameInCellProcessor = WP_HTML_Processor.create_fragment("<table><tr><td><svg><tr><circle>"); +assert.equal(svgTableNameInCellProcessor.next_tag("svg"), true); +assert.equal(svgTableNameInCellProcessor.get_namespace(), "svg"); +assert.deepEqual(svgTableNameInCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG"]); +assert.equal(svgTableNameInCellProcessor.next_tag("tr"), true); +assert.equal(svgTableNameInCellProcessor.get_namespace(), "svg"); +assert.deepEqual(svgTableNameInCellProcessor.get_breadcrumbs(), ["HTML", "BODY", "TABLE", "TBODY", "TR", "TD", "SVG", "TR"]); +svgTableNameInCellProcessor.destroy(); +assert.equal( + 
WP_HTML_Processor.normalize("<table><tr><td><svg><tr><circle>"), + "<table><tbody><tr><td><svg><tr><circle></circle></tr></svg></td></tr></tbody></table>", +); +assert.equal(WP_HTML_Processor.normalize("<svg><!DOCTYPE html></svg>"), "<svg></svg>"); +/* Fragment created with "<tr>" as the second argument: a <tr> inside <math> is a "math" element under MATH. */ +const tableRowMathFragmentProcessor = WP_HTML_Processor.create_fragment("<math><tr><td><mo><tr>", "<tr>"); +assert.equal(tableRowMathFragmentProcessor.next_tag("math"), true); +assert.equal(tableRowMathFragmentProcessor.get_namespace(), "math"); +assert.deepEqual(tableRowMathFragmentProcessor.get_breadcrumbs(), ["HTML", "TR", "MATH"]); +assert.equal(tableRowMathFragmentProcessor.next_tag("tr"), true); +assert.equal(tableRowMathFragmentProcessor.get_namespace(), "math"); +assert.deepEqual(tableRowMathFragmentProcessor.get_breadcrumbs(), ["HTML", "TR", "MATH", "TR"]); +tableRowMathFragmentProcessor.destroy(); +/* Fragment created with "<thead>" as the second argument: a <thead> inside <svg> is an "svg" element under SVG. */ +const tableSectionSvgFragmentProcessor = WP_HTML_Processor.create_fragment("<svg><thead><title><tbody>", "<thead>"); +assert.equal(tableSectionSvgFragmentProcessor.next_tag("svg"), true); +assert.equal(tableSectionSvgFragmentProcessor.get_namespace(), "svg"); +assert.deepEqual(tableSectionSvgFragmentProcessor.get_breadcrumbs(), ["HTML", "THEAD", "SVG"]); +assert.equal(tableSectionSvgFragmentProcessor.next_tag("thead"), true); +assert.equal(tableSectionSvgFragmentProcessor.get_namespace(), "svg"); +assert.deepEqual(tableSectionSvgFragmentProcessor.get_breadcrumbs(), ["HTML", "THEAD", "SVG", "THEAD"]); +tableSectionSvgFragmentProcessor.destroy(); +/* SVG <title>: get_modifiable_text() is "" and set_modifiable_text() is refused, leaving the HTML unchanged. */ +const foreignModifiableTextProcessor = WP_HTML_Processor.create_fragment("<svg><title>One</title></svg>"); +assert.equal(foreignModifiableTextProcessor.next_tag("title"), true); +assert.equal(foreignModifiableTextProcessor.get_namespace(), "svg"); +assert.equal(foreignModifiableTextProcessor.get_modifiable_text(), ""); +assert.equal(foreignModifiableTextProcessor.set_modifiable_text("Two"), false); +assert.equal(foreignModifiableTextProcessor.get_updated_html(), 
"<svg><title>One</title></svg>"); +foreignModifiableTextProcessor.destroy(); +/* Setting TEXTAREA text that begins with a line break: CR and CRLF read back as LF, and the updated HTML carries one extra leading newline. */ +for (const [setText, expectedText, expectedHtml] of [ + ["\nAFTER NEWLINE", "\nAFTER NEWLINE", "<textarea>\n\nAFTER NEWLINE</textarea>"], + ["\rCR", "\nCR", "<textarea>\n\nCR</textarea>"], + ["\r\nCR-N", "\nCR-N", "<textarea>\n\nCR-N</textarea>"], +]) { + const textareaModifiableTextProcessor = WP_HTML_Processor.create_fragment("<textarea></textarea>"); + assert.equal(textareaModifiableTextProcessor.next_token(), true); + assert.equal(textareaModifiableTextProcessor.set_modifiable_text(setText), true); + assert.equal(textareaModifiableTextProcessor.get_modifiable_text(), expectedText); + assert.equal(textareaModifiableTextProcessor.get_updated_html(), expectedHtml); + textareaModifiableTextProcessor.destroy(); +} +/* For each listed target tag, set_modifiable_text() is expected to return false and leave the HTML untouched. */ +for (const [html, targetTag] of [ + ["<div>", "DIV"], + ["<svg><path></path></svg>", "PATH"], + ["<svg><path /></svg>", "PATH"], + ["<math><mtext></mtext></math>", "MTEXT"], + ["<math><mspace /></math>", "MSPACE"], + ["<svg><textarea></textarea></svg>", "TEXTAREA"], + ["<svg><title></title></svg>", "TITLE"], + ["<svg><style></style></svg>", "STYLE"], + ["<svg><script></script></svg>", "SCRIPT"], + ["<math><textarea></textarea></math>", "TEXTAREA"], + ["<math><title></title></math>", "TITLE"], + ["<math><style></style></math>", "STYLE"], + ["<math><script></script></math>", "SCRIPT"], +]) { + const nonAtomicModifiableTextProcessor = WP_HTML_Processor.create_fragment(html); + assert.equal(nonAtomicModifiableTextProcessor.next_tag(targetTag), true); + assert.equal(nonAtomicModifiableTextProcessor.set_modifiable_text("test"), false); + assert.equal(nonAtomicModifiableTextProcessor.get_updated_html(), html); + nonAtomicModifiableTextProcessor.destroy(); +} +/* Nested <template>, <svg> and <foreignObject>: the first DIV keeps the full nesting in its breadcrumbs; after </template> the <div target> is expected directly under BODY. */ +const templateNamespaceProcessor = WP_HTML_Processor.create_fragment("<template><svg><template><foreignObject><div></template><div target>"); +assert.equal(templateNamespaceProcessor.next_tag("div"), true); 
// Smoke-test section (line-collapsed patch text: each " +" marks one added source line).
// - Checks the breadcrumbs of the DIV matched just before this point, then of the next DIV,
//   and that the second DIV reports its `target` attribute as true.
// - assertNormalizesToSupportedHtml only requires that normalize() yields a string for the
//   input and again for its own output.
// - assertNormalizesIdempotently additionally requires that normalizing the output returns
//   that same output.
// - The two tables feed malformed markup through those helpers; the trailing assertions pin
//   the exact normalize() result for individual inputs.
+assert.deepEqual( + templateNamespaceProcessor.get_breadcrumbs(), + ["HTML", "BODY", "TEMPLATE", "SVG", "TEMPLATE", "FOREIGNOBJECT", "DIV"], +); +assert.equal(templateNamespaceProcessor.next_tag("div"), true); +assert.deepEqual(templateNamespaceProcessor.get_breadcrumbs(), ["HTML", "BODY", "DIV"]); +assert.equal(templateNamespaceProcessor.get_attribute("target"), true); +templateNamespaceProcessor.destroy(); + +function assertNormalizesToSupportedHtml(name, html) { + const normalized = WP_HTML_Processor.normalize(html); + assert.equal(typeof normalized, "string", name); + assert.equal(typeof WP_HTML_Processor.normalize(normalized), "string", name); +} + +function assertNormalizesIdempotently(name, html) { + const normalized = WP_HTML_Processor.normalize(html); + assert.equal(typeof normalized, "string", name); + assert.equal(WP_HTML_Processor.normalize(normalized), normalized, name); +} + +for (const [name, html] of [ + ["Unclosed SVG TITLE after P in EM", "<em><p><svg><title>"], + ["Unclosed SVG TITLE after P in STRONG", "<strong><p><svg ><title>"], +]) { + assertNormalizesToSupportedHtml(name, html); +} + +for (const [name, html] of [ + ["Malformed quoted attribute boundary", '<A "/=>'], + ["Duplicate attribute after bare attribute", '<A V=5 R V=""=>'], + ["Duplicate DATA-ID after numeric attribute", '<E DATA-ID=1 1 DATA-ID=""=>'], + ["Duplicate attribute before tag end", "<R V=5 R V=5 =>"], + ["NULL byte in foreign tag name", "<SVG><L\0 D>"], + ["Malformed closing-looking attribute", "<a </=>"], + ["Malformed self-closing attribute", "<a h/=>"], + ["Duplicate ID with quote boundary", '<d ID=""" ID=""=>'], + ["Mixed-case duplicate TITLE", '<d TITLE=""\' title=""=>'], + ["Colon before self-closing slash", "<e :/=>"], + ["Duplicate class after bare attribute", "<e class=y d class=''=>"], + ["Duplicate DATA-ID after hyphen", '<e data-id=1 - data-id="">'], + ["Duplicate title after quotes", '<e title=\'\'\' title=""=>'], + ["FORM with SVG TITLE text edge", '<form 
><svg ><title "\'></form><form>'], + ["FORM with TABLE and SCRIPT", '<form id><table te"><script></script><td srce" ID/></form><form claslicate>'], + ["FORM with TABLE CAPTION", "<form><table><caption></form><form >"], + ["Short malformed G attribute C", "<g c/=>"], + ["Short malformed G attribute S", "<g s/=>"], + ["Duplicate SRC boundary", '<g src=""g src="">'], + ["Short malformed H attribute", "<h f/=>"], + ["Malformed SRC equals boundary", '<i src=""= src=""=>'], + ["Malformed slash in tag opener", "<i/t/=>"], + ["Malformed L colon attribute", "<l :/=>"], + ["Malformed L less-than attribute", "<l/</=>"], + ["Malformed N less-than attribute", "<n </=>"], + ["Unclosed SVG TITLE after P", "<p><svg><title>"], + ["Duplicate ALT boundary", '<r alt=\'\'d alt=""=>'], + ["NULL byte in SVG child tag", "<svg><l\0 '>"], + ["NULL byte before slash in SVG child tag", "<svg><l\0/r>"], +]) { + assertNormalizesIdempotently(name, html); +} + +assert.equal( + WP_HTML_Processor.normalize('<a href=#anchor enabled>Tom & Jerry</a>'), + '<a href="#anchor" enabled>Tom &amp; Jerry</a>', +); +assert.equal(WP_HTML_Processor.normalize("apples > or\0anges"), "apples &gt; oranges"); +assert.equal(WP_HTML_Processor.normalize("<>"), "&lt;&gt;"); +assert.equal(WP_HTML_Processor.normalize("</>"), ""); +assert.equal( + WP_HTML_Processor.normalize('<![CDATA[invalid comment]]> syntax < <> "oddities" \'apostrophe\''), + "<!--[CDATA[invalid comment]]--> syntax &lt; &lt;&gt; &quot;oddities&quot; &apos;apostrophe&apos;", +); +assert.equal(WP_HTML_Processor.normalize("<input disabled>"), "<input disabled>"); +assert.equal(WP_HTML_Processor.normalize("<p id=3></p>"), '<p id="3"></p>'); +assert.equal(WP_HTML_Processor.normalize('<br class="clear"/>'), '<br class="clear">'); +assert.equal(WP_HTML_Processor.normalize('<div one=1 one="one" one=\'won\' one>'), '<div one="1"></div>'); +assert.equal(WP_HTML_Processor.normalize("<script>apples > or\0anges</script>"), "<script>apples > or\uFFFDanges</script>"); 
// Exact-output expectations for WP_HTML_Processor.normalize():
// - NULL byte inside STYLE, stray closing tags, and implied closers.
// - PRE / LISTING / TEXTAREA inputs that start with one or two newlines; every expected value
//   is normalized a second time and must come back unchanged.
// - Implied TBODY/TR/TD structure for partial table markup.
// - A table of NULL-byte inputs with their expected replacement or removal.
// The final loop starts the DOCTYPE serialization table; its body continues past this block.
+assert.equal(WP_HTML_Processor.normalize("<style>apples > or\0anges</style>"), "<style>apples > or\uFFFDanges</style>"); +assert.equal(WP_HTML_Processor.normalize("one</div>two</span>three"), "onetwothree"); +assert.equal(WP_HTML_Processor.normalize("<div><p>One"), "<div><p>One</p></div>"); +for (const [input, expected] of [ + ["<pre>\nline 1\nline 2</pre>", "<pre>line 1\nline 2</pre>"], + ["<pre>\n\nline 2\nline 3</pre>", "<pre>\n\nline 2\nline 3</pre>"], + ["<pre>\nline 1<!--comment--> still line 1</pre>", "<pre>line 1<!--comment--> still line 1</pre>"], + ["<pre>\n\nline 2<!--comment--> still line 2</pre>", "<pre>\n\nline 2<!--comment--> still line 2</pre>"], + ["<listing>\nline 1\nline 2</listing>", "<listing>line 1\nline 2</listing>"], + ["<listing>\n\nline 2\nline 3</listing>", "<listing>\n\nline 2\nline 3</listing>"], + ["<listing>\nline 1<!--comment--> still line 1</listing>", "<listing>line 1<!--comment--> still line 1</listing>"], + ["<listing>\n\nline 2<!--comment--> still line 2</listing>", "<listing>\n\nline 2<!--comment--> still line 2</listing>"], + ["<textarea>\nline 1\nline 2</textarea>", "<textarea>line 1\nline 2</textarea>"], + ["<textarea>\n\nline 2\nline 3</textarea>", "<textarea>\n\nline 2\nline 3</textarea>"], +]) { + assert.equal(WP_HTML_Processor.normalize(input), expected); + assert.equal(WP_HTML_Processor.normalize(expected), expected); +} +assert.equal(WP_HTML_Processor.normalize("<table><td>cell"), "<table><tbody><tr><td>cell</td></tr></tbody></table>"); +assert.equal(WP_HTML_Processor.normalize("<table><tr><td>cell"), "<table><tbody><tr><td>cell</td></tr></tbody></table>"); +assert.equal(WP_HTML_Processor.normalize("<table><td>a<td>b"), "<table><tbody><tr><td>a</td><td>b</td></tr></tbody></table>"); +assert.equal(WP_HTML_Processor.normalize("<table><tr><td>a<tr><td>b"), "<table><tbody><tr><td>a</td></tr><tr><td>b</td></tr></tbody></table>"); +assert.equal(WP_HTML_Processor.normalize("<table><tbody><tr><td>a<tbody><tr><td>b"), 
"<table><tbody><tr><td>a</td></tr></tbody><tbody><tr><td>b</td></tr></tbody></table>"); +assert.equal(WP_HTML_Processor.normalize("<table><tbody><tr><td>a</table><p>b"), "<table><tbody><tr><td>a</td></tr></tbody></table><p>b</p>"); +assert.equal(WP_HTML_Processor.normalize("<div></p>fun<table><td>cell</div>"), "<div><p></p>fun<table><tbody><tr><td>cell</td></tr></tbody></table></div>"); +assert.equal(WP_HTML_Processor.normalize("<div><span></div>"), "<div><span></span></div>"); +assert.equal(WP_HTML_Processor.normalize("<svg><g><g /></svg>"), "<svg><g><g /></g></svg>"); + +for (const [htmlWithNulls, expected] of [ + ["<img\0id=5>", "<img\uFFFDid=5></img\uFFFDid=5>"], + ["<img/\0id=5>", '<img \uFFFDid="5">'], + ["<img id='5\0'>", '<img id="5\uFFFD">'], + ["one\0two", "onetwo"], + ["<svg>one\0two</svg>", "<svg>one\uFFFDtwo</svg>"], + ["<script>alert(\0)</script>", "<script>alert(\uFFFD)</script>"], + ["<style>\0 {}</style>", "<style>\uFFFD {}</style>"], + ["<!-- \0 -->", "<!-- \uFFFD -->"], +]) { + assert.equal(WP_HTML_Processor.normalize(htmlWithNulls), expected); +} + +for (const [doctypeInput, doctypeOutput] of [ + ["", ""], + ["<!DOCTYPE>", "<!DOCTYPE>"], + ["<!DOCTYPE html>", "<!DOCTYPE html>"], + ["<!DOCTYPE WordPress>", "<!DOCTYPE wordpress>"], + ['<!DOCTYPE html PUBLIC "x">', '<!DOCTYPE html PUBLIC "x">'], + ['<!DOCTYPE html SYSTEM "y">', '<!DOCTYPE html SYSTEM "y">'], + ['<!DOCTYPE html PUBLIC "x" "y">', '<!DOCTYPE html PUBLIC "x" "y">'], + ['<!docType HtmL pubLIc\'xxx\'"yyy" all this is ignored>', '<!DOCTYPE html PUBLIC "xxx" "yyy">'], + ['<!DOCTYPE html PUBLIC "\'quoted\'">', '<!DOCTYPE html PUBLIC "\'quoted\'">'], + ['<!DOCTYPE html PUBLIC \'"quoted"\'>', '<!DOCTYPE html PUBLIC \'"quoted"\'>'], + ['<!DOCTYPE html SYSTEM "\'quoted\'">', '<!DOCTYPE html SYSTEM "\'quoted\'">'], + ['<!DOCTYPE html SYSTEM \'"quoted"\'>', '<!DOCTYPE html SYSTEM \'"quoted"\'>'], +]) { + const fullParserSerializeDoctype = WP_HTML_Processor.create_full_parser(`${doctypeInput}👌`); 
// Full-parser serialization checks.
// The opening assertion is the body of the DOCTYPE loop: the serialized document must be the
// expected DOCTYPE followed by the html/head/body wrapper around the appended emoji.
// Every following case builds one processor with create_full_parser(), compares serialize()
// with a literal expectation, and destroy()s the processor. The inputs all involve TABLE markup,
// and several expectations show text or elements serialized ahead of the <table>.
// The nested-anchor case instead walks next_token(), records the breadcrumbs of each opening
// A/DIV/P/TABLE tag, and expects get_last_error() to be null.
+ assert.equal( + fullParserSerializeDoctype.serialize(), + `${doctypeOutput}<html><head></head><body>👌</body></html>`, + ); + fullParserSerializeDoctype.destroy(); +} + +const fullParserFosteredTextBeforeTable = WP_HTML_Processor.create_full_parser( + "<table class=x data-id=1>a<!doctype html>", +); +assert.equal( + fullParserFosteredTextBeforeTable.serialize(), + '<html><head></head><body>a<table class="x" data-id="1"></table></body></html>', +); +fullParserFosteredTextBeforeTable.destroy(); + +const fullParserFosteredTextBeforeTableHiddenInput = WP_HTML_Processor.create_full_parser( + "<!doctype html><table>X<input type=hidDEN></table>", +); +assert.equal( + fullParserFosteredTextBeforeTableHiddenInput.serialize(), + '<!DOCTYPE html><html><head></head><body>X<table><input type="hidDEN"></table></body></html>', +); +fullParserFosteredTextBeforeTableHiddenInput.destroy(); + +const fullParserFosteredInputBeforeTable = WP_HTML_Processor.create_full_parser( + "<table><input>", +); +assert.equal( + fullParserFosteredInputBeforeTable.serialize(), + "<html><head></head><body><input><table></table></body></html>", +); +fullParserFosteredInputBeforeTable.destroy(); + +const fullParserFosteredInputBeforeHiddenTableInput = WP_HTML_Processor.create_full_parser( + '<!doctype html><table><input type=" hidden"><input type=hidDEN></table>', +); +assert.equal( + fullParserFosteredInputBeforeHiddenTableInput.serialize(), + '<!DOCTYPE html><html><head></head><body><input type=" hidden"><table><input type="hidDEN"></table></body></html>', +); +fullParserFosteredInputBeforeHiddenTableInput.destroy(); + +const fullParserFosteredTextBeforeTableMeta = WP_HTML_Processor.create_full_parser( + "<!doctype html><table> X<meta></table>", +); +assert.equal( + fullParserFosteredTextBeforeTableMeta.serialize(), + "<!DOCTYPE html><html><head></head><body> X<meta><table></table></body></html>", +); +fullParserFosteredTextBeforeTableMeta.destroy(); + +const fullParserFosteredSvgBeforeTable = 
WP_HTML_Processor.create_full_parser( + "<!doctype html><table><svg><g>foo</g></svg></table>", +); +assert.equal( + fullParserFosteredSvgBeforeTable.serialize(), + "<!DOCTYPE html><html><head></head><body><svg><g>foo</g></svg><table></table></body></html>", +); +fullParserFosteredSvgBeforeTable.destroy(); + +const fullParserFosteredSelectBeforeTable = WP_HTML_Processor.create_full_parser( + "<table><select><option>3</select></table>", +); +assert.equal( + fullParserFosteredSelectBeforeTable.serialize(), + "<html><head></head><body><select><option>3</option></select><table></table></body></html>", +); +fullParserFosteredSelectBeforeTable.destroy(); + +const fullParserFosteredSvgBeforeTableSection = WP_HTML_Processor.create_full_parser( + "<!doctype html><table><tbody><tr><svg><g>foo</g></svg></tr></tbody></table>", +); +assert.equal( + fullParserFosteredSvgBeforeTableSection.serialize(), + "<!DOCTYPE html><html><head></head><body><svg><g>foo</g></svg><table><tbody><tr></tr></tbody></table></body></html>", +); +fullParserFosteredSvgBeforeTableSection.destroy(); + +const fullParserFosteredTextBeforeTableComment = WP_HTML_Processor.create_full_parser( + "<!doctype html><table>abc<!--foo-->", +); +assert.equal( + fullParserFosteredTextBeforeTableComment.serialize(), + "<!DOCTYPE html><html><head></head><body>abc<table><!--foo--></table></body></html>", +); +fullParserFosteredTextBeforeTableComment.destroy(); + +const fullParserFosteredMetaBeforeTable = WP_HTML_Processor.create_full_parser( + "<!doctype html><table><meta></table>", +); +assert.equal( + fullParserFosteredMetaBeforeTable.serialize(), + "<!DOCTYPE html><html><head></head><body><meta><table></table></body></html>", +); +fullParserFosteredMetaBeforeTable.destroy(); + +const fullParserFosteredTitleBeforeTable = WP_HTML_Processor.create_full_parser( + "<!doctype html><table><title>X</title></table>", +); +assert.equal( + fullParserFosteredTitleBeforeTable.serialize(), + "<!DOCTYPE 
html><html><head></head><body><title>X</title><table></table></body></html>", +); +fullParserFosteredTitleBeforeTable.destroy(); + +const fullParserFosteredTextBeforeTableRow = WP_HTML_Processor.create_full_parser( + "<!doctype html><table><tr> x</table>", +); +assert.equal( + fullParserFosteredTextBeforeTableRow.serialize(), + "<!DOCTYPE html><html><head></head><body> x<table><tbody><tr></tr></tbody></table></body></html>", +); +fullParserFosteredTextBeforeTableRow.destroy(); + +const fullParserFosteredAnchorTextAfterTableCell = WP_HTML_Processor.create_full_parser( + '<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe', +); +assert.equal( + fullParserFosteredAnchorTextAfterTableCell.serialize(), + '<html><head></head><body><a href="blah">abax<table><tbody><tr><td><a href="foo">br</tbody></table>aoe</a></body></html>', +); +fullParserFosteredAnchorTextAfterTableCell.destroy(); + +const fullParserReconstructedAnchorTextAfterTableCell = WP_HTML_Processor.create_full_parser( + '<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe', +); +assert.equal( + fullParserReconstructedAnchorTextAfterTableCell.serialize(), + '<html><head></head><body><a href="blah">aba</a><a href="blah">x</a><table><tbody><tr><td><a href="foo">br</tbody></table><a href="blah">aoe</a></body></html>', +); +fullParserReconstructedAnchorTextAfterTableCell.destroy(); + +const fullParserNestedFosteredAnchorBeforeTableRow = WP_HTML_Processor.create_full_parser( + '<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe', +); +assert.equal( + fullParserNestedFosteredAnchorBeforeTableRow.serialize(), + '<html><head></head><body><a href="blah">aba<a href="foo">br</a><a href="foo">x</a><table><tbody><tr><td></tbody></table></a><a href="foo">aoe</a></body></html>', +); +fullParserNestedFosteredAnchorBeforeTableRow.destroy(); + +const fullParserNestedFosteredAnchorBeforeTableEnd = WP_HTML_Processor.create_full_parser( + 
"<a><table><a></table><p><a><div><a>", +); +const fullParserNestedFosteredAnchorBeforeTableEndStarts = []; +while (fullParserNestedFosteredAnchorBeforeTableEnd.next_token()) { + if ( + fullParserNestedFosteredAnchorBeforeTableEnd.get_token_type() === "#tag" && + !fullParserNestedFosteredAnchorBeforeTableEnd.is_tag_closer() && + ["A", "DIV", "P", "TABLE"].includes(fullParserNestedFosteredAnchorBeforeTableEnd.get_tag()) + ) { + fullParserNestedFosteredAnchorBeforeTableEndStarts.push([ + fullParserNestedFosteredAnchorBeforeTableEnd.get_tag(), + fullParserNestedFosteredAnchorBeforeTableEnd.get_breadcrumbs(), + ]); + } +} +assert.equal(fullParserNestedFosteredAnchorBeforeTableEnd.get_last_error(), null); +assert.deepEqual(fullParserNestedFosteredAnchorBeforeTableEndStarts, [ + ["A", ["HTML", "BODY", "A"]], + ["A", ["HTML", "BODY", "A", "A"]], + ["TABLE", ["HTML", "BODY", "A", "TABLE"]], + ["P", ["HTML", "BODY", "P"]], + ["A", ["HTML", "BODY", "P", "A"]], + ["DIV", ["HTML", "BODY", "DIV"]], + ["A", ["HTML", "BODY", "DIV", "A"]], +]); +fullParserNestedFosteredAnchorBeforeTableEnd.destroy(); + +const fullParserFosteredDivBeforeTableRow = WP_HTML_Processor.create_full_parser( + "<table><tr><div>", +); +assert.equal( + fullParserFosteredDivBeforeTableRow.serialize(), + "<html><head></head><body><div></div><table><tbody><tr></tr></tbody></table></body></html>", +); +fullParserFosteredDivBeforeTableRow.destroy(); + +const fullParserFosteredDivBeforeTableCell = WP_HTML_Processor.create_full_parser( + "<table><tr><div><td>", +); +assert.equal( + fullParserFosteredDivBeforeTableCell.serialize(), + "<html><head></head><body><div></div><table><tbody><tr><td></td></tr></tbody></table></body></html>", +); +fullParserFosteredDivBeforeTableCell.destroy(); + +const fullParserWhitespaceBeforeFosteredCenterTableCell = WP_HTML_Processor.create_full_parser( + "<table>\n<tr><center><td>", +); +assert.equal( + fullParserWhitespaceBeforeFosteredCenterTableCell.serialize(), + 
"<html><head></head><body><center></center><table>\n<tbody><tr><td></td></tr></tbody></table></body></html>", +); +fullParserWhitespaceBeforeFosteredCenterTableCell.destroy(); + +const fullParserFosteredListItemsBeforeTable = WP_HTML_Processor.create_full_parser( + "<table><li><li></table>", +); +assert.equal( + fullParserFosteredListItemsBeforeTable.serialize(), + "<html><head></head><body><li></li><li></li><table></table></body></html>", +); +fullParserFosteredListItemsBeforeTable.destroy(); + +const fullParserFosteredParagraphsBeforeTable = WP_HTML_Processor.create_full_parser( + "<table><tr><p><a><p>You should see this text.", +); +assert.equal( + fullParserFosteredParagraphsBeforeTable.serialize(), + "<html><head></head><body><p><a></a></p><p><a>You should see this text.</a></p><table><tbody><tr></tr></tbody></table></body></html>", +); +fullParserFosteredParagraphsBeforeTable.destroy(); + +const fullParserFosteredItalicDivBeforeTable = WP_HTML_Processor.create_full_parser( + "<!doctype html><table><i>a<b>b<div>c</i>", +); +assert.equal( + fullParserFosteredItalicDivBeforeTable.serialize(), + "<!DOCTYPE html><html><head></head><body><i>a<b>b</b></i><b><div><i>c</i></div></b><table></table></body></html>", +); +fullParserFosteredItalicDivBeforeTable.destroy(); + +const fullParserFosteredForeignSelectTable = WP_HTML_Processor.create_full_parser( + "<div><table><svg><foreignObject><select><table><s>", +); +assert.equal( + fullParserFosteredForeignSelectTable.serialize(), + "<html><head></head><body><div><svg><foreignObject><select></select></foreignObject></svg><table></table><s></s><table></table></div></body></html>", +); +fullParserFosteredForeignSelectTable.destroy(); + +const fullParserFosteredTextBeforeTableColgroup = WP_HTML_Processor.create_full_parser( + "<table><colgroup>foo", +); +assert.equal( + fullParserFosteredTextBeforeTableColgroup.serialize(), + "<html><head></head><body>foo<table><colgroup></colgroup></table></body></html>", +); 
// Tail of the smoke test:
// - Two more COLGROUP cases compare serialize() with the expected document.
// - For each listed incomplete token, normalize("content" + token) must equal "content".
// - For a fragment processor positioned on TEXTAREA, serialize() must be null while
//   serialize_token() returns the element with "&" escaped.
// The same physical line then carries the patch header that starts wp-html-api-rust.d.ts.
+fullParserFosteredTextBeforeTableColgroup.destroy(); + +const fullParserSplitFosteredTextBeforeTableColgroup = WP_HTML_Processor.create_full_parser( + "<table><colgroup> foo</colgroup></table>", +); +assert.equal( + fullParserSplitFosteredTextBeforeTableColgroup.serialize(), + "<html><head></head><body>foo<table><colgroup> </colgroup></table></body></html>", +); +fullParserSplitFosteredTextBeforeTableColgroup.destroy(); + +const fullParserFosteredTextAfterIgnoredHtmlEndInTableColgroup = WP_HTML_Processor.create_full_parser( + "<table><colgroup></html>foo", +); +assert.equal( + fullParserFosteredTextAfterIgnoredHtmlEndInTableColgroup.serialize(), + "<html><head></head><body>foo<table><colgroup></colgroup></table></body></html>", +); +fullParserFosteredTextAfterIgnoredHtmlEndInTableColgroup.destroy(); + +for (const incompleteToken of [ + "<!--", + "<!--x", + "<!--x--", + "<!--x--!", + "<!--x--! >", + "<![sneaky[", + "</3 is not a tag", + "<!DOCTYPE html", + "<!DOCTY", + "<![CDATA[something inside of here needs to get out", + "<![CDA", + "<![CDATA[cannot escape]", + "<my-custom status=\"pending\"", + "<script>", + "<script><div>", + "<script><!--<script></script>", + "<style><div>", + "<textarea><div>", + "<title><div>", + "<xmp><div>", +]) { + assert.equal(WP_HTML_Processor.normalize(`content${incompleteToken}`), "content"); +} + +const serializationProcessor = WP_HTML_Processor.create_fragment("<textarea>One & Two</textarea>"); +assert.equal(serializationProcessor.next_token(), true); +assert.equal(serializationProcessor.serialize(), null); +assert.equal(serializationProcessor.serialize_token(), "<textarea>One &amp; Two</textarea>"); +serializationProcessor.destroy(); + +console.log("WASM smoke tests passed."); diff --git a/ext/html-api-rust/wasm/wp-html-api-rust.d.ts b/ext/html-api-rust/wasm/wp-html-api-rust.d.ts new file mode 100644 index 0000000000000..13b64f067ce60 --- /dev/null +++ b/ext/html-api-rust/wasm/wp-html-api-rust.d.ts @@ -0,0 +1,612 @@ +export 
// Type declarations for the WASM build (the `export` keyword for the first interface sits at
// the end of the previous physical line).
// WasmInstantiatedSource pairs an instance with an optional module.
// WpHtmlApiRustWasmExports lists the raw module exports: memory, alloc/dealloc, decoder helpers
// and tag-processor entry points. Every argument is typed as number or boolean.
// NOTE(review): the number arguments named html/text/ptr/out are presumably offsets into
// `memory` — confirm against the loader.
interface WasmInstantiatedSource { + module?: WebAssembly.Module; + instance: WebAssembly.Instance; +} + +export interface WpHtmlApiRustWasmExports extends WebAssembly.Exports { + readonly __data_end: WebAssembly.Global; + readonly __heap_base: WebAssembly.Global; + readonly memory: WebAssembly.Memory; + wp_html_api_rust_alloc(len: number): number; + wp_html_api_rust_dealloc(ptr: number, len: number): void; + wp_html_api_rust_core_version(): number; + wp_html_api_rust_decoder_decode( + context: number, + text: number, + textLen: number, + outPtr: number, + outCapacity: number, + outLen: number, + ): boolean; + wp_html_api_rust_decoder_read_character_reference( + context: number, + text: number, + textLen: number, + at: number, + outPtr: number, + outCapacity: number, + outLen: number, + matchLen: number, + ): boolean; + wp_html_api_rust_decoder_attribute_starts_with( + haystack: number, + haystackLen: number, + searchText: number, + searchTextLen: number, + asciiCaseInsensitive: boolean, + ): boolean; + wp_html_api_rust_decoder_code_point_to_utf8_bytes( + codePoint: number, + outPtr: number, + outCapacity: number, + outLen: number, + ): boolean; + wp_html_api_rust_scan_next_tag(html: number, len: number, offset: number, out: number): boolean; + wp_html_api_rust_tag_processor_new(html: number, len: number): number; + wp_html_api_rust_tag_processor_free(processor: number): void; + wp_html_api_rust_tag_processor_next_tag( + processor: number, + query: number, + queryLen: number, + visitClosers: boolean, + ): boolean; + wp_html_api_rust_tag_processor_next_token(processor: number): boolean; + wp_html_api_rust_tag_processor_seek(processor: number, offset: number): void; + wp_html_api_rust_tag_processor_set_namespace(processor: number, namespace: number): void; + wp_html_api_rust_tag_processor_apply_lexical_update( + processor: number, + start: number, + length: number, + replacement: number, + replacementLen: number, + ): boolean; + 
// WpHtmlApiRustWasmExports, continued: accessors for the current token (span, type, text, tag,
// comment and script type) and the attribute and class read/write exports. Each takes the
// processor handle as its first argument.
wp_html_api_rust_tag_processor_current_span(processor: number, start: number, length: number): boolean; + wp_html_api_rust_tag_processor_current_token_type(processor: number): number; + wp_html_api_rust_tag_processor_paused_at_incomplete(processor: number): boolean; + wp_html_api_rust_tag_processor_subdivide_text_appropriately(processor: number): number; + wp_html_api_rust_tag_processor_get_modifiable_text(processor: number, out: number): boolean; + wp_html_api_rust_tag_processor_set_modifiable_text( + processor: number, + text: number, + textLen: number, + ): boolean; + wp_html_api_rust_tag_processor_current_comment_type(processor: number): number; + wp_html_api_rust_tag_processor_script_content_type(processor: number): number; + wp_html_api_rust_tag_processor_get_tag(processor: number, out: number): boolean; + wp_html_api_rust_tag_processor_is_tag_closer(processor: number): boolean; + wp_html_api_rust_tag_processor_has_self_closing_flag(processor: number): boolean; + wp_html_api_rust_tag_processor_get_attribute( + processor: number, + name: number, + nameLen: number, + out: number, + ): number; + wp_html_api_rust_tag_processor_get_attribute_names_with_prefix( + processor: number, + prefix: number, + prefixLen: number, + out: number, + ): number; + wp_html_api_rust_tag_processor_set_attribute( + processor: number, + name: number, + nameLen: number, + value: number, + valueLen: number, + valueKind: number, + ): boolean; + wp_html_api_rust_tag_processor_remove_attribute(processor: number, name: number, nameLen: number): boolean; + wp_html_api_rust_tag_processor_add_class( + processor: number, + className: number, + classNameLen: number, + quirksMode: boolean, + ): boolean; + wp_html_api_rust_tag_processor_remove_class( + processor: number, + className: number, + classNameLen: number, + quirksMode: boolean, + ): boolean; + wp_html_api_rust_tag_processor_has_class( + processor: number, + className: number, + classNameLen: number, + quirksMode: boolean, + ): number; + 
// Closes WpHtmlApiRustWasmExports, then declares the public surface:
// - WasmInputSource / WasmInput / WasmExportsInput: accepted ways to supply the module.
// - ScanNextTagResult and the next_tag query shapes.
// - String-literal unions (CommentType, TokenType, ParserState, TextNodeClassification,
//   HtmlNamespace, EncodingConfidence) and the Php*Parameter unions of primitive types.
// - Instance interfaces for the decoder, doctype info, unsupported exception, span, text
//   replacement, attribute token, token, stack event, active formatting elements and open
//   elements; most have a matching *_Constructor interface and `export const`.
// NOTE(review): unions such as `"attribute" | "data" | string` are equivalent to plain `string`
// for the type checker; the literals serve as documentation only.
// The block ends part-way through WP_HTML_Open_Elements.
wp_html_api_rust_tag_processor_class_list(processor: number, out: number, quirksMode: boolean): number; + wp_html_api_rust_tag_processor_get_html(processor: number, out: number): boolean; +} + +export type WasmInputSource = + | URL + | Request + | Response + | string + | ArrayBuffer + | ArrayBufferView + | Blob + | WebAssembly.Module + | WebAssembly.Exports + | WebAssembly.Instance + | WasmInstantiatedSource; + +export type WasmInput = WasmInputSource | PromiseLike<WasmInputSource>; + +export type WasmExportsInput = + | WebAssembly.Exports + | WebAssembly.Instance + | WasmInstantiatedSource; + +export interface ScanNextTagResult { + tag_start: number; + tag_end: number; + name_start: number; + name_len: number; + name_length: number; + tag_name: string; + is_closing: boolean; + has_self_closing_flag: boolean; + token_end: number; + token_type: number; +} + +export interface NextTagBaseQuery { + class_name?: unknown; + tag_closers?: unknown; + visit_closers?: boolean; +} + +export interface TagNextTagQuery extends NextTagBaseQuery { + tag_name?: unknown; + match_offset?: unknown; +} + +export interface ProcessorNextTagQuery extends NextTagBaseQuery { + tag_name?: string | number | boolean | null; + match_offset?: unknown; + breadcrumbs?: PhpInternalStringParameter[]; +} + +export type NextTagQuery = ProcessorNextTagQuery; + +export type CommentType = + | "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT" + | "COMMENT_AS_CDATA_LOOKALIKE" + | "COMMENT_AS_HTML_COMMENT" + | "COMMENT_AS_PI_NODE_LOOKALIKE" + | "COMMENT_AS_INVALID_HTML"; + +export type TokenType = + | "#tag" + | "#doctype" + | "#text" + | "#comment" + | "#cdata-section" + | "#presumptuous-tag" + | "#funky-comment"; + +export type ParserState = + | "STATE_READY" + | "STATE_COMPLETE" + | "STATE_INCOMPLETE_INPUT" + | "STATE_MATCHED_TAG" + | "STATE_TEXT_NODE" + | "STATE_CDATA_NODE" + | "STATE_COMMENT" + | "STATE_DOCTYPE" + | "STATE_PRESUMPTUOUS_TAG" + | "STATE_WP_FUNKY"; + +export type TextNodeClassification = + | 
"TEXT_IS_GENERIC" + | "TEXT_IS_NULL_SEQUENCE" + | "TEXT_IS_WHITESPACE"; + +export type HtmlNamespace = "html" | "math" | "svg"; + +export type EncodingConfidence = "tentative" | "certain" | "irrelevant"; + +export type DecoderContext = "attribute" | "data" | string; + +export type PhpIntegerParameter = number | string | boolean; + +export type PhpStringParameter = string | number | boolean; + +export type PhpInternalStringParameter = string | number | boolean | null; + +export type PhpInterpolatedStringParameter = string | number | boolean | null | unknown[]; + +export type PhpBooleanParameter = boolean | number | string; + +export type PhpArrayKeyParameter = string | number | boolean | null; + +export interface MatchByteLength { + value?: number; +} + +export interface WP_HTML_Decoder_Constructor { + attribute_starts_with( + haystack: PhpInternalStringParameter, + searchText: PhpInternalStringParameter, + caseSensitivity?: unknown, + ): boolean; + decode_text_node(text: PhpStringParameter): string; + decode_attribute(text: PhpStringParameter): string; + decode(context: unknown, text: PhpStringParameter): string; + read_character_reference( + context: unknown, + text: PhpInternalStringParameter, + at?: number | string | boolean | null, + matchByteLength?: MatchByteLength | null, + ): string | null; + code_point_to_utf8_bytes(codePoint: PhpIntegerParameter | null): string; +} + +export interface WP_HTML_Doctype_Info { + name: string | null; + public_identifier: string | null; + system_identifier: string | null; + indicated_compatibility_mode: "no-quirks" | "limited-quirks" | "quirks"; +} + +export interface WP_HTML_Doctype_Info_Constructor { + from_doctype_token(doctypeHtml: PhpStringParameter): WP_HTML_Doctype_Info | null; +} + +export const WP_HTML_Doctype_Info: WP_HTML_Doctype_Info_Constructor; + +export interface WP_HTML_Unsupported_Exception extends Error { + message: string; + token_name: string; + token_at: number; + token: string; + stack_of_open_elements: 
unknown[]; + active_formatting_elements: unknown[]; +} + +export interface WP_HTML_Unsupported_Exception_Constructor { + new ( + message: PhpStringParameter, + tokenName: PhpStringParameter, + tokenAt: PhpIntegerParameter, + token: PhpStringParameter, + stackOfOpenElements: unknown[], + activeFormattingElements: unknown[], + ): WP_HTML_Unsupported_Exception; +} + +export const WP_HTML_Unsupported_Exception: WP_HTML_Unsupported_Exception_Constructor; + +export interface WP_HTML_Span { + start: number; + length: number; +} + +export interface WP_HTML_Span_Constructor { + new (start: PhpIntegerParameter, length: PhpIntegerParameter): WP_HTML_Span; +} + +export const WP_HTML_Span: WP_HTML_Span_Constructor; + +export interface WP_HTML_Text_Replacement { + start: number; + length: number; + text: string; +} + +export interface WP_HTML_Text_Replacement_Constructor { + new (start: PhpIntegerParameter, length: PhpIntegerParameter, text: PhpStringParameter): WP_HTML_Text_Replacement; +} + +export const WP_HTML_Text_Replacement: WP_HTML_Text_Replacement_Constructor; + +export interface WP_HTML_Attribute_Token { + name: unknown; + value_starts_at: unknown; + value_length: unknown; + start: unknown; + length: unknown; + is_true: unknown; +} + +export interface WP_HTML_Attribute_Token_Constructor { + new ( + name: unknown, + valueStart: unknown, + valueLength: unknown, + start: unknown, + length: unknown, + isTrue: unknown, + ): WP_HTML_Attribute_Token; +} + +export const WP_HTML_Attribute_Token: WP_HTML_Attribute_Token_Constructor; + +export interface WP_HTML_Token { + bookmark_name: string | null; + namespace: HtmlNamespace; + node_name: string; + has_self_closing_flag: boolean; + integration_node_type: "math" | "html" | null; + on_destroy: ((bookmarkName: string | null) => void) | null; + destroy(): void; + free(): void; +} + +export interface WP_HTML_Token_Constructor { + new ( + bookmarkName: PhpStringParameter | null, + nodeName: PhpStringParameter, + hasSelfClosingFlag: 
PhpBooleanParameter, + onDestroy?: ((bookmarkName: string | null) => void) | null, + ): WP_HTML_Token; +} + +export const WP_HTML_Token: WP_HTML_Token_Constructor; + +export interface WP_HTML_Stack_Event { + token: WP_HTML_Token; + operation: "pop" | "push" | string; + provenance: "virtual" | "real" | string; +} + +export interface WP_HTML_Stack_Event_Constructor { + readonly POP: "pop"; + readonly PUSH: "push"; + new (token: WP_HTML_Token, operation: PhpStringParameter, provenance: PhpStringParameter): WP_HTML_Stack_Event; +} + +export const WP_HTML_Stack_Event: WP_HTML_Stack_Event_Constructor; + +export interface WP_HTML_Active_Formatting_Elements { + contains_node(token: WP_HTML_Token): boolean; + count(): number; + current_node(): WP_HTML_Token | null; + insert_marker(): void; + push(token: WP_HTML_Token): void; + remove_node(token: WP_HTML_Token): boolean; + walk_down(): IterableIterator<WP_HTML_Token>; + walk_up(): IterableIterator<WP_HTML_Token>; + clear_up_to_last_marker(): void; +} + +export interface WP_HTML_Active_Formatting_Elements_Constructor { + new (): WP_HTML_Active_Formatting_Elements; +} + +export const WP_HTML_Active_Formatting_Elements: WP_HTML_Active_Formatting_Elements_Constructor; + +export interface WP_HTML_Open_Elements { + stack: WP_HTML_Token[]; + set_pop_handler(handler: (token: WP_HTML_Token) => void): void; + set_push_handler(handler: (token: WP_HTML_Token) => void): void; + at(nth: PhpIntegerParameter): WP_HTML_Token | null; + contains(nodeName: PhpStringParameter): boolean; + contains_node(token: WP_HTML_Token): boolean; + count(): number; + current_node(): WP_HTML_Token | null; + current_node_is(identity: PhpStringParameter): boolean; + has_element_in_specific_scope(tagName: PhpStringParameter, terminationList: unknown): boolean; + has_element_in_scope(tagName: PhpStringParameter): boolean; + has_element_in_list_item_scope(tagName: PhpStringParameter): boolean; + has_element_in_button_scope(tagName: PhpStringParameter): boolean; + 
// WP_HTML_Open_Elements, continued: remaining scope checks, push/pop/remove, the walk_down and
// walk_up iterators, push/pop hooks and the clear_to_*_context helpers. Its constructor
// interface and `export const` follow.
// WP_HTML_Processor_State then declares the parser state fields; context_node is typed as the
// literal `null`. The line ends with the first INSERTION_MODE_* constants, each typed as its
// string literal.
has_element_in_table_scope(tagName: PhpStringParameter): boolean; + has_element_in_select_scope(tagName: PhpStringParameter): boolean; + has_p_in_button_scope(): boolean; + pop(): boolean; + pop_until(htmlTagName: PhpStringParameter): boolean; + push(stackItem: WP_HTML_Token): void; + remove_node(token: WP_HTML_Token): boolean; + walk_down(): IterableIterator<WP_HTML_Token>; + walk_up(aboveThisNode?: WP_HTML_Token | null): IterableIterator<WP_HTML_Token>; + after_element_push(item: WP_HTML_Token): void; + after_element_pop(item: WP_HTML_Token): void; + clear_to_table_context(): void; + clear_to_table_body_context(): void; + clear_to_table_row_context(): void; +} + +export interface WP_HTML_Open_Elements_Constructor { + new (): WP_HTML_Open_Elements; +} + +export const WP_HTML_Open_Elements: WP_HTML_Open_Elements_Constructor; + +export interface WP_HTML_Processor_State { + stack_of_template_insertion_modes: string[]; + stack_of_open_elements: WP_HTML_Open_Elements; + active_formatting_elements: WP_HTML_Active_Formatting_Elements; + current_token: WP_HTML_Token | null; + insertion_mode: string; + context_node: null; + encoding: string | null; + encoding_confidence: "tentative" | "certain" | "irrelevant" | string; + head_element: WP_HTML_Token | null; + form_element: WP_HTML_Token | null; + frameset_ok: boolean; +} + +export interface WP_HTML_Processor_State_Constructor { + readonly INSERTION_MODE_INITIAL: "insertion-mode-initial"; + readonly INSERTION_MODE_BEFORE_HTML: "insertion-mode-before-html"; + readonly INSERTION_MODE_BEFORE_HEAD: "insertion-mode-before-head"; + readonly INSERTION_MODE_IN_HEAD: "insertion-mode-in-head"; + readonly INSERTION_MODE_IN_HEAD_NOSCRIPT: "insertion-mode-in-head-noscript"; + readonly INSERTION_MODE_AFTER_HEAD: "insertion-mode-after-head"; + readonly INSERTION_MODE_IN_BODY: "insertion-mode-in-body"; + readonly INSERTION_MODE_IN_TABLE: "insertion-mode-in-table"; + readonly INSERTION_MODE_IN_TABLE_TEXT: "insertion-mode-in-table-text"; + 
// Remaining INSERTION_MODE_* constants of WP_HTML_Processor_State_Constructor, its `new ()`
// signature and `export const`.
// WP_HTML_Tag_Processor then declares the instance API: four state properties, destroy/free,
// tag and token navigation, and attribute/class accessors. Several getters include null in
// their return type. The interface continues past this line.
readonly INSERTION_MODE_IN_CAPTION: "insertion-mode-in-caption"; + readonly INSERTION_MODE_IN_COLUMN_GROUP: "insertion-mode-in-column-group"; + readonly INSERTION_MODE_IN_TABLE_BODY: "insertion-mode-in-table-body"; + readonly INSERTION_MODE_IN_ROW: "insertion-mode-in-row"; + readonly INSERTION_MODE_IN_CELL: "insertion-mode-in-cell"; + readonly INSERTION_MODE_IN_SELECT: "insertion-mode-in-select"; + readonly INSERTION_MODE_IN_SELECT_IN_TABLE: "insertion-mode-in-select-in-table"; + readonly INSERTION_MODE_IN_TEMPLATE: "insertion-mode-in-template"; + readonly INSERTION_MODE_AFTER_BODY: "insertion-mode-after-body"; + readonly INSERTION_MODE_IN_FRAMESET: "insertion-mode-in-frameset"; + readonly INSERTION_MODE_AFTER_FRAMESET: "insertion-mode-after-frameset"; + readonly INSERTION_MODE_AFTER_AFTER_BODY: "insertion-mode-after-after-body"; + readonly INSERTION_MODE_AFTER_AFTER_FRAMESET: "insertion-mode-after-after-frameset"; + new (): WP_HTML_Processor_State; +} + +export const WP_HTML_Processor_State: WP_HTML_Processor_State_Constructor; + +export interface WP_HTML_Tag_Processor { + parser_state: ParserState; + compat_mode: "no-quirks-mode" | "quirks-mode"; + parsing_namespace: HtmlNamespace; + text_node_classification: TextNodeClassification; + destroy(): void; + free(): void; + next_tag(query?: string | TagNextTagQuery | null): boolean; + next_token(): boolean; + get_tag(): string | null; + get_attribute(name: PhpInternalStringParameter): string | true | null; + get_attribute_names_with_prefix(prefix: PhpInternalStringParameter): string[] | null; + set_attribute(name: PhpInternalStringParameter, value: PhpStringParameter | boolean | null): boolean; + remove_attribute(name: PhpInternalStringParameter): boolean; + add_class(className: PhpInternalStringParameter): boolean; + remove_class(className: PhpInternalStringParameter): boolean; + has_class(className: PhpInternalStringParameter): boolean | null; + class_list(): string[] | null; + is_tag_closer(): boolean; + 
has_self_closing_flag(): boolean; + get_token_name(): string | null; + get_token_type(): TokenType | null; + paused_at_incomplete_token(): boolean; + subdivide_text_appropriately(): boolean; + get_modifiable_text(): string; + native_get_script_content_type(): "javascript" | "json" | null; + set_modifiable_text(text: PhpStringParameter): boolean; + get_comment_type(): CommentType | null; + get_doctype_info(): WP_HTML_Doctype_Info | null; + set_bookmark(name: PhpArrayKeyParameter): boolean; + release_bookmark(name: PhpArrayKeyParameter): boolean; + has_bookmark(name: PhpArrayKeyParameter): boolean; + seek(name: PhpArrayKeyParameter): boolean; + change_parsing_namespace(namespaceName: PhpStringParameter): boolean; + get_namespace(): HtmlNamespace; + get_qualified_tag_name(): string | null; + get_qualified_attribute_name(attributeName: PhpInternalStringParameter): string | null; + get_full_comment_text(): string | null; + get_updated_html(flushClassNameUpdates?: boolean): string; + toString(): string; +} + +export interface WP_HTML_Tag_Processor_Constructor { + new (html: unknown): WP_HTML_Tag_Processor; + readonly MAX_BOOKMARKS: number; + readonly MAX_SEEK_OPS: 1000; + readonly ADD_CLASS: true; + readonly REMOVE_CLASS: false; + readonly SKIP_CLASS: null; + readonly STATE_READY: "STATE_READY"; + readonly STATE_COMPLETE: "STATE_COMPLETE"; + readonly STATE_INCOMPLETE_INPUT: "STATE_INCOMPLETE_INPUT"; + readonly STATE_MATCHED_TAG: "STATE_MATCHED_TAG"; + readonly STATE_TEXT_NODE: "STATE_TEXT_NODE"; + readonly STATE_CDATA_NODE: "STATE_CDATA_NODE"; + readonly STATE_COMMENT: "STATE_COMMENT"; + readonly STATE_DOCTYPE: "STATE_DOCTYPE"; + readonly STATE_PRESUMPTUOUS_TAG: "STATE_PRESUMPTUOUS_TAG"; + readonly STATE_FUNKY_COMMENT: "STATE_WP_FUNKY"; + readonly TEXT_IS_GENERIC: "TEXT_IS_GENERIC"; + readonly TEXT_IS_NULL_SEQUENCE: "TEXT_IS_NULL_SEQUENCE"; + readonly TEXT_IS_WHITESPACE: "TEXT_IS_WHITESPACE"; + readonly NO_QUIRKS_MODE: "no-quirks-mode"; + readonly QUIRKS_MODE: 
"quirks-mode"; + readonly COMMENT_AS_ABRUPTLY_CLOSED_COMMENT: "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT"; + readonly COMMENT_AS_CDATA_LOOKALIKE: "COMMENT_AS_CDATA_LOOKALIKE"; + readonly COMMENT_AS_HTML_COMMENT: "COMMENT_AS_HTML_COMMENT"; + readonly COMMENT_AS_PI_NODE_LOOKALIKE: "COMMENT_AS_PI_NODE_LOOKALIKE"; + readonly COMMENT_AS_INVALID_HTML: "COMMENT_AS_INVALID_HTML"; +} + +export type ProcessorStepMode = + | "process-next-node" + | "reprocess-current-node" + | "process-current-node"; + +export interface SpecialTagInput { + node_name?: PhpStringParameter | null; + nodeName?: PhpStringParameter | null; + tagName?: PhpStringParameter | null; + namespace?: PhpStringParameter | null; + namespaceName?: PhpStringParameter | null; +} + +export interface WP_HTML_Processor_Options { + contextNode?: string; + contextNamespace?: HtmlNamespace; + contextIntegrationNodeType?: "math" | "html" | null; + contextBreadcrumbs?: string[]; + compatMode?: "no-quirks-mode" | "quirks-mode"; + fullParser?: boolean; + htmlFragmentContext?: boolean; + rawTextFragmentContext?: string | null; + encodingConfidence?: EncodingConfidence; + preserveInBodyIgnoredStartTags?: boolean; +} + +export interface WP_HTML_Processor extends WP_HTML_Tag_Processor { + next_tag(query?: string | ProcessorNextTagQuery | null): boolean; + next_token(): boolean; + step(nodeToProcess?: ProcessorStepMode): boolean; + get_last_error(): string | null; + get_unsupported_exception(): WP_HTML_Unsupported_Exception | null; + is_virtual(): boolean; + is_tag_closer(): boolean; + get_namespace(): HtmlNamespace; + expects_closer(node?: WP_HTML_Token | null): boolean | null; + get_breadcrumbs(): string[]; + get_current_depth(): number; + matches_breadcrumbs(breadcrumbs: PhpInternalStringParameter[]): boolean; + set_bookmark(name: PhpInterpolatedStringParameter): boolean; + release_bookmark(name: PhpInterpolatedStringParameter): boolean; + has_bookmark(name: PhpInterpolatedStringParameter): boolean; + seek(name: 
PhpInterpolatedStringParameter): boolean; + serialize(): string | null; + serialize_token(): string; +} + +export interface WP_HTML_Processor_Constructor extends WP_HTML_Tag_Processor_Constructor { + new (html: unknown, options?: unknown): WP_HTML_Processor; + readonly MAX_BOOKMARKS: 10000; + readonly PROCESS_NEXT_NODE: "process-next-node"; + readonly REPROCESS_CURRENT_NODE: "reprocess-current-node"; + readonly PROCESS_CURRENT_NODE: "process-current-node"; + readonly ERROR_UNSUPPORTED: "unsupported"; + readonly ERROR_EXCEEDED_MAX_BOOKMARKS: "exceeded-max-bookmarks"; + readonly CONSTRUCTOR_UNLOCK_CODE: string; + create_fragment(html: unknown, context?: unknown, encoding?: unknown): WP_HTML_Processor | null; + create_full_parser(html: unknown, encoding?: unknown): WP_HTML_Processor | null; + normalize(html: PhpStringParameter): string | null; + is_void(tagName: PhpInternalStringParameter): boolean; + is_special(tagName: unknown): boolean; +} + +export interface HtmlApi { + WP_HTML_Decoder: WP_HTML_Decoder_Constructor; + WP_HTML_Unsupported_Exception: WP_HTML_Unsupported_Exception_Constructor; + WP_HTML_Span: WP_HTML_Span_Constructor; + WP_HTML_Text_Replacement: WP_HTML_Text_Replacement_Constructor; + WP_HTML_Attribute_Token: WP_HTML_Attribute_Token_Constructor; + WP_HTML_Token: WP_HTML_Token_Constructor; + WP_HTML_Stack_Event: WP_HTML_Stack_Event_Constructor; + WP_HTML_Active_Formatting_Elements: WP_HTML_Active_Formatting_Elements_Constructor; + WP_HTML_Open_Elements: WP_HTML_Open_Elements_Constructor; + WP_HTML_Processor_State: WP_HTML_Processor_State_Constructor; + WP_HTML_Doctype_Info: WP_HTML_Doctype_Info_Constructor; + WP_HTML_Tag_Processor: WP_HTML_Tag_Processor_Constructor; + WP_HTML_Processor: WP_HTML_Processor_Constructor; + scanNextTag(html: unknown, offset?: number | string | boolean | null): ScanNextTagResult | false; + version(): string; + wasm: WpHtmlApiRustWasmExports; +} + +export function loadWasm(input?: WasmInput): Promise<HtmlApi>; +export 
function createHtmlApi(wasm: WasmExportsInput): HtmlApi;
diff --git a/ext/html-api-rust/wasm/wp-html-api-rust.js b/ext/html-api-rust/wasm/wp-html-api-rust.js
new file mode 100644
index 0000000000000..020ce80accc48
--- /dev/null
+++ b/ext/html-api-rust/wasm/wp-html-api-rust.js
@@ -0,0 +1,12646 @@
+const TOKEN_TYPE_TAG = 1; // Numeric token-type codes (presumably shared with the WASM core - confirm).
+const TOKEN_TYPE_TEXT = 2;
+const TOKEN_TYPE_COMMENT = 3;
+const TOKEN_TYPE_DOCTYPE = 4;
+const TOKEN_TYPE_CDATA = 5;
+const TOKEN_TYPE_PRESUMPTUOUS_TAG = 6;
+const TOKEN_TYPE_FUNKY_COMMENT = 7;
+const DECODE_CONTEXT_DATA = 0; // Decode-context codes: data vs. attribute.
+const DECODE_CONTEXT_ATTRIBUTE = 1;
+const CLASS_UPDATE_ADD = true; // Same values the .d.ts types as ADD_CLASS (true) and REMOVE_CLASS (false).
+const CLASS_UPDATE_REMOVE = false;
+// Parser-state names; same string values as the STATE_* literals typed in the .d.ts.
+const STATE_READY = "STATE_READY";
+const STATE_COMPLETE = "STATE_COMPLETE";
+const STATE_INCOMPLETE_INPUT = "STATE_INCOMPLETE_INPUT";
+const STATE_MATCHED_TAG = "STATE_MATCHED_TAG";
+const STATE_TEXT_NODE = "STATE_TEXT_NODE";
+const STATE_CDATA_NODE = "STATE_CDATA_NODE";
+const STATE_COMMENT = "STATE_COMMENT";
+const STATE_DOCTYPE = "STATE_DOCTYPE";
+const STATE_PRESUMPTUOUS_TAG = "STATE_PRESUMPTUOUS_TAG";
+const STATE_FUNKY_COMMENT = "STATE_WP_FUNKY"; // Value intentionally differs from the constant's name.
+// Maps numeric comment-type codes to the COMMENT_AS_* names.
+const COMMENT_TYPES = new Map([
+  [1, "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT"],
+  [2, "COMMENT_AS_CDATA_LOOKALIKE"],
+  [3, "COMMENT_AS_HTML_COMMENT"],
+  [4, "COMMENT_AS_PI_NODE_LOOKALIKE"],
+  [5, "COMMENT_AS_INVALID_HTML"],
+]);
+// Node name per non-tag parser state; a DOCTYPE is named "html". STATE_MATCHED_TAG has no entry here.
+const TOKEN_NAMES = new Map([
+  [STATE_TEXT_NODE, "#text"],
+  [STATE_COMMENT, "#comment"],
+  [STATE_DOCTYPE, "html"],
+  [STATE_CDATA_NODE, "#cdata-section"],
+  [STATE_PRESUMPTUOUS_TAG, "#presumptuous-tag"],
+  [STATE_FUNKY_COMMENT, "#funky-comment"],
+]);
+// Token-type label per parser state, including "#tag" for matched tags.
+const TOKEN_TYPES = new Map([
+  [STATE_MATCHED_TAG, "#tag"],
+  [STATE_DOCTYPE, "#doctype"],
+  [STATE_TEXT_NODE, "#text"],
+  [STATE_COMMENT, "#comment"],
+  [STATE_CDATA_NODE, "#cdata-section"],
+  [STATE_PRESUMPTUOUS_TAG, "#presumptuous-tag"],
+  [STATE_FUNKY_COMMENT, "#funky-comment"],
+]);
+// Upper-case tag names treated as void elements (presumably what is_void consults - confirm).
+const VOID_ELEMENTS = new Set([
+  "AREA",
+  "BASE",
+  "BASEFONT",
+  "BGSOUND",
+  "BR",
+  "COL",
+  "EMBED",
+  "FRAME",
+  "HR",
+  "IMG",
+  "INPUT",
+  "KEYGEN",
+  "LINK",
+  "META",
+  "PARAM",
+  "SOURCE",
+  "TRACK",
+  "WBR",
+]);
+// Tag-name sets below are all upper-case; each name states the parsing rule it is meant for.
+const SPECIAL_ATOMIC_ELEMENTS = new Set([
+  "IFRAME",
+  "NOEMBED",
+  "NOFRAMES",
+  "SCRIPT",
+  "STYLE",
+  "TEXTAREA",
+  "TITLE",
+  "XMP",
+]);
+const RAW_TEXT_FRAGMENT_CONTEXT_ELEMENTS = new Set([
+  "PLAINTEXT",
+  "SCRIPT",
+  "STYLE",
+  "TEXTAREA",
+  "TITLE",
+]);
+const RCDATA_FRAGMENT_CONTEXT_ELEMENTS = new Set(["TEXTAREA", "TITLE"]);
+const RAW_TEXT_FRAGMENT_CONTEXT_END_TAGS = "</textarea></title></script></style>"; // One string of four closing tags, not a Set.
+
+const HEAD_CONTENT_ELEMENTS = new Set([
+  "BASE",
+  "BASEFONT",
+  "BGSOUND",
+  "LINK",
+  "META",
+  "NOFRAMES",
+  "NOSCRIPT",
+  "SCRIPT",
+  "STYLE",
+  "TEMPLATE",
+  "TITLE",
+]);
+const AFTER_HEAD_TEMPORARY_HEAD_START_TAGS = new Set([
+  "BASE",
+  "BASEFONT",
+  "BGSOUND",
+  "LINK",
+  "META",
+  "NOFRAMES",
+  "SCRIPT",
+  "STYLE",
+  "TITLE",
+]);
+const TEMPLATE_HEAD_START_TAGS = new Set([ // NOTE(review): same nine members as AFTER_HEAD_TEMPORARY_HEAD_START_TAGS.
+  "BASE",
+  "BASEFONT",
+  "BGSOUND",
+  "LINK",
+  "META",
+  "NOFRAMES",
+  "SCRIPT",
+  "STYLE",
+  "TITLE",
+]);
+const IN_HEAD_NOSCRIPT_ALLOWED_START_TAGS = new Set([
+  "BASEFONT",
+  "BGSOUND",
+  "LINK",
+  "META",
+  "NOFRAMES",
+  "STYLE",
+]);
+
+const FOREIGN_CONTENT_HTML_BREAKOUT_START_TAGS = new Set([
+  "B",
+  "BIG",
+  "BLOCKQUOTE",
+  "BODY",
+  "BR",
+  "CENTER",
+  "CODE",
+  "DD",
+  "DIV",
+  "DL",
+  "DT",
+  "EM",
+  "EMBED",
+  "H1",
+  "H2",
+  "H3",
+  "H4",
+  "H5",
+  "H6",
+  "HEAD",
+  "HR",
+  "I",
+  "IMG",
+  "LI",
+  "LISTING",
+  "MENU",
+  "META",
+  "NOBR",
+  "OL",
+  "P",
+  "PRE",
+  "RUBY",
+  "S",
+  "SMALL",
+  "SPAN",
+  "STRIKE",
+  "STRONG",
+  "SUB",
+  "SUP",
+  "TABLE",
+  "TT",
+  "U",
+  "UL",
+  "VAR",
+]);
+const HEADING_ELEMENTS = new Set(["H1", "H2", "H3", "H4", "H5", "H6"]);
+const FORMATTING_ELEMENTS = new Set([
+  "A",
+  "B",
+  "BIG",
+  "CODE",
+  "EM",
+  "FONT",
+  "I",
+  "NOBR",
+  "S",
+  "SMALL",
+  "STRIKE",
+  "STRONG",
+  "TT",
+  "U",
+]);
+const ADOPTION_AGENCY_END_TAGS = new Set([
+  ...FORMATTING_ELEMENTS,
+  "NOBR", // NOTE(review): already in FORMATTING_ELEMENTS; redundant but harmless in a Set.
+]);
+const ACTIVE_FORMATTING_RECONSTRUCTING_START_TAGS = new Set([
+  "APPLET",
+  "BR",
+  "MARQUEE",
+  "MENUITEM",
+  "OBJECT",
+  "SPAN",
+]);
+const ACTIVE_FORMATTING_MARKER_ELEMENTS = new Set(["APPLET", "CAPTION", "MARQUEE", "OBJECT"]);
+const FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS = new Set(["ASIDE", "BUTTON", "DIV", "MENU", "NOBR"]);
+const FORMATTING_ELEMENT_ANCESTOR_PRECLOSURE_START_TAGS = new Set(["ASIDE", "DIV"]);
+const NESTED_ANCHOR_BLOCK_PRECLOSURE_START_TAGS = new Set(["ADDRESS", "BUTTON", "CENTER", "DIV", "LI"]);
+const NESTED_ANCHOR_RECONSTRUCTING_START_TAGS = new Set(["STYLE", "TITLE"]);
+const FONT_PARAGRAPH_ADOPTION_RECONSTRUCTING_START_TAGS = new Set(["META", "TITLE"]);
+const FONT_PARAGRAPH_ADOPTION_SKIPPABLE_END_TAGS = new Set(["I", "TITLE"]);
+const IN_BODY_IGNORED_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "FRAME",
+  "HEAD",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const AFTER_HEAD_FRAMESET_IGNORED_START_TAGS = new Set(["PARAM", "SOURCE", "TRACK"]);
+const AFTER_HEAD_FRAMESET_IGNORED_CLOSED_START_TAGS = new Set(["MATH", "SVG"]);
+const AFTER_HEAD_FRAMESET_IGNORED_OPEN_START_TAGS = new Set(["DIV", "FOREIGNOBJECT", "P", "SVG"]);
+const TABLE_SECTION_ELEMENTS = new Set(["TBODY", "TFOOT", "THEAD"]);
+const MATHML_TEXT_INTEGRATION_POINT_ELEMENTS = new Set(["MI", "MO", "MN", "MS", "MTEXT"]);
+const MATHML_TEXT_INTEGRATION_FOREIGN_START_TAGS = new Set(["MALIGNMARK", "MGLYPH"]);
+const SVG_HTML_INTEGRATION_POINT_ELEMENTS = new Set(["DESC", "FOREIGNOBJECT", "TITLE"]);
+const MATHML_HTML_INTEGRATION_POINT_ENCODINGS = new Set(["application/xhtml+xml", "text/html"]); // Lower-case MIME types, unlike the tag-name sets.
+const FOREIGN_CONTENT_START_TAGS = new Set(["MATH", "SVG"]);
+const TABLE_TEXT_CURRENT_NODE_ELEMENTS = new Set([
+  "COLGROUP",
+  "TABLE",
+  "TBODY",
+  "TEMPLATE",
+  "TFOOT",
+  "THEAD",
+  "TR",
+]);
+const TABLE_MODE_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "FORM",
+  "INPUT",
+  "SCRIPT",
+  "STYLE",
+  "TABLE",
+  "TBODY",
+  "TD",
+  "TEMPLATE",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const TABLE_MODE_IGNORED_END_TAGS = new Set([
+  "BODY",
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "HTML",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const TABLE_BODY_MODE_IGNORED_END_TAGS = new Set([
+  "BODY",
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "HTML",
+  "TD",
+  "TH",
+  "TR",
+]);
+const TABLE_ROW_MODE_IGNORED_END_TAGS = new Set([
+  "BODY",
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "HTML",
+  "TD",
+  "TH",
+]);
+const TABLE_CELL_MODE_IGNORED_END_TAGS = new Set([
+  "BODY",
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "HTML",
+]);
+const TABLE_CELL_ELEMENTS = new Set(["TD", "TH"]);
+const FRAMESET_NOT_OK_START_TAGS = new Set([
+  "APPLET",
+  "AREA",
+  "BODY",
+  "BR",
+  "BUTTON",
+  "DD",
+  "DT",
+  "EMBED",
+  "HR",
+  "IFRAME",
+  "IMG",
+  "KEYGEN",
+  "LI",
+  "LISTING",
+  "MARQUEE",
+  "OBJECT",
+  "PRE",
+  "SELECT",
+  "TABLE",
+  "TEXTAREA",
+  "WBR",
+  "XMP",
+]);
+const TEMPLATE_TABLE_WRAPPER_START_TAGS = new Set(["CAPTION", "COLGROUP", "TBODY", "TFOOT", "THEAD"]);
+const FORM_TABLE_DESCENDANT_ELEMENTS = new Set([
+  "CAPTION",
+  "COLGROUP",
+  "TABLE",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const TABLE_CELL_BOUNDARY_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const TABLE_ROW_BOUNDARY_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "TBODY",
+  "TFOOT",
+  "THEAD",
+  "TR",
+]);
+const TABLE_SECTION_BOUNDARY_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "TBODY",
+  "TFOOT",
+  "THEAD",
+]);
+const SELECT_BREAKOUT_START_TAGS = new Set(["INPUT", "KEYGEN", "TEXTAREA"]);
+const SELECT_IN_TABLE_BREAKOUT_TAGS = new Set([
+  "CAPTION",
+  "TABLE",
+  "TBODY",
+  "TFOOT",
+  "THEAD",
+  "TR",
+  "TD",
+  "TH",
+]);
+const SELECT_ALLOWED_START_TAGS = new Set([
+  "HTML",
+  "OPTION",
+  "OPTGROUP",
+  "HR",
+  "SELECT",
+  ...SELECT_BREAKOUT_START_TAGS,
+  "SCRIPT",
+  "TEMPLATE",
+]);
+const SELECT_ALLOWED_END_TAGS = new Set(["OPTION", "OPTGROUP", "SELECT", "TEMPLATE"]);
+const COLGROUP_CLOSING_START_TAGS = new Set([
+  "CAPTION",
+  "COLGROUP",
+  "TABLE",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+const CAPTION_CLOSING_START_TAGS = new Set([
+  "CAPTION",
+  "COL",
+  "COLGROUP",
+  "TBODY",
+  "TD",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TR",
+]);
+
+const IMPLIED_END_TAG_ELEMENTS = new Set([
+  "DD",
+  "DT",
+  "LI",
+  "OPTGROUP",
+  "OPTION",
+  "P",
+  "RB",
+  "RP",
+  "RT",
+  "RTC",
+]);
+const RUBY_IMPLIED_END_TAG_START_TAGS = new Set([
+  "RB",
+  "RP",
+  "RT",
+  "RTC",
+]);
+
+const P_CLOSING_START_TAGS = new Set([
+  "ADDRESS",
+  "ARTICLE",
+  "ASIDE",
+  "BLOCKQUOTE",
+  "CENTER",
+  "DETAILS",
+  "DIALOG",
+  "DIR",
+  "DD",
+  "DIV",
+  "DL",
+  "DT",
+  "FIELDSET",
+  "FIGCAPTION",
+  "FIGURE",
+  "FOOTER",
+  "HEADER",
+  "HGROUP",
+  "HR",
+  "LI",
+  "LISTING",
+  "MAIN",
+  "MENU",
+  "NAV",
+  "OL",
+  "P",
+  "PLAINTEXT",
+  "PRE",
+  "SEARCH",
+  "SECTION",
+  "SUMMARY",
+  "TABLE",
+  "UL",
+  "XMP",
+  ...HEADING_ELEMENTS,
+]);
+// Foreign-namespace scope boundaries in "<namespace> <NODE_NAME>" form; an array so it can be spread into the scope sets and lists.
+const FOREIGN_SCOPE_BOUNDARIES = [
+  "math MI",
+  "math MO",
+  "math MN",
+  "math MS",
+  "math MTEXT",
+  "math ANNOTATION-XML",
+  "svg FOREIGNOBJECT",
+  "svg DESC",
+  "svg TITLE",
+];
+// Same members as DEFAULT_SCOPE_BOUNDARIES plus BUTTON.
+const BUTTON_SCOPE_BOUNDARIES = new Set([
+  "APPLET",
+  "BUTTON",
+  "CAPTION",
+  "HTML",
+  "MARQUEE",
+  "OBJECT",
+  "TABLE",
+  "TD",
+  "TEMPLATE",
+  "TH",
+  ...FOREIGN_SCOPE_BOUNDARIES,
+]);
+// Base boundary set: nine HTML names plus the foreign boundaries.
+const DEFAULT_SCOPE_BOUNDARIES = new Set([
+  "APPLET",
+  "CAPTION",
+  "HTML",
+  "MARQUEE",
+  "OBJECT",
+  "TABLE",
+  "TD",
+  "TEMPLATE",
+  "TH",
+  ...FOREIGN_SCOPE_BOUNDARIES,
+]);
+// NOTE(review): has BLOCKQUOTE and FIELDSET, which WP_HTML_Open_Elements.has_element_in_list_item_scope's inline list lacks - confirm.
+const LIST_ITEM_SCOPE_BOUNDARIES = new Set([
+  "APPLET",
+  "BLOCKQUOTE",
+  "BUTTON",
+  "CAPTION",
+  "FIELDSET",
+  "HTML",
+  "MARQUEE",
+  "OBJECT",
+  "OL",
+  "TABLE",
+  "TD",
+  "TEMPLATE",
+  "TH",
+  "UL",
+  ...FOREIGN_SCOPE_BOUNDARIES,
+]);
+
+const END_TAG_SPECIAL_BOUNDARIES = new Set([
+  "ADDRESS",
+  "APPLET",
+  "AREA",
+  "ARTICLE",
+  "ASIDE",
+  "BASE",
+  "BASEFONT",
+  "BGSOUND",
+  "BLOCKQUOTE",
+  "BODY",
+  "BR",
+  "BUTTON",
+  "CAPTION",
+  "CENTER",
+  "COL",
+  "COLGROUP",
+  "DD",
+  "DETAILS",
+  "DIR",
+  "DIV",
+  "DL",
+  "DT",
+  "EMBED",
+  "FIELDSET",
+  "FIGCAPTION",
+  "FIGURE",
+  "FOOTER",
+  "FORM",
+  "FRAME",
+  "FRAMESET",
+  ...HEADING_ELEMENTS,
+  "HEAD",
+  "HEADER",
+  "HGROUP",
+  "HR",
+  "HTML",
+  "IFRAME",
+  "IMG",
+  "INPUT",
+  "KEYGEN",
+  "LI",
+  "LINK",
+  "LISTING",
+  "MAIN",
+  "MARQUEE",
+  "MENU",
+  "META",
+  "NAV",
+  "NOEMBED",
+  "NOFRAMES",
+  "NOSCRIPT",
+  "OBJECT",
+  "OL",
+  "P",
+  "PARAM",
+  "PLAINTEXT",
+  "PRE",
+  "SCRIPT",
+  "SEARCH",
+  "SECTION",
+  "SELECT",
+  "SOURCE",
+  "STYLE",
+  "SUMMARY",
+  "TABLE",
+  "TBODY",
+  "TD",
+  "TEMPLATE",
+  "TEXTAREA",
+  "TFOOT",
+  "TH",
+  "THEAD",
+  "TITLE",
+  "TR",
+  "TRACK",
+  "UL",
+  "WBR",
+  "XMP",
+]);
+
+const MODELED_SCOPED_END_TAGS = new Set([
+  "ADDRESS",
+  "APPLET",
+  "ARTICLE",
+  "ASIDE",
+  "BLOCKQUOTE",
+  "BODY",
+  "BUTTON",
+  "CENTER",
+  "DD",
+  "DETAILS",
+  "DIALOG",
+  "DIR",
+  "DIV",
+  "DL",
+  "DT",
+  "FIELDSET",
+  "FIGCAPTION",
+  "FIGURE",
+  "FOOTER",
+  "FORM",
+  "HEADER",
+  "HGROUP",
+  "HTML",
+  "LI",
+  "LISTING",
+  "MAIN",
+  "MARQUEE",
+  "MENU",
+  "NAV",
+  "OBJECT",
+  "OL",
+  "P",
+  "PRE",
+  "SEARCH",
+  "SECTION",
+  "SUMMARY",
+  "TEMPLATE",
+  "TD",
+  "TH",
+  "UL",
+  ...HEADING_ELEMENTS,
+]);
+// Lower-case public-identifier prefixes (presumably consumed by doctypeCompatibilityMode to select quirks mode - confirm).
+const QUIRKS_PUBLIC_IDENTIFIER_PREFIXES = [
+  "+//silmaril//dtd html pro v0r11 19970101//",
+  "-//as//dtd html 3.0 aswedit + extensions//",
+  "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+  "-//ietf//dtd html 2.0 level 1//",
+  "-//ietf//dtd html 2.0 level 2//",
+  "-//ietf//dtd html 2.0 strict level 1//",
+  "-//ietf//dtd html 2.0 strict level 2//",
+  "-//ietf//dtd html 2.0 strict//",
+  "-//ietf//dtd html 2.0//",
+  "-//ietf//dtd html 2.1e//",
+  "-//ietf//dtd html 3.0//",
+  "-//ietf//dtd html 3.2 final//",
+  "-//ietf//dtd html 3.2//",
+  "-//ietf//dtd html 3//",
+  "-//ietf//dtd html level 0//",
+  "-//ietf//dtd html level 1//",
+  "-//ietf//dtd html level 2//",
+  "-//ietf//dtd html level 3//",
+  "-//ietf//dtd html strict level 0//",
+  "-//ietf//dtd html strict level 1//",
+  "-//ietf//dtd html strict level 2//",
+  "-//ietf//dtd html strict level 3//",
+  "-//ietf//dtd html strict//",
+  "-//ietf//dtd html//",
+  "-//metrius//dtd metrius presentational//",
+  "-//microsoft//dtd internet explorer 2.0 html strict//",
+  "-//microsoft//dtd internet explorer 2.0 html//",
+  "-//microsoft//dtd internet explorer 2.0 tables//",
+  "-//microsoft//dtd internet explorer 3.0 html strict//",
+  "-//microsoft//dtd internet explorer 3.0 html//",
+  "-//microsoft//dtd internet explorer 3.0 tables//",
+  "-//netscape comm. corp.//dtd html//",
+  "-//netscape comm. corp.//dtd strict html//",
+  "-//o'reilly and associates//dtd html 2.0//",
+  "-//o'reilly and associates//dtd html extended 1.0//",
+  "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+  "-//sq//dtd html 2.0 hotmetal + extensions//",
+  "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+  "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+  "-//spyglass//dtd html 2.0 extended//",
+  "-//sun microsystems corp.//dtd hotjava html//",
+  "-//sun microsystems corp.//dtd hotjava strict html//",
+  "-//w3c//dtd html 3 1995-03-24//",
+  "-//w3c//dtd html 3.2 draft//",
+  "-//w3c//dtd html 3.2 final//",
+  "-//w3c//dtd html 3.2//",
+  "-//w3c//dtd html 3.2s draft//",
+  "-//w3c//dtd html 4.0 frameset//",
+  "-//w3c//dtd html 4.0 transitional//",
+  "-//w3c//dtd html experimental 19960712//",
+  "-//w3c//dtd html experimental 970421//",
+  "-//w3c//dtd w3 html//",
+  "-//w3o//dtd w3 html 3.0//",
+  "-//webtechs//dtd mozilla html 2.0//",
+  "-//webtechs//dtd mozilla html//",
+];
+// Module-wide encoder/decoder instances, and the private symbol WP_HTML_Doctype_Info's constructor requires.
+const textEncoder = new TextEncoder();
+const textDecoder = new TextDecoder();
+const DOCTYPE_INFO_INTERNAL = Symbol("WP_HTML_Doctype_Info internal constructor");
+// Error subclass carrying the token and parser stacks at the point of failure; every argument is coerced before super() runs.
+export class WP_HTML_Unsupported_Exception extends Error {
+  constructor(message, tokenName, tokenAt, token, stackOfOpenElements, activeFormattingElements) {
+
const normalizedMessage = phpStringParameterCoerce(message, "message");
+    const normalizedTokenName = phpStringParameterCoerce(tokenName, "token_name");
+    const normalizedTokenAt = phpIntegerParameterCoerce(tokenAt, "token_at");
+    const normalizedToken = phpStringParameterCoerce(token, "token");
+    const normalizedStackOfOpenElements = phpArrayParameterCoerce(
+      stackOfOpenElements,
+      "stack_of_open_elements",
+    );
+    const normalizedActiveFormattingElements = phpArrayParameterCoerce(
+      activeFormattingElements,
+      "active_formatting_elements",
+    );
+    // `this` is only touched after super(); the coercions above run first and use locals only.
+    super(normalizedMessage);
+    this.name = "WP_HTML_Unsupported_Exception";
+    this.token_name = normalizedTokenName;
+    this.token_at = normalizedTokenAt;
+    this.token = normalizedToken;
+    this.stack_of_open_elements = normalizedStackOfOpenElements;
+    this.active_formatting_elements = normalizedActiveFormattingElements;
+  }
+}
+// Value object holding a start offset and a length, both passed through phpIntegerParameterCoerce.
+export class WP_HTML_Span {
+  constructor(start, length) {
+    this.start = phpIntegerParameterCoerce(start, "start");
+    this.length = phpIntegerParameterCoerce(length, "length");
+  }
+}
+// A span plus the replacement text for it; start/length are integer-coerced and text is string-coerced.
+export class WP_HTML_Text_Replacement {
+  constructor(start, length, text) {
+    this.start = phpIntegerParameterCoerce(start, "start");
+    this.length = phpIntegerParameterCoerce(length, "length");
+    this.text = phpStringParameterCoerce(text, "text");
+  }
+}
+// Attribute record. NOTE(review): unlike the sibling classes, the arguments are stored without any coercion - confirm intended.
+export class WP_HTML_Attribute_Token {
+  constructor(name, valueStart, valueLength, start, length, isTrue) {
+    this.name = name;
+    this.value_starts_at = valueStart;
+    this.value_length = valueLength;
+    this.start = start;
+    this.length = length;
+    this.is_true = isTrue;
+  }
+}
+// Stack entry for both parser stacks; namespace starts as "html" and an optional on_destroy callback receives the bookmark name.
+export class WP_HTML_Token {
+  constructor(bookmarkName, nodeName, hasSelfClosingFlag, onDestroy = null) {
+    this.bookmark_name = phpStringParameterCoerce(bookmarkName, "bookmark_name", true);
+    this.namespace = "html";
+    this.node_name = phpStringParameterCoerce(nodeName, "node_name");
+    this.has_self_closing_flag = phpBooleanParameterCoerce(hasSelfClosingFlag, "has_self_closing_flag");
+    this.integration_node_type = null;
+    if (onDestroy !== null && typeof onDestroy !== "function") {
+      throw new TypeError("Argument $on_destroy must be callable or null.");
+    }
+    this.on_destroy = onDestroy;
+  }
+  // Invokes on_destroy at most once: the callback is cleared afterwards, so repeated calls are no-ops.
+  destroy() {
+    if (typeof this.on_destroy === "function") {
+      this.on_destroy(this.bookmark_name);
+    }
+    this.on_destroy = null;
+  }
+  // Alias of destroy().
+  free() {
+    this.destroy();
+  }
+}
+// Records one push or pop of a token; operation and provenance are string-coerced but not checked against POP/PUSH here.
+export class WP_HTML_Stack_Event {
+  static POP = "pop";
+  static PUSH = "push";
+
+  constructor(token, operation, provenance) {
+    this.token = phpTokenParameterCoerce(token, "token");
+    this.operation = phpStringParameterCoerce(operation, "operation");
+    this.provenance = phpStringParameterCoerce(provenance, "provenance");
+  }
+}
+// List of active formatting elements kept in a private array; tokens are matched by bookmark_name, not by identity.
+export class WP_HTML_Active_Formatting_Elements {
+  #stack = [];
+  // True when any entry has the same bookmark_name as the given token.
+  contains_node(token) {
+    const normalizedToken = phpTokenParameterCoerce(token, "token");
+    for (const item of this.walk_up()) {
+      if (normalizedToken.bookmark_name === item.bookmark_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Number of entries, markers included.
+  count() {
+    return this.#stack.length;
+  }
+  // Most recently pushed entry, or null when the list is empty.
+  current_node() {
+    return this.#stack.at(-1) ?? null;
+  }
+  // Pushes a marker: a token with a null bookmark name and node_name "marker".
+  insert_marker() {
+    this.push(new WP_HTML_Token(null, "marker", false));
+  }
+  // Appends a token after coercing it with phpTokenParameterCoerce.
+  push(token) {
+    this.#stack.push(phpTokenParameterCoerce(token, "token"));
+  }
+  // Removes the entry nearest the end whose bookmark_name matches; returns whether one was removed.
+  remove_node(token) {
+    const normalizedToken = phpTokenParameterCoerce(token, "token");
+    for (let i = this.#stack.length - 1; i >= 0; i -= 1) {
+      if (normalizedToken.bookmark_name !== this.#stack[i].bookmark_name) {
+        continue;
+      }
+      this.#stack.splice(i, 1);
+      return true;
+    }
+    return false;
+  }
+  // Yields entries from first pushed to last pushed.
+  *walk_down() {
+    for (const item of this.#stack) {
+      yield item;
+    }
+  }
+  // Yields entries from last pushed to first pushed.
+  *walk_up() {
+    for (let i = this.#stack.length - 1; i >= 0; i -= 1) {
+      yield this.#stack[i];
+    }
+  }
+  // Pops entries until a marker has been popped (the marker is removed too) or the list is empty.
+  clear_up_to_last_marker() {
+    while (this.#stack.length > 0) {
+      const item = this.#stack.pop();
+      if (item.node_name === "marker") {
+        break;
+      }
+    }
+  }
+}
+// Stack of open elements; `stack` is public, while the cached P-in-button-scope flag and the two handlers are private.
+export class WP_HTML_Open_Elements {
+  stack = [];
+
+  #hasPInButtonScope = false;
+  #popHandler = null;
+  #pushHandler = null;
+  // Registers the callback that after_element_pop() invokes; throws TypeError for a non-function.
+  set_pop_handler(handler) {
+    if (typeof handler !== "function") {
+      throw new TypeError("Argument $handler must be callable.");
+    }
+    this.#popHandler = handler;
+  }
+  // Registers the callback that after_element_push() invokes; throws TypeError for a non-function.
+  set_push_handler(handler) {
+    if (typeof handler !== "function") {
+      throw new TypeError("Argument $handler must be callable.");
+    }
+    this.#pushHandler = handler;
+  }
+  // Returns the nth element counted from the bottom, 1-based; null when nth is out of range (0 and negatives never match).
+  at(nth) {
+    let remaining = phpIntegerParameterCoerce(nth, "nth");
+    for (const item of this.walk_down()) {
+      remaining -= 1;
+      if (remaining === 0) {
+        return item;
+      }
+    }
+    return null;
+  }
+  // True when any open element has this node_name; the element's namespace is not considered.
+  contains(nodeName) {
+    const normalizedNodeName = phpStringParameterCoerce(nodeName, "node_name");
+    for (const item of this.walk_up()) {
+      if (normalizedNodeName === item.node_name) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Identity comparison (===), unlike remove_node(), which matches on bookmark_name.
+  contains_node(token) {
+    const normalizedToken = phpTokenParameterCoerce(token, "token");
+    for (const item of this.walk_up()) {
+      if (normalizedToken === item) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Number of open elements.
+  count() {
+    return this.stack.length;
+  }
+
+  current_node()
{
+    return this.stack.at(-1) ?? null;
+  }
+  // Tests the current node against a node name, or against the pseudo-identities "#doctype" and "#tag".
+  current_node_is(identity) {
+    const normalizedIdentity = phpStringParameterCoerce(identity, "identity");
+    const currentNode = this.current_node();
+    if (currentNode === null) {
+      return false;
+    }
+
+    const currentNodeName = currentNode.node_name;
+    return (
+      currentNodeName === normalizedIdentity ||
+      (normalizedIdentity === "#doctype" && currentNodeName === "html") ||
+      (normalizedIdentity === "#tag" && /^[A-Z]+$/.test(currentNodeName)) // letters only: names containing digits (e.g. "H1") do not count as "#tag"
+    );
+  }
+  // Walks up from the current node: true on reaching tagName, false on reaching a boundary in terminationList or the bottom of the stack.
+  has_element_in_specific_scope(tagName, terminationList) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    let terminationSet = null;
+    for (const node of this.walk_up()) {
+      const namespacedName = openElementNamespacedName(node);
+
+      if (namespacedName === normalizedTagName) {
+        return true;
+      }
+      // Sentinel tag name that matches any of H1 through H6.
+      if (
+        normalizedTagName === "(internal: H1 through H6 - do not use)" &&
+        HEADING_ELEMENTS.has(namespacedName)
+      ) {
+        return true;
+      }
+      // Built lazily: terminationList is neither coerced nor validated if the stack is empty or the first node already matches.
+      if (terminationSet === null) {
+        terminationSet = new Set(phpArrayParameterCoerce(terminationList, "termination_list"));
+      }
+
+      if (terminationSet.has(namespacedName)) {
+        return false;
+      }
+    }
+
+    return false;
+  }
+  // Default scope; the inline list has the same members as the module-level DEFAULT_SCOPE_BOUNDARIES.
+  has_element_in_scope(tagName) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    return this.has_element_in_specific_scope(normalizedTagName, [
+      "APPLET",
+      "CAPTION",
+      "HTML",
+      "TABLE",
+      "TD",
+      "TH",
+      "MARQUEE",
+      "OBJECT",
+      "TEMPLATE",
+      ...FOREIGN_SCOPE_BOUNDARIES,
+    ]);
+  }
+  // NOTE(review): this list lacks BLOCKQUOTE and FIELDSET, which the module-level LIST_ITEM_SCOPE_BOUNDARIES includes - confirm which is intended.
+  has_element_in_list_item_scope(tagName) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    return this.has_element_in_specific_scope(normalizedTagName, [
+      "APPLET",
+      "BUTTON",
+      "CAPTION",
+      "HTML",
+      "TABLE",
+      "TD",
+      "TH",
+      "MARQUEE",
+      "OBJECT",
+      "OL",
+      "TEMPLATE",
+      "UL",
+      ...FOREIGN_SCOPE_BOUNDARIES,
+    ]);
+  }
+  // Button scope; the inline list has the same members as the module-level BUTTON_SCOPE_BOUNDARIES.
+  has_element_in_button_scope(tagName) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    return this.has_element_in_specific_scope(normalizedTagName, [
+      "APPLET",
+      "BUTTON",
+      "CAPTION",
+      "HTML",
+      "TABLE",
+      "TD",
+      "TH",
+      "MARQUEE",
+      "OBJECT",
+      "TEMPLATE",
+      ...FOREIGN_SCOPE_BOUNDARIES,
+    ]);
+  }
+  // Table scope: only HTML, TABLE and TEMPLATE terminate the walk.
+  has_element_in_table_scope(tagName) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    return this.has_element_in_specific_scope(normalizedTagName, ["HTML", "TABLE", "TEMPLATE"]);
+  }
+  // Select scope is inverted: every node except OPTION and OPTGROUP terminates the walk. Compares node_name only, ignoring namespace.
+  has_element_in_select_scope(tagName) {
+    const normalizedTagName = phpStringParameterCoerce(tagName, "tag_name");
+    for (const node of this.walk_up()) {
+      if (node.node_name === normalizedTagName) {
+        return true;
+      }
+
+      if (node.node_name !== "OPTION" && node.node_name !== "OPTGROUP") {
+        return false;
+      }
+    }
+
+    return false;
+  }
+  // Returns the flag maintained by after_element_push()/after_element_pop() instead of walking the stack.
+  has_p_in_button_scope() {
+    return this.#hasPInButtonScope;
+  }
+  // Pops the current node and runs the pop bookkeeping; false when the stack was already empty.
+  pop() {
+    const item = this.stack.pop();
+    if (item === undefined) {
+      return false;
+    }
+
+    this.after_element_pop(item);
+    return true;
+  }
+  // Pops until an HTML-namespace element named htmlTagName has been popped (inclusive); false if the stack empties first.
+  pop_until(htmlTagName) {
+    const normalizedHtmlTagName = phpStringParameterCoerce(htmlTagName, "html_tag_name");
+    while (this.stack.length > 0) {
+      const item = this.current_node();
+      this.pop();
+      // Foreign-namespace elements are popped but never satisfy the match.
+      if (item.namespace !== "html") {
+        continue;
+      }
+
+      if (
+        normalizedHtmlTagName === "(internal: H1 through H6 - do not use)" &&
+        HEADING_ELEMENTS.has(item.node_name)
+      ) {
+        return true;
+      }
+
+      if (normalizedHtmlTagName === item.node_name) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+  // Appends a token-coerced item, then runs the push bookkeeping.
+  push(stackItem) {
+    const normalizedStackItem = phpTokenParameterCoerce(stackItem, "stack_item");
+    this.stack.push(normalizedStackItem);
+    this.after_element_push(normalizedStackItem);
+  }
+  // Removes the element nearest the top whose bookmark_name matches, then runs the pop bookkeeping for it.
+  remove_node(token) {
+    const normalizedToken = phpTokenParameterCoerce(token, "token");
+    for (let i = this.stack.length - 1; i >= 0; i -= 1) {
+      const item = this.stack[i];
+      if (normalizedToken.bookmark_name !== item.bookmark_name) {
+        continue;
+      }
+
+      this.stack.splice(i, 1);
+      this.after_element_pop(item);
+      return true;
+    }
+    return false;
+  }
+  // Yields elements from the bottom of the stack to the current node.
+  *walk_down() {
+    for (const item of this.stack) {
+      yield item;
+    }
+  }
+  // Yields from the current node downwards; with aboveThisNode, yields only elements below that node, found by identity.
+  *walk_up(aboveThisNode = null) {
+    const normalizedAboveThisNode = aboveThisNode === null
+      ? null
+      : phpTokenParameterCoerce(aboveThisNode, "above_this_node");
+    let hasFoundNode = normalizedAboveThisNode === null;
+    for (let i = this.stack.length - 1; i >= 0; i -= 1) {
+      const node = this.stack[i];
+      // Skip until the reference node is seen; the reference node itself is not yielded.
+      if (!hasFoundNode) {
+        hasFoundNode = node === normalizedAboveThisNode;
+        continue;
+      }
+
+      yield node;
+    }
+  }
+  // Push bookkeeping: a button-scope boundary clears the cached P flag, a P sets it; then the push handler runs.
+  after_element_push(item) {
+    const normalizedItem = phpTokenParameterCoerce(item, "item");
+    switch (openElementNamespacedName(normalizedItem)) {
+      case "APPLET":
+      case "BUTTON":
+      case "CAPTION":
+      case "HTML":
+      case "TABLE":
+      case "TD":
+      case "TH":
+      case "MARQUEE":
+      case "OBJECT":
+      case "TEMPLATE":
+      case "math MI":
+      case "math MO":
+      case "math MN":
+      case "math MS":
+      case "math MTEXT":
+      case "math ANNOTATION-XML":
+      case "svg FOREIGNOBJECT":
+      case "svg DESC":
+      case "svg TITLE":
+        this.#hasPInButtonScope = false;
+        break;
+
+      case "P":
+        this.#hasPInButtonScope = true;
+        break;
+    }
+
+    if (typeof this.#pushHandler === "function") {
+      this.#pushHandler(normalizedItem);
+    }
+  }
+  // Pop bookkeeping: removing a P or a button-scope boundary recomputes the cached flag by walking the stack; then the pop handler runs.
+  after_element_pop(item) {
+    const normalizedItem = phpTokenParameterCoerce(item, "item");
+    switch (openElementNamespacedName(normalizedItem)) {
+      case "APPLET":
+      case "BUTTON":
+      case "CAPTION":
+      case "HTML":
+      case "P":
+      case "TABLE":
+      case "TD":
+      case "TH":
+      case "MARQUEE":
+      case "OBJECT":
+      case "TEMPLATE":
+      case "math MI":
+      case "math MO":
+      case "math MN":
+      case "math MS":
+      case "math MTEXT":
+      case "math ANNOTATION-XML":
+      case "svg FOREIGNOBJECT":
+      case "svg DESC":
+      case "svg TITLE":
+        this.#hasPInButtonScope = this.has_element_in_button_scope("P");
+        break;
+    }
+
+    if (typeof this.#popHandler === "function") {
+      this.#popHandler(normalizedItem);
+    }
+  }
+  // Pops until the current node is TABLE, TEMPLATE or HTML (compared by node_name only), or the stack is empty.
+  clear_to_table_context() {
+    while (this.stack.length > 0) {
+      const item = this.current_node();
+      if (["TABLE", "TEMPLATE", "HTML"].includes(item.node_name)) {
+        break;
+      }
+      this.pop();
+    }
+  }
+  // Pops until the current node is TBODY, TFOOT, THEAD, TEMPLATE or HTML, or the stack is empty.
+  clear_to_table_body_context() {
+    while (this.stack.length > 0) {
+      const item = this.current_node();
+      if (["TBODY", "TFOOT", "THEAD", "TEMPLATE", "HTML"].includes(item.node_name)) {
+        break;
+      }
+      this.pop();
+    }
+  }
+  // Pops until the current node is TR, TEMPLATE or HTML, or the stack is empty.
+  clear_to_table_row_context() {
+    while (this.stack.length > 0) {
+      const item = this.current_node();
+      if (["TR", "TEMPLATE", "HTML"].includes(item.node_name)) {
+        break;
+      }
+      this.pop();
+    }
+  }
+}
+// Returns node_name for HTML-namespace tokens and "<namespace> <node_name>" otherwise, matching the FOREIGN_SCOPE_BOUNDARIES format.
+function openElementNamespacedName(token) {
+  return token.namespace === "html" ? token.node_name : `${token.namespace} ${token.node_name}`;
+}
+// Mutable parser state; the static members name the insertion modes.
+export class WP_HTML_Processor_State {
+  static INSERTION_MODE_INITIAL = "insertion-mode-initial";
+  static INSERTION_MODE_BEFORE_HTML = "insertion-mode-before-html";
+  static INSERTION_MODE_BEFORE_HEAD = "insertion-mode-before-head";
+  static INSERTION_MODE_IN_HEAD = "insertion-mode-in-head";
+  static INSERTION_MODE_IN_HEAD_NOSCRIPT = "insertion-mode-in-head-noscript";
+  static INSERTION_MODE_AFTER_HEAD = "insertion-mode-after-head";
+  static INSERTION_MODE_IN_BODY = "insertion-mode-in-body";
+  static INSERTION_MODE_IN_TABLE = "insertion-mode-in-table";
+  static INSERTION_MODE_IN_TABLE_TEXT = "insertion-mode-in-table-text";
+  static INSERTION_MODE_IN_CAPTION = "insertion-mode-in-caption";
+  static INSERTION_MODE_IN_COLUMN_GROUP = "insertion-mode-in-column-group";
+  static INSERTION_MODE_IN_TABLE_BODY = "insertion-mode-in-table-body";
+  static INSERTION_MODE_IN_ROW = "insertion-mode-in-row";
+  static INSERTION_MODE_IN_CELL = "insertion-mode-in-cell";
+  static INSERTION_MODE_IN_SELECT = "insertion-mode-in-select";
+  static INSERTION_MODE_IN_SELECT_IN_TABLE = "insertion-mode-in-select-in-table";
+  static INSERTION_MODE_IN_TEMPLATE = "insertion-mode-in-template";
+  static INSERTION_MODE_AFTER_BODY = "insertion-mode-after-body";
+  static INSERTION_MODE_IN_FRAMESET =
"insertion-mode-in-frameset"; + static INSERTION_MODE_AFTER_FRAMESET = "insertion-mode-after-frameset"; + static INSERTION_MODE_AFTER_AFTER_BODY = "insertion-mode-after-after-body"; + static INSERTION_MODE_AFTER_AFTER_FRAMESET = "insertion-mode-after-after-frameset"; + + constructor() { + this.stack_of_template_insertion_modes = []; + this.stack_of_open_elements = new WP_HTML_Open_Elements(); + this.active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); + this.current_token = null; + this.insertion_mode = WP_HTML_Processor_State.INSERTION_MODE_INITIAL; + this.context_node = null; + this.encoding = null; + this.encoding_confidence = "tentative"; + this.head_element = null; + this.form_element = null; + this.frameset_ok = true; + } +} + +export class WP_HTML_Doctype_Info { + constructor(name, publicIdentifier, systemIdentifier, forceQuirksFlag, internalToken = null) { + if (internalToken !== DOCTYPE_INFO_INTERNAL) { + throw new TypeError("WP_HTML_Doctype_Info constructor is private."); + } + + const normalizedName = phpStringParameterCoerce(name, "name", true); + const normalizedPublicIdentifier = phpStringParameterCoerce(publicIdentifier, "public_identifier", true); + const normalizedSystemIdentifier = phpStringParameterCoerce(systemIdentifier, "system_identifier", true); + const normalizedForceQuirksFlag = phpBooleanParameterCoerce(forceQuirksFlag, "force_quirks_flag"); + + this.name = normalizedName; + this.public_identifier = normalizedPublicIdentifier; + this.system_identifier = normalizedSystemIdentifier; + this.indicated_compatibility_mode = doctypeCompatibilityMode( + normalizedName, + normalizedPublicIdentifier, + normalizedSystemIdentifier, + normalizedForceQuirksFlag, + ); + } + + static from_doctype_token(doctypeHtml) { + let doctype = phpStringParameterCoerce(doctypeHtml, "doctype_html"); + let end = doctype.length - 1; + + if (end < 9 || !asciiStartsWithAt(doctype, "<!DOCTYPE", 0)) { + return null; + } + + let at = 9; + if (doctype[end] 
!== ">" || doctype.indexOf(">", at) < end) { + return null; + } + + doctype = doctype.replace(/\r\n/g, "\n").replace(/\r/g, "\n"); + end = doctype.length - 1; + at = skipHtmlWhitespace(doctype, at, end); + + if (at >= end) { + return createDoctypeInfo(null, null, null, true); + } + + const nameStart = at; + while (at < end && !isHtmlWhitespaceCode(doctype.charCodeAt(at))) { + at += 1; + } + const name = replaceNulls(doctype.slice(nameStart, at).toLowerCase()); + + at = skipHtmlWhitespace(doctype, at, end); + if (at >= end) { + return createDoctypeInfo(name, null, null, false); + } + + if (at + 6 >= end) { + return createDoctypeInfo(name, null, null, true); + } + + if (asciiStartsWithAt(doctype, "PUBLIC", at)) { + at = skipHtmlWhitespace(doctype, at + 6, end); + if (at >= end) { + return createDoctypeInfo(name, null, null, true); + } + return parsePublicIdentifier(doctype, at, end, name); + } + + if (asciiStartsWithAt(doctype, "SYSTEM", at)) { + at = skipHtmlWhitespace(doctype, at + 6, end); + if (at >= end) { + return createDoctypeInfo(name, null, null, true); + } + return parseSystemIdentifier(doctype, at, end, name, null); + } + + return createDoctypeInfo(name, null, null, true); + } +} + +async function wasmSourceFromInput(input) { + input = await input; + + if (isWebAssemblyInstantiatedSource(input)) { + return input.instance; + } + + if (isWebAssemblyExports(input)) { + return input; + } + + if (input instanceof WebAssembly.Instance) { + return input; + } + + if (input instanceof WebAssembly.Module) { + return input; + } + + if (input instanceof ArrayBuffer) { + return input; + } + + if (ArrayBuffer.isView(input)) { + return input instanceof Uint8Array + ? 
input + : new Uint8Array(input.buffer, input.byteOffset, input.byteLength); + } + + if (input instanceof URL) { + if (input.protocol === "file:" && isNodeLikeRuntime()) { + return nodeFileUrlBytes(input); + } + if (typeof fetch === "function") { + return fetchWasmSource(input); + } + throw new TypeError("Unsupported WASM input."); + } + + if (typeof Response === "function" && input instanceof Response) { + return responseWasmSource(input); + } + + if (typeof Blob === "function" && input instanceof Blob) { + return input.arrayBuffer(); + } + + if (typeof Request === "function" && input instanceof Request) { + if (typeof fetch === "function") { + return fetchWasmSource(input); + } + throw new TypeError("Unsupported WASM input."); + } + + if (typeof input === "string") { + if (/^file:\/\//.test(input) && isNodeLikeRuntime()) { + return nodeFileUrlBytes(input); + } + + if (typeof fetch === "function" && (/^https?:\/\//.test(input) || !isNodeLikeRuntime())) { + return fetchWasmSource(input); + } + + const { readFile } = await import("node:fs/promises"); + return readFile(input); + } + + throw new TypeError("Unsupported WASM input."); +} + +async function nodeFileUrlBytes(input) { + const [{ readFile }, { fileURLToPath }] = await Promise.all([ + import("node:fs/promises"), + import("node:url"), + ]); + return readFile(fileURLToPath(input)); +} + +function isWebAssemblyInstantiatedSource(input) { + return input !== null && + typeof input === "object" && + input.instance instanceof WebAssembly.Instance; +} + +function isNodeLikeRuntime() { + return typeof process === "object" && process !== null && Boolean(process.versions?.node); +} + +async function fetchWasmSource(input) { + return responseWasmSource(await fetch(input)); +} + +async function responseWasmSource(response) { + if (!response.ok) { + throw new Error(`Failed to load WASM: ${response.status} ${response.statusText}`); + } + + if ( + typeof WebAssembly.instantiateStreaming === "function" && + typeof Response === 
"function" && + response instanceof Response && + typeof response.clone === "function" + ) { + try { + return (await WebAssembly.instantiateStreaming(response.clone(), {})).instance; + } catch { + // Fall through to byte-buffer loading when streaming compilation is unavailable for the response. + } + } + + return response.arrayBuffer(); +} + +export async function loadWasm(input = new URL("./dist/wp_html_api_rust_core.wasm", import.meta.url)) { + const source = await wasmSourceFromInput(input); + if (isWebAssemblyExports(source)) { + return createHtmlApi(source); + } + if (source instanceof WebAssembly.Instance) { + return createHtmlApi(source.exports); + } + const module = source instanceof WebAssembly.Module ? source : await WebAssembly.compile(source); + const instance = await WebAssembly.instantiate(module, {}); + + return createHtmlApi(instance.exports); +} + +export function createHtmlApi(wasm) { + const wasmExports = wasmExportsFromInput(wasm); + const runtime = new WasmRuntime(wasmExports); + + class WP_HTML_Decoder { + static attribute_starts_with(haystack, searchText, caseSensitivity = "case-sensitive") { + if (typeof haystack !== "string" || typeof searchText !== "string") { + return phpAttributeStartsWithNonStringScalar(haystack, searchText); + } + + return runtime.decoderAttributeStartsWith( + haystack, + searchText, + caseSensitivity === "ascii-case-insensitive", + ); + } + + static decode_text_node(text) { + return runtime.decoderDecode("data", phpStringParameterCoerce(text, "text")); + } + + static decode_attribute(text) { + return runtime.decoderDecode("attribute", phpStringParameterCoerce(text, "text")); + } + + static decode(context, text) { + return runtime.decoderDecode( + context, + phpStringParameterCoerce(text, "text"), + ); + } + + static read_character_reference(context, text, at = 0, matchByteLength = null) { + return runtime.decoderReadCharacterReference( + context, + phpInternalStringCoerce(text, "text"), + at, + matchByteLength, + ); + 
} + + static code_point_to_utf8_bytes(codePoint) { + return runtime.decoderCodePointToUtf8Bytes(codePoint); + } + } + + class WP_HTML_Tag_Processor { + static MAX_BOOKMARKS = 10; + static MAX_SEEK_OPS = 1000; + static ADD_CLASS = true; + static REMOVE_CLASS = false; + static SKIP_CLASS = null; + static STATE_READY = STATE_READY; + static STATE_COMPLETE = STATE_COMPLETE; + static STATE_INCOMPLETE_INPUT = STATE_INCOMPLETE_INPUT; + static STATE_MATCHED_TAG = STATE_MATCHED_TAG; + static STATE_TEXT_NODE = STATE_TEXT_NODE; + static STATE_CDATA_NODE = STATE_CDATA_NODE; + static STATE_COMMENT = STATE_COMMENT; + static STATE_DOCTYPE = STATE_DOCTYPE; + static STATE_PRESUMPTUOUS_TAG = STATE_PRESUMPTUOUS_TAG; + static STATE_FUNKY_COMMENT = STATE_FUNKY_COMMENT; + static TEXT_IS_GENERIC = "TEXT_IS_GENERIC"; + static TEXT_IS_NULL_SEQUENCE = "TEXT_IS_NULL_SEQUENCE"; + static TEXT_IS_WHITESPACE = "TEXT_IS_WHITESPACE"; + static NO_QUIRKS_MODE = "no-quirks-mode"; + static QUIRKS_MODE = "quirks-mode"; + static COMMENT_AS_ABRUPTLY_CLOSED_COMMENT = "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT"; + static COMMENT_AS_CDATA_LOOKALIKE = "COMMENT_AS_CDATA_LOOKALIKE"; + static COMMENT_AS_HTML_COMMENT = "COMMENT_AS_HTML_COMMENT"; + static COMMENT_AS_PI_NODE_LOOKALIKE = "COMMENT_AS_PI_NODE_LOOKALIKE"; + static COMMENT_AS_INVALID_HTML = "COMMENT_AS_INVALID_HTML"; + #reportIncompleteTokens = true; + #pausedAtJsIncompleteToken = false; + #classNameUpdates = new Map(); + + constructor(html, options = {}) { + if (typeof html !== "string") { + html = ""; + } + + this.parser_state = STATE_READY; + this.compat_mode = WP_HTML_Tag_Processor.NO_QUIRKS_MODE; + this.parsing_namespace = "html"; + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_GENERIC; + this.comment_type = null; + this.bookmarks = new Map(); + this.seek_count = 0; + this.#reportIncompleteTokens = options.reportIncompleteTokens !== false; + this.#pausedAtJsIncompleteToken = false; + + const input = runtime.encode(html); + const allocated 
= runtime.allocBytes(input); + try { + this.pointer = wasm.wp_html_api_rust_tag_processor_new(allocated.ptr, input.length); + } finally { + runtime.freeBytes(allocated); + } + + if (!this.pointer) { + throw new Error("Failed to initialize WP_HTML_Tag_Processor WASM state."); + } + + this.html = this.get_updated_html(); + } + + destroy() { + if (this.pointer) { + wasm.wp_html_api_rust_tag_processor_free(this.pointer); + this.pointer = 0; + } + } + + free() { + this.destroy(); + } + + next_tag(query = undefined) { + this.#ensureLive(); + this.#syncLexicalUpdates(); + + let tagName = null; + let className = null; + let matchOffset = 1; + let visitClosers = false; + + if (typeof query === "string") { + tagName = query; + } else if (query && typeof query === "object") { + if (typeof query.tag_name === "string") { + tagName = query.tag_name; + } + if (typeof query.class_name === "string") { + className = query.class_name; + } + if (Number.isInteger(query.match_offset) && query.match_offset > 0) { + matchOffset = query.match_offset; + } + visitClosers = query.tag_closers === "visit" || query.visit_closers === true; + } + + const normalizedTagName = tagName === null ? null : asciiUpper(tagName); + + let found = 0; + while (this.next_token()) { + if (this.get_token_type() !== "#tag") { + continue; + } + + if (this.is_tag_closer() && !visitClosers) { + continue; + } + + if (normalizedTagName !== null && asciiUpper(this.get_tag() ?? 
"") !== normalizedTagName) { + continue; + } + + if (className !== null && this.has_class(className) !== true) { + continue; + } + + found += 1; + if (found < matchOffset) { + continue; + } + + return true; + } + + return false; + } + + next_token() { + this.#ensureLive(); + this.#syncLexicalUpdates(); + this.#pausedAtJsIncompleteToken = false; + + if (wasm.wp_html_api_rust_tag_processor_next_token(this.pointer)) { + this.#updateParserStateFromNative(); + if (this.#reportIncompleteTokens && this.#currentTokenIsIncompleteAtEof()) { + this.#pausedAtJsIncompleteToken = true; + this.parser_state = STATE_INCOMPLETE_INPUT; + return false; + } + return true; + } + + this.parser_state = wasm.wp_html_api_rust_tag_processor_paused_at_incomplete(this.pointer) + ? STATE_INCOMPLETE_INPUT + : STATE_COMPLETE; + return false; + } + + get_tag() { + this.#ensureLive(); + if (![STATE_MATCHED_TAG, STATE_COMMENT].includes(this.parser_state)) { + return null; + } + + if (this.parser_state === STATE_COMMENT && wasm.wp_html_api_rust_tag_processor_current_comment_type(this.pointer) !== 4) { + return null; + } + + const tagName = runtime.readOutputString((out) => ( + wasm.wp_html_api_rust_tag_processor_get_tag(this.pointer, out) + )); + + if (tagName === null) { + return null; + } + + return this.parser_state === STATE_COMMENT && wasm.wp_html_api_rust_tag_processor_current_comment_type(this.pointer) === 4 + ? 
tagName + : asciiUpper(tagName); + } + + get_attribute(name) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return null; + } + + const attributeName = phpInternalStringCoerce(name, "name"); + if (attributeName === "class") { + this.#flushClassNameUpdates(); + } + + return this.#readNativeAttribute(attributeName); + } + + get_attribute_names_with_prefix(prefix) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return null; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return null; + } + + const attributePrefix = phpInternalStringCoerce(prefix, "prefix"); + return runtime.withEncoded(attributePrefix, ({ ptr, len }) => runtime.withOutSlice((out) => { + const result = wasm.wp_html_api_rust_tag_processor_get_attribute_names_with_prefix(this.pointer, ptr, len, out); + if (result === 0) { + return null; + } + const bytes = runtime.readBytesFromOut(out); + return splitNullSeparatedAscii(bytes); + })); + } + + set_attribute(name, value) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return false; + } + + const attributeName = phpInternalStringCoerce(name, "name"); + if (!isValidAttributeName(attributeName)) { + return false; + } + + let valueKind = 2; + let encodedValue = new Uint8Array(); + if (value === false) { + valueKind = 0; + } else if (value === true) { + valueKind = 1; + } else if (value === null) { + return false; + } else { + encodedValue = runtime.encode(phpStringParameterCoerce(value, "value")); + } + + const result = this.#setNativeAttribute(attributeName, encodedValue, valueKind); + if (result && asciiLower(attributeName) === "class") { + this.#classNameUpdates.clear(); + } + return result; + } + + remove_attribute(name) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return false; 
+ } + + const attributeName = phpInternalStringCoerce(name, "name"); + if (asciiLower(attributeName) === "class") { + this.#classNameUpdates.clear(); + } + return this.#removeNativeAttribute(attributeName); + } + + add_class(className) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return false; + } + + const normalizedClassName = phpClassUpdateKey(className, "class_name"); + this.#queueClassNameUpdate(normalizedClassName, CLASS_UPDATE_ADD); + return true; + } + + remove_class(className) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return false; + } + + const normalizedClassName = phpClassUpdateKey(className, "class_name"); + this.#queueClassNameUpdate(normalizedClassName, CLASS_UPDATE_REMOVE); + return true; + } + + has_class(className) { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return null; + } + + const wantedClass = phpInternalStringCoerce(className, "wanted_class"); + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return false; + } + + this.#flushClassNameUpdates(); + return runtime.withEncoded(wantedClass, ({ ptr, len }) => { + const result = wasm.wp_html_api_rust_tag_processor_has_class(this.pointer, ptr, len, this.#isQuirksMode()); + return result === 0 ? 
null : result === 2; + }); + } + + class_list() { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return null; + } + + if (this.is_tag_closer() || this.#isRawTagCloser()) { + return []; + } + + this.#flushClassNameUpdates(); + return runtime.withOutSlice((out) => { + if (wasm.wp_html_api_rust_tag_processor_class_list(this.pointer, out, this.#isQuirksMode()) === 0) { + return null; + } + + return splitUnitSeparatedString(runtime.readStringFromOut(out)); + }); + } + + is_tag_closer() { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + return Boolean(wasm.wp_html_api_rust_tag_processor_is_tag_closer(this.pointer)); + } + + has_self_closing_flag() { + this.#ensureLive(); + if (this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + return Boolean(wasm.wp_html_api_rust_tag_processor_has_self_closing_flag(this.pointer)); + } + + get_token_name() { + this.#ensureLive(); + if (TOKEN_NAMES.has(this.parser_state)) { + return TOKEN_NAMES.get(this.parser_state); + } + return this.parser_state === STATE_MATCHED_TAG ? this.get_tag() : null; + } + + get_token_type() { + this.#ensureLive(); + return TOKEN_TYPES.get(this.parser_state) ?? 
null; + } + + paused_at_incomplete_token() { + this.#ensureLive(); + return this.#pausedAtJsIncompleteToken || + Boolean(wasm.wp_html_api_rust_tag_processor_paused_at_incomplete(this.pointer)); + } + + subdivide_text_appropriately() { + this.#ensureLive(); + if (this.parser_state !== STATE_TEXT_NODE) { + return false; + } + + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_GENERIC; + const classification = wasm.wp_html_api_rust_tag_processor_subdivide_text_appropriately(this.pointer); + if (classification === 1) { + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE; + return true; + } + if (classification === 2) { + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE; + return true; + } + return false; + } + + get_modifiable_text() { + this.#ensureLive(); + if ( + ![ + STATE_MATCHED_TAG, + STATE_TEXT_NODE, + STATE_CDATA_NODE, + STATE_COMMENT, + STATE_DOCTYPE, + STATE_PRESUMPTUOUS_TAG, + STATE_FUNKY_COMMENT, + ].includes(this.parser_state) + ) { + return ""; + } + + return runtime.readOutputString((out) => ( + wasm.wp_html_api_rust_tag_processor_get_modifiable_text(this.pointer, out) + )) ?? 
""; + } + + native_get_script_content_type() { + this.#ensureLive(); + if ( + this.parser_state !== STATE_MATCHED_TAG || + this.get_tag() !== "SCRIPT" || + this.get_namespace() !== "html" + ) { + return null; + } + + switch (wasm.wp_html_api_rust_tag_processor_script_content_type(this.pointer)) { + case 1: + return "javascript"; + case 2: + return "json"; + default: + return null; + } + } + + set_modifiable_text(text) { + this.#ensureLive(); + const plaintextContent = phpStringParameterCoerce(text, "plaintext_content"); + if (![STATE_MATCHED_TAG, STATE_TEXT_NODE, STATE_COMMENT].includes(this.parser_state)) { + return false; + } + + return this.#mutateCurrentToken(() => runtime.withEncoded(plaintextContent, ({ ptr, len }) => ( + wasm.wp_html_api_rust_tag_processor_set_modifiable_text(this.pointer, ptr, len) + ))); + } + + get_comment_type() { + this.#ensureLive(); + if (this.parser_state !== STATE_COMMENT) { + return null; + } + + return COMMENT_TYPES.get(wasm.wp_html_api_rust_tag_processor_current_comment_type(this.pointer)) ?? null; + } + + get_doctype_info() { + this.#ensureLive(); + if (this.parser_state !== STATE_DOCTYPE) { + return null; + } + + return WP_HTML_Doctype_Info.from_doctype_token(this.#currentTokenString()); + } + + set_bookmark(name) { + this.#ensureLive(); + if ( + this.parser_state === STATE_COMPLETE || + this.parser_state === STATE_INCOMPLETE_INPUT + ) { + return false; + } + + const bookmarkName = phpArrayKeyParameterCoerce(name, "name"); + const maxBookmarks = this.constructor.MAX_BOOKMARKS ?? 
WP_HTML_Tag_Processor.MAX_BOOKMARKS; + if (this.bookmarks.size >= maxBookmarks && !this.bookmarks.has(bookmarkName)) { + return false; + } + + const span = this.#currentSpan(); + if (!span) { + return false; + } + + this.bookmarks.set(bookmarkName, span); + return true; + } + + release_bookmark(name) { + return this.bookmarks.delete(phpArrayKeyParameterCoerce(name, "name")); + } + + has_bookmark(name) { + return this.bookmarks.has(phpArrayKeyParameterCoerce(name, "name")); + } + + seek(name) { + this.#ensureLive(); + this.#pausedAtJsIncompleteToken = false; + const bookmarkName = phpArrayKeyParameterCoerce(name, "name"); + if (!this.bookmarks.has(bookmarkName)) { + return false; + } + + const bookmark = this.bookmarks.get(bookmarkName); + const currentSpan = this.#currentSpan(); + if ( + currentSpan && + currentSpan.start === bookmark.start && + currentSpan.length === bookmark.length + ) { + this.#updateParserStateFromNative(); + return true; + } + + if (this.seek_count >= WP_HTML_Tag_Processor.MAX_SEEK_OPS) { + return false; + } + + this.seek_count += 1; + this.#syncLexicalUpdates(); + const updatedBookmark = this.bookmarks.get(bookmarkName); + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, updatedBookmark.start); + if (!wasm.wp_html_api_rust_tag_processor_next_token(this.pointer)) { + this.parser_state = wasm.wp_html_api_rust_tag_processor_paused_at_incomplete(this.pointer) + ? STATE_INCOMPLETE_INPUT + : STATE_COMPLETE; + return false; + } + + this.#updateParserStateFromNative(); + return true; + } + + change_parsing_namespace(namespaceName) { + this.#ensureLive(); + const normalizedNamespaceName = phpStringParameterCoerce(namespaceName, "new_namespace"); + if (!["html", "math", "svg"].includes(normalizedNamespaceName)) { + return false; + } + + this.parsing_namespace = normalizedNamespaceName; + wasm.wp_html_api_rust_tag_processor_set_namespace(this.pointer, normalizedNamespaceName === "html" ? 
0 : 1); + return true; + } + + get_namespace() { + return this.parsing_namespace; + } + + get_qualified_tag_name() { + const tagName = this.get_tag(); + if (tagName === null || this.parsing_namespace === "html") { + return tagName; + } + + const lower = asciiLower(tagName); + return this.parsing_namespace === "svg" ? qualifySvgTagName(lower) : lower; + } + + get_qualified_attribute_name(attributeName) { + if (this.parser_state !== STATE_MATCHED_TAG) { + return null; + } + if (attributeName === null) { + return null; + } + + const normalizedAttributeName = phpInternalStringCoerce(attributeName, "attribute_name"); + if (this.parsing_namespace === "html") { + return normalizedAttributeName; + } + return qualifyForeignAttributeName(this.parsing_namespace, normalizedAttributeName); + } + + get_full_comment_text() { + if (![STATE_COMMENT, STATE_FUNKY_COMMENT].includes(this.parser_state)) { + return null; + } + + const text = this.get_modifiable_text(); + if (text === null || this.parser_state === STATE_FUNKY_COMMENT) { + return text; + } + + switch (wasm.wp_html_api_rust_tag_processor_current_comment_type(this.pointer)) { + case 1: + case 3: + return text; + case 2: + return `[CDATA[${text}]]`; + case 4: { + const tagName = this.get_tag(); + return tagName === null ? null : `?${tagName}${text}?`; + } + case 5: { + const token = this.#currentTokenBytes(); + return token && token[1] === 0x3f ? `?${text}` : text; + } + default: + return null; + } + } + + get_updated_html(flushClassNameUpdates = true) { + this.#ensureLive(); + if (flushClassNameUpdates) { + this.#flushClassNameUpdates(); + } + return runtime.readOutputString((out) => ( + wasm.wp_html_api_rust_tag_processor_get_html(this.pointer, out) + )) ?? 
""; + } + + toString() { + return this.get_updated_html(); + } + + #updateParserStateFromNative() { + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_GENERIC; + const tokenType = wasm.wp_html_api_rust_tag_processor_current_token_type(this.pointer); + switch (tokenType) { + case TOKEN_TYPE_TAG: + this.parser_state = STATE_MATCHED_TAG; + break; + case TOKEN_TYPE_TEXT: + this.parser_state = STATE_TEXT_NODE; + break; + case TOKEN_TYPE_COMMENT: + this.parser_state = STATE_COMMENT; + break; + case TOKEN_TYPE_DOCTYPE: + this.parser_state = STATE_DOCTYPE; + break; + case TOKEN_TYPE_CDATA: + this.parser_state = STATE_CDATA_NODE; + break; + case TOKEN_TYPE_PRESUMPTUOUS_TAG: + this.parser_state = STATE_PRESUMPTUOUS_TAG; + break; + case TOKEN_TYPE_FUNKY_COMMENT: + this.parser_state = STATE_FUNKY_COMMENT; + break; + default: + this.parser_state = STATE_READY; + } + this.comment_type = this.get_comment_type(); + } + + #currentTokenIsIncompleteAtEof() { + const span = this.#currentSpan(); + if (span === null || span.start + span.length !== this.html.length) { + return false; + } + + const tokenType = this.get_token_type(); + const tokenHtml = this.html.slice(span.start); + if ( + (tokenType === "#comment" || tokenType === "#funky-comment") && + incompleteBogusCommentAtEof(tokenHtml) + ) { + return true; + } + + if ( + tokenType !== "#tag" || + this.get_namespace() !== "html" || + this.is_tag_closer() + ) { + return false; + } + + const tagName = this.get_tag(); + if (!SPECIAL_ATOMIC_ELEMENTS.has(tagName)) { + return false; + } + + const startTag = completeStartTagAt(this.html, span.start); + return startTag !== null && + startTag.tagName === tagName && + findSpecialAtomicCloserEnd(this.html, startTag.end, tagName) === null; + } + + #currentSpan() { + return runtime.withOutPair((startPtr, lengthPtr) => { + if (!wasm.wp_html_api_rust_tag_processor_current_span(this.pointer, startPtr, lengthPtr)) { + return null; + } + return { + start: runtime.readU32(startPtr), + 
length: runtime.readU32(lengthPtr), + }; + }); + } + + #currentTokenBytes() { + const span = this.#currentSpan(); + if (!span) { + return null; + } + + const html = runtime.readOutputBytes((out) => ( + wasm.wp_html_api_rust_tag_processor_get_html(this.pointer, out) + )); + return html === null ? null : html.slice(span.start, span.start + span.length); + } + + #currentTokenString() { + const token = this.#currentTokenBytes(); + return token === null ? "" : textDecoder.decode(token); + } + + #isRawTagCloser() { + const token = this.#currentTokenBytes(); + return token !== null && token.length >= 2 && token[0] === 0x3c && token[1] === 0x2f; + } + + #readNativeAttribute(attributeName) { + return runtime.withEncoded(attributeName, ({ ptr, len }) => runtime.withOutSlice((out) => { + const result = wasm.wp_html_api_rust_tag_processor_get_attribute(this.pointer, ptr, len, out); + if (result === 0) { + return null; + } + if (result === 1) { + return true; + } + return runtime.readStringFromOut(out); + })); + } + + #setNativeAttribute(attributeName, encodedValue, valueKind) { + return this.#mutateCurrentToken(() => runtime.withEncoded(attributeName, (nameBytes) => ( + runtime.withBytes(encodedValue, (valueBytes) => ( + wasm.wp_html_api_rust_tag_processor_set_attribute( + this.pointer, + nameBytes.ptr, + nameBytes.len, + valueBytes.ptr, + valueBytes.len, + valueKind, + ) + )) + ))); + } + + #removeNativeAttribute(attributeName) { + return this.#mutateCurrentToken(() => runtime.withEncoded(attributeName, ({ ptr, len }) => ( + wasm.wp_html_api_rust_tag_processor_remove_attribute(this.pointer, ptr, len) + ))); + } + + #queueClassNameUpdate(className, operation) { + if (this.#isQuirksMode()) { + for (const queued of this.#classNameUpdates.values()) { + if ( + queued.name.length === className.name.length && + asciiLower(queued.name) === asciiLower(className.name) + ) { + queued.operation = operation; + return; + } + } + } + + 
this.#classNameUpdates.set(classUpdateMapKey(className), { + isIntegerKey: className.isIntegerKey, + name: className.name, + operation, + }); + } + + #flushClassNameUpdates() { + if (this.#classNameUpdates.size === 0 || this.parser_state !== STATE_MATCHED_TAG) { + return false; + } + + const updates = Array.from(this.#classNameUpdates.values()); + this.#classNameUpdates.clear(); + + let existingClass = this.#readNativeAttribute("class"); + if (existingClass === null || existingClass === true) { + existingClass = ""; + } + + const result = applyClassNameUpdates(existingClass, updates, this.#isQuirksMode()); + if (!result.modified) { + return false; + } + + if (result.className.length > 0) { + return this.#setNativeAttribute("class", runtime.encode(result.className), 2); + } + return this.#removeNativeAttribute("class"); + } + + #mutateCurrentToken(callback) { + const oldSpan = this.#currentSpan(); + const result = Boolean(callback()); + if (!result) { + return false; + } + + const newSpan = this.#currentSpan(); + if (oldSpan && newSpan) { + this.#adjustBookmarks(oldSpan, newSpan); + } + this.html = this.get_updated_html(false); + return true; + } + + #adjustBookmarks(oldSpan, newSpan) { + let delta = newSpan.length - oldSpan.length; + if (oldSpan.start !== newSpan.start && delta === 0) { + delta = newSpan.start - oldSpan.start; + } + if (delta === 0 && oldSpan.length === newSpan.length) { + return; + } + + for (const [name, bookmark] of this.bookmarks.entries()) { + if (bookmark.start === oldSpan.start) { + this.bookmarks.set(name, { start: bookmark.start, length: newSpan.length }); + } else if (bookmark.start > oldSpan.start) { + this.bookmarks.set(name, { start: bookmark.start + delta, length: bookmark.length }); + } + } + } + + #syncLexicalUpdates() { + this.html = this.get_updated_html(); + } + + #isQuirksMode() { + return this.compat_mode === WP_HTML_Tag_Processor.QUIRKS_MODE; + } + + #ensureLive() { + if (!this.pointer) { + throw new 
Error("WP_HTML_Tag_Processor has been destroyed."); + } + } + } + /** HTML processor built on WP_HTML_Tag_Processor that also tracks the stack of open elements, breadcrumbs, and the current namespace. */ + class WP_HTML_Processor extends WP_HTML_Tag_Processor { + static MAX_BOOKMARKS = 10000; + static PROCESS_NEXT_NODE = "process-next-node"; + static REPROCESS_CURRENT_NODE = "reprocess-current-node"; + static PROCESS_CURRENT_NODE = "process-current-node"; + static ERROR_UNSUPPORTED = "unsupported"; + static ERROR_EXCEEDED_MAX_BOOKMARKS = "exceeded-max-bookmarks"; + static CONSTRUCTOR_UNLOCK_CODE = "Use WP_HTML_Processor::create_fragment() instead of calling the class constructor directly."; + /** Builds a processor. When options is missing, null, not an object, or an array, it defaults to full-parser options for string html and to an empty object otherwise. */ + constructor(html, options = undefined) { + if ( + options === undefined || + options === null || + typeof options !== "object" || + Array.isArray(options) + ) { + options = typeof html === "string" + ? { + fullParser: true, + encodingConfidence: "certain", + } + : {}; + } + /* The base tag processor is always created with reportIncompleteTokens disabled. */ + super(html, { reportIncompleteTokens: false }); + this.last_error = null; + this.unsupported_exception = null; + this.current_virtual = null; + this.current_synthetic_token = null; + this.synthetic_eof_comment_consumed = false; + this.delayed_synthetic_tokens = []; + this.virtual_tokens = []; + this.pending_real_token = false; + this.pending_real_parser_state = null; + this.paragraph_adoption_preclosed_formatting_elements = []; + this.special_start_adoption_preclosed_formatting_elements = []; + this.deep_anchor_reconstructed_div_start_offsets = new Set(); + this.table_nobr_reconstructed_start_offsets = new Set(); + this.deferred_table_opener = null; + this.deferred_table_child_openers = []; + this.pending_foreign_table_fostered_text_table_index = null; + this.pending_nested_anchor_outer_closer_after_deferred_table_index = null; + this.pending_nested_anchor_active_removal_after_deferred_table = false; + this.pending_nested_anchor_div_active_removal_after_deferred_table = false; + this.skip_current_token = false; + this.is_html_fragment_context = Boolean(options.htmlFragmentContext); + this.raw_text_fragment_context = 
options.rawTextFragmentContext ?? null; + this.raw_text_fragment_consumed = false; + this.raw_text_fragment_updated_html = null; + this.plaintext_pending = false; + this.plaintext_content_start = null; + this.plaintext_text_consumed = false; + this.plaintext_updated_html = null; + /* An html-fragment context also counts as a full parser. */ this.is_full_parser = Boolean(options.fullParser || this.is_html_fragment_context); + this.encoding_confidence = options.encodingConfidence ?? (this.is_full_parser ? "tentative" : "irrelevant"); + this.full_parser_insertion_mode = this.is_html_fragment_context + ? "before_head" + : this.is_full_parser ? "initial" : "in_body"; + this.full_parser_scaffolded = !this.is_full_parser || this.is_html_fragment_context; + this.full_parser_seen_doctype = false; + this.frameset_ok = true; + this.pre_frameset_paragraph_ignored = false; + this.pre_frameset_ignored_element_depth = 0; + this.form_element_pointer = null; + this.preserve_in_body_ignored_start_tags = Boolean(options.preserveInBodyIgnoredStartTags); + this.context_node = options.contextNode ?? "BODY"; + this.context_namespace = options.contextNamespace ?? contextNamespace(this.context_node); + this.context_integration_node_type = options.contextIntegrationNodeType ?? null; + this.context_breadcrumbs = options.contextBreadcrumbs ?? ( + this.is_full_parser || this.is_html_fragment_context ? [] : [this.context_node] + ); + /* Seed the open-element stacks: HTML only for an html-fragment context, empty for a full parser, otherwise HTML plus the context node. */ this.open_elements = this.is_html_fragment_context ? ["HTML"] : this.is_full_parser ? [] : ["HTML", this.context_node]; + this.open_element_namespaces = this.is_html_fragment_context ? ["html"] : this.is_full_parser ? [] : ["html", this.context_namespace]; + this.open_element_integration_node_types = this.is_html_fragment_context ? [null] : this.is_full_parser ? 
[] : [null, this.context_integration_node_type]; + this.open_element_foster_parented_table_indices = this.open_elements.map(() => null); + this.detached_context_breadcrumbs = []; + this.detached_breadcrumbs = []; + this.active_formatting_elements = []; + this.ignored_select_formatting_elements = new Map(); + this.template_insertion_modes = []; + /* Remember how many open elements were seeded by the constructor. */ this.base_open_element_count = this.open_elements.length; + this.temporary_reopened_head = false; + this.breadcrumbs = this.#breadcrumbStack(); + this.current_namespace = this.is_html_fragment_context + ? "html" + : this.#childNamespaceForStackEntry( + this.context_node, + this.context_namespace, + this.context_integration_node_type, + ); + this.current_token_namespace = this.current_namespace; + this.compat_mode = options.compatMode ?? this.compat_mode; + super.change_parsing_namespace(this.current_namespace); + } + /** Creates a processor for html parsed inside the given context markup. Returns null when encoding is not "UTF-8" or when html or context is not a string; further null returns follow from context discovery. */ + static create_fragment(html, context = "<body>", encoding = "UTF-8") { + if (encoding !== "UTF-8" || typeof html !== "string" || typeof context !== "string") { + return null; + } + /* A context consisting of a single PLAINTEXT start tag is handled up front as a raw-text fragment. */ + const plaintextContextNode = WP_HTML_Processor.#plaintextFragmentContextNode(context); + if (plaintextContextNode !== null) { + return new this(html, { + compatMode: WP_HTML_Tag_Processor.NO_QUIRKS_MODE, + contextNode: plaintextContextNode, + contextNamespace: "html", + fullParser: false, + rawTextFragmentContext: plaintextContextNode, + }); + } + + // Context discovery must preserve otherwise ignored table-context tags like TR and TD. 
+ const contextProcessor = new this(`<!DOCTYPE html>${context}${RAW_TEXT_FRAGMENT_CONTEXT_END_TAGS}`, { + fullParser: true, + encodingConfidence: "certain", + preserveInBodyIgnoredStartTags: true, + }); + /* Walk the context markup with a full parser; the last real (non-virtual) start tag becomes the context node. */ + let contextNode = null; + let contextNamespaceName = null; + let contextIntegrationNodeType = null; + const contextBreadcrumbs = []; + while (contextProcessor.next_tag()) { + if (!contextProcessor.is_virtual() && !contextProcessor.is_tag_closer()) { + contextNode = contextProcessor.get_tag(); + contextNamespaceName = contextProcessor.get_namespace(); + contextIntegrationNodeType = contextProcessor.#integrationNodeTypeForCurrentStartTag( + contextNode, + contextNamespaceName, + ); + contextBreadcrumbs.push(contextNode); + } + } + /* Capture the compat mode before the context processor is destroyed. */ + const compatMode = contextProcessor.compat_mode; + contextProcessor.destroy(); + if (contextNode === null || contextNamespaceName === null) { + return null; + } + /* HTML raw-text context elements get a processor with rawTextFragmentContext set. */ + if ( + contextNamespaceName === "html" && + RAW_TEXT_FRAGMENT_CONTEXT_ELEMENTS.has(contextNode) + ) { + return new this(html, { + compatMode, + contextNode, + contextNamespace: contextNamespaceName, + contextIntegrationNodeType, + contextBreadcrumbs, + fullParser: false, + rawTextFragmentContext: contextNode, + }); + } + /* Void, special-atomic, and PLAINTEXT HTML context elements yield null. */ + if ( + contextNamespaceName === "html" && + ( + VOID_ELEMENTS.has(contextNode) || + SPECIAL_ATOMIC_ELEMENTS.has(contextNode) || + contextNode === "PLAINTEXT" + ) + ) { + return null; + } + /* An HTML context element selects the html-fragment mode. */ + if (contextNamespaceName === "html" && contextNode === "HTML") { + return new this(html, { + compatMode, + contextNode, + contextNamespace: contextNamespaceName, + contextIntegrationNodeType, + contextBreadcrumbs, + fullParser: false, + htmlFragmentContext: true, + encodingConfidence: "irrelevant", + }); + } + /* Every other context element yields a regular fragment processor. */ + return new this(html, { + compatMode, + contextNode, + contextNamespace: contextNamespaceName, + contextIntegrationNodeType, + contextBreadcrumbs, + fullParser: false, + }); + } + /** Creates a full-document parser with "certain" encoding confidence; returns null unless encoding is "UTF-8" and html is a string. */ + static create_full_parser(html, encoding = "UTF-8") { + if (encoding !== 
"UTF-8" || typeof html !== "string") { + return null; + } + + return new this(html, { + fullParser: true, + encodingConfidence: "certain", + }); + } + /** Returns "PLAINTEXT" when the context markup contains exactly one start tag and that tag is PLAINTEXT; otherwise null. */ + static #plaintextFragmentContextNode(context) { + const contextProcessor = new WP_HTML_Tag_Processor(context); + let contextNode = null; + let startTagCount = 0; + while (contextProcessor.next_tag()) { + if (!contextProcessor.is_tag_closer()) { + startTagCount += 1; + contextNode = normalizeTagNameForNamespace(contextProcessor.get_tag(), "html"); + } + } + contextProcessor.destroy(); + return startTagCount === 1 && contextNode === "PLAINTEXT" ? contextNode : null; + } + /** Coerces html to a string, parses it as a fragment with the default context, and returns the result of serialize(); returns null when no processor could be created. The processor is destroyed in all cases. */ + static normalize(html) { + const normalizedHtml = phpStringParameterCoerce(html, "html"); + const processor = this.create_fragment(normalizedHtml); + if (processor === null) { + return null; + } + + try { + return processor.serialize(); + } finally { + processor.destroy(); + } + } + /** Reports whether the coerced, ASCII-uppercased tag name is in VOID_ELEMENTS. */ + static is_void(tagName) { + return VOID_ELEMENTS.has(asciiUpper(phpInternalStringCoerce(tagName, "tag_name"))); + } + /** Reports whether the normalized tag input is a special boundary according to isSpecialBoundary. */ + static is_special(tagName) { + const normalized = normalizeSpecialTagInput(tagName); + return isSpecialBoundary(normalized.nodeName, normalized.namespaceName); + } + /** Advances to the next matching tag. query may be null (any tag opener), a string (treated as a one-item breadcrumb list), or an object with tag_name, class_name, breadcrumbs, match_offset, and tag_closers / visit_closers. Any other query returns false. */ + next_tag(query = null) { + const visitClosers = Boolean( + query && + typeof query === "object" && + (query.tag_closers === "visit" || query.visit_closers === true) + ); + + if (query === null) { + while (this.next_token()) { + if (this.get_token_type() !== "#tag") { + continue; + } + + if (!this.is_tag_closer() || visitClosers) { + return true; + } + } + return false; + } + + if (typeof query === "string") { + query = { breadcrumbs: [query] }; + } + + if (!query || typeof query !== "object") { + return false; + } + + const needsTag = query.tag_name == null + ? null + : asciiUpper(phpStringParameterCoerce(query.tag_name, "tag_name")); + const needsClass = typeof query.class_name === "string" ? 
query.class_name : null; + const hasBreadcrumbs = Array.isArray(query.breadcrumbs); + /* Without a breadcrumbs array, match on tag name and class only. */ + if (!hasBreadcrumbs) { + while (this.next_token()) { + if (this.get_token_type() !== "#tag") { + continue; + } + + if (this.is_tag_closer() && !visitClosers) { + continue; + } + + if (needsTag !== null && this.get_token_name() !== needsTag) { + continue; + } + + if (needsClass !== null && this.has_class(needsClass) !== true) { + continue; + } + + return true; + } + + return false; + } + /* With breadcrumbs, count matches down from match_offset (default 1). Tag closers are always skipped on this path and tag_name is not consulted. */ + let remaining = query.match_offset == null ? 1 : phpIntegerCast(query.match_offset); + if (remaining < 1) { + return false; + } + + while (remaining > 0 && this.next_token()) { + if (this.get_token_type() !== "#tag") { + continue; + } + + if (this.is_tag_closer()) { + if (!visitClosers || hasBreadcrumbs) { + continue; + } + } + + if (needsClass !== null && this.has_class(needsClass) !== true) { + continue; + } + + if (hasBreadcrumbs && !this.matches_breadcrumbs(query.breadcrumbs)) { + continue; + } + + remaining -= 1; + } + + return remaining === 0; + } + /** Returns false once an error is recorded. PROCESS_NEXT_NODE advances via next_token(); REPROCESS_CURRENT_NODE and PROCESS_CURRENT_NODE report whether parser_state is none of STATE_READY, STATE_COMPLETE, STATE_INCOMPLETE_INPUT. Any other value returns false. */ + step(nodeToProcess = WP_HTML_Processor.PROCESS_NEXT_NODE) { + if (this.last_error !== null) { + return false; + } + + if (nodeToProcess === WP_HTML_Processor.PROCESS_NEXT_NODE) { + return this.next_token(); + } + + if ( + nodeToProcess === WP_HTML_Processor.REPROCESS_CURRENT_NODE || + nodeToProcess === WP_HTML_Processor.PROCESS_CURRENT_NODE + ) { + return ( + this.parser_state !== STATE_READY && + this.parser_state !== STATE_COMPLETE && + this.parser_state !== STATE_INCOMPLETE_INPUT + ); + } + + return false; + } + /** Advances to the next token, serving queued delayed-synthetic and virtual tokens before pulling a real token from the base processor. Returns false once an error is recorded or no further token can be produced. */ + next_token() { + if (this.last_error !== null) { + return false; + } + + if (this.current_synthetic_token !== null) { + this.current_synthetic_token = null; + } + + if (this.delayed_synthetic_tokens.length > 0) { + return this.#consumeDelayedSyntheticToken(); + } + + if (this.virtual_tokens.length > 0) { + return this.#consumeVirtualToken(); + } + /* A real token that was held back while virtual tokens were served is processed now. */ + if (this.pending_real_token) { + this.current_virtual = null; + 
this.pending_real_token = false; + if (this.pending_real_parser_state !== null) { + this.parser_state = this.pending_real_parser_state; + this.pending_real_parser_state = null; + } + this.skip_current_token = false; + this.#subdivideCurrentTextToken(); + this.#updateTreeStateForCurrentToken(true); + if (this.last_error !== null) { + return false; + } + if (this.pending_real_token && this.virtual_tokens.length > 0) { + return this.#consumeVirtualToken(); + } + if (this.skip_current_token) { + return this.next_token(); + } + return true; + } + + if (this.#consumeRawTextFragmentToken()) { + return true; + } + /* With a raw-text fragment context set, nothing further is tokenized: refresh the breadcrumbs and stop. */ + if (this.raw_text_fragment_context !== null) { + this.breadcrumbs = this.#breadcrumbStack(); + return false; + } + + if (this.#consumePlaintextTextToken()) { + return true; + } + /* Once plaintext content has started, only the end-of-input steps below run; the base processor is not asked for more tokens. */ + if (this.plaintext_content_start !== null) { + if (this.#queueFullParserMissingBodyAtEof()) { + return this.#consumeVirtualToken(); + } + + if (this.#queueFosteredElementPopsBeforeDeferredTable()) { + return this.#consumeVirtualToken(); + } + + if (this.#consumeDeferredTableOpener()) { + return true; + } + + if (this.#queueEofVirtualClosers()) { + return this.#consumeVirtualToken(); + } + + this.breadcrumbs = this.#breadcrumbStack(); + return false; + } + /* Pull real tokens from the base processor until one is not skipped or queued virtual / delayed synthetic tokens take over. */ + this.current_virtual = null; + while (super.next_token()) { + this.skip_current_token = false; + this.#subdivideCurrentTextToken(); + this.#updateTreeStateForCurrentToken(true); + if (this.last_error !== null) { + return false; + } + if (this.pending_real_token && this.virtual_tokens.length > 0) { + return this.#consumeVirtualToken(); + } + if (!this.skip_current_token) { + return true; + } + if (this.#nextDelayedSyntheticTokenIsTableOpener()) { + return this.#consumeDelayedSyntheticToken(); + } + if (this.virtual_tokens.length > 0) { + return this.#consumeVirtualToken(); + } + this.current_virtual = null; + } + /* The base processor stopped on an incomplete token that is not treated as an end-of-file comment. */ + if (super.paused_at_incomplete_token() && !this.#incompleteTokenIsEofComment()) { + if 
(this.#skipIncompleteFullParserEndTag()) { + return this.next_token(); + } + + if (this.#skipIncompleteFullParserQuotedStartTag()) { + return this.next_token(); + } + + if (this.#skipIncompleteFullParserStartTag()) { + return this.next_token(); + } + + if (this.#skipIncompleteSelectBreakoutStartTag()) { + return this.next_token(); + } + + this.breadcrumbs = this.#breadcrumbStack(); + return false; + } + + if (this.#shouldConsumeEofCommentBeforeMissingBody() && this.#consumeFullParserEofComment()) { + return true; + } + /* A full parser that reaches this point without its scaffold queues it now, switching to quirks mode when no doctype was seen. */ + if (this.is_full_parser && !this.full_parser_scaffolded) { + this.full_parser_scaffolded = true; + if (!this.full_parser_seen_doctype) { + this.compat_mode = WP_HTML_Tag_Processor.QUIRKS_MODE; + } + this.#queueFullParserScaffold(); + return this.#consumeVirtualToken(); + } + + if (this.#queueFosteredElementPopsBeforeDeferredTable()) { + return this.#consumeVirtualToken(); + } + + if (this.#consumeDeferredTableOpener()) { + return true; + } + + if (this.#queueFullParserMissingBodyAtEof()) { + return this.#consumeVirtualToken(); + } + + if (this.#consumeFullParserEofComment()) { + return true; + } + + if (this.#queueEofVirtualClosers()) { + return this.#consumeVirtualToken(); + } + /* Nothing left to emit: refresh the breadcrumbs and report the end. */ + this.breadcrumbs = this.#breadcrumbStack(); + return false; + } + /** Returns the recorded error value, or null when none is set. */ + get_last_error() { + return this.last_error; + } + /** Returns the stored unsupported-exception value (null until one is recorded). */ + get_unsupported_exception() { + return this.unsupported_exception; + } + /** True when the current token is a virtual token rather than one read from the input. */ + is_virtual() { + return this.current_virtual !== null; + } + /** True when the current token is a synthetic token. */ + #isSyntheticToken() { + return this.current_synthetic_token !== null; + } + /** Virtual tokens are closers when their operation is "pop"; synthetic tokens are never closers; otherwise defers to the base processor. */ + is_tag_closer() { + if (this.is_virtual()) { + return this.current_virtual.operation === "pop"; + } + return this.#isSyntheticToken() ? false : super.is_tag_closer(); + } + /** Returns the tag name of the current token: the virtual or synthetic token's tagName when applicable, otherwise the base tag name normalized for the current token namespace. */ + get_tag() { + if (this.is_virtual()) { + return this.current_virtual.tagName; + } + + if (this.#isSyntheticToken()) { + return this.current_synthetic_token.tagName ?? 
null; + } + + return normalizeTagNameForNamespace(super.get_tag(), this.current_token_namespace); + } + /** Reads an attribute from the virtual token, the synthetic token, or the base processor, in that order of precedence. */ + get_attribute(name) { + if (this.is_virtual()) { + return this.#getVirtualAttribute(name); + } + return this.#isSyntheticToken() ? this.#getSyntheticAttribute(name) : super.get_attribute(name); + } + /** Lists attribute names with the given prefix, using the same virtual / synthetic / base precedence as get_attribute. */ + get_attribute_names_with_prefix(prefix) { + if (this.is_virtual()) { + return this.#getVirtualAttributeNamesWithPrefix(prefix); + } + return this.#isSyntheticToken() ? this.#getSyntheticAttributeNamesWithPrefix(prefix) : super.get_attribute_names_with_prefix(prefix); + } + /** Virtual and synthetic tokens cannot be modified: returns false for them, otherwise defers to the base processor. */ + set_attribute(name, value) { + return this.is_virtual() || this.#isSyntheticToken() ? false : super.set_attribute(name, value); + } + /** Returns false for virtual and synthetic tokens, otherwise defers to the base processor. */ + remove_attribute(name) { + return this.is_virtual() || this.#isSyntheticToken() ? false : super.remove_attribute(name); + } + /** Returns false for virtual and synthetic tokens, otherwise defers to the base processor. */ + add_class(className) { + return this.is_virtual() || this.#isSyntheticToken() ? false : super.add_class(className); + } + /** Returns false for virtual and synthetic tokens, otherwise defers to the base processor. */ + remove_class(className) { + return this.is_virtual() || this.#isSyntheticToken() ? false : super.remove_class(className); + } + /** Class lookup for the current token; synthetic tokens yield null. */ + has_class(className) { + if (this.is_virtual()) { + return this.#virtualHasClass(className); + } + return this.#isSyntheticToken() ? null : super.has_class(className); + } + /** Class list for the current token; synthetic tokens yield null. */ + class_list() { + if (this.is_virtual()) { + return this.#virtualClassList(); + } + return this.#isSyntheticToken() ? null : super.class_list(); + } + /** Virtual tokens never carry the self-closing flag; synthetic tokens report their own hasSelfClosingFlag as a boolean. */ + has_self_closing_flag() { + if (this.is_virtual()) { + return false; + } + return this.#isSyntheticToken() + ? Boolean(this.current_synthetic_token.hasSelfClosingFlag) + : super.has_self_closing_flag(); + } + /** Token name: the virtual token's tagName, the synthetic token's tokenName, or the base processor's value. */ + get_token_name() { + if (this.is_virtual()) { + return this.current_virtual.tagName; + } + return this.#isSyntheticToken() ? this.current_synthetic_token.tokenName : super.get_token_name(); + } + /** Token type: always "#tag" for virtual tokens, the synthetic token's tokenType, or the base processor's value. */ + get_token_type() { + if (this.is_virtual()) { + return "#tag"; + } + return this.#isSyntheticToken() ? 
this.current_synthetic_token.tokenType : super.get_token_type(); + } + + paused_at_incomplete_token() { + return this.synthetic_eof_comment_consumed || this.raw_text_fragment_context !== null + ? false + : super.paused_at_incomplete_token(); + } + + get_comment_type() { + if (this.is_virtual()) { + return null; + } + if (this.#isSyntheticToken()) { + return this.current_synthetic_token.tokenType === "#comment" + ? WP_HTML_Tag_Processor.COMMENT_AS_HTML_COMMENT + : null; + } + return super.get_comment_type(); + } + + get_full_comment_text() { + if (this.#isSyntheticToken()) { + return this.current_synthetic_token.tokenType === "#comment" + ? this.current_synthetic_token.commentText + : null; + } + return super.get_full_comment_text(); + } + + get_doctype_info() { + return this.is_virtual() || this.#isSyntheticToken() ? null : super.get_doctype_info(); + } + + subdivide_text_appropriately() { + return this.is_virtual() || this.#isSyntheticToken() ? false : super.subdivide_text_appropriately(); + } + + get_modifiable_text() { + if (this.is_virtual()) { + return ""; + } + if (this.#isSyntheticToken() && this.current_synthetic_token.tokenType === "#tag") { + return SPECIAL_ATOMIC_ELEMENTS.has(this.current_synthetic_token.tagName) + ? this.current_synthetic_token.modifiableText ?? "" + : ""; + } + return this.#isSyntheticToken() + ? this.current_synthetic_token.modifiableText ?? this.current_synthetic_token.commentText ?? 
"" + : super.get_modifiable_text(); + } + /** Replaces the modifiable text of the current token after coercing text to a string. Synthetic tokens accept the change only when they are writable "#text" tokens; virtual tokens and matched tags outside the HTML namespace are rejected; everything else defers to the base processor. */ + set_modifiable_text(text) { + const plaintextContent = phpStringParameterCoerce(text, "plaintext_content"); + if (this.#isSyntheticToken()) { + if ( + this.current_synthetic_token.tokenType !== "#text" || + this.current_synthetic_token.readOnly + ) { + return false; + } + /* Store the text with nulls replaced, then refresh whichever cached serialization applies to this mode. */ + this.current_synthetic_token.modifiableText = replaceNulls(plaintextContent); + if (this.raw_text_fragment_context !== null) { + this.raw_text_fragment_updated_html = this.#serializeTextToken(); + } + if (this.plaintext_content_start !== null) { + this.plaintext_updated_html = this.#serializeTextToken(); + } + return true; + } + + if ( + this.is_virtual() || + ( + this.parser_state === STATE_MATCHED_TAG && + this.get_namespace() !== "html" + ) + ) { + return false; + } + + return super.set_modifiable_text(plaintextContent); + } + /** Returns the updated document. When plaintext content was rewritten, the base output is cut at plaintext_content_start and the rewritten text appended; otherwise a rewritten raw-text fragment, if any, replaces the base output. */ + get_updated_html(flushClassNameUpdates = true) { + if (this.plaintext_updated_html != null && this.plaintext_content_start != null) { + return super.get_updated_html(flushClassNameUpdates).slice(0, this.plaintext_content_start) + this.plaintext_updated_html; + } + const html = super.get_updated_html(flushClassNameUpdates); + return this.raw_text_fragment_updated_html ?? 
html; + } + + set_bookmark(name) { + if (this.is_virtual() || this.#isSyntheticToken()) { + return false; + } + + const bookmarkName = phpInterpolatedStringCoerce(name, "bookmark_name"); + const bookmarkKey = phpArrayKeyParameterCoerce(bookmarkName, "bookmark_name"); + if (!super.set_bookmark(bookmarkName)) { + return false; + } + + this.bookmarks.set(bookmarkKey, { + ...this.bookmarks.get(bookmarkKey), + processorState: this.#snapshotProcessorState(), + }); + return true; + } + + release_bookmark(name) { + return super.release_bookmark(phpInterpolatedStringCoerce(name, "bookmark_name")); + } + + has_bookmark(name) { + return super.has_bookmark(phpInterpolatedStringCoerce(name, "bookmark_name")); + } + + seek(name) { + const bookmarkName = phpInterpolatedStringCoerce(name, "bookmark_name"); + const bookmarkKey = phpArrayKeyParameterCoerce(bookmarkName, "bookmark_name"); + const bookmark = this.bookmarks.get(bookmarkKey); + if (!bookmark || !super.seek(bookmarkName)) { + return false; + } + + if (bookmark.processorState) { + this.#restoreProcessorState(bookmark.processorState); + } + return true; + } + + change_parsing_namespace(namespaceName) { + const normalizedNamespaceName = phpStringParameterCoerce(namespaceName, "new_namespace"); + if (!super.change_parsing_namespace(normalizedNamespaceName)) { + return false; + } + + this.current_namespace = normalizedNamespaceName; + if ( + this.parser_state === STATE_READY || + this.parser_state === STATE_COMPLETE || + this.parser_state === STATE_INCOMPLETE_INPUT + ) { + this.current_token_namespace = namespaceName; + } + return true; + } + + get_namespace() { + return this.current_token_namespace; + } + + get_qualified_tag_name() { + const tagName = this.get_tag(); + if (tagName === null || this.get_namespace() === "html") { + return tagName; + } + + const lower = asciiLower(tagName); + return this.get_namespace() === "svg" ? 
qualifySvgTagName(lower) : lower; + } + /** Returns the attribute name adjusted for the current token's namespace; null when the current token is not a tag or attributeName is null. HTML names are returned as coerced. */ + get_qualified_attribute_name(attributeName) { + if (this.get_token_type() !== "#tag") { + return null; + } + if (attributeName === null) { + return null; + } + + const normalizedAttributeName = phpInternalStringCoerce(attributeName, "attribute_name"); + return this.get_namespace() === "html" + ? normalizedAttributeName + : qualifyForeignAttributeName(this.get_namespace(), normalizedAttributeName); + } + /** Returns the tokenExpectsCloser result for the given node, or for the current token when node is null; returns null when no token name is available. */ + expects_closer(node = null) { + const token = this.#normalizeExpectsCloserToken(node); + const tokenName = token.nodeName; + if (tokenName === null) { + return null; + } + + return tokenExpectsCloser( + tokenName, + token.namespaceName, + token.hasSelfClosingFlag, + ); + } + /** Resolves node name, namespace, and self-closing flag from node, falling back to the current token for missing fields, and returns them normalized; a missing name yields null name and namespace. */ + #normalizeExpectsCloserToken(node) { + let tokenName; + let namespaceName; + let hasSelfClosingFlag; + + if (node !== null) { + const token = phpTokenParameterCoerce(node, "node"); + tokenName = token.node_name ?? this.get_token_name(); + namespaceName = token.namespace ?? this.get_namespace(); + hasSelfClosingFlag = token.has_self_closing_flag ?? this.has_self_closing_flag(); + } else { + tokenName = this.get_token_name(); + namespaceName = this.get_namespace(); + hasSelfClosingFlag = this.has_self_closing_flag(); + } + + if (tokenName === null || tokenName === undefined) { + return { + nodeName: null, + namespaceName: null, + hasSelfClosingFlag: false, + }; + } + /* The namespace defaults to "html" and is lowercased before use. */ + const normalizedNamespace = asciiLower(phpStringParameterCoerce(namespaceName ?? 
"html", "namespace")); + let normalizedTokenName = phpStringParameterCoerce(tokenName, "node_name"); + /* HTML names are uppercased, except the literal "html" and names starting with "#". */ if (normalizedNamespace === "html" && normalizedTokenName !== "html" && normalizedTokenName[0] !== "#") { + normalizedTokenName = asciiUpper(normalizedTokenName); + } + + return { + nodeName: normalizedTokenName, + namespaceName: normalizedNamespace, + hasSelfClosingFlag: phpBooleanParameterCoerce(hasSelfClosingFlag, "has_self_closing_flag"), + }; + } + /** Returns a copy of the current breadcrumbs. */ + get_breadcrumbs() { + return [...this.breadcrumbs]; + } + /** Builds the breadcrumb list from the open elements below endIndex (clamped to the stack size), optionally followed by tokenName. Entries from a foster-parenting table up to, but not including, the fostered element are skipped, and detached breadcrumbs are spliced in at their recorded positions. */ + #breadcrumbStack(tokenName = null, endIndex = this.open_elements.length) { + this.#pruneDetachedBreadcrumbs(); + const stack = []; + const boundedEndIndex = Math.max(0, Math.min(endIndex, this.open_elements.length)); + const fosterParentedRange = this.#fosterParentedBreadcrumbRange(boundedEndIndex); + for (let i = 0; i < boundedEndIndex; i += 1) { + if ( + fosterParentedRange !== null && + i >= fosterParentedRange.tableIndex && + i < fosterParentedRange.fosteredIndex + ) { + continue; + } + /* Detached breadcrumbs recorded for this index are emitted before the open element itself. */ + for (const detached of this.detached_breadcrumbs) { + if (detached.index === i) { + stack.push(detached.tagName); + } + } + + stack.push(this.open_elements[i]); + /* Detached context breadcrumbs are emitted right after a root HTML entry. */ + if ( + i === 0 && + this.detached_context_breadcrumbs.length > 0 && + this.open_elements[0] === "HTML" + ) { + stack.push(...this.detached_context_breadcrumbs); + } + } + + if (tokenName !== null) { + stack.push(tokenName); + } + return stack; + } + /** Finds the first open element below endIndex whose recorded foster-parented table index lies before it; returns its index together with the table index, or null when there is none. */ + #fosterParentedBreadcrumbRange(endIndex) { + for (let i = 0; i < endIndex; i += 1) { + const tableIndex = this.open_element_foster_parented_table_indices[i] ?? 
null; + if (tableIndex !== null && tableIndex < i) { + return { + fosteredIndex: i, + tableIndex, + }; + } + } + + return null; + } + /** Returns the number of breadcrumbs recorded for the current token. */ + get_current_depth() { + return this.breadcrumbs.length; + } + /** Checks whether the current token matches the breadcrumb trail, compared from the innermost crumb outward. An empty list always matches, "*" matches any name, and other crumbs are compared ASCII-uppercased. */ + matches_breadcrumbs(breadcrumbs) { + const normalizedBreadcrumbs = phpArrayParameterCoerce(breadcrumbs, "breadcrumbs"); + + if (normalizedBreadcrumbs.length === 0) { + return true; + } + + const normalized = normalizedBreadcrumbs.map((crumb) => { + const normalizedCrumb = phpInternalStringCoerce(crumb, "breadcrumbs"); + return normalizedCrumb === "*" ? "*" : asciiUpper(normalizedCrumb); + }); + /* The innermost crumb is checked against the current tag name before walking the stack. */ const lastCrumb = normalized[normalized.length - 1]; + if (lastCrumb !== "*" && this.get_tag() !== lastCrumb) { + return false; + } + + let crumbIndex = normalized.length - 1; + for (let nodeIndex = this.breadcrumbs.length - 1; nodeIndex >= 0; nodeIndex -= 1) { + const crumb = normalized[crumbIndex]; + if (crumb !== "*" && this.breadcrumbs[nodeIndex] !== crumb) { + return false; + } + + crumbIndex -= 1; + if (crumbIndex < 0) { + return true; + } + } + /* The trail is longer than the current breadcrumbs. */ + return false; + } + /** Serializes every remaining token into one HTML string. Returns null unless parser_state is STATE_READY when called, and null when an error was recorded during the walk. */ + serialize() { + if (this.parser_state !== STATE_READY) { + return null; + } + + let html = ""; + let preserveLeadingNewlineFor = null; + while (this.next_token()) { + const tokenType = this.get_token_type(); + const tokenName = this.get_token_name(); + /* Directly after a PRE or LISTING opener, an extra newline is emitted when the next token is an HTML text child of that element starting with a newline. */ + if ( + preserveLeadingNewlineFor !== null && + tokenType === "#text" + ) { + if ( + this.get_namespace() === "html" && + this.breadcrumbs.at(-2) === preserveLeadingNewlineFor && + this.get_modifiable_text().startsWith("\n") + ) { + html += "\n"; + } + preserveLeadingNewlineFor = null; + } else if (preserveLeadingNewlineFor !== null) { + preserveLeadingNewlineFor = null; + } + + if (!this.#shouldOmitTrailingIncompleteSyntaxTokenFromSerialization()) { + html += this.serialize_token(); + } + + if ( + tokenType === "#tag" && + !this.is_tag_closer() && + this.get_namespace() === "html" && + (tokenName === "PRE" || tokenName === "LISTING") + ) { + 
preserveLeadingNewlineFor = tokenName; + } + } + + return this.get_last_error() === null ? html : null; + } + /** Returns true when the current token's span ends exactly at the end of the input and it is either a comment that incompleteBogusCommentAtEof flags, or an HTML special-atomic start tag for which no closer is found. */ + #shouldOmitTrailingIncompleteSyntaxTokenFromSerialization() { + const span = this.#nativeCurrentSpan(); + if (span === null || span.start + span.length !== this.html.length) { + return false; + } + + const tokenType = this.get_token_type(); + const tokenHtml = this.html.slice(span.start); + if ( + (tokenType === "#comment" || tokenType === "#funky-comment") && + incompleteBogusCommentAtEof(tokenHtml) + ) { + return true; + } + /* From here on, only HTML-namespace start tags are considered. */ + if ( + tokenType !== "#tag" || + this.get_namespace() !== "html" || + this.is_tag_closer() + ) { + return false; + } + + const tagName = this.get_tag(); + if (!SPECIAL_ATOMIC_ELEMENTS.has(tagName)) { + return false; + } + + const startTag = completeStartTagAt(this.html, span.start); + return startTag !== null && + startTag.tagName === tagName && + findSpecialAtomicCloserEnd(this.html, startTag.end, tagName) === null; + } + /** Serializes the current token according to its type. Presumptuous tags, unknown types, and virtual tags flagged skipSerialization produce an empty string; funky comments are written as regular HTML comments. */ + serialize_token() { + const tokenType = this.get_token_type(); + + switch (tokenType) { + case "#doctype": + return serializeDoctype(this.get_doctype_info()); + case "#text": + return this.#serializeTextToken(); + case "#presumptuous-tag": + return ""; + case "#funky-comment": + case "#comment": + return `<!--${this.get_full_comment_text() ?? ""}-->`; + case "#cdata-section": + return `<![CDATA[${this.get_modifiable_text() ?? 
""}]]>`; + case "#tag": + if (this.is_virtual() && this.current_virtual.skipSerialization) { + return ""; + } + return this.#serializeCurrentTag(); + default: + return ""; + } + } + + #updateTreeStateForCurrentToken(allowVirtualPreclosures = true) { + const tokenType = this.get_token_type(); + const tokenName = this.get_token_name(); + + if (tokenName === null) { + this.breadcrumbs = this.#breadcrumbStack(); + this.current_token_namespace = this.current_namespace; + return; + } + + if ( + allowVirtualPreclosures && + this.#queueDeferredTableOpenerBeforeCurrentToken(tokenType) + ) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + this.skip_current_token = true; + return; + } + + if (this.#applyFullParserInsertionMode(tokenType, tokenName)) { + return; + } + + if (!this.is_full_parser && tokenType === "#doctype") { + this.skip_current_token = true; + return; + } + + if (tokenType === "#doctype" && this.current_namespace !== "html") { + this.skip_current_token = true; + return; + } + + if (tokenType !== "#tag") { + if (this.#shouldIgnoreTextInColumnGroup(tokenType)) { + this.skip_current_token = true; + return; + } + + if (this.#representPendingForeignTableFosteredText(tokenType)) { + return; + } + + if ( + tokenType === "#text" && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesDeferredTableChildOpener() + ) { + this.#deferCurrentTextAsTableChild(); + return; + } + + if (tokenType === "#text" && this.#isInTableTextContext()) { + if (this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE) { + this.skip_current_token = true; + return; + } + + if ( + this.text_node_classification !== WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE || + this.#currentTextChunkPrecedesFosteredTableText() + ) { + if (this.#representFosteredTextBeforeDeferredTable()) { + return; + } + if (this.#queueReconstructActiveFormattingElementsBeforeDeferredTable()) { + 
this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + } + + if ( + tokenType === "#text" && + this.#shouldDeferCurrentTextInsideDeferredTable() + ) { + return; + } + + if ( + allowVirtualPreclosures && + tokenType === "#text" && + this.text_node_classification !== WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE && + ( + this.#queueNestedAnchorOuterCloserAfterDeferredTable() || + this.#queueSpecialStartAdoptionPreclosedFormattingElementsForText() || + this.#queueParagraphAdoptionPreclosedFormattingElementsForText() || + ( + !this.#isInTableTextContext() && + this.#queueReconstructActiveFormattingElements() + ) + ) + ) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if ( + this.is_full_parser && + this.current_namespace !== "html" && + tokenType === "#text" && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_GENERIC + ) { + this.frameset_ok = false; + } + + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(tokenName); + return; + } + + const tagName = this.#getCurrentTreeTagName(); + if (tagName === null) { + this.breadcrumbs = this.#breadcrumbStack(); + this.current_token_namespace = this.current_namespace; + return; + } + + if (this.is_tag_closer()) { + const closingNamespace = this.#namespaceForEndTag(tagName); + + if ( + closingNamespace === "html" && + this.#shouldIgnoreEndTagInTableContext(tagName) + ) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if (this.#skipDeferredTableTemplateCloser(tagName, closingNamespace)) { + return; + } + + if ( + this.#skipDeferredTableStructureCloserBeforeFosteredText( + tagName, + closingNamespace, + this.#lastOpenElementIndex(tagName, closingNamespace), + ) + ) { + return; + } + 
+ if ( + this.deferred_table_opener !== null && + closingNamespace === "html" && + !ADOPTION_AGENCY_END_TAGS.has(tagName) && + this.#isSkippedFosterLookaheadEndTag(tagName) + ) { + this.#ignoreCurrentToken(); + return; + } + + if (allowVirtualPreclosures && this.#queueVirtualPreclosuresForEndTag(tagName)) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if ( + closingNamespace === "html" && + this.#hasOpenHtmlElement("SELECT") && + !SELECT_ALLOWED_END_TAGS.has(tagName) + ) { + this.#ignoreCurrentToken(); + return; + } + + let existingIndex = this.#lastOpenElementIndex(tagName, closingNamespace); + if (tagName === "LI" && closingNamespace === "html") { + existingIndex = this.#findOpenElementBeforeBoundary("LI", LIST_ITEM_SCOPE_BOUNDARIES); + } + if (tagName === "P" && closingNamespace === "html") { + existingIndex = this.#findOpenElementBeforeBoundary("P", BUTTON_SCOPE_BOUNDARIES); + if (existingIndex === -1) { + if (this.#shouldBailUnsupportedTableFosterParenting(tagName, true)) { + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.virtual_tokens.push( + { + operation: "push", + tagName: "P", + namespaceName: "html", + fosterParentedTableIndex: this.#missingParagraphCloserFosterParentedTableIndex(), + }, + { + operation: "pop", + tagName: "P", + namespaceName: "html", + }, + ); + this.skip_current_token = true; + return; + } + } + + if (this.#skipDeferredTableStructureCloserBeforeFosteredText(tagName, closingNamespace, existingIndex)) { + return; + } + + if (!this.is_full_parser && existingIndex !== -1 && existingIndex < this.base_open_element_count) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if ( + closingNamespace === "html" && + existingIndex === -1 && + 
this.#consumeIgnoredSelectFormattingElement(tagName) + ) { + this.#ignoreCurrentToken(); + return; + } + + if (this.#shouldIgnoreEndTagClosingOutsideTemplate(tagName, closingNamespace, existingIndex)) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if ( + closingNamespace === "html" && + tagName !== "TEMPLATE" && + existingIndex !== -1 && + this.#hasForeignIntegrationPointAfter(existingIndex) + ) { + if ( + this.#hasElementInTableScope("TABLE") && + FORM_TABLE_DESCENDANT_ELEMENTS.has(tagName) + ) { + if (this.#ignoreCrossedForeignTableStructureCloserBeforeFosteredText(tagName)) { + return; + } + + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if ( + allowVirtualPreclosures && + tagName === "FORM" && + closingNamespace === "html" && + existingIndex !== -1 && + this.#hasOnlyTableElementsAfter(existingIndex) + ) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.form_element_pointer = null; + if (this.open_elements.length === existingIndex + 2) { + this.skip_current_token = true; + return; + } + this.#queueVirtualPopsFrom(existingIndex + 1); + this.skip_current_token = true; + return; + } + + if (tagName === "FORM" && closingNamespace === "html") { + if (this.form_element_pointer === "detached") { + this.form_element_pointer = null; + this.#ignoreCurrentToken(); + return; + } + if (this.form_element_pointer === "open") { + this.form_element_pointer = null; + } + } + + if (this.#shouldDetachFormCloser(tagName, closingNamespace, existingIndex)) { + this.#detachFormElementFromOpenStack(existingIndex); + return; + } + + if ( + allowVirtualPreclosures && + this.#queueParagraphAdoptionReconstructionForEndTag(tagName, 
closingNamespace, existingIndex) + ) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if ( + allowVirtualPreclosures && + this.#queueSpecialStartAdoptionReconstructionForEndTag(tagName, closingNamespace, existingIndex) + ) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if (this.#shouldIgnoreAdoptionAgencyEndTagWithStaleEntry(tagName, closingNamespace)) { + this.#removeStaleActiveFormattingElementsForClose(tagName); + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if (this.#shouldIgnoreAdoptionAgencyEndTagOutsideScope(tagName, closingNamespace, existingIndex)) { + this.#removeActiveFormattingElementsForClose(tagName); + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if (this.#shouldIgnoreCappedDeepAnchorEndTag(tagName, closingNamespace, existingIndex)) { + this.#removeActiveFormattingElementsForClose(tagName); + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if (this.#shouldBailUnsupportedAdoptionAgency(tagName, closingNamespace, existingIndex)) { + this.#bailUnsupported("Cannot extract common ancestor in adoption agency algorithm."); + return; + } + + if (this.#shouldIgnoreAdoptionAgencyEndTagFallback(tagName, closingNamespace, existingIndex)) { + this.#ignoreCurrentToken(); + return; + } + + if (HEADING_ELEMENTS.has(tagName) && closingNamespace === "html") { + const headingIndex = this.#findOpenElementBeforeBoundary( + (nodeName) => HEADING_ELEMENTS.has(nodeName), + DEFAULT_SCOPE_BOUNDARIES, + ); + if (headingIndex !== -1 && (existingIndex === -1 || headingIndex > existingIndex)) { + this.current_token_namespace = 
this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.#queueVirtualPopsFrom(headingIndex); + this.skip_current_token = true; + return; + } + } + + if ( + allowVirtualPreclosures && + closingNamespace !== "html" && + existingIndex !== -1 && + existingIndex < this.open_elements.length - 1 + ) { + this.#queueVirtualPopsFrom(existingIndex + 1); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if ( + allowVirtualPreclosures && + existingIndex !== -1 && + existingIndex < this.open_elements.length - 1 && + tagName !== "HTML" && + tagName !== "BODY" && + tagName !== "TEMPLATE" && + MODELED_SCOPED_END_TAGS.has(tagName) && + this.#hasHtmlScopeBoundaryAfter(existingIndex, DEFAULT_SCOPE_BOUNDARIES) + ) { + if (this.#shouldBailUnsupportedTableFosterParenting(tagName, true)) { + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if ( + allowVirtualPreclosures && + existingIndex !== -1 && + existingIndex < this.open_elements.length - 1 && + tagName !== "HTML" && + tagName !== "BODY" && + MODELED_SCOPED_END_TAGS.has(tagName) && + hasSpecialBoundaryAfter( + this.open_elements, + this.open_element_namespaces, + existingIndex, + ) + ) { + if (this.#shouldBailUnsupportedTableFosterParenting(tagName, true)) { + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + this.#queueVirtualPopsFrom(existingIndex + 1); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if ( + existingIndex === -1 || + ( + !MODELED_SCOPED_END_TAGS.has(tagName) && + hasSpecialBoundaryAfter( + this.open_elements, + this.open_element_namespaces, + existingIndex, + ) + ) + ) { + if (closingNamespace === "html" && FORMATTING_ELEMENTS.has(tagName)) { + 
this.#removeActiveFormattingElementsForClose(tagName); + } + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if (this.#shouldBailUnsupportedTableFosterParenting(tagName, true)) { + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + + if (allowVirtualPreclosures && existingIndex < this.open_elements.length - 1) { + this.#queueVirtualPopsFrom(existingIndex + 1); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + this.current_token_namespace = this.open_element_namespaces[existingIndex]; + if (closingNamespace === "html") { + this.#applyTemplateInsertionModeForEndTag(tagName); + } + if (tagName === "TEMPLATE" && closingNamespace === "html") { + this.#clearActiveFormattingElementsForTemplateClose(existingIndex); + this.#popTemplateInsertionMode(); + } + if (closingNamespace === "html" && FORMATTING_ELEMENTS.has(tagName)) { + this.#removeActiveFormattingElementsForClose(tagName); + } + this.open_elements = this.open_elements.slice(0, existingIndex); + this.open_element_namespaces = this.open_element_namespaces.slice(0, existingIndex); + this.open_element_integration_node_types = this.open_element_integration_node_types.slice(0, existingIndex); + this.open_element_foster_parented_table_indices = this.open_element_foster_parented_table_indices.slice(0, existingIndex); + if (closingNamespace === "html" && TABLE_CELL_ELEMENTS.has(tagName)) { + this.#clearActiveFormattingElementsUpToLastMarker(); + } + if (closingNamespace === "html" && ACTIVE_FORMATTING_MARKER_ELEMENTS.has(tagName)) { + this.#clearActiveFormattingElementsUpToLastMarker(); + } + this.breadcrumbs = this.#breadcrumbStack(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + return; + } + + if ( + allowVirtualPreclosures && + this.#queueNestedAnchorOuterCloserAfterDeferredTable() + ) { + this.pending_real_token = true; 
+ this.pending_real_parser_state = this.parser_state; + return; + } + + const fosterParentedTableIndex = this.#fosterParentedStartTableIndex(tagName); + + if ( + allowVirtualPreclosures && + this.#shouldReconstructActiveFormattingBeforeFosteredStart(tagName) && + !this.#alreadyReconstructedTableNobrForCurrentToken(tagName) && + this.#queueReconstructActiveFormattingElementsBeforeDeferredTable() + ) { + this.#rememberTableNobrReconstructionForCurrentToken(tagName); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if (this.#representFosteredVoidStartBeforeDeferredTable(tagName)) { + return; + } + + if (this.#representFosteredAtomicStartBeforeDeferredTable(tagName)) { + return; + } + + if (this.#shouldIgnoreTableContextTableStartTag(tagName)) { + this.#ignoreCurrentToken(); + return; + } + + if (this.#shouldIgnoreTableRowContextBoundaryStartTag(tagName)) { + this.#ignoreCurrentToken(); + return; + } + + if (this.#shouldIgnoreTableSectionContextBoundaryStartTag(tagName)) { + this.#ignoreCurrentToken(); + return; + } + + if (this.#shouldIgnoreCaptionContextBoundaryStartTag(tagName)) { + this.#ignoreCurrentToken(); + return; + } + + if (this.#shouldIgnoreColgroupFragmentFosteredStartTag(tagName)) { + this.#ignoreCurrentToken(); + return; + } + + if ( + allowVirtualPreclosures && + ( + this.#queueNestedAnchorBlockAdoptionPreclosure(tagName) || + this.#queueDeepFormattingElementSpecialStartPreclosure(tagName) || + this.#queueFormattingElementSpecialStartPreclosure(tagName) || + this.#queueFormattingElementAncestorSpecialStartPreclosure(tagName) || + this.#queueParagraphAdoptionFormattingPreclosure(tagName) || + this.#queueVirtualPreclosuresForStartTag(tagName) || + this.#queueVirtualOpenersForStartTag(tagName) + ) + ) { + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + + if (this.#shouldIgnoreInBodyFragmentStartTag(tagName)) { + this.#ignoreCurrentToken(); + 
return; + } + + if ( + fosterParentedTableIndex === -1 && + this.#shouldBailUnsupportedTableFosterParenting(tagName, false) + ) { + this.#bailUnsupported("Foster parenting is not supported."); + return; + } + + if (this.#applyTemplateInsertionModeForStartTag(tagName)) { + return; + } + + if ( + this.current_namespace === "html" && + (!this.is_full_parser || this.full_parser_insertion_mode === "in_body") && + !this.preserve_in_body_ignored_start_tags && + this.template_insertion_modes.length === 0 && + !this.#isInTableInsertionContext() && + this.#shouldIgnoreInBodyStartTag(tagName) + ) { + this.#ignoreCurrentToken(); + return; + } + + if ( + allowVirtualPreclosures && + this.current_namespace === "html" && + tagName === "SELECT" + ) { + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex !== -1) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.#queueVirtualPopsFrom(selectIndex); + this.skip_current_token = true; + return; + } + } + + if ( + this.current_namespace === "html" && + SELECT_BREAKOUT_START_TAGS.has(tagName) + ) { + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex !== -1 && selectIndex < this.base_open_element_count) { + if (tagName === "TEXTAREA") { + this.#seekPastCurrentStartTag(); + } + this.#ignoreCurrentToken(); + return; + } + } + + if ( + this.current_namespace === "html" && + this.#hasOpenHtmlElement("SELECT") && + !SELECT_ALLOWED_START_TAGS.has(tagName) + ) { + this.#rememberIgnoredSelectFormattingElement(tagName); + this.#ignoreCurrentToken(); + return; + } + + if ( + this.current_namespace === "html" && + tagName === "FORM" && + !this.#hasOpenHtmlElement("TEMPLATE") && + ( + this.form_element_pointer !== null || + ( + this.#hasOpenHtmlElement("FORM") && + !this.#isInTableInsertionContext() + ) + ) + ) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + 
this.skip_current_token = true; + return; + } + + if ( + this.current_namespace === "html" && + tagName === "MENUITEM" && + this.#hasOpenHtmlElement("SELECT") + ) { + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return; + } + + if ( + this.is_full_parser && + this.encoding_confidence === "tentative" && + this.current_namespace === "html" && + tagName === "META" + ) { + const unsupportedEncodingMessage = this.#unsupportedEncodingMetaMessage(); + if (unsupportedEncodingMessage !== null) { + this.#bailUnsupported(unsupportedEncodingMessage); + return; + } + } + + const entersPlaintext = this.current_namespace === "html" && tagName === "PLAINTEXT"; + + this.#applySimpleHtmlSemanticClosures(tagName); + if ( + allowVirtualPreclosures && + ( + FORMATTING_ELEMENTS.has(tagName) || + ACTIVE_FORMATTING_RECONSTRUCTING_START_TAGS.has(tagName) || + this.#shouldReconstructActiveAnchorForStartTag(tagName) || + this.#shouldReconstructActiveFontForStartTag(tagName) + ) && + !this.#alreadyReconstructedTableNobrForCurrentToken(tagName) && + this.#queueReconstructActiveFormattingElements() + ) { + this.#rememberTableNobrReconstructionForCurrentToken(tagName); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return; + } + this.current_token_namespace = this.#namespaceForCurrentStartTag(super.get_tag()); + const currentTokenIntegrationNodeType = this.#integrationNodeTypeForCurrentStartTag( + tagName, + this.current_token_namespace, + ); + this.open_elements.push(tagName); + this.open_element_namespaces.push(this.current_token_namespace); + this.open_element_integration_node_types.push(currentTokenIntegrationNodeType); + this.open_element_foster_parented_table_indices.push( + fosterParentedTableIndex === -1 + ? 
this.#currentFosterParentedTableIndex() + : fosterParentedTableIndex, + ); + if (this.current_token_namespace === "html" && tagName === "TEMPLATE") { + this.template_insertion_modes.push("in_template"); + } + if (this.current_token_namespace === "html" && TABLE_CELL_ELEMENTS.has(tagName)) { + this.#insertActiveFormattingMarker(); + } + if (this.current_token_namespace === "html" && ACTIVE_FORMATTING_MARKER_ELEMENTS.has(tagName)) { + this.#insertActiveFormattingMarker(); + } + if (this.current_token_namespace === "html" && FORMATTING_ELEMENTS.has(tagName)) { + this.#insertActiveFormattingElement(this.#createActiveFormattingElement(tagName)); + } + const shouldPopTableFormImmediately = this.#shouldPopTableFormImmediately( + tagName, + this.current_token_namespace, + ); + if ( + this.current_token_namespace === "html" && + tagName === "FORM" && + !this.#hasOpenHtmlElement("TEMPLATE") + ) { + this.form_element_pointer = shouldPopTableFormImmediately ? "detached" : "open"; + } + this.breadcrumbs = this.#breadcrumbStack(); + + if (this.#shouldDeferCurrentTableOpener(tagName, this.current_token_namespace)) { + this.deferred_table_opener = { + tokenType: "#tag", + tokenName: tagName, + tagName, + namespaceName: this.current_token_namespace, + attributes: this.#currentTokenAttributes(), + breadcrumbs: [...this.breadcrumbs], + hasSelfClosingFlag: this.has_self_closing_flag(), + }; + this.skip_current_token = true; + } + + if (this.#shouldDeferCurrentTableChildOpener(tagName, this.current_token_namespace)) { + this.deferred_table_child_openers.push({ + tokenType: "#tag", + tokenName: tagName, + tagName, + namespaceName: this.current_token_namespace, + attributes: this.#currentTokenAttributes(), + breadcrumbs: [...this.breadcrumbs], + hasSelfClosingFlag: this.has_self_closing_flag(), + modifiableText: SPECIAL_ATOMIC_ELEMENTS.has(tagName) ? 
this.#currentSpecialAtomicText(tagName) : "",
+      });
+      this.skip_current_token = true;
+    }
+
+    if (this.#shouldDeferCurrentStartInsideDeferredTable(tagName, this.current_token_namespace)) {
+      this.deferred_table_child_openers.push({
+        tokenType: "#tag",
+        tokenName: tagName,
+        tagName,
+        namespaceName: this.current_token_namespace,
+        attributes: this.#currentTokenAttributes(),
+        breadcrumbs: [...this.breadcrumbs],
+        hasSelfClosingFlag: this.has_self_closing_flag(),
+      });
+      this.skip_current_token = true;
+    }
+    // A token that expects no closer is popped straight back off all four parallel open-element stacks.
+    if (!tokenExpectsCloser(tagName, this.current_token_namespace, this.has_self_closing_flag())) {
+      this.open_elements.pop();
+      this.open_element_namespaces.pop();
+      this.open_element_integration_node_types.pop();
+      this.open_element_foster_parented_table_indices.pop();
+      this.#setCurrentNamespace(this.#namespaceForStackTop());
+    } else {
+      this.#setCurrentNamespace(this.#childNamespaceForStackEntry(
+        tagName,
+        this.current_token_namespace,
+        currentTokenIntegrationNodeType,
+      ));
+      if (shouldPopTableFormImmediately) {
+        this.virtual_tokens.push({
+          operation: "pop",
+          tagName,
+          namespaceName: this.current_token_namespace,
+          skipSerialization: false,
+        });
+      }
+    }
+    if (entersPlaintext) { // flag is read (and cleared) by #consumePlaintextTextToken()
+      this.plaintext_pending = true;
+      this.#queueReconstructActiveFormattingElements();
+    }
+
+    this.#closeTemporaryReopenedHeadAfterCurrentToken();
+    this.#bailIfExceededMaxBookmarks();
+  }
+  /** Dequeues the head of delayed_synthetic_tokens and makes it the current synthetic token (parser state, text classification, namespace, breadcrumbs); always returns true. The queue must be non-empty: shift() on an empty array yields undefined and the tokenType read below would throw. */
+  #consumeDelayedSyntheticToken() {
+    const token = this.delayed_synthetic_tokens.shift();
+    this.current_virtual = null;
+    this.current_synthetic_token = token;
+    this.skip_current_token = false;
+    this.parser_state = token.tokenType === "#comment"
+      ? STATE_COMMENT
+      : token.tokenType === "#tag" ? STATE_MATCHED_TAG : STATE_TEXT_NODE;
+    if (token.tokenType === "#text") { // an explicit textClassification on the token wins over the whitespace test
+      this.text_node_classification = token.textClassification ?? (
+        splitHtmlWhitespace(token.modifiableText ?? "").length === 0
+          ?
WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE
+          : WP_HTML_Tag_Processor.TEXT_IS_GENERIC
+      );
+    }
+    this.current_token_namespace = token.namespaceName ?? this.current_namespace;
+    this.breadcrumbs = [...token.breadcrumbs];
+    return true;
+  }
+  /** Peeks, without dequeuing, whether the head of delayed_synthetic_tokens is a "#tag" token named TABLE in the "html" namespace. */
+  #nextDelayedSyntheticTokenIsTableOpener() {
+    const token = this.delayed_synthetic_tokens[0] ?? null;
+    return (
+      token !== null &&
+      token.tokenType === "#tag" &&
+      token.tagName === "TABLE" &&
+      token.namespaceName === "html"
+    );
+  }
+  /** Appends the deferred table opener (when set) followed by every deferred table child opener to delayed_synthetic_tokens, then resets both deferred slots. Returns false, changing nothing, when neither slot holds anything. */
+  #queueDeferredTableOpener() {
+    if (this.deferred_table_opener === null && this.deferred_table_child_openers.length === 0) {
+      return false;
+    }
+
+    if (this.deferred_table_opener !== null) {
+      this.delayed_synthetic_tokens.push(this.deferred_table_opener);
+    }
+    this.delayed_synthetic_tokens.push(...this.deferred_table_child_openers);
+    this.deferred_table_opener = null;
+    this.deferred_table_child_openers = [];
+    return true;
+  }
+  /** Flushes the deferred table openers into the delayed queue and, if anything was flushed, consumes the queue head. NOTE(review): the head is the table opener only when the delayed queue was empty beforehand — confirm callers guarantee that. */
+  #consumeDeferredTableOpener() {
+    return this.#queueDeferredTableOpener() && this.#consumeDelayedSyntheticToken();
+  }
+  /** Dequeues the next virtual token, exposes it as a matched tag and applies its "push" or "pop" operation to the open-element stacks. Returns true while last_error is null. virtual_tokens must be non-empty on entry (the namespaceName read below would throw on undefined). */
+  #consumeVirtualToken() {
+    const token = this.virtual_tokens.shift();
+    this.current_virtual = token;
+    this.skip_current_token = false;
+    this.parser_state = STATE_MATCHED_TAG;
+    this.current_token_namespace = token.namespaceName;
+    // "push": open a virtual element on all four parallel stacks; absent per-token fields fall back to null / the current foster-parented table index.
+    if (token.operation === "push") {
+      this.open_elements.push(token.tagName);
+      this.open_element_namespaces.push(token.namespaceName);
+      this.open_element_integration_node_types.push(token.integrationNodeType ?? null);
+      this.open_element_foster_parented_table_indices.push(
+        token.fosterParentedTableIndex ?? this.#currentFosterParentedTableIndex(),
+      );
+      this.breadcrumbs = this.#breadcrumbStack();
+      this.#setCurrentNamespace(this.#childNamespaceForStackEntry(
+        token.tagName,
+        token.namespaceName,
+        token.integrationNodeType ??
null,
+      ));
+      this.#bailIfExceededMaxBookmarks();
+    } else if (token.operation === "pop") { // truncates the stacks at the index found by #lastOpenElementIndex(), dropping that element and everything above it
+      const existingIndex = this.#lastOpenElementIndex(token.tagName, token.namespaceName);
+      if (existingIndex !== -1) {
+        if (token.tagName === "TEMPLATE" && token.namespaceName === "html") {
+          this.#clearActiveFormattingElementsForTemplateClose(existingIndex);
+          this.#popTemplateInsertionMode();
+        } else if (token.namespaceName === "html") {
+          this.#applyTemplateInsertionModeForEndTag(token.tagName);
+        }
+        if (token.skipSerialization && existingIndex < this.base_open_element_count) { // pop reaches below base_open_element_count: record what was detached and lower the base to the popped index
+          const detachedContextBreadcrumbs = (
+            existingIndex === 1 &&
+            this.base_open_element_count === 2 &&
+            this.context_breadcrumbs.length > 0
+          )
+            ? this.context_breadcrumbs
+            : this.open_elements.slice(existingIndex, this.base_open_element_count);
+          this.detached_context_breadcrumbs.unshift(
+            ...detachedContextBreadcrumbs,
+          );
+          this.base_open_element_count = existingIndex;
+        }
+        this.open_elements = this.open_elements.slice(0, existingIndex);
+        this.open_element_namespaces = this.open_element_namespaces.slice(0, existingIndex);
+        this.open_element_integration_node_types = this.open_element_integration_node_types.slice(0, existingIndex);
+        this.open_element_foster_parented_table_indices = this.open_element_foster_parented_table_indices.slice(0, existingIndex);
+        if (
+          token.namespaceName === "html" &&
+          (TABLE_CELL_ELEMENTS.has(token.tagName) || ACTIVE_FORMATTING_MARKER_ELEMENTS.has(token.tagName))
+        ) {
+          this.#clearActiveFormattingElementsUpToLastMarker();
+        }
+        this.#setCurrentNamespace( // a non-serialized pop made while a real token is still pending forces "html" instead of the stack-top namespace
+          token.skipSerialization && this.pending_real_token
+            ? "html"
+            : this.#namespaceForStackTop(),
+        );
+      }
+      this.breadcrumbs = this.#breadcrumbStack();
+    }
+
+    return this.last_error === null;
+  }
+  /** Captures tree-construction state as a plain object in the shape #restoreProcessorState() reads: arrays are copied, and queued tokens / breadcrumb objects are cloned one level deep with their breadcrumbs and attributes copied. NOTE(review): only the fields listed are captured (plaintext_pending, for one, is not) — confirm that is intentional. */
+  #snapshotProcessorState() {
+    return {
+      openElements: [...this.open_elements],
+      openElementNamespaces: [...this.open_element_namespaces],
+      openElementIntegrationNodeTypes: [...this.open_element_integration_node_types],
+      openElementFosterParentedTableIndices: [...this.open_element_foster_parented_table_indices],
+      detachedContextBreadcrumbs: [...this.detached_context_breadcrumbs],
+      detachedBreadcrumbs: this.detached_breadcrumbs.map((breadcrumb) => ({ ...breadcrumb })),
+      breadcrumbs: [...this.breadcrumbs],
+      currentNamespace: this.current_namespace,
+      currentTokenNamespace: this.current_token_namespace,
+      delayedSyntheticTokens: this.delayed_synthetic_tokens.map((token) => ({
+        ...token,
+        breadcrumbs: [...token.breadcrumbs],
+        attributes: (token.attributes ?? []).map((attribute) => ({ ...attribute })), // text/comment tokens carry no attributes; they snapshot as []
+      })),
+      deferredTableOpener: this.deferred_table_opener === null
+        ?
null
+        : {
+          ...this.deferred_table_opener,
+          breadcrumbs: [...this.deferred_table_opener.breadcrumbs],
+          attributes: this.deferred_table_opener.attributes.map((attribute) => ({ ...attribute })), // no `?? []` here: deferred openers are created with an attributes array
+        },
+      deferredTableChildOpeners: this.deferred_table_child_openers.map((token) => ({
+        ...token,
+        breadcrumbs: [...token.breadcrumbs],
+        attributes: token.attributes.map((attribute) => ({ ...attribute })),
+      })),
+      pendingForeignTableFosteredTextTableIndex: this.pending_foreign_table_fostered_text_table_index,
+      pendingNestedAnchorOuterCloserAfterDeferredTableIndex: this.pending_nested_anchor_outer_closer_after_deferred_table_index,
+      pendingNestedAnchorActiveRemovalAfterDeferredTable: this.pending_nested_anchor_active_removal_after_deferred_table,
+      pendingNestedAnchorDivActiveRemovalAfterDeferredTable: this.pending_nested_anchor_div_active_removal_after_deferred_table,
+      activeFormattingElements: this.active_formatting_elements.map((entry) => this.#cloneActiveFormattingElement(entry)),
+      paragraphAdoptionPreclosedFormattingElements: this.paragraph_adoption_preclosed_formatting_elements.map((entry) => ({ ...entry })),
+      specialStartAdoptionPreclosedFormattingElements: this.special_start_adoption_preclosed_formatting_elements.map((entry) => ({ ...entry })),
+      deepAnchorReconstructedDivStartOffsets: [...this.deep_anchor_reconstructed_div_start_offsets], // flattened to an array; restore rebuilds it with new Set()
+      tableNobrReconstructedStartOffsets: [...this.table_nobr_reconstructed_start_offsets], // flattened to an array; restore rebuilds it with new Set()
+      ignoredSelectFormattingElements: [...this.ignored_select_formatting_elements.entries()], // Map flattened to [tagName, count] pairs; restore rebuilds it with new Map()
+      templateInsertionModes: [...this.template_insertion_modes],
+      encodingConfidence: this.encoding_confidence,
+      baseOpenElementCount: this.base_open_element_count,
+      fullParserInsertionMode: this.full_parser_insertion_mode,
+      fullParserScaffolded: this.full_parser_scaffolded,
+      fullParserSeenDoctype: this.full_parser_seen_doctype,
+      framesetOk: this.frameset_ok,
+      preFramesetIgnoredElementDepth: this.pre_frameset_ignored_element_depth,
+
formElementPointer: this.form_element_pointer,
+    };
+  }
+  /** Reinstates state produced by #snapshotProcessorState(). In-flight work is discarded first (current virtual token, virtual token queue, pending-real-token flags, skip flag); snapshot fields read with `??` fall back to a default when absent. */
+  #restoreProcessorState(state) {
+    this.current_virtual = null;
+    this.virtual_tokens = [];
+    this.pending_real_token = false;
+    this.pending_real_parser_state = null;
+    this.skip_current_token = false;
+    this.delayed_synthetic_tokens = (state.delayedSyntheticTokens ?? []).map((token) => ({ // re-cloned so the snapshot object itself stays reusable
+      ...token,
+      breadcrumbs: [...token.breadcrumbs],
+      attributes: (token.attributes ?? []).map((attribute) => ({ ...attribute })),
+    }));
+    this.deferred_table_opener = state.deferredTableOpener === null || state.deferredTableOpener === undefined
+      ? null
+      : {
+        ...state.deferredTableOpener,
+        breadcrumbs: [...state.deferredTableOpener.breadcrumbs],
+        attributes: (state.deferredTableOpener.attributes ?? []).map((attribute) => ({ ...attribute })),
+      };
+    this.deferred_table_child_openers = (state.deferredTableChildOpeners ?? []).map((token) => ({
+      ...token,
+      breadcrumbs: [...token.breadcrumbs],
+      attributes: (token.attributes ?? []).map((attribute) => ({ ...attribute })),
+    }));
+    this.pending_foreign_table_fostered_text_table_index = state.pendingForeignTableFosteredTextTableIndex ?? null;
+    this.pending_nested_anchor_outer_closer_after_deferred_table_index = state.pendingNestedAnchorOuterCloserAfterDeferredTableIndex ?? null;
+    this.pending_nested_anchor_active_removal_after_deferred_table = state.pendingNestedAnchorActiveRemovalAfterDeferredTable ?? false;
+    this.pending_nested_anchor_div_active_removal_after_deferred_table = state.pendingNestedAnchorDivActiveRemovalAfterDeferredTable ?? false;
+    this.full_parser_insertion_mode = state.fullParserInsertionMode;
+    this.full_parser_scaffolded = state.fullParserScaffolded;
+    this.full_parser_seen_doctype = state.fullParserSeenDoctype;
+    this.frameset_ok = state.framesetOk;
+    this.pre_frameset_ignored_element_depth = state.preFramesetIgnoredElementDepth ??
0;
+    this.form_element_pointer = state.formElementPointer;
+    this.temporary_reopened_head = false; // not part of the snapshot; always cleared on restore
+    this.open_elements = [...state.openElements];
+    this.open_element_namespaces = [...state.openElementNamespaces];
+    this.open_element_integration_node_types = [...state.openElementIntegrationNodeTypes];
+    this.open_element_foster_parented_table_indices = [...(state.openElementFosterParentedTableIndices ?? this.open_elements.map(() => null))]; // when the snapshot lacks this field, use one null per restored open element
+    this.detached_context_breadcrumbs = [...state.detachedContextBreadcrumbs];
+    this.detached_breadcrumbs = (state.detachedBreadcrumbs ?? []).map((breadcrumb) => ({ ...breadcrumb }));
+    this.active_formatting_elements = state.activeFormattingElements.map((entry) => this.#cloneActiveFormattingElement(entry));
+    this.paragraph_adoption_preclosed_formatting_elements = (state.paragraphAdoptionPreclosedFormattingElements ?? []).map((entry) => ({ ...entry }));
+    this.special_start_adoption_preclosed_formatting_elements = (state.specialStartAdoptionPreclosedFormattingElements ?? []).map((entry) => ({ ...entry }));
+    this.deep_anchor_reconstructed_div_start_offsets = new Set(state.deepAnchorReconstructedDivStartOffsets ?? []);
+    this.table_nobr_reconstructed_start_offsets = new Set(state.tableNobrReconstructedStartOffsets ?? []);
+    this.ignored_select_formatting_elements = new Map(state.ignoredSelectFormattingElements);
+    this.template_insertion_modes = [...state.templateInsertionModes];
+    this.encoding_confidence = state.encodingConfidence;
+    this.base_open_element_count = state.baseOpenElementCount;
+    this.breadcrumbs = [...state.breadcrumbs];
+    this.current_namespace = state.currentNamespace;
+    this.current_token_namespace = state.currentTokenNamespace;
+    super.change_parsing_namespace(this.current_namespace); // keep the superclass's parsing namespace in step with the restored value
+  }
+  /** Sets compat_mode from the current token's doctype info: QUIRKS_MODE only when indicated_compatibility_mode is exactly "quirks"; every other value, and missing doctype info, gives NO_QUIRKS_MODE. */
+  #setCompatModeFromCurrentDoctype() {
+    const doctype = this.get_doctype_info();
+    this.compat_mode = doctype?.indicated_compatibility_mode === "quirks"
+      ?
WP_HTML_Tag_Processor.QUIRKS_MODE
+      : WP_HTML_Tag_Processor.NO_QUIRKS_MODE;
+  }
+  /** For a real (non-virtual) token in the text-node state, delegates to the superclass's subdivide_text_appropriately(); otherwise does nothing. */
+  #subdivideCurrentTextToken() {
+    if (!this.is_virtual() && this.parser_state === STATE_TEXT_NODE) {
+      super.subdivide_text_appropriately();
+    }
+  }
+  /** One-shot: when a raw-text fragment context is set, exposes the entire input (this.html) as a single synthetic "#text" token in the "html" namespace. Returns false when no such context is set, the token was already produced, or the input is empty. */
+  #consumeRawTextFragmentToken() {
+    if (
+      this.raw_text_fragment_context === null ||
+      this.raw_text_fragment_consumed ||
+      this.html === ""
+    ) {
+      return false;
+    }
+    // Contexts listed in RCDATA_FRAGMENT_CONTEXT_ELEMENTS get decode_text_node() applied; all others keep the input as written.
+    let text = RCDATA_FRAGMENT_CONTEXT_ELEMENTS.has(this.raw_text_fragment_context)
+      ? WP_HTML_Decoder.decode_text_node(this.html)
+      : this.html;
+    text = replaceNulls(text);
+    this.raw_text_fragment_consumed = true;
+    this.current_synthetic_token = {
+      tokenType: "#text",
+      tokenName: "#text",
+      modifiableText: text,
+    };
+    this.parser_state = STATE_TEXT_NODE;
+    this.text_node_classification = splitHtmlWhitespace(text).length === 0
+      ? WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE
+      : WP_HTML_Tag_Processor.TEXT_IS_GENERIC;
+    this.current_token_namespace = "html";
+    this.breadcrumbs = this.#breadcrumbStack("#text");
+    return true;
+  }
+  /** One-shot: while plaintext_pending is set, turns everything after the current native span (or nothing, when there is no span) into one synthetic "#text" token flagged rawText. Clears the pending flag either way; returns false when not pending, already consumed, or the remaining text is empty. */
+  #consumePlaintextTextToken() {
+    if (!this.plaintext_pending || this.plaintext_text_consumed) {
+      return false;
+    }
+
+    const span = this.#nativeCurrentSpan();
+    const html = super.get_updated_html();
+    this.plaintext_content_start = span === null
+      ? html.length
+      : Math.min(html.length, span.start + span.length);
+    const text = replaceNulls(html.slice(this.plaintext_content_start));
+    this.plaintext_pending = false;
+    this.plaintext_text_consumed = true;
+    if (text === "") {
+      return false;
+    }
+
+    this.current_synthetic_token = {
+      tokenType: "#text",
+      tokenName: "#text",
+      modifiableText: text,
+      rawText: true, // #serializeTextToken() returns rawText tokens without HTML-escaping
+    };
+    this.current_virtual = null;
+    this.parser_state = STATE_TEXT_NODE;
+    this.text_node_classification = splitHtmlWhitespace(text).length === 0
+      ?
WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE
+      : WP_HTML_Tag_Processor.TEXT_IS_GENERIC;
+    this.current_token_namespace = "html";
+    this.breadcrumbs = this.#breadcrumbStack("#text");
+    return true;
+  }
+  /** Textual lookahead: tests the HTML after the current native span (or from offset 0 when there is none) against a case-insensitive NOFRAMES start-tag pattern. NOTE(review): this is a regex heuristic, not tokenization — it tolerates whitespace after "<" and would also match inside comments or raw text. */
+  #hasFutureNoframesStartTag() {
+    const span = this.#nativeCurrentSpan();
+    const start = span === null ? 0 : span.start + span.length;
+    return /<\s*noframes(?:[\t\n\f\r />]|$)/i.test(super.get_updated_html().slice(start));
+  }
+  /** Queues the current comment (full text, current namespace, given breadcrumbs) as a delayed synthetic "#comment" token and marks the current token as skipped. Always returns true. The default breadcrumbs are computed at call time. */
+  #delayCurrentCommentToken(breadcrumbs = this.#breadcrumbStack("#comment")) {
+    this.delayed_synthetic_tokens.push({
+      tokenType: "#comment",
+      tokenName: "#comment",
+      commentText: this.get_full_comment_text() ?? "",
+      namespaceName: this.current_namespace,
+      breadcrumbs,
+    });
+    this.skip_current_token = true;
+    return true;
+  }
+  /** Returns the current token's modifiable text ("" when null/undefined) for serialization: unescaped for synthetic tokens flagged rawText and for raw-text fragment contexts outside RCDATA_FRAGMENT_CONTEXT_ELEMENTS, otherwise passed through htmlEscape(). */
+  #serializeTextToken() {
+    const text = this.get_modifiable_text() ?? "";
+    if (this.current_synthetic_token?.rawText === true) {
+      return text;
+    }
+    if (
+      this.raw_text_fragment_context !== null &&
+      !RCDATA_FRAGMENT_CONTEXT_ELEMENTS.has(this.raw_text_fragment_context)
+    ) {
+      return text;
+    }
+    return htmlEscape(text);
+  }
+  /** Looks up an attribute on the current virtual token and returns its value, or null when absent (or when there is no virtual token). Names are compared through asciiLower() when the current token namespace is "html", and exactly otherwise. */
+  #getVirtualAttribute(name) {
+    const attributes = this.current_virtual?.attributes ?? [];
+    const wantedName = phpInternalStringCoerce(name, "name");
+    const normalizedWantedName = this.current_token_namespace === "html" ? asciiLower(wantedName) : wantedName;
+
+    for (const attribute of attributes) {
+      const attributeName = this.current_token_namespace === "html" ? asciiLower(attribute.name) : attribute.name;
+      if (attributeName === normalizedWantedName) {
+        return attribute.value;
+      }
+    }
+
+    return null;
+  }
+  /** Returns the names, as stored on the token, of the current virtual token's attributes that start with prefix, or null when none match. Matching uses the same namespace-dependent asciiLower() normalization as #getVirtualAttribute(). */
+  #getVirtualAttributeNamesWithPrefix(prefix) {
+    const attributes = this.current_virtual?.attributes ?? [];
+    const wantedPrefix = phpInternalStringCoerce(prefix, "prefix");
+    const normalizedWantedPrefix = this.current_token_namespace === "html" ?
asciiLower(wantedPrefix) : wantedPrefix;
+    const names = [];
+
+    for (const attribute of attributes) {
+      const attributeName = this.current_token_namespace === "html" ? asciiLower(attribute.name) : attribute.name;
+      if (attributeName.startsWith(normalizedWantedPrefix)) {
+        names.push(attribute.name); // report the stored spelling, not the normalized one
+      }
+    }
+
+    return names.length === 0 ? null : names;
+  }
+  /** Same lookup as #getVirtualAttribute(), but over the current synthetic token's attributes: returns the value, or null when absent or when there is no synthetic token. */
+  #getSyntheticAttribute(name) {
+    const attributes = this.current_synthetic_token?.attributes ?? [];
+    const wantedName = phpInternalStringCoerce(name, "name");
+    const normalizedWantedName = this.current_token_namespace === "html" ? asciiLower(wantedName) : wantedName;
+
+    for (const attribute of attributes) {
+      const attributeName = this.current_token_namespace === "html" ? asciiLower(attribute.name) : attribute.name;
+      if (attributeName === normalizedWantedName) {
+        return attribute.value;
+      }
+    }
+
+    return null;
+  }
+  /** Same as #getVirtualAttributeNamesWithPrefix(), but over the current synthetic token: stored names of attributes starting with prefix, or null when none match. */
+  #getSyntheticAttributeNamesWithPrefix(prefix) {
+    const attributes = this.current_synthetic_token?.attributes ?? [];
+    const wantedPrefix = phpInternalStringCoerce(prefix, "prefix");
+    const normalizedWantedPrefix = this.current_token_namespace === "html" ? asciiLower(wantedPrefix) : wantedPrefix;
+    const names = [];
+
+    for (const attribute of attributes) {
+      const attributeName = this.current_token_namespace === "html" ? asciiLower(attribute.name) : attribute.name;
+      if (attributeName.startsWith(normalizedWantedPrefix)) {
+        names.push(attribute.name);
+      }
+    }
+
+    return names.length === 0 ? null : names;
+  }
+  /** Whether the current virtual token's class attribute contains className. U+0000 in the wanted name is replaced with U+FFFD, and both sides go through #comparableClassName() before comparison. */
+  #virtualHasClass(className) {
+    const wantedClass = phpInternalStringCoerce(className, "wanted_class");
+    const comparableClassName = this.#comparableClassName(wantedClass.replaceAll("\0", "\uFFFD"));
+    return this.#virtualClassEntries().some((entry) => entry.comparable === comparableClassName);
+  }
+  /** Returns the virtual token's de-duplicated class names in attribute order: the comparable form in quirks mode, the name as written otherwise. */
+  #virtualClassList() {
+    return this.#virtualClassEntries().map((entry) => this.#virtualUsesQuirksMode() ?
entry.comparable : entry.name);
+  }
+  /** Splits the virtual token's class attribute (U+0000 replaced with U+FFFD) on HTML whitespace into { name, comparable } entries, keeping only the first entry for each comparable value. Returns [] when the attribute value is not a string. */
+  #virtualClassEntries() {
+    const classAttribute = this.#getVirtualAttribute("class");
+    if (typeof classAttribute !== "string") {
+      return [];
+    }
+
+    const entries = [];
+    for (const className of splitHtmlWhitespace(classAttribute.replaceAll("\0", "\uFFFD"))) {
+      const comparable = this.#comparableClassName(className);
+      if (entries.some((entry) => entry.comparable === comparable)) {
+        continue;
+      }
+      entries.push({ name: className, comparable });
+    }
+    return entries;
+  }
+  /** Form of a class name used for comparison: passed through asciiLower() in quirks mode, unchanged otherwise. */
+  #comparableClassName(className) {
+    return this.#virtualUsesQuirksMode() ? asciiLower(className) : className;
+  }
+  /** True when compat_mode equals WP_HTML_Tag_Processor.QUIRKS_MODE. */
+  #virtualUsesQuirksMode() {
+    return this.compat_mode === WP_HTML_Tag_Processor.QUIRKS_MODE;
+  }
+  /** Builds an active-formatting entry for the current token: tag name, token namespace, attributes from #currentTokenAttributes(), the #countOpenHtmlElements("TEMPLATE") value, and the index of the current top of open_elements. */
+  #createActiveFormattingElement(tagName) {
+    return {
+      tagName,
+      namespaceName: this.current_token_namespace,
+      attributes: this.#currentTokenAttributes(),
+      templateDepth: this.#countOpenHtmlElements("TEMPLATE"),
+      openElementIndex: this.open_elements.length - 1,
+    };
+  }
+  /** Appends entry to active_formatting_elements. First scans back from the end, stopping at a marker; if a third equivalent entry is found, that one (the earliest of the three) is removed. This appears to implement the HTML spec's limit of three equivalent entries after the last marker. */
+  #insertActiveFormattingElement(entry) {
+    let equivalentEntries = 0;
+    for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) {
+      if (this.#isActiveFormattingMarker(this.active_formatting_elements[i])) {
+        break;
+      }
+
+      if (!this.#activeFormattingElementsAreEquivalent(entry, this.active_formatting_elements[i])) {
+        continue;
+      }
+
+      equivalentEntries += 1;
+      if (equivalentEntries === 3) {
+        this.active_formatting_elements.splice(i, 1);
+        break;
+      }
+    }
+
+    this.active_formatting_elements.push(entry);
+  }
+  /** Appends a marker entry ({ marker: true }) that also records the #countOpenHtmlElements("TEMPLATE") value at insertion time. */
+  #insertActiveFormattingMarker() {
+    this.active_formatting_elements.push({
+      marker: true,
+      templateDepth: this.#countOpenHtmlElements("TEMPLATE"),
+    });
+  }
+  /** Pops entries off the end of active_formatting_elements until a marker has been popped (the marker itself is removed too) or the list is empty. */
+  #clearActiveFormattingElementsUpToLastMarker() {
+    while (this.active_formatting_elements.length > 0) {
+      const entry = this.active_formatting_elements.pop();
+      if (this.#isActiveFormattingMarker(entry)) {
+        break;
+      }
+    }
+  }
+  /** True only for entries whose marker property is exactly true; null and undefined entries yield false. */
+  #isActiveFormattingMarker(entry) {
+    return
entry?.marker === true; + } + + /** Increments the per-tag counter in ignored_select_formatting_elements. Tags that are not in ADOPTION_AGENCY_END_TAGS are ignored. */ #rememberIgnoredSelectFormattingElement(tagName) { + if (!ADOPTION_AGENCY_END_TAGS.has(tagName)) { + return; + } + + this.ignored_select_formatting_elements.set( + tagName, + (this.ignored_select_formatting_elements.get(tagName) ?? 0) + 1, + ); + } + + /** Decrements the counter recorded for `tagName`, deleting the key when the last count is consumed. Returns true when a count was consumed, false when none was recorded. */ #consumeIgnoredSelectFormattingElement(tagName) { + const count = this.ignored_select_formatting_elements.get(tagName) ?? 0; + if (count < 1) { + return false; + } + + if (count === 1) { + this.ignored_select_formatting_elements.delete(tagName); + } else { + this.ignored_select_formatting_elements.set(tagName, count - 1); + } + return true; + } + + /** Two entries are equivalent when neither is a marker, their tag name, namespace, template depth (missing treated as 0) and attribute count match, and every attribute of `left` has the same value under the same normalized name in `right`. */ #activeFormattingElementsAreEquivalent(left, right) { + if ( + this.#isActiveFormattingMarker(left) || + this.#isActiveFormattingMarker(right) || + left.tagName !== right.tagName || + left.namespaceName !== right.namespaceName || + (left.templateDepth ?? 0) !== (right.templateDepth ?? 0) || + left.attributes.length !== right.attributes.length + ) { + return false; + } + + const rightAttributes = new Map(); + for (const attribute of right.attributes) { + rightAttributes.set( + this.#activeFormattingAttributeName(right, attribute), + attribute.value, + ); + } + + for (const attribute of left.attributes) { + const attributeName = this.#activeFormattingAttributeName(left, attribute); + if (!rightAttributes.has(attributeName) || rightAttributes.get(attributeName) !== attribute.value) { + return false; + } + } + + return true; + } + + /** Attribute name used for equivalence checks: ASCII-lowercased when the entry is in the "html" namespace, unchanged otherwise. */ #activeFormattingAttributeName(entry, attribute) { + return entry.namespaceName === "html" ? asciiLower(attribute.name) : attribute.name; + } + + /** Returns a copy of `entry` with a fresh attributes array of { name, value } copies. Markers are copied as { marker, templateDepth }. A missing templateDepth becomes 0 and a missing openElementIndex becomes null. */ #cloneActiveFormattingElement(entry) { + if (this.#isActiveFormattingMarker(entry)) { + return { + marker: true, + templateDepth: entry.templateDepth ?? 0, + }; + } + + return { + tagName: entry.tagName, + namespaceName: entry.namespaceName, + templateDepth: entry.templateDepth ?? 0, + openElementIndex: entry.openElementIndex ?? 
null, + attributes: entry.attributes.map((attribute) => ({ + name: attribute.name, + value: attribute.value, + })), + }; + } + + /** Snapshots the current token's attributes as { name, value } pairs using the parent class's get_attribute_names_with_prefix("") and get_attribute. A name already seen is skipped; names are compared ASCII-case-insensitively when the current token namespace is "html". */ #currentTokenAttributes() { + const attributeNames = super.get_attribute_names_with_prefix("") ?? []; + const attributes = []; + const seenAttributeNames = new Set(); + + for (const attributeName of attributeNames) { + const comparableName = this.current_token_namespace === "html" ? asciiLower(attributeName) : attributeName; + if (seenAttributeNames.has(comparableName)) { + continue; + } + seenAttributeNames.add(comparableName); + attributes.push({ + name: attributeName, + value: super.get_attribute(attributeName), + }); + } + + return attributes; + } + + /** Walks back from the end of the active formatting list to the nearest marker or entry that #activeFormattingElementIsOpen reports as open, then queues a virtual push for every entry after that point. Returns false when there is nothing to queue, true otherwise. */ #queueReconstructActiveFormattingElements() { + let firstMissingIndex = this.active_formatting_elements.length; + while (firstMissingIndex > 0) { + const entry = this.active_formatting_elements[firstMissingIndex - 1]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + if (this.#activeFormattingElementIsOpen(entry)) { + break; + } + firstMissingIndex -= 1; + } + + if (firstMissingIndex === this.active_formatting_elements.length) { + return false; + } + + for (let i = firstMissingIndex; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + continue; + } + this.#queueActiveFormattingElementEntry(entry); + } + + return true; + } + + /** Performs the same walk as #queueReconstructActiveFormattingElements, but only when deferred_table_opener is set and an HTML TABLE is open; the index of that TABLE is passed along with every queued entry. Returns false when either precondition fails or nothing needs queuing. */ #queueReconstructActiveFormattingElementsBeforeDeferredTable() { + if (this.deferred_table_opener === null) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + let firstMissingIndex = this.active_formatting_elements.length; + while (firstMissingIndex > 0) { + const entry = this.active_formatting_elements[firstMissingIndex - 1]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + if (this.#activeFormattingElementIsOpen(entry)) { + break; + } + 
firstMissingIndex -= 1; + } + + if (firstMissingIndex === this.active_formatting_elements.length) { + return false; + } + + for (let i = firstMissingIndex; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + continue; + } + this.#queueActiveFormattingElementEntry(entry, tableIndex); + } + + return true; + } + + /** Look-ahead heuristic for a DIV start tag that arrives while an HTML formatting element is on top of the open-element stack. Scans the stack downward (starting below the top) for an I, B or A formatting element whose end tag precedes the DIV's close in the remaining input; when the active formatting list has the shape required by the matching branch, it queues virtual pops from that element, queues re-opening of selected following entries and rewrites the active formatting list. Returns true when tokens were queued, false otherwise. */ #queueDeepFormattingElementSpecialStartPreclosure(tagName) { + if (tagName !== "DIV") { + return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_element_namespaces[topIndex] !== "html" || + !FORMATTING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + for (let i = topIndex - 1; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + return false; + } + + if (!FORMATTING_ELEMENTS.has(this.open_elements[i])) { + if (isSpecialBoundary(this.open_elements[i], this.open_element_namespaces[i])) { + return false; + } + continue; + } + + const formattingTagName = this.open_elements[i]; + /* I branch: requires exactly one following active entry, an HTML B. */ if (formattingTagName === "I") { + const activeFormattingElementIndex = this.#lastActiveFormattingElementIndex(formattingTagName); + const activeFormattingElement = this.active_formatting_elements[activeFormattingElementIndex]; + const followingEntries = this.#activeFormattingElementsAfterIndex(activeFormattingElementIndex); + if ( + activeFormattingElementIndex !== -1 && + followingEntries.length === 1 && + followingEntries[0].tagName === "B" && + followingEntries[0].namespaceName === "html" && + this.#hasOnlyOpenFormattingElementsAfterIndex(i) && + !hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) && + this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) + ) { + this.#queueVirtualPopsFrom(i); + if ( + this.deferred_table_opener !== null && + this.#currentFosterParentedTableIndex() !== null + ) { + 
this.#queueActiveFormattingElementsAfterIndex(activeFormattingElementIndex); + } else { + this.#queueActiveFormattingElementsAfterIndexAsEmpty(activeFormattingElementIndex); + } + /* Move the I entry behind the entries that followed it. */ this.#replaceActiveFormattingElementsFromIndex(activeFormattingElementIndex, [ + ...followingEntries, + activeFormattingElement, + ]); + return true; + } + + continue; + } + + /* B branch: keeps the last two HTML I entries that follow B in the active list. */ if (formattingTagName === "B") { + const activeFormattingElementIndex = this.#lastActiveFormattingElementIndex(formattingTagName); + const followingEntries = this.#activeFormattingElementsAfterIndex(activeFormattingElementIndex); + const preservedEntries = followingEntries.filter((entry) => ( + entry.tagName === "I" && + entry.namespaceName === "html" + )).slice(-2); + if ( + activeFormattingElementIndex !== -1 && + preservedEntries.length === 2 && + this.#hasOnlyOpenFormattingOrCiteElementsAfterIndex(i) && + !hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) && + this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) + ) { + this.#markSpecialStartAdoptionPreclosedFormattingElement(formattingTagName, "html", tagName, "text-self"); + this.#queueVirtualPopsFrom(i); + this.#queueActiveFormattingElementEntries(preservedEntries); + this.#replaceActiveFormattingElementsAfterIndex(activeFormattingElementIndex, preservedEntries); + return true; + } + + continue; + } + + /* Any other formatting element except A is skipped. */ if (formattingTagName !== "A") { + continue; + } + + if (!this.#hasOnlyOpenFormattingElementsAfterIndex(i)) { + return false; + } + + const activeFormattingElementIndex = this.#lastActiveFormattingElementIndex(formattingTagName); + const activeFormattingElement = this.active_formatting_elements[activeFormattingElementIndex]; + const followingEntries = this.#activeFormattingElementsAfterIndex(activeFormattingElementIndex); + /* A branch, first shape: exactly one following active entry, an HTML B. */ if ( + activeFormattingElementIndex !== -1 && + followingEntries.length === 1 && + followingEntries[0].tagName === "B" && + followingEntries[0].namespaceName === "html" && + 
!hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) && + this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) + ) { + this.#markSpecialStartAdoptionPreclosedFormattingElement(formattingTagName, "html", tagName, "reconstruct-before-nested-div"); + this.#queueVirtualPopsFrom(i); + this.#queueActiveFormattingElementEntries(followingEntries); + this.#replaceActiveFormattingElementsFromIndex(activeFormattingElementIndex, [ + ...followingEntries, + activeFormattingElement, + ]); + return true; + } + + /* A branch, second shape: needs at least four following active entries; only the last three are re-opened and kept. */ if ( + activeFormattingElementIndex === -1 || + followingEntries.length < 4 || + hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) || + !this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) + ) { + return false; + } + + this.#markSpecialStartAdoptionPreclosedFormattingElement(formattingTagName, "html", tagName); + this.#queueVirtualPopsFrom(i); + this.#queueActiveFormattingElementEntries(followingEntries.slice(-3)); + this.#removeActiveFormattingElementsAfterIndexBeforeTail(activeFormattingElementIndex, 3); + return true; + } + + return false; + } + + /** For a start tag in FORMATTING_ELEMENT_ANCESTOR_PRECLOSURE_START_TAGS whose insertion point is an HTML non-formatting element: scans the open-element stack downward for a formatting ancestor (only B is considered for ASIDE) that is in the active formatting list and whose end tag precedes the new element's close. That ancestor is recorded as pre-closed for `tagName`, virtual pops are queued from it and, depending on the reconstruction mode, the active entries that follow it are queued too. Returns true when pops were queued, false otherwise. */ #queueFormattingElementAncestorSpecialStartPreclosure(tagName) { + if (!FORMATTING_ELEMENT_ANCESTOR_PRECLOSURE_START_TAGS.has(tagName)) { + return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 1 || + this.open_element_namespaces[topIndex] !== "html" || + FORMATTING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + for (let i = topIndex - 1; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + return false; + } + + if (!FORMATTING_ELEMENTS.has(this.open_elements[i])) { + if (isSpecialBoundary(this.open_elements[i], this.open_element_namespaces[i])) { + return false; + } + continue; + } + + const formattingTagName = this.open_elements[i]; + if (tagName === "ASIDE" && formattingTagName !== "B") { + continue; + } + const activeFormattingElementIndex = 
this.#lastActiveFormattingElementIndex(formattingTagName); + if ( + activeFormattingElementIndex === -1 || + hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) + ) { + return false; + } + if (!this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName)) { + continue; + } + /* Only ASIDE consults the reconstruction-mode helper; every other tag uses "self". */ const reconstructionMode = tagName === "ASIDE" + ? this.#asideFormattingPreclosureReconstructionMode(activeFormattingElementIndex, i, tagName) + : "self"; + if (reconstructionMode === null) { + return false; + } + + this.#markSpecialStartAdoptionPreclosedFormattingElement(formattingTagName, "html", tagName, reconstructionMode); + this.#queueVirtualPopsFrom(i); + if ( + tagName === "ASIDE" && + reconstructionMode === "wrap-following" + ) { + this.#queueActiveFormattingElementsAfterIndex(activeFormattingElementIndex); + } else if (reconstructionMode === "empty-following-then-self") { + this.#queueActiveFormattingElementsAfterIndexAsEmpty(activeFormattingElementIndex); + } + return true; + } + + return false; + } + + /** For a start tag in FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS arriving while an HTML formatting element is on top of the open-element stack: after a series of NOBR- and DIV-specific exclusions, records the top formatting element as pre-closed for `tagName` and queues virtual pops from the top index. Returns true when pops were queued, false otherwise. */ #queueFormattingElementSpecialStartPreclosure(tagName) { + if (!FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS.has(tagName)) { + return false; + } + + if (this.#shouldKeepDeepAnchorAroundCurrentNestedDiv(tagName)) { + return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_element_namespaces[topIndex] !== "html" || + !FORMATTING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + const formattingTagName = this.open_elements[topIndex]; + const openNobrIndex = tagName === "NOBR" ? 
this.#lastOpenElementIndex("NOBR", "html") : -1; + /* NOBR start tag: bail out when an HTML TABLE is open above the open NOBR and the top element is not NOBR. */ if ( + tagName === "NOBR" && + formattingTagName !== "NOBR" && + openNobrIndex !== -1 && + this.#lastOpenElementIndex("TABLE", "html") > openNobrIndex + ) { + return false; + } + /* DIV start tag with a deferred table opener and a foster-parented table index: only proceed when the formatting end tag comes before table-mode content. */ if ( + tagName === "DIV" && + formattingTagName !== "NOBR" && + this.deferred_table_opener !== null && + this.#currentFosterParentedTableIndex() !== null && + !this.#formattingEndTagPrecedesTableModeStart(formattingTagName) + ) { + return false; + } + + if ( + ( + tagName === "NOBR" && + (openNobrIndex === -1 || openNobrIndex >= topIndex) + ) || + this.#lastActiveFormattingElementIndex(formattingTagName) === -1 || + ( + tagName === "DIV" && + formattingTagName !== "NOBR" && + this.#hasOpenFormattingElementBeforeIndex(topIndex) + ) || + ( + !this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) && + (tagName !== "NOBR" || !this.#currentTokenHasNoFollowingTags()) && + ( + tagName !== "DIV" || + formattingTagName !== "NOBR" || + !this.#openFormattingElementBeforeIndexPrecedesElementClose(topIndex, tagName) + ) + ) + ) { + return false; + } + + this.#markSpecialStartAdoptionPreclosedFormattingElement(formattingTagName, "html", tagName); + this.#queueVirtualPopsFrom(topIndex); + return true; + } + + /** Chooses the reconstruction mode used after pre-closing a formatting element (the visible caller invokes it only for ASIDE). Returns null unless exactly one active formatting entry follows `activeIndex` (up to the next marker) and at least three open elements follow `openIndex`. Returns "self" when more than three open elements follow; otherwise "empty-following-then-self" if the following entry's end tag precedes the close of `elementTagName`, else "wrap-following". */ #asideFormattingPreclosureReconstructionMode(activeIndex, openIndex, elementTagName) { + const openElementsAfterIndex = this.open_elements.length - openIndex - 1; + let activeFormattingElementsAfterIndex = 0; + let followingFormattingEndPrecedesElementClose = false; + for (let i = activeIndex + 1; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + activeFormattingElementsAfterIndex += 1; + if (this.#formattingEndTagPrecedesElementClose(entry.tagName, elementTagName)) { + followingFormattingEndPrecedesElementClose = true; + } + } + + if (activeFormattingElementsAfterIndex !== 1 || openElementsAfterIndex < 3) { + return 
null; + } + + if (openElementsAfterIndex > 3) { + return "self"; + } + + return followingFormattingEndPrecedesElementClose + ? "empty-following-then-self" + : "wrap-following"; + } + + /** Scans the tags that follow the current real token. Returns true if a closing tag for `formattingTagName` is found before a closing DIV or any start tag in TABLE_MODE_START_TAGS; returns false in those cases, when there is no current token span, or when the input runs out of tags. */ #formattingEndTagPrecedesTableModeStart(formattingTagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (nextTag.is_closing) { + if (nextTag.tag_name === formattingTagName) { + return true; + } + if (nextTag.tag_name === "DIV") { + return false; + } + at = nextTag.token_end; + continue; + } + + if (TABLE_MODE_START_TAGS.has(nextTag.tag_name)) { + return false; + } + + at = nextTag.token_end; + } + } + + /** Queues virtual pushes for every active formatting entry after `index`, up to the next marker. */ #queueActiveFormattingElementsAfterIndex(index) { + this.#queueActiveFormattingElementEntries(this.#activeFormattingElementsAfterIndex(index)); + } + + /** Queues a virtual push for each entry, in order. */ #queueActiveFormattingElementEntries(entries) { + for (const entry of entries) { + this.#queueActiveFormattingElementEntry(entry); + } + } + + /** Queues a virtual push for `entry` and stores on the entry (mutating it) the value of #futureOpenElementIndexForVirtualPush() computed before the push is queued. When `fosterParentedTableIndex` is supplied it is forwarded as the fifth argument of #queueVirtualPush, with null as the fourth. */ #queueActiveFormattingElementEntry(entry, fosterParentedTableIndex = undefined) { + entry.openElementIndex = this.#futureOpenElementIndexForVirtualPush(); + if (fosterParentedTableIndex === undefined) { + this.#queueVirtualPush(entry.tagName, entry.namespaceName, entry.attributes); + return; + } + + this.#queueVirtualPush(entry.tagName, entry.namespaceName, entry.attributes, null, fosterParentedTableIndex); + } + + /** Length the open-element stack will have once every queued virtual token is applied ("push" counts +1, any other operation -1), clamped at 0. */ #futureOpenElementIndexForVirtualPush() { + let length = this.open_elements.length; + for (const token of this.virtual_tokens) { + length += token.operation === "push" ? 
1 : -1; + } + return Math.max(0, length); + } + + /** Returns the active formatting entries after `index`, stopping before the next marker. */ #activeFormattingElementsAfterIndex(index) { + const entries = []; + for (let i = index + 1; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + entries.push(entry); + } + return entries; + } + + /** True when every open element above `index` is an HTML formatting element; also true when nothing is above it. */ #hasOnlyOpenFormattingElementsAfterIndex(index) { + for (let i = index + 1; i < this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] !== "html" || + !FORMATTING_ELEMENTS.has(this.open_elements[i]) + ) { + return false; + } + } + return true; + } + + /** Same as #hasOnlyOpenFormattingElementsAfterIndex, except that HTML CITE elements are also accepted. */ #hasOnlyOpenFormattingOrCiteElementsAfterIndex(index) { + for (let i = index + 1; i < this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] !== "html" || + ( + this.open_elements[i] !== "CITE" && + !FORMATTING_ELEMENTS.has(this.open_elements[i]) + ) + ) { + return false; + } + } + return true; + } + + /** Within the run of active formatting entries after `index` (up to the next marker), removes all but the last `tailCount` entries and clears the paragraph and special-start pre-closure records for each removed entry's tag and namespace. */ #removeActiveFormattingElementsAfterIndexBeforeTail(index, tailCount) { + let endIndex = index + 1; + while ( + endIndex < this.active_formatting_elements.length && + !this.#isActiveFormattingMarker(this.active_formatting_elements[endIndex]) + ) { + endIndex += 1; + } + + const removeEndIndex = Math.max(index + 1, endIndex - tailCount); + /* Iterate backwards so that splicing does not shift the indices still to be visited. */ for (let i = removeEndIndex - 1; i > index; i -= 1) { + const entry = this.active_formatting_elements[i]; + this.active_formatting_elements.splice(i, 1); + this.#clearParagraphAdoptionPreclosedFormattingElements(entry.tagName, entry.namespaceName); + this.#clearSpecialStartAdoptionPreclosedFormattingElements(entry.tagName, entry.namespaceName); + } + } + + /** Replaces the run of active formatting entries that starts at `index` (inclusive) and ends before the next marker with clones of `entries`. */ #replaceActiveFormattingElementsFromIndex(index, entries) { + let endIndex = index; + while ( + endIndex < this.active_formatting_elements.length && + !this.#isActiveFormattingMarker(this.active_formatting_elements[endIndex]) + ) { + endIndex += 1; + } + + this.active_formatting_elements.splice( + index, + endIndex - index, + 
...entries.map((entry) => this.#cloneActiveFormattingElement(entry)), + ); + } + + /** Replaces the run of active formatting entries after `index` (exclusive), ending before the next marker, with clones of `entries`. */ #replaceActiveFormattingElementsAfterIndex(index, entries) { + let endIndex = index + 1; + while ( + endIndex < this.active_formatting_elements.length && + !this.#isActiveFormattingMarker(this.active_formatting_elements[endIndex]) + ) { + endIndex += 1; + } + + this.active_formatting_elements.splice( + index + 1, + endIndex - index - 1, + ...entries.map((entry) => this.#cloneActiveFormattingElement(entry)), + ); + } + + /** Queues a virtual push for each active formatting entry after `index` (up to the next marker), then queues the matching virtual pops in reverse order, so the queued elements are opened and closed again with nothing in between. */ #queueActiveFormattingElementsAfterIndexAsEmpty(index) { + const queued = []; + for (let i = index + 1; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + this.#queueActiveFormattingElementEntry(entry); + queued.push(entry); + } + + for (let i = queued.length - 1; i >= 0; i -= 1) { + this.#queueVirtualPop(queued[i].tagName, queued[i].namespaceName); + } + } + + /** Queues a virtual push for the most recent active formatting entry matching `tagName` and `namespaceName`, searching back only as far as the last marker. Returns whether such an entry was found. */ #queueActiveFormattingElement(tagName, namespaceName) { + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + return false; + } + if (entry.tagName === tagName && entry.namespaceName === namespaceName) { + this.#queueActiveFormattingElementEntry(entry); + return true; + } + } + + return false; + } + + /** Like #queueActiveFormattingElement, but first queues the entries that follow the match (up to the next marker) and only then the matching entry itself. Returns whether a match was found. */ #queueActiveFormattingElementWithFollowingElements(tagName, namespaceName) { + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + return false; + } + if (entry.tagName !== tagName || entry.namespaceName !== namespaceName) { + continue; + } + + for (let j = i + 1; j < this.active_formatting_elements.length; j += 1) { + const followingEntry = this.active_formatting_elements[j]; + if (this.#isActiveFormattingMarker(followingEntry)) { + break; + } + 
this.#queueActiveFormattingElementEntry(followingEntry); + } + this.#queueActiveFormattingElementEntry(entry); + return true; + } + + return false; + } + + /** Scans the open elements below `index`, nearest first. Returns true at the first HTML formatting element; returns false on reaching a non-html element, a special boundary, or the bottom of the stack. */ #hasOpenFormattingElementBeforeIndex(index) { + for (let i = index - 1; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + return false; + } + if (FORMATTING_ELEMENTS.has(this.open_elements[i])) { + return true; + } + if (isSpecialBoundary(this.open_elements[i], this.open_element_namespaces[i])) { + return false; + } + } + + return false; + } + + /** Finds the nearest formatting element below `index` (with the same stop conditions as #hasOpenFormattingElementBeforeIndex) and returns whether it is in the active formatting list and its end tag precedes the close of `elementTagName`. */ #openFormattingElementBeforeIndexPrecedesElementClose(index, elementTagName) { + for (let i = index - 1; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + return false; + } + if (FORMATTING_ELEMENTS.has(this.open_elements[i])) { + return ( + this.#lastActiveFormattingElementIndex(this.open_elements[i]) !== -1 && + this.#formattingEndTagPrecedesElementClose(this.open_elements[i], elementTagName) + ); + } + if (isSpecialBoundary(this.open_elements[i], this.open_element_namespaces[i])) { + return false; + } + } + + return false; + } + + /** True when a current real token span exists and runtime.scanNextTag finds no tag after it. */ #currentTokenHasNoFollowingTags() { + const span = this.#currentRealTokenSpan(); + return span !== null && runtime.scanNextTag(this.html, span.start + span.length) === false; + } + + /** Scans the tags that follow the current real token. Returns true if a closing tag for `formattingTagName` is found before a closing tag for `elementTagName` or a TABLE start tag; returns false in those cases, when there is no current token span, or when the input runs out of tags. */ #formattingEndTagPrecedesElementClose(formattingTagName, elementTagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (nextTag.is_closing && nextTag.tag_name === formattingTagName) { + return true; + } + + if ( + (nextTag.is_closing && nextTag.tag_name === elementTagName) || + (!nextTag.is_closing && nextTag.tag_name === "TABLE") + ) { + return false; + } + + at = nextTag.token_end; + } + } + + /** For a start tag in NESTED_ANCHOR_BLOCK_PRECLOSURE_START_TAGS arriving while an HTML A that is also in the active formatting list is on top of the open-element stack: queues virtual pops from the top index when #nestedAnchorStartPrecedesBlockEnd holds and the deep-anchor exception does not apply. Returns true when pops were queued. */ #queueNestedAnchorBlockAdoptionPreclosure(tagName) { + if (!NESTED_ANCHOR_BLOCK_PRECLOSURE_START_TAGS.has(tagName)) { + 
return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_elements[topIndex] !== "A" || + this.open_element_namespaces[topIndex] !== "html" || + this.#lastActiveFormattingElementIndex("A") === -1 || + this.#shouldKeepDeepAnchorAroundCurrentNestedDiv(tagName) || + !this.#nestedAnchorStartPrecedesBlockEnd(tagName) + ) { + return false; + } + + this.#queueVirtualPopsFrom(topIndex); + return true; + } + + /** Scans the tags that follow the current real token. Returns true when an A start tag, a start tag in NESTED_ANCHOR_RECONSTRUCTING_START_TAGS, or an A end tag appears before the matching close of `tagName` or a TABLE start tag; returns false otherwise. Nested openers of the same tag are depth-counted only when `tagName` is "DIV". */ #nestedAnchorStartPrecedesBlockEnd(tagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + let sameTagDepth = 0; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!nextTag.is_closing && nextTag.tag_name === tagName && tagName === "DIV") { + sameTagDepth += 1; + at = nextTag.token_end; + continue; + } + + if ( + !nextTag.is_closing && + ( + nextTag.tag_name === "A" || + NESTED_ANCHOR_RECONSTRUCTING_START_TAGS.has(nextTag.tag_name) + ) + ) { + return true; + } + + if (nextTag.is_closing && nextTag.tag_name === "A") { + return true; + } + + /* A closer that matches a nested same-name opener only reduces the depth. */ if ( + nextTag.is_closing && + nextTag.tag_name === tagName && + sameTagDepth > 0 + ) { + sameTagDepth -= 1; + at = nextTag.token_end; + continue; + } + + if ( + (nextTag.is_closing && nextTag.tag_name === tagName) || + (!nextTag.is_closing && nextTag.tag_name === "TABLE") + ) { + return false; + } + + at = nextTag.token_end; + } + } + + /** Returns false unless A is in the active formatting list and no HTML A is open. Otherwise returns true when `tagName` is in NESTED_ANCHOR_RECONSTRUCTING_START_TAGS, when #shouldReconstructDeepAnchorBeforeNestedDiv accepts it, or when it is "P" with no paragraph pre-closure recorded for A and A's end tag preceding the paragraph close. */ #shouldReconstructActiveAnchorForStartTag(tagName) { + if ( + this.#lastActiveFormattingElementIndex("A") === -1 || + this.#lastOpenElementIndex("A", "html") !== -1 + ) { + return false; + } + + return ( + NESTED_ANCHOR_RECONSTRUCTING_START_TAGS.has(tagName) || + this.#shouldReconstructDeepAnchorBeforeNestedDiv(tagName) || + ( + tagName === "P" && + !this.#hasParagraphAdoptionPreclosedFormattingElement("A", "html") && + this.#formattingEndTagPrecedesParagraphClose("A") + ) + ); + } + + 
/** Decides whether A should be reconstructed before a DIV start tag nested directly inside an open HTML DIV. Requires a recorded "reconstruct-before-nested-div" pre-closure of A for DIV and at most 8 HTML DIVs open after the last open B. Not a pure predicate: on success it records the current token's start offset in deep_anchor_reconstructed_div_start_offsets, so the same token yields true only once. */ #shouldReconstructDeepAnchorBeforeNestedDiv(tagName) { + if ( + tagName !== "DIV" || + this.open_elements.at(-1) !== "DIV" || + this.open_element_namespaces.at(-1) !== "html" || + !this.#hasSpecialStartAdoptionPreclosedFormattingElement( + "A", + "html", + "DIV", + "reconstruct-before-nested-div", + ) || + this.#countOpenHtmlElementsAfterLast("B", "DIV") > 8 + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null || this.deep_anchor_reconstructed_div_start_offsets.has(span.start)) { + return false; + } + + this.deep_anchor_reconstructed_div_start_offsets.add(span.start); + return true; + } + + /** True for a DIV start tag when an HTML A is on top of the open-element stack, a "reconstruct-before-nested-div" pre-closure of A for DIV is recorded, and at least 8 HTML DIVs are open after the last open B. */ #shouldKeepDeepAnchorAroundCurrentNestedDiv(tagName) { + if ( + tagName !== "DIV" || + this.open_elements.at(-1) !== "A" || + this.open_element_namespaces.at(-1) !== "html" || + !this.#hasSpecialStartAdoptionPreclosedFormattingElement( + "A", + "html", + "DIV", + "reconstruct-before-nested-div", + ) || + this.#countOpenHtmlElementsAfterLast("B", "DIV") < 8 + ) { + return false; + } + + return true; + } + + /** True when `tagName` is "NOBR", an HTML TABLE is open, #currentFosterParentedTableIndex() is null, and the current token's start offset is already in table_nobr_reconstructed_start_offsets. */ #alreadyReconstructedTableNobrForCurrentToken(tagName) { + if ( + tagName !== "NOBR" || + this.#lastOpenElementIndex("TABLE", "html") === -1 || + this.#currentFosterParentedTableIndex() !== null + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + return span !== null && this.table_nobr_reconstructed_start_offsets.has(span.start); + } + + /** Records the current token's start offset in table_nobr_reconstructed_start_offsets, under the same preconditions that #alreadyReconstructedTableNobrForCurrentToken checks. */ #rememberTableNobrReconstructionForCurrentToken(tagName) { + if ( + tagName !== "NOBR" || + this.#lastOpenElementIndex("TABLE", "html") === -1 || + this.#currentFosterParentedTableIndex() !== null + ) { + return; + } + + const span = this.#currentRealTokenSpan(); + if (span !== null) { + this.table_nobr_reconstructed_start_offsets.add(span.start); + } + } + + /** Counts the open HTML elements named `countedTagName` that sit above the last open HTML `afterTagName`. Returns 0 when `afterTagName` is not open. */ #countOpenHtmlElementsAfterLast(afterTagName, countedTagName) { + const afterIndex = this.#lastOpenElementIndex(afterTagName, "html"); + if (afterIndex === -1) { + return 0; + } + + let count = 0; + for (let i = afterIndex + 
1; i < this.open_elements.length; i += 1) { + if ( + this.open_elements[i] === countedTagName && + this.open_element_namespaces[i] === "html" + ) { + count += 1; + } + } + return count; + } + + /** True when `tagName` is in FONT_PARAGRAPH_ADOPTION_RECONSTRUCTING_START_TAGS and FONT is in the active formatting list but no HTML FONT is open. */ #shouldReconstructActiveFontForStartTag(tagName) { + return ( + FONT_PARAGRAPH_ADOPTION_RECONSTRUCTING_START_TAGS.has(tagName) && + this.#lastActiveFormattingElementIndex("FONT") !== -1 && + this.#lastOpenElementIndex("FONT", "html") === -1 + ); + } + + /** For a P start tag in the "html" namespace, when #findClosablePInButtonScopeForStartTag finds nothing and an HTML formatting element is on top of the open-element stack: scans downward from the top for a formatting element that is in the active formatting list and whose end tag precedes the paragraph close (with an extra allowance for B). That element is recorded as paragraph-pre-closed, virtual pops are queued from it and, when it is not the top element, the active entries that follow it are queued according to the chosen mode. Returns true when pops were queued. */ #queueParagraphAdoptionFormattingPreclosure(tagName) { + if (tagName !== "P" || this.current_namespace !== "html") { + return false; + } + + if (this.#findClosablePInButtonScopeForStartTag(tagName) !== -1) { + return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_element_namespaces[topIndex] !== "html" || + !FORMATTING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + for (let i = topIndex; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + return false; + } + + if (!FORMATTING_ELEMENTS.has(this.open_elements[i])) { + if (isSpecialBoundary(this.open_elements[i], this.open_element_namespaces[i])) { + return false; + } + continue; + } + + const formattingTagName = this.open_elements[i]; + const activeFormattingElementIndex = this.#lastActiveFormattingElementIndex(formattingTagName); + if ( + activeFormattingElementIndex === -1 || + hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, i) + ) { + return false; + } + /* Skip this element unless its end tag precedes the paragraph close; B with no formatting element below it may instead qualify through the generic element-close look-ahead. */ if ( + !this.#formattingEndTagPrecedesParagraphClose(formattingTagName) && + !( + formattingTagName === "B" && + !this.#hasOpenFormattingElementBeforeIndex(i) && + this.#formattingEndTagPrecedesElementClose(formattingTagName, tagName) + ) + ) { + continue; + } + + const reconstructionMode = ( + i < topIndex && + this.#activeFormattingElementAfterIndexPrecedesParagraphClose(activeFormattingElementIndex, formattingTagName) + ) + ? 
"following-inside" + : "self"; + + this.#markParagraphAdoptionPreclosedFormattingElement(formattingTagName, "html", reconstructionMode); + this.#queueVirtualPopsFrom(i); + if (i < topIndex) { + if (reconstructionMode === "following-inside") { + this.#queueActiveFormattingElementsAfterIndexAsEmpty(activeFormattingElementIndex); + } else { + this.#queueActiveFormattingElementsAfterIndex(activeFormattingElementIndex); + } + } + return true; + } + + return false; + } + + /** When the top open element is an HTML P, looks through the recorded paragraph pre-closures, most recent first, for one whose element is no longer open but still in the active formatting list, and queues it using its recorded reconstruction mode. Returns the result of that queue call, or false when no record qualifies. */ #queueParagraphAdoptionPreclosedFormattingElementsForText() { + if ( + this.open_element_namespaces.at(-1) !== "html" || + this.open_elements.at(-1) !== "P" + ) { + return false; + } + + for (let i = this.paragraph_adoption_preclosed_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.paragraph_adoption_preclosed_formatting_elements[i]; + if ( + this.#lastOpenElementIndex(entry.tagName, entry.namespaceName) === -1 && + this.#lastActiveFormattingElementIndex(entry.tagName) !== -1 + ) { + return entry.reconstructionMode === "following-inside" + ? 
this.#queueActiveFormattingElementWithFollowingElements(entry.tagName, entry.namespaceName) + : this.#queueActiveFormattingElement(entry.tagName, entry.namespaceName); + } + } + + return false; + } + + /** When the top open element is an HTML element named in FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS, queues the first recorded special-start pre-closure for that container that is either for A or has mode "text-self" and whose element is no longer open but still in the active formatting list. Returns the result of that queue call, or false when no record qualifies. */ #queueSpecialStartAdoptionPreclosedFormattingElementsForText() { + if ( + this.open_element_namespaces.at(-1) !== "html" || + !FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS.has(this.open_elements.at(-1)) + ) { + return false; + } + + const containerTagName = this.open_elements.at(-1); + for (const entry of this.special_start_adoption_preclosed_formatting_elements) { + if ( + (entry.tagName === "A" || entry.reconstructionMode === "text-self") && + entry.containerTagName === containerTagName && + this.#lastOpenElementIndex(entry.tagName, entry.namespaceName) === -1 && + this.#lastActiveFormattingElementIndex(entry.tagName) !== -1 + ) { + return this.#queueActiveFormattingElement(entry.tagName, entry.namespaceName); + } + } + + return false; + } + + /** True when any active formatting entry after `index` (up to the next marker) has its end tag before the paragraph close, allowing one closing `skippedEndTagName` to be skipped during the look-ahead. */ #activeFormattingElementAfterIndexPrecedesParagraphClose(index, skippedEndTagName) { + for (let i = index + 1; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + return false; + } + if (this.#formattingEndTagPrecedesParagraphCloseAfterSkippedEndTag(entry.tagName, skippedEndTagName)) { + return true; + } + } + + return false; + } + + /** Scans the tags that follow the current real token. Returns true when a closing `tagName` is found; the first closing `skippedEndTagName` encountered is stepped over. Returns false on a closing P, on a start tag for which #shouldClosePForStartTag is true, on any other closing tag unless `tagName` is "A", when there is no current token span, or when the input runs out of tags. */ #formattingEndTagPrecedesParagraphCloseAfterSkippedEndTag(tagName, skippedEndTagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let skipped = false; + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (nextTag.is_closing && nextTag.tag_name === tagName) { + return true; + } + + if (!skipped && nextTag.is_closing && nextTag.tag_name === skippedEndTagName) { + skipped = true; + at = nextTag.token_end; + continue; + } + + if ( + 
(nextTag.is_closing && nextTag.tag_name === "P") || + (!nextTag.is_closing && this.#shouldClosePForStartTag(nextTag.tag_name)) + ) { + return false; + } + + if (tagName !== "A" && nextTag.is_closing) { + return false; + } + + at = nextTag.token_end; + } + } + + /** Scans the tags that follow the current real token and returns true when a closing `tagName` is reached before the paragraph ends, meaning before a closing P or a start tag for which #shouldClosePForStartTag is true. Tag-specific rules: for "A", other closing tags are stepped over and another A start tag counts as success; for "FONT", closing tags in FONT_PARAGRAPH_ADOPTION_SKIPPABLE_END_TAGS and start tags that are formatting or reconstructing tags are stepped over. For any other `tagName`, the first other tag encountered yields false. */ #formattingEndTagPrecedesParagraphClose(tagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (nextTag.is_closing && nextTag.tag_name === tagName) { + return true; + } + + if ( + (nextTag.is_closing && nextTag.tag_name === "P") || + (!nextTag.is_closing && this.#shouldClosePForStartTag(nextTag.tag_name)) + ) { + return false; + } + + if ( + tagName === "FONT" && + nextTag.is_closing && + FONT_PARAGRAPH_ADOPTION_SKIPPABLE_END_TAGS.has(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + + if (tagName !== "A" && nextTag.is_closing) { + return false; + } + + if (!nextTag.is_closing) { + if (tagName === "A" && nextTag.tag_name === "A") { + return true; + } + if ( + tagName === "FONT" && + ( + FORMATTING_ELEMENTS.has(nextTag.tag_name) || + ACTIVE_FORMATTING_RECONSTRUCTING_START_TAGS.has(nextTag.tag_name) || + FONT_PARAGRAPH_ADOPTION_RECONSTRUCTING_START_TAGS.has(nextTag.tag_name) + ) + ) { + at = nextTag.token_end; + continue; + } + return false; + } + + at = nextTag.token_end; + } + } + + /** Appends a { tagName, namespaceName, reconstructionMode } record to paragraph_adoption_preclosed_formatting_elements; the mode defaults to "self". */ #markParagraphAdoptionPreclosedFormattingElement(tagName, namespaceName, reconstructionMode = "self") { + this.paragraph_adoption_preclosed_formatting_elements.push({ tagName, namespaceName, reconstructionMode }); + } + + /** Removes and returns the most recently recorded paragraph pre-closure matching `tagName` and `namespaceName`, or returns null when there is none. */ #consumeParagraphAdoptionPreclosedFormattingElement(tagName, namespaceName) { + for (let i = this.paragraph_adoption_preclosed_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.paragraph_adoption_preclosed_formatting_elements[i]; + if (entry.tagName !== tagName || 
entry.namespaceName !== namespaceName) { + continue; + } + + this.paragraph_adoption_preclosed_formatting_elements.splice(i, 1); + return entry; + } + + return null; + } + + /** True when a paragraph pre-closure is recorded for `tagName` in `namespaceName`. */ #hasParagraphAdoptionPreclosedFormattingElement(tagName, namespaceName) { + return this.paragraph_adoption_preclosed_formatting_elements.some((entry) => ( + entry.tagName === tagName && entry.namespaceName === namespaceName + )); + } + + /** Drops every paragraph pre-closure record for `tagName` in `namespaceName` by replacing the list with a filtered copy. */ #clearParagraphAdoptionPreclosedFormattingElements(tagName, namespaceName) { + this.paragraph_adoption_preclosed_formatting_elements = this.paragraph_adoption_preclosed_formatting_elements.filter((entry) => ( + entry.tagName !== tagName || entry.namespaceName !== namespaceName + )); + } + + /** Appends a { tagName, namespaceName, containerTagName, reconstructionMode } record to special_start_adoption_preclosed_formatting_elements; the mode defaults to "self". */ #markSpecialStartAdoptionPreclosedFormattingElement(tagName, namespaceName, containerTagName, reconstructionMode = "self") { + this.special_start_adoption_preclosed_formatting_elements.push({ tagName, namespaceName, containerTagName, reconstructionMode }); + } + + /** Removes and returns the most recently recorded special-start pre-closure matching tag, namespace and container, or returns null when there is none. */ #consumeSpecialStartAdoptionPreclosedFormattingElement(tagName, namespaceName, containerTagName) { + for (let i = this.special_start_adoption_preclosed_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.special_start_adoption_preclosed_formatting_elements[i]; + if ( + entry.tagName !== tagName || + entry.namespaceName !== namespaceName || + entry.containerTagName !== containerTagName + ) { + continue; + } + + this.special_start_adoption_preclosed_formatting_elements.splice(i, 1); + return entry; + } + + return null; + } + + /** True when a special-start pre-closure is recorded for the given tag, namespace and container. When `reconstructionMode` is not null, the record's mode must match it as well. */ #hasSpecialStartAdoptionPreclosedFormattingElement(tagName, namespaceName, containerTagName, reconstructionMode = null) { + return this.special_start_adoption_preclosed_formatting_elements.some((entry) => ( + entry.tagName === tagName && + entry.namespaceName === namespaceName && + entry.containerTagName === containerTagName && + (reconstructionMode === null || entry.reconstructionMode === reconstructionMode) + )); + } + + 
#clearSpecialStartAdoptionPreclosedFormattingElements(tagName, namespaceName) { /* Drops every special-start record matching tagName and namespaceName, whatever its containerTagName. */ + this.special_start_adoption_preclosed_formatting_elements = this.special_start_adoption_preclosed_formatting_elements.filter((entry) => ( + entry.tagName !== tagName || entry.namespaceName !== namespaceName + )); + } + + #applyFullParserInsertionMode(tokenType, tokenName) { /* Insertion-mode dispatcher for the full parser. Returns false right away unless is_full_parser is set, and also for a null tag name, for in-head template content, or when the current namespace is not "html". Otherwise it loops over full_parser_insertion_mode: each branch returns false, or sets skip_current_token / queues virtual tokens and returns true, or changes the mode and uses continue to dispatch the same token again. */ + if (!this.is_full_parser) { + return false; + } + + const tagName = tokenType === "#tag" ? this.#getCurrentTreeTagName() : tokenName; + if (tagName === null) { + return false; + } + + const isCloser = tokenType === "#tag" && this.is_tag_closer(); + if (this.#shouldIgnoreDocumentStartTagInTemplateContent(tokenType, tagName, isCloser)) { + if (this.#currentTemplateInsertionMode() === "in_template") { + this.#setCurrentTemplateInsertionMode("in_body"); + } + this.skip_current_token = true; + return true; + } + + if (this.#shouldIgnoreFrameStartTagInTemplateContent(tokenType, tagName, isCloser)) { + if (this.#currentTemplateInsertionMode() === "in_template") { + this.#setCurrentTemplateInsertionMode("in_body"); + } + this.skip_current_token = true; + return true; + } + + if (this.#isInHeadTemplateContent()) { + return false; + } + + if (this.current_namespace !== "html") { + return false; + } + + const isWhitespaceText = ( + tokenType === "#text" && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE + ); + const isNullText = ( + tokenType === "#text" && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE + ); + const isIgnorablePreBodyText = isWhitespaceText || isNullText; + + while (true) { + switch (this.full_parser_insertion_mode) { + case "initial": /* A doctype sets the compat mode and moves to before_html; whitespace text is skipped; comment-like tokens return false; anything else sets quirks mode and is dispatched again in before_html. */ + if (tokenType === "#doctype") { + this.full_parser_seen_doctype = true; + this.#setCompatModeFromCurrentDoctype(); + this.full_parser_insertion_mode = "before_html"; + return false; + } + + if (isWhitespaceText) { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#comment" || tokenType 
=== "#funky-comment" || tokenType === "#presumptuous-tag") { + return false; + } + + this.compat_mode = WP_HTML_Tag_Processor.QUIRKS_MODE; + this.full_parser_insertion_mode = "before_html"; + continue; + + case "before_html": /* Doctypes and ignorable text are skipped; comment-like tokens return false; a real HTML start tag marks the parser scaffolded and moves to before_head; end tags other than HEAD, BODY and HTML are skipped; anything else queues a virtual HTML push and reprocesses the token in before_head. */ + if (tokenType === "#doctype" || isIgnorablePreBodyText) { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#comment" || tokenType === "#funky-comment" || tokenType === "#presumptuous-tag") { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.full_parser_scaffolded = true; + this.full_parser_insertion_mode = "before_head"; + return false; + } + + if (isCloser && tagName !== "HEAD" && tagName !== "BODY" && tagName !== "HTML") { + this.skip_current_token = true; + return true; + } + + this.full_parser_scaffolded = true; + this.full_parser_insertion_mode = "before_head"; + this.#queueVirtualPush("HTML"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + + case "before_head": /* Doctypes, ignorable text and repeated HTML start tags are skipped; comment-like tokens return false; a real HEAD start tag moves to in_head; end tags other than HEAD, BODY and HTML are skipped; anything else queues a virtual HEAD push and reprocesses the token in in_head. */ + if (tokenType === "#doctype" || isIgnorablePreBodyText) { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#comment" || tokenType === "#funky-comment" || tokenType === "#presumptuous-tag") { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HEAD") { + this.full_parser_insertion_mode = "in_head"; + return false; + } + + if (isCloser && tagName !== "HEAD" && tagName !== "BODY" && tagName !== "HTML") { + this.skip_current_token = true; + return true; + } + + this.full_parser_insertion_mode = "in_head"; + this.#queueVirtualPush("HEAD"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + + case "in_head": /* Doctypes are skipped; a NOSCRIPT start tag switches to in_head_noscript. */ + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "NOSCRIPT") { + this.full_parser_insertion_mode = "in_head_noscript"; + 
return false; + } + + if ( + isIgnorablePreBodyText || + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" || + (tokenType === "#tag" && !isCloser && HEAD_CONTENT_ELEMENTS.has(tagName)) + ) { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.skip_current_token = true; + return true; + } + + if (isCloser && tagName === "HEAD") { + this.full_parser_insertion_mode = "after_head"; + return false; + } + + if ((tokenType === "#tag" && !isCloser && tagName === "HEAD") || (isCloser && tagName !== "BODY" && tagName !== "HTML")) { + this.skip_current_token = true; + return true; + } + + this.full_parser_insertion_mode = "after_head"; + this.#queueVirtualPop("HEAD"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + + case "in_head_noscript": /* Doctypes are skipped; whitespace text, comment-like tokens and start tags in IN_HEAD_NOSCRIPT_ALLOWED_START_TAGS return false; an HTML start tag is skipped; a NOSCRIPT end tag returns to in_head; HEAD/NOSCRIPT start tags and every end tag except BR are skipped; anything else queues a virtual NOSCRIPT pop and reprocesses the token in in_head. */ + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if ( + isWhitespaceText || + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" || + (tokenType === "#tag" && !isCloser && IN_HEAD_NOSCRIPT_ALLOWED_START_TAGS.has(tagName)) + ) { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.skip_current_token = true; + return true; + } + + if (isCloser && tagName === "NOSCRIPT") { + this.full_parser_insertion_mode = "in_head"; + return false; + } + + if ( + (tokenType === "#tag" && !isCloser && (tagName === "HEAD" || tagName === "NOSCRIPT")) || + (isCloser && tagName !== "BR") + ) { + this.skip_current_token = true; + return true; + } + + this.full_parser_insertion_mode = "in_head"; + this.#queueVirtualPop("NOSCRIPT"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + + case "after_head": /* Doctypes are skipped. While pre_frameset_ignored_element_depth is positive, tokens are first offered to #skipIgnoredElementBeforeFrameset. Several lookahead helpers then ignore tokens that precede a FRAMESET start tag; head-content start tags silently reopen HEAD; anything unhandled queues a virtual BODY push and reprocesses the token in in_body. */ + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (this.pre_frameset_ignored_element_depth > 0) { + const skipped = this.#skipIgnoredElementBeforeFrameset( + tokenType, + 
tagName, + isCloser, + isIgnorablePreBodyText, + ); + if (skipped) { + return true; + } + } + + if ( + this.pre_frameset_paragraph_ignored && + isWhitespaceText && + this.#currentTokenPrecedesStartTag("FRAMESET") + ) { /* Whitespace after an ignored pre-frameset element is itself ignored when a FRAMESET start tag follows. */ + this.#ignoreCurrentToken(); + return true; + } + + if ( + isIgnorablePreBodyText || + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "BODY") { /* Real BODY start tag: clears frameset_ok, switches to in_body and returns false without setting skip_current_token. */ + this.frameset_ok = false; + this.full_parser_insertion_mode = "in_body"; + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "FRAMESET") { /* Real FRAMESET start tag: resets pre_frameset_paragraph_ignored and switches to in_frameset. */ + this.pre_frameset_paragraph_ignored = false; + this.full_parser_insertion_mode = "in_frameset"; + return false; + } + + if (tokenType === "#tag" && !isCloser && this.#hiddenInputPrecedesFrameset(tagName)) { + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && this.#ignoredStartTagPrecedesFrameset(tagName)) { + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && this.#ignoredFrameNoisePrecedesFrameset(tagName)) { + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && this.#openElementChainPrecedesFrameset(tagName)) { + this.pre_frameset_paragraph_ignored = true; + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && this.#closedElementPrecedesFrameset(tagName)) { /* Starts depth tracking at 1 so that later tokens are routed through #skipIgnoredElementBeforeFrameset. */ + this.pre_frameset_ignored_element_depth = 1; + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && this.#paragraphPrecedesFrameset(tagName)) { + this.pre_frameset_paragraph_ignored = true; + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "TEMPLATE") { + 
this.full_parser_insertion_mode = "in_head"; + this.#silentlyReopenFullParserElement("HEAD"); + return false; + } + + if (tokenType === "#tag" && !isCloser && AFTER_HEAD_TEMPORARY_HEAD_START_TAGS.has(tagName)) { /* Same HEAD reopening as above, but additionally flags temporary_reopened_head. */ + this.full_parser_insertion_mode = "in_head"; + this.#silentlyReopenFullParserElement("HEAD"); + this.temporary_reopened_head = true; + return false; + } + + if (tokenType === "#tag" && !isCloser && HEAD_CONTENT_ELEMENTS.has(tagName)) { + this.full_parser_insertion_mode = "in_head"; + this.#silentlyReopenFullParserElement("HEAD"); + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HEAD") { + this.skip_current_token = true; + return true; + } + + if ( + isCloser && + (tagName === "BODY" || tagName === "HTML") && + this.#currentTokenPrecedesStartTag("FRAMESET") + ) { + this.skip_current_token = true; + return true; + } + + if (isCloser && tagName !== "BODY" && tagName !== "HTML") { + this.skip_current_token = true; + return true; + } + + this.full_parser_insertion_mode = "in_body"; + this.pre_frameset_paragraph_ignored = false; + this.#queueVirtualPush("BODY"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + + case "in_body": /* Skips doctypes and HTML/BODY start tags (BODY also clears frameset_ok); skips start tags in IN_BODY_IGNORED_START_TAGS when outside templates and table contexts unless preserve_in_body_ignored_start_tags is set; has dedicated handling for FRAMESET start tags and for BODY/HTML end tags; otherwise updates frameset_ok and returns false. */ + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && (tagName === "HTML" || tagName === "BODY")) { + if (tagName === "BODY") { + this.frameset_ok = false; + } + this.skip_current_token = true; + return true; + } + + if ( + tokenType === "#tag" && + !isCloser && + !this.preserve_in_body_ignored_start_tags && + this.template_insertion_modes.length === 0 && + !this.#isInTableInsertionContext() && + IN_BODY_IGNORED_START_TAGS.has(tagName) + ) { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "FRAMESET") { /* Skipped unless BODY is the second open element and frameset_ok is still set; in that remaining case the parser bails as unsupported. */ + if (this.open_elements.length <= 1 || this.open_elements[1] !== "BODY" || !this.frameset_ok) { + this.skip_current_token = true; + return true; + } + + 
this.#bailUnsupported("Cannot process non-ignored FRAMESET tags."); + return true; + } + + if (isCloser && tagName === "BODY") { + this.full_parser_insertion_mode = "after_body"; + this.skip_current_token = true; + return true; + } + + if (isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = "after_after_body"; + this.skip_current_token = true; + return true; + } + + if (tokenType === "#text" && !isWhitespaceText && !isNullText) { + this.frameset_ok = false; + } else if (tokenType === "#tag" && !isCloser && this.#startTagClearsFramesetOk(tagName)) { + this.frameset_ok = false; + } + + return false; + + case "after_body": /* Comment-like tokens queue virtual pops from index 1 and mark the real token pending when more than one element is open, otherwise return false. Doctypes are skipped; an HTML start tag is dispatched again in in_body; an HTML end tag moves to after_after_body and is skipped; whitespace text returns false; a TEMPLATE start tag silently reopens BODY; anything else is dispatched again in in_body. */ + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + if (this.open_elements.length > 1) { + this.#queueVirtualPopsFrom(1); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return true; + } + return false; + } + + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = "in_body"; + continue; + } + + if (isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = "after_after_body"; + this.skip_current_token = true; + return true; + } + + if (isWhitespaceText) { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "TEMPLATE") { + this.full_parser_insertion_mode = "in_body"; + this.#silentlyReopenFullParserElement("BODY"); + return false; + } + + this.full_parser_insertion_mode = "in_body"; + continue; + + case "after_after_body": /* Comment-like tokens queue virtual pops from index 0 and mark the real token pending when any element is open, otherwise return false. Whitespace text returns false; every other token, including doctypes and HTML start tags, is dispatched again in in_body. */ + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + if (this.open_elements.length > 0) { + this.#queueVirtualPopsFrom(0); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return true; + } + return false; + } + + if (tokenType === 
"#doctype" || (tokenType === "#tag" && !isCloser && tagName === "HTML")) { + this.full_parser_insertion_mode = "in_body"; + continue; + } + + if (isWhitespaceText) { + return false; + } + + this.full_parser_insertion_mode = "in_body"; + continue; + + case "in_frameset": /* Whitespace text returns false and other text goes through #filterFramesetTextToken; comment-like tokens return false; doctypes are skipped; an HTML start tag is dispatched again in in_body; FRAMESET, FRAME and NOFRAMES start tags return false; a FRAMESET end tag is skipped when the top open element is HTML, otherwise it returns false and switches to after_frameset unless the element beneath the top is also FRAMESET; everything else is skipped. */ + if (tokenType === "#text") { + if (isWhitespaceText) { + return false; + } + return this.#filterFramesetTextToken(); + } + + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + return false; + } + + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = "in_body"; + continue; + } + + if (tokenType === "#tag" && !isCloser && tagName === "FRAMESET") { + return false; + } + + if (isCloser && tagName === "FRAMESET") { + const topIndex = this.open_elements.length - 1; + if (this.open_elements[topIndex] === "HTML") { + this.skip_current_token = true; + return true; + } + if (this.open_elements[topIndex - 1] !== "FRAMESET") { + this.full_parser_insertion_mode = "after_frameset"; + } + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "FRAME") { + return false; + } + + if (tokenType === "#tag" && !isCloser && tagName === "NOFRAMES") { + return false; + } + + this.skip_current_token = true; + return true; + + case "after_frameset": /* Text is handled as in in_frameset; comment-like tokens return false; doctypes are skipped; an HTML start tag is dispatched again in in_body; an HTML end tag moves to after_after_frameset and is skipped; a NOFRAMES start tag returns false; everything else is skipped. */ + if (tokenType === "#text") { + if (isWhitespaceText) { + return false; + } + return this.#filterFramesetTextToken(); + } + + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + return false; + } + + if (tokenType === "#doctype") { + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = "in_body"; + continue; + } + + if (isCloser && tagName === "HTML") { + this.full_parser_insertion_mode = 
"after_after_frameset"; + this.skip_current_token = true; + return true; + } + + if (tokenType === "#tag" && !isCloser && tagName === "NOFRAMES") { + return false; + } + + this.skip_current_token = true; + return true; + + case "after_after_frameset": /* Comment-like tokens: when #hasFutureNoframesStartTag() is true, a #comment is delayed via #delayCurrentCommentToken and other kinds bail as unsupported; otherwise virtual pops from index 0 are queued if any element is open. Doctypes and HTML start tags are dispatched again in in_body; text is handled as in in_frameset; a NOFRAMES start tag returns false; everything else is skipped. */ + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + if (this.#hasFutureNoframesStartTag()) { + if (tokenType === "#comment") { + return this.#delayCurrentCommentToken(["#comment"]); + } + + this.#bailUnsupported("Content outside of HTML is unsupported."); + return true; + } + if (this.open_elements.length > 0) { + this.#queueVirtualPopsFrom(0); + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return true; + } + return false; + } + + if (tokenType === "#doctype" || (tokenType === "#tag" && !isCloser && tagName === "HTML")) { + this.full_parser_insertion_mode = "in_body"; + continue; + } + + if (tokenType === "#text") { + if (isWhitespaceText) { + return false; + } + return this.#filterFramesetTextToken(); + } + + if (tokenType === "#tag" && !isCloser && tagName === "NOFRAMES") { + return false; + } + + this.skip_current_token = true; + return true; + + default: /* Unknown insertion mode: no action taken. */ + return false; + } + } + } + + #filterFramesetTextToken() { /* Keeps only the characters of the current text for which isHtmlWhitespaceCode is true. With none left, marks the token skipped; otherwise installs a read-only synthetic #text token holding just that whitespace and classifies it as whitespace. Always returns true. */ + const text = this.get_modifiable_text() ?? 
""; + let whitespace = ""; + for (let i = 0; i < text.length; i += 1) { + if (isHtmlWhitespaceCode(text.charCodeAt(i))) { + whitespace += text[i]; + } + } + + if (whitespace === "") { + this.skip_current_token = true; + this.breadcrumbs = this.#breadcrumbStack(); + return true; + } + + this.current_synthetic_token = { + tokenType: "#text", + tokenName: "#text", + modifiableText: whitespace, + readOnly: true, + }; + this.parser_state = STATE_TEXT_NODE; + this.text_node_classification = WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE; + this.current_token_namespace = "html"; + this.breadcrumbs = this.#breadcrumbStack("#text"); + this.skip_current_token = false; + return true; + } + + #queueVirtualPush( + tagName, + namespaceName = "html", + attributes = [], + integrationNodeType = null, + fosterParentedTableIndex = this.#currentFosterParentedTableIndex(), + ) { /* Appends a "push" operation to virtual_tokens; each attribute is copied into a fresh { name, value } object. */ + this.virtual_tokens.push({ + operation: "push", + tagName, + namespaceName, + integrationNodeType, + fosterParentedTableIndex, + attributes: attributes.map((attribute) => ({ + name: attribute.name, + value: attribute.value, + })), + }); + } + + #queueVirtualPop(tagName, namespaceName = "html") { /* Appends a "pop" operation for tagName/namespaceName to virtual_tokens. */ + this.virtual_tokens.push({ + operation: "pop", + tagName, + namespaceName, + }); + } + + #reprocessCurrentTokenAfterVirtualTokens() { /* Flags the current real token as pending and remembers parser_state in pending_real_parser_state; always returns true. */ + this.pending_real_token = true; + this.pending_real_parser_state = this.parser_state; + return true; + } + + #removeActiveFormattingElement(tagName) { /* Walks active_formatting_elements from the end and stops at the first marker. Removes the first entry whose tagName matches, clears the matching preclosed records for the "html" namespace and returns true; returns false if nothing was removed. */ + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + if (this.#isActiveFormattingMarker(this.active_formatting_elements[i])) { + break; + } + + if (this.active_formatting_elements[i].tagName === tagName) { + this.active_formatting_elements.splice(i, 1); + this.#clearParagraphAdoptionPreclosedFormattingElements(tagName, "html"); + this.#clearSpecialStartAdoptionPreclosedFormattingElements(tagName, "html"); + return true; + } + } + return false; + } + + #removeStaleActiveFormattingElementsForClose(tagName) { /* Counts the html-namespace entries for tagName after the last marker, then removes them from the end backwards while that count exceeds the value of #countOpenHtmlElements(tagName). Returns whether anything was removed. */ + let 
openCount = this.#countOpenHtmlElements(tagName); + let activeCount = 0; + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + if (entry.tagName === tagName && entry.namespaceName === "html") { + activeCount += 1; + } + } + let removed = false; + + for (let i = this.active_formatting_elements.length - 1; i >= 0 && activeCount > openCount; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + if (entry.tagName === tagName && entry.namespaceName === "html") { + this.active_formatting_elements.splice(i, 1); + this.#clearParagraphAdoptionPreclosedFormattingElements(tagName, "html"); + this.#clearSpecialStartAdoptionPreclosedFormattingElements(tagName, "html"); + activeCount -= 1; + removed = true; + } + } + + return removed; + } + + #removeActiveFormattingElementsForClose(tagName) { /* Runs the stale-entry cleanup first, then removes one more matching entry; the return value is the result of that second step only. */ + this.#removeStaleActiveFormattingElementsForClose(tagName); + return this.#removeActiveFormattingElement(tagName); + } + + #clearActiveFormattingElementsForTemplateClose(templateIndex) { /* Keeps only entries whose templateDepth (treated as 0 when absent) is below the count returned by #countOpenHtmlElements("TEMPLATE", templateIndex + 1). */ + const closedTemplateDepth = this.#countOpenHtmlElements("TEMPLATE", templateIndex + 1); + this.active_formatting_elements = this.active_formatting_elements.filter((entry) => ( + (entry.templateDepth ?? 
0) < closedTemplateDepth + )); + } + + #applyTemplateInsertionModeForStartTag(tagName) { /* Applies the current template insertion mode to a start tag. Returns false with no side effects outside the html namespace, when no template mode is on the stack, or for TEMPLATE itself. Otherwise it may change the mode and recurse, queue virtual pushes or pops and request reprocessing, or ignore the token; the ignore and reprocess paths return true. */ + if ( + this.current_namespace !== "html" || + this.template_insertion_modes.length === 0 || + tagName === "TEMPLATE" + ) { + return false; + } + + const mode = this.#currentTemplateInsertionMode(); + if (mode === "in_template") { /* in_template only picks a concrete mode from the tag name and then recurses, except for TEMPLATE_HEAD_START_TAGS which return false unchanged. */ + if (TEMPLATE_HEAD_START_TAGS.has(tagName)) { + return false; + } + + if (TEMPLATE_TABLE_WRAPPER_START_TAGS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_table"); + return this.#applyTemplateInsertionModeForStartTag(tagName); + } + + if (tagName === "COL") { + this.#setCurrentTemplateInsertionMode("in_column_group"); + return this.#applyTemplateInsertionModeForStartTag(tagName); + } + + if (tagName === "TR") { + this.#setCurrentTemplateInsertionMode("in_table_body"); + return this.#applyTemplateInsertionModeForStartTag(tagName); + } + + if (TABLE_CELL_ELEMENTS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_row"); + return this.#applyTemplateInsertionModeForStartTag(tagName); + } + + this.#setCurrentTemplateInsertionMode("in_body"); + return this.#applyTemplateInsertionModeForStartTag(tagName); + } + + if (mode === "in_column_group") { /* Only COL is accepted here; every other start tag is ignored. */ + if (tagName === "COL") { + return false; + } + + this.#ignoreCurrentToken(); + return true; + } + + if (mode === "in_table") { /* COL gets a virtual COLGROUP and TR or cell tags get a virtual TBODY before being reprocessed; table-section tags just switch to in_table_body. */ + if (tagName === "COL") { + this.#setCurrentTemplateInsertionMode("in_column_group"); + this.#queueVirtualPush("COLGROUP"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + + if (tagName === "TR" || TABLE_CELL_ELEMENTS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_table_body"); + this.#queueVirtualPush("TBODY"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + + if (TABLE_SECTION_ELEMENTS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_table_body"); + } + return false; + } + + if (mode === "in_table_body") { + if (TEMPLATE_TABLE_WRAPPER_START_TAGS.has(tagName)) { + const topIndex = this.open_elements.length - 
1; + if ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + TABLE_SECTION_ELEMENTS.has(this.open_elements[topIndex]) + ) { + this.#setCurrentTemplateInsertionMode("in_table"); + this.#queueVirtualPopsFrom(topIndex); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + + this.#ignoreCurrentToken(); + return true; + } + + if (tagName === "SELECT") { /* SELECT pops an open html table section and is reprocessed in in_table; with no such section on top it falls through to the checks below. */ + const topIndex = this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + TABLE_SECTION_ELEMENTS.has(this.open_elements[topIndex]) + ) { + this.#setCurrentTemplateInsertionMode("in_table"); + this.#queueVirtualPopsFrom(topIndex); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + } + + if (TABLE_CELL_ELEMENTS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_row"); + this.#queueVirtualPush("TR"); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + + if (tagName === "TR") { + this.#setCurrentTemplateInsertionMode("in_row"); + } + return false; + } + + if (mode === "in_row") { /* Table-wrapper start tags are ignored; a DIV while TR is the current html element pops the top element and is reprocessed in in_table_body. */ + if (TEMPLATE_TABLE_WRAPPER_START_TAGS.has(tagName)) { + this.#ignoreCurrentToken(); + return true; + } + if (tagName === "DIV" && this.#currentHtmlElementIs("TR")) { + this.#setCurrentTemplateInsertionMode("in_table_body"); + this.#queueVirtualPopsFrom(this.open_elements.length - 1); + return this.#reprocessCurrentTokenAfterVirtualTokens(); + } + return false; + } + + if (mode !== "in_body") { + return false; + } + + if (IN_BODY_IGNORED_START_TAGS.has(tagName)) { + this.#ignoreCurrentToken(); + return true; + } + + return false; + } + + #applyTemplateInsertionModeForEndTag(tagName) { /* With a template mode active: a TR end tag sets in_table_body, and table-section or CAPTION end tags set in_table. Other tags leave the mode unchanged. Returns nothing. */ + if (this.template_insertion_modes.length === 0) { + return; + } + + if (tagName === "TR") { + this.#setCurrentTemplateInsertionMode("in_table_body"); + } else if (TABLE_SECTION_ELEMENTS.has(tagName)) { + this.#setCurrentTemplateInsertionMode("in_table"); + } else if (tagName === "CAPTION") { + 
this.#setCurrentTemplateInsertionMode("in_table"); + } + } + + #shouldIgnoreTextInColumnGroup(tokenType) { /* True only for #text tokens when either the current template insertion mode is in_column_group, or a non-full parser has an html COLGROUP context node and #currentHtmlElementIs("COLGROUP") holds. */ + return ( + tokenType === "#text" && + ( + ( + this.template_insertion_modes.length > 0 && + this.#currentTemplateInsertionMode() === "in_column_group" + ) || + ( + !this.is_full_parser && + this.context_namespace === "html" && + this.context_node === "COLGROUP" && + this.#currentHtmlElementIs("COLGROUP") + ) + ) + ); + } + + #currentTemplateInsertionMode() { /* Last entry of template_insertion_modes, falling back to "in_body" when there is none. */ + return this.template_insertion_modes[this.template_insertion_modes.length - 1] ?? "in_body"; + } + + #setCurrentTemplateInsertionMode(mode) { /* Overwrites the last entry of template_insertion_modes; does nothing when the stack is empty. */ + if (this.template_insertion_modes.length > 0) { + this.template_insertion_modes[this.template_insertion_modes.length - 1] = mode; + } + } + + #popTemplateInsertionMode() { /* Pops the last template insertion mode if there is one. */ + if (this.template_insertion_modes.length > 0) { + this.template_insertion_modes.pop(); + } + } + + #ignoreCurrentToken() { /* Records the current namespace as the token namespace, refreshes breadcrumbs, marks the token skipped, then sets the current namespace from #namespaceForStackTop(). */ + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + this.#setCurrentNamespace(this.#namespaceForStackTop()); + } + + #bailUnsupported(message) { /* Records ERROR_UNSUPPORTED plus an exception built from message, discards queued virtual tokens and any pending real token, and marks the current token skipped. */ + this.last_error = WP_HTML_Processor.ERROR_UNSUPPORTED; + this.unsupported_exception = this.#createUnsupportedException(message); + this.virtual_tokens = []; + this.pending_real_token = false; + this.pending_real_parser_state = null; + this.skip_current_token = true; + } + + #createUnsupportedException(message) { /* Builds a WP_HTML_Unsupported_Exception from the message, the token name (or ""), the token start offset and markup (0 and "" for virtual tokens or when no span is available), the open elements, and the tag names of the non-marker active formatting entries. */ + const span = this.is_virtual() ? null : this.#currentRealTokenSpan(); + return new WP_HTML_Unsupported_Exception( + message, + this.get_token_name() ?? "", + span?.start ?? 0, + this.is_virtual() ? 
"" : this.#currentRealTokenString(), + this.open_elements, + this.active_formatting_elements + .filter((entry) => !this.#isActiveFormattingMarker(entry)) + .map((entry) => entry.tagName), + ); + } + + #currentRealTokenSpan() { /* Asks the wasm tag processor for the current token span through a pair of out-pointers. Returns { start, length } read as u32 values, or null when the call returns a falsy result. */ + return runtime.withOutPair((startPtr, lengthPtr) => { + if (!wasm.wp_html_api_rust_tag_processor_current_span(this.pointer, startPtr, lengthPtr)) { + return null; + } + return { + start: runtime.readU32(startPtr), + length: runtime.readU32(lengthPtr), + }; + }); + } + + #currentRealTokenString() { /* Returns the current token markup: the bytes from wp_html_api_rust_tag_processor_get_html are sliced at the current span and decoded with textDecoder. Returns "" when the span or the bytes are unavailable. */ + const span = this.#currentRealTokenSpan(); + if (span === null) { + return ""; + } + + const html = runtime.readOutputBytes((out) => ( + wasm.wp_html_api_rust_tag_processor_get_html(this.pointer, out) + )); + return html === null ? "" : textDecoder.decode(html.slice(span.start, span.start + span.length)); + } + + #currentSpecialAtomicText(tagName) { /* Extracts the text between the start tag and the closer located by findSpecialAtomicCloserEnd inside the current token markup, passed through replaceNulls. Falls back to get_modifiable_text() (or "") whenever the start tag or the closer cannot be located. */ + const tokenMarkup = this.#currentRealTokenString(); + const startTag = completeStartTagAt(tokenMarkup, 0); + if (startTag === null) { + return this.get_modifiable_text() ?? ""; + } + + const closerEnd = findSpecialAtomicCloserEnd(tokenMarkup, startTag.end, tagName); + if (closerEnd === null) { + return this.get_modifiable_text() ?? ""; + } + + const closerStart = tokenMarkup.lastIndexOf("</", closerEnd - 1); + if (closerStart < startTag.end) { + return this.get_modifiable_text() ?? ""; + } + + return replaceNulls(tokenMarkup.slice(startTag.end, closerStart)); + } + + #bailIfExceededMaxBookmarks() { /* Returns false while open_elements.length is within MAX_BOOKMARKS (read from the constructor, else from WP_HTML_Processor). Otherwise records ERROR_EXCEEDED_MAX_BOOKMARKS, clears the pending virtual and real token state, marks the token skipped and returns true. */ + const maxBookmarks = this.constructor.MAX_BOOKMARKS ?? 
WP_HTML_Processor.MAX_BOOKMARKS; + if (this.open_elements.length <= maxBookmarks) { + return false; + } + + this.last_error = WP_HTML_Processor.ERROR_EXCEEDED_MAX_BOOKMARKS; + this.unsupported_exception = null; + this.virtual_tokens = []; + this.pending_real_token = false; + this.pending_real_parser_state = null; + this.skip_current_token = true; + return true; + } + + #unsupportedEncodingMetaMessage() { /* Returns a message when the current tag has a string charset attribute, or string http-equiv and content attributes where http-equiv equals "content-type" ignoring case; otherwise returns null. */ + if (typeof this.get_attribute("charset") === "string") { + return "Cannot yet process META tags with charset to determine encoding."; + } + + const httpEquiv = this.get_attribute("http-equiv"); + const content = this.get_attribute("content"); + if ( + typeof httpEquiv === "string" && + typeof content === "string" && + httpEquiv.toLowerCase() === "content-type" + ) { + return "Cannot yet process META tags with http-equiv Content-Type to determine encoding."; + } + + return null; + } + + #queueVirtualPreclosuresForStartTag(tagName) { /* Runs an ordered, first-match-wins series of checks for a start tag. The first check that applies queues virtual pops (or bails as unsupported) and returns true. Returns false when nothing was queued; a few A/NOBR branches return false after only updating bookkeeping fields. */ + if (this.#queueForeignContentBreakoutForStartTag(tagName)) { + return true; + } + + if (this.current_namespace !== "html") { + return false; + } + + if (this.#queueFosteredFragmentFormattingElementPreclosureForTableStartTag(tagName)) { + return true; + } + + if (tagName === "NOBR" && this.#currentHtmlElementIs("NOBR")) { /* A NOBR directly inside NOBR: drop its active formatting entries and pop the current element. */ + this.#removeActiveFormattingElementsForClose("NOBR"); + this.#queueVirtualPopsFrom(this.open_elements.length - 1); + return true; + } + + if (CAPTION_CLOSING_START_TAGS.has(tagName)) { + const captionIndex = this.#findElementInTableScope("CAPTION"); + if (captionIndex !== -1) { + this.#queueVirtualPopsFrom(captionIndex); + return true; + } + } + + if (COLGROUP_CLOSING_START_TAGS.has(tagName) && this.#currentHtmlElementIs("COLGROUP")) { + this.#queueVirtualPopsFrom(this.open_elements.length - 1); + return true; + } + + if (this.current_namespace === "html" && SELECT_IN_TABLE_BREAKOUT_TAGS.has(tagName)) { + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex !== -1 && 
this.#openHtmlElementBefore("TABLE", selectIndex)) { + this.#queueVirtualPopsFrom(selectIndex); + return true; + } + } + + if (this.#queueForeignIntegrationPointTableBreakoutForStartTag(tagName)) { + return true; + } + + if (tagName === "TABLE") { /* A TABLE start tag pops from the index reported by #tableStartTagPreclosureIndex() when that is not -1. */ + const tableIndex = this.#tableStartTagPreclosureIndex(); + if (tableIndex !== -1) { + this.#queueVirtualPopsFrom(tableIndex); + return true; + } + } + + if (this.current_namespace === "html" && SELECT_BREAKOUT_START_TAGS.has(tagName)) { /* Only a SELECT at or above base_open_element_count is popped. */ + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex !== -1 && selectIndex >= this.base_open_element_count) { + this.#queueVirtualPopsFrom(selectIndex); + return true; + } + } + + if ( + this.current_namespace === "html" && + ( + tagName === "OPTION" || + tagName === "OPTGROUP" || + (tagName === "HR" && this.#hasOpenHtmlElement("SELECT")) + ) + ) { /* An html OPTION on top of the stack is popped first; for OPTGROUP or HR, an html OPTGROUP on top is popped when a SELECT is open. */ + const topIndex = this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_elements[topIndex] === "OPTION" && + this.open_element_namespaces[topIndex] === "html" + ) { + this.#queueVirtualPopsFrom(topIndex); + return true; + } + + if ( + (tagName === "OPTGROUP" || tagName === "HR") && + topIndex >= 0 && + this.open_elements[topIndex] === "OPTGROUP" && + this.open_element_namespaces[topIndex] === "html" && + this.#hasOpenHtmlElement("SELECT") + ) { + this.#queueVirtualPopsFrom(topIndex); + return true; + } + } + + if ( + this.current_namespace === "html" && + tagName === "FORM" && + ( + this.#hasOpenHtmlElement("TEMPLATE") || + !this.#hasOpenHtmlElement("FORM") + ) + ) { + const paragraphIndex = this.#findOpenElementBeforeBoundary("P", BUTTON_SCOPE_BOUNDARIES); + if (paragraphIndex !== -1 && !this.#hasForeignIntegrationPointAfter(paragraphIndex)) { + this.#queueVirtualPopsFrom(paragraphIndex); + return true; + } + } + + const paragraphIndex = this.#findClosablePInButtonScopeForStartTag(tagName); + if (paragraphIndex !== -1) { + this.#queueVirtualPopsFrom(paragraphIndex); + return true; + } + + if ( 
+ RUBY_IMPLIED_END_TAG_START_TAGS.has(tagName) && + this.#hasOpenHtmlElement("RUBY") && + this.#currentHtmlElementHasRubyImpliedEndTagForStartTag(tagName) + ) { + this.#queueVirtualPopsFrom(this.open_elements.length - 1); + return true; + } + + if (tagName === "LI") { /* An LI start tag pops from the index returned by #findOpenElementBeforeBoundary("LI", LIST_ITEM_SCOPE_BOUNDARIES) when that is not -1. */ + const listItemIndex = this.#findOpenElementBeforeBoundary("LI", LIST_ITEM_SCOPE_BOUNDARIES); + if (listItemIndex !== -1) { + this.#queueVirtualPopsFrom(listItemIndex); + return true; + } + } + + if (tagName === "DD" || tagName === "DT") { /* Same lookup as for LI, but matching either DD or DT through a predicate. */ + const descriptionIndex = this.#findOpenElementBeforeBoundary( + (nodeName) => nodeName === "DD" || nodeName === "DT", + LIST_ITEM_SCOPE_BOUNDARIES, + ); + if (descriptionIndex !== -1) { + this.#queueVirtualPopsFrom(descriptionIndex); + return true; + } + } + + if (HEADING_ELEMENTS.has(tagName)) { /* A heading start tag pops an html heading that is the current top element. */ + const topIndex = this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + HEADING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + this.#queueVirtualPopsFrom(topIndex); + return true; + } + } + + if (tagName === "A" || tagName === "NOBR") { + const formattingElementIndex = this.#lastOpenElementIndex(tagName, "html"); + if ( + tagName === "A" && + formattingElementIndex === -1 && + this.#lastActiveFormattingElementIndex("A") !== -1 && + this.open_elements.at(-1) === "DIV" && + this.pending_nested_anchor_div_active_removal_after_deferred_table + ) { /* No A is open but one is still in the active formatting list: consume the pending flag, drop that entry, and queue nothing. */ + this.pending_nested_anchor_div_active_removal_after_deferred_table = false; + this.#removeActiveFormattingElement("A"); + return false; + } + if (formattingElementIndex !== -1) { + const activeFormattingElementIndex = this.#lastActiveFormattingElementIndex(tagName); + if ( + tagName === "A" && + activeFormattingElementIndex === -1 + ) { + return false; + } + if (this.#canRepresentNestedAnchorFosteredBeforeDeferredTable(tagName)) { + this.pending_nested_anchor_outer_closer_after_deferred_table_index = formattingElementIndex; + 
this.pending_nested_anchor_active_removal_after_deferred_table = this.#shouldRemoveNestedAnchorActiveAfterDeferredTable(); + this.#removeActiveFormattingElement(tagName); + return false; + } + if ( + tagName === "NOBR" && + this.#lastOpenElementIndex("TABLE", "html") > formattingElementIndex + ) { + return false; + } + if ( + ( + tagName === "NOBR" && + activeFormattingElementIndex !== -1 && + this.#hasOpenActiveFormattingElementAfterIndex(activeFormattingElementIndex) + ) || + hasSpecialBoundaryAfter(this.open_elements, this.open_element_namespaces, formattingElementIndex) + ) { /* These nestings are not handled here; the parser bails as unsupported instead. */ + this.#bailUnsupported( + tagName === "A" + ? "Cannot process nested A elements which require adoption agency reconstruction." + : "Cannot process nested NOBR elements which require adoption agency reconstruction.", + ); + return true; + } + + this.#queueVirtualPopsFrom(formattingElementIndex); + this.#removeActiveFormattingElement(tagName); + return true; + } + } + + if (TABLE_CELL_BOUNDARY_START_TAGS.has(tagName)) { + const cellIndex = this.#findElementInTableScope((nodeName) => TABLE_CELL_ELEMENTS.has(nodeName)); + if (cellIndex !== -1) { + this.#queueVirtualPopsFrom(cellIndex); + return true; + } + } + + if (TABLE_ROW_BOUNDARY_START_TAGS.has(tagName)) { + const rowIndex = this.#findElementInTableScope("TR"); + if (rowIndex !== -1) { + this.#queueVirtualPopsFrom(rowIndex); + return true; + } + } + + if (TABLE_SECTION_BOUNDARY_START_TAGS.has(tagName)) { + const sectionIndex = this.#findElementInTableScope((nodeName) => TABLE_SECTION_ELEMENTS.has(nodeName)); + if (sectionIndex !== -1) { + this.#queueVirtualPopsFrom(sectionIndex); + return true; + } + } + + return false; + } + + #queueForeignIntegrationPointTableBreakoutForStartTag(tagName) { /* Applies only to a TABLE start tag in the html namespace while an html TABLE is already open. Finds the first element after that table on the open-element stack that is non-html and has a non-null integration node type, queues virtual pops from it and returns true; otherwise returns false. */ + if ( + this.current_namespace !== "html" || + tagName !== "TABLE" + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + for (let i = tableIndex + 1; i < 
this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] !== "html" && + this.open_element_integration_node_types[i] !== null + ) { + this.#queueVirtualPopsFrom(i); + return true; + } + } + + return false; + } + + #queueForeignContentBreakoutForStartTag(tagName) { + if ( + this.current_namespace === "html" || + ( + !FOREIGN_CONTENT_HTML_BREAKOUT_START_TAGS.has(tagName) && + !this.#isFontBreakoutStartTag(tagName) + ) + ) { + return false; + } + + const firstForeignIndex = this.#firstForeignElementToPopForHtmlBreakout(); + if (firstForeignIndex === -1) { + return false; + } + + this.#queueVirtualPopsFrom(firstForeignIndex); + return true; + } + + #isFontBreakoutStartTag(tagName) { + return ( + tagName === "FONT" && + ( + this.get_attribute("color") !== null || + this.get_attribute("face") !== null || + this.get_attribute("size") !== null + ) + ); + } + + #isHiddenInputStartTag(tagName) { + if (tagName !== "INPUT") { + return false; + } + + const typeAttribute = this.get_attribute("type"); + return typeof typeAttribute === "string" && typeAttribute.toLowerCase() === "hidden"; + } + + #hiddenInputPrecedesFrameset(tagName) { + if (!this.#isHiddenInputStartTag(tagName)) { + return false; + } + + return this.#currentTokenPrecedesStartTag("FRAMESET"); + } + + #ignoredStartTagPrecedesFrameset(tagName) { + return AFTER_HEAD_FRAMESET_IGNORED_START_TAGS.has(tagName) && + this.#currentTokenPrecedesStartTag("FRAMESET"); + } + + #ignoredFrameNoisePrecedesFrameset(tagName) { + if (tagName !== "FRAME") { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!this.#isWhitespacePreFramesetText(this.html.slice(at, nextTag.tag_start))) { + return false; + } + + if (!nextTag.is_closing) { + return nextTag.tag_name === "FRAMESET"; + } + + if 
(nextTag.tag_name !== "FRAME") { + return false; + } + + at = nextTag.token_end; + } + } + + #closedElementPrecedesFrameset(tagName) { + if (!AFTER_HEAD_FRAMESET_IGNORED_CLOSED_START_TAGS.has(tagName)) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let depth = 1; + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!this.#isIgnorablePreFramesetText(this.html.slice(at, nextTag.tag_start))) { + return false; + } + + if (nextTag.is_closing) { + depth -= 1; + if (depth === 0) { + const followingTag = runtime.scanNextTag(this.html, nextTag.token_end); + return followingTag !== false && + !followingTag.is_closing && + followingTag.tag_name === "FRAMESET" && + this.#isIgnorablePreFramesetText(this.html.slice(nextTag.token_end, followingTag.tag_start)); + } + } else if (!VOID_ELEMENTS.has(nextTag.tag_name) && !nextTag.has_self_closing_flag) { + depth += 1; + } + + at = nextTag.token_end; + } + } + + #openElementChainPrecedesFrameset(tagName) { + if (!AFTER_HEAD_FRAMESET_IGNORED_OPEN_START_TAGS.has(tagName)) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!this.#isWhitespacePreFramesetText(this.html.slice(at, nextTag.tag_start))) { + return false; + } + + if (nextTag.is_closing) { + return false; + } + + if (nextTag.tag_name === "FRAMESET") { + return true; + } + + if (!AFTER_HEAD_FRAMESET_IGNORED_OPEN_START_TAGS.has(nextTag.tag_name)) { + return false; + } + + at = nextTag.token_end; + } + } + + #skipIgnoredElementBeforeFrameset(tokenType, tagName, isCloser, isIgnorablePreBodyText) { + if (tokenType === "#text") { + if (!isIgnorablePreBodyText) { + 
this.pre_frameset_ignored_element_depth = 0; + return false; + } + + this.#ignoreCurrentToken(); + return true; + } + + if (tokenType !== "#tag") { + this.pre_frameset_ignored_element_depth = 0; + return false; + } + + if (isCloser) { + this.pre_frameset_ignored_element_depth -= 1; + } else if (!VOID_ELEMENTS.has(tagName) && !this.has_self_closing_flag()) { + this.pre_frameset_ignored_element_depth += 1; + } + + this.#ignoreCurrentToken(); + return true; + } + + #isIgnorablePreFramesetText(text) { + return text.split("").every((char) => { + const code = char.charCodeAt(0); + return code === 0 || isHtmlWhitespaceCode(code); + }); + } + + #isWhitespacePreFramesetText(text) { + return text.split("").every((char) => isHtmlWhitespaceCode(char.charCodeAt(0))); + } + + #paragraphPrecedesFrameset(tagName) { + return tagName === "P" && this.#currentTokenPrecedesStartTag("FRAMESET"); + } + + #currentTokenPrecedesStartTag(tagName) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + if ( + nextTag === false || + nextTag.is_closing || + nextTag.tag_name !== tagName + ) { + return false; + } + + return this.html.slice(afterToken, nextTag.tag_start).split("").every((char) => ( + isHtmlWhitespaceCode(char.charCodeAt(0)) + )); + } + + #startTagClearsFramesetOk(tagName) { + if (tagName === "INPUT") { + return !this.#isHiddenInputStartTag(tagName); + } + + return FRAMESET_NOT_OK_START_TAGS.has(tagName); + } + + #firstForeignElementToPopForHtmlBreakout() { + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + const namespaceName = this.open_element_namespaces[i]; + if (namespaceName === "html" || this.open_element_integration_node_types[i] !== null) { + return i + 1; + } + } + + return 0; + } + + #lastActiveFormattingElementIndex(tagName) { + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + if 
(this.#isActiveFormattingMarker(this.active_formatting_elements[i])) { + break; + } + + if (this.active_formatting_elements[i].tagName === tagName) { + return i; + } + } + return -1; + } + + #hasOpenActiveFormattingElementAfterIndex(index) { + for (let i = index + 1; i < this.active_formatting_elements.length; i += 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + return false; + } + if (this.#lastOpenElementIndex(entry.tagName, entry.namespaceName) !== -1) { + return true; + } + } + + return false; + } + + #queueVirtualPreclosuresForEndTag(tagName) { + if ( + ( + tagName === "BR" || + tagName === "P" || + this.#hasOpenHtmlElementBeforeForeignBreakout(tagName) + ) && + this.#queueForeignContentBreakoutForEndTag() + ) { + return true; + } + + if (!this.#hasElementInTableScope("TABLE")) { + return false; + } + + if ( + this.#currentHtmlElementIs("COLGROUP") && + tagName !== "COL" && + tagName !== "COLGROUP" && + tagName !== "TEMPLATE" + ) { + this.#queueVirtualPopsFrom(this.open_elements.length - 1); + return true; + } + + if (tagName === "TABLE") { + const captionIndex = this.#findElementInTableScope("CAPTION"); + if (captionIndex !== -1) { + this.#queueVirtualPopsFrom(captionIndex); + return true; + } + } + + if (SELECT_IN_TABLE_BREAKOUT_TAGS.has(tagName) && this.#hasElementInTableScope(tagName)) { + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex !== -1 && this.#openHtmlElementBefore("TABLE", selectIndex)) { + this.#queueVirtualPopsFrom(selectIndex); + return true; + } + } + + if (tagName === "TR" && !this.#hasElementInTableScope("TR")) { + return false; + } + + if (TABLE_SECTION_ELEMENTS.has(tagName) && !this.#hasElementInTableScope(tagName)) { + return false; + } + + if (tagName === "TABLE" || tagName === "TR" || TABLE_SECTION_ELEMENTS.has(tagName)) { + const cellIndex = this.#findElementInTableScope((nodeName) => TABLE_CELL_ELEMENTS.has(nodeName)); + if (cellIndex !== 
-1) { + this.#queueVirtualPopsFrom(cellIndex); + return true; + } + } + + if (tagName === "TABLE" || TABLE_SECTION_ELEMENTS.has(tagName)) { + const rowIndex = this.#findElementInTableScope("TR"); + if (rowIndex !== -1) { + this.#queueVirtualPopsFrom(rowIndex); + return true; + } + } + + if (tagName === "TABLE") { + const sectionIndex = this.#findElementInTableScope((nodeName) => TABLE_SECTION_ELEMENTS.has(nodeName)); + if (sectionIndex !== -1) { + this.#queueVirtualPopsFrom(sectionIndex); + return true; + } + } + + return false; + } + + #queueForeignContentBreakoutForEndTag() { + if (this.current_namespace === "html") { + return false; + } + + const firstForeignIndex = this.#firstForeignElementToPopForHtmlBreakout(); + if (firstForeignIndex === -1) { + return false; + } + + this.#queueVirtualPopsFrom(firstForeignIndex); + return true; + } + + #hasOpenHtmlElementBeforeForeignBreakout(tagName) { + if (this.current_namespace === "html") { + return false; + } + + const firstForeignIndex = this.#firstForeignElementToPopForHtmlBreakout(); + const parentIndex = firstForeignIndex - 1; + return ( + parentIndex >= 0 && + this.open_elements[parentIndex] === tagName && + this.open_element_namespaces[parentIndex] === "html" + ); + } + + #queueVirtualOpenersForStartTag(tagName) { + if ( + this.current_namespace !== "html" || + ( + !this.#hasElementInTableScope("TABLE") && + !this.#isOpenTableSectionFragmentContext() + ) + ) { + return false; + } + + const queued = []; + if (tagName === "COL" && this.#currentHtmlElementIs("TABLE")) { + queued.push("COLGROUP"); + } + + if ( + tagName === "TR" && + !this.#hasElementInTableScope((nodeName) => TABLE_SECTION_ELEMENTS.has(nodeName)) + ) { + queued.push("TBODY"); + } + + if (TABLE_CELL_ELEMENTS.has(tagName)) { + if (!this.#hasElementInTableScope((nodeName) => TABLE_SECTION_ELEMENTS.has(nodeName))) { + queued.push("TBODY"); + } + if (!this.#hasElementInTableScope("TR")) { + queued.push("TR"); + } + } + + if (this.deferred_table_opener 
!== null) { + for (const queuedTagName of queued) { + this.#deferImplicitTableWrapperOpen(queuedTagName); + } + return false; + } + + for (const queuedTagName of queued) { + this.virtual_tokens.push({ + operation: "push", + tagName: queuedTagName, + namespaceName: "html", + }); + } + + return queued.length > 0; + } + + #isOpenTableSectionFragmentContext() { + return ( + !this.is_full_parser && + this.context_namespace === "html" && + TABLE_SECTION_ELEMENTS.has(this.context_node) && + this.#currentHtmlElementIs(this.context_node) + ); + } + + #deferImplicitTableWrapperOpen(tagName) { + this.open_elements.push(tagName); + this.open_element_namespaces.push("html"); + this.open_element_integration_node_types.push(null); + this.open_element_foster_parented_table_indices.push(this.#currentFosterParentedTableIndex()); + this.breadcrumbs = this.#breadcrumbStack(); + this.deferred_table_child_openers.push({ + tokenType: "#tag", + tokenName: tagName, + tagName, + namespaceName: "html", + attributes: [], + breadcrumbs: [...this.breadcrumbs], + hasSelfClosingFlag: false, + }); + this.#setCurrentNamespace("html"); + } + + #queueFosteredFragmentFormattingElementPreclosureForTableStartTag(tagName) { + if ( + this.is_full_parser || + !this.#tableFragmentContextHandlesStartTag(tagName) + ) { + return false; + } + + const topIndex = this.open_elements.length - 1; + if ( + topIndex < this.base_open_element_count || + this.open_elements[topIndex] !== "A" || + this.open_element_namespaces[topIndex] !== "html" + ) { + return false; + } + + const fosterParentedBaseIndex = this.open_element_foster_parented_table_indices[topIndex]; + if (fosterParentedBaseIndex === null || fosterParentedBaseIndex >= topIndex) { + return false; + } + + this.#queueVirtualPopsFrom(topIndex); + this.#removeActiveFormattingElement("A"); + return true; + } + + #tableFragmentContextHandlesStartTag(tagName) { + if ( + this.is_full_parser || + this.context_namespace !== "html" + ) { + return false; + } + + if 
(this.context_node === "TABLE") { + return TABLE_MODE_START_TAGS.has(tagName); + } + + if (this.context_node === "COLGROUP") { + return tagName === "COL" || tagName === "COLGROUP"; + } + + if (TABLE_SECTION_ELEMENTS.has(this.context_node)) { + return this.#isHandledInTableBodyMode(tagName, false); + } + + if (this.context_node === "TR") { + return this.#isHandledInTableRowMode(tagName, false); + } + + return false; + } + + #queueFullParserScaffold() { + this.virtual_tokens.push( + { + operation: "push", + tagName: "HTML", + namespaceName: "html", + }, + { + operation: "push", + tagName: "HEAD", + namespaceName: "html", + }, + { + operation: "pop", + tagName: "HEAD", + namespaceName: "html", + }, + { + operation: "push", + tagName: "BODY", + namespaceName: "html", + }, + ); + } + + #queueFullParserMissingBodyAtEof() { + if ( + !this.is_full_parser || + !["before_head", "in_head", "in_head_noscript", "after_head"].includes(this.full_parser_insertion_mode) || + this.#hasOpenHtmlElement("BODY") || + this.#hasOpenHtmlElement("FRAMESET") + ) { + return false; + } + + const topIndex = this.open_elements.length - 1; + if (this.full_parser_insertion_mode === "before_head") { + if ( + topIndex < 0 || + this.open_elements[topIndex] !== "HTML" || + this.open_element_namespaces[topIndex] !== "html" + ) { + return false; + } + + this.#queueVirtualPush("HEAD"); + this.#queueVirtualPop("HEAD"); + } else if (this.full_parser_insertion_mode === "in_head_noscript") { + if ( + topIndex < 1 || + this.open_elements[topIndex] !== "NOSCRIPT" || + this.open_element_namespaces[topIndex] !== "html" || + this.open_elements[topIndex - 1] !== "HEAD" || + this.open_element_namespaces[topIndex - 1] !== "html" + ) { + return false; + } + + this.#queueVirtualPop("NOSCRIPT"); + this.#queueVirtualPop("HEAD"); + } else { + if ( + topIndex < 0 || + this.open_elements[topIndex] !== "HEAD" || + this.open_element_namespaces[topIndex] !== "html" + ) { + if ( + this.full_parser_insertion_mode === "in_head" 
&& + this.#hasOpenHtmlElement("HEAD") + ) { + this.#queueVirtualPopsFrom(this.#lastOpenElementIndex("HEAD", "html") + 1); + this.#queueVirtualPop("HEAD"); + } else if ( + this.full_parser_insertion_mode !== "after_head" || + topIndex < 0 || + this.open_elements[topIndex] !== "HTML" || + this.open_element_namespaces[topIndex] !== "html" + ) { + return false; + } + } else { + this.#queueVirtualPop("HEAD"); + } + } + + this.full_parser_insertion_mode = "in_body"; + this.#queueVirtualPush("BODY"); + return true; + } + + #shouldConsumeEofCommentBeforeMissingBody() { + return ( + this.is_full_parser && + [ + "initial", + "before_html", + "before_head", + "in_head", + "in_head_noscript", + "after_head", + ].includes(this.full_parser_insertion_mode) + ); + } + + #consumeFullParserEofComment() { + if ( + !this.is_full_parser || + this.synthetic_eof_comment_consumed || + !super.paused_at_incomplete_token() + ) { + return false; + } + + const commentStart = this.#incompleteTokenStart(); + if (commentStart === null || !this.html.startsWith("<!--", commentStart)) { + return false; + } + + let commentText = this.html.slice(commentStart + 4); + if (commentText.endsWith("--")) { + commentText = commentText.slice(0, -2); + } + + this.synthetic_eof_comment_consumed = true; + this.current_synthetic_token = { + tokenType: "#comment", + tokenName: "#comment", + commentText, + }; + this.parser_state = STATE_COMMENT; + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack("#comment"); + return true; + } + + #incompleteTokenIsEofComment() { + const tokenStart = this.#incompleteTokenStart(); + return tokenStart !== null && this.html.startsWith("<!--", tokenStart); + } + + #incompleteTokenStart() { + if (!super.paused_at_incomplete_token()) { + return null; + } + + const span = this.#nativeCurrentSpan(); + return span === null ? 
0 : span.start + span.length; + } + + #skipIncompleteSelectBreakoutStartTag() { + const tokenStart = this.#incompleteTokenStart(); + if (tokenStart === null) { + return false; + } + + const startTag = completeStartTagAt(this.html, tokenStart); + if (startTag === null || !SELECT_BREAKOUT_START_TAGS.has(startTag.tagName)) { + return false; + } + + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (selectIndex === -1 || selectIndex >= this.base_open_element_count) { + return false; + } + + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, startTag.end); + this.parser_state = STATE_READY; + this.current_virtual = null; + this.current_synthetic_token = null; + this.skip_current_token = false; + this.breadcrumbs = this.#breadcrumbStack(); + return true; + } + + #skipIncompleteFullParserEndTag() { + if (!this.is_full_parser) { + return false; + } + + const tokenStart = this.#incompleteTokenStart(); + if (tokenStart === null || !incompleteEndTagAt(this.html, tokenStart)) { + return false; + } + + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, this.html.length); + this.parser_state = STATE_READY; + this.current_virtual = null; + this.current_synthetic_token = null; + this.skip_current_token = false; + this.breadcrumbs = this.#breadcrumbStack(); + return true; + } + + #skipIncompleteFullParserQuotedStartTag() { + if (!this.is_full_parser) { + return false; + } + + const tokenStart = this.#incompleteTokenStart(); + if (tokenStart === null || !incompleteQuotedStartTagAt(this.html, tokenStart)) { + return false; + } + + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, this.html.length); + this.parser_state = STATE_READY; + this.current_virtual = null; + this.current_synthetic_token = null; + this.skip_current_token = false; + this.breadcrumbs = this.#breadcrumbStack(); + return true; + } + + #skipIncompleteFullParserStartTag() { + if (!this.is_full_parser) { + return false; + } + + const tokenStart = this.#incompleteTokenStart(); + if 
(tokenStart === null || !incompleteStartTagAt(this.html, tokenStart)) { + return false; + } + + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, this.html.length); + this.parser_state = STATE_READY; + this.current_virtual = null; + this.current_synthetic_token = null; + this.skip_current_token = false; + this.breadcrumbs = this.#breadcrumbStack(); + return true; + } + + #seekPastCurrentStartTag() { + const span = this.#nativeCurrentSpan(); + if (span === null) { + return false; + } + + const startTag = completeStartTagAt(super.get_updated_html(), span.start); + if ( + startTag === null || + startTag.end <= span.start || + startTag.end > span.start + span.length + ) { + return false; + } + + wasm.wp_html_api_rust_tag_processor_seek(this.pointer, startTag.end); + return true; + } + + #nativeCurrentSpan() { + return runtime.withOutPair((startPtr, lengthPtr) => { + if (!wasm.wp_html_api_rust_tag_processor_current_span(this.pointer, startPtr, lengthPtr)) { + return null; + } + return { + start: runtime.readU32(startPtr), + length: runtime.readU32(lengthPtr), + }; + }); + } + + #queueEofVirtualClosers() { + if (this.open_elements.length <= this.base_open_element_count) { + return false; + } + + this.#queueVirtualPopsFrom(this.base_open_element_count); + return true; + } + + #queueVirtualPopsFrom(index) { + for (let i = this.open_elements.length - 1; i >= index; i -= 1) { + this.virtual_tokens.push({ + operation: "pop", + tagName: this.open_elements[i], + namespaceName: this.open_element_namespaces[i], + skipSerialization: i < this.base_open_element_count, + }); + } + } + + #getCurrentTreeTagName() { + const rawTagName = super.get_tag(); + if (rawTagName === null) { + return null; + } + + return normalizeTagNameForNamespace( + rawTagName, + this.#namespaceForCurrentStartTag(rawTagName), + ); + } + + #namespaceForCurrentStartTag(tagName) { + if ( + this.current_namespace === "html" && + MATHML_TEXT_INTEGRATION_FOREIGN_START_TAGS.has(tagName) + ) { + const topIndex = 
this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "math" && + this.open_element_integration_node_types[topIndex] === "math" + ) { + return "math"; + } + } + + return namespaceForTag(tagName, this.current_namespace); + } + + #integrationNodeTypeForCurrentStartTag(tagName, namespaceName) { + if (namespaceName === "svg") { + return SVG_HTML_INTEGRATION_POINT_ELEMENTS.has(tagName) ? "html" : null; + } + + if (namespaceName !== "math") { + return null; + } + + if (MATHML_TEXT_INTEGRATION_POINT_ELEMENTS.has(tagName)) { + return "math"; + } + + if (tagName !== "ANNOTATION-XML") { + return null; + } + + const encoding = this.get_attribute("encoding"); + return ( + typeof encoding === "string" && + MATHML_HTML_INTEGRATION_POINT_ENCODINGS.has(encoding.toLowerCase()) + ) + ? "html" + : null; + } + + #childNamespaceForStackEntry(tagName, tokenNamespace, integrationNodeType) { + if (integrationNodeType !== null) { + return "html"; + } + + return tokenNamespace; + } + + #lastOpenElementIndex(tagName, namespaceName) { + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + if ( + this.open_elements[i] === tagName && + this.open_element_namespaces[i] === namespaceName + ) { + return i; + } + } + return -1; + } + + #namespaceForEndTag(tagName) { + const topIndex = this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_elements[topIndex] === tagName && + this.open_element_namespaces[topIndex] !== "html" + ) { + return this.open_element_namespaces[topIndex]; + } + + if ( + topIndex >= 0 && + this.current_namespace === "html" && + this.open_element_namespaces[topIndex] !== "html" && + this.open_element_integration_node_types[topIndex] !== null + ) { + return this.open_element_namespaces[topIndex]; + } + + return this.current_namespace; + } + + #hasElementInTableScope(match) { + return this.#findElementInTableScope(match) !== -1; + } + + #isInTableInsertionContext() { + for (let i = this.open_elements.length - 
1; i >= 0; i -= 1) { + const nodeName = this.open_elements[i]; + if (this.open_element_namespaces[i] !== "html") { + continue; + } + + if ( + nodeName === "TABLE" || + nodeName === "CAPTION" || + nodeName === "COLGROUP" || + nodeName === "TR" || + TABLE_SECTION_ELEMENTS.has(nodeName) || + TABLE_CELL_ELEMENTS.has(nodeName) + ) { + return true; + } + + if (nodeName === "HTML" || nodeName === "TEMPLATE") { + return false; + } + } + + return false; + } + + #findElementInTableScope(match) { + const predicate = typeof match === "function" ? match : (nodeName) => nodeName === match; + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + const nodeName = this.open_elements[i]; + const namespaceName = this.open_element_namespaces[i]; + if (namespaceName === "html" && predicate(nodeName)) { + return i; + } + + if ( + namespaceName === "html" && + (nodeName === "HTML" || nodeName === "TABLE" || nodeName === "TEMPLATE") + ) { + return -1; + } + } + return -1; + } + + #shouldPopTableFormImmediately(tagName, namespaceName) { + if (tagName !== "FORM" || namespaceName !== "html") { + return false; + } + + const tableIndex = this.open_elements.lastIndexOf("TABLE"); + if (tableIndex === -1) { + return false; + } + + for (let i = tableIndex + 1; i < this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] === "html" && + (this.open_elements[i] === "TD" || this.open_elements[i] === "TH") + ) { + return false; + } + } + + return true; + } + + #applySimpleHtmlSemanticClosures(tagName) { + if (this.current_namespace !== "html") { + return; + } + + if ( + ( + tagName === "OPTION" || + tagName === "OPTGROUP" || + (tagName === "HR" && this.#hasOpenHtmlElement("SELECT")) + ) + ) { + this.#popCurrentHtmlElementIf("OPTION"); + if ((tagName === "OPTGROUP" || tagName === "HR") && this.#hasOpenHtmlElement("SELECT")) { + this.#popCurrentHtmlElementIf("OPTGROUP"); + } + } + + if (this.#findClosablePInButtonScopeForStartTag(tagName) !== -1) { + 
this.#closePInButtonScope(); + } + + if (tagName === "BUTTON") { + this.#popLastMatchingBeforeBoundary("BUTTON", BUTTON_SCOPE_BOUNDARIES); + } + + if (HEADING_ELEMENTS.has(tagName)) { + const topIndex = this.open_elements.length - 1; + if ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + HEADING_ELEMENTS.has(this.open_elements[topIndex]) + ) { + this.open_elements.pop(); + this.open_element_namespaces.pop(); + this.open_element_integration_node_types.pop(); + this.open_element_foster_parented_table_indices.pop(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + } + return; + } + + if (tagName === "LI") { + this.#popLastMatchingBeforeBoundary("LI", LIST_ITEM_SCOPE_BOUNDARIES); + return; + } + + if (tagName === "DD" || tagName === "DT") { + this.#popLastMatchingBeforeBoundary( + (nodeName) => nodeName === "DD" || nodeName === "DT", + LIST_ITEM_SCOPE_BOUNDARIES, + ); + return; + } + + if ( + RUBY_IMPLIED_END_TAG_START_TAGS.has(tagName) && + this.#hasOpenHtmlElement("RUBY") + ) { + while (this.#currentHtmlElementHasRubyImpliedEndTagForStartTag(tagName)) { + this.open_elements.pop(); + this.open_element_namespaces.pop(); + this.open_element_integration_node_types.pop(); + this.open_element_foster_parented_table_indices.pop(); + } + this.#setCurrentNamespace(this.#namespaceForStackTop()); + } + } + + #closePInButtonScope() { + return this.#popLastMatchingBeforeBoundary("P", BUTTON_SCOPE_BOUNDARIES); + } + + #shouldClosePForStartTag(tagName) { + return ( + P_CLOSING_START_TAGS.has(tagName) && + (tagName !== "TABLE" || this.compat_mode !== WP_HTML_Tag_Processor.QUIRKS_MODE) + ); + } + + #shouldIgnoreInBodyFragmentStartTag(tagName) { + const isInBodyFragmentContext = ( + this.context_namespace === "html" && + (this.context_node === "BODY" || this.context_node === "DIV") + ) || this.context_integration_node_type === "html" || this.detached_context_breadcrumbs.length > 0; + + return ( + !this.is_full_parser && + isInBodyFragmentContext 
&& + this.current_namespace === "html" && + this.template_insertion_modes.length === 0 && + (tagName === "BODY" || tagName === "FRAMESET" || tagName === "HTML") + ); + } + + #shouldIgnoreInBodyStartTag(tagName) { + return ( + IN_BODY_IGNORED_START_TAGS.has(tagName) && + !(this.context_namespace === "html" && this.context_node === "FRAMESET" && tagName === "FRAME") + ); + } + + #shouldIgnoreTableContextTableStartTag(tagName) { + return ( + !this.is_full_parser && + tagName === "TABLE" && + this.context_namespace === "html" && + this.context_node === "TABLE" && + this.#currentHtmlElementIs("TABLE") + ); + } + + #shouldIgnoreTableRowContextBoundaryStartTag(tagName) { + return ( + !this.is_full_parser && + TABLE_ROW_BOUNDARY_START_TAGS.has(tagName) && + this.context_namespace === "html" && + this.context_node === "TR" && + this.#currentHtmlElementIs("TR") + ); + } + + #shouldIgnoreTableSectionContextBoundaryStartTag(tagName) { + return ( + !this.is_full_parser && + TABLE_SECTION_BOUNDARY_START_TAGS.has(tagName) && + this.context_namespace === "html" && + TABLE_SECTION_ELEMENTS.has(this.context_node) && + this.#currentHtmlElementIs(this.context_node) + ); + } + + #shouldIgnoreCaptionContextBoundaryStartTag(tagName) { + return ( + !this.is_full_parser && + this.context_namespace === "html" && + this.context_node === "CAPTION" && + (tagName === "HTML" || CAPTION_CLOSING_START_TAGS.has(tagName)) && + this.#lastOpenElementIndex("CAPTION", "html") !== -1 + ); + } + + #shouldIgnoreColgroupFragmentFosteredStartTag(tagName) { + const span = this.#currentRealTokenSpan(); + return ( + tagName === "A" && + !this.is_full_parser && + this.context_namespace === "html" && + this.context_node === "COLGROUP" && + this.#currentHtmlElementIs("COLGROUP") && + span !== null && + this.#currentFosteredFragmentStartIsFollowedByTableStartTag(span.start + span.length) + ); + } + + #findClosablePInButtonScopeForStartTag(tagName) { + if (!this.#shouldClosePForStartTag(tagName)) { + return -1; + } 
+ + const paragraphIndex = this.#findOpenElementBeforeBoundary("P", BUTTON_SCOPE_BOUNDARIES); + return paragraphIndex !== -1 && !this.#hasForeignIntegrationPointAfter(paragraphIndex) + ? paragraphIndex + : -1; + } + + #currentHtmlElementHasImpliedEndTag() { + const topIndex = this.open_elements.length - 1; + return ( + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + IMPLIED_END_TAG_ELEMENTS.has(this.open_elements[topIndex]) + ); + } + + #currentHtmlElementHasRubyImpliedEndTagForStartTag(tagName) { + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_element_namespaces[topIndex] !== "html" || + !IMPLIED_END_TAG_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + return ( + this.open_elements[topIndex] !== "RTC" || + tagName === "RB" || + tagName === "RTC" + ); + } + + #shouldIgnoreEndTagInTableContext(tagName) { + if (this.current_namespace !== "html" || !this.#hasElementInTableScope("TABLE")) { + return false; + } + + if (this.#currentHtmlElementIs("TABLE")) { + return TABLE_MODE_IGNORED_END_TAGS.has(tagName); + } + + if (this.#currentHtmlElementIs("COLGROUP")) { + return tagName !== "COLGROUP" && TABLE_MODE_IGNORED_END_TAGS.has(tagName); + } + + const topIndex = this.open_elements.length - 1; + const currentNode = topIndex >= 0 && this.open_element_namespaces[topIndex] === "html" + ? 
this.open_elements[topIndex] + : null; + + if (TABLE_SECTION_ELEMENTS.has(currentNode)) { + return TABLE_BODY_MODE_IGNORED_END_TAGS.has(tagName); + } + + if (currentNode === "TR") { + return TABLE_ROW_MODE_IGNORED_END_TAGS.has(tagName); + } + + if (TABLE_CELL_ELEMENTS.has(currentNode)) { + return TABLE_CELL_MODE_IGNORED_END_TAGS.has(tagName); + } + + return false; + } + + #shouldIgnoreEndTagClosingOutsideTemplate(tagName, namespaceName, existingIndex) { + if (namespaceName !== "html" || tagName === "TEMPLATE") { + return false; + } + + const templateIndex = this.#lastOpenElementIndex("TEMPLATE", "html"); + return templateIndex !== -1 && existingIndex !== -1 && existingIndex < templateIndex; + } + + #hasForeignIntegrationPointAfter(index) { + for (let i = index + 1; i < this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] !== "html" && + this.open_element_integration_node_types[i] !== null + ) { + return true; + } + } + + return false; + } + + #popLastMatchingBeforeBoundary(match, boundaries) { + const predicate = typeof match === "function" ? match : (nodeName) => nodeName === match; + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + const nodeName = this.open_elements[i]; + const namespaceName = this.open_element_namespaces[i]; + if (namespaceName === "html" && predicate(nodeName)) { + this.open_elements = this.open_elements.slice(0, i); + this.open_element_namespaces = this.open_element_namespaces.slice(0, i); + this.open_element_integration_node_types = this.open_element_integration_node_types.slice(0, i); + this.open_element_foster_parented_table_indices = this.open_element_foster_parented_table_indices.slice(0, i); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + return true; + } + + if (boundaries.has(this.#scopeBoundaryNameForOpenElement(i))) { + return false; + } + } + return false; + } + + #popLastMatching(match) { + const predicate = typeof match === "function" ? 
match : (nodeName) => nodeName === match; + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + if (predicate(this.open_elements[i])) { + this.open_elements = this.open_elements.slice(0, i); + this.open_element_namespaces = this.open_element_namespaces.slice(0, i); + this.open_element_integration_node_types = this.open_element_integration_node_types.slice(0, i); + this.open_element_foster_parented_table_indices = this.open_element_foster_parented_table_indices.slice(0, i); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + return true; + } + } + return false; + } + + #popCurrentHtmlElementIf(tagName) { + const topIndex = this.open_elements.length - 1; + if (!this.#currentHtmlElementIs(tagName)) { + return false; + } + + this.open_elements.pop(); + this.open_element_namespaces.pop(); + this.open_element_integration_node_types.pop(); + this.open_element_foster_parented_table_indices.pop(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + return true; + } + + #hasOpenHtmlElement(tagName) { + return this.open_elements.some((nodeName, index) => ( + nodeName === tagName && + this.open_element_namespaces[index] === "html" + )); + } + + #scopeBoundaryNameForOpenElement(index) { + const namespaceName = this.open_element_namespaces[index]; + const nodeName = this.open_elements[index]; + return namespaceName === "html" ? 
nodeName : `${namespaceName} ${nodeName}`; + } + + #countOpenHtmlElements(tagName, endIndex = this.open_elements.length) { + let count = 0; + for (let i = 0; i < endIndex; i += 1) { + if ( + this.open_elements[i] === tagName && + this.open_element_namespaces[i] === "html" + ) { + count += 1; + } + } + return count; + } + + #silentlyReopenFullParserElement(tagName) { + const topIndex = this.open_elements.length - 1; + if ( + !this.is_full_parser || + topIndex < 0 || + this.open_elements[topIndex] !== "HTML" || + this.open_element_namespaces[topIndex] !== "html" + ) { + return false; + } + + this.open_elements.push(tagName); + this.open_element_namespaces.push("html"); + this.open_element_integration_node_types.push(null); + this.open_element_foster_parented_table_indices.push(this.#currentFosterParentedTableIndex()); + this.#setCurrentNamespace(this.#childNamespaceForStackEntry(tagName, "html", null)); + return true; + } + + #closeTemporaryReopenedHeadAfterCurrentToken() { + if (!this.temporary_reopened_head) { + return false; + } + + this.temporary_reopened_head = false; + this.full_parser_insertion_mode = "after_head"; + const headIndex = this.#lastOpenElementIndex("HEAD", "html"); + if (headIndex === -1) { + return false; + } + + this.open_elements.splice(headIndex, 1); + this.open_element_namespaces.splice(headIndex, 1); + this.open_element_integration_node_types.splice(headIndex, 1); + this.open_element_foster_parented_table_indices.splice(headIndex, 1); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + return true; + } + + #isInHeadTemplateContent() { + const templateIndex = this.#lastOpenElementIndex("TEMPLATE", "html"); + if (templateIndex === -1) { + return false; + } + + for (let i = templateIndex - 1; i >= 0; i -= 1) { + if (this.open_element_namespaces[i] !== "html") { + continue; + } + + if (this.open_elements[i] === "BODY") { + return false; + } + + if (this.open_elements[i] === "HEAD") { + return true; + } + } + + return false; + } + + 
#shouldIgnoreDocumentStartTagInTemplateContent(tokenType, tagName, isCloser) { + return ( + tokenType === "#tag" && + !isCloser && + (tagName === "HTML" || tagName === "BODY") && + this.#hasOpenHtmlElement("TEMPLATE") + ); + } + + #shouldIgnoreFrameStartTagInTemplateContent(tokenType, tagName, isCloser) { + if ( + tokenType !== "#tag" || + isCloser || + (tagName !== "FRAME" && tagName !== "FRAMESET") || + !this.#hasOpenHtmlElement("TEMPLATE") + ) { + return false; + } + + return this.full_parser_insertion_mode === "in_body" || this.#isInHeadTemplateContent(); + } + + #openHtmlElementBefore(tagName, beforeIndex) { + for (let i = beforeIndex - 1; i >= 0; i -= 1) { + if ( + this.open_elements[i] === tagName && + this.open_element_namespaces[i] === "html" + ) { + return true; + } + } + return false; + } + + #currentHtmlElementIs(tagName) { + const topIndex = this.open_elements.length - 1; + return ( + topIndex >= 0 && + this.open_elements[topIndex] === tagName && + this.open_element_namespaces[topIndex] === "html" + ); + } + + #isInTableTextContext() { + const topIndex = this.open_elements.length - 1; + if ( + topIndex < 0 || + this.open_element_namespaces[topIndex] !== "html" || + !TABLE_TEXT_CURRENT_NODE_ELEMENTS.has(this.open_elements[topIndex]) + ) { + return false; + } + + if (this.open_elements[topIndex] === "TEMPLATE") { + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + const selectIndex = this.#lastOpenElementIndex("SELECT", "html"); + if (tableIndex !== -1 && selectIndex > tableIndex && selectIndex < topIndex) { + return false; + } + } + + return this.open_elements[topIndex] === "TABLE" || this.#openHtmlElementBefore("TABLE", topIndex); + } + + #shouldDeferCurrentTableOpener(tagName, namespaceName) { + return ( + (this.is_full_parser || this.#canDeferTableInFragment()) && + this.deferred_table_opener === null && + tagName === "TABLE" && + namespaceName === "html" && + this.#currentTableStartIsFollowedByFosteredContent() + ); + } + + 
#canDeferTableInFragment() { + return this.#canDeferBodyTableInFragment() || this.#canDeferNestedTableInFragment(); + } + + #canDeferBodyTableInFragment() { + return ( + !this.is_full_parser && + this.context_namespace === "html" && + this.context_node === "BODY" && + this.#currentHtmlElementIs("TABLE") && + this.#currentBodyFragmentTableStartIsFollowedByDirectFosteredContent() + ); + } + + #currentBodyFragmentTableStartIsFollowedByDirectFosteredContent() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + const text = this.html.slice(afterToken, this.#fosterLookaheadTextEnd(afterToken, nextTag)); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if (this.#currentTableStartContentPrecedesFosteredStart()) { + return true; + } + + return ( + nextTag !== false && + !nextTag.is_closing && + ( + this.#isFosteredElementTableStartTag(nextTag.tag_name) || + this.#isFosteredInputStartTag(nextTag) || + ( + nextTag.tag_name === "COLGROUP" && + this.#colgroupStartPrecedesDirectFosteredContent(nextTag.token_end) + ) + ) + ); + } + + #colgroupStartPrecedesDirectFosteredContent(at) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + return ( + !this.#isIgnorableTableText(text) || + ( + nextTag !== false && + !nextTag.is_closing && + this.#isFosteredTableLookaheadStartTag(nextTag) + ) + ); + } + + #canDeferNestedTableInFragment() { + return ( + !this.is_full_parser && + this.context_namespace === "html" && + ( + TABLE_SECTION_ELEMENTS.has(this.context_node) || + this.context_node === "TR" || + TABLE_CELL_ELEMENTS.has(this.context_node) + ) && + ( + this.deferred_table_opener !== null || + this.#currentHtmlElementIs("TABLE") + ) + ); + } + + #currentTableStartIsFollowedByFosteredContent() { + return ( + 
this.#currentTokenIsFollowedByFosteredTableContent( + new Set(["COL", "COLGROUP", "FORM", "INPUT", "TBODY", "TEMPLATE", "TFOOT", "THEAD", "TR"]), + ) || + this.#currentTableStartCellOpenerPrecedesFosteredText() || + this.#currentTableStartCellOpenerPrecedesFosteredStart() || + this.#currentTableStartCellContentPrecedesFosteredText() || + this.#currentTableStartContentPrecedesFosteredStart() || + this.#currentTableStartForeignCellCloserPrecedesFosteredText() + ); + } + + #shouldDeferCurrentTableChildOpener(tagName, namespaceName) { + return ( + (this.is_full_parser || this.#canDeferNestedTableInFragment() || this.#canDeferBodyTableChildInFragment()) && + this.deferred_table_opener !== null && + namespaceName === "html" && + ( + ( + this.#isDeferredTableChildOpenerTag(tagName) && + ( + TABLE_CELL_ELEMENTS.has(tagName) + ? ( + this.#currentTableCellStartIsFollowedByFosteredTableContent() || + this.#currentTableCellStartContentPrecedesFosteredText() || + this.#currentTableCellStartContentPrecedesFosteredStart() || + this.#currentTableCellStartPrecedesForeignCellCloserFosteredText() + ) + : this.#currentTokenIsFollowedByFosteredTableContent(this.#deferredTableChildLookaheadTags(tagName)) + ) + ) || + ( + tagName === "TR" && + ( + this.#currentTableRowStartPrecedesFosteredTextAfterCell() || + this.#currentTableRowStartPrecedesCellContentFosteredText() || + this.#currentTableRowStartPrecedesCellContentFosteredStart() || + this.#currentTableRowStartPrecedesForeignCellCloserFosteredText() + ) + ) || + ( + tagName === "TABLE" && + this.#currentNestedTableStartPrecedesDeferredFosteredStart() + ) || + this.#isDeferredTableHiddenInputChildOpener(tagName) + ) + ); + } + + #canDeferBodyTableChildInFragment() { + return ( + !this.is_full_parser && + this.context_namespace === "html" && + this.context_node === "BODY" && + this.#lastOpenElementIndex("TABLE", "html") >= this.base_open_element_count + ); + } + + #queueDeferredTableOpenerBeforeCurrentToken(tokenType) { + if 
(this.deferred_table_opener === null) { + return false; + } + + if (this.#currentFosterParentedTableIndex() !== null && tokenType !== "#tag") { + return false; + } + + if (tokenType === "#text") { + if ( + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesDeferredTableChildOpener() + ) { + return false; + } + + return ( + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + !this.#currentTextChunkPrecedesFosteredTableText() && + this.#queueDeferredTableOpener() + ); + } + + if ( + tokenType === "#comment" || + tokenType === "#funky-comment" || + tokenType === "#presumptuous-tag" + ) { + return this.#queueDeferredTableOpener(); + } + + if (tokenType !== "#tag") { + return this.#queueDeferredTableOpener(); + } + + const tagName = this.#getCurrentTreeTagName(); + if (tagName === null) { + return false; + } + const isHtmlStart = this.current_namespace === "html"; + + if (this.is_tag_closer()) { + if (this.#currentFosterParentedTableIndex() !== null) { + if (tagName === "TABLE" || this.#hasDeferredTableChildOpener(tagName)) { + return this.#queueFosteredElementPopsBeforeDeferredTable(); + } + return false; + } + + if (tagName === "TABLE" && this.#currentDeferredNestedTableCloserPrecedesFosteredStart()) { + return false; + } + + if (tagName === "TEMPLATE" && this.#hasDeferredTableChildOpener("TEMPLATE")) { + return false; + } + + if (tagName === "FORM" && this.#hasDeferredTableChildOpener("FORM")) { + return false; + } + + if (this.#currentTableStructureCloserPrecedesFosteredTableContent(tagName)) { + return false; + } + + return ( + (tagName === "TABLE" || this.#hasDeferredTableChildOpener(tagName)) && + this.#queueDeferredTableOpener() + ); + } + + if (this.#currentFosterParentedTableIndex() !== null) { + if (tagName === "TABLE" || tagName === "TR" || TABLE_CELL_ELEMENTS.has(tagName)) { + return this.#queueFosteredElementPopsBeforeDeferredTable(); + } + return false; + } + + if ( + ( 
+ ( + isHtmlStart && + this.#isDeferredTableChildOpenerTag(tagName) + ) || + ( + isHtmlStart && + this.#isDeferredTableHiddenInputChildOpener(tagName) + ) + ) && + ( + TABLE_CELL_ELEMENTS.has(tagName) + ? ( + this.#currentTableCellStartIsFollowedByFosteredTableContent() || + this.#currentTableCellStartContentPrecedesFosteredText() || + this.#currentTableCellStartContentPrecedesFosteredStart() || + this.#currentTableCellStartPrecedesForeignCellCloserFosteredText() + ) + : this.#currentTokenIsFollowedByFosteredTableContent(this.#deferredTableChildLookaheadTags(tagName)) + ) + ) { + return false; + } + + if ( + isHtmlStart && + tagName === "TR" && + ( + this.#currentTableRowStartPrecedesFosteredTextAfterCell() || + this.#currentTableRowStartPrecedesCellContentFosteredText() || + this.#currentTableRowStartPrecedesCellContentFosteredStart() || + this.#currentTableRowStartPrecedesForeignCellCloserFosteredText() + ) + ) { + return false; + } + + if (isHtmlStart && SPECIAL_ATOMIC_ELEMENTS.has(tagName)) { + if (this.#isFosteredAtomicTableStartTag(tagName)) { + return false; + } + if (isHtmlStart && TABLE_MODE_START_TAGS.has(tagName)) { + return this.#queueDeferredTableOpener(); + } + this.#bailUnsupported("Foster parenting is not supported."); + return true; + } + + if (isHtmlStart && this.#isFosteredInputTableStartTag(tagName)) { + return false; + } + + if (isHtmlStart && tagName === "TABLE" && this.#currentNestedTableStartPrecedesDeferredFosteredStart()) { + return false; + } + + return isHtmlStart && TABLE_MODE_START_TAGS.has(tagName) && this.#queueDeferredTableOpener(); + } + + #representFosteredTextBeforeDeferredTable() { + if ( + (!this.is_full_parser && !this.#canRepresentFosteredTextInFragment()) || + this.deferred_table_opener === null + ) { + return false; + } + + if ( + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesDeferredTableChildOpener() + ) { + return this.#deferCurrentTextAsTableChild(); + } + 
+ if ( + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesIgnoredEndTagFosteredText() + ) { + return this.#deferCurrentTextAsTableChild(); + } + + if ( + this.#currentHtmlElementIs("TABLE") && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesFosteredTableToken() + ) { + return this.#deferCurrentTextAsTableChild(); + } + + if ( + this.#currentHtmlElementIs("TR") && + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesFosteredTableToken() + ) { + return this.#deferCurrentTextAsTableChild(); + } + + if ( + this.#currentHtmlElementIs("COLGROUP") && + ( + this.#currentTextHasLeadingIgnorableTableText() || + ( + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + this.#currentTextChunkPrecedesFosteredTableText() + ) + ) + ) { + return this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_WHITESPACE && + !this.#currentTextHasLeadingIgnorableTableText() && + this.#deferCurrentTextAsTableChild(); + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + if (this.#hasUnreconstructedActiveFormattingElement()) { + return false; + } + + this.current_token_namespace = "html"; + this.breadcrumbs = this.#breadcrumbStack("#text", tableIndex); + this.frameset_ok = false; + return true; + } + + #canRepresentFosteredTextInFragment() { + return ( + this.context_namespace === "html" && + this.context_node === "BODY" && + this.#lastOpenElementIndex("TABLE", "html") >= this.base_open_element_count + ); + } + + #currentFosterParentedTableIndex() { + return this.open_element_foster_parented_table_indices.at(-1) ?? 
null; + } + + #hasUnreconstructedActiveFormattingElement() { + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + return false; + } + + if (!this.#activeFormattingElementIsOpen(entry)) { + return true; + } + } + + return false; + } + + #activeFormattingElementIsOpen(entry) { + if (this.#isActiveFormattingMarker(entry)) { + return true; + } + + if (Number.isInteger(entry.openElementIndex)) { + const index = entry.openElementIndex; + return ( + index >= 0 && + index < this.open_elements.length && + this.open_elements[index] === entry.tagName && + this.open_element_namespaces[index] === entry.namespaceName + ); + } + + return this.#lastOpenElementIndex(entry.tagName, entry.namespaceName) !== -1; + } + + #queueNestedAnchorOuterCloserAfterDeferredTable() { + const index = this.pending_nested_anchor_outer_closer_after_deferred_table_index; + if (index === null) { + return false; + } + + if (this.#lastOpenElementIndex("TABLE", "html") !== -1) { + return false; + } + + this.pending_nested_anchor_outer_closer_after_deferred_table_index = null; + const shouldRemoveActiveAnchor = this.pending_nested_anchor_active_removal_after_deferred_table; + this.pending_nested_anchor_active_removal_after_deferred_table = false; + if ( + index < 0 || + index >= this.open_elements.length || + this.open_elements[index] !== "A" || + this.open_element_namespaces[index] !== "html" + ) { + return false; + } + + this.#queueVirtualPopsFrom(index); + if (shouldRemoveActiveAnchor) { + this.#removeActiveFormattingElement("A"); + this.pending_nested_anchor_div_active_removal_after_deferred_table = true; + } + return true; + } + + #queueFosteredElementPopsBeforeDeferredTable() { + if (this.deferred_table_opener === null) { + return false; + } + + const tableIndex = this.#currentFosterParentedTableIndex(); + if (tableIndex === null) { + return false; + } + + const 
firstFosteredIndex = this.open_element_foster_parented_table_indices.findIndex( + (fosterParentedTableIndex, index) => ( + index > tableIndex && + fosterParentedTableIndex === tableIndex + ), + ); + if (firstFosteredIndex === -1) { + return false; + } + + this.#queueVirtualPopsFrom(firstFosteredIndex); + return true; + } + + #currentTableStartCellOpenerPrecedesFosteredText() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + const text = this.html.slice(afterToken, this.#fosterLookaheadTextEnd(afterToken, nextTag)); + return ( + this.#isIgnorableTableText(text) && + nextTag !== false && + !nextTag.is_closing && + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) && + this.#tableStructureEndTagsPrecedeFosteredText(nextTag.token_end) + ); + } + + #currentTableStartCellOpenerPrecedesFosteredStart() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const nextTag = this.#nextNonWhitespaceTag(span.start + span.length); + return ( + nextTag !== false && + !nextTag.is_closing && + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) && + this.#cellContentPrecedesFosteredStartAt(nextTag.token_end) + ); + } + + #currentTableStartCellContentPrecedesFosteredText() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + const sectionTag = this.#nextNonWhitespaceTag(at); + if ( + sectionTag !== false && + !sectionTag.is_closing && + TABLE_SECTION_ELEMENTS.has(sectionTag.tag_name) + ) { + at = sectionTag.token_end; + } + + return this.#rowCellContentPrecedesFosteredTextAt(at); + } + + #currentTableStartContentPrecedesFosteredStart() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + const sectionTag = this.#nextNonWhitespaceTag(at); + if ( + sectionTag 
!== false && + !sectionTag.is_closing && + TABLE_SECTION_ELEMENTS.has(sectionTag.tag_name) + ) { + at = sectionTag.token_end; + } + + return this.#rowSequencePrecedesFosteredStartAt(at); + } + + #currentTableStartForeignCellCloserPrecedesFosteredText() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + const nextTag = this.#nextNonWhitespaceTag(at); + if ( + nextTag !== false && + !nextTag.is_closing && + nextTag.tag_name === "TR" + ) { + at = nextTag.token_end; + } + + return this.#foreignCellCloserPrecedesFosteredTextAt(at, false); + } + + #currentTableRowStartPrecedesCellContentFosteredText() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#rowContentPrecedesFosteredTextAt( + span.start + span.length, + ); + } + + #currentTableRowStartPrecedesCellContentFosteredStart() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#rowContentPrecedesFosteredStartAt( + span.start + span.length, + ); + } + + #currentTableRowStartPrecedesForeignCellCloserFosteredText() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#foreignCellCloserPrecedesFosteredTextAt( + span.start + span.length, + false, + ); + } + + #currentTableCellStartContentPrecedesFosteredText() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#cellContentPrecedesFosteredTextAt( + span.start + span.length, + ); + } + + #currentTableCellStartContentPrecedesFosteredStart() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#cellContentPrecedesFosteredStartAt( + span.start + span.length, + ); + } + + #currentTableCellStartPrecedesForeignCellCloserFosteredText() { + const span = this.#currentRealTokenSpan(); + return span !== null && this.#foreignCellCloserPrecedesFosteredTextAt( + span.start + span.length, + true, + ); + } + + #nextNonWhitespaceTag(at) { + const nextTag = 
runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + return this.#isIgnorableTableText(text) ? nextTag : false; + } + + #rowCellContentPrecedesFosteredTextAt(at) { + const rowTag = this.#nextNonWhitespaceTag(at); + if ( + rowTag === false || + rowTag.is_closing || + rowTag.tag_name !== "TR" + ) { + return false; + } + + return this.#rowContentPrecedesFosteredTextAt(rowTag.token_end); + } + + #rowSequencePrecedesFosteredStartAt(at) { + const rowTag = this.#nextNonWhitespaceTag(at); + if (rowTag === false || rowTag.is_closing) { + return false; + } + + if (this.#isFosteredTableLookaheadStartTag(rowTag)) { + return true; + } + + if (rowTag.tag_name !== "TR") { + return false; + } + + return this.#rowContentPrecedesFosteredStartAt(rowTag.token_end); + } + + #rowContentPrecedesFosteredTextAt(at) { + const cellTag = this.#nextNonWhitespaceTag(at); + if ( + cellTag === false || + cellTag.is_closing || + !TABLE_CELL_ELEMENTS.has(cellTag.tag_name) + ) { + return false; + } + + return this.#cellContentPrecedesFosteredTextAt(cellTag.token_end); + } + + #rowContentPrecedesFosteredStartAt(at) { + const cellTag = this.#nextNonWhitespaceTag(at); + if (cellTag === false || cellTag.is_closing) { + return false; + } + + if (this.#isFosteredTableLookaheadStartTag(cellTag)) { + return true; + } + + if (!TABLE_CELL_ELEMENTS.has(cellTag.tag_name)) { + return false; + } + + return this.#cellContentPrecedesFosteredStartAt(cellTag.token_end); + } + + #cellContentPrecedesFosteredTextAt(at) { + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!nextTag.is_closing) { + if (nextTag.tag_name === "TABLE") { + const tableEnd = this.#matchingTableEndAfterStart(nextTag); + if (tableEnd === null) { + return false; + } + + at = tableEnd; + continue; + } + + if (TABLE_MODE_START_TAGS.has(nextTag.tag_name)) { + return false; + } + + at = nextTag.token_end; + 
continue; + } + + if ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TR" || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TABLE" + ) { + return this.#fosteredTextAfterTableStructureEnd(nextTag.token_end); + } + + at = nextTag.token_end; + } + } + + #cellContentPrecedesFosteredStartAt(at) { + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return false; + } + + if (!nextTag.is_closing) { + if (nextTag.tag_name === "TABLE") { + const tableEnd = this.#matchingTableEndAfterStart(nextTag); + if (tableEnd === null) { + return false; + } + + at = tableEnd; + continue; + } + + if (TABLE_MODE_START_TAGS.has(nextTag.tag_name)) { + return false; + } + + at = nextTag.token_end; + continue; + } + + if ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TR" || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TABLE" + ) { + return this.#tableStructureEndPrecedesFosteredStart(nextTag.token_end); + } + + at = nextTag.token_end; + } + } + + #matchingTableEndAfterStart(startTag) { + return this.#matchingTableEndAfterStartAt(startTag.token_end); + } + + #matchingTableEndAfterStartAt(at) { + let depth = 1; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) { + return null; + } + + if (nextTag.tag_name === "TABLE") { + depth += nextTag.is_closing ? 
-1 : 1; + if (depth === 0) { + return nextTag.token_end; + } + } + + at = nextTag.token_end; + } + } + + #currentNestedTableStartPrecedesDeferredFosteredStart() { + if ( + this.deferred_table_opener === null || + this.current_namespace !== "html" + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const tableEnd = this.#matchingTableEndAfterStartAt(span.start + span.length); + return tableEnd !== null && this.#cellContentPrecedesFosteredStartAt(tableEnd); + } + + #currentDeferredNestedTableCloserPrecedesFosteredStart() { + if ( + this.deferred_table_opener === null || + this.current_namespace !== "html" + ) { + return false; + } + + const deferredTableIndex = this.deferred_table_opener.breadcrumbs.length - 1; + const currentTableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (currentTableIndex <= deferredTableIndex) { + return false; + } + + const span = this.#currentRealTokenSpan(); + return span !== null && this.#cellContentPrecedesFosteredStartAt(span.start + span.length); + } + + #tableStructureEndPrecedesFosteredStart(at) { + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text) || nextTag === false) { + return false; + } + + if (nextTag.is_closing) { + if ( + this.#isTableStructureFosterLookaheadEndTag(nextTag.tag_name) || + this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + + return false; + } + + if (nextTag.tag_name === "TR") { + return this.#rowContentPrecedesFosteredStartAt(nextTag.token_end); + } + + if (TABLE_SECTION_ELEMENTS.has(nextTag.tag_name)) { + return this.#rowSequencePrecedesFosteredStartAt(nextTag.token_end); + } + + return this.#isFosteredTableLookaheadStartTag(nextTag); + } + } + + #fosteredTextAfterTableStructureEnd(at) { + while (true) { + const nextTag = 
runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if ( + nextTag !== false && + nextTag.is_closing && + ( + this.#isTableStructureFosterLookaheadEndTag(nextTag.tag_name) || + this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) + ) { + at = nextTag.token_end; + continue; + } + + return false; + } + } + + #foreignCellCloserPrecedesFosteredTextAt(at, sawCellStart) { + let sawForeignStart = false; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text)) { + return false; + } + + if (nextTag === false) { + return false; + } + + if (!nextTag.is_closing) { + if (!sawCellStart && TABLE_CELL_ELEMENTS.has(nextTag.tag_name)) { + sawCellStart = true; + at = nextTag.token_end; + continue; + } + + if (sawCellStart && FOREIGN_CONTENT_START_TAGS.has(nextTag.tag_name)) { + sawForeignStart = true; + at = nextTag.token_end; + continue; + } + + if (sawCellStart && sawForeignStart) { + at = nextTag.token_end; + continue; + } + + return false; + } + + if ( + sawCellStart && + sawForeignStart && + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) + ) { + return this.#fosteredTextAfterForeignCellCloser(nextTag.token_end); + } + + if (sawCellStart && sawForeignStart) { + at = nextTag.token_end; + continue; + } + + return false; + } + } + + #fosteredTextAfterForeignCellCloser(at) { + return this.#fosteredTextAfterTableStructureEnd(at); + } + + #currentTableRowStartPrecedesFosteredTextAfterCell() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + let sawCellStart = false; + let sawStructureEndTag = false; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + 
if (!this.#isIgnorableTableText(text) && sawStructureEndTag) { + return true; + } + + if (nextTag === false) { + return false; + } + + if (!nextTag.is_closing) { + if (!sawCellStart && TABLE_CELL_ELEMENTS.has(nextTag.tag_name)) { + sawCellStart = true; + at = nextTag.token_end; + continue; + } + + return false; + } + + if (!sawCellStart || !this.#isTableStructureFosterLookaheadEndTag(nextTag.tag_name)) { + return false; + } + + sawStructureEndTag = true; + at = nextTag.token_end; + } + } + + #currentTokenIsFollowedByFosteredTableContent(allowedWrapperTags) { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + return this.#tokenEndIsFollowedByFosteredTableContent( + span.start + span.length, + allowedWrapperTags, + ); + } + + #tokenEndIsFollowedByFosteredTableContent(at, allowedWrapperTags) { + let wrappers = allowedWrapperTags; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text)) { + return true; + } + if ( + nextTag !== false && + nextTag.is_closing && + this.#isSkippedFosterLookaheadEndTag(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + if ( + nextTag !== false && + nextTag.is_closing && + this.#isFosteredTableEndTag(nextTag.tag_name) + ) { + return true; + } + if ( + nextTag !== false && + nextTag.is_closing && + nextTag.tag_name === "TEMPLATE" + ) { + at = nextTag.token_end; + continue; + } + if (nextTag === false || nextTag.is_closing) { + return false; + } + if ( + FOREIGN_CONTENT_START_TAGS.has(nextTag.tag_name) || + nextTag.tag_name === "SELECT" || + this.#isFosteredVoidTableStartTag(nextTag.tag_name) || + this.#isFosteredInputStartTag(nextTag) || + this.#isFosteredAtomicTableStartTag(nextTag.tag_name) || + this.#isFosteredElementTableStartTag(nextTag.tag_name) + ) { + return true; + } + if (this.#isDeferredTableAtomicChildOpenerTag(nextTag.tag_name)) { + const 
atomicEnd = this.#specialAtomicTagEnd(nextTag); + if (atomicEnd === null) { + return false; + } + wrappers = this.#deferredTableChildLookaheadTags(nextTag.tag_name); + at = atomicEnd; + continue; + } + if ( + nextTag.tag_name === "INPUT" && + wrappers.has("INPUT") && + !this.#isFosteredInputStartTag(nextTag) + ) { + wrappers = this.#deferredTableChildLookaheadTags(nextTag.tag_name); + at = nextTag.token_end; + continue; + } + if (!wrappers.has(nextTag.tag_name)) { + return false; + } + + wrappers = this.#deferredTableChildLookaheadTags(nextTag.tag_name); + at = nextTag.token_end; + } + } + + #currentTableCellStartIsFollowedByFosteredTableContent() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + let sawCellBoundary = false; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text)) { + if (sawCellBoundary) { + return true; + } + if ( + nextTag !== false && + nextTag.is_closing && + ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TABLE" || + nextTag.tag_name === "TR" + ) + ) { + sawCellBoundary = true; + at = nextTag.token_end; + continue; + } + return false; + } + + if ( + nextTag !== false && + nextTag.is_closing && + this.#isSkippedFosterLookaheadEndTag(nextTag.tag_name) + ) { + sawCellBoundary = true; + at = nextTag.token_end; + continue; + } + + if (nextTag === false || nextTag.is_closing) { + if ( + nextTag !== false && + ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TABLE" || + nextTag.tag_name === "TR" + ) + ) { + sawCellBoundary = true; + at = nextTag.token_end; + continue; + } + + return false; + } + + return sawCellBoundary && ( + FOREIGN_CONTENT_START_TAGS.has(nextTag.tag_name) || + 
nextTag.tag_name === "SELECT" || + this.#isFosteredVoidTableStartTag(nextTag.tag_name) || + this.#isFosteredInputStartTag(nextTag) || + this.#isFosteredAtomicTableStartTag(nextTag.tag_name) || + this.#isFosteredElementTableStartTag(nextTag.tag_name) + ); + } + } + + /* End tags the foster lookahead loops in this class step over: six literal names plus FORMATTING_ELEMENTS and HEADING_ELEMENTS members. */ #isIgnoredFosterLookaheadEndTag(tagName) { + return ( + tagName === "BLINK" || + tagName === "BODY" || + tagName === "HTML" || + tagName === "KBD" || + tagName === "PRE" || + tagName === "SELECT" || + FORMATTING_ELEMENTS.has(tagName) || + HEADING_ELEMENTS.has(tagName) + ); + } + + /* Union of the ignored and the table-structure lookahead end tags. */ #isSkippedFosterLookaheadEndTag(tagName) { + return ( + this.#isIgnoredFosterLookaheadEndTag(tagName) || + this.#isTableStructureFosterLookaheadEndTag(tagName) + ); + } + + /* True for TR and for members of TABLE_CELL_ELEMENTS or TABLE_SECTION_ELEMENTS. */ #isTableStructureFosterLookaheadEndTag(tagName) { + return ( + TABLE_CELL_ELEMENTS.has(tagName) || + tagName === "TR" || + TABLE_SECTION_ELEMENTS.has(tagName) + ); + } + + /* Only BR and P qualify. */ #isFosteredTableEndTag(tagName) { + return tagName === "BR" || tagName === "P"; + } + + /* Index of the last open html TABLE, or null when no table opener is deferred or no TABLE is open. */ #missingParagraphCloserFosterParentedTableIndex() { + if (this.deferred_table_opener === null) { + return null; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + return tableIndex === -1 ? 
null : tableIndex; + } + + /* Requires a deferred table opener, html namespace, TABLE as the current html element, and an open html TABLE. */ #canRepresentMissingParagraphCloserBeforeDeferredTable() { + return ( + this.deferred_table_opener !== null && + this.current_namespace === "html" && + this.#currentHtmlElementIs("TABLE") && + this.#lastOpenElementIndex("TABLE", "html") !== -1 + ); + } + + /* COLGROUP, COL, FORM, TEMPLATE, TR, cells, sections, or an atomic child opener. */ #isDeferredTableChildOpenerTag(tagName) { + return ( + tagName === "COLGROUP" || + tagName === "COL" || + tagName === "FORM" || + tagName === "TEMPLATE" || + tagName === "TR" || + TABLE_CELL_ELEMENTS.has(tagName) || + TABLE_SECTION_ELEMENTS.has(tagName) || + this.#isDeferredTableAtomicChildOpenerTag(tagName) + ); + } + + /* Returns a fresh Set: TR for section tags, INPUT for FORM or INPUT, otherwise empty. */ #deferredTableChildLookaheadTags(tagName) { + if (TABLE_SECTION_ELEMENTS.has(tagName)) { + return new Set(["TR"]); + } + + if (tagName === "FORM" || tagName === "INPUT") { + return new Set(["INPUT"]); + } + + return new Set(); + } + + /* INPUT that #isFosteredInputTableStartTag rejects, i.e. the current token's type attribute is "hidden" (case-insensitive). */ #isDeferredTableHiddenInputChildOpener(tagName) { + return tagName === "INPUT" && !this.#isFosteredInputTableStartTag(tagName); + } + + /* #hasOpenHtmlElement("SELECT") must be false and the tag must be in both SPECIAL_ATOMIC_ELEMENTS and TABLE_MODE_START_TAGS. */ #isDeferredTableAtomicChildOpenerTag(tagName) { + return ( + !this.#hasOpenHtmlElement("SELECT") && + SPECIAL_ATOMIC_ELEMENTS.has(tagName) && + TABLE_MODE_START_TAGS.has(tagName) + ); + } + + /* Returns findSpecialAtomicCloserEnd() searched from the end of the complete start tag at nextTag.tag_start; null when completeStartTagAt() yields null. 
*/ #specialAtomicTagEnd(nextTag) { + const startTag = completeStartTagAt(this.html, nextTag.tag_start); + if (startTag === null) { + return null; + } + + return findSpecialAtomicCloserEnd(this.html, startTag.end, nextTag.tag_name); + } + + /* Matches on token.tagName. NOTE(review): entries pushed by #deferCurrentTextAsTableChild carry tokenName, not tagName, so they never match here - confirm that is intended. */ #hasDeferredTableChildOpener(tagName) { + return this.deferred_table_child_openers.some((token) => token.tagName === tagName); + } + + /* Handles an html TEMPLATE closer while a table opener is deferred: pops the top entry from all four parallel open-element arrays, marks the token skipped and returns true; returns false with no changes when a guard fails. */ #skipDeferredTableTemplateCloser(tagName, namespaceName) { + if ( + tagName !== "TEMPLATE" || + namespaceName !== "html" || + this.deferred_table_opener === null || + !this.#hasDeferredTableChildOpener("TEMPLATE") || + !this.#currentHtmlElementIs("TEMPLATE") + ) { + return false; + } + + const templateIndex = this.open_elements.length - 1; + this.#clearActiveFormattingElementsForTemplateClose(templateIndex); + this.#popTemplateInsertionMode(); + 
this.open_elements.pop(); + this.open_element_namespaces.pop(); + this.open_element_integration_node_types.pop(); + this.open_element_foster_parented_table_indices.pop(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + this.current_token_namespace = namespaceName; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return true; + } + + /* For a cell/TR/section tag whose lookahead finds non-ignorable text: records the last open html TABLE index as pending, marks the token skipped, returns true. */ #ignoreCrossedForeignTableStructureCloserBeforeFosteredText(tagName) { + if ( + !( + TABLE_CELL_ELEMENTS.has(tagName) || + tagName === "TR" || + TABLE_SECTION_ELEMENTS.has(tagName) + ) || + !this.#currentForeignTableStructureCloserPrecedesFosteredText() + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + this.pending_foreign_table_fostered_text_table_index = tableIndex; + this.current_token_namespace = this.current_namespace; + this.breadcrumbs = this.#breadcrumbStack(); + this.skip_current_token = true; + return true; + } + + /* Scans forward from the end of the current token, stepping over table-structure and ignored closers; true once non-ignorable text is found, false at any other tag or at end of input. 
*/ #currentForeignTableStructureCloserPrecedesFosteredText() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice( + at, + this.#fosterLookaheadTextEnd(at, nextTag), + ); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if (nextTag === false || !nextTag.is_closing) { + return false; + } + + if ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TR" || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + + return false; + } + } + + /* Consumes the pending table index (reset to null on every path past the first guard). For a #text token with an html TABLE still at that index: null-sequence text is skipped; other text gets breadcrumbs built with that index and clears frameset_ok. */ #representPendingForeignTableFosteredText(tokenType) { + if (this.pending_foreign_table_fostered_text_table_index === null) { + return false; + } + + const tableIndex = this.pending_foreign_table_fostered_text_table_index; + if ( + 
tokenType !== "#text" || + tableIndex < 0 || + tableIndex >= this.open_elements.length || + this.open_elements[tableIndex] !== "TABLE" || + this.open_element_namespaces[tableIndex] !== "html" + ) { + this.pending_foreign_table_fostered_text_table_index = null; + return false; + } + + if (this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE) { + this.pending_foreign_table_fostered_text_table_index = null; + this.skip_current_token = true; + return true; + } + + this.pending_foreign_table_fostered_text_table_index = null; + this.current_token_namespace = "html"; + this.breadcrumbs = this.#breadcrumbStack("#text", tableIndex); + this.frameset_ok = false; + return true; + } + + /* Truncates the four parallel open-element arrays to existingIndex and marks the token skipped when the html closer is followed by fostered table content; otherwise returns false unchanged. */ #skipDeferredTableStructureCloserBeforeFosteredText(tagName, namespaceName, existingIndex) { + if ( + namespaceName !== "html" || + existingIndex === -1 || + !this.#currentTableStructureCloserPrecedesFosteredTableContent(tagName) + ) { + return false; + } + + this.current_token_namespace = this.open_element_namespaces[existingIndex]; + this.#applyTemplateInsertionModeForEndTag(tagName); + this.open_elements = this.open_elements.slice(0, existingIndex); + this.open_element_namespaces = this.open_element_namespaces.slice(0, existingIndex); + this.open_element_integration_node_types = this.open_element_integration_node_types.slice(0, existingIndex); + this.open_element_foster_parented_table_indices = this.open_element_foster_parented_table_indices.slice(0, existingIndex); + if (TABLE_CELL_ELEMENTS.has(tagName)) { + this.#clearActiveFormattingElementsUpToLastMarker(); + } + this.breadcrumbs = this.#breadcrumbStack(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + this.skip_current_token = true; + return true; + } + + /* Queues the current text token on 
deferred_table_child_openers and marks it skipped; always returns true. */ #deferCurrentTextAsTableChild() { + this.deferred_table_child_openers.push({ + tokenType: "#text", + tokenName: "#text", + modifiableText: this.get_modifiable_text() ?? 
"", + namespaceName: this.current_namespace, + breadcrumbs: this.#breadcrumbStack("#text"), + attributes: [], + textClassification: this.text_node_classification, + }); + this.skip_current_token = true; + return true; + } + + /* True only when a table opener is deferred, the token is not already skipped, #currentFosterParentedTableIndex() is null, the tag is not an html table-mode start tag, and some element is open above the last html TABLE. */ #shouldDeferCurrentStartInsideDeferredTable(tagName, namespaceName) { + if ( + this.deferred_table_opener === null || + this.skip_current_token || + this.#currentFosterParentedTableIndex() !== null || + (namespaceName === "html" && TABLE_MODE_START_TAGS.has(tagName)) + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + const topIndex = this.open_elements.length - 1; + return tableIndex !== -1 && topIndex > tableIndex; + } + + /* Not a pure predicate: when the conditions hold it calls #deferCurrentTextAsTableChild(), which queues the text and marks it skipped. */ #shouldDeferCurrentTextInsideDeferredTable() { + if ( + this.deferred_table_opener === null || + this.text_node_classification === WP_HTML_Tag_Processor.TEXT_IS_NULL_SEQUENCE || + this.#currentFosterParentedTableIndex() !== null + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + const topIndex = this.open_elements.length - 1; + return ( + tableIndex !== -1 && + topIndex > tableIndex && + this.#deferCurrentTextAsTableChild() + ); + } + + /* Forward scan over table-structure and ignored closers that requires a deferred table opener and a cell/TR/section tagName; true at non-ignorable text, false at a TABLE closer, any other tag, or end of input. 
*/ #currentTableStructureCloserPrecedesFosteredTableText(tagName) { + if ( + this.deferred_table_opener === null || + !( + TABLE_CELL_ELEMENTS.has(tagName) || + tagName === "TR" || + TABLE_SECTION_ELEMENTS.has(tagName) + ) + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice( + at, + this.#fosterLookaheadTextEnd(at, nextTag), + ); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if (nextTag === false || !nextTag.is_closing) { + return false; + } + + if (nextTag.tag_name === "TABLE") { + return false; + } + + if ( + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + nextTag.tag_name === "TR" || 
+ TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) || + this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + + return false; + } + } + + /* True when either the text lookahead or the start-tag lookahead succeeds. */ #currentTableStructureCloserPrecedesFosteredTableContent(tagName) { + return ( + this.#currentTableStructureCloserPrecedesFosteredTableText(tagName) || + this.#currentTableStructureCloserPrecedesFosteredTableStart(tagName) + ); + } + + /* After the same guards as the text variant, delegates to #tableStructureEndPrecedesFosteredStart() at the end of the current token. */ #currentTableStructureCloserPrecedesFosteredTableStart(tagName) { + if ( + this.deferred_table_opener === null || + !( + TABLE_CELL_ELEMENTS.has(tagName) || + tagName === "TR" || + TABLE_SECTION_ELEMENTS.has(tagName) + ) + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + return span !== null && this.#tableStructureEndPrecedesFosteredStart( + span.start + span.length, + ); + } + + /* True only if non-ignorable text is reached after at least one table-structure closer. */ #tableStructureEndTagsPrecedeFosteredText(at) { + let sawStructureEndTag = false; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, this.#fosterLookaheadTextEnd(at, nextTag)); + if (!this.#isIgnorableTableText(text)) { + return sawStructureEndTag; + } + + if ( + nextTag === false || + !nextTag.is_closing || + !this.#isTableStructureFosterLookaheadEndTag(nextTag.tag_name) + ) { + return false; + } + + sawStructureEndTag = true; + at = nextTag.token_end; + } + } + + /* Void element that is neither a table-mode start tag nor INPUT. */ #isFosteredVoidTableStartTag(tagName) { + return VOID_ELEMENTS.has(tagName) && !TABLE_MODE_START_TAGS.has(tagName) && tagName !== "INPUT"; + } + + /* Lookahead form: inspects the raw tag source via inputStartTagHasHiddenType(). 
*/ #isFosteredInputStartTag(nextTag) { + return ( + nextTag.tag_name === "INPUT" && + !inputStartTagHasHiddenType(this.html.slice(nextTag.tag_start, nextTag.token_end)) + ); + } + + /* Current-token form: reads the type attribute via get_attribute(); a non-string value counts as not hidden. */ #isFosteredInputTableStartTag(tagName) { + if (tagName !== "INPUT") { + return false; + } + + const typeAttribute = this.get_attribute("type"); + return !(typeof typeAttribute === "string" && typeAttribute.toLowerCase() === "hidden"); + } + + /* Only TITLE qualifies. */ #isFosteredAtomicTableStartTag(tagName) { + return tagName === "TITLE"; + } + 
+ /* Fixed allow-list of eleven element names. */ #isFosteredElementTableStartTag(tagName) { + return tagName === "A" || tagName === "B" || tagName === "CENTER" || tagName === "DIV" || tagName === "FONT" || tagName === "I" || tagName === "LI" || tagName === "NOBR" || tagName === "P" || tagName === "PLAINTEXT" || tagName === "S"; + } + + /* Requires a deferred table opener, html namespace and an unreconstructed active formatting element; then accepts foreign tags, SELECT, fostered void/input/atomic tags, or NOBR when the current html element is not NOBR. */ #shouldReconstructActiveFormattingBeforeFosteredStart(tagName) { + if ( + this.deferred_table_opener === null || + this.current_namespace !== "html" || + !this.#hasUnreconstructedActiveFormattingElement() + ) { + return false; + } + + return ( + FOREIGN_CONTENT_START_TAGS.has(tagName) || + tagName === "SELECT" || + this.#isFosteredVoidTableStartTag(tagName) || + this.#isFosteredInputTableStartTag(tagName) || + this.#isFosteredAtomicTableStartTag(tagName) || + (tagName === "NOBR" && !this.#currentHtmlElementIs("NOBR")) + ); + } + + /* Returns topIndex for a representable standalone fragment start; -1 for a non table-mode tag inside a TD/TH above the deferred table; the last open html TABLE index for foreign, SELECT or allow-listed tags while a table opener is deferred in html namespace; otherwise -1. 
*/ #fosterParentedStartTableIndex(tagName) { + const topIndex = this.open_elements.length - 1; + if (this.#isRepresentableStandaloneFosteredStartInTableFragment(tagName, topIndex)) { + return topIndex; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + const cellIndex = Math.max( + this.#lastOpenElementIndex("TD", "html"), + this.#lastOpenElementIndex("TH", "html"), + ); + if ( + this.deferred_table_opener !== null && + tableIndex !== -1 && + cellIndex > tableIndex && + !TABLE_MODE_START_TAGS.has(tagName) && + this.#currentFosterParentedTableIndex() === null + ) { + return -1; + } + + if ( + this.deferred_table_opener === null || + this.current_namespace !== "html" || + ( + !FOREIGN_CONTENT_START_TAGS.has(tagName) && + tagName !== "SELECT" && + !this.#isFosteredElementTableStartTag(tagName) + ) + ) { + return -1; + } + + return tableIndex; + } + + /* Sets the token namespace and breadcrumbs for a void start tag while a table opener is deferred; returns false without changes when a guard fails or no html TABLE is open. */ #representFosteredVoidStartBeforeDeferredTable(tagName) { + if ( + (!this.is_full_parser && !this.#canRepresentFosteredStartInFragment()) || + this.deferred_table_opener === null || + this.current_namespace !== "html" || + !VOID_ELEMENTS.has(tagName) || + ( + 
TABLE_MODE_START_TAGS.has(tagName) && + !this.#isFosteredInputTableStartTag(tagName) + ) + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + this.current_token_namespace = this.#namespaceForCurrentStartTag(super.get_tag()); + const endIndex = this.#currentFosterParentedTableIndex() === tableIndex + ? this.open_elements.length + : tableIndex; + this.breadcrumbs = this.#breadcrumbStack(tagName, endIndex); + return true; + } + + /* html BODY context whose last open html TABLE index is at or beyond base_open_element_count. */ #canRepresentFosteredStartInFragment() { + return ( + this.context_namespace === "html" && + this.context_node === "BODY" && + this.#lastOpenElementIndex("TABLE", "html") >= this.base_open_element_count + ); + } + + /* Full-parser only: for a fostered atomic tag, sets namespace "html" and breadcrumbs ending at the table index (or at the full stack length when #currentFosterParentedTableIndex() equals that table index). */ #representFosteredAtomicStartBeforeDeferredTable(tagName) { + if ( + !this.is_full_parser || + this.deferred_table_opener === null || + this.current_namespace !== "html" || + !this.#isFosteredAtomicTableStartTag(tagName) + ) { + return false; + } + + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + if (tableIndex === -1) { + return false; + } + + this.current_token_namespace = "html"; + const endIndex = this.#currentFosterParentedTableIndex() === tableIndex + ? 
this.open_elements.length + : tableIndex; + this.breadcrumbs = this.#breadcrumbStack(tagName, endIndex); + return true; + } + + /* Looks past the current token: true for non-ignorable text, for fostered text after a chain of ignored closers, or for a fostered end/start tag. */ #currentTextChunkPrecedesFosteredTableText() { + if (this.deferred_table_opener === null) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + const text = this.html.slice( + afterToken, + this.#fosterLookaheadTextEnd(afterToken, nextTag), + ); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if (nextTag === false) { + return false; + } + + if (this.#fosteredTextFollowsIgnoredEndTag(nextTag)) { + return true; + } + + if (nextTag.is_closing) { + return this.#isFosteredTableEndTag(nextTag.tag_name); + } + + return this.#isFosteredTableLookaheadStartTag(nextTag); + } + + /* Stricter lookahead: only ignorable text may separate the current token from the fostered end/start tag; non-ignorable text yields false. */ #currentTextChunkPrecedesFosteredTableToken() { + if (this.deferred_table_opener === null) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + const text = this.html.slice( + afterToken, + this.#fosterLookaheadTextEnd(afterToken, nextTag), + ); + if (!this.#isIgnorableTableText(text) || nextTag === false) { + return false; + } + + if (nextTag.is_closing) { + return this.#isFosteredTableEndTag(nextTag.tag_name); + } + + return this.#isFosteredTableLookaheadStartTag(nextTag); + } + + /* Only when TABLE is the current html element: the tag from #nextNonWhitespaceTag() must be a child opener whose end is followed by fostered table content. 
*/ #currentTextChunkPrecedesDeferredTableChildOpener() { + if ( + this.deferred_table_opener === null || + !this.#currentHtmlElementIs("TABLE") + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const nextTag = this.#nextNonWhitespaceTag(span.start + span.length); + if ( + nextTag === false || + nextTag.is_closing || + !this.#isDeferredTableChildOpenerTag(nextTag.tag_name) + ) { 
+ return false; + } + + return this.#tokenEndIsFollowedByFosteredTableContent( + nextTag.token_end, + this.#deferredTableChildLookaheadTags(nextTag.tag_name), + ); + } + + /* Ignorable text, then an ignored closer chain that ends in non-ignorable text. */ #currentTextChunkPrecedesIgnoredEndTagFosteredText() { + if (this.deferred_table_opener === null) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const afterToken = span.start + span.length; + const nextTag = runtime.scanNextTag(this.html, afterToken); + const text = this.html.slice( + afterToken, + this.#fosterLookaheadTextEnd(afterToken, nextTag), + ); + return ( + this.#isIgnorableTableText(text) && + nextTag !== false && + this.#fosteredTextFollowsIgnoredEndTag(nextTag) + ); + } + + /* Starting after an ignored closer, steps over further ignored closers; true when non-ignorable text appears before any other tag or end of input. */ #fosteredTextFollowsIgnoredEndTag(nextTag) { + if ( + nextTag === false || + !nextTag.is_closing || + !this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) { + return false; + } + + let at = nextTag.token_end; + while (true) { + const followingTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice( + at, + this.#fosterLookaheadTextEnd(at, followingTag), + ); + if (!this.#isIgnorableTableText(text)) { + return true; + } + + if ( + followingTag !== false && + followingTag.is_closing && + this.#isIgnoredFosterLookaheadEndTag(followingTag.tag_name) + ) { + at = followingTag.token_end; + continue; + } + + return false; + } + } + + /* Foreign start tags, SELECT, or any fostered void/input/atomic/element start tag. 
*/ #isFosteredTableLookaheadStartTag(nextTag) { + return ( + FOREIGN_CONTENT_START_TAGS.has(nextTag.tag_name) || + nextTag.tag_name === "SELECT" || + this.#isFosteredVoidTableStartTag(nextTag.tag_name) || + this.#isFosteredInputStartTag(nextTag) || + this.#isFosteredAtomicTableStartTag(nextTag.tag_name) || + this.#isFosteredElementTableStartTag(nextTag.tag_name) + ); + } + + /* An A start tag with a deferred table opener, html namespace, an open html TABLE, a null #currentFosterParentedTableIndex(), and at least one of three lookahead/stack conditions. */ #canRepresentNestedAnchorFosteredBeforeDeferredTable(tagName) { + return ( + tagName === "A" && + this.deferred_table_opener !== null && + this.current_namespace === "html" && + this.#lastOpenElementIndex("TABLE", "html") !== -1 && + 
this.#currentFosterParentedTableIndex() === null && + ( + this.#nestedAnchorFosteredBeforeDeferredTablePrecedesTableContent() || + this.#nestedAnchorFosteredBeforeDeferredTableEnd() || + this.#nestedAnchorFosteredBeforeTemplateDeferredTable() + ) + ); + } + + /* True when the last open html TABLE sits at a higher stack index than the last open html TEMPLATE. */ #nestedAnchorFosteredBeforeTemplateDeferredTable() { + const tableIndex = this.#lastOpenElementIndex("TABLE", "html"); + const templateIndex = this.#lastOpenElementIndex("TEMPLATE", "html"); + return templateIndex !== -1 && tableIndex > templateIndex; + } + + /* The tag returned by #nextNonWhitespaceTag() after the current token is a TABLE closer. */ #nestedAnchorFosteredBeforeDeferredTableEnd() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const nextTag = this.#nextNonWhitespaceTag(span.start + span.length); + return ( + nextTag !== false && + nextTag.is_closing && + nextTag.tag_name === "TABLE" + ); + } + + /* Requires a TABLE closer next, then only ignorable text before one of: end of input, a closer, an A start tag, or a start tag outside both FORMATTING_ELEMENTS and ACTIVE_FORMATTING_RECONSTRUCTING_START_TAGS. 
*/ #shouldRemoveNestedAnchorActiveAfterDeferredTable() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + const tableCloser = this.#nextNonWhitespaceTag(span.start + span.length); + if ( + tableCloser === false || + !tableCloser.is_closing || + tableCloser.tag_name !== "TABLE" + ) { + return false; + } + + const afterTableCloser = tableCloser.token_end; + const nextTag = runtime.scanNextTag(this.html, afterTableCloser); + const text = this.html.slice( + afterTableCloser, + this.#fosterLookaheadTextEnd(afterTableCloser, nextTag), + ); + if (!this.#isIgnorableTableText(text)) { + return false; + } + + return ( + nextTag === false || + nextTag.is_closing || + ( + nextTag.tag_name === "A" || + ( + !FORMATTING_ELEMENTS.has(nextTag.tag_name) && + !ACTIVE_FORMATTING_RECONSTRUCTING_START_TAGS.has(nextTag.tag_name) + ) + ) + ); + } + + /* Walks tags forward from the current token; true at the first TR/cell/section opener, false at any closer or end of input. Text between tags is not inspected. */ #nestedAnchorFosteredBeforeDeferredTablePrecedesTableContent() { + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + let at = span.start + span.length; + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + if (nextTag === false) 
{ + return false; + } + + if (nextTag.is_closing) { + return false; + } + + if ( + nextTag.tag_name === "TR" || + TABLE_CELL_ELEMENTS.has(nextTag.tag_name) || + TABLE_SECTION_ELEMENTS.has(nextTag.tag_name) + ) { + return true; + } + + at = nextTag.token_end; + } + } + + /* End offset of the text run starting at `at`: the next tag start (or end of input), cut short by an earlier "<!--". */ #fosterLookaheadTextEnd(at, nextTag) { + let end = nextTag === false ? this.html.length : nextTag.tag_start; + const commentStart = this.html.indexOf("<!--", at); + if (commentStart !== -1 && commentStart < end) { + end = commentStart; + } + return end; + } + + /* True when every UTF-16 code unit is NUL or HTML whitespace; vacuously true for "". */ #isIgnorableTableText(text) { + return text.split("").every((char) => { + const code = char.charCodeAt(0); + return code === 0 || isHtmlWhitespaceCode(code); + }); + } + + /* True when at least one ignorable code unit precedes the first non-ignorable one; false when the text is empty or entirely ignorable. */ #currentTextHasLeadingIgnorableTableText() { + const text = this.get_modifiable_text() ?? ""; + for (let i = 0; i < text.length; i += 1) { + const code = text.charCodeAt(i); + if (code !== 0 && !isHtmlWhitespaceCode(code)) { + return i > 0; + } + } + return false; + } + + /* False outside html or when one of the representable special cases applies; otherwise decided by the current node (TABLE, COLGROUP, table section, TR), and false for any other current node. 
*/ #shouldBailUnsupportedTableFosterParenting(tagName, isCloser) { + if (this.current_namespace !== "html") { + return false; + } + + const topIndex = this.open_elements.length - 1; + if (topIndex < 0 || this.open_element_namespaces[topIndex] !== "html") { + return false; + } + + if ( + isCloser && + tagName === "P" && + this.#canRepresentMissingParagraphCloserBeforeDeferredTable() + ) { + return false; + } + + if ( + !isCloser && + this.#isRepresentableForeignStartInTableFragment(tagName, topIndex) + ) { + return false; + } + + if ( + !isCloser && + this.#isRepresentableStandaloneFosteredStartInTableFragment(tagName, topIndex) + ) { + return false; + } + + if ( + !isCloser && + this.#canCloseTemplateRowForStartTag(tagName) + ) { + return false; + } + + if ( + !isCloser && + this.#canCloseTemplateTableBodyForStartTag(tagName) + ) { + return false; + } + + const currentNode = this.open_elements[topIndex]; + if (currentNode === "TABLE") { + return this.#wouldUseUnsupportedTableFosterParenting(tagName, 
isCloser); + } + + if (currentNode === "COLGROUP") { + return ( + !(tagName === "COL" || tagName === "COLGROUP") && + this.#wouldUseUnsupportedTableFosterParenting(tagName, isCloser) + ); + } + + if (TABLE_SECTION_ELEMENTS.has(currentNode)) { + return ( + !this.#isHandledInTableBodyMode(tagName, isCloser) && + this.#wouldUseUnsupportedTableFosterParenting(tagName, isCloser) + ); + } + + if (currentNode === "TR") { + return ( + !this.#isHandledInTableRowMode(tagName, isCloser) && + this.#wouldUseUnsupportedTableFosterParenting(tagName, isCloser) + ); + } + + return false; + } + + /* Foreign start tag in a fragment parser whose html TR/section context node is the element at index base_open_element_count - 1, which must also be topIndex. */ #isRepresentableForeignStartInTableFragment(tagName, topIndex) { + return ( + FOREIGN_CONTENT_START_TAGS.has(tagName) && + !this.is_full_parser && + this.context_namespace === "html" && + ( + this.context_node === "TR" || + TABLE_SECTION_ELEMENTS.has(this.context_node) + ) && + topIndex === this.base_open_element_count - 1 && + this.open_elements[topIndex] === this.context_node + ); + } + + /* An A start tag in an html fragment at the context element that either ends the input or is followed by a table start tag the context handles. */ #isRepresentableStandaloneFosteredStartInTableFragment(tagName, topIndex) { + if ( + tagName !== "A" || + this.is_full_parser || + this.context_namespace !== "html" || + topIndex !== this.base_open_element_count - 1 || + !this.#currentContextCanRepresentStandaloneFosteredStart() + ) { + return false; + } + + const span = this.#currentRealTokenSpan(); + if (span === null) { + return false; + } + + if (span.start + span.length === this.html.length) { + return true; + } + + return this.#currentFosteredFragmentStartIsFollowedByTableStartTag(span.start + span.length); + } + + /* Steps over ignored closers until a start tag is found. 
Note: the text slice here runs to nextTag.tag_start directly and does not go through #fosterLookaheadTextEnd, so a "<!--" before the next tag is treated as text. */ #currentFosteredFragmentStartIsFollowedByTableStartTag(at) { + while (true) { + const nextTag = runtime.scanNextTag(this.html, at); + const text = this.html.slice(at, nextTag === false ? 
this.html.length : nextTag.tag_start); + if (!this.#isIgnorableTableText(text)) { + return false; + } + + if (nextTag === false) { + return false; + } + + if ( + nextTag.is_closing && + this.#isIgnoredFosterLookaheadEndTag(nextTag.tag_name) + ) { + at = nextTag.token_end; + continue; + } + + return !nextTag.is_closing && this.#tableFragmentContextHandlesStartTag(nextTag.tag_name); + } + } + + /* Context node is TABLE or a table section and equals the element on top of the stack. */ #currentContextCanRepresentStandaloneFosteredStart() { + const topIndex = this.open_elements.length - 1; + return ( + (this.context_node === "TABLE" && this.open_elements[topIndex] === "TABLE") || + ( + TABLE_SECTION_ELEMENTS.has(this.context_node) && + this.open_elements[topIndex] === this.context_node + ) + ); + } + + /* DIV while the current template insertion mode is "in_row" and TR is the current html element. */ #canCloseTemplateRowForStartTag(tagName) { + return ( + tagName === "DIV" && + this.template_insertion_modes.length > 0 && + this.#currentTemplateInsertionMode() === "in_row" && + this.#currentHtmlElementIs("TR") + ); + } + + /* SELECT while the current template insertion mode is "in_table_body" and the stack top is an html table section. */ #canCloseTemplateTableBodyForStartTag(tagName) { + const topIndex = this.open_elements.length - 1; + return ( + tagName === "SELECT" && + this.template_insertion_modes.length > 0 && + this.#currentTemplateInsertionMode() === "in_table_body" && + topIndex >= 0 && + this.open_element_namespaces[topIndex] === "html" && + TABLE_SECTION_ELEMENTS.has(this.open_elements[topIndex]) + ); + } + + /* Closers: true for anything except TABLE, TEMPLATE and TABLE_MODE_IGNORED_END_TAGS. Openers: true for anything outside TABLE_MODE_START_TAGS, and for INPUT whose type attribute is not "hidden". 
*/ #wouldUseUnsupportedTableFosterParenting(tagName, isCloser) { + if (isCloser) { + return !( + tagName === "TABLE" || + tagName === "TEMPLATE" || + TABLE_MODE_IGNORED_END_TAGS.has(tagName) + ); + } + + if (!TABLE_MODE_START_TAGS.has(tagName)) { + return true; + } + + if (tagName !== "INPUT") { + return false; + } + + const typeAttribute = this.get_attribute("type"); + return !(typeof typeAttribute === "string" && typeAttribute.toLowerCase() === "hidden"); + } + + /* Closers: TABLE, sections, TABLE_BODY_MODE_IGNORED_END_TAGS. Openers: TR, cells, TABLE_SECTION_BOUNDARY_START_TAGS. */ #isHandledInTableBodyMode(tagName, isCloser) { + if (isCloser) { + return ( + tagName === "TABLE" || + TABLE_SECTION_ELEMENTS.has(tagName) || + TABLE_BODY_MODE_IGNORED_END_TAGS.has(tagName) + ); + } 
+ + return tagName === "TR" || TABLE_CELL_ELEMENTS.has(tagName) || TABLE_SECTION_BOUNDARY_START_TAGS.has(tagName); + } + + /* Closers: TABLE, TR, sections, TABLE_ROW_MODE_IGNORED_END_TAGS. Openers: cells, TABLE_ROW_BOUNDARY_START_TAGS. */ #isHandledInTableRowMode(tagName, isCloser) { + if (isCloser) { + return ( + tagName === "TABLE" || + tagName === "TR" || + TABLE_SECTION_ELEMENTS.has(tagName) || + TABLE_ROW_MODE_IGNORED_END_TAGS.has(tagName) + ); + } + + return TABLE_CELL_ELEMENTS.has(tagName) || TABLE_ROW_BOUNDARY_START_TAGS.has(tagName); + } + + /* Last open html TABLE index when the stack top is an html TABLE, TR or table section; otherwise -1. */ #tableStartTagPreclosureIndex() { + const topIndex = this.open_elements.length - 1; + if (topIndex < 0 || this.open_element_namespaces[topIndex] !== "html") { + return -1; + } + + const currentNode = this.open_elements[topIndex]; + if ( + currentNode !== "TABLE" && + currentNode !== "TR" && + !TABLE_SECTION_ELEMENTS.has(currentNode) + ) { + return -1; + } + + return this.#lastOpenElementIndex("TABLE", "html"); + } + + /* True when any element above formIndex is non-html or outside IMPLIED_END_TAG_ELEMENTS. */ #shouldDetachFormCloser(tagName, namespaceName, formIndex) { + if (tagName !== "FORM" || namespaceName !== "html" || formIndex === -1) { + return false; + } + + for (let i = this.open_elements.length - 1; i > formIndex; i -= 1) { + if ( + this.open_element_namespaces[i] !== "html" || + !IMPLIED_END_TAG_ELEMENTS.has(this.open_elements[i]) + ) { + return true; + } + } + + return false; + } + + /* Records the entry at formIndex in detached_breadcrumbs, then splices it out of all four parallel open-element arrays and refreshes breadcrumbs and namespace. 
*/ #detachFormElementFromOpenStack(formIndex) { + this.current_token_namespace = this.open_element_namespaces[formIndex]; + this.detached_breadcrumbs.push({ + index: formIndex, + tagName: this.open_elements[formIndex], + }); + this.open_elements.splice(formIndex, 1); + this.open_element_namespaces.splice(formIndex, 1); + this.open_element_integration_node_types.splice(formIndex, 1); + this.open_element_foster_parented_table_indices.splice(formIndex, 1); + this.breadcrumbs = this.#breadcrumbStack(); + this.#setCurrentNamespace(this.#namespaceForStackTop()); + } + + /* Drops detached breadcrumbs whose index is no longer below the open-element count. */ #pruneDetachedBreadcrumbs() { + this.detached_breadcrumbs = this.detached_breadcrumbs.filter( + (breadcrumb) => breadcrumb.index < this.open_elements.length, + ); + } + + 
/* False when nothing follows index; otherwise every later entry must be html and in FORM_TABLE_DESCENDANT_ELEMENTS. */ #hasOnlyTableElementsAfter(index) { + if (index >= this.open_elements.length - 1) { + return false; + } + + for (let i = index + 1; i < this.open_elements.length; i += 1) { + if ( + this.open_element_namespaces[i] !== "html" || + !FORM_TABLE_DESCENDANT_ELEMENTS.has(this.open_elements[i]) + ) { + return false; + } + } + + return true; + } + + /* True when any entry after index maps, via #scopeBoundaryNameForOpenElement(), to a name in `boundaries`. */ #hasHtmlScopeBoundaryAfter(index, boundaries) { + for (let i = index + 1; i < this.open_elements.length; i += 1) { + if (boundaries.has(this.#scopeBoundaryNameForOpenElement(i))) { + return true; + } + } + + return false; + } + + /* html adoption-agency closer whose element is open (index not -1) and has an active formatting entry, with a DEFAULT_SCOPE_BOUNDARIES entry after it on the stack. */ #shouldIgnoreAdoptionAgencyEndTagOutsideScope(tagName, namespaceName, formattingElementIndex) { + return ( + namespaceName === "html" && + formattingElementIndex !== -1 && + ADOPTION_AGENCY_END_TAGS.has(tagName) && + this.#lastActiveFormattingElementIndex(tagName) !== -1 && + this.#hasHtmlScopeBoundaryAfter(formattingElementIndex, DEFAULT_SCOPE_BOUNDARIES) + ); + } + + /* Narrow A-closer case: needs the "reconstruct-before-nested-div" preclosure marker, a result of at least 8 from #countOpenHtmlElementsAfterLast("B", "DIV"), and hasSpecialBoundaryAfter() for the anchor index. 
*/ #shouldIgnoreCappedDeepAnchorEndTag(tagName, namespaceName, formattingElementIndex) { + return ( + tagName === "A" && + namespaceName === "html" && + formattingElementIndex !== -1 && + this.#lastActiveFormattingElementIndex("A") !== -1 && + this.#hasSpecialStartAdoptionPreclosedFormattingElement( + "A", + "html", + "DIV", + "reconstruct-before-nested-div", + ) && + this.#countOpenHtmlElementsAfterLast("B", "DIV") >= 8 && + hasSpecialBoundaryAfter( + this.open_elements, + this.open_element_namespaces, + formattingElementIndex, + ) + ); + } + + /* Counts matching html entries from the end of active_formatting_elements back to the last marker; true when that count exceeds #countOpenHtmlElements(tagName). */ #shouldIgnoreAdoptionAgencyEndTagWithStaleEntry(tagName, namespaceName) { + if ( + namespaceName !== "html" || + !ADOPTION_AGENCY_END_TAGS.has(tagName) || + this.#lastActiveFormattingElementIndex(tagName) === -1 + ) { + return false; + } + + let activeCount = 0; + for (let i = this.active_formatting_elements.length - 1; i >= 0; i -= 1) { + const entry = this.active_formatting_elements[i]; + if (this.#isActiveFormattingMarker(entry)) { + break; + } + if (entry.tagName === tagName && 
entry.namespaceName === "html") { + activeCount += 1; + } + } + + return activeCount > this.#countOpenHtmlElements(tagName); + } + + /* Applies when the formatting element is not open (index -1), still has an active formatting entry, and the stack top is html P; the consumed marker's reconstructionMode picks the queue routine ("following-inside" queues with following elements). */ #queueParagraphAdoptionReconstructionForEndTag(tagName, namespaceName, formattingElementIndex) { + if ( + namespaceName !== "html" || + formattingElementIndex !== -1 || + !ADOPTION_AGENCY_END_TAGS.has(tagName) || + this.#lastActiveFormattingElementIndex(tagName) === -1 || + this.open_element_namespaces.at(-1) !== "html" || + this.open_elements.at(-1) !== "P" + ) { + return false; + } + + const marker = this.#consumeParagraphAdoptionPreclosedFormattingElement(tagName, namespaceName); + if (marker === null) { + return false; + } + + return marker.reconstructionMode === "following-inside" + ? this.#queueActiveFormattingElementWithFollowingElements(tagName, namespaceName) + : this.#queueActiveFormattingElement(tagName, namespaceName); + } + + /* Applies when the formatting element is not open and the html stack top is in FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS; mode "empty-following-then-self" queues with following elements, any other mode queues the element alone. 
*/ #queueSpecialStartAdoptionReconstructionForEndTag(tagName, namespaceName, formattingElementIndex) { + const containerTagName = this.open_elements.at(-1); + + if ( + namespaceName !== "html" || + formattingElementIndex !== -1 || + !ADOPTION_AGENCY_END_TAGS.has(tagName) || + this.#lastActiveFormattingElementIndex(tagName) === -1 || + this.open_element_namespaces.at(-1) !== "html" || + !FORMATTING_ELEMENT_SPECIAL_PRECLOSURE_START_TAGS.has(containerTagName) + ) { + return false; + } + + const marker = this.#consumeSpecialStartAdoptionPreclosedFormattingElement(tagName, namespaceName, containerTagName); + if (marker === null) { + return false; + } + + if (marker.reconstructionMode === "empty-following-then-self") { + return this.#queueActiveFormattingElementWithFollowingElements(tagName, namespaceName); + } + + return this.#queueActiveFormattingElement(tagName, namespaceName); + } + + /* html adoption-agency closer whose element is open and has an active formatting entry, where hasSpecialBoundaryAfter() reports a boundary after it. */ #shouldBailUnsupportedAdoptionAgency(tagName, namespaceName, formattingElementIndex) { + return ( + namespaceName === "html" && + formattingElementIndex !== -1 && + ADOPTION_AGENCY_END_TAGS.has(tagName) && + 
this.#lastActiveFormattingElementIndex(tagName) !== -1 && + hasSpecialBoundaryAfter( + this.open_elements, + this.open_element_namespaces, + formattingElementIndex, + ) + ); + } + + /* html adoption-agency closer with neither an open element (index -1) nor an active formatting entry. */ #shouldIgnoreAdoptionAgencyEndTagFallback(tagName, namespaceName, formattingElementIndex) { + return ( + namespaceName === "html" && + formattingElementIndex === -1 && + ADOPTION_AGENCY_END_TAGS.has(tagName) && + this.#lastActiveFormattingElementIndex(tagName) === -1 + ); + } + + /* Top-down stack search; `match` may be a node name or a predicate. The match test runs before the boundary test, so a matching html element that is itself a boundary is still returned. Returns -1 when a boundary or the stack bottom is reached first. */ #findOpenElementBeforeBoundary(match, boundaries) { + const predicate = typeof match === "function" ? match : (nodeName) => nodeName === match; + for (let i = this.open_elements.length - 1; i >= 0; i -= 1) { + const nodeName = this.open_elements[i]; + const namespaceName = this.open_element_namespaces[i]; + if (namespaceName === "html" && predicate(nodeName)) { + return i; + } + + if (boundaries.has(this.#scopeBoundaryNameForOpenElement(i))) { + return -1; + } + } + return -1; + } + + /* Serializes the current tag token to markup; returns "" when there is no tag name. Duplicate qualified attribute names are emitted once, NULs go through replaceNulls(), and html SPECIAL_ATOMIC_ELEMENTS get their text and closing tag appended. */ #serializeCurrentTag() { + const tagName = replaceNulls(this.get_tag() ?? ""); + if (tagName === "") { + return ""; + } + + const inHtml = this.get_namespace() === "html"; + const qualifiedName = replaceNulls(inHtml ? 
tagName.toLowerCase() : this.get_qualified_tag_name()); + + if (this.is_tag_closer()) { + return `</${qualifiedName}>`; + } + + let html = `<${qualifiedName}`; + const attributeNames = this.get_attribute_names_with_prefix("") ?? 
[]; + const seenAttributeNames = new Set(); + let previousAttributeWasTrue = false; + + for (const attributeName of attributeNames) { + const qualifiedAttributeName = replaceNulls(this.get_qualified_attribute_name(attributeName)); + if (seenAttributeNames.has(qualifiedAttributeName)) { + continue; + } + seenAttributeNames.add(qualifiedAttributeName); + + if (previousAttributeWasTrue && qualifiedAttributeName.startsWith("=")) { + html += '=""'; + } + + html += ` ${qualifiedAttributeName}`; + const value = this.get_attribute(attributeName); + if (typeof value === "string") { + html += `="${htmlEscape(replaceNulls(value))}"`; + } + previousAttributeWasTrue = value === true; + } + + if (!inHtml && this.has_self_closing_flag()) { + html += " /"; + } + + html += ">"; + + if (inHtml && SPECIAL_ATOMIC_ELEMENTS.has(tagName)) { + let text = this.get_modifiable_text() ?? ""; + if (tagName === "IFRAME" || tagName === "NOEMBED" || tagName === "NOFRAMES") { + text = ""; + } else if (tagName !== "SCRIPT" && tagName !== "STYLE") { + text = htmlEscape(text); + } + if (tagName === "TEXTAREA" && text.startsWith("\n")) { + html += "\n"; + } + html += `${text}</${qualifiedName}>`; + } + + return html; + } + + /* Child namespace for the stack top: "html" for an empty stack; derived from the context node when only HTML is open and detached context breadcrumbs exist. */ #namespaceForStackTop() { + if ( + this.open_element_namespaces.length === 1 && + this.open_elements[0] === "HTML" && + this.detached_context_breadcrumbs.length > 0 + ) { + return this.#childNamespaceForStackEntry( + this.context_node, + this.context_namespace, + this.context_integration_node_type, + ); + } + + return this.open_element_namespaces.length === 0 + ? 
"html" + : this.#childNamespaceForStackEntry( + this.open_elements[this.open_elements.length - 1], + this.open_element_namespaces[this.open_element_namespaces.length - 1], + this.open_element_integration_node_types[this.open_element_integration_node_types.length - 1], + ); + } + + /* Stores the namespace on this.current_namespace and forwards it to the parent class's change_parsing_namespace(). */ #setCurrentNamespace(namespaceName) { + this.current_namespace = namespaceName; + super.change_parsing_namespace(namespaceName); + } + } + + return { + WP_HTML_Decoder, + WP_HTML_Unsupported_Exception, + WP_HTML_Span, + WP_HTML_Text_Replacement, + WP_HTML_Attribute_Token, + WP_HTML_Token, + WP_HTML_Stack_Event, + WP_HTML_Active_Formatting_Elements, + WP_HTML_Open_Elements, + WP_HTML_Processor_State, + WP_HTML_Tag_Processor, + WP_HTML_Processor, + WP_HTML_Doctype_Info, + scanNextTag: (html, offset = 0) => runtime.scanNextTag(html, offset), + version: () => runtime.version(), + wasm: wasmExports, + }; +} + +/* Unwraps an instantiated-source result or a WebAssembly.Instance to its exports; anything else is returned unchanged. */ function wasmExportsFromInput(input) { + if (isWebAssemblyInstantiatedSource(input)) { + return input.instance.exports; + } + + if (input instanceof WebAssembly.Instance) { + return input.exports; + } + + return input; +} + +/* Function exports that validateWasmExports() requires. 
*/ const REQUIRED_WASM_FUNCTION_EXPORTS = [ + "wp_html_api_rust_alloc", + "wp_html_api_rust_core_version", + "wp_html_api_rust_dealloc", + "wp_html_api_rust_decoder_attribute_starts_with", + "wp_html_api_rust_decoder_code_point_to_utf8_bytes", + "wp_html_api_rust_decoder_decode", + "wp_html_api_rust_decoder_read_character_reference", + "wp_html_api_rust_scan_next_tag", + "wp_html_api_rust_tag_processor_add_class", + "wp_html_api_rust_tag_processor_apply_lexical_update", + "wp_html_api_rust_tag_processor_class_list", + "wp_html_api_rust_tag_processor_current_comment_type", + "wp_html_api_rust_tag_processor_current_span", + "wp_html_api_rust_tag_processor_current_token_type", + "wp_html_api_rust_tag_processor_free", + "wp_html_api_rust_tag_processor_get_attribute", + "wp_html_api_rust_tag_processor_get_attribute_names_with_prefix", + 
"wp_html_api_rust_tag_processor_get_html", + "wp_html_api_rust_tag_processor_get_modifiable_text", + "wp_html_api_rust_tag_processor_get_tag", + "wp_html_api_rust_tag_processor_has_class", + "wp_html_api_rust_tag_processor_has_self_closing_flag", + "wp_html_api_rust_tag_processor_is_tag_closer", + "wp_html_api_rust_tag_processor_new", + "wp_html_api_rust_tag_processor_next_tag", + "wp_html_api_rust_tag_processor_next_token", + "wp_html_api_rust_tag_processor_paused_at_incomplete", + "wp_html_api_rust_tag_processor_remove_attribute", + "wp_html_api_rust_tag_processor_remove_class", + "wp_html_api_rust_tag_processor_script_content_type", + "wp_html_api_rust_tag_processor_seek", + "wp_html_api_rust_tag_processor_set_attribute", + "wp_html_api_rust_tag_processor_set_modifiable_text", + "wp_html_api_rust_tag_processor_set_namespace", + "wp_html_api_rust_tag_processor_subdivide_text_appropriately", +]; + +/* Shape check: a non-null object with a WebAssembly.Memory `memory` plus the alloc and dealloc functions. */ function isWebAssemblyExports(input) { + return input !== null && + typeof input === "object" && + input.memory instanceof WebAssembly.Memory && + typeof input.wp_html_api_rust_alloc === "function" && + typeof input.wp_html_api_rust_dealloc === "function"; +} + +/* Collects every missing export and throws a single Error naming all of them. */ function validateWasmExports(wasm) { + const missing = []; + if (!(wasm.memory instanceof WebAssembly.Memory)) { + missing.push("memory"); + } + for (const exportName of REQUIRED_WASM_FUNCTION_EXPORTS) { + if (typeof wasm[exportName] !== "function") { + missing.push(exportName); + } + } + if (missing.length > 0) { + throw new Error(`WASM module is missing required HTML API exports: ${missing.join(", ")}.`); + } +} + +/* Wrapper around the WASM exports; the constructor throws unless the shape check and the full export validation both pass. 
*/ class WasmRuntime { + constructor(wasm) { + this.wasm = wasm; + if (!isWebAssemblyExports(wasm)) { + throw new Error("WASM module does not expose the expected HTML API runtime functions."); + } + validateWasmExports(wasm); + } + + version() { + return this.readCString(this.wasm.wp_html_api_rust_core_version()); + } + + /* Uint8Array passes through; an ArrayBuffer or other view is wrapped in a Uint8Array over the same buffer; anything else is stringified and encoded with textEncoder. */ encode(value) { + if (value instanceof Uint8Array) { + return value; + } + if 
(value instanceof ArrayBuffer) { + return new Uint8Array(value); + } + if (ArrayBuffer.isView(value)) { + return new Uint8Array(value.buffer, value.byteOffset, value.byteLength); + } + return textEncoder.encode(String(value)); + } + + /* Allocates at least one byte even for empty input and copies `bytes` in. `len` is the real length; `allocationLen` is the size that freeBytes() passes to dealloc. Throws when alloc returns a falsy pointer. */ allocBytes(bytes) { + const len = bytes.length; + const allocationLen = Math.max(1, len); + const ptr = this.wasm.wp_html_api_rust_alloc(allocationLen); + if (!ptr) { + throw new Error(`Failed to allocate ${allocationLen} bytes in WASM memory.`); + } + this.bytes().set(bytes, ptr); + return { ptr, len, allocationLen }; + } + + /* No-op for a missing allocation or a zero ptr. */ freeBytes(allocation) { + if (allocation && allocation.ptr) { + this.wasm.wp_html_api_rust_dealloc(allocation.ptr, allocation.allocationLen); + } + } + + /* Copies bytes into WASM memory, runs callback({ ptr, len }), and frees the allocation even if the callback throws. */ withBytes(bytes, callback) { + const allocation = this.allocBytes(bytes); + try { + return callback({ ptr: allocation.ptr, len: allocation.len }); + } finally { + this.freeBytes(allocation); + } + } + + withEncoded(value, callback) { + return this.withBytes(this.encode(value), callback); + } + + /* Zero-filled 8-byte scratch area handed to the callback as one pointer. */ withOutSlice(callback) { + const allocation = this.allocBytes(new Uint8Array(8)); + try { + return callback(allocation.ptr); + } finally { + this.freeBytes(allocation); + } + } + + /* Zero-filled 8-byte scratch area handed to the callback as two pointers 4 bytes apart. 
*/ withOutPair(callback) { + const allocation = this.allocBytes(new Uint8Array(8)); + try { + return callback(allocation.ptr, allocation.ptr + 4); + } finally { + this.freeBytes(allocation); + } + } + + /* Returns null when callback(out) is falsy, otherwise the string read from the out slice. */ readOutputString(callback) { + return this.withOutSlice((out) => { + if (!callback(out)) { + return null; + } + return this.readStringFromOut(out); + }); + } + + /* Returns null when callback(out) is falsy, otherwise the bytes read from the out slice. */ readOutputBytes(callback) { + return this.withOutSlice((out) => { + if (!callback(out)) { + return null; + } + return this.readBytesFromOut(out); + }); + } + + readStringFromOut(out) { + const { ptr, len } = this.readSlice(out); + return this.decode(ptr, len); + } + + /* Returns a copy (slice) of the referenced bytes. */ readBytesFromOut(out) { + const { ptr, len } = this.readSlice(out); + return this.bytes().slice(ptr, ptr + len); + } + + readSlice(out) { + return { + ptr: 
this.readU32(out), + len: this.readU32(out + 4), + }; + } + + readU32(ptr) { + return new DataView(this.wasm.memory.buffer).getUint32(ptr, true); + } + + readCString(ptr) { + const memory = this.bytes(); + let end = ptr; + while (memory[end] !== 0) { + end += 1; + } + return textDecoder.decode(memory.subarray(ptr, end)); + } + + decode(ptr, len) { + return textDecoder.decode(this.bytes().subarray(ptr, ptr + len)); + } + + bytes() { + return new Uint8Array(this.wasm.memory.buffer); + } + + decoderDecode(context, text) { + const input = this.encode(text); + const contextKind = decodeContextKind(context); + return this.withBytes(input, ({ ptr, len }) => { + const output = this.allocBytes(new Uint8Array(Math.max(1, len))); + const outLen = this.allocBytes(new Uint8Array(4)); + try { + if (!this.wasm.wp_html_api_rust_decoder_decode( + contextKind, + ptr, + len, + output.ptr, + output.allocationLen, + outLen.ptr, + )) { + return ""; + } + + const decodedLen = this.readU32(outLen.ptr); + return textDecoder.decode(this.bytes().subarray(output.ptr, output.ptr + decodedLen)); + } finally { + this.freeBytes(outLen); + this.freeBytes(output); + } + }); + } + + decoderReadCharacterReference(context, text, at = 0, matchByteLength = null) { + const input = this.encode(text); + const contextKind = decodeContextKind(context); + const normalizedAt = phpStringOffsetParameterCoerce(at, "at"); + if (!Number.isFinite(normalizedAt) || normalizedAt < 0) { + return null; + } + return this.withBytes(input, ({ ptr, len }) => { + const outputCapacity = Math.max(4, len - normalizedAt); + const output = this.allocBytes(new Uint8Array(outputCapacity)); + const lengths = this.allocBytes(new Uint8Array(8)); + try { + if (!this.wasm.wp_html_api_rust_decoder_read_character_reference( + contextKind, + ptr, + len, + normalizedAt, + output.ptr, + output.allocationLen, + lengths.ptr, + lengths.ptr + 4, + )) { + return null; + } + + if (matchByteLength && typeof matchByteLength === "object") { + 
matchByteLength.value = this.readU32(lengths.ptr + 4); + } + + const decodedLen = this.readU32(lengths.ptr); + return textDecoder.decode(this.bytes().subarray(output.ptr, output.ptr + decodedLen)); + } finally { + this.freeBytes(lengths); + this.freeBytes(output); + } + }); + } + + decoderAttributeStartsWith(haystack, searchText, asciiCaseInsensitive) { + return this.withEncoded(haystack, (haystackBytes) => ( + this.withEncoded(searchText, (searchBytes) => ( + Boolean(this.wasm.wp_html_api_rust_decoder_attribute_starts_with( + haystackBytes.ptr, + haystackBytes.len, + searchBytes.ptr, + searchBytes.len, + Boolean(asciiCaseInsensitive), + )) + )) + )); + } + + decoderCodePointToUtf8Bytes(codePoint) { + const numericCodePoint = phpInternalIntegerParameterCoerce(codePoint, "code_point"); + const normalizedCodePoint = Number.isFinite(numericCodePoint) && + numericCodePoint >= 0 && + numericCodePoint <= 0x10ffff + ? Math.trunc(numericCodePoint) + : 0x110000; + const output = this.allocBytes(new Uint8Array(4)); + const outLen = this.allocBytes(new Uint8Array(4)); + try { + if (!this.wasm.wp_html_api_rust_decoder_code_point_to_utf8_bytes( + normalizedCodePoint, + output.ptr, + output.allocationLen, + outLen.ptr, + )) { + return "\uFFFD"; + } + + const decodedLen = this.readU32(outLen.ptr); + return textDecoder.decode(this.bytes().subarray(output.ptr, output.ptr + decodedLen)); + } finally { + this.freeBytes(outLen); + this.freeBytes(output); + } + } + + scanNextTag(html, offset = 0) { + const input = this.encode(html); + const normalizedOffset = Math.max(0, phpIntegerCast(offset)); + return this.withBytes(input, ({ ptr, len }) => { + const out = this.allocBytes(new Uint8Array(32)); + try { + if (!this.wasm.wp_html_api_rust_scan_next_tag(ptr, len, normalizedOffset, out.ptr)) { + return false; + } + + const tagStart = this.readU32(out.ptr); + const tagEnd = this.readU32(out.ptr + 4); + const nameStart = this.readU32(out.ptr + 8); + const nameLen = this.readU32(out.ptr + 
12); + const memory = this.bytes(); + return { + tag_start: tagStart, + tag_end: tagEnd, + name_start: nameStart, + name_len: nameLen, + name_length: nameLen, + tag_name: asciiUpper(textDecoder.decode(memory.subarray(ptr + nameStart, ptr + nameStart + nameLen))), + is_closing: Boolean(memory[out.ptr + 16]), + has_self_closing_flag: Boolean(memory[out.ptr + 17]), + token_end: this.readU32(out.ptr + 20), + token_type: memory[out.ptr + 24], + }; + } finally { + this.freeBytes(out); + } + }); + } +} + +function decodeContextKind(context) { + return context === "attribute" ? DECODE_CONTEXT_ATTRIBUTE : DECODE_CONTEXT_DATA; +} + +function asciiUpper(value) { + return value.replace(/[a-z]/g, (char) => char.toUpperCase()); +} + +function asciiLower(value) { + return value.replace(/[A-Z]/g, (char) => char.toLowerCase()); +} + +function splitUnitSeparatedString(value) { + return value === "" ? [] : value.split("\x1f"); +} + +function splitNullSeparatedAscii(bytes) { + if (bytes.length === 0) { + return []; + } + const parts = []; + let start = 0; + for (let i = 0; i <= bytes.length; i += 1) { + if (i === bytes.length || bytes[i] === 0) { + parts.push(textDecoder.decode(bytes.subarray(start, i))); + start = i + 1; + } + } + return parts; +} + +function createDoctypeInfo(name, publicIdentifier, systemIdentifier, forceQuirksFlag) { + return new WP_HTML_Doctype_Info( + name, + publicIdentifier, + systemIdentifier, + forceQuirksFlag, + DOCTYPE_INFO_INTERNAL, + ); +} + +function parsePublicIdentifier(doctype, at, end, name) { + const quote = doctype[at]; + if (quote !== '"' && quote !== "'") { + return createDoctypeInfo(name, null, null, true); + } + + at += 1; + const identifierStart = at; + const identifierEnd = doctype.indexOf(quote, at); + const boundedIdentifierEnd = identifierEnd === -1 || identifierEnd > end ? 
end : identifierEnd; + const publicIdentifier = replaceNulls(doctype.slice(identifierStart, boundedIdentifierEnd)); + + if (identifierEnd === -1 || identifierEnd >= end || doctype[identifierEnd] !== quote) { + return createDoctypeInfo(name, publicIdentifier, null, true); + } + + at = skipHtmlWhitespace(doctype, identifierEnd + 1, end); + if (at >= end) { + return createDoctypeInfo(name, publicIdentifier, null, false); + } + + return parseSystemIdentifier(doctype, at, end, name, publicIdentifier); +} + +function parseSystemIdentifier(doctype, at, end, name, publicIdentifier) { + const quote = doctype[at]; + if (quote !== '"' && quote !== "'") { + return createDoctypeInfo(name, publicIdentifier, null, true); + } + + at += 1; + const identifierStart = at; + const identifierEnd = doctype.indexOf(quote, at); + const boundedIdentifierEnd = identifierEnd === -1 || identifierEnd > end ? end : identifierEnd; + const systemIdentifier = replaceNulls(doctype.slice(identifierStart, boundedIdentifierEnd)); + + if (identifierEnd === -1 || identifierEnd >= end || doctype[identifierEnd] !== quote) { + return createDoctypeInfo(name, publicIdentifier, systemIdentifier, true); + } + + return createDoctypeInfo(name, publicIdentifier, systemIdentifier, false); +} + +function doctypeCompatibilityMode(name, publicIdentifier, systemIdentifier, forceQuirksFlag) { + if (forceQuirksFlag) { + return "quirks"; + } + + if (name === "html" && publicIdentifier === null && systemIdentifier === null) { + return "no-quirks"; + } + + if (name !== "html") { + return "quirks"; + } + + const systemIdentifierIsMissing = systemIdentifier === null; + const publicId = publicIdentifier === null ? "" : publicIdentifier.toLowerCase(); + const systemId = systemIdentifier === null ? 
"" : systemIdentifier.toLowerCase(); + + if ( + publicId === "-//w3o//dtd w3 html strict 3.0//en//" || + publicId === "-/w3c/dtd html 4.0 transitional/en" || + publicId === "html" + ) { + return "quirks"; + } + + if (systemId === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { + return "quirks"; + } + + if (publicId === "") { + return "no-quirks"; + } + + if (QUIRKS_PUBLIC_IDENTIFIER_PREFIXES.some((prefix) => publicId.startsWith(prefix))) { + return "quirks"; + } + + if ( + systemIdentifierIsMissing && + ( + publicId.startsWith("-//w3c//dtd html 4.01 frameset//") || + publicId.startsWith("-//w3c//dtd html 4.01 transitional//") + ) + ) { + return "quirks"; + } + + if ( + publicId.startsWith("-//w3c//dtd xhtml 1.0 frameset//") || + publicId.startsWith("-//w3c//dtd xhtml 1.0 transitional//") + ) { + return "limited-quirks"; + } + + if ( + !systemIdentifierIsMissing && + ( + publicId.startsWith("-//w3c//dtd html 4.01 frameset//") || + publicId.startsWith("-//w3c//dtd html 4.01 transitional//") + ) + ) { + return "limited-quirks"; + } + + return "no-quirks"; +} + +function skipHtmlWhitespace(value, at, end) { + while (at < end && isHtmlWhitespaceCode(value.charCodeAt(at))) { + at += 1; + } + return at; +} + +function completeStartTagAt(value, at) { + const end = value.length; + if (value.charCodeAt(at) !== 0x3c /* < */) { + return null; + } + + let nameStart = at + 1; + if (nameStart >= end || !isAsciiAlphaCode(value.charCodeAt(nameStart))) { + return null; + } + + let nameEnd = nameStart + 1; + while (nameEnd < end && !isTagNameDelimiterCode(value.charCodeAt(nameEnd))) { + nameEnd += 1; + } + + let quote = null; + for (let i = nameEnd; i < end; i += 1) { + const code = value.charCodeAt(i); + if (quote !== null) { + if (code === quote) { + quote = null; + } + continue; + } + + if (code === 0x22 /* " */ || code === 0x27 /* ' */) { + quote = code; + continue; + } + + if (code === 0x3e /* > */) { + return { + tagName: asciiUpper(value.slice(nameStart, 
nameEnd)), + end: i + 1, + }; + } + } + + return null; +} + +function inputStartTagHasHiddenType(markup) { + const match = /(?:^|[\t\n\f\r /])type(?:[\t\n\f\r ]*=[\t\n\f\r ]*(?:"([^"]*)"|'([^']*)'|([^\t\n\f\r />]*)))?/i.exec(markup); + if (match === null) { + return false; + } + + const value = match[1] ?? match[2] ?? match[3] ?? ""; + return value.toLowerCase() === "hidden"; +} + +function incompleteBogusCommentAtEof(value) { + if (value.includes(">")) { + return false; + } + + return value.startsWith("<!") || value.startsWith("<?") || /^<\/[^A-Za-z>]/.test(value); +} + +function findSpecialAtomicCloserEnd(value, offset, tagName) { + if (tagName === "SCRIPT") { + return findScriptCloserEnd(value, offset); + } + + let at = offset; + while (at + tagName.length + 2 <= value.length) { + const closerStart = value.indexOf("</", at); + if (closerStart === -1) { + return null; + } + + const nameStart = closerStart + 2; + const nameEnd = nameStart + tagName.length; + if ( + nameEnd <= value.length && + asciiStartsWithAt(value, tagName, nameStart) && + (nameEnd === value.length || isTagNameDelimiterCode(value.charCodeAt(nameEnd))) + ) { + return completeTagEndAfterName(value, nameEnd); + } + + at = closerStart + 2; + } + + return null; +} + +function findScriptCloserEnd(value, offset) { + let at = offset; + let escaped = false; + let doubleEscaped = false; + + while (at < value.length) { + if (value.startsWith("<!-->", at)) { + at += 5; + continue; + } + + if (value.startsWith("<!--", at)) { + escaped = true; + doubleEscaped = false; + at += 4; + continue; + } + + if ((escaped || doubleEscaped) && value.startsWith("-->", at)) { + escaped = false; + doubleEscaped = false; + at += 3; + continue; + } + + if (asciiStartsWithAt(value, "</script", at)) { + const nameEnd = at + "</script".length; + if (nameEnd === value.length || isTagNameDelimiterCode(value.charCodeAt(nameEnd))) { + if (doubleEscaped) { + doubleEscaped = false; + escaped = true; + at = nameEnd; + continue; + } + 
+ return completeTagEndAfterName(value, nameEnd); + } + } + + if (escaped && asciiStartsWithAt(value, "<script", at)) { + const nameEnd = at + "<script".length; + if (nameEnd === value.length || isTagNameDelimiterCode(value.charCodeAt(nameEnd))) { + doubleEscaped = true; + at = nameEnd; + continue; + } + } + + at += 1; + } + + return null; +} + +function completeTagEndAfterName(value, nameEnd) { + let quote = null; + for (let i = nameEnd; i < value.length; i += 1) { + const code = value.charCodeAt(i); + if (quote !== null) { + if (code === quote) { + quote = null; + } + continue; + } + + if (code === 0x22 /* " */ || code === 0x27 /* ' */) { + quote = code; + continue; + } + + if (code === 0x3e /* > */) { + return i + 1; + } + } + + return null; +} + +function incompleteEndTagAt(value, at) { + const end = value.length; + if ( + value.charCodeAt(at) !== 0x3c /* < */ || + value.charCodeAt(at + 1) !== 0x2f /* / */ + ) { + return false; + } + + const nameStart = at + 2; + if (nameStart >= end || !isAsciiAlphaCode(value.charCodeAt(nameStart))) { + return false; + } + + for (let i = nameStart + 1; i < end; i += 1) { + const code = value.charCodeAt(i); + if (code === 0x3c /* < */ || code === 0x3e /* > */) { + return false; + } + } + + return true; +} + +function incompleteQuotedStartTagAt(value, at) { + const end = value.length; + if (value.charCodeAt(at) !== 0x3c /* < */) { + return false; + } + + let nameStart = at + 1; + if (nameStart >= end || !isAsciiAlphaCode(value.charCodeAt(nameStart))) { + return false; + } + + let nameEnd = nameStart + 1; + while (nameEnd < end && !isTagNameDelimiterCode(value.charCodeAt(nameEnd))) { + nameEnd += 1; + } + + let afterEquals = false; + let quote = null; + for (let i = nameEnd; i < end; i += 1) { + const code = value.charCodeAt(i); + if (quote !== null) { + if (code === quote) { + quote = null; + } + continue; + } + + if (afterEquals && (code === 0x22 /* " */ || code === 0x27 /* ' */)) { + quote = code; + afterEquals = false; + 
continue; + } + + if (code === 0x3d /* = */) { + afterEquals = true; + continue; + } + + if (code === 0x3e /* > */ || code === 0x3c /* < */) { + return false; + } + + if (!isHtmlWhitespaceCode(code)) { + afterEquals = false; + } + } + + return quote !== null; +} + +function incompleteStartTagAt(value, at) { + const end = value.length; + if (value.charCodeAt(at) !== 0x3c /* < */) { + return false; + } + + const nameStart = at + 1; + if (nameStart >= end || !isAsciiAlphaCode(value.charCodeAt(nameStart))) { + return false; + } + + for (let i = nameStart + 1; i < end; i += 1) { + const code = value.charCodeAt(i); + if (code === 0x3c /* < */ || code === 0x3e /* > */) { + return false; + } + } + + return true; +} + +function isHtmlWhitespaceCode(code) { + return code === 0x20 || code === 0x09 || code === 0x0a || code === 0x0c || code === 0x0d; +} + +function isAsciiAlphaCode(code) { + return (code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a); +} + +function isTagNameDelimiterCode(code) { + return code === 0x20 || + code === 0x09 || + code === 0x0a || + code === 0x0c || + code === 0x0d || + code === 0x2f || + code === 0x3e; +} + +function splitHtmlWhitespace(value) { + const parts = []; + let start = 0; + + for (let i = 0; i <= value.length; i += 1) { + if (i < value.length && !isHtmlWhitespaceCode(value.charCodeAt(i))) { + continue; + } + + if (i > start) { + parts.push(value.slice(start, i)); + } + start = i + 1; + } + + return parts; +} + +function asciiStartsWithAt(value, needle, at) { + return value.slice(at, at + needle.length).toLowerCase() === needle.toLowerCase(); +} + +function replaceNulls(value) { + return value.replace(/\0/g, "\uFFFD"); +} + +function isValidAttributeName(value) { + if (value.length === 0) { + return false; + } + + for (let i = 0; i < value.length; i += 1) { + const code = value.codePointAt(i); + if (code > 0xffff) { + i += 1; + } + + if ( + code <= 0x20 || + code === 0x22 || + code === 0x26 || + code === 0x27 || + code === 0x2f 
|| + code === 0x3c || + code === 0x3d || + code === 0x3e || + isUnicodeNoncharacter(code) + ) { + return false; + } + } + + return true; +} + +function isUnicodeNoncharacter(code) { + return ( + (code >= 0xfdd0 && code <= 0xfdef) || + (code <= 0x10ffff && (code & 0xfffe) === 0xfffe) + ); +} + +function phpIntegerCast(value) { + if (typeof value === "number") { + return Number.isFinite(value) ? Math.trunc(value) : 0; + } + if (typeof value === "boolean") { + return value ? 1 : 0; + } + if (typeof value === "string") { + const match = value.trimStart().match(/^[+-]?\d+/); + return match ? Number.parseInt(match[0], 10) : 0; + } + if (Array.isArray(value)) { + return value.length === 0 ? 0 : 1; + } + if (value !== null && (typeof value === "object" || typeof value === "function")) { + return 1; + } + return 0; +} + +const PHP_INT_MIN = -9223372036854775808n; +const PHP_INT_MAX = 9223372036854775807n; +const PHP_INT_MIN_NUMBER = Number(PHP_INT_MIN); +const PHP_INT_MAX_NUMBER = Number(PHP_INT_MAX); + +function isPhpIntegerArrayKeyString(value) { + if (value === "0") { + return true; + } + if (value === "" || value[0] === "+") { + return false; + } + + const digits = value[0] === "-" ? value.slice(1) : value; + if (digits === "" || digits[0] === "0" || !/^\d+$/.test(digits)) { + return false; + } + + const integer = BigInt(value); + return integer >= PHP_INT_MIN && integer <= PHP_INT_MAX; +} + +function phpArrayKeyParameterCoerce(value, parameterName) { + if (typeof value === "string") { + return isPhpIntegerArrayKeyString(value) ? `i:${BigInt(value).toString()}` : `s:${value}`; + } + + if (typeof value === "number") { + const integer = Number.isFinite(value) ? Math.trunc(value) : 0; + return `i:${Object.is(integer, -0) ? 0 : integer}`; + } + + if (typeof value === "boolean") { + return `i:${value ? 
1 : 0}`; + } + + if (value === null) { + return "s:"; + } + + throw new TypeError(`Argument $${parameterName} must be of type array-key.`); +} + +function phpClassUpdateKey(value, parameterName) { + if (typeof value === "string") { + if (isPhpIntegerArrayKeyString(value)) { + return { isIntegerKey: true, name: BigInt(value).toString() }; + } + return { isIntegerKey: false, name: value }; + } + + if (typeof value === "number") { + const integer = phpIntegerCast(value); + return { isIntegerKey: true, name: String(Object.is(integer, -0) ? 0 : integer) }; + } + + if (typeof value === "boolean") { + return { isIntegerKey: true, name: value ? "1" : "0" }; + } + + if (value === null) { + return { isIntegerKey: false, name: "" }; + } + + throw new TypeError(`Argument $${parameterName} must be of type array-key.`); +} + +function classUpdateMapKey(className) { + return `${className.isIntegerKey ? "i" : "s"}:${className.name}`; +} + +function classUpdateComparable(className, isQuirksMode) { + if (isQuirksMode) { + return `s:${asciiLower(className.name)}`; + } + + return classUpdateMapKey(className); +} + +function classTokenComparable(className, isQuirksMode) { + return `s:${isQuirksMode ? 
asciiLower(className) : className}`; +} + +function applyClassNameUpdates(existingClass, updates, isQuirksMode) { + let className = ""; + let at = 0; + let modified = false; + const seen = new Set(); + const toRemove = new Set( + updates + .filter((update) => update.operation === CLASS_UPDATE_REMOVE) + .map((update) => classUpdateComparable(update, isQuirksMode)), + ); + + while (at < existingClass.length) { + const whitespaceStart = at; + while (at < existingClass.length && isHtmlClassWhitespace(existingClass[at])) { + at += 1; + } + + const nameStart = at; + while (at < existingClass.length && !isHtmlClassWhitespace(existingClass[at])) { + at += 1; + } + + if (nameStart === at) { + break; + } + + const name = existingClass.slice(nameStart, at); + const comparableName = classTokenComparable(name, isQuirksMode); + if (toRemove.has(comparableName)) { + modified = true; + continue; + } + + if (seen.has(comparableName)) { + continue; + } + + seen.add(comparableName); + if (className !== "") { + className += existingClass.slice(whitespaceStart, nameStart); + } + className += name; + } + + for (const update of updates) { + const comparableName = classUpdateComparable(update, isQuirksMode); + if (update.operation === CLASS_UPDATE_ADD && !seen.has(comparableName)) { + modified = true; + className += className.length > 0 ? 
" " : ""; + className += update.name; + } + } + + return { className, modified }; +} + +function isHtmlClassWhitespace(value) { + return value === " " || value === "\t" || value === "\n" || value === "\f" || value === "\r"; +} + +function phpIntegerParameterCoerce(value, parameterName) { + if (value === null) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + trimmed === "" || + !/^[+-]?(?:(?:\d+\.?\d*)|(?:\.\d+))(?:[eE][+-]?\d+)?$/.test(trimmed) + ) { + throw new TypeError(`Argument $${parameterName} must be numeric.`); + } + if (/^\+?\d+$/.test(trimmed) && BigInt(trimmed) > PHP_INT_MAX) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + const numericValue = Number(trimmed); + if ( + !Number.isFinite(numericValue) || + numericValue > PHP_INT_MAX_NUMBER || + numericValue < PHP_INT_MIN_NUMBER + ) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + return Math.trunc(numericValue); + } + + if ( + typeof value === "number" && + ( + !Number.isFinite(value) || + value > PHP_INT_MAX_NUMBER || + value < PHP_INT_MIN_NUMBER + ) + ) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + + if ( + typeof value === "object" || + typeof value === "function" || + typeof value === "symbol" || + typeof value === "undefined" + ) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + + return phpIntegerCast(value); +} + +function phpInternalIntegerParameterCoerce(value, parameterName) { + if (value === null) { + return 0; + } + + return phpIntegerParameterCoerce(value, parameterName); +} + +function phpUntypedStringLength(value, parameterName) { + if (value === null || value === false) { + return 0; + } + if (value === true) { + return 1; + } + if (typeof value === "string") { + return value.length; + } + if (typeof value === "number") { + return 
phpNumberToString(value).length; + } + throw new TypeError(`Argument $${parameterName} must be of type string.`); +} + +function phpAttributeStartsWithNonStringScalar(haystack, searchText) { + const haystackLength = phpUntypedStringLength(haystack, "haystack"); + const searchLength = phpUntypedStringLength(searchText, "search_text"); + + if (searchLength === 0 || haystackLength === 0) { + return true; + } + + return typeof haystack !== "string" && typeof searchText !== "string"; +} + +function phpStringOffsetParameterCoerce(value, parameterName) { + if (value === null) { + return 0; + } + + if (typeof value === "number") { + return Number.isFinite(value) ? Math.trunc(value) : value; + } + + if (typeof value === "boolean") { + return value ? 1 : 0; + } + + if (typeof value === "string") { + const trimmed = value.trim(); + if (!/^[+-]?\d+$/.test(trimmed)) { + throw new TypeError(`Argument $${parameterName} must be of type int.`); + } + return Number.parseInt(trimmed, 10); + } + + throw new TypeError(`Argument $${parameterName} must be of type int.`); +} + +function phpStringParameterCoerce(value, parameterName, nullable = false) { + if (value === null) { + if (nullable) { + return null; + } + throw new TypeError(`Argument $${parameterName} must be of type string.`); + } + + if ( + typeof value === "object" || + typeof value === "function" || + typeof value === "symbol" || + typeof value === "undefined" + ) { + throw new TypeError(`Argument $${parameterName} must be of type string.`); + } + + if (typeof value === "boolean") { + return value ? 
"1" : ""; + } + + if (typeof value === "number") { + return phpNumberToString(value); + } + + return String(value); +} + +function phpInternalStringCoerce(value, parameterName) { + if (value === null) { + return ""; + } + + if ( + typeof value === "object" || + typeof value === "function" || + typeof value === "symbol" || + typeof value === "undefined" + ) { + throw new TypeError(`Argument $${parameterName} must be of type string.`); + } + + if (typeof value === "boolean") { + return value ? "1" : ""; + } + + if (typeof value === "number") { + return phpNumberToString(value); + } + + return String(value); +} + +function phpInterpolatedStringCoerce(value, parameterName) { + if (Array.isArray(value)) { + return "Array"; + } + + if (value === null) { + return ""; + } + + if ( + typeof value === "object" || + typeof value === "function" || + typeof value === "symbol" || + typeof value === "undefined" + ) { + throw new TypeError(`Argument $${parameterName} could not be converted to string.`); + } + + if (typeof value === "boolean") { + return value ? 
"1" : ""; + } + + if (typeof value === "number") { + return phpNumberToString(value); + } + + return String(value); +} + +function phpNumberToString(value) { + if (Number.isNaN(value)) { + return "NAN"; + } + if (value === Infinity) { + return "INF"; + } + if (value === -Infinity) { + return "-INF"; + } + if (Object.is(value, -0)) { + return "-0"; + } + if (value === 0) { + return "0"; + } + if (Number.isSafeInteger(value)) { + return String(value); + } + return phpFloatToString(value); +} + +const PHP_FLOAT_STRING_PRECISION = 14; + +const cachedPowersOfTen = new Map([[0, 1n]]); + +function powerOfTen(exponent) { + let power = cachedPowersOfTen.get(exponent); + if (power === undefined) { + power = 10n ** BigInt(exponent); + cachedPowersOfTen.set(exponent, power); + } + return power; +} + +function doubleParts(value) { + const buffer = new ArrayBuffer(8); + const view = new DataView(buffer); + view.setFloat64(0, Math.abs(value), false); + + const high = view.getUint32(0, false); + const low = view.getUint32(4, false); + const exponentBits = (high >>> 20) & 0x7ff; + const significandBits = (BigInt(high & 0xfffff) << 32n) | BigInt(low); + + if (exponentBits === 0) { + return { + exponent: -1074, + significand: significandBits, + }; + } + + return { + exponent: exponentBits - 1023 - 52, + significand: (1n << 52n) | significandBits, + }; +} + +function compareDoublePartsToPowerOfTen(parts, decimalExponent) { + let leftNumerator = parts.significand; + let leftDenominator = 1n; + let rightNumerator = 1n; + let rightDenominator = 1n; + + if (parts.exponent >= 0) { + leftNumerator <<= BigInt(parts.exponent); + } else { + leftDenominator <<= BigInt(-parts.exponent); + } + + if (decimalExponent >= 0) { + rightNumerator = powerOfTen(decimalExponent); + } else { + rightDenominator = powerOfTen(-decimalExponent); + } + + const left = leftNumerator * rightDenominator; + const right = rightNumerator * leftDenominator; + + return left < right ? -1 : left > right ? 
1 : 0; +} +/** Returns the integer e with 10 ** e <= |value| < 10 ** (e + 1). Math.log10 gives a first estimate; the two loops correct it using exact comparisons against parts. */ +function decimalExponentForDouble(value, parts) { + let decimalExponent = Math.floor(Math.log10(Math.abs(value))); + + while (compareDoublePartsToPowerOfTen(parts, decimalExponent) < 0) { + decimalExponent -= 1; + } + while (compareDoublePartsToPowerOfTen(parts, decimalExponent + 1) >= 0) { + decimalExponent += 1; + } + + return decimalExponent; +} +/** BigInt division rounded to the nearest integer, with exact halves going to the even quotient. The parity test assumes non-negative operands, which is what roundedFloatSignificand passes. */ +function roundQuotientToEven(numerator, denominator) { + const quotient = numerator / denominator; + const doubledRemainder = (numerator % denominator) * 2n; + + if ( + doubledRemainder > denominator || + (doubledRemainder === denominator && quotient % 2n === 1n) + ) { + return quotient + 1n; + } + + return quotient; +} +/** Returns the magnitude described by parts multiplied by 10 ** (PHP_FLOAT_STRING_PRECISION - 1 - decimalExponent), computed exactly and rounded half to even, as a BigInt. */ +function roundedFloatSignificand(parts, decimalExponent) { + const scale = PHP_FLOAT_STRING_PRECISION - 1 - decimalExponent; + let numerator = parts.significand; + let denominator = 1n; + + if (parts.exponent >= 0) { + numerator <<= BigInt(parts.exponent); + } else { + denominator <<= BigInt(-parts.exponent); + } + + if (scale >= 0) { + numerator *= powerOfTen(scale); + } else { + denominator *= powerOfTen(-scale); + } + + return roundQuotientToEven(numerator, denominator); +} +/** Removes every trailing "0" character from a string. */ +function trimTrailingZeros(value) { + return value.replace(/0+$/, ""); +} +/** Formats the digits in scientific notation: one leading digit, a point, the remaining digits with trailing zeros trimmed (at least "0"), then E and the exponent with an explicit "+" when it is non-negative. */ +function formatPhpScientificFloat(sign, significand, decimalExponent) { + const digits = significand.toString().padStart(PHP_FLOAT_STRING_PRECISION, "0"); + const fraction = trimTrailingZeros(digits.slice(1)) || "0"; + const exponentSign = decimalExponent >= 0 ? 
"+" : ""; + + return `${sign}${digits[0]}.${fraction}E${exponentSign}${decimalExponent}`; +} +/** Formats the digits in plain decimal notation with the point after decimalExponent + 1 digits: pads with leading zeros after "0." for small values, pads with trailing zeros for large ones, and drops trailing fraction zeros (and the point when no fraction is left). */ +function formatPhpFixedFloat(sign, significand, decimalExponent) { + const digits = significand.toString().padStart(PHP_FLOAT_STRING_PRECISION, "0"); + const decimalPoint = decimalExponent + 1; + let integer; + let fraction; + + if (decimalPoint <= 0) { + integer = "0"; + fraction = `${"0".repeat(-decimalPoint)}${digits}`; + } else if (decimalPoint >= digits.length) { + integer = `${digits}${"0".repeat(decimalPoint - digits.length)}`; + fraction = ""; + } else { + integer = digits.slice(0, decimalPoint); + fraction = digits.slice(decimalPoint); + } + + fraction = trimTrailingZeros(fraction); + + return fraction === "" ? `${sign}${integer}` : `${sign}${integer}.${fraction}`; +} +/** Formats a number with PHP_FLOAT_STRING_PRECISION significant digits. If rounding carries into an extra digit the significand is divided by ten and the exponent incremented. Scientific notation is used when the decimal exponent is below -4 or at least the precision, fixed notation otherwise. phpNumberToString handles NaN, the infinities and zero before calling this. */ +function phpFloatToString(value) { + const sign = value < 0 ? "-" : ""; + const parts = doubleParts(value); + let decimalExponent = decimalExponentForDouble(value, parts); + let significand = roundedFloatSignificand(parts, decimalExponent); + const significandLimit = powerOfTen(PHP_FLOAT_STRING_PRECISION); + + if (significand >= significandLimit) { + significand /= 10n; + decimalExponent += 1; + } + + return decimalExponent < -4 || decimalExponent >= PHP_FLOAT_STRING_PRECISION + ? 
formatPhpScientificFloat(sign, significand, decimalExponent) + : formatPhpFixedFloat(sign, significand, decimalExponent); +} +/** Coerces a scalar argument to a boolean. Throws TypeError for null, undefined, objects, functions and symbols. The strings "" and "0" are false and every other string is true; NaN is true; remaining values use Boolean(value). */ +function phpBooleanParameterCoerce(value, parameterName) { + if ( + value === null || + typeof value === "object" || + typeof value === "function" || + typeof value === "symbol" || + typeof value === "undefined" + ) { + throw new TypeError(`Argument $${parameterName} must be of type bool.`); + } + + if (typeof value === "string") { + return value !== "" && value !== "0"; + } + + if (typeof value === "number" && Number.isNaN(value)) { + return true; + } + + return Boolean(value); +} +/** Returns a shallow copy of value; throws TypeError when value is not an Array. */ +function phpArrayParameterCoerce(value, parameterName) { + if (!Array.isArray(value)) { + throw new TypeError(`Argument $${parameterName} must be of type array.`); + } + + return [...value]; +} +/** Returns value unchanged; throws TypeError unless it is an instance of WP_HTML_Token. */ +function phpTokenParameterCoerce(value, parameterName) { + if (!(value instanceof WP_HTML_Token)) { + throw new TypeError(`Argument $${parameterName} must be of type WP_HTML_Token.`); + } + + return value; +} +/** Maps a context node name to a namespace: "SVG" gives "svg", "MATH" gives "math", anything else gives "html". */ +function contextNamespace(nodeName) { + if (nodeName === "SVG") { + return "svg"; + } + if (nodeName === "MATH") { + return "math"; + } + return "html"; +} +/** Namespace for a tag: "SVG" and "MATH" select their own namespace; every other tag name returns currentNamespace (the explicit "html" branch returns the same value as the fallthrough). */ +function namespaceForTag(tagName, currentNamespace) { + if (tagName === "SVG") { + return "svg"; + } + if (tagName === "MATH") { + return "math"; + } + + if (currentNamespace === "html") { + return "html"; + } + + return currentNamespace; +} +/** Returns null for a null tag name, rewrites "IMAGE" to "IMG" when the namespace is "html", and otherwise returns tagName unchanged. */ +function normalizeTagNameForNamespace(tagName, namespaceName) { + if (tagName === null) { + return null; + } + + return namespaceName === "html" && tagName === "IMAGE" ? 
"IMG" : tagName; +} +/** Whether a token expects a closing tag. False for an empty name, a name starting with "#", or the name "html". In the html namespace it is false for members of VOID_ELEMENTS and SPECIAL_ATOMIC_ELEMENTS; in any other namespace it is false when the self-closing flag is set. */ +function tokenExpectsCloser(tokenName, namespaceName, hasSelfClosingFlag) { + if (!tokenName || tokenName[0] === "#" || tokenName === "html") { + return false; + } + + if (namespaceName === "html") { + return !VOID_ELEMENTS.has(tokenName) && !SPECIAL_ATOMIC_ELEMENTS.has(tokenName); + } + + return !hasSelfClosingFlag; +} +/** True when any entry after index in the parallel openElements and namespaces arrays is a special boundary according to isSpecialBoundary. */ +function hasSpecialBoundaryAfter(openElements, namespaces, index) { + for (let i = index + 1; i < openElements.length; i += 1) { + if (isSpecialBoundary(openElements[i], namespaces[i])) { + return true; + } + } + return false; +} +/** Normalizes a tag name or a token-like object to { nodeName, namespaceName }. For objects the name is taken from the own property node_name, nodeName or tagName (first present) and the namespace from namespace or namespaceName. When node_name or namespace is present the namespace is kept as given and the name is upper-cased only for "html"; otherwise the name is upper-cased and the namespace lower-cased. Any other input is stringified, upper-cased and placed in "html". */ +function normalizeSpecialTagInput(tagName) { + if (tagName && typeof tagName === "object") { + const hasNodeName = Object.prototype.hasOwnProperty.call(tagName, "node_name"); + const hasCamelNodeName = Object.prototype.hasOwnProperty.call(tagName, "nodeName"); + const hasTagName = Object.prototype.hasOwnProperty.call(tagName, "tagName"); + const hasNamespace = Object.prototype.hasOwnProperty.call(tagName, "namespace"); + const hasCamelNamespace = Object.prototype.hasOwnProperty.call(tagName, "namespaceName"); + const nodeName = phpInternalStringCoerce( + hasNodeName ? tagName.node_name : hasCamelNodeName ? tagName.nodeName : hasTagName ? tagName.tagName : "", + "node_name", + ); + const namespaceName = phpInternalStringCoerce( + hasNamespace ? tagName.namespace : hasCamelNamespace ? tagName.namespaceName : "", + "namespace", + ); + + if (hasNodeName || hasNamespace) { + return { + nodeName: namespaceName === "html" ? 
asciiUpper(nodeName) : nodeName, + namespaceName, + }; + } + + return { + nodeName: asciiUpper(nodeName), + namespaceName: asciiLower(namespaceName), + }; + } + + return { + nodeName: asciiUpper(String(tagName)), + namespaceName: "html", + }; +} +/** Whether nodeName is a special boundary in its namespace: membership in END_TAG_SPECIAL_BOUNDARIES for html; MI, MO, MN, MS, MTEXT or ANNOTATION-XML for math; DESC, FOREIGNOBJECT or TITLE for svg; false for any other namespace. */ +function isSpecialBoundary(nodeName, namespaceName) { + if (namespaceName === "html") { + return END_TAG_SPECIAL_BOUNDARIES.has(nodeName); + } + + if (namespaceName === "math") { + return ["MI", "MO", "MN", "MS", "MTEXT", "ANNOTATION-XML"].includes(nodeName); + } + + if (namespaceName === "svg") { + return ["DESC", "FOREIGNOBJECT", "TITLE"].includes(nodeName); + } + + return false; +} +/** Serializes a doctype record (name, public_identifier, system_identifier) into a DOCTYPE declaration; returns an empty string for null. The PUBLIC keyword precedes a non-null public identifier; SYSTEM is emitted only when there is a system identifier and no public one. Each identifier is wrapped in double quotes unless it contains a double quote, in which case single quotes are used. */ +function serializeDoctype(doctype) { + if (doctype === null) { + return ""; + } + + let html = "<!DOCTYPE"; + if (doctype.name) { + html += ` ${doctype.name}`; + } + + if (doctype.public_identifier !== null) { + const quote = doctype.public_identifier.includes('"') ? "'" : '"'; + html += ` PUBLIC ${quote}${doctype.public_identifier}${quote}`; + } + + if (doctype.system_identifier !== null) { + if (doctype.public_identifier === null) { + html += " SYSTEM"; + } + const quote = doctype.system_identifier.includes('"') ? 
"'" : '"'; + html += ` ${quote}${doctype.system_identifier}${quote}`; + } + + return `${html}>`; +} +/** Replaces the ampersand, double quote, single quote, less-than and greater-than characters of String(value) with the named references amp, quot, apos, lt and gt. */ +function htmlEscape(value) { + return String(value).replace(/[&"'<>]/g, (char) => { + switch (char) { + case "&": + return "&amp;"; + case '"': + return "&quot;"; + case "'": + return "&apos;"; + case "<": + return "&lt;"; + case ">": + return "&gt;"; + default: + return char; + } + }); +} +/** Maps a lower-cased SVG tag name to its mixed-case spelling (for example clippath to clipPath); names missing from the table are returned unchanged. The lookup Map is rebuilt on every call. */ +function qualifySvgTagName(lowerTagName) { + const adjusted = new Map([ + ["altglyph", "altGlyph"], + ["altglyphdef", "altGlyphDef"], + ["altglyphitem", "altGlyphItem"], + ["animatecolor", "animateColor"], + ["animatemotion", "animateMotion"], + ["animatetransform", "animateTransform"], + ["clippath", "clipPath"], + ["feblend", "feBlend"], + ["fecolormatrix", "feColorMatrix"], + ["fecomponenttransfer", "feComponentTransfer"], + ["fecomposite", "feComposite"], + ["feconvolvematrix", "feConvolveMatrix"], + ["fediffuselighting", "feDiffuseLighting"], + ["fedisplacementmap", "feDisplacementMap"], + ["fedistantlight", "feDistantLight"], + ["fedropshadow", "feDropShadow"], + ["feflood", "feFlood"], + ["fefunca", "feFuncA"], + ["fefuncb", "feFuncB"], + ["fefuncg", "feFuncG"], + ["fefuncr", "feFuncR"], + ["fegaussianblur", "feGaussianBlur"], + ["feimage", "feImage"], + ["femerge", "feMerge"], + ["femergenode", "feMergeNode"], + ["femorphology", "feMorphology"], + ["feoffset", "feOffset"], + ["fepointlight", "fePointLight"], + ["fespecularlighting", "feSpecularLighting"], + ["fespotlight", "feSpotLight"], + ["fetile", "feTile"], + ["feturbulence", "feTurbulence"], + ["foreignobject", "foreignObject"], + ["glyphref", "glyphRef"], + ["lineargradient", "linearGradient"], + ["radialgradient", "radialGradient"], + ["textpath", "textPath"], + ]); + return adjusted.get(lowerTagName) ?? 
lowerTagName; +} +/** Adjusts an attribute name for foreign content using an ASCII-lower-cased lookup key. In the math namespace definitionurl becomes definitionURL; in the svg namespace the mixed-case table applies; after that the xlink, xml and xmlns table (colon replaced by a space) is consulted for every namespaceName. Names found in no table are returned exactly as passed in. The lookup Maps are rebuilt on every call. */ +function qualifyForeignAttributeName(namespaceName, attributeName) { + const lowerAttributeName = asciiLower(attributeName); + + if (namespaceName === "math" && lowerAttributeName === "definitionurl") { + return "definitionURL"; + } + + if (namespaceName === "svg") { + const adjusted = new Map([ + ["attributename", "attributeName"], + ["attributetype", "attributeType"], + ["basefrequency", "baseFrequency"], + ["baseprofile", "baseProfile"], + ["calcmode", "calcMode"], + ["clippathunits", "clipPathUnits"], + ["diffuseconstant", "diffuseConstant"], + ["edgemode", "edgeMode"], + ["filterunits", "filterUnits"], + ["glyphref", "glyphRef"], + ["gradienttransform", "gradientTransform"], + ["gradientunits", "gradientUnits"], + ["kernelmatrix", "kernelMatrix"], + ["kernelunitlength", "kernelUnitLength"], + ["keypoints", "keyPoints"], + ["keysplines", "keySplines"], + ["keytimes", "keyTimes"], + ["lengthadjust", "lengthAdjust"], + ["limitingconeangle", "limitingConeAngle"], + ["markerheight", "markerHeight"], + ["markerunits", "markerUnits"], + ["markerwidth", "markerWidth"], + ["maskcontentunits", "maskContentUnits"], + ["maskunits", "maskUnits"], + ["numoctaves", "numOctaves"], + ["pathlength", "pathLength"], + ["patterncontentunits", "patternContentUnits"], + ["patterntransform", "patternTransform"], + ["patternunits", "patternUnits"], + ["pointsatx", "pointsAtX"], + ["pointsaty", "pointsAtY"], + ["pointsatz", "pointsAtZ"], + ["preservealpha", "preserveAlpha"], + ["preserveaspectratio", "preserveAspectRatio"], + ["primitiveunits", "primitiveUnits"], + ["refx", "refX"], + ["refy", "refY"], + ["repeatcount", "repeatCount"], + ["repeatdur", "repeatDur"], + ["requiredextensions", "requiredExtensions"], + ["requiredfeatures", "requiredFeatures"], + ["specularconstant", "specularConstant"], + ["specularexponent", "specularExponent"], + ["spreadmethod", "spreadMethod"], + ["startoffset", "startOffset"], + ["stddeviation", "stdDeviation"], + ["stitchtiles", 
"stitchTiles"], + ["surfacescale", "surfaceScale"], + ["systemlanguage", "systemLanguage"], + ["tablevalues", "tableValues"], + ["targetx", "targetX"], + ["targety", "targetY"], + ["textlength", "textLength"], + ["viewbox", "viewBox"], + ["viewtarget", "viewTarget"], + ["xchannelselector", "xChannelSelector"], + ["ychannelselector", "yChannelSelector"], + ["zoomandpan", "zoomAndPan"], + ]); + const adjustedName = adjusted.get(lowerAttributeName); + if (adjustedName !== undefined) { + return adjustedName; + } + } + + const foreignAdjusted = new Map([ + ["xlink:actuate", "xlink actuate"], + ["xlink:arcrole", "xlink arcrole"], + ["xlink:href", "xlink href"], + ["xlink:role", "xlink role"], + ["xlink:show", "xlink show"], + ["xlink:title", "xlink title"], + ["xlink:type", "xlink type"], + ["xml:lang", "xml lang"], + ["xml:space", "xml space"], + ["xmlns", "xmlns"], + ["xmlns:xlink", "xmlns xlink"], + ]); + return foreignAdjusted.get(lowerAttributeName) ?? attributeName; +} diff --git a/ext/html-api-rust/wasm/wp_html_api_rust_core.wasm.d.ts b/ext/html-api-rust/wasm/wp_html_api_rust_core.wasm.d.ts new file mode 100644 index 0000000000000..0755010db83b1 --- /dev/null +++ b/ext/html-api-rust/wasm/wp_html_api_rust_core.wasm.d.ts @@ -0,0 +1,2 @@ +declare const wasmUrl: string; +export default wasmUrl; diff --git a/ext/html-api-rust/wp_html_api_rust.c b/ext/html-api-rust/wp_html_api_rust.c new file mode 100644 index 0000000000000..8ca215ddd5032 --- /dev/null +++ b/ext/html-api-rust/wp_html_api_rust.c @@ -0,0 +1,2968 @@ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "php.h" +#include "Zend/zend_interfaces.h" +#include "ext/standard/info.h" +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "php_wp_html_api_rust.h" +/* Result record that wp_html_api_rust_scan_next_tag() fills through its out parameter. NOTE(review): the exact meaning of each offset and of token_type is defined by the native core, which is not visible here. */ +typedef struct _wp_html_api_rust_tag_scan { + size_t tag_start; + size_t tag_end; + size_t name_start; + size_t name_len; + bool is_closing; + bool has_self_closing_flag; + size_t token_end; + unsigned char token_type; +} 
wp_html_api_rust_tag_scan; +/* Pointer and length pair through which the native functions hand back byte ranges (used as their out parameter). */ +typedef struct _wp_html_api_rust_byte_slice { + const unsigned char *ptr; + size_t len; +} wp_html_api_rust_byte_slice; +/* PHP object wrapper holding the opaque native processor handle; the zend_object is embedded as std so wp_html_tag_processor_from_obj() can recover the wrapper by offset. */ +typedef struct _wp_html_tag_processor_object { + void *native; + zend_long seek_count; + zend_object std; +} wp_html_tag_processor_object; +/* One text replacement; wp_html_api_rust_compare_text_replacements() orders these by start, then text, then length. NOTE(review): presumably start and length are byte offsets into the document - confirm. */ +typedef struct _wp_html_api_rust_text_replacement { + size_t start; + size_t length; + zend_string *text; +} wp_html_api_rust_text_replacement; +/* Class entries and object handlers shared by the functions in this file. */ +static zend_class_entry *wp_html_tag_processor_ce; +static zend_class_entry *wp_html_processor_ce; +static zend_object_handlers wp_html_tag_processor_handlers; +/* Functions declared here and defined outside this file. NOTE(review): presumably exported by the Rust core library - confirm against the build configuration. */ +extern const char *wp_html_api_rust_core_version(void); +extern bool wp_html_api_rust_scan_next_tag( + const unsigned char *html, + size_t html_len, + size_t offset, + wp_html_api_rust_tag_scan *out +); +extern void *wp_html_api_rust_tag_processor_new(const unsigned char *html, size_t html_len); +extern void wp_html_api_rust_tag_processor_free(void *processor); +extern bool wp_html_api_rust_tag_processor_next_tag( + void *processor, + const unsigned char *query, + size_t query_len, + bool visit_closers +); +extern bool wp_html_api_rust_tag_processor_next_token(void *processor); +extern void wp_html_api_rust_tag_processor_seek(void *processor, size_t offset); +extern void wp_html_api_rust_tag_processor_set_namespace(void *processor, unsigned char namespace_id); +extern bool wp_html_api_rust_tag_processor_apply_lexical_update( + void *processor, + size_t start, + size_t length, + const unsigned char *replacement, + size_t replacement_len +); +extern bool wp_html_api_rust_tag_processor_current_span( + const void *processor, + size_t *start, + size_t *length +); +extern unsigned char wp_html_api_rust_tag_processor_current_token_type(const void *processor); /* codes 1..7 are mapped to STATE_* names in wp_html_tag_processor_update_parser_state_from_native() */ +extern bool wp_html_api_rust_tag_processor_paused_at_incomplete(const void *processor); +extern unsigned char wp_html_api_rust_tag_processor_subdivide_text_appropriately(void *processor); +extern bool 
wp_html_api_rust_tag_processor_get_modifiable_text( + void *processor, + wp_html_api_rust_byte_slice *out +); +extern bool wp_html_api_rust_tag_processor_set_modifiable_text( + void *processor, + const unsigned char *text, + size_t text_len +); /* NOTE(review): the unsigned char results declared in this group are numeric codes defined by the native side; their values are not visible in this section. */ +extern unsigned char wp_html_api_rust_tag_processor_current_comment_type(const void *processor); +extern unsigned char wp_html_api_rust_tag_processor_script_content_type(const void *processor); +extern bool wp_html_api_rust_tag_processor_get_tag( + const void *processor, + wp_html_api_rust_byte_slice *out +); +extern bool wp_html_api_rust_tag_processor_is_tag_closer(const void *processor); +extern bool wp_html_api_rust_tag_processor_has_self_closing_flag(const void *processor); +extern unsigned char wp_html_api_rust_tag_processor_get_attribute( + void *processor, + const unsigned char *name, + size_t name_len, + wp_html_api_rust_byte_slice *out +); +extern unsigned char wp_html_api_rust_tag_processor_get_attribute_names_with_prefix( + void *processor, + const unsigned char *prefix, + size_t prefix_len, + wp_html_api_rust_byte_slice *out +); +extern bool wp_html_api_rust_tag_processor_set_attribute( + void *processor, + const unsigned char *name, + size_t name_len, + const unsigned char *value, + size_t value_len, + unsigned char value_kind +); +extern bool wp_html_api_rust_tag_processor_remove_attribute( + void *processor, + const unsigned char *name, + size_t name_len +); +extern bool wp_html_api_rust_tag_processor_add_class( + void *processor, + const unsigned char *class_name, + size_t class_name_len, + bool quirks_mode +); +extern bool wp_html_api_rust_tag_processor_remove_class( + void *processor, + const unsigned char *class_name, + size_t class_name_len, + bool quirks_mode +); +extern unsigned char wp_html_api_rust_tag_processor_has_class( + void *processor, + const unsigned char *class_name, + size_t class_name_len, + bool quirks_mode +); +extern unsigned char wp_html_api_rust_tag_processor_class_list( + void 
*processor, + wp_html_api_rust_byte_slice *out, + bool quirks_mode +); +extern bool wp_html_api_rust_tag_processor_get_html( + const void *processor, + wp_html_api_rust_byte_slice *out +); +/* Declares the INI entry wp_html_api_rust.replace_html_api with default "0", PHP_INI_SYSTEM scope and no change callback. */ +PHP_INI_BEGIN() + PHP_INI_ENTRY("wp_html_api_rust.replace_html_api", "0", PHP_INI_SYSTEM, NULL) +PHP_INI_END() +/* Arginfo tables: parameter names, declared types and defaults, and return types for the functions and methods of this extension. */ +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_api_rust_version, 0, 0, IS_STRING, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_wp_html_api_rust_scan_next_tag, 0, 1, MAY_BE_ARRAY | MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, html, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_tag_processor_construct, 0, 0, 1) + ZEND_ARG_INFO(0, html) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_next_tag, 0, 0, _IS_BOOL, 0) + ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, query, "null") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_get_tag, 0, 0, IS_STRING, 1) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_tag_processor_get_attribute, 0, 0, 1) + ZEND_ARG_INFO(0, name) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_get_attribute_names_with_prefix, 0, 1, IS_ARRAY, 1) + ZEND_ARG_INFO(0, prefix) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_set_attribute, 0, 2, _IS_BOOL, 0) + ZEND_ARG_INFO(0, name) + ZEND_ARG_INFO(0, value) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_remove_attribute, 0, 1, _IS_BOOL, 0) + ZEND_ARG_INFO(0, name) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_class_mutation, 0, 1, _IS_BOOL, 0) + ZEND_ARG_INFO(0, class_name) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_has_class, 0, 1, _IS_BOOL, 1) + 
ZEND_ARG_INFO(0, wanted_class) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_tag_processor_class_list, 0, 0, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_bool, 0, 0, _IS_BOOL, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_nullable_string, 0, 0, IS_STRING, 1) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_set_modifiable_text, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, plaintext_content, IS_STRING, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_tag_processor_nullable_mixed, 0, 0, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_bookmark, 0, 1, _IS_BOOL, 0) + ZEND_ARG_INFO(0, bookmark_name) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_change_namespace, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, new_namespace, IS_STRING, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_processor_construct, 0, 0, 1) + ZEND_ARG_INFO(0, html) + ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, use_the_static_create_methods_instead, "null") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_processor_create_fragment, 0, 0, 1) + ZEND_ARG_INFO(0, html) + ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, context, "\"<body>\"") + ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, encoding, "\"UTF-8\"") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_wp_html_processor_create_full_parser, 0, 0, 1) + ZEND_ARG_INFO(0, html) + ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, encoding, "\"UTF-8\"") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_wp_html_tag_processor_get_html, 0, 0, IS_STRING, 0) +ZEND_END_ARG_INFO() +/* Recovers the enclosing wp_html_tag_processor_object from a pointer to its embedded zend_object by subtracting the offset of the std member. */ +static inline wp_html_tag_processor_object *wp_html_tag_processor_from_obj(zend_object *obj) +{ + return (wp_html_tag_processor_object *) ((char *) obj - XtOffsetOf(wp_html_tag_processor_object, std)); 
+} +/* Fetches the processor wrapper for the object stored in a zval pointer. */ +#define Z_WP_HTML_TAG_PROCESSOR_P(zv) wp_html_tag_processor_from_obj(Z_OBJ_P((zv))) +/* Returns a newly allocated, NUL-terminated zend_string of len bytes copied from ptr with ASCII a-z mapped to A-Z; all other bytes are copied unchanged. */ +static zend_string *wp_html_api_rust_uppercase_ascii_slice(const unsigned char *ptr, size_t len) +{ + zend_string *string = zend_string_alloc(len, 0); + size_t i; + + for (i = 0; i < len; i++) { + unsigned char byte = ptr[i]; + ZSTR_VAL(string)[i] = (byte >= 'a' && byte <= 'z') ? (char) (byte - 32) : (char) byte; + } + + ZSTR_VAL(string)[len] = '\0'; + return string; +} +/* Returns a newly allocated, NUL-terminated zend_string of len bytes copied from ptr with ASCII A-Z mapped to a-z; all other bytes are copied unchanged. */ +static zend_string *wp_html_api_rust_lowercase_ascii_slice(const unsigned char *ptr, size_t len) +{ + zend_string *string = zend_string_alloc(len, 0); + size_t i; + + for (i = 0; i < len; i++) { + unsigned char byte = ptr[i]; + ZSTR_VAL(string)[i] = (byte >= 'A' && byte <= 'Z') ? (char) (byte + 32) : (char) byte; + } + + ZSTR_VAL(string)[len] = '\0'; + return string; +} +/* True when string is exactly literal_len bytes long and those bytes equal literal. */ +static bool wp_html_api_rust_zend_string_equals_literal(zend_string *string, const char *literal, size_t literal_len) +{ + return ZSTR_LEN(string) == literal_len && 0 == memcmp(ZSTR_VAL(string), literal, literal_len); +} +/* Returns the mixed-case SVG tag name for a lower-cased name as a new zend_string; when the name is not in the table the input itself is returned through zend_string_copy(). The macro parameter called adjusted is the lower-case key and canonical is the replacement. */ +static zend_string *wp_html_api_rust_svg_qualified_tag_name(zend_string *lower_tag_name) +{ +#define WP_HTML_API_RUST_SVG_TAG(adjusted, canonical) \ + if (wp_html_api_rust_zend_string_equals_literal(lower_tag_name, adjusted, sizeof(adjusted) - 1)) { \ + return zend_string_init(canonical, sizeof(canonical) - 1, 0); \ + } + + WP_HTML_API_RUST_SVG_TAG("altglyph", "altGlyph") + WP_HTML_API_RUST_SVG_TAG("altglyphdef", "altGlyphDef") + WP_HTML_API_RUST_SVG_TAG("altglyphitem", "altGlyphItem") + WP_HTML_API_RUST_SVG_TAG("animatecolor", "animateColor") + WP_HTML_API_RUST_SVG_TAG("animatemotion", "animateMotion") + WP_HTML_API_RUST_SVG_TAG("animatetransform", "animateTransform") + WP_HTML_API_RUST_SVG_TAG("clippath", "clipPath") + WP_HTML_API_RUST_SVG_TAG("feblend", "feBlend") + WP_HTML_API_RUST_SVG_TAG("fecolormatrix", "feColorMatrix") + WP_HTML_API_RUST_SVG_TAG("fecomponenttransfer", "feComponentTransfer") + 
WP_HTML_API_RUST_SVG_TAG("fecomposite", "feComposite") + WP_HTML_API_RUST_SVG_TAG("feconvolvematrix", "feConvolveMatrix") + WP_HTML_API_RUST_SVG_TAG("fediffuselighting", "feDiffuseLighting") + WP_HTML_API_RUST_SVG_TAG("fedisplacementmap", "feDisplacementMap") + WP_HTML_API_RUST_SVG_TAG("fedistantlight", "feDistantLight") + WP_HTML_API_RUST_SVG_TAG("fedropshadow", "feDropShadow") + WP_HTML_API_RUST_SVG_TAG("feflood", "feFlood") + WP_HTML_API_RUST_SVG_TAG("fefunca", "feFuncA") + WP_HTML_API_RUST_SVG_TAG("fefuncb", "feFuncB") + WP_HTML_API_RUST_SVG_TAG("fefuncg", "feFuncG") + WP_HTML_API_RUST_SVG_TAG("fefuncr", "feFuncR") + WP_HTML_API_RUST_SVG_TAG("fegaussianblur", "feGaussianBlur") + WP_HTML_API_RUST_SVG_TAG("feimage", "feImage") + WP_HTML_API_RUST_SVG_TAG("femerge", "feMerge") + WP_HTML_API_RUST_SVG_TAG("femergenode", "feMergeNode") + WP_HTML_API_RUST_SVG_TAG("femorphology", "feMorphology") + WP_HTML_API_RUST_SVG_TAG("feoffset", "feOffset") + WP_HTML_API_RUST_SVG_TAG("fepointlight", "fePointLight") + WP_HTML_API_RUST_SVG_TAG("fespecularlighting", "feSpecularLighting") + WP_HTML_API_RUST_SVG_TAG("fespotlight", "feSpotLight") + WP_HTML_API_RUST_SVG_TAG("fetile", "feTile") + WP_HTML_API_RUST_SVG_TAG("feturbulence", "feTurbulence") + WP_HTML_API_RUST_SVG_TAG("foreignobject", "foreignObject") + WP_HTML_API_RUST_SVG_TAG("glyphref", "glyphRef") + WP_HTML_API_RUST_SVG_TAG("lineargradient", "linearGradient") + WP_HTML_API_RUST_SVG_TAG("radialgradient", "radialGradient") + WP_HTML_API_RUST_SVG_TAG("textpath", "textPath") + +#undef WP_HTML_API_RUST_SVG_TAG +/* No table entry matched: return the input string itself via zend_string_copy(). */ + return zend_string_copy(lower_tag_name); +} +/* Returns the mixed-case SVG attribute name for a lower-cased name as a new zend_string, or NULL when the name is not in the table. */ +static zend_string *wp_html_api_rust_svg_qualified_attribute_name(zend_string *lower_attribute_name) +{ +#define WP_HTML_API_RUST_SVG_ATTRIBUTE(adjusted, canonical) \ + if (wp_html_api_rust_zend_string_equals_literal(lower_attribute_name, adjusted, sizeof(adjusted) - 1)) { \ + return zend_string_init(canonical, sizeof(canonical) - 1, 0); \ + } + + 
WP_HTML_API_RUST_SVG_ATTRIBUTE("attributename", "attributeName") + WP_HTML_API_RUST_SVG_ATTRIBUTE("attributetype", "attributeType") + WP_HTML_API_RUST_SVG_ATTRIBUTE("basefrequency", "baseFrequency") + WP_HTML_API_RUST_SVG_ATTRIBUTE("baseprofile", "baseProfile") + WP_HTML_API_RUST_SVG_ATTRIBUTE("calcmode", "calcMode") + WP_HTML_API_RUST_SVG_ATTRIBUTE("clippathunits", "clipPathUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("diffuseconstant", "diffuseConstant") + WP_HTML_API_RUST_SVG_ATTRIBUTE("edgemode", "edgeMode") + WP_HTML_API_RUST_SVG_ATTRIBUTE("filterunits", "filterUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("glyphref", "glyphRef") + WP_HTML_API_RUST_SVG_ATTRIBUTE("gradienttransform", "gradientTransform") + WP_HTML_API_RUST_SVG_ATTRIBUTE("gradientunits", "gradientUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("kernelmatrix", "kernelMatrix") + WP_HTML_API_RUST_SVG_ATTRIBUTE("kernelunitlength", "kernelUnitLength") + WP_HTML_API_RUST_SVG_ATTRIBUTE("keypoints", "keyPoints") + WP_HTML_API_RUST_SVG_ATTRIBUTE("keysplines", "keySplines") + WP_HTML_API_RUST_SVG_ATTRIBUTE("keytimes", "keyTimes") + WP_HTML_API_RUST_SVG_ATTRIBUTE("lengthadjust", "lengthAdjust") + WP_HTML_API_RUST_SVG_ATTRIBUTE("limitingconeangle", "limitingConeAngle") + WP_HTML_API_RUST_SVG_ATTRIBUTE("markerheight", "markerHeight") + WP_HTML_API_RUST_SVG_ATTRIBUTE("markerunits", "markerUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("markerwidth", "markerWidth") + WP_HTML_API_RUST_SVG_ATTRIBUTE("maskcontentunits", "maskContentUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("maskunits", "maskUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("numoctaves", "numOctaves") + WP_HTML_API_RUST_SVG_ATTRIBUTE("pathlength", "pathLength") + WP_HTML_API_RUST_SVG_ATTRIBUTE("patterncontentunits", "patternContentUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("patterntransform", "patternTransform") + WP_HTML_API_RUST_SVG_ATTRIBUTE("patternunits", "patternUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("pointsatx", "pointsAtX") + WP_HTML_API_RUST_SVG_ATTRIBUTE("pointsaty", 
"pointsAtY") + WP_HTML_API_RUST_SVG_ATTRIBUTE("pointsatz", "pointsAtZ") + WP_HTML_API_RUST_SVG_ATTRIBUTE("preservealpha", "preserveAlpha") + WP_HTML_API_RUST_SVG_ATTRIBUTE("preserveaspectratio", "preserveAspectRatio") + WP_HTML_API_RUST_SVG_ATTRIBUTE("primitiveunits", "primitiveUnits") + WP_HTML_API_RUST_SVG_ATTRIBUTE("refx", "refX") + WP_HTML_API_RUST_SVG_ATTRIBUTE("refy", "refY") + WP_HTML_API_RUST_SVG_ATTRIBUTE("repeatcount", "repeatCount") + WP_HTML_API_RUST_SVG_ATTRIBUTE("repeatdur", "repeatDur") + WP_HTML_API_RUST_SVG_ATTRIBUTE("requiredextensions", "requiredExtensions") + WP_HTML_API_RUST_SVG_ATTRIBUTE("requiredfeatures", "requiredFeatures") + WP_HTML_API_RUST_SVG_ATTRIBUTE("specularconstant", "specularConstant") + WP_HTML_API_RUST_SVG_ATTRIBUTE("specularexponent", "specularExponent") + WP_HTML_API_RUST_SVG_ATTRIBUTE("spreadmethod", "spreadMethod") + WP_HTML_API_RUST_SVG_ATTRIBUTE("startoffset", "startOffset") + WP_HTML_API_RUST_SVG_ATTRIBUTE("stddeviation", "stdDeviation") + WP_HTML_API_RUST_SVG_ATTRIBUTE("stitchtiles", "stitchTiles") + WP_HTML_API_RUST_SVG_ATTRIBUTE("surfacescale", "surfaceScale") + WP_HTML_API_RUST_SVG_ATTRIBUTE("systemlanguage", "systemLanguage") + WP_HTML_API_RUST_SVG_ATTRIBUTE("tablevalues", "tableValues") + WP_HTML_API_RUST_SVG_ATTRIBUTE("targetx", "targetX") + WP_HTML_API_RUST_SVG_ATTRIBUTE("targety", "targetY") + WP_HTML_API_RUST_SVG_ATTRIBUTE("textlength", "textLength") + WP_HTML_API_RUST_SVG_ATTRIBUTE("viewbox", "viewBox") + WP_HTML_API_RUST_SVG_ATTRIBUTE("viewtarget", "viewTarget") + WP_HTML_API_RUST_SVG_ATTRIBUTE("xchannelselector", "xChannelSelector") + WP_HTML_API_RUST_SVG_ATTRIBUTE("ychannelselector", "yChannelSelector") + WP_HTML_API_RUST_SVG_ATTRIBUTE("zoomandpan", "zoomAndPan") + +#undef WP_HTML_API_RUST_SVG_ATTRIBUTE +/* No table entry matched: NULL tells the caller that no adjustment applies. */ + return NULL; +} +/* Returns the adjusted spelling of an xlink, xml or xmlns attribute (colon replaced by a space, for example xlink:href to xlink href) as a new zend_string, or NULL when the lower-cased name is not in the table. */ +static zend_string *wp_html_api_rust_foreign_qualified_attribute_name(zend_string *lower_attribute_name) +{ +#define WP_HTML_API_RUST_FOREIGN_ATTRIBUTE(adjusted, canonical) \ + if 
(wp_html_api_rust_zend_string_equals_literal(lower_attribute_name, adjusted, sizeof(adjusted) - 1)) { \ + return zend_string_init(canonical, sizeof(canonical) - 1, 0); \ + } + + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:actuate", "xlink actuate") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:arcrole", "xlink arcrole") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:href", "xlink href") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:role", "xlink role") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:show", "xlink show") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:title", "xlink title") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xlink:type", "xlink type") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xml:lang", "xml lang") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xml:space", "xml space") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xmlns", "xmlns") + WP_HTML_API_RUST_FOREIGN_ATTRIBUTE("xmlns:xlink", "xmlns xlink") + +#undef WP_HTML_API_RUST_FOREIGN_ATTRIBUTE +/* No table entry matched. */ + return NULL; +} +/* Compares a byte span with a NUL-terminated string, treating ASCII a-z and A-Z as equal; lengths must match and every other byte must match exactly. */ +static bool wp_html_api_rust_ascii_eq_ci(const unsigned char *left, size_t left_len, const char *right) +{ + size_t i; + size_t right_len = strlen(right); + + if (left_len != right_len) { + return false; + } + + for (i = 0; i < left_len; i++) { + unsigned char left_byte = left[i]; + unsigned char right_byte = (unsigned char) right[i]; + + if (left_byte >= 'a' && left_byte <= 'z') { + left_byte = (unsigned char) (left_byte - 32); + } + + if (right_byte >= 'a' && right_byte <= 'z') { + right_byte = (unsigned char) (right_byte - 32); + } + + if (left_byte != right_byte) { + return false; + } + } + + return true; +} +/* Calls the PHP function _doing_it_wrong(function_name, message, version) when a function of that name is registered, and does nothing otherwise. The return value of the call is discarded and all temporary zvals are released. */ +static void wp_html_api_rust_doing_it_wrong(const char *function_name, const char *message, const char *version) +{ + zval callable; + zval retval; + zval params[3]; + zend_fcall_info fci; + zend_fcall_info_cache fcc; + + if (!zend_hash_str_exists(CG(function_table), "_doing_it_wrong", sizeof("_doing_it_wrong") - 1)) { + return; + } + + ZVAL_STRING(&callable, "_doing_it_wrong"); + ZVAL_STRING(&params[0], 
function_name); + ZVAL_STRING(&params[1], message); + ZVAL_STRING(&params[2], version); + + memset(&fci, 0, sizeof(fci)); + memset(&fcc, 0, sizeof(fcc)); + + fci.size = sizeof(fci); + fci.function_name = callable; + fci.retval = &retval; + fci.params = params; + fci.param_count = 3; + + if (SUCCESS == zend_call_function(&fci, &fcc)) { + zval_ptr_dtor(&retval); + } + + zval_ptr_dtor(&params[2]); + zval_ptr_dtor(&params[1]); + zval_ptr_dtor(&params[0]); + zval_ptr_dtor(&callable); +} +/* Validates an attribute name. Rejects an empty name, any byte at or below 0x1f, the double quote, single quote, greater-than, ampersand, less-than, slash, space and equals bytes, code points U+FDD0 through U+FDEF, and code points up to U+10FFFF whose low 16 bits are FFFE or FFFF. Multi-byte sequences are decoded from the lead byte without checking the continuation bytes. */ +static bool wp_html_api_rust_is_valid_attribute_name(const char *name, size_t name_len) +{ + size_t i; + uint32_t codepoint; + unsigned char byte; + + if (0 == name_len) { + return false; + } + + for (i = 0; i < name_len; i++) { + byte = (unsigned char) name[i]; + + if ( + byte <= 0x1f || + '"' == byte || + '\'' == byte || + '>' == byte || + '&' == byte || + '<' == byte || + '/' == byte || + ' ' == byte || + '=' == byte + ) { + return false; + } + + if (byte < 0x80) { + continue; + } +/* A lead byte matching none of the branches below, or a sequence cut short by the end of the name, leaves codepoint at 0, which passes the noncharacter test. */ + codepoint = 0; + if ((byte & 0xe0) == 0xc0 && i + 1 < name_len) { + codepoint = ((uint32_t) (byte & 0x1f) << 6) | + ((uint32_t) ((unsigned char) name[i + 1] & 0x3f)); + i += 1; + } else if ((byte & 0xf0) == 0xe0 && i + 2 < name_len) { + codepoint = ((uint32_t) (byte & 0x0f) << 12) | + ((uint32_t) ((unsigned char) name[i + 1] & 0x3f) << 6) | + ((uint32_t) ((unsigned char) name[i + 2] & 0x3f)); + i += 2; + } else if ((byte & 0xf8) == 0xf0 && i + 3 < name_len) { + codepoint = ((uint32_t) (byte & 0x07) << 18) | + ((uint32_t) ((unsigned char) name[i + 1] & 0x3f) << 12) | + ((uint32_t) ((unsigned char) name[i + 2] & 0x3f) << 6) | + ((uint32_t) ((unsigned char) name[i + 3] & 0x3f)); + i += 3; + } + + if ( + (codepoint >= 0xfdd0 && codepoint <= 0xfdef) || + (codepoint <= 0x10ffff && 0xfffe == (codepoint & 0xfffe)) + ) { + return false; + } + } + + return true; +} +/* Stores state in the parser_state property of the object. */ +static void wp_html_tag_processor_update_parser_state(zval *object, const char *state) +{ + zend_update_property_string( + 
wp_html_tag_processor_ce, + Z_OBJ_P(object), + "parser_state", + sizeof("parser_state") - 1, + state + ); +} + +static void wp_html_tag_processor_update_parser_state_from_native(zval *object, void *native) +{ + const char *state = "STATE_READY"; + + switch (wp_html_api_rust_tag_processor_current_token_type(native)) { + case 1: + state = "STATE_MATCHED_TAG"; + break; + case 2: + state = "STATE_TEXT_NODE"; + break; + case 3: + state = "STATE_COMMENT"; + break; + case 4: + state = "STATE_DOCTYPE"; + break; + case 5: + state = "STATE_CDATA_NODE"; + break; + case 6: + state = "STATE_PRESUMPTUOUS_TAG"; + break; + case 7: + state = "STATE_FUNKY_COMMENT"; + break; + } + + wp_html_tag_processor_update_parser_state(object, state); +} + +static void wp_html_tag_processor_sync_html_property(zval *object, void *native) +{ + wp_html_api_rust_byte_slice html; + + if (NULL == native || !wp_html_api_rust_tag_processor_get_html(native, &html)) { + zend_update_property_string( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "html", + sizeof("html") - 1, + "" + ); + return; + } + + zend_update_property_stringl( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "html", + sizeof("html") - 1, + (const char *) html.ptr, + html.len + ); +} + +static zval *wp_html_tag_processor_read_bookmarks(zval *object, zval *rv) +{ + zval *bookmarks = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "bookmarks", + sizeof("bookmarks") - 1, + 0, + rv + ); + + if (IS_ARRAY != Z_TYPE_P(bookmarks)) { + zval empty_bookmarks; + + array_init(&empty_bookmarks); + zend_update_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "bookmarks", + sizeof("bookmarks") - 1, + &empty_bookmarks + ); + zval_ptr_dtor(&empty_bookmarks); + + bookmarks = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "bookmarks", + sizeof("bookmarks") - 1, + 0, + rv + ); + } + + return bookmarks; +} + +static bool wp_html_tag_processor_read_long_property(zval *object, const char *name, size_t 
name_len, zend_long *out) +{ + zval rv; + zval *value; + + if (IS_OBJECT != Z_TYPE_P(object)) { + return false; + } + + value = zend_read_property(Z_OBJCE_P(object), Z_OBJ_P(object), name, name_len, 1, &rv); + if (IS_LONG == Z_TYPE_P(value)) { + *out = Z_LVAL_P(value); + return true; + } + + if (IS_DOUBLE == Z_TYPE_P(value)) { + *out = (zend_long) Z_DVAL_P(value); + return true; + } + + return false; +} + +static bool wp_html_tag_processor_read_string_property(zval *object, const char *name, size_t name_len, zend_string **out) +{ + zval rv; + zval *value; + + if (IS_OBJECT != Z_TYPE_P(object)) { + return false; + } + + value = zend_read_property(Z_OBJCE_P(object), Z_OBJ_P(object), name, name_len, 1, &rv); + if (IS_STRING != Z_TYPE_P(value)) { + return false; + } + + *out = Z_STR_P(value); + return true; +} + +static bool wp_html_api_rust_is_html_whitespace(unsigned char byte) +{ + return ' ' == byte || '\t' == byte || '\f' == byte || '\r' == byte || '\n' == byte; +} + +static bool wp_html_tag_processor_parser_state_is(zval *object, const char *state, size_t state_len) +{ + zval rv; + zval *parser_state; + + parser_state = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "parser_state", + sizeof("parser_state") - 1, + 1, + &rv + ); + + return ( + IS_STRING == Z_TYPE_P(parser_state) && + state_len == Z_STRLEN_P(parser_state) && + 0 == memcmp(Z_STRVAL_P(parser_state), state, state_len) + ); +} + +static bool wp_html_tag_processor_parser_state_is_terminal(zval *object) +{ + return ( + wp_html_tag_processor_parser_state_is(object, "STATE_COMPLETE", sizeof("STATE_COMPLETE") - 1) || + wp_html_tag_processor_parser_state_is(object, "STATE_INCOMPLETE_INPUT", sizeof("STATE_INCOMPLETE_INPUT") - 1) + ); +} + +static bool wp_html_tag_processor_is_quirks_mode(zval *object) +{ + zval rv; + zval *compat_mode; + + compat_mode = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "compat_mode", + sizeof("compat_mode") - 1, + 1, + &rv + ); + + 
return ( + IS_STRING == Z_TYPE_P(compat_mode) && + sizeof("quirks-mode") - 1 == Z_STRLEN_P(compat_mode) && + 0 == memcmp(Z_STRVAL_P(compat_mode), "quirks-mode", sizeof("quirks-mode") - 1) + ); +} + +static zend_long wp_html_tag_processor_max_bookmarks(zval *object) +{ + zend_string *processor_class_name; + zend_class_entry *processor_ce; + + processor_class_name = zend_string_init("WP_HTML_Processor", sizeof("WP_HTML_Processor") - 1, 0); + processor_ce = zend_lookup_class(processor_class_name); + zend_string_release(processor_class_name); + + if (NULL != processor_ce && instanceof_function(Z_OBJCE_P(object), processor_ce)) { + return 10000; + } + + return 10; +} + +static int wp_html_api_rust_compare_text_replacements(const void *left_ptr, const void *right_ptr) +{ + const wp_html_api_rust_text_replacement *left = (const wp_html_api_rust_text_replacement *) left_ptr; + const wp_html_api_rust_text_replacement *right = (const wp_html_api_rust_text_replacement *) right_ptr; + int by_text; + + if (left->start < right->start) { + return -1; + } + + if (left->start > right->start) { + return 1; + } + + by_text = zend_binary_strcmp( + ZSTR_VAL(left->text), + ZSTR_LEN(left->text), + ZSTR_VAL(right->text), + ZSTR_LEN(right->text) + ); + if (0 != by_text) { + return by_text; + } + + if (left->length < right->length) { + return -1; + } + + if (left->length > right->length) { + return 1; + } + + return 0; +} + +static bool wp_html_tag_processor_apply_lexical_updates(zval *object, void *native) +{ + zval rv; + zval *updates; + zval *update; + zval empty_updates; + wp_html_api_rust_text_replacement *replacements; + uint32_t replacement_count = 0; + uint32_t replacement_index = 0; + zend_long accumulated_shift = 0; + bool applied = true; + + if (NULL == native) { + return false; + } + + updates = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "lexical_updates", + sizeof("lexical_updates") - 1, + 1, + &rv + ); + + if (IS_ARRAY != Z_TYPE_P(updates) || 0 == 
zend_hash_num_elements(Z_ARRVAL_P(updates))) { + return true; + } + + replacements = safe_emalloc(zend_hash_num_elements(Z_ARRVAL_P(updates)), sizeof(wp_html_api_rust_text_replacement), 0); + + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(updates), update) { + zend_long start; + zend_long length; + zend_string *text; + + if ( + IS_OBJECT != Z_TYPE_P(update) || + !wp_html_tag_processor_read_long_property(update, "start", sizeof("start") - 1, &start) || + !wp_html_tag_processor_read_long_property(update, "length", sizeof("length") - 1, &length) || + !wp_html_tag_processor_read_string_property(update, "text", sizeof("text") - 1, &text) || + start < 0 || + length < 0 + ) { + applied = false; + break; + } + + replacements[replacement_count].start = (size_t) start; + replacements[replacement_count].length = (size_t) length; + replacements[replacement_count].text = zend_string_copy(text); + ++replacement_count; + } ZEND_HASH_FOREACH_END(); + + if (applied && replacement_count > 1) { + qsort( + replacements, + replacement_count, + sizeof(wp_html_api_rust_text_replacement), + wp_html_api_rust_compare_text_replacements + ); + } + + for (replacement_index = 0; applied && replacement_index < replacement_count; ++replacement_index) { + zend_long adjusted_start = (zend_long) replacements[replacement_index].start + accumulated_shift; + zend_long shift = (zend_long) ZSTR_LEN(replacements[replacement_index].text) - (zend_long) replacements[replacement_index].length; + + if ( + adjusted_start < 0 || + !wp_html_api_rust_tag_processor_apply_lexical_update( + native, + (size_t) adjusted_start, + replacements[replacement_index].length, + (const unsigned char *) ZSTR_VAL(replacements[replacement_index].text), + ZSTR_LEN(replacements[replacement_index].text) + ) + ) { + applied = false; + break; + } + + accumulated_shift += shift; + } + + for (replacement_index = 0; replacement_index < replacement_count; ++replacement_index) { + zend_string_release(replacements[replacement_index].text); + } + 
efree(replacements); + + if (!applied) { + return false; + } + + array_init(&empty_updates); + zend_update_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "lexical_updates", + sizeof("lexical_updates") - 1, + &empty_updates + ); + zval_ptr_dtor(&empty_updates); + + wp_html_tag_processor_sync_html_property(object, native); + return true; +} + +static void wp_html_tag_processor_create_span(zval *span, zend_long start, zend_long length) +{ + zend_string *span_class_name; + zend_class_entry *span_ce; + + span_class_name = zend_string_init("WP_HTML_Span", sizeof("WP_HTML_Span") - 1, 0); + span_ce = zend_lookup_class(span_class_name); + zend_string_release(span_class_name); + + if (NULL != span_ce) { + object_init_ex(span, span_ce); + zend_update_property_long(span_ce, Z_OBJ_P(span), "start", sizeof("start") - 1, start); + zend_update_property_long(span_ce, Z_OBJ_P(span), "length", sizeof("length") - 1, length); + return; + } + + object_init(span); + add_property_long(span, "start", start); + add_property_long(span, "length", length); +} + +static void wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + zval *object, + zend_long old_start, + zend_long old_length, + zend_long new_start, + zend_long new_length +) { + zval rv; + zval *bookmarks; + zval *bookmark; + zend_long delta = new_length - old_length; + + if (old_start != new_start && 0 == delta) { + delta = new_start - old_start; + } + + if (0 == delta && old_length == new_length) { + return; + } + + bookmarks = wp_html_tag_processor_read_bookmarks(object, &rv); + ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(bookmarks), bookmark) { + zend_long start; + zend_long length; + + if ( + IS_OBJECT != Z_TYPE_P(bookmark) || + !wp_html_tag_processor_read_long_property(bookmark, "start", sizeof("start") - 1, &start) || + !wp_html_tag_processor_read_long_property(bookmark, "length", sizeof("length") - 1, &length) + ) { + continue; + } + + if (start == old_start) { + zend_update_property_long( + 
Z_OBJCE_P(bookmark), + Z_OBJ_P(bookmark), + "length", + sizeof("length") - 1, + new_length + ); + } else if (start > old_start) { + zend_update_property_long( + Z_OBJCE_P(bookmark), + Z_OBJ_P(bookmark), + "start", + sizeof("start") - 1, + start + delta + ); + } + } ZEND_HASH_FOREACH_END(); +} + +static bool wp_html_tag_processor_initialize(zval *object, const char *html, size_t html_len) +{ + wp_html_tag_processor_object *intern = Z_WP_HTML_TAG_PROCESSOR_P(object); + zval bookmarks; + zval lexical_updates; + + if (NULL != intern->native) { + wp_html_api_rust_tag_processor_free(intern->native); + } + + intern->native = wp_html_api_rust_tag_processor_new((const unsigned char *) html, html_len); + if (NULL == intern->native) { + zend_throw_error(NULL, "Failed to initialize WP_HTML_Tag_Processor native state"); + return false; + } + + intern->seek_count = 0; + wp_html_tag_processor_sync_html_property(object, intern->native); + wp_html_tag_processor_update_parser_state(object, "STATE_READY"); + zend_update_property_string( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "parsing_namespace", + sizeof("parsing_namespace") - 1, + "html" + ); + + array_init(&bookmarks); + zend_update_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "bookmarks", + sizeof("bookmarks") - 1, + &bookmarks + ); + zval_ptr_dtor(&bookmarks); + + array_init(&lexical_updates); + zend_update_property( + wp_html_tag_processor_ce, + Z_OBJ_P(object), + "lexical_updates", + sizeof("lexical_updates") - 1, + &lexical_updates + ); + zval_ptr_dtor(&lexical_updates); + + return true; +} + +static zend_object *wp_html_tag_processor_create_object(zend_class_entry *class_type) +{ + wp_html_tag_processor_object *intern = zend_object_alloc(sizeof(wp_html_tag_processor_object), class_type); + + zend_object_std_init(&intern->std, class_type); + object_properties_init(&intern->std, class_type); + + intern->native = NULL; + intern->seek_count = 0; + intern->std.handlers = &wp_html_tag_processor_handlers; + + 
return &intern->std; +} + +static void wp_html_tag_processor_free_obj(zend_object *object) +{ + wp_html_tag_processor_object *intern = wp_html_tag_processor_from_obj(object); + + if (NULL != intern->native) { + wp_html_api_rust_tag_processor_free(intern->native); + intern->native = NULL; + } + + zend_object_std_dtor(&intern->std); +} + +PHP_FUNCTION(wp_html_api_rust_version) +{ + ZEND_PARSE_PARAMETERS_NONE(); + + RETURN_STRING(wp_html_api_rust_core_version()); +} + +PHP_FUNCTION(wp_html_api_rust_scan_next_tag) +{ + char *html; + size_t html_len; + zend_long offset = 0; + wp_html_api_rust_tag_scan scan; + zend_string *tag_name; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(html, html_len) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(offset) + ZEND_PARSE_PARAMETERS_END(); + + if (offset < 0) { + offset = 0; + } + + if (!wp_html_api_rust_scan_next_tag((const unsigned char *) html, html_len, (size_t) offset, &scan)) { + RETURN_FALSE; + } + + tag_name = wp_html_api_rust_uppercase_ascii_slice((const unsigned char *) html + scan.name_start, scan.name_len); + + array_init(return_value); + add_assoc_str(return_value, "tag_name", tag_name); + add_assoc_long(return_value, "tag_start", (zend_long) scan.tag_start); + add_assoc_long(return_value, "tag_end", (zend_long) scan.tag_end); + add_assoc_long(return_value, "name_start", (zend_long) scan.name_start); + add_assoc_long(return_value, "name_length", (zend_long) scan.name_len); + add_assoc_bool(return_value, "is_closing", scan.is_closing); +} + +PHP_METHOD(WP_HTML_Tag_Processor, __construct) +{ + zval *html_param; + const char *html = ""; + size_t html_len = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_ZVAL(html_param) + ZEND_PARSE_PARAMETERS_END(); + + if (IS_STRING == Z_TYPE_P(html_param)) { + html = Z_STRVAL_P(html_param); + html_len = Z_STRLEN_P(html_param); + } else { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Tag_Processor::__construct", + "The HTML parameter must be a string.", + "6.9.0" + ); + } + + if 
(!wp_html_tag_processor_initialize(ZEND_THIS, html, html_len)) { + RETURN_THROWS(); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, next_tag) +{ + zval *query = NULL; + zend_string *query_tag_name = NULL; + zend_string *query_class_name = NULL; + zend_long match_offset = 1; + zend_long found_matches = 0; + bool visit_closers = false; + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_START(0, 1) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL(query) + ZEND_PARSE_PARAMETERS_END(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_apply_lexical_updates(ZEND_THIS, intern->native)) { + RETURN_FALSE; + } + + if (NULL != query) { + if (IS_STRING == Z_TYPE_P(query)) { + query_tag_name = Z_STR_P(query); + } else if (IS_ARRAY == Z_TYPE_P(query)) { + zval *tag_name = zend_hash_str_find(Z_ARRVAL_P(query), "tag_name", sizeof("tag_name") - 1); + zval *class_name = zend_hash_str_find(Z_ARRVAL_P(query), "class_name", sizeof("class_name") - 1); + zval *query_match_offset = zend_hash_str_find(Z_ARRVAL_P(query), "match_offset", sizeof("match_offset") - 1); + zval *tag_closers = zend_hash_str_find(Z_ARRVAL_P(query), "tag_closers", sizeof("tag_closers") - 1); + + if (NULL != tag_name && IS_STRING == Z_TYPE_P(tag_name)) { + query_tag_name = Z_STR_P(tag_name); + } + + if (NULL != class_name && IS_STRING == Z_TYPE_P(class_name)) { + query_class_name = Z_STR_P(class_name); + } + + if (NULL != query_match_offset && IS_LONG == Z_TYPE_P(query_match_offset) && Z_LVAL_P(query_match_offset) > 0) { + match_offset = Z_LVAL_P(query_match_offset); + } + + if ( + NULL != tag_closers && + IS_STRING == Z_TYPE_P(tag_closers) && + sizeof("visit") - 1 == Z_STRLEN_P(tag_closers) && + 0 == memcmp(Z_STRVAL_P(tag_closers), "visit", sizeof("visit") - 1) + ) { + visit_closers = true; + } + } + } + + while (wp_html_api_rust_tag_processor_next_tag( + 
intern->native, + NULL == query_tag_name ? NULL : (const unsigned char *) ZSTR_VAL(query_tag_name), + NULL == query_tag_name ? 0 : ZSTR_LEN(query_tag_name), + visit_closers + )) { + if ( + NULL != query_class_name && + 2 != wp_html_api_rust_tag_processor_has_class( + intern->native, + (const unsigned char *) ZSTR_VAL(query_class_name), + ZSTR_LEN(query_class_name), + wp_html_tag_processor_is_quirks_mode(ZEND_THIS) + ) + ) { + continue; + } + + if (++found_matches < match_offset) { + continue; + } + + wp_html_tag_processor_update_parser_state_from_native(ZEND_THIS, intern->native); + RETURN_TRUE; + } + + wp_html_tag_processor_update_parser_state( + ZEND_THIS, + wp_html_api_rust_tag_processor_paused_at_incomplete(intern->native) ? "STATE_INCOMPLETE_INPUT" : "STATE_COMPLETE" + ); + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, next_token) +{ + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_apply_lexical_updates(ZEND_THIS, intern->native)) { + RETURN_FALSE; + } + + if (wp_html_api_rust_tag_processor_next_token(intern->native)) { + wp_html_tag_processor_update_parser_state_from_native(ZEND_THIS, intern->native); + RETURN_TRUE; + } + + wp_html_tag_processor_update_parser_state( + ZEND_THIS, + wp_html_api_rust_tag_processor_paused_at_incomplete(intern->native) ? 
"STATE_INCOMPLETE_INPUT" : "STATE_COMPLETE" + ); + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_tag) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice tag_name; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + if ( + !wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_COMMENT", sizeof("STATE_COMMENT") - 1) || + 4 != wp_html_api_rust_tag_processor_current_comment_type(intern->native) + ) { + RETURN_NULL(); + } + } + + if (!wp_html_api_rust_tag_processor_get_tag(intern->native, &tag_name)) { + RETURN_NULL(); + } + + if (4 == wp_html_api_rust_tag_processor_current_comment_type(intern->native)) { + RETURN_STRINGL((const char *) tag_name.ptr, tag_name.len); + } + + RETURN_STR(wp_html_api_rust_uppercase_ascii_slice(tag_name.ptr, tag_name.len)); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_attribute) +{ + char *name; + size_t name_len; + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice value; + unsigned char result; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(name, name_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_NULL(); + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + result = wp_html_api_rust_tag_processor_get_attribute( + intern->native, + (const unsigned char *) name, + name_len, + &value + ); + + switch (result) { + case 1: + RETURN_TRUE; + case 2: + RETURN_STRINGL((const char *) value.ptr, value.len); + default: + RETURN_NULL(); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, 
get_attribute_names_with_prefix) +{ + char *prefix; + size_t prefix_len; + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice names; + size_t start = 0; + size_t i; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(prefix, prefix_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_NULL(); + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_api_rust_tag_processor_get_attribute_names_with_prefix( + intern->native, + (const unsigned char *) prefix, + prefix_len, + &names + )) { + RETURN_NULL(); + } + + array_init(return_value); + if (0 == names.len) { + return; + } + + for (i = 0; i <= names.len; i++) { + if (i == names.len || 0 == names.ptr[i]) { + add_next_index_stringl(return_value, (const char *) names.ptr + start, i - start); + start = i + 1; + } + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, set_attribute) +{ + char *name; + size_t name_len; + zval *value; + zend_string *value_string = NULL; + const unsigned char *value_ptr = NULL; + size_t value_len = 0; + unsigned char value_kind = 2; + bool result; + bool had_span; + size_t old_token_start = 0; + size_t old_token_length = 0; + size_t new_token_start = 0; + size_t new_token_length = 0; + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_START(2, 2) + Z_PARAM_STRING(name, name_len) + Z_PARAM_ZVAL(value) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + if (!wp_html_api_rust_is_valid_attribute_name(name, name_len)) { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Tag_Processor::set_attribute", + "Invalid attribute name.", + "6.2.0" + ); + RETURN_FALSE; + } + + if (IS_FALSE == Z_TYPE_P(value)) { + value_kind = 0; + 
} else if (IS_TRUE == Z_TYPE_P(value)) { + value_kind = 1; + } else { + value_string = zval_get_string(value); + value_ptr = (const unsigned char *) ZSTR_VAL(value_string); + value_len = ZSTR_LEN(value_string); + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + if (NULL != value_string) { + zend_string_release(value_string); + } + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + had_span = wp_html_api_rust_tag_processor_current_span(intern->native, &old_token_start, &old_token_length); + result = wp_html_api_rust_tag_processor_set_attribute( + intern->native, + (const unsigned char *) name, + name_len, + value_ptr, + value_len, + value_kind + ); + + if (NULL != value_string) { + zend_string_release(value_string); + } + + if (result) { + if ( + had_span && + wp_html_api_rust_tag_processor_current_span(intern->native, &new_token_start, &new_token_length) + ) { + wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + ZEND_THIS, + (zend_long) old_token_start, + (zend_long) old_token_length, + (zend_long) new_token_start, + (zend_long) new_token_length + ); + } + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + } + + RETURN_BOOL(result); +} + +PHP_METHOD(WP_HTML_Tag_Processor, remove_attribute) +{ + char *name; + size_t name_len; + wp_html_tag_processor_object *intern; + bool had_span; + size_t old_token_start = 0; + size_t old_token_length = 0; + size_t new_token_start = 0; + size_t new_token_length = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(name, name_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + { + had_span = 
wp_html_api_rust_tag_processor_current_span(intern->native, &old_token_start, &old_token_length); + bool result = wp_html_api_rust_tag_processor_remove_attribute( + intern->native, + (const unsigned char *) name, + name_len + ); + + if (result) { + if ( + had_span && + wp_html_api_rust_tag_processor_current_span(intern->native, &new_token_start, &new_token_length) + ) { + wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + ZEND_THIS, + (zend_long) old_token_start, + (zend_long) old_token_length, + (zend_long) new_token_start, + (zend_long) new_token_length + ); + } + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + } + + RETURN_BOOL(result); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, add_class) +{ + char *class_name; + size_t class_name_len; + wp_html_tag_processor_object *intern; + bool had_span; + size_t old_token_start = 0; + size_t old_token_length = 0; + size_t new_token_start = 0; + size_t new_token_length = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(class_name, class_name_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + { + had_span = wp_html_api_rust_tag_processor_current_span(intern->native, &old_token_start, &old_token_length); + bool result = wp_html_api_rust_tag_processor_add_class( + intern->native, + (const unsigned char *) class_name, + class_name_len, + wp_html_tag_processor_is_quirks_mode(ZEND_THIS) + ); + + if (result) { + if ( + had_span && + wp_html_api_rust_tag_processor_current_span(intern->native, &new_token_start, &new_token_length) + ) { + wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + ZEND_THIS, + (zend_long) old_token_start, + (zend_long) old_token_length, + 
(zend_long) new_token_start, + (zend_long) new_token_length + ); + } + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + } + + RETURN_BOOL(result); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, remove_class) +{ + char *class_name; + size_t class_name_len; + wp_html_tag_processor_object *intern; + bool had_span; + size_t old_token_start = 0; + size_t old_token_length = 0; + size_t new_token_start = 0; + size_t new_token_length = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(class_name, class_name_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + { + had_span = wp_html_api_rust_tag_processor_current_span(intern->native, &old_token_start, &old_token_length); + bool result = wp_html_api_rust_tag_processor_remove_class( + intern->native, + (const unsigned char *) class_name, + class_name_len, + wp_html_tag_processor_is_quirks_mode(ZEND_THIS) + ); + + if (result) { + if ( + had_span && + wp_html_api_rust_tag_processor_current_span(intern->native, &new_token_start, &new_token_length) + ) { + wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + ZEND_THIS, + (zend_long) old_token_start, + (zend_long) old_token_length, + (zend_long) new_token_start, + (zend_long) new_token_length + ); + } + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + } + + RETURN_BOOL(result); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, has_class) +{ + char *class_name; + size_t class_name_len; + wp_html_tag_processor_object *intern; + unsigned char result; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(class_name, class_name_len) + ZEND_PARSE_PARAMETERS_END(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, 
"STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_NULL(); + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + result = wp_html_api_rust_tag_processor_has_class( + intern->native, + (const unsigned char *) class_name, + class_name_len, + wp_html_tag_processor_is_quirks_mode(ZEND_THIS) + ); + + if (0 == result) { + RETURN_NULL(); + } + + RETURN_BOOL(2 == result); +} + +PHP_METHOD(WP_HTML_Tag_Processor, class_list) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice classes; + size_t start = 0; + size_t i; + + ZEND_PARSE_PARAMETERS_NONE(); + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_NULL(); + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_api_rust_tag_processor_class_list( + intern->native, + &classes, + wp_html_tag_processor_is_quirks_mode(ZEND_THIS) + )) { + RETURN_NULL(); + } + + array_init(return_value); + if (0 == classes.len) { + return; + } + + for (i = 0; i <= classes.len; i++) { + if (i == classes.len || 0x1f == classes.ptr[i]) { + add_next_index_stringl(return_value, (const char *) classes.ptr + start, i - start); + start = i + 1; + } + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, is_tag_closer) +{ + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + RETURN_BOOL(wp_html_api_rust_tag_processor_is_tag_closer(intern->native)); +} + 
+PHP_METHOD(WP_HTML_Tag_Processor, has_self_closing_flag) +{ + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_FALSE; + } + + RETURN_BOOL(wp_html_api_rust_tag_processor_has_self_closing_flag(intern->native)); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_token_name) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice tag_name; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_TEXT_NODE", sizeof("STATE_TEXT_NODE") - 1)) { + RETURN_STRING("#text"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_COMMENT", sizeof("STATE_COMMENT") - 1)) { + RETURN_STRING("#comment"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_DOCTYPE", sizeof("STATE_DOCTYPE") - 1)) { + RETURN_STRING("html"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_CDATA_NODE", sizeof("STATE_CDATA_NODE") - 1)) { + RETURN_STRING("#cdata-section"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_PRESUMPTUOUS_TAG", sizeof("STATE_PRESUMPTUOUS_TAG") - 1)) { + RETURN_STRING("#presumptuous-tag"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_FUNKY_COMMENT", sizeof("STATE_FUNKY_COMMENT") - 1)) { + RETURN_STRING("#funky-comment"); + } + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_NULL(); + } + + if (!wp_html_api_rust_tag_processor_get_tag(intern->native, &tag_name)) { + 
RETURN_NULL(); + } + + RETURN_STR(wp_html_api_rust_uppercase_ascii_slice(tag_name.ptr, tag_name.len)); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_token_type) +{ + wp_html_tag_processor_object *intern; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1)) { + RETURN_STRING("#tag"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_DOCTYPE", sizeof("STATE_DOCTYPE") - 1)) { + RETURN_STRING("#doctype"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_TEXT_NODE", sizeof("STATE_TEXT_NODE") - 1)) { + RETURN_STRING("#text"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_COMMENT", sizeof("STATE_COMMENT") - 1)) { + RETURN_STRING("#comment"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_CDATA_NODE", sizeof("STATE_CDATA_NODE") - 1)) { + RETURN_STRING("#cdata-section"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_PRESUMPTUOUS_TAG", sizeof("STATE_PRESUMPTUOUS_TAG") - 1)) { + RETURN_STRING("#presumptuous-tag"); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_FUNKY_COMMENT", sizeof("STATE_FUNKY_COMMENT") - 1)) { + RETURN_STRING("#funky-comment"); + } + + RETURN_NULL(); +} + +PHP_METHOD(WP_HTML_Tag_Processor, paused_at_incomplete_token) +{ + wp_html_tag_processor_object *intern; + zval rv; + zval *state; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL != intern->native && wp_html_api_rust_tag_processor_paused_at_incomplete(intern->native)) { + RETURN_TRUE; + } + + state = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "parser_state", + sizeof("parser_state") - 1, + 1, + &rv + ); + + RETURN_BOOL( + IS_STRING == 
Z_TYPE_P(state) && + sizeof("STATE_INCOMPLETE_INPUT") - 1 == Z_STRLEN_P(state) && + 0 == memcmp(Z_STRVAL_P(state), "STATE_INCOMPLETE_INPUT", sizeof("STATE_INCOMPLETE_INPUT") - 1) + ); +} + +PHP_METHOD(WP_HTML_Tag_Processor, subdivide_text_appropriately) +{ + wp_html_tag_processor_object *intern; + zval rv; + zval *parser_state; + unsigned char classification; + + ZEND_PARSE_PARAMETERS_NONE(); + + parser_state = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "parser_state", + sizeof("parser_state") - 1, + 1, + &rv + ); + if ( + IS_STRING != Z_TYPE_P(parser_state) || + sizeof("STATE_TEXT_NODE") - 1 != Z_STRLEN_P(parser_state) || + 0 != memcmp(Z_STRVAL_P(parser_state), "STATE_TEXT_NODE", sizeof("STATE_TEXT_NODE") - 1) + ) { + RETURN_FALSE; + } + + zend_update_property_string( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "text_node_classification", + sizeof("text_node_classification") - 1, + "TEXT_IS_GENERIC" + ); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + classification = wp_html_api_rust_tag_processor_subdivide_text_appropriately(intern->native); + if (1 == classification) { + zend_update_property_string( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "text_node_classification", + sizeof("text_node_classification") - 1, + "TEXT_IS_NULL_SEQUENCE" + ); + RETURN_TRUE; + } + + if (2 == classification) { + zend_update_property_string( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "text_node_classification", + sizeof("text_node_classification") - 1, + "TEXT_IS_WHITESPACE" + ); + RETURN_TRUE; + } + + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_modifiable_text) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice text; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, 
"WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_api_rust_tag_processor_get_modifiable_text(intern->native, &text)) { + RETURN_EMPTY_STRING(); + } + + RETURN_STRINGL((const char *) text.ptr, text.len); +} + +PHP_METHOD(WP_HTML_Tag_Processor, native_get_script_content_type) +{ + wp_html_tag_processor_object *intern; + unsigned char content_type; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + content_type = wp_html_api_rust_tag_processor_script_content_type(intern->native); + switch (content_type) { + case 1: + RETURN_STRING("javascript"); + case 2: + RETURN_STRING("json"); + default: + RETURN_NULL(); + } +} + +PHP_METHOD(WP_HTML_Tag_Processor, set_modifiable_text) +{ + char *text; + size_t text_len; + wp_html_tag_processor_object *intern; + zval rv; + zval *parser_state; + bool had_span; + size_t old_token_start = 0; + size_t old_token_length = 0; + size_t new_token_start = 0; + size_t new_token_length = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(text, text_len) + ZEND_PARSE_PARAMETERS_END(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + parser_state = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "parser_state", + sizeof("parser_state") - 1, + 1, + &rv + ); + if ( + IS_STRING == Z_TYPE_P(parser_state) && + ( + ( + sizeof("STATE_COMPLETE") - 1 == Z_STRLEN_P(parser_state) && + 0 == memcmp(Z_STRVAL_P(parser_state), "STATE_COMPLETE", sizeof("STATE_COMPLETE") - 1) + ) || + ( + sizeof("STATE_INCOMPLETE_INPUT") - 1 == Z_STRLEN_P(parser_state) && + 0 == memcmp(Z_STRVAL_P(parser_state), "STATE_INCOMPLETE_INPUT", sizeof("STATE_INCOMPLETE_INPUT") - 1) + ) + ) + ) { + RETURN_FALSE; + } + + had_span = 
wp_html_api_rust_tag_processor_current_span(intern->native, &old_token_start, &old_token_length); + if ( + wp_html_api_rust_tag_processor_set_modifiable_text( + intern->native, + (const unsigned char *) text, + text_len + ) + ) { + if ( + had_span && + wp_html_api_rust_tag_processor_current_span(intern->native, &new_token_start, &new_token_length) + ) { + wp_html_tag_processor_adjust_bookmarks_after_current_token_update( + ZEND_THIS, + (zend_long) old_token_start, + (zend_long) old_token_length, + (zend_long) new_token_start, + (zend_long) new_token_length + ); + } + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + RETURN_TRUE; + } + + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_comment_type) +{ + wp_html_tag_processor_object *intern; + unsigned char comment_type; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + comment_type = wp_html_api_rust_tag_processor_current_comment_type(intern->native); + switch (comment_type) { + case 1: + RETURN_STRING("COMMENT_AS_ABRUPTLY_CLOSED_COMMENT"); + case 2: + RETURN_STRING("COMMENT_AS_CDATA_LOOKALIKE"); + case 3: + RETURN_STRING("COMMENT_AS_HTML_COMMENT"); + case 4: + RETURN_STRING("COMMENT_AS_PI_NODE_LOOKALIKE"); + case 5: + RETURN_STRING("COMMENT_AS_INVALID_HTML"); + } + + RETURN_NULL(); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_doctype_info) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice html; + size_t token_start; + size_t token_length; + zend_string *doctype_class_name; + zend_class_entry *doctype_ce; + zval raw_token; + zval retval; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if ( + !wp_html_tag_processor_parser_state_is(ZEND_THIS, 
"STATE_DOCTYPE", sizeof("STATE_DOCTYPE") - 1) || + 4 != wp_html_api_rust_tag_processor_current_token_type(intern->native) || + !wp_html_api_rust_tag_processor_current_span(intern->native, &token_start, &token_length) || + !wp_html_api_rust_tag_processor_get_html(intern->native, &html) || + token_start > html.len || + token_length > html.len - token_start + ) { + RETURN_NULL(); + } + + doctype_class_name = zend_string_init("WP_HTML_Doctype_Info", sizeof("WP_HTML_Doctype_Info") - 1, 0); + doctype_ce = zend_lookup_class(doctype_class_name); + zend_string_release(doctype_class_name); + + if (NULL == doctype_ce) { + RETURN_NULL(); + } + + ZVAL_STRINGL(&raw_token, (const char *) html.ptr + token_start, token_length); + ZVAL_NULL(&retval); + + if (NULL == zend_call_method_with_1_params(NULL, doctype_ce, NULL, "from_doctype_token", &retval, &raw_token)) { + zval_ptr_dtor(&raw_token); + RETURN_NULL(); + } + + zval_ptr_dtor(&raw_token); + RETURN_ZVAL(&retval, 1, 1); +} + +PHP_METHOD(WP_HTML_Tag_Processor, set_bookmark) +{ + zend_string *bookmark_name; + wp_html_tag_processor_object *intern; + zval rv; + zval *bookmarks; + zval span; + size_t token_start; + size_t token_length; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(bookmark_name) + ZEND_PARSE_PARAMETERS_END(); + + if (wp_html_tag_processor_parser_state_is_terminal(ZEND_THIS)) { + RETURN_FALSE; + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_api_rust_tag_processor_current_span(intern->native, &token_start, &token_length)) { + RETURN_FALSE; + } + + bookmarks = wp_html_tag_processor_read_bookmarks(ZEND_THIS, &rv); + if ( + NULL == zend_symtable_find(Z_ARRVAL_P(bookmarks), bookmark_name) && + zend_hash_num_elements(Z_ARRVAL_P(bookmarks)) >= + (uint32_t) wp_html_tag_processor_max_bookmarks(ZEND_THIS) + ) { + wp_html_api_rust_doing_it_wrong( + 
"WP_HTML_Tag_Processor::set_bookmark", + "Too many bookmarks: cannot create any more.", + "6.2.0" + ); + RETURN_FALSE; + } + + wp_html_tag_processor_create_span(&span, (zend_long) token_start, (zend_long) token_length); + zend_symtable_update(Z_ARRVAL_P(bookmarks), bookmark_name, &span); + + RETURN_TRUE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, release_bookmark) +{ + zend_string *bookmark_name; + zval rv; + zval *bookmarks; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(bookmark_name) + ZEND_PARSE_PARAMETERS_END(); + + bookmarks = wp_html_tag_processor_read_bookmarks(ZEND_THIS, &rv); + RETURN_BOOL(SUCCESS == zend_symtable_del(Z_ARRVAL_P(bookmarks), bookmark_name)); +} + +PHP_METHOD(WP_HTML_Tag_Processor, has_bookmark) +{ + zend_string *bookmark_name; + zval rv; + zval *bookmarks; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(bookmark_name) + ZEND_PARSE_PARAMETERS_END(); + + bookmarks = wp_html_tag_processor_read_bookmarks(ZEND_THIS, &rv); + RETURN_BOOL(NULL != zend_symtable_find(Z_ARRVAL_P(bookmarks), bookmark_name)); +} + +PHP_METHOD(WP_HTML_Tag_Processor, seek) +{ + zend_string *bookmark_name; + wp_html_tag_processor_object *intern; + zval rv; + zval *bookmarks; + zval *bookmark; + zend_long bookmark_start; + zend_long bookmark_length; + size_t token_start; + size_t token_length; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STR(bookmark_name) + ZEND_PARSE_PARAMETERS_END(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_apply_lexical_updates(ZEND_THIS, intern->native)) { + RETURN_FALSE; + } + + bookmarks = wp_html_tag_processor_read_bookmarks(ZEND_THIS, &rv); + bookmark = zend_symtable_find(Z_ARRVAL_P(bookmarks), bookmark_name); + if (NULL == bookmark) { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Tag_Processor::seek", + "Unknown bookmark name.", + "6.2.0" + ); + RETURN_FALSE; + } + + if ( + 
!wp_html_tag_processor_read_long_property(bookmark, "start", sizeof("start") - 1, &bookmark_start) || + !wp_html_tag_processor_read_long_property(bookmark, "length", sizeof("length") - 1, &bookmark_length) + ) { + RETURN_FALSE; + } + + if ( + wp_html_api_rust_tag_processor_current_span(intern->native, &token_start, &token_length) && + token_start == (size_t) bookmark_start && + token_length == (size_t) bookmark_length + ) { + wp_html_tag_processor_update_parser_state_from_native(ZEND_THIS, intern->native); + RETURN_TRUE; + } + + if (++intern->seek_count > 1000) { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Tag_Processor::seek", + "Too many calls to seek() - this can lead to performance issues.", + "6.2.0" + ); + RETURN_FALSE; + } + + if (0 == bookmark_length) { + wp_html_api_rust_tag_processor_seek(intern->native, (size_t) bookmark_start); + wp_html_tag_processor_update_parser_state(ZEND_THIS, "STATE_READY"); + RETURN_TRUE; + } + + wp_html_api_rust_tag_processor_seek(intern->native, (size_t) bookmark_start); + if (wp_html_api_rust_tag_processor_next_token(intern->native)) { + wp_html_tag_processor_update_parser_state_from_native(ZEND_THIS, intern->native); + RETURN_TRUE; + } + + wp_html_tag_processor_update_parser_state( + ZEND_THIS, + wp_html_api_rust_tag_processor_paused_at_incomplete(intern->native) ? 
"STATE_INCOMPLETE_INPUT" : "STATE_COMPLETE" + ); + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, change_parsing_namespace) +{ + char *new_namespace; + size_t new_namespace_len; + wp_html_tag_processor_object *intern; + unsigned char namespace_id = 0; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(new_namespace, new_namespace_len) + ZEND_PARSE_PARAMETERS_END(); + + if ( + !( + sizeof("html") - 1 == new_namespace_len && + 0 == memcmp(new_namespace, "html", sizeof("html") - 1) + ) && + !( + sizeof("math") - 1 == new_namespace_len && + 0 == memcmp(new_namespace, "math", sizeof("math") - 1) + ) && + !( + sizeof("svg") - 1 == new_namespace_len && + 0 == memcmp(new_namespace, "svg", sizeof("svg") - 1) + ) + ) { + RETURN_FALSE; + } + + zend_update_property_stringl( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "parsing_namespace", + sizeof("parsing_namespace") - 1, + new_namespace, + new_namespace_len + ); + + if (!(sizeof("html") - 1 == new_namespace_len && 0 == memcmp(new_namespace, "html", sizeof("html") - 1))) { + namespace_id = 1; + } + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL != intern->native) { + wp_html_api_rust_tag_processor_set_namespace(intern->native, namespace_id); + } + + RETURN_TRUE; +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_namespace) +{ + zval rv; + zval *namespace_value; + + ZEND_PARSE_PARAMETERS_NONE(); + + namespace_value = zend_read_property( + wp_html_tag_processor_ce, + Z_OBJ_P(ZEND_THIS), + "parsing_namespace", + sizeof("parsing_namespace") - 1, + 1, + &rv + ); + + if (IS_STRING == Z_TYPE_P(namespace_value)) { + RETURN_STR_COPY(Z_STR_P(namespace_value)); + } + + RETURN_STRING("html"); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_qualified_tag_name) +{ + zval tag_name; + zval namespace_name; + zend_string *lower_tag_name; + zend_string *qualified_tag_name; + + ZEND_PARSE_PARAMETERS_NONE(); + + ZVAL_NULL(&tag_name); + ZVAL_NULL(&namespace_name); + + if (NULL == 
zend_call_method_with_0_params(Z_OBJ_P(ZEND_THIS), Z_OBJCE_P(ZEND_THIS), NULL, "get_tag", &tag_name)) { + RETURN_NULL(); + } + + if (EG(exception)) { + zval_ptr_dtor(&tag_name); + RETURN_THROWS(); + } + + if (IS_STRING != Z_TYPE(tag_name)) { + zval_ptr_dtor(&tag_name); + RETURN_NULL(); + } + + if (NULL == zend_call_method_with_0_params(Z_OBJ_P(ZEND_THIS), Z_OBJCE_P(ZEND_THIS), NULL, "get_namespace", &namespace_name)) { + zval_ptr_dtor(&tag_name); + RETURN_NULL(); + } + + if (EG(exception)) { + zval_ptr_dtor(&namespace_name); + zval_ptr_dtor(&tag_name); + RETURN_THROWS(); + } + + if (IS_STRING != Z_TYPE(namespace_name)) { + zval_ptr_dtor(&namespace_name); + zval_ptr_dtor(&tag_name); + RETURN_NULL(); + } + + if (wp_html_api_rust_zend_string_equals_literal(Z_STR(namespace_name), "html", sizeof("html") - 1)) { + RETVAL_STR_COPY(Z_STR(tag_name)); + zval_ptr_dtor(&namespace_name); + zval_ptr_dtor(&tag_name); + return; + } + + lower_tag_name = wp_html_api_rust_lowercase_ascii_slice( + (const unsigned char *) Z_STRVAL(tag_name), + Z_STRLEN(tag_name) + ); + zval_ptr_dtor(&tag_name); + + if (wp_html_api_rust_zend_string_equals_literal(Z_STR(namespace_name), "svg", sizeof("svg") - 1)) { + qualified_tag_name = wp_html_api_rust_svg_qualified_tag_name(lower_tag_name); + zend_string_release(lower_tag_name); + zval_ptr_dtor(&namespace_name); + RETURN_STR(qualified_tag_name); + } + + zval_ptr_dtor(&namespace_name); + RETURN_STR(lower_tag_name); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_qualified_attribute_name) +{ + char *attribute_name; + size_t attribute_name_len; + zval namespace_name; + zend_string *lower_attribute_name; + zend_string *qualified_attribute_name = NULL; + + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_STRING(attribute_name, attribute_name_len) + ZEND_PARSE_PARAMETERS_END(); + + ZVAL_NULL(&namespace_name); + if (NULL == zend_call_method_with_0_params(Z_OBJ_P(ZEND_THIS), Z_OBJCE_P(ZEND_THIS), NULL, "get_namespace", &namespace_name)) { + 
RETURN_STRINGL(attribute_name, attribute_name_len); + } + + if (EG(exception)) { + zval_ptr_dtor(&namespace_name); + RETURN_THROWS(); + } + + if ( + IS_STRING != Z_TYPE(namespace_name) || + wp_html_api_rust_zend_string_equals_literal(Z_STR(namespace_name), "html", sizeof("html") - 1) + ) { + zval_ptr_dtor(&namespace_name); + RETURN_STRINGL(attribute_name, attribute_name_len); + } + + lower_attribute_name = wp_html_api_rust_lowercase_ascii_slice((const unsigned char *) attribute_name, attribute_name_len); + + if ( + wp_html_api_rust_zend_string_equals_literal(Z_STR(namespace_name), "math", sizeof("math") - 1) && + wp_html_api_rust_zend_string_equals_literal(lower_attribute_name, "definitionurl", sizeof("definitionurl") - 1) + ) { + qualified_attribute_name = zend_string_init("definitionURL", sizeof("definitionURL") - 1, 0); + } else if (wp_html_api_rust_zend_string_equals_literal(Z_STR(namespace_name), "svg", sizeof("svg") - 1)) { + qualified_attribute_name = wp_html_api_rust_svg_qualified_attribute_name(lower_attribute_name); + } + + if (NULL == qualified_attribute_name) { + qualified_attribute_name = wp_html_api_rust_foreign_qualified_attribute_name(lower_attribute_name); + } + + zend_string_release(lower_attribute_name); + zval_ptr_dtor(&namespace_name); + + if (NULL != qualified_attribute_name) { + RETURN_STR(qualified_attribute_name); + } + + RETURN_STRINGL(attribute_name, attribute_name_len); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_full_comment_text) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice text; + wp_html_api_rust_byte_slice tag_name; + wp_html_api_rust_byte_slice html; + size_t token_start; + size_t token_length; + unsigned char comment_type; + zend_string *comment_text; + size_t offset; + bool starts_with_question_mark = false; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + 
RETURN_THROWS(); + } + + if (wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_FUNKY_COMMENT", sizeof("STATE_FUNKY_COMMENT") - 1)) { + if (wp_html_api_rust_tag_processor_get_modifiable_text(intern->native, &text)) { + RETURN_STRINGL((const char *) text.ptr, text.len); + } + + RETURN_NULL(); + } + + if (!wp_html_tag_processor_parser_state_is(ZEND_THIS, "STATE_COMMENT", sizeof("STATE_COMMENT") - 1)) { + RETURN_NULL(); + } + + if (!wp_html_api_rust_tag_processor_get_modifiable_text(intern->native, &text)) { + RETURN_NULL(); + } + + comment_type = wp_html_api_rust_tag_processor_current_comment_type(intern->native); + switch (comment_type) { + case 1: + case 3: + RETURN_STRINGL((const char *) text.ptr, text.len); + + case 2: + comment_text = zend_string_alloc(sizeof("[CDATA[") - 1 + text.len + sizeof("]]") - 1, 0); + memcpy(ZSTR_VAL(comment_text), "[CDATA[", sizeof("[CDATA[") - 1); + memcpy(ZSTR_VAL(comment_text) + sizeof("[CDATA[") - 1, text.ptr, text.len); + memcpy(ZSTR_VAL(comment_text) + sizeof("[CDATA[") - 1 + text.len, "]]", sizeof("]]") - 1); + ZSTR_VAL(comment_text)[ZSTR_LEN(comment_text)] = '\0'; + RETURN_STR(comment_text); + + case 4: + if (!wp_html_api_rust_tag_processor_get_tag(intern->native, &tag_name)) { + RETURN_NULL(); + } + + comment_text = zend_string_alloc(1 + tag_name.len + text.len + 1, 0); + offset = 0; + ZSTR_VAL(comment_text)[offset++] = '?'; + memcpy(ZSTR_VAL(comment_text) + offset, tag_name.ptr, tag_name.len); + offset += tag_name.len; + memcpy(ZSTR_VAL(comment_text) + offset, text.ptr, text.len); + offset += text.len; + ZSTR_VAL(comment_text)[offset++] = '?'; + ZSTR_VAL(comment_text)[offset] = '\0'; + RETURN_STR(comment_text); + + case 5: + if ( + wp_html_api_rust_tag_processor_current_span(intern->native, &token_start, &token_length) && + wp_html_api_rust_tag_processor_get_html(intern->native, &html) && + token_start < html.len && + html.len - token_start > 1 && + '?' 
== html.ptr[token_start + 1] + ) { + starts_with_question_mark = true; + } + + if (!starts_with_question_mark) { + RETURN_STRINGL((const char *) text.ptr, text.len); + } + + comment_text = zend_string_alloc(1 + text.len, 0); + ZSTR_VAL(comment_text)[0] = '?'; + memcpy(ZSTR_VAL(comment_text) + 1, text.ptr, text.len); + ZSTR_VAL(comment_text)[ZSTR_LEN(comment_text)] = '\0'; + RETURN_STR(comment_text); + } + + RETURN_NULL(); +} + +PHP_METHOD(WP_HTML_Tag_Processor, get_updated_html) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice html; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_apply_lexical_updates(ZEND_THIS, intern->native)) { + RETURN_EMPTY_STRING(); + } + + if (!wp_html_api_rust_tag_processor_get_html(intern->native, &html)) { + RETURN_EMPTY_STRING(); + } + + wp_html_tag_processor_sync_html_property(ZEND_THIS, intern->native); + RETURN_STRINGL((const char *) html.ptr, html.len); +} + +PHP_METHOD(WP_HTML_Tag_Processor, __toString) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice html; + + ZEND_PARSE_PARAMETERS_NONE(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native) { + zend_throw_error(NULL, "WP_HTML_Tag_Processor is not initialized"); + RETURN_THROWS(); + } + + if (!wp_html_tag_processor_apply_lexical_updates(ZEND_THIS, intern->native)) { + RETURN_EMPTY_STRING(); + } + + if (!wp_html_api_rust_tag_processor_get_html(intern->native, &html)) { + RETURN_EMPTY_STRING(); + } + + RETURN_STRINGL((const char *) html.ptr, html.len); +} + +PHP_METHOD(WP_HTML_Processor, __construct) +{ + zval *html_param; + zval *unlock_param = NULL; + const char *html = ""; + size_t html_len = 0; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_ZVAL(html_param) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL(unlock_param) + 
ZEND_PARSE_PARAMETERS_END(); + + if (IS_STRING == Z_TYPE_P(html_param)) { + html = Z_STRVAL_P(html_param); + html_len = Z_STRLEN_P(html_param); + } else { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Processor::__construct", + "The HTML parameter must be a string.", + "6.9.0" + ); + } + + if (!wp_html_tag_processor_initialize(ZEND_THIS, html, html_len)) { + RETURN_THROWS(); + } +} + +static void wp_html_processor_create_initialized(INTERNAL_FUNCTION_PARAMETERS, const char *html, size_t html_len) +{ + zend_class_entry *called_scope = zend_get_called_scope(execute_data); + + if (NULL == called_scope || !instanceof_function(called_scope, wp_html_processor_ce)) { + called_scope = wp_html_processor_ce; + } + + object_init_ex(return_value, called_scope); + if (!wp_html_tag_processor_initialize(return_value, html, html_len)) { + zval_ptr_dtor(return_value); + RETURN_THROWS(); + } +} + +PHP_METHOD(WP_HTML_Processor, create_fragment) +{ + zval *html_param; + zval *context_param = NULL; + zval *encoding_param = NULL; + const char *html; + size_t html_len; + + ZEND_PARSE_PARAMETERS_START(1, 3) + Z_PARAM_ZVAL(html_param) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL(context_param) + Z_PARAM_ZVAL(encoding_param) + ZEND_PARSE_PARAMETERS_END(); + + if (IS_STRING != Z_TYPE_P(html_param)) { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Processor::create_fragment", + "The HTML parameter must be a string.", + "6.9.0" + ); + RETURN_NULL(); + } + + if ( + NULL != context_param && + !( + IS_STRING == Z_TYPE_P(context_param) && + sizeof("<body>") - 1 == Z_STRLEN_P(context_param) && + 0 == memcmp(Z_STRVAL_P(context_param), "<body>", sizeof("<body>") - 1) + ) + ) { + RETURN_NULL(); + } + + if ( + NULL != encoding_param && + !( + IS_STRING == Z_TYPE_P(encoding_param) && + sizeof("UTF-8") - 1 == Z_STRLEN_P(encoding_param) && + 0 == memcmp(Z_STRVAL_P(encoding_param), "UTF-8", sizeof("UTF-8") - 1) + ) + ) { + RETURN_NULL(); + } + + html = Z_STRVAL_P(html_param); + html_len = Z_STRLEN_P(html_param); + 
wp_html_processor_create_initialized(INTERNAL_FUNCTION_PARAM_PASSTHRU, html, html_len); +} + +PHP_METHOD(WP_HTML_Processor, create_full_parser) +{ + zval *html_param; + zval *encoding_param = NULL; + const char *html; + size_t html_len; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_ZVAL(html_param) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL(encoding_param) + ZEND_PARSE_PARAMETERS_END(); + + if (IS_STRING != Z_TYPE_P(html_param)) { + wp_html_api_rust_doing_it_wrong( + "WP_HTML_Processor::create_full_parser", + "The HTML parameter must be a string.", + "6.9.0" + ); + RETURN_NULL(); + } + + if ( + NULL != encoding_param && + !( + IS_STRING == Z_TYPE_P(encoding_param) && + sizeof("UTF-8") - 1 == Z_STRLEN_P(encoding_param) && + 0 == memcmp(Z_STRVAL_P(encoding_param), "UTF-8", sizeof("UTF-8") - 1) + ) + ) { + RETURN_NULL(); + } + + html = Z_STRVAL_P(html_param); + html_len = Z_STRLEN_P(html_param); + wp_html_processor_create_initialized(INTERNAL_FUNCTION_PARAM_PASSTHRU, html, html_len); +} + +PHP_METHOD(WP_HTML_Processor, get_last_error) +{ + ZEND_PARSE_PARAMETERS_NONE(); + + RETURN_NULL(); +} + +PHP_METHOD(WP_HTML_Processor, get_unsupported_exception) +{ + ZEND_PARSE_PARAMETERS_NONE(); + + RETURN_NULL(); +} + +PHP_METHOD(WP_HTML_Processor, is_virtual) +{ + ZEND_PARSE_PARAMETERS_NONE(); + + RETURN_FALSE; +} + +PHP_METHOD(WP_HTML_Processor, expects_closer) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice tag_name; + zval *node = NULL; + + ZEND_PARSE_PARAMETERS_START(0, 1) + Z_PARAM_OPTIONAL + Z_PARAM_ZVAL(node) + ZEND_PARSE_PARAMETERS_END(); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL == intern->native || !wp_html_api_rust_tag_processor_get_tag(intern->native, &tag_name)) { + RETURN_NULL(); + } + + if (wp_html_api_rust_tag_processor_is_tag_closer(intern->native)) { + RETURN_FALSE; + } + + if ( + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "AREA") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "BASE") || + 
wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "BR") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "COL") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "EMBED") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "HR") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "IMG") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "INPUT") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "LINK") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "META") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "PARAM") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "SOURCE") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "TRACK") || + wp_html_api_rust_ascii_eq_ci(tag_name.ptr, tag_name.len, "WBR") + ) { + RETURN_FALSE; + } + + RETURN_TRUE; +} + +PHP_METHOD(WP_HTML_Processor, get_breadcrumbs) +{ + wp_html_tag_processor_object *intern; + wp_html_api_rust_byte_slice tag_name; + + ZEND_PARSE_PARAMETERS_NONE(); + + array_init(return_value); + add_next_index_string(return_value, "HTML"); + add_next_index_string(return_value, "BODY"); + + intern = Z_WP_HTML_TAG_PROCESSOR_P(ZEND_THIS); + if (NULL != intern->native && wp_html_api_rust_tag_processor_get_tag(intern->native, &tag_name)) { + zend_string *upper = wp_html_api_rust_uppercase_ascii_slice(tag_name.ptr, tag_name.len); + add_next_index_str(return_value, upper); + } +} + +static const zend_function_entry wp_html_api_rust_functions[] = { + PHP_FE(wp_html_api_rust_version, arginfo_wp_html_api_rust_version) + PHP_FE(wp_html_api_rust_scan_next_tag, arginfo_wp_html_api_rust_scan_next_tag) + PHP_FE_END +}; + +static const zend_function_entry wp_html_tag_processor_methods[] = { + PHP_ME(WP_HTML_Tag_Processor, __construct, arginfo_wp_html_tag_processor_construct, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, next_tag, arginfo_wp_html_tag_processor_next_tag, ZEND_ACC_PUBLIC) + 
PHP_ME(WP_HTML_Tag_Processor, next_token, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_tag, arginfo_wp_html_tag_processor_get_tag, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_attribute, arginfo_wp_html_tag_processor_get_attribute, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_attribute_names_with_prefix, arginfo_wp_html_tag_processor_get_attribute_names_with_prefix, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, set_attribute, arginfo_wp_html_tag_processor_set_attribute, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, remove_attribute, arginfo_wp_html_tag_processor_remove_attribute, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, add_class, arginfo_wp_html_tag_processor_class_mutation, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, remove_class, arginfo_wp_html_tag_processor_class_mutation, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, has_class, arginfo_wp_html_tag_processor_has_class, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, class_list, arginfo_wp_html_tag_processor_class_list, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, is_tag_closer, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, has_self_closing_flag, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_token_name, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_token_type, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, paused_at_incomplete_token, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, subdivide_text_appropriately, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_modifiable_text, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, native_get_script_content_type, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PROTECTED) + 
PHP_ME(WP_HTML_Tag_Processor, set_modifiable_text, arginfo_wp_html_tag_processor_set_modifiable_text, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_comment_type, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_doctype_info, arginfo_wp_html_tag_processor_nullable_mixed, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, set_bookmark, arginfo_wp_html_tag_processor_bookmark, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, release_bookmark, arginfo_wp_html_tag_processor_bookmark, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, has_bookmark, arginfo_wp_html_tag_processor_bookmark, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, seek, arginfo_wp_html_tag_processor_bookmark, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, change_parsing_namespace, arginfo_wp_html_tag_processor_change_namespace, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_namespace, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_qualified_tag_name, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_qualified_attribute_name, arginfo_wp_html_tag_processor_get_attribute, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_full_comment_text, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, get_updated_html, arginfo_wp_html_tag_processor_get_html, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Tag_Processor, __toString, arginfo_wp_html_tag_processor_get_html, ZEND_ACC_PUBLIC) + PHP_FE_END +}; + +static const zend_function_entry wp_html_processor_methods[] = { + PHP_ME(WP_HTML_Processor, __construct, arginfo_wp_html_processor_construct, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Processor, create_fragment, arginfo_wp_html_processor_create_fragment, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) + PHP_ME(WP_HTML_Processor, create_full_parser, arginfo_wp_html_processor_create_full_parser, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) + 
PHP_ME(WP_HTML_Processor, get_last_error, arginfo_wp_html_tag_processor_nullable_string, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Processor, get_unsupported_exception, arginfo_wp_html_tag_processor_nullable_mixed, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Processor, is_virtual, arginfo_wp_html_tag_processor_bool, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Processor, expects_closer, arginfo_wp_html_tag_processor_nullable_mixed, ZEND_ACC_PUBLIC) + PHP_ME(WP_HTML_Processor, get_breadcrumbs, arginfo_wp_html_tag_processor_class_list, ZEND_ACC_PUBLIC) + PHP_FE_END +}; + +static void wp_html_api_rust_register_tag_processor_class(void) /* Registers the internal class WP_HTML_Tag_Processor_Native: method table, create_object hook, class constants, default properties, and the shared object handlers. */ +{ + zend_class_entry ce; + + INIT_CLASS_ENTRY(ce, "WP_HTML_Tag_Processor_Native", wp_html_tag_processor_methods); + wp_html_tag_processor_ce = zend_register_internal_class(&ce); + wp_html_tag_processor_ce->create_object = wp_html_tag_processor_create_object; /* Instances are allocated through the extension's own create_object hook. */ + + zend_declare_class_constant_long(wp_html_tag_processor_ce, "MAX_BOOKMARKS", sizeof("MAX_BOOKMARKS") - 1, 10); /* Each declaration passes the name length as sizeof(literal) - 1 to exclude the terminating NUL. */ + zend_declare_class_constant_long(wp_html_tag_processor_ce, "MAX_SEEK_OPS", sizeof("MAX_SEEK_OPS") - 1, 1000); + zend_declare_class_constant_bool(wp_html_tag_processor_ce, "ADD_CLASS", sizeof("ADD_CLASS") - 1, true); + zend_declare_class_constant_bool(wp_html_tag_processor_ce, "REMOVE_CLASS", sizeof("REMOVE_CLASS") - 1, false); + zend_declare_class_constant_null(wp_html_tag_processor_ce, "SKIP_CLASS", sizeof("SKIP_CLASS") - 1); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_READY", sizeof("STATE_READY") - 1, "STATE_READY"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_COMPLETE", sizeof("STATE_COMPLETE") - 1, "STATE_COMPLETE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_INCOMPLETE_INPUT", sizeof("STATE_INCOMPLETE_INPUT") - 1, "STATE_INCOMPLETE_INPUT"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_MATCHED_TAG", sizeof("STATE_MATCHED_TAG") - 1, "STATE_MATCHED_TAG"); + 
zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_TEXT_NODE", sizeof("STATE_TEXT_NODE") - 1, "STATE_TEXT_NODE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_CDATA_NODE", sizeof("STATE_CDATA_NODE") - 1, "STATE_CDATA_NODE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_COMMENT", sizeof("STATE_COMMENT") - 1, "STATE_COMMENT"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_DOCTYPE", sizeof("STATE_DOCTYPE") - 1, "STATE_DOCTYPE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_PRESUMPTUOUS_TAG", sizeof("STATE_PRESUMPTUOUS_TAG") - 1, "STATE_PRESUMPTUOUS_TAG"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "STATE_FUNKY_COMMENT", sizeof("STATE_FUNKY_COMMENT") - 1, "STATE_WP_FUNKY"); /* Value differs from the constant name. NOTE(review): presumably mirrors the value used by the PHP class - confirm. */ + zend_declare_class_constant_string(wp_html_tag_processor_ce, "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT", sizeof("COMMENT_AS_ABRUPTLY_CLOSED_COMMENT") - 1, "COMMENT_AS_ABRUPTLY_CLOSED_COMMENT"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "COMMENT_AS_CDATA_LOOKALIKE", sizeof("COMMENT_AS_CDATA_LOOKALIKE") - 1, "COMMENT_AS_CDATA_LOOKALIKE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "COMMENT_AS_HTML_COMMENT", sizeof("COMMENT_AS_HTML_COMMENT") - 1, "COMMENT_AS_HTML_COMMENT"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "COMMENT_AS_PI_NODE_LOOKALIKE", sizeof("COMMENT_AS_PI_NODE_LOOKALIKE") - 1, "COMMENT_AS_PI_NODE_LOOKALIKE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "COMMENT_AS_INVALID_HTML", sizeof("COMMENT_AS_INVALID_HTML") - 1, "COMMENT_AS_INVALID_HTML"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "NO_QUIRKS_MODE", sizeof("NO_QUIRKS_MODE") - 1, "no-quirks-mode"); /* The two compat-mode constants carry lowercase hyphenated values instead of repeating their names. */ + zend_declare_class_constant_string(wp_html_tag_processor_ce, "QUIRKS_MODE", sizeof("QUIRKS_MODE") - 1, "quirks-mode"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "TEXT_IS_GENERIC", 
sizeof("TEXT_IS_GENERIC") - 1, "TEXT_IS_GENERIC"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "TEXT_IS_NULL_SEQUENCE", sizeof("TEXT_IS_NULL_SEQUENCE") - 1, "TEXT_IS_NULL_SEQUENCE"); + zend_declare_class_constant_string(wp_html_tag_processor_ce, "TEXT_IS_WHITESPACE", sizeof("TEXT_IS_WHITESPACE") - 1, "TEXT_IS_WHITESPACE"); + + zend_declare_property_null(wp_html_tag_processor_ce, "html", sizeof("html") - 1, ZEND_ACC_PROTECTED); /* Default property values follow; parsing_namespace is the only one declared private. */ + zend_declare_property_string(wp_html_tag_processor_ce, "parser_state", sizeof("parser_state") - 1, "STATE_READY", ZEND_ACC_PROTECTED); + zend_declare_property_string(wp_html_tag_processor_ce, "compat_mode", sizeof("compat_mode") - 1, "no-quirks-mode", ZEND_ACC_PROTECTED); + zend_declare_property_string(wp_html_tag_processor_ce, "parsing_namespace", sizeof("parsing_namespace") - 1, "html", ZEND_ACC_PRIVATE); + zend_declare_property_null(wp_html_tag_processor_ce, "comment_type", sizeof("comment_type") - 1, ZEND_ACC_PROTECTED); + zend_declare_property_string(wp_html_tag_processor_ce, "text_node_classification", sizeof("text_node_classification") - 1, "TEXT_IS_GENERIC", ZEND_ACC_PROTECTED); + zend_declare_property_null(wp_html_tag_processor_ce, "bookmarks", sizeof("bookmarks") - 1, ZEND_ACC_PROTECTED); + zend_declare_property_null(wp_html_tag_processor_ce, "lexical_updates", sizeof("lexical_updates") - 1, ZEND_ACC_PROTECTED); + + memcpy(&wp_html_tag_processor_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); /* Start from a copy of the standard object handlers, then override offset and free_obj. */ + wp_html_tag_processor_handlers.offset = XtOffsetOf(wp_html_tag_processor_object, std); /* Offset of the std member inside wp_html_tag_processor_object. */ + wp_html_tag_processor_handlers.free_obj = wp_html_tag_processor_free_obj; +} + +static void wp_html_api_rust_register_processor_class(void) /* Registers WP_HTML_Processor as a child of the native tag-processor class entry, reusing the same create_object hook. NOTE(review): no caller is visible in this part of the file - confirm it is invoked or intentionally unused. */ +{ + zend_class_entry ce; + + INIT_CLASS_ENTRY(ce, "WP_HTML_Processor", wp_html_processor_methods); + wp_html_processor_ce = zend_register_internal_class_ex(&ce, wp_html_tag_processor_ce); + wp_html_processor_ce->create_object = wp_html_tag_processor_create_object; +} + 
+PHP_MINIT_FUNCTION(wp_html_api_rust) /* Module startup: registers the INI entries, then registers the native tag-processor class only when wp_html_api_rust.replace_html_api is enabled. NOTE(review): wp_html_api_rust_register_processor_class() is not called here - confirm that is intentional. */ +{ + REGISTER_INI_ENTRIES(); + + if (INI_BOOL("wp_html_api_rust.replace_html_api")) { + wp_html_api_rust_register_tag_processor_class(); + } + + return SUCCESS; +} + +PHP_MSHUTDOWN_FUNCTION(wp_html_api_rust) /* Module shutdown: unregisters the INI entries registered at startup. */ +{ + UNREGISTER_INI_ENTRIES(); + + return SUCCESS; +} + +PHP_MINFO_FUNCTION(wp_html_api_rust) /* phpinfo() section: extension version, Rust core version, and whether class replacement is enabled. */ +{ + php_info_print_table_start(); + php_info_print_table_header(2, "wp_html_api_rust support", "enabled"); + php_info_print_table_row(2, "extension version", PHP_WP_HTML_API_RUST_VERSION); + php_info_print_table_row(2, "rust core version", wp_html_api_rust_core_version()); + php_info_print_table_row(2, "replace HTML API classes", INI_BOOL("wp_html_api_rust.replace_html_api") ? "enabled" : "disabled"); + php_info_print_table_end(); +} + +zend_module_entry wp_html_api_rust_module_entry = { /* Module descriptor wiring the function table and the lifecycle callbacks defined in this file. */ + STANDARD_MODULE_HEADER, + "wp_html_api_rust", + wp_html_api_rust_functions, + PHP_MINIT(wp_html_api_rust), + PHP_MSHUTDOWN(wp_html_api_rust), + NULL, /* no request startup handler */ + NULL, /* no request shutdown handler */ + PHP_MINFO(wp_html_api_rust), + PHP_WP_HTML_API_RUST_VERSION, + STANDARD_MODULE_PROPERTIES +}; + +#ifdef COMPILE_DL_WP_HTML_API_RUST +# ifdef ZTS +ZEND_TSRMLS_CACHE_DEFINE() +# endif +ZEND_GET_MODULE(wp_html_api_rust) +#endif diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 35d91fad3129c..d4e62f6238b5d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -7,6 +7,10 @@ * @since 6.4.0 */ +if ( class_exists( 'WP_HTML_Processor', false ) ) { /* Stop loading this file when the class is already defined; the false argument keeps the check from triggering autoloading. */ + return; +} + /** * Core class used to safely parse and modify an HTML document. * @@ -5298,6 +5302,25 @@ public function has_self_closing_flag(): bool { return $this->is_virtual() ? false : parent::has_self_closing_flag(); } + /** + * Sets the modifiable text for the matched token, if matched. + * + * @since 6.9.0 Subclassed for the HTML Processor. 
+ * + * @param string $plaintext_content New text content to represent in the matched token. + * @return bool Whether the text was able to update. + */ + public function set_modifiable_text( string $plaintext_content ): bool { + if ( + self::STATE_MATCHED_TAG === $this->parser_state && + 'html' !== $this->get_namespace() + ) { /* A matched tag outside the 'html' namespace is refused; every other case defers to the parent implementation. */ + return false; + } + + return parent::set_modifiable_text( $plaintext_content ); + } + /** * Returns the node name represented by the token. * @@ -5319,8 +5342,12 @@ public function has_self_closing_flag(): bool { * @return string|null Name of the matched token. */ public function get_token_name(): ?string { - return $this->is_virtual() - ? $this->current_element->token->node_name + if ( $this->is_virtual() ) { /* Virtual tokens report the node name stored on the current element's token. */ + return $this->current_element->token->node_name; + } + + return '#tag' === parent::get_token_type() /* Tag tokens report get_tag(); all other token types use the parent's token name. */ + ? $this->get_tag() : parent::get_token_name(); } @@ -5617,7 +5644,7 @@ public function seek( $bookmark_name ): bool { $actual_bookmark_name = "_{$bookmark_name}"; $processor_started_at = $this->state->current_token ? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start - : 0; + : ( WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state ? strlen( $this->html ) : 0 ); /* With no current token, a completed parse counts as positioned at the end of the document (strlen of html) rather than at offset 0. */ $bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start; $direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward'; @@ -5730,7 +5757,7 @@ public function seek( $bookmark_name ): bool { * The processor will stop on virtual tokens, but bookmarks may not be set on them. * They should not be matched when seeking a bookmark, skip them. */ - if ( $this->is_virtual() ) { + if ( ! 
isset( $this->state->current_token ) || $this->is_virtual() ) { /* Also skip when no current token is set: the comparison that follows reads its bookmark_name. */ continue; } if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 77c1a471db5b1..e78d87264a78e 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -21,6 +21,24 @@ * @since 6.2.0 */ +if ( class_exists( 'WP_HTML_Tag_Processor_Native', false ) ) { /* Native class present: define WP_HTML_Tag_Processor as a thin subclass of it (unless already defined) and return from this file. */ + if ( class_exists( 'WP_HTML_Tag_Processor', false ) ) { + return; + } + + class WP_HTML_Tag_Processor extends WP_HTML_Tag_Processor_Native { + private function get_script_content_type(): ?string { /* Forwards to the native_get_script_content_type() method. */ + return $this->native_get_script_content_type(); + } + } + + return; +} + +if ( class_exists( 'WP_HTML_Tag_Processor', false ) ) { /* No native class: still avoid redeclaring a class that is already defined. */ + return; +} + /** * Core class used to modify attributes in an HTML document for tags matching a query. *