From 5d120586a5e18c8b457f258d308b8f5b7f474a5c Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 06:21:00 +0400 Subject: [PATCH 1/7] flatter mappingTable --- index.js | 6 +++--- scripts/generateMappingTable.js | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/index.js b/index.js index 9e53f05..b6f3ccd 100644 --- a/index.js +++ b/index.js @@ -17,11 +17,11 @@ function findStatus(val) { const mid = Math.floor((start + end) / 2); const target = mappingTable[mid]; - const min = Array.isArray(target[0]) ? target[0][0] : target[0]; - const max = Array.isArray(target[0]) ? target[0][1] : target[0]; + const min = target[0]; + const max = min + target[1]; if (min <= val && max >= val) { - return target.slice(1); + return target.slice(2); } else if (min > val) { end = mid - 1; } else { diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index b986072..a5e51d9 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -32,12 +32,11 @@ async function main() { const range = cells[0].split(".."); const start = parseInt(range[0], 16); const end = parseInt(range[1] || range[0], 16); - cells[0] = end === start ? 
start : [start, end]; - + cells[0] = [start, end - start]; cells[1] = STATUS_MAPPING[cells[1]]; if (cells[1] === STATUS_MAPPING.valid) { - lines.push(cells.slice(0, 2)); + lines.push(cells.slice(0, 2).flat()); return; } @@ -55,7 +54,7 @@ async function main() { cells[2] = String.fromCodePoint(...replacement); } - lines.push(cells); + lines.push(cells.flat()); }); // We could drop valid chars, but those are only ~1000 ranges and From 92b36c4d8b1a0cfdb5df3247d7227054121d4bd6 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 06:47:29 +0400 Subject: [PATCH 2/7] delta-code starts --- index.js | 8 ++++++++ scripts/generateMappingTable.js | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/index.js b/index.js index b6f3ccd..86a9f3a 100644 --- a/index.js +++ b/index.js @@ -13,6 +13,14 @@ function findStatus(val) { let start = 0; let end = mappingTable.length - 1; + // Unpack delta-coding in place once + if (mappingTable[2][0] < 10) { + let current = 0; + for (const row of mappingTable) { + row[0] = current += row[0]; + } + } + while (start <= end) { const mid = Math.floor((start + end) / 2); diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index a5e51d9..dd158e9 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -60,5 +60,12 @@ async function main() { // We could drop valid chars, but those are only ~1000 ranges and // binary search is way to quick to even notice that + // Delta-code starts + let last = 0; + for (const line of lines) { + line[0] -= last; + last += line[0]; + } + fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(lines)); } From 6367f61f7898a4fe56b01c09091fe4a4e215edcd Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 06:56:00 +0400 Subject: [PATCH 3/7] flat mapping table --- index.js | 28 +++++++++++++++++++--------- scripts/generateMappingTable.js | 7 +++++-- 2 files changed, 24 insertions(+), 11 deletions(-) 
diff --git a/index.js b/index.js index 86a9f3a..b530dc8 100644 --- a/index.js +++ b/index.js @@ -2,25 +2,35 @@ const punycode = require("punycode/"); const regexes = require("./lib/regexes.js"); -const mappingTable = require("./lib/mappingTable.json"); +const mappingTableRaw = require("./lib/mappingTable.json"); const { STATUS_MAPPING } = require("./lib/statusMapping.js"); function containsNonASCII(str) { return /[^\x00-\x7F]/u.test(str); } +let mappingTable; + +function unpackMappingTable() { + if (mappingTable) { + return; + } + + mappingTable = []; + let current = 0; + while (mappingTableRaw.length > 0) { + const row = mappingTableRaw.splice(0, 4); // Destroying the original, for mem + row[0] = current += row[0]; + mappingTable.push(row); + } +} + function findStatus(val) { + unpackMappingTable(); + let start = 0; let end = mappingTable.length - 1; - // Unpack delta-coding in place once - if (mappingTable[2][0] < 10) { - let current = 0; - for (const row of mappingTable) { - row[0] = current += row[0]; - } - } - while (start <= end) { const mid = Math.floor((start + end) / 2); diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index dd158e9..ebbb5e2 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -36,7 +36,8 @@ async function main() { cells[1] = STATUS_MAPPING[cells[1]]; if (cells[1] === STATUS_MAPPING.valid) { - lines.push(cells.slice(0, 2).flat()); + cells[2] = 0; + lines.push(cells.slice(0, 3).flat()); return; } @@ -52,6 +53,8 @@ async function main() { }); cells[2] = String.fromCodePoint(...replacement); + } else { + cells[2] = 0; } lines.push(cells.flat()); @@ -67,5 +70,5 @@ async function main() { last += line[0]; } - fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(lines)); + fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(lines.flat())); } From 8e3814e9631214bea7fd1da76b82039f46cd9f5e Mon Sep 17 00:00:00 2001 From: 
Nikita Skovoroda Date: Tue, 23 Dec 2025 07:10:09 +0400 Subject: [PATCH 4/7] compact entries --- index.js | 12 +++++++----- scripts/generateMappingTable.js | 7 +++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/index.js b/index.js index b530dc8..1923313 100644 --- a/index.js +++ b/index.js @@ -19,7 +19,9 @@ function unpackMappingTable() { mappingTable = []; let current = 0; while (mappingTableRaw.length > 0) { - const row = mappingTableRaw.splice(0, 4); // Destroying the original, for mem + const status = mappingTableRaw[2]; + const rowSize = status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation ? 4 : 3; + const row = mappingTableRaw.splice(0, rowSize); // Destroying the original, for mem row[0] = current += row[0]; mappingTable.push(row); } @@ -54,9 +56,9 @@ function mapChars(domainName, { transitionalProcessing }) { let processed = ""; for (const ch of domainName) { - const [status, mapping] = findStatus(ch.codePointAt(0)); + const row = findStatus(ch.codePointAt(0)); // [status, mapping] - switch (status) { + switch (row[0]) { case STATUS_MAPPING.disallowed: processed += ch; break; @@ -66,12 +68,12 @@ function mapChars(domainName, { transitionalProcessing }) { if (transitionalProcessing && ch === "ẞ") { processed += "ss"; } else { - processed += mapping; + processed += row[1]; } break; case STATUS_MAPPING.deviation: if (transitionalProcessing) { - processed += mapping; + processed += row[1]; } else { processed += ch; } diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index ebbb5e2..727da85 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -35,9 +35,8 @@ async function main() { cells[0] = [start, end - start]; cells[1] = STATUS_MAPPING[cells[1]]; - if (cells[1] === STATUS_MAPPING.valid) { - cells[2] = 0; - lines.push(cells.slice(0, 3).flat()); + if (cells[1] !== STATUS_MAPPING.mapped && cells[1] !== STATUS_MAPPING.deviation) { + lines.push(cells.slice(0, 
2).flat()); return; } @@ -54,7 +53,7 @@ async function main() { cells[2] = String.fromCodePoint(...replacement); } else { - cells[2] = 0; + throw new Error("Unexpected"); } lines.push(cells.flat()); From a52c1f8c63b3efb09406714d485f65b6a2cb70b0 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 07:48:13 +0400 Subject: [PATCH 5/7] Split mapping and ranges --- index.js | 31 ++++++++++++++++++++----------- scripts/generateMappingTable.js | 22 ++++++++++++---------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/index.js b/index.js index 1923313..498db86 100644 --- a/index.js +++ b/index.js @@ -2,28 +2,37 @@ const punycode = require("punycode/"); const regexes = require("./lib/regexes.js"); -const mappingTableRaw = require("./lib/mappingTable.json"); +const tablesRaw = require("./lib/mappingTable.json"); const { STATUS_MAPPING } = require("./lib/statusMapping.js"); function containsNonASCII(str) { return /[^\x00-\x7F]/u.test(str); } -let mappingTable; +let rangesTable, + mappingTable; function unpackMappingTable() { if (mappingTable) { return; } - mappingTable = []; + // Destroying the originals, for mem + + rangesTable = []; let current = 0; - while (mappingTableRaw.length > 0) { - const status = mappingTableRaw[2]; - const rowSize = status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation ? 4 : 3; - const row = mappingTableRaw.splice(0, rowSize); // Destroying the original, for mem + const [rangesRaw, mappingRaw] = tablesRaw; + while (rangesRaw.length > 0) { + const row = rangesRaw.splice(0, 2); row[0] = current += row[0]; - mappingTable.push(row); + rangesTable.push(row); + } + + mappingTable = []; + while (mappingRaw.length > 0) { + const status = mappingRaw[0]; + const rowSize = status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation ? 
2 : 1; + mappingTable.push(mappingRaw.splice(0, rowSize)); } } @@ -31,17 +40,17 @@ function findStatus(val) { unpackMappingTable(); let start = 0; - let end = mappingTable.length - 1; + let end = rangesTable.length - 1; while (start <= end) { const mid = Math.floor((start + end) / 2); - const target = mappingTable[mid]; + const target = rangesTable[mid]; const min = target[0]; const max = min + target[1]; if (min <= val && max >= val) { - return target.slice(2); + return mappingTable[mid]; } else if (min > val) { end = mid - 1; } else { diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index 727da85..07e4a54 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -17,6 +17,7 @@ async function main() { } const body = await response.text(); + const ranges = []; const lines = []; body.split("\n").forEach(l => { @@ -33,16 +34,17 @@ async function main() { const start = parseInt(range[0], 16); const end = parseInt(range[1] || range[0], 16); cells[0] = [start, end - start]; - cells[1] = STATUS_MAPPING[cells[1]]; + ranges.push(cells.shift()); - if (cells[1] !== STATUS_MAPPING.mapped && cells[1] !== STATUS_MAPPING.deviation) { - lines.push(cells.slice(0, 2).flat()); + cells[0] = STATUS_MAPPING[cells[0]]; + if (cells[0] !== STATUS_MAPPING.mapped && cells[0] !== STATUS_MAPPING.deviation) { + lines.push(cells[0]); return; } - if (cells[2] !== undefined) { + if (cells[1] !== undefined) { // Parse replacement to int[] array - let replacement = cells[2].split(" "); + let replacement = cells[1].split(" "); if (replacement[0] === "") { // Empty array replacement = []; } @@ -51,7 +53,7 @@ async function main() { return parseInt(r, 16); }); - cells[2] = String.fromCodePoint(...replacement); + cells[1] = String.fromCodePoint(...replacement); } else { throw new Error("Unexpected"); } @@ -64,10 +66,10 @@ async function main() { // Delta-code starts let last = 0; - for (const line of lines) { - line[0] -= last; - last += 
line[0]; + for (const range of ranges) { + range[0] -= last; + last += range[0]; } - fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(lines.flat())); + fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify([ranges.flat(), lines.flat()])); } From 029b465529c069a15322e56caf24d3c46ca07402 Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 08:07:51 +0400 Subject: [PATCH 6/7] Condense repeats of [1, 0] --- index.js | 13 ++++++++++--- scripts/generateMappingTable.js | 24 +++++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index 498db86..b6239bc 100644 --- a/index.js +++ b/index.js @@ -23,9 +23,16 @@ function unpackMappingTable() { let current = 0; const [rangesRaw, mappingRaw] = tablesRaw; while (rangesRaw.length > 0) { - const row = rangesRaw.splice(0, 2); - row[0] = current += row[0]; - rangesTable.push(row); + if (rangesRaw[0] < 0) { + const repeats = -rangesRaw.shift(); // Treat as this many repeats of [1, 0] + for (let i = 0; i < repeats; i++) { + rangesTable.push([++current, 0]); + } + } else { + const row = rangesRaw.splice(0, 2); + row[0] = current += row[0]; + rangesTable.push(row); + } } mappingTable = []; diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index 07e4a54..01e30d3 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -71,5 +71,27 @@ async function main() { last += range[0]; } - fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify([ranges.flat(), lines.flat()])); + // Condense repeats of N consecutive [1, 0] in ranges to -N + const rangesCondensed = []; + let repeats = 0; + for (const row of ranges) { + if (row[0] === 1 && row[1] === 0) { + repeats++; + continue; + } + + if (repeats > 0) { + rangesCondensed.push(-repeats); + repeats = 0; + } + + rangesCondensed.push(row); + } + + if (repeats > 0) { + 
rangesCondensed.push(-repeats); + } + + const tablesRaw = [rangesCondensed.flat(), lines.flat()]; + fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(tablesRaw)); } From 7b63a8bcb24579dd3238e549d571d61ccae100de Mon Sep 17 00:00:00 2001 From: Nikita Skovoroda Date: Tue, 23 Dec 2025 08:36:40 +0400 Subject: [PATCH 7/7] Split statuses and mappings, condense statuses --- index.js | 19 ++++++++++---- scripts/generateMappingTable.js | 46 ++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/index.js b/index.js index b6239bc..389977d 100644 --- a/index.js +++ b/index.js @@ -21,7 +21,7 @@ function unpackMappingTable() { rangesTable = []; let current = 0; - const [rangesRaw, mappingRaw] = tablesRaw; + const [rangesRaw, statusesRaw, mappingRaw] = tablesRaw; while (rangesRaw.length > 0) { if (rangesRaw[0] < 0) { const repeats = -rangesRaw.shift(); // Treat as this many repeats of [1, 0] @@ -36,11 +36,20 @@ function unpackMappingTable() { } mappingTable = []; - while (mappingRaw.length > 0) { - const status = mappingRaw[0]; - const rowSize = status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation ? 
2 : 1; - mappingTable.push(mappingRaw.splice(0, rowSize)); + for (const status of statusesRaw) { + if (status < 0) { + // Treat as this many repeats of STATUS_MAPPING.mapped + for (let i = 0; i < -status; i++) { + mappingTable.push([STATUS_MAPPING.mapped, mappingRaw.shift()]); + } + } else if (status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation) { + mappingTable.push([status, mappingRaw.shift()]); + } else { + mappingTable.push([status]); + } } + + statusesRaw.length = 0; // Destroy for mem } function findStatus(val) { diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index 01e30d3..b32abbc 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -18,7 +18,8 @@ async function main() { const body = await response.text(); const ranges = []; - const lines = []; + const statuses = []; + const mappings = []; body.split("\n").forEach(l => { l = l.split("#")[0]; // Remove comments @@ -36,15 +37,16 @@ async function main() { cells[0] = [start, end - start]; ranges.push(cells.shift()); - cells[0] = STATUS_MAPPING[cells[0]]; - if (cells[0] !== STATUS_MAPPING.mapped && cells[0] !== STATUS_MAPPING.deviation) { - lines.push(cells[0]); + const status = STATUS_MAPPING[cells.shift()]; + statuses.push(status); + + if (status !== STATUS_MAPPING.mapped && status !== STATUS_MAPPING.deviation) { return; } - if (cells[1] !== undefined) { + if (cells[0] !== undefined) { // Parse replacement to int[] array - let replacement = cells[1].split(" "); + let replacement = cells[0].split(" "); if (replacement[0] === "") { // Empty array replacement = []; } @@ -53,12 +55,10 @@ async function main() { return parseInt(r, 16); }); - cells[1] = String.fromCodePoint(...replacement); + mappings.push(String.fromCodePoint(...replacement)); } else { throw new Error("Unexpected"); } - - lines.push(cells.flat()); }); // We could drop valid chars, but those are only ~1000 ranges and @@ -71,7 +71,7 @@ async function main() { last 
+= range[0]; } - // Condense repeats of N consecutive [1, 0] in ranges to -N + // Condense repeats of N consecutive [1, 0] in ranges to -N, flatten the rest const rangesCondensed = []; let repeats = 0; for (const row of ranges) { @@ -85,13 +85,35 @@ async function main() { repeats = 0; } - rangesCondensed.push(row); + rangesCondensed.push(...row); } if (repeats > 0) { rangesCondensed.push(-repeats); + repeats = 0; + } + + // Condense repeats of N consecutive STATUS_MAPPING.mapped to -N + const statusesCondensed = []; + for (const status of statuses) { + if (status === STATUS_MAPPING.mapped) { + repeats++; + continue; + } + + if (repeats > 0) { + statusesCondensed.push(repeats === 1 ? STATUS_MAPPING.mapped : -repeats); + repeats = 0; + } + + statusesCondensed.push(status); + } + + if (repeats > 0) { + statusesCondensed.push(repeats === 1 ? STATUS_MAPPING.mapped : -repeats); + repeats = 0; } - const tablesRaw = [rangesCondensed.flat(), lines.flat()]; + const tablesRaw = [rangesCondensed, statusesCondensed, mappings]; fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(tablesRaw)); }