diff --git a/index.js b/index.js index 9e53f05..389977d 100644 --- a/index.js +++ b/index.js @@ -2,26 +2,71 @@ const punycode = require("punycode/"); const regexes = require("./lib/regexes.js"); -const mappingTable = require("./lib/mappingTable.json"); +const tablesRaw = require("./lib/mappingTable.json"); const { STATUS_MAPPING } = require("./lib/statusMapping.js"); function containsNonASCII(str) { return /[^\x00-\x7F]/u.test(str); } +let rangesTable, + mappingTable; + +function unpackMappingTable() { + if (mappingTable) { + return; + } + + // Destroying the originals, for mem + + rangesTable = []; + let current = 0; + const [rangesRaw, statusesRaw, mappingRaw] = tablesRaw; + while (rangesRaw.length > 0) { + if (rangesRaw[0] < 0) { + const repeats = -rangesRaw.shift(); // Treat as this many repeats of [1, 0] + for (let i = 0; i < repeats; i++) { + rangesTable.push([++current, 0]); + } + } else { + const row = rangesRaw.splice(0, 2); + row[0] = current += row[0]; + rangesTable.push(row); + } + } + + mappingTable = []; + for (const status of statusesRaw) { + if (status < 0) { + // Treat as this many repeats of STATUS_MAPPING.mapped + for (let i = 0; i < -status; i++) { + mappingTable.push([STATUS_MAPPING.mapped, mappingRaw.shift()]); + } + } else if (status === STATUS_MAPPING.mapped || status === STATUS_MAPPING.deviation) { + mappingTable.push([status, mappingRaw.shift()]); + } else { + mappingTable.push([status]); + } + } + + statusesRaw.length = 0; // Destroy for mem +} + function findStatus(val) { + unpackMappingTable(); + let start = 0; - let end = mappingTable.length - 1; + let end = rangesTable.length - 1; while (start <= end) { const mid = Math.floor((start + end) / 2); - const target = mappingTable[mid]; - const min = Array.isArray(target[0]) ? target[0][0] : target[0]; - const max = Array.isArray(target[0]) ? 
target[0][1] : target[0]; + const target = rangesTable[mid]; + const min = target[0]; + const max = min + target[1]; if (min <= val && max >= val) { - return target.slice(1); + return mappingTable[mid]; } else if (min > val) { end = mid - 1; } else { @@ -36,9 +81,9 @@ function mapChars(domainName, { transitionalProcessing }) { let processed = ""; for (const ch of domainName) { - const [status, mapping] = findStatus(ch.codePointAt(0)); + const row = findStatus(ch.codePointAt(0)); // [status, mapping] - switch (status) { + switch (row[0]) { case STATUS_MAPPING.disallowed: processed += ch; break; @@ -48,12 +93,12 @@ function mapChars(domainName, { transitionalProcessing }) { if (transitionalProcessing && ch === "ẞ") { processed += "ss"; } else { - processed += mapping; + processed += row[1]; } break; case STATUS_MAPPING.deviation: if (transitionalProcessing) { - processed += mapping; + processed += row[1]; } else { processed += ch; } diff --git a/scripts/generateMappingTable.js b/scripts/generateMappingTable.js index b986072..b32abbc 100644 --- a/scripts/generateMappingTable.js +++ b/scripts/generateMappingTable.js @@ -17,7 +17,9 @@ async function main() { } const body = await response.text(); - const lines = []; + const ranges = []; + const statuses = []; + const mappings = []; body.split("\n").forEach(l => { l = l.split("#")[0]; // Remove comments @@ -32,18 +34,19 @@ async function main() { const range = cells[0].split(".."); const start = parseInt(range[0], 16); const end = parseInt(range[1] || range[0], 16); - cells[0] = end === start ? 
start : [start, end]; + cells[0] = [start, end - start]; + ranges.push(cells.shift()); - cells[1] = STATUS_MAPPING[cells[1]]; + const status = STATUS_MAPPING[cells.shift()]; + statuses.push(status); - if (cells[1] === STATUS_MAPPING.valid) { - lines.push(cells.slice(0, 2)); + if (status !== STATUS_MAPPING.mapped && status !== STATUS_MAPPING.deviation) { return; } - if (cells[2] !== undefined) { + if (cells[0] !== undefined) { // Parse replacement to int[] array - let replacement = cells[2].split(" "); + let replacement = cells[0].split(" "); if (replacement[0] === "") { // Empty array replacement = []; } @@ -52,14 +55,65 @@ async function main() { return parseInt(r, 16); }); - cells[2] = String.fromCodePoint(...replacement); + mappings.push(String.fromCodePoint(...replacement)); + } else { + throw new Error("Unexpected"); } - - lines.push(cells); }); // We could drop valid chars, but those are only ~1000 ranges and // binary search is way to quick to even notice that - fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(lines)); + // Delta-code starts + let last = 0; + for (const range of ranges) { + range[0] -= last; + last += range[0]; + } + + // Condense repeats of N consecutive [1, 0] in ranges to -N, flatten the rest + const rangesCondensed = []; + let repeats = 0; + for (const row of ranges) { + if (row[0] === 1 && row[1] === 0) { + repeats++; + continue; + } + + if (repeats > 0) { + rangesCondensed.push(-repeats); + repeats = 0; + } + + rangesCondensed.push(...row); + } + + if (repeats > 0) { + rangesCondensed.push(-repeats); + repeats = 0; + } + + // Condense repeats of N consecutive STATUS_MAPPING.mapped to -N + const statusesCondensed = []; + for (const status of statuses) { + if (status === STATUS_MAPPING.mapped) { + repeats++; + continue; + } + + if (repeats > 0) { + statusesCondensed.push(repeats === 1 ? 
STATUS_MAPPING.mapped : -repeats); + repeats = 0; + } + + statusesCondensed.push(status); + } + + if (repeats > 0) { + statusesCondensed.push(repeats === 1 ? STATUS_MAPPING.mapped : -repeats); + repeats = 0; + } + + const tablesRaw = [rangesCondensed, statusesCondensed, mappings]; + fs.writeFileSync(path.resolve(__dirname, "../lib/mappingTable.json"), JSON.stringify(tablesRaw)); }