Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 42 additions & 39 deletions src/builders/html_sanitizer/css/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,23 @@ class Tokenizer {
*
* https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#next-input-code-point
*/
// Reads the entry of `this.css` at index `this.pos` without modifying `pos`.
// NOTE(review): two signature lines appear back to back (an accessor form
// followed by a plain-method form) — this looks like a flattened diff pair;
// confirm which single declaration the file is meant to keep.
private get nextInputCodePoint(): string | EOF {
private nextInputCodePoint(): string | EOF {
// Plain index read; what an out-of-range index yields depends on how `css`
// and `EOF` are declared, which is not visible here.
return this.css[this.pos];
}

/**
 * Returns a two-element tuple holding the entries of `this.css` at `pos` and
 * `pos + 1`. Nothing is consumed: `pos` is only read.
 *
 * NOTE(review): the accessor-style and method-style signature lines below
 * look like the removed/added halves of a flattened diff — confirm which
 * one belongs in the file.
 */
private get nextTwoInputCodePoints(): [string | EOF, string | EOF] {
private nextTwoInputCodePoints(): [string | EOF, string | EOF] {
return [this.css[this.pos], this.css[this.pos + 1]];
}

/**
 * Returns a three-element tuple holding the entries of `this.css` at `pos`,
 * `pos + 1` and `pos + 2`. Nothing is consumed: `pos` is only read.
 *
 * NOTE(review): the accessor-style and method-style signature lines below
 * look like the removed/added halves of a flattened diff — confirm which
 * one belongs in the file.
 */
private get nextThreeInputCodePoints(): [
private nextThreeInputCodePoints(): [
string | EOF,
string | EOF,
string | EOF,
] {
return [this.css[this.pos], this.css[this.pos + 1], this.css[this.pos + 2]];
}

/**
 * Returns the entry of `this.css` immediately before the current position
 * (index `pos - 1`). `pos` itself is not changed.
 *
 * NOTE(review): the accessor-style and method-style signature lines below
 * look like the removed/added halves of a flattened diff — confirm which
 * one belongs in the file.
 */
private get currentInputCodePoint(): string | EOF {
private currentInputCodePoint(): string | EOF {
return this.css[this.pos - 1];
}

Expand Down Expand Up @@ -136,7 +136,7 @@ class Tokenizer {
// ":ho st", which is safe.
return {tokenKind: CssTokenKind.WHITESPACE};
}
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return {tokenKind: CssTokenKind.EOF};
Expand All @@ -147,8 +147,8 @@ class Tokenizer {
return this.consumeString(codePoint);
} else if (codePoint === '#') {
if (
this.isIdentCodePoint(this.nextInputCodePoint) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints)
this.isIdentCodePoint(this.nextInputCodePoint()) ||
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints())
) {
// In spec there's also a step to check if the next three code points
// would start an ident sequence. However, the only reason to do so
Expand Down Expand Up @@ -208,7 +208,7 @@ class Tokenizer {
} else if (codePoint === '@') {
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
const ident = this.consumeIdentSequence();
Expand Down Expand Up @@ -271,7 +271,7 @@ class Tokenizer {
value: '',
};
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === quote) {
return stringToken;
Expand All @@ -283,10 +283,10 @@ class Tokenizer {
stringToken.value = '';
return stringToken;
} else if (codePoint === '\\') {
if (this.nextInputCodePoint === EOF) {
if (this.nextInputCodePoint() === EOF) {
// > If the next input code point is EOF, do nothing.
continue;
} else if (this.isNewline(this.nextInputCodePoint)) {
} else if (this.isNewline(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
} else {
const escapedCodePoint = this.consumeEscapedCodePoint();
Expand All @@ -300,7 +300,7 @@ class Tokenizer {

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-escaped-code-point */
private consumeEscapedCodePoint(): string {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF) {
return '\ufffd';
Expand All @@ -311,12 +311,15 @@ class Tokenizer {
// The spec assumes here that the first hex digit has already been
// consumed. So in fact, the maximum number of hex digits that can be
// consumed is 6.
while (this.isHexDigit(this.nextInputCodePoint) && hexDigits.length < 6) {
hexDigits += this.nextInputCodePoint;
while (
this.isHexDigit(this.nextInputCodePoint()) &&
hexDigits.length < 6
) {
hexDigits += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
// Whitespace directly following an escape sequence is ignored.
if (this.isWhitespace(this.nextInputCodePoint)) {
if (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
// Needed to parse hexadecimal.
Expand All @@ -329,7 +332,7 @@ class Tokenizer {
}

/**
 * Keeps calling `consumeTheNextInputCodePoint()` for as long as
 * `isWhitespace` accepts the next input code point.
 */
private consumeAsMuchWhitespaceAsPossible() {
// NOTE(review): two `while` headers in a row — the first reads
// `nextInputCodePoint` as a property, the second calls it. This looks like
// a flattened diff pair; confirm that only one header is meant to remain.
while (this.isWhitespace(this.nextInputCodePoint)) {
while (this.isWhitespace(this.nextInputCodePoint())) {
this.consumeTheNextInputCodePoint();
}
}
Expand All @@ -338,9 +341,9 @@ class Tokenizer {
private consumeIdentSequence(): string {
let result = '';
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
const codePoint2 = this.nextInputCodePoint;
const codePoint2 = this.nextInputCodePoint();
if (this.isIdentCodePoint(codePoint)) {
result += codePoint;
} else if (this.twoCodePointsAreValidEscape(codePoint, codePoint2)) {
Expand All @@ -355,15 +358,15 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-ident-like-token */
private consumeIdentLikeToken(): CssToken | CssToken[] {
const ident = this.consumeIdentSequence();
if (/^url$/i.test(ident) && this.nextInputCodePoint === '(') {
if (/^url$/i.test(ident) && this.nextInputCodePoint() === '(') {
// TODO(securitymb): This algorithm may look a little weird but we're
// following the spec here exactly. We will see later on if this can be
// optimized.
this.consumeTheNextInputCodePoint();
while (this.nextTwoInputsPointsAreWhitespace()) {
this.consumeTheNextInputCodePoint();
}
const nextTwo = this.nextTwoInputCodePoints;
const nextTwo = this.nextTwoInputCodePoints();
if (
(this.isWhitespace(nextTwo[0]) &&
(nextTwo[1] === '"' || nextTwo[1] === "'")) ||
Expand All @@ -376,7 +379,7 @@ class Tokenizer {
} else {
return this.consumeUrlToken();
}
} else if (this.nextInputCodePoint === '(') {
} else if (this.nextInputCodePoint() === '(') {
this.consumeTheNextInputCodePoint();
// We lowercase the function name because function names are
// case-insensitive in CSS.
Expand Down Expand Up @@ -413,15 +416,15 @@ class Tokenizer {
let url = '';
this.consumeAsMuchWhitespaceAsPossible();
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === ')' || codePoint === EOF) {
return this.createFunctionUrlToken(url);
} else if (this.isWhitespace(codePoint)) {
this.consumeAsMuchWhitespaceAsPossible();
if (
this.nextInputCodePoint === ')' ||
this.nextInputCodePoint === EOF
this.nextInputCodePoint() === ')' ||
this.nextInputCodePoint() === EOF
) {
this.consumeTheNextInputCodePoint();
return this.createFunctionUrlToken(url);
Expand Down Expand Up @@ -462,7 +465,7 @@ class Tokenizer {
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-the-remnants-of-a-bad-url */
private consumeRemnantsOfBadUrl() {
while (true) {
const codePoint = this.nextInputCodePoint;
const codePoint = this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
if (codePoint === EOF || codePoint === ')') {
return;
Expand All @@ -484,23 +487,23 @@ class Tokenizer {
private consumeNumber(): string {
let repr = '';
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
if (next === '+' || next === '-') {
this.consumeTheNextInputCodePoint();
repr += next;
}
}
repr += this.consumeDigits();
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
if (next === '.' && this.isDigit(next2)) {
this.consumeTheNextInputCodePoint();
repr += '.' + this.consumeDigits();
}
}
{
const next = this.nextInputCodePoint;
const next = this.nextInputCodePoint();
const next2 = this.css[this.pos + 1];
const next3 = this.css[this.pos + 2];
if (next === 'e' || next === 'E') {
Expand All @@ -518,8 +521,8 @@ class Tokenizer {

private consumeDigits(): string {
let repr = '';
while (this.isDigit(this.nextInputCodePoint)) {
repr += this.nextInputCodePoint;
while (this.isDigit(this.nextInputCodePoint())) {
repr += this.nextInputCodePoint();
this.consumeTheNextInputCodePoint();
}
return repr;
Expand All @@ -533,7 +536,7 @@ class Tokenizer {
const repr = this.consumeNumber();
if (
this.threeCodePointsWouldStartAnIdentSequence(
...this.nextThreeInputCodePoints,
...this.nextThreeInputCodePoints(),
)
) {
return {
Expand All @@ -542,15 +545,15 @@ class Tokenizer {
dimension: this.consumeIdentSequence(),
};
}
if (this.nextInputCodePoint === '%') {
if (this.nextInputCodePoint() === '%') {
this.consumeTheNextInputCodePoint();
return {tokenKind: CssTokenKind.PERCENTAGE, repr};
}
return {tokenKind: CssTokenKind.NUMBER, repr};
}

/**
 * Reports whether `isWhitespace` accepts both of the next two input code
 * points (checked with `every` over the two-element tuple).
 */
private nextTwoInputsPointsAreWhitespace() {
// NOTE(review): two `return` statements in a row — property access first,
// call form second. This looks like a flattened diff pair; confirm that
// only one of them is meant to remain.
return this.nextTwoInputCodePoints.every((c) => this.isWhitespace(c));
return this.nextTwoInputCodePoints().every((c) => this.isWhitespace(c));
}

/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#check-if-two-code-points-are-a-valid-escape */
Expand All @@ -563,8 +566,8 @@ class Tokenizer {

/**
 * Applies `twoCodePointsAreValidEscape` to the current input code point
 * followed by the next input code point.
 */
private streamStartsWithValidEscape() {
return this.twoCodePointsAreValidEscape(
// NOTE(review): the argument list appears twice — once as property reads,
// once as calls. This looks like a flattened diff pair; confirm that only
// one pair of arguments is meant to remain.
this.currentInputCodePoint,
this.nextInputCodePoint,
this.currentInputCodePoint(),
this.nextInputCodePoint(),
);
}

Expand All @@ -588,8 +591,8 @@ class Tokenizer {

/**
 * Applies `threeCodePointsWouldStartANumber` to the current input code point
 * followed by the next two input code points (spread from the tuple).
 */
private streamStartsWithANumber() {
return this.threeCodePointsWouldStartANumber(
// NOTE(review): the argument list appears twice — once as property reads,
// once as calls. This looks like a flattened diff pair; confirm that only
// one set of arguments is meant to remain.
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down Expand Up @@ -618,8 +621,8 @@ class Tokenizer {

/**
 * Applies `threeCodePointsWouldStartAnIdentSequence` to the current input
 * code point followed by the next two input code points (spread from the
 * tuple).
 */
private streamStartsWithAnIdentSequence() {
return this.threeCodePointsWouldStartAnIdentSequence(
// NOTE(review): the argument list appears twice — once as property reads,
// once as calls. This looks like a flattened diff pair; confirm that only
// one set of arguments is meant to remain.
this.currentInputCodePoint,
...this.nextTwoInputCodePoints,
this.currentInputCodePoint(),
...this.nextTwoInputCodePoints(),
);
}

Expand Down