From ff3541493aaef93b37a22fc7ecb1c1355fe0dadb Mon Sep 17 00:00:00 2001 From: Sylvain Jermini Date: Wed, 20 May 2026 22:45:15 +0200 Subject: [PATCH 1/2] tmp --- .../jfiveparse/ProcessedInputStream.java | 126 ++++++++++ .../digitalfondue/jfiveparse/Tokenizer.java | 4 +- .../jfiveparse/TokenizerState.java | 232 +++++++++--------- 3 files changed, 249 insertions(+), 113 deletions(-) diff --git a/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java b/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java index 480b094..714daca 100644 --- a/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java +++ b/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java @@ -65,6 +65,43 @@ int readUntilAttributeValueUnquoted(ResizableCharBuilder builder) { return readUntilAttributeValueUnquotedInternal(builder); } + int readUntilTagName(ResizableCharBuilder builder) { + int chr; + while (!buffer.isEmpty) { + chr = buffer.removeFirst(); + if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == Characters.GREATERTHAN_SIGN || chr == Characters.NULL || chr == Characters.EOF) { + return chr; + } + builder.append((char) chr); + } + return readUntilTagNameInternal(builder); + } + + int readUntilAttributeName(ResizableCharBuilder builder) { + int chr; + while (!buffer.isEmpty) { + chr = buffer.removeFirst(); + if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == '=' || chr == '>' || chr == Characters.NULL || + chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == Characters.EOF) { + return chr; + } + builder.append((char) chr); + } + return readUntilAttributeNameInternal(builder); + } + + int readUntilComment(ResizableCharBuilder builder) { + int chr; + while (!buffer.isEmpty) { + chr = buffer.removeFirst(); + if (chr == '-' || chr == Characters.NULL || chr == Characters.EOF) { + return chr; + } + builder.append((char) chr); + } + return readUntilCommentInternal(builder); + } + protected int readUntilInternal(ResizableCharBuilder builder, boolean stopAtAmpersand, boolean stopAtLessThan) { int chr; while ((chr = read()) != -1) { @@ -99,6 +136,40 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build return -1; } + protected int readUntilTagNameInternal(ResizableCharBuilder builder) { + int chr; + while ((chr = read()) != -1) { + if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == Characters.GREATERTHAN_SIGN || chr == Characters.NULL) { + return chr; + } + builder.append((char) chr); + } + return -1; + } + + protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) { + int chr; + while ((chr = read()) != -1) { + if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == '=' || chr == '>' || chr == Characters.NULL || + chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN) { + return chr; + } + builder.append((char) chr); + } + return -1; + } + + protected int readUntilCommentInternal(ResizableCharBuilder builder) { + int chr; + while ((chr = read()) != -1) { + if (chr == '-' || chr == Characters.NULL) { + return chr; + } + builder.append((char) chr); + } + return -1; + } + // int peekNextInputCharacter(int offset) { if (buffer.length() < offset) { @@ -230,6 +301,61 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build pos = n; return -1; } + + @Override + protected int readUntilTagNameInternal(ResizableCharBuilder builder) { + int n = length; + int i = pos; + while (i < n) { + char c = input[i]; + if (Common.isTabLfFfCrOrSpace(c) || c == Characters.SOLIDUS || c == Characters.GREATERTHAN_SIGN || c == Characters.NULL) { + builder.append(input, pos, i - pos); + pos = i + 1; + return c; + } + i++; + } + builder.append(input, pos, n - pos); + pos = n; + return -1; + } + + @Override + protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) { + int n = length; + int i = pos; + while (i < n) { + char c = input[i]; + if (Common.isTabLfFfCrOrSpace(c) || c == Characters.SOLIDUS || c == '=' || c == '>' || c == Characters.NULL || + c == '"' || c == '\'' || c == Characters.LESSTHAN_SIGN) { + builder.append(input, pos, i - pos); + pos = i + 1; + return c; + } + i++; + } + builder.append(input, pos, n - pos); + pos = n; + return -1; + } + + @Override + protected int readUntilCommentInternal(ResizableCharBuilder builder) { + int n = length; + int i = pos; + while (i < n) { + char c = input[i]; + if (c == '-' || c == Characters.NULL) { + builder.append(input, pos, i - pos); + pos = i + 1; + return c; + } + i++; + } + builder.append(input, pos, n - pos); + pos = n; + return -1; + } } static final class ReaderProcessedInputStream extends ProcessedInputStream { diff --git a/src/main/java/ch/digitalfondue/jfiveparse/Tokenizer.java b/src/main/java/ch/digitalfondue/jfiveparse/Tokenizer.java index 8790d55..0141829 100644 --- a/src/main/java/ch/digitalfondue/jfiveparse/Tokenizer.java +++ b/src/main/java/ch/digitalfondue/jfiveparse/Tokenizer.java @@ -36,7 +36,7 @@ final class Tokenizer { // tag related private Attributes attributes; - private final ResizableCharBuilder currentAttributeName = new ResizableCharBuilder(); + final ResizableCharBuilder currentAttributeName = new ResizableCharBuilder(); ResizableCharBuilder currentAttributeValue; private int currentAttributeQuoteType; private boolean selfClosing; @@ -51,7 +51,7 @@ final class Tokenizer { private StringBuilder doctypeSystemIdentifier; // comment related - private ResizableCharBuilder commentToken; + ResizableCharBuilder commentToken; // private final ResizableCharBuilder temporaryBuffer = new ResizableCharBuilder(); diff --git a/src/main/java/ch/digitalfondue/jfiveparse/TokenizerState.java b/src/main/java/ch/digitalfondue/jfiveparse/TokenizerState.java index 6efe6f8..cf20553 100644 --- a/src/main/java/ch/digitalfondue/jfiveparse/TokenizerState.java +++ b/src/main/java/ch/digitalfondue/jfiveparse/TokenizerState.java @@ -159,36 +159,32 @@ static void handleEndTagOpenState(Tokenizer tokenizer, ProcessedInputStream proc } static void handleTagNameState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { - // bypass and optimization, as we are accumulating the tag name, we can do it here - // in a single loop, avoiding method calls - do { - int chr = processedInputStream.getNextInputCharacterAndConsume(); - switch (chr) { - case Characters.TAB: - case Characters.LF: - case Characters.FF: - case Characters.SPACE: - tokenizer.setState(BEFORE_ATTRIBUTE_NAME_STATE); - return; - case Characters.SOLIDUS: - tokenizer.setState(SELF_CLOSING_START_TAG_STATE); - return; - case Characters.GREATERTHAN_SIGN: - tokenizer.setState(DATA_STATE); - tokenizer.addCurrentAttributeAndEmitToken(); - return; - case Characters.NULL: - tokenizer.emitParseError(); - tokenizer.appendCurrentTagToken(Characters.REPLACEMENT_CHARACTER); - return; - case Characters.EOF: - tokenizer.emitParseErrorAndSetState(DATA_STATE); - processedInputStream.reconsume(chr); - return; - default: - tokenizer.tagName.append((char) chr); - } - } while (true); + int chr = processedInputStream.readUntilTagName(tokenizer.tagName); + switch (chr) { + case Characters.TAB: + case Characters.LF: + case Characters.FF: + case Characters.SPACE: + tokenizer.setState(BEFORE_ATTRIBUTE_NAME_STATE); + return; + case Characters.SOLIDUS: + tokenizer.setState(SELF_CLOSING_START_TAG_STATE); + return; + case Characters.GREATERTHAN_SIGN: + tokenizer.setState(DATA_STATE); + tokenizer.addCurrentAttributeAndEmitToken(); + return; + case Characters.NULL: + tokenizer.emitParseError(); + tokenizer.appendCurrentTagToken(Characters.REPLACEMENT_CHARACTER); + return; + case Characters.EOF: + tokenizer.emitParseErrorAndSetState(DATA_STATE); + processedInputStream.reconsume(chr); + return; + default: + break; + } } static void handleSelfClosingStartTagState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { @@ -229,7 +225,30 @@ static void handleRCDataState(Tokenizer tokenizer, ProcessedInputStream processe tokenizer.emitEOF(); // does nothing break; default: + int previousInsertionMode = tokenizer.getTokenHandlerInsertionMode(); tokenizer.emitCharacter(chr); + int currentInsertionMode = tokenizer.getTokenHandlerInsertionMode(); + ResizableCharBuilder textNode = tokenizer.getTokenHandlerInsertCharacterPreviousTextNode(); + if (tokenizer.getState() == RCDATA_STATE && previousInsertionMode == currentInsertionMode && textNode != null) { + int internalChr = processedInputStream.readUntil(textNode, true, true); + switch (internalChr) { + case Characters.AMPERSAND: + tokenizer.setState(CHARACTER_REFERENCE_IN_RCDATA_STATE); + return; + case Characters.LESSTHAN_SIGN: + tokenizer.setState(RCDATA_LESS_THAN_SIGN_STATE); + return; + case Characters.NULL: + tokenizer.emitParseError(); + tokenizer.emitCharacter(Characters.REPLACEMENT_CHARACTER); + return; + case Characters.EOF: + tokenizer.emitEOF(); + return; + default: + break; + } + } break; } } @@ -1552,25 +1571,23 @@ static void handleCommentStartDashState(Tokenizer tokenizer, ProcessedInputStrea } static void handleCommentState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { - do { - int chr = processedInputStream.getNextInputCharacterAndConsume(); - switch (chr) { - case Characters.HYPHEN_MINUS: - tokenizer.setState(COMMENT_END_DASH_STATE); - return; - case Characters.NULL: - tokenizer.emitParseError(); - tokenizer.appendCommentCharacter(Characters.REPLACEMENT_CHARACTER); - return; - case Characters.EOF: - tokenizer.emitParseErrorAndSetState(DATA_STATE); - tokenizer.emitComment(); - processedInputStream.reconsume(chr); - return; - default: - tokenizer.appendCommentCharacter(chr); - } - } while (true); + int chr = processedInputStream.readUntilComment(tokenizer.commentToken); + switch (chr) { + case Characters.HYPHEN_MINUS: + tokenizer.setState(COMMENT_END_DASH_STATE); + return; + case Characters.NULL: + tokenizer.emitParseError(); + tokenizer.appendCommentCharacter(Characters.REPLACEMENT_CHARACTER); + return; + case Characters.EOF: + tokenizer.emitParseErrorAndSetState(DATA_STATE); + tokenizer.emitComment(); + processedInputStream.reconsume(chr); + return; + default: + break; + } } static void handleCommentEndDashState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { @@ -2006,29 +2023,26 @@ static void handleDataState(Tokenizer tokenizer, ProcessedInputStream processedI && (currentInsertionMode == TreeConstructor.IM_IN_BODY || currentInsertionMode == TreeConstructor.IM_IN_CELL) && tokenizer.isTokenHandlerInHtmlContent() && textNode != null) { - for (;;) { - int internalChr = processedInputStream.getNextInputCharacterAndConsume(); - switch (internalChr) { - case Characters.EOF: - tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); - tokenizer.emitEOF(); - return; - case Characters.NULL: - tokenizer.emitParseError(); - tokenizer.emitCharacter(internalChr); - return; - case Characters.AMPERSAND: - tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); - tokenizer.setState(CHARACTER_REFERENCE_IN_DATA_STATE); - return; - case Characters.LESSTHAN_SIGN: - tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); - tokenizer.setState(TAG_OPEN_STATE); - return; - default: - textNode.append((char) internalChr); - break; - } + int internalChr = processedInputStream.readUntil(textNode, true, true); + switch (internalChr) { + case Characters.EOF: + tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); + tokenizer.emitEOF(); + return; + case Characters.NULL: + tokenizer.emitParseError(); + tokenizer.emitCharacter(internalChr); + return; + case Characters.AMPERSAND: + tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); + tokenizer.setState(CHARACTER_REFERENCE_IN_DATA_STATE); + return; + case Characters.LESSTHAN_SIGN: + tokenizer.resetTokenHandlerInsertCharacterPreviousTextNode(); + tokenizer.setState(TAG_OPEN_STATE); + return; + default: + break; } } @@ -2114,45 +2128,41 @@ static void handleBeforeAttributeNameState(Tokenizer tokenizer, ProcessedInputSt } static void handleAttributeNameState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { - // vvv optimization vvv, we try to bypass as much as possible for the case "appendCurrentAttributeName" - do { - int chr = processedInputStream.getNextInputCharacterAndConsume(); - switch (chr) { - case Characters.TAB: - case Characters.LF: - case Characters.FF: - case Characters.SPACE: - tokenizer.setState(AFTER_ATTRIBUTE_NAME_STATE); - return; - case Characters.SOLIDUS: - tokenizer.setState(SELF_CLOSING_START_TAG_STATE); - return; - case Characters.EQUALS_SIGN: - tokenizer.setState(BEFORE_ATTRIBUTE_VALUE_STATE); - return; - case Characters.GREATERTHAN_SIGN: - tokenizer.setState(DATA_STATE); - tokenizer.addCurrentAttributeAndEmitToken(); - return; - case Characters.NULL: - tokenizer.emitParseError(); - tokenizer.appendCurrentAttributeName(Characters.REPLACEMENT_CHARACTER); - return; - case Characters.QUOTATION_MARK: - case Characters.APOSTROPHE: - case Characters.LESSTHAN_SIGN: - tokenizer.emitParseError(); - tokenizer.appendCurrentAttributeName(chr); - return; - case Characters.EOF: - tokenizer.emitParseErrorAndSetState(DATA_STATE); - processedInputStream.reconsume(chr); - return; - default: - tokenizer.appendCurrentAttributeName(chr); - break; - } - } while (true); + int chr = processedInputStream.readUntilAttributeName(tokenizer.currentAttributeName); + switch (chr) { + case Characters.TAB: + case Characters.LF: + case Characters.FF: + case Characters.SPACE: + tokenizer.setState(AFTER_ATTRIBUTE_NAME_STATE); + return; + case Characters.SOLIDUS: + tokenizer.setState(SELF_CLOSING_START_TAG_STATE); + return; + case Characters.EQUALS_SIGN: + tokenizer.setState(BEFORE_ATTRIBUTE_VALUE_STATE); + return; + case Characters.GREATERTHAN_SIGN: + tokenizer.setState(DATA_STATE); + tokenizer.addCurrentAttributeAndEmitToken(); + return; + case Characters.NULL: + tokenizer.emitParseError(); + tokenizer.appendCurrentAttributeName(Characters.REPLACEMENT_CHARACTER); + return; + case Characters.QUOTATION_MARK: + case Characters.APOSTROPHE: + case Characters.LESSTHAN_SIGN: + tokenizer.emitParseError(); + tokenizer.appendCurrentAttributeName(chr); + return; + case Characters.EOF: + tokenizer.emitParseErrorAndSetState(DATA_STATE); + processedInputStream.reconsume(chr); + return; + default: + break; + } } static void handleAfterAttributeNameState(Tokenizer tokenizer, ProcessedInputStream processedInputStream) { From 24c6c346b328e55f3672d38af0a0849e24719c46 Mon Sep 17 00:00:00 2001 From: Sylvain Jermini Date: Sun, 24 May 2026 15:04:52 +0200 Subject: [PATCH 2/2] refactor --- .../digitalfondue/jfiveparse/Characters.java | 2 + .../jfiveparse/ProcessedInputStream.java | 76 ++++++++++--------- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/src/main/java/ch/digitalfondue/jfiveparse/Characters.java b/src/main/java/ch/digitalfondue/jfiveparse/Characters.java index f9eaf16..d7380bd 100644 --- a/src/main/java/ch/digitalfondue/jfiveparse/Characters.java +++ b/src/main/java/ch/digitalfondue/jfiveparse/Characters.java @@ -24,7 +24,9 @@ final class Characters { static final char EXCLAMATION_MARK = 0x0021; /** & */ static final char AMPERSAND = 0x0026; + /** < */ static final char LESSTHAN_SIGN = 0x003C; + /** > */ static final char GREATERTHAN_SIGN = 0x003E; static final char SOLIDUS = 0x002F; static final char QUESTION_MARK = 0x003F; diff --git a/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java b/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java index 714daca..50c86fa 100644 --- a/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java +++ b/src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java @@ -52,12 +52,18 @@ int readUntilAttributeValue(ResizableCharBuilder builder, int quoteChar, boolean return readUntilAttributeValueInternal(builder, quoteChar, stopAtAmpersand); } + private static boolean mustStopReadUntilAttributeValueUnquoted(int chr) { + return Common.isTabLfFfCrOrSpace(chr) || chr == Characters.AMPERSAND || chr == Characters.GREATERTHAN_SIGN + || chr == Characters.NULL || chr == Characters.QUOTATION_MARK || + chr == Characters.APOSTROPHE || chr == Characters.LESSTHAN_SIGN || + chr == Characters.EQUALS_SIGN || chr == Characters.GRAVE_ACCENT || chr == Characters.EOF; + } + int readUntilAttributeValueUnquoted(ResizableCharBuilder builder) { int chr; while (!buffer.isEmpty) { chr = buffer.removeFirst(); - if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.AMPERSAND || chr == '>' || chr == Characters.NULL || - chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == '=' || chr == '`' || chr == Characters.EOF) { + if (mustStopReadUntilAttributeValueUnquoted(chr)) { return chr; } builder.append((char) chr); @@ -77,12 +83,16 @@ int readUntilTagName(ResizableCharBuilder builder) { return readUntilTagNameInternal(builder); } + private static boolean mustStopReadUntilAttributeName(int chr) { + return Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == Characters.EQUALS_SIGN || chr == Characters.GREATERTHAN_SIGN || chr == Characters.NULL || + chr == Characters.QUOTATION_MARK || chr == Characters.APOSTROPHE || chr == Characters.LESSTHAN_SIGN || chr == Characters.EOF; + } + int readUntilAttributeName(ResizableCharBuilder builder) { int chr; while (!buffer.isEmpty) { chr = buffer.removeFirst(); - if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == '=' || chr == '>' || chr == Characters.NULL || - chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == Characters.EOF) { + if (mustStopReadUntilAttributeName(chr)) { return chr; } builder.append((char) chr); @@ -94,7 +104,7 @@ int readUntilComment(ResizableCharBuilder builder) { int chr; while (!buffer.isEmpty) { chr = buffer.removeFirst(); - if (chr == '-' || chr == Characters.NULL || chr == Characters.EOF) { + if (chr == Characters.HYPHEN_MINUS || chr == Characters.NULL || chr == Characters.EOF) { return chr; } builder.append((char) chr); @@ -104,70 +114,68 @@ int readUntilComment(ResizableCharBuilder builder) { protected int readUntilInternal(ResizableCharBuilder builder, boolean stopAtAmpersand, boolean stopAtLessThan) { int chr; - while ((chr = read()) != -1) { + while ((chr = read()) != Characters.EOF) { if ((stopAtAmpersand && chr == Characters.AMPERSAND) || (stopAtLessThan && chr == Characters.LESSTHAN_SIGN) || chr == Characters.NULL) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } protected int readUntilAttributeValueInternal(ResizableCharBuilder builder, int quoteChar, boolean stopAtAmpersand) { int chr; - while ((chr = read()) != -1) { + while ((chr = read()) != Characters.EOF) { if (chr == quoteChar || (stopAtAmpersand && chr == Characters.AMPERSAND) || chr == Characters.NULL) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder builder) { int chr; - while ((chr = read()) != -1) { - if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.AMPERSAND || chr == '>' || chr == Characters.NULL || - chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == '=' || chr == '`') { + while ((chr = read()) != Characters.EOF) { + if (mustStopReadUntilAttributeValueUnquoted(chr)) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } protected int readUntilTagNameInternal(ResizableCharBuilder builder) { int chr; - while ((chr = read()) != -1) { + while ((chr = read()) != Characters.EOF) { if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == Characters.GREATERTHAN_SIGN || chr == Characters.NULL) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) { int chr; - while ((chr = read()) != -1) { - if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.SOLIDUS || chr == '=' || chr == '>' || chr == Characters.NULL || - chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN) { + while ((chr = read()) != Characters.EOF) { + if (mustStopReadUntilAttributeName(chr)) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } protected int readUntilCommentInternal(ResizableCharBuilder builder) { int chr; - while ((chr = read()) != -1) { - if (chr == '-' || chr == Characters.NULL) { + while ((chr = read()) != Characters.EOF) { + if (chr == Characters.HYPHEN_MINUS || chr == Characters.NULL) { return chr; } builder.append((char) chr); } - return -1; + return Characters.EOF; } // @@ -234,7 +242,7 @@ static class StringProcessedInputStream extends ProcessedInputStream { // used for test protected int getCharAt(int pos) { if (pos >= length) { - return -1; + return Characters.EOF; } return input[pos]; } @@ -244,7 +252,7 @@ protected int read() { if (pos < length) { return input[pos++]; } - return -1; + return Characters.EOF; } @Override @@ -262,7 +270,7 @@ protected int readUntilInternal(ResizableCharBuilder builder, boolean stopAtAmpe } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } @Override @@ -280,7 +288,7 @@ protected int readUntilAttributeValueInternal(ResizableCharBuilder builder, int } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } @Override @@ -289,8 +297,7 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build int i = pos; while (i < n) { char c = input[i]; - if (Common.isTabLfFfCrOrSpace(c) || c == Characters.AMPERSAND || c == '>' || c == Characters.NULL || - c == '"' || c == '\'' || c == Characters.LESSTHAN_SIGN || c == '=' || c == '`') { + if (mustStopReadUntilAttributeValueUnquoted(c)) { builder.append(input, pos, i - pos); pos = i + 1; return c; @@ -299,7 +306,7 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } @Override @@ -317,7 +324,7 @@ protected int readUntilTagNameInternal(ResizableCharBuilder builder) { } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } @Override @@ -326,9 +333,8 @@ protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) { int i = pos; while (i < n) { char c = input[i]; - if (Common.isTabLfFfCrOrSpace(c) || c == Characters.SOLIDUS || c == '=' || c == '>' || c == Characters.NULL || - c == '"' || c == '\'' || c == Characters.LESSTHAN_SIGN) { - builder.append(input, pos, i - pos); + if (mustStopReadUntilAttributeName(c)) { + builder.append(input, pos, i - pos); // append remaining pos = i + 1; return c; } @@ -336,7 +342,7 @@ protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) { } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } @Override @@ -354,7 +360,7 @@ protected int readUntilCommentInternal(ResizableCharBuilder builder) { } builder.append(input, pos, n - pos); pos = n; - return -1; + return Characters.EOF; } }