Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/ch/digitalfondue/jfiveparse/Characters.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ final class Characters {
static final char EXCLAMATION_MARK = 0x0021;
/** & */
static final char AMPERSAND = 0x0026;
/** < */
static final char LESSTHAN_SIGN = 0x003C;
/** > */
static final char GREATERTHAN_SIGN = 0x003E;
/** / */
static final char SOLIDUS = 0x002F;
/** ? */
static final char QUESTION_MARK = 0x003F;
Expand Down
166 changes: 149 additions & 17 deletions src/main/java/ch/digitalfondue/jfiveparse/ProcessedInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,51 +52,130 @@ int readUntilAttributeValue(ResizableCharBuilder builder, int quoteChar, boolean
return readUntilAttributeValueInternal(builder, quoteChar, stopAtAmpersand);
}

/**
 * Decides whether {@code chr} terminates a run of unquoted attribute value characters.
 *
 * @param chr the character (or {@link Characters#EOF}) to classify
 * @return {@code true} for tab/LF/FF/CR/space, {@code &}, {@code >}, NULL, {@code "},
 *         {@code '}, {@code <}, {@code =}, {@code `} and EOF; {@code false} otherwise
 */
private static boolean mustStopReadUntilAttributeValueUnquoted(int chr) {
    // The checks are kept in the same order as a single short-circuit expression would use.
    if (Common.isTabLfFfCrOrSpace(chr)) {
        return true;
    }
    if (chr == Characters.AMPERSAND || chr == Characters.GREATERTHAN_SIGN || chr == Characters.NULL) {
        return true;
    }
    if (chr == Characters.QUOTATION_MARK || chr == Characters.APOSTROPHE || chr == Characters.LESSTHAN_SIGN) {
        return true;
    }
    return chr == Characters.EQUALS_SIGN || chr == Characters.GRAVE_ACCENT || chr == Characters.EOF;
}

/**
 * Appends characters to {@code builder} until one of them ends an unquoted attribute value.
 * Characters are first taken from {@code buffer}; once it is empty the work is handed over to
 * {@link #readUntilAttributeValueUnquotedInternal(ResizableCharBuilder)}.
 *
 * @param builder receives every character that does not stop the scan
 * @return the character that stopped the scan (it is not appended), or whatever the
 *         internal variant returns
 */
int readUntilAttributeValueUnquoted(ResizableCharBuilder builder) {
int chr;
while (!buffer.isEmpty) {
chr = buffer.removeFirst();
// NOTE(review): this listing looks like a rendered diff without +/- markers. The two-line
// inline condition below appears to be the removed version and the helper call after it
// the replacement - confirm against the repository before compiling.
if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.AMPERSAND || chr == '>' || chr == Characters.NULL ||
chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == '=' || chr == '`' || chr == Characters.EOF) {
if (mustStopReadUntilAttributeValueUnquoted(chr)) {
return chr;
}
builder.append((char) chr);
}
// buffer is drained: continue with the underlying input
return readUntilAttributeValueUnquotedInternal(builder);
}

/**
 * Collects tag name characters into {@code builder}.
 * Characters held in {@code buffer} are consumed first; if none of them ends the tag name,
 * scanning continues in {@link #readUntilTagNameInternal(ResizableCharBuilder)}.
 *
 * @param builder receives every character that is part of the tag name
 * @return the terminating character (tab/LF/FF/CR/space, {@code /}, {@code >}, NULL or EOF),
 *         which is not appended, or the result of the internal variant
 */
int readUntilTagName(ResizableCharBuilder builder) {
    while (!buffer.isEmpty) {
        final int current = buffer.removeFirst();
        final boolean endsTagName = Common.isTabLfFfCrOrSpace(current)
                || current == Characters.SOLIDUS
                || current == Characters.GREATERTHAN_SIGN
                || current == Characters.NULL
                || current == Characters.EOF;
        if (endsTagName) {
            return current;
        }
        builder.append((char) current);
    }
    return readUntilTagNameInternal(builder);
}

/**
 * Decides whether {@code chr} terminates a run of attribute name characters.
 *
 * @param chr the character (or {@link Characters#EOF}) to classify
 * @return {@code true} for tab/LF/FF/CR/space, {@code /}, {@code =}, {@code >}, NULL,
 *         {@code "}, {@code '}, {@code <} and EOF; {@code false} otherwise
 */
private static boolean mustStopReadUntilAttributeName(int chr) {
    // The checks are kept in the same order as a single short-circuit expression would use.
    if (Common.isTabLfFfCrOrSpace(chr)) {
        return true;
    }
    if (chr == Characters.SOLIDUS || chr == Characters.EQUALS_SIGN || chr == Characters.GREATERTHAN_SIGN) {
        return true;
    }
    if (chr == Characters.NULL || chr == Characters.QUOTATION_MARK || chr == Characters.APOSTROPHE) {
        return true;
    }
    return chr == Characters.LESSTHAN_SIGN || chr == Characters.EOF;
}

/**
 * Collects attribute name characters into {@code builder}.
 * Characters held in {@code buffer} are consumed first; when it runs empty the scan is
 * continued by {@link #readUntilAttributeNameInternal(ResizableCharBuilder)}.
 *
 * @param builder receives every character that is part of the attribute name
 * @return the character for which {@code mustStopReadUntilAttributeName} is true
 *         (it is not appended), or the result of the internal variant
 */
int readUntilAttributeName(ResizableCharBuilder builder) {
    for (;;) {
        if (buffer.isEmpty) {
            // nothing left in the buffer: go on with the underlying input
            return readUntilAttributeNameInternal(builder);
        }
        final int next = buffer.removeFirst();
        if (mustStopReadUntilAttributeName(next)) {
            return next;
        }
        builder.append((char) next);
    }
}

/**
 * Collects comment characters into {@code builder}.
 * Characters held in {@code buffer} are consumed first; when it runs empty the scan is
 * continued by {@link #readUntilCommentInternal(ResizableCharBuilder)}.
 *
 * @param builder receives every character that does not stop the scan
 * @return the terminating character ({@code -}, NULL or EOF), which is not appended,
 *         or the result of the internal variant
 */
int readUntilComment(ResizableCharBuilder builder) {
    while (!buffer.isEmpty) {
        final int current = buffer.removeFirst();
        final boolean plainCommentChar = current != Characters.HYPHEN_MINUS
                && current != Characters.NULL
                && current != Characters.EOF;
        if (!plainCommentChar) {
            return current;
        }
        builder.append((char) current);
    }
    return readUntilCommentInternal(builder);
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until a
 * stop character or the end of input is reached.
 *
 * @param builder         receives every character that does not stop the scan
 * @param stopAtAmpersand when {@code true}, {@code &} stops the scan
 * @param stopAtLessThan  when {@code true}, {@code <} stops the scan
 * @return the stop character ({@code &}, {@code <} or NULL, none of which is appended),
 *         or the end-of-input marker when {@code read()} is exhausted
 */
protected int readUntilInternal(ResizableCharBuilder builder, boolean stopAtAmpersand, boolean stopAtLessThan) {
int chr;
// NOTE(review): the two loop headers and the two return statements below appear to be
// old/new pairs from a rendered diff (literal -1 replaced by Characters.EOF) - confirm
// against the repository; presumably Characters.EOF equals -1.
while ((chr = read()) != -1) {
while ((chr = read()) != Characters.EOF) {
// NULL always stops the scan; '&' and '<' only when the caller asked for it
if ((stopAtAmpersand && chr == Characters.AMPERSAND) || (stopAtLessThan && chr == Characters.LESSTHAN_SIGN) || chr == Characters.NULL) {
return chr;
}
builder.append((char) chr);
}
return -1;
return Characters.EOF;
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until the
 * closing quote, another stop character or the end of input is reached.
 *
 * @param builder         receives every character that does not stop the scan
 * @param quoteChar       the character that closes the attribute value
 * @param stopAtAmpersand when {@code true}, {@code &} stops the scan
 * @return the stop character ({@code quoteChar}, {@code &} or NULL, none of which is
 *         appended), or the end-of-input marker when {@code read()} is exhausted
 */
protected int readUntilAttributeValueInternal(ResizableCharBuilder builder, int quoteChar, boolean stopAtAmpersand) {
int chr;
// NOTE(review): the two loop headers and the two return statements below appear to be
// old/new pairs from a rendered diff (literal -1 replaced by Characters.EOF) - confirm
// against the repository; presumably Characters.EOF equals -1.
while ((chr = read()) != -1) {
while ((chr = read()) != Characters.EOF) {
if (chr == quoteChar || (stopAtAmpersand && chr == Characters.AMPERSAND) || chr == Characters.NULL) {
return chr;
}
builder.append((char) chr);
}
return -1;
return Characters.EOF;
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until one of
 * them ends an unquoted attribute value or the end of input is reached.
 *
 * @param builder receives every character that does not stop the scan
 * @return the character that stopped the scan (it is not appended), or
 *         {@code Characters.EOF} when {@code read()} is exhausted
 */
protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder builder) {
int chr;
// NOTE(review): the next three lines (loop on -1 with the inline condition) appear to be
// the removed side of a rendered diff; the Characters.EOF loop and the helper call that
// follow look like their replacement - confirm against the repository.
while ((chr = read()) != -1) {
if (Common.isTabLfFfCrOrSpace(chr) || chr == Characters.AMPERSAND || chr == '>' || chr == Characters.NULL ||
chr == '"' || chr == '\'' || chr == Characters.LESSTHAN_SIGN || chr == '=' || chr == '`') {
while ((chr = read()) != Characters.EOF) {
if (mustStopReadUntilAttributeValueUnquoted(chr)) {
return chr;
}
builder.append((char) chr);
}
return Characters.EOF;
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until the
 * tag name ends or the input is exhausted.
 *
 * @param builder receives every character that is part of the tag name
 * @return the terminating character (tab/LF/FF/CR/space, {@code /}, {@code >} or NULL),
 *         which is not appended, or {@code Characters.EOF} at the end of input
 */
protected int readUntilTagNameInternal(ResizableCharBuilder builder) {
    for (int current = read(); current != Characters.EOF; current = read()) {
        final boolean endsTagName = Common.isTabLfFfCrOrSpace(current)
                || current == Characters.SOLIDUS
                || current == Characters.GREATERTHAN_SIGN
                || current == Characters.NULL;
        if (endsTagName) {
            return current;
        }
        builder.append((char) current);
    }
    return Characters.EOF;
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until the
 * attribute name ends or the input is exhausted.
 *
 * @param builder receives every character that is part of the attribute name
 * @return the character for which {@code mustStopReadUntilAttributeName} is true
 *         (it is not appended), or {@code Characters.EOF} at the end of input
 */
protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) {
    for (;;) {
        final int current = read();
        if (current == Characters.EOF) {
            return Characters.EOF;
        }
        if (mustStopReadUntilAttributeName(current)) {
            return current;
        }
        builder.append((char) current);
    }
}

/**
 * Reads characters through {@code read()} and appends them to {@code builder} until a
 * hyphen, a NULL character or the end of input is reached.
 *
 * @param builder receives every character that does not stop the scan
 * @return the stop character ({@code -} or NULL, neither of which is appended), or the
 *         end-of-input marker when {@code read()} is exhausted
 */
protected int readUntilCommentInternal(ResizableCharBuilder builder) {
int chr;
while ((chr = read()) != Characters.EOF) {
if (chr == Characters.HYPHEN_MINUS || chr == Characters.NULL) {
return chr;
}
builder.append((char) chr);
}
// NOTE(review): the two return statements below appear to be an old/new pair from a
// rendered diff (literal -1 replaced by Characters.EOF) - confirm against the repository.
return -1;
return Characters.EOF;
}

//
Expand Down Expand Up @@ -163,7 +242,7 @@ static class StringProcessedInputStream extends ProcessedInputStream {
// used for test
// Returns the element of input at index pos, or the end-of-input marker when pos is at or
// beyond length. Does not modify the stream's own read position (the parameter shadows it).
protected int getCharAt(int pos) {
if (pos >= length) {
// NOTE(review): the two return statements below appear to be an old/new pair from a
// rendered diff (literal -1 replaced by Characters.EOF) - confirm against the repository.
return -1;
return Characters.EOF;
}
return input[pos];
}
Expand All @@ -173,7 +252,7 @@ protected int read() {
if (pos < length) {
return input[pos++];
}
return -1;
return Characters.EOF;
}

@Override
Expand All @@ -191,7 +270,7 @@ protected int readUntilInternal(ResizableCharBuilder builder, boolean stopAtAmpe
}
builder.append(input, pos, n - pos);
pos = n;
return -1;
return Characters.EOF;
}

@Override
Expand All @@ -209,7 +288,7 @@ protected int readUntilAttributeValueInternal(ResizableCharBuilder builder, int
}
builder.append(input, pos, n - pos);
pos = n;
return -1;
return Characters.EOF;
}

@Override
Expand All @@ -218,8 +297,61 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build
int i = pos;
while (i < n) {
char c = input[i];
if (Common.isTabLfFfCrOrSpace(c) || c == Characters.AMPERSAND || c == '>' || c == Characters.NULL ||
c == '"' || c == '\'' || c == Characters.LESSTHAN_SIGN || c == '=' || c == '`') {
if (mustStopReadUntilAttributeValueUnquoted(c)) {
builder.append(input, pos, i - pos);
pos = i + 1;
return c;
}
i++;
}
builder.append(input, pos, n - pos);
pos = n;
return Characters.EOF;
}

/**
 * Array-backed variant: scans {@code input} from {@code pos} for the end of the tag name and
 * appends the characters in between to {@code builder} with a single bulk call.
 *
 * @param builder receives the characters preceding the terminator
 * @return the terminating character (tab/LF/FF/CR/space, {@code /}, {@code >} or NULL),
 *         after moving {@code pos} just past it, or {@code Characters.EOF} when the rest of
 *         the input was consumed without finding one
 */
@Override
protected int readUntilTagNameInternal(ResizableCharBuilder builder) {
    final int start = pos;
    final int end = length;
    for (int cursor = start; cursor < end; cursor++) {
        final char current = input[cursor];
        final boolean endsTagName = Common.isTabLfFfCrOrSpace(current)
                || current == Characters.SOLIDUS
                || current == Characters.GREATERTHAN_SIGN
                || current == Characters.NULL;
        if (endsTagName) {
            builder.append(input, start, cursor - start);
            pos = cursor + 1; // skip the terminator itself
            return current;
        }
    }
    // no terminator found: everything up to the end belongs to the tag name
    builder.append(input, start, end - start);
    pos = end;
    return Characters.EOF;
}

/**
 * Array-backed variant: scans {@code input} from {@code pos} for the end of the attribute
 * name and appends the characters in between to {@code builder} with a single bulk call.
 *
 * @param builder receives the characters preceding the terminator
 * @return the character for which {@code mustStopReadUntilAttributeName} is true, after
 *         moving {@code pos} just past it, or {@code Characters.EOF} when the rest of the
 *         input was consumed without finding one
 */
@Override
protected int readUntilAttributeNameInternal(ResizableCharBuilder builder) {
    final int start = pos;
    final int end = length;
    for (int cursor = start; cursor < end; cursor++) {
        final char current = input[cursor];
        if (!mustStopReadUntilAttributeName(current)) {
            continue;
        }
        // flush what was scanned so far, then step over the terminator
        builder.append(input, start, cursor - start);
        pos = cursor + 1;
        return current;
    }
    // no terminator found: everything up to the end belongs to the attribute name
    builder.append(input, start, end - start);
    pos = end;
    return Characters.EOF;
}

@Override
protected int readUntilCommentInternal(ResizableCharBuilder builder) {
int n = length;
int i = pos;
while (i < n) {
char c = input[i];
if (c == '-' || c == Characters.NULL) {
builder.append(input, pos, i - pos);
pos = i + 1;
return c;
Expand All @@ -228,7 +360,7 @@ protected int readUntilAttributeValueUnquotedInternal(ResizableCharBuilder build
}
builder.append(input, pos, n - pos);
pos = n;
return -1;
return Characters.EOF;
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/ch/digitalfondue/jfiveparse/Tokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ final class Tokenizer {

// tag related
private Attributes attributes;
private final ResizableCharBuilder currentAttributeName = new ResizableCharBuilder();
final ResizableCharBuilder currentAttributeName = new ResizableCharBuilder();
ResizableCharBuilder currentAttributeValue;
private int currentAttributeQuoteType;
private boolean selfClosing;
Expand All @@ -51,7 +51,7 @@ final class Tokenizer {
private StringBuilder doctypeSystemIdentifier;

// comment related
private ResizableCharBuilder commentToken;
ResizableCharBuilder commentToken;

//
private final ResizableCharBuilder temporaryBuffer = new ResizableCharBuilder();
Expand Down
Loading
Loading