Skip to content

Commit 68dcb36

Browse files
committed
experiment: try to avoid a bunch of rewind calls when skipping newlines
1 parent ec67894 commit 68dcb36

1 file changed

Lines changed: 47 additions & 28 deletions

File tree

src/main/java/org/htmlunit/cyberneko/HTMLScanner.java

Lines changed: 47 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2116,42 +2116,60 @@ int skipNewlines() throws IOException {
21162116
}
21172117
}
21182118
char c = getCurrentChar();
2119+
if (c != '\n' && c != '\r') {
2120+
if (DEBUG_BUFFER) {
2121+
debugBufferIfNeeded(")skipNewlines: ", " -> 0");
2122+
}
2123+
return 0;
2124+
}
2125+
21192126
int newlines = 0;
2120-
if (c == '\n' || c == '\r') {
2121-
do {
2122-
c = getNextChar();
2123-
if (c == '\n') {
2124-
newlines++;
2125-
if (offset_ == length_) {
2126-
offset_ = newlines;
2127-
if (load(newlines) == -1) {
2128-
break;
2129-
}
2127+
do {
2128+
c = getCurrentChar(); // peek, no advance
2129+
if (c == '\n') {
2130+
newlines++;
2131+
// move forward
2132+
offset_++;
2133+
characterOffset_++;
2134+
columnNumber_++;
2135+
2136+
if (offset_ == length_) {
2137+
offset_ = newlines;
2138+
if (load(newlines) == -1) {
2139+
break;
21302140
}
21312141
}
2132-
else if (c == '\r') {
2133-
newlines++;
2134-
if (offset_ == length_) {
2135-
offset_ = newlines;
2136-
if (load(newlines) == -1) {
2137-
break;
2138-
}
2139-
}
2140-
if (getCurrentChar() == '\n') {
2141-
// skip
2142-
offset_++;
2143-
characterOffset_++;
2144-
columnNumber_++;
2142+
}
2143+
else if (c == '\r') {
2144+
newlines++;
2145+
// move forward
2146+
offset_++;
2147+
characterOffset_++;
2148+
columnNumber_++;
2149+
2150+
if (offset_ == length_) {
2151+
offset_ = newlines;
2152+
if (load(newlines) == -1) {
2153+
break;
21452154
}
21462155
}
2147-
else {
2148-
rewind();
2149-
break;
2156+
2157+
// \r\n pair: consume the \n
2158+
if (getCurrentChar() == '\n') {
2159+
// move forward
2160+
offset_++;
2161+
characterOffset_++;
2162+
columnNumber_++;
21502163
}
21512164
}
2152-
while (offset_ < length_ - 1);
2153-
incLine(newlines);
2165+
else {
2166+
break;
2167+
}
21542168
}
2169+
while (offset_ < length_ - 1);
2170+
2171+
incLine(newlines);
2172+
21552173
if (DEBUG_BUFFER) {
21562174
debugBufferIfNeeded(")skipNewlines: ", " -> " + newlines);
21572175
}
@@ -2178,6 +2196,7 @@ else if (lastChar == '\r') {
21782196
}
21792197
}
21802198
if (getCurrentChar() == '\n') {
2199+
// move forward
21812200
offset_++;
21822201
characterOffset_++;
21832202
columnNumber_++;

0 commit comments

Comments
 (0)