Skip to content

Commit 812dd09

Browse files
committed
reuse the plaintext scanner (like the other scanners) and avoid toLowerCase if not needed
1 parent 6ac9284 commit 812dd09

1 file changed

Lines changed: 18 additions & 7 deletions

File tree

src/main/java/org/htmlunit/cyberneko/HTMLScanner.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,11 @@ public class HTMLScanner implements XMLDocumentSource, XMLLocator, HTMLComponent
493493
*/
494494
protected final ScriptScanner fScriptScanner = new ScriptScanner();
495495

496+
/**
497+
* Special scanner used for plaintext tags.
498+
*/
499+
protected final PlainTextScanner fPlainTextScanner = new PlainTextScanner();
500+
496501
// temp vars
497502

498503
/** String buffer. */
@@ -614,8 +619,9 @@ public void evaluateInputSource(final XMLInputSource inputSource) {
614619
catch (final IOException e) {
615620
// ignore
616621
}
622+
617623
// preserve the plaintext scanning process
618-
setScanner(fScanner instanceof PlainTextScanner ? new PlainTextScanner() : previousScanner);
624+
setScanner(fScanner == fPlainTextScanner ? fPlainTextScanner : previousScanner);
619625
setScannerState(previousScannerState);
620626
fCurrentEntity = previousEntity;
621627
}
@@ -924,7 +930,7 @@ public void setInputSource(final XMLInputSource source) throws IOException {
924930
setScanner(fScriptScanner);
925931
}
926932
else if ("plaintext".equals(scannerTagLC)) {
927-
setScanner(new PlainTextScanner());
933+
setScanner(fPlainTextScanner);
928934
}
929935
else {
930936
setScanner(fSpecialScanner.setElementName(fFragmentSpecialScannerTag_, scannerTagLC));
@@ -2392,7 +2398,7 @@ else if (c == '/') {
23922398
final String enameLC;
23932399
if (SCAN_TRUE == scanStartElement) {
23942400
ename = scanStartElement_;
2395-
enameLC = ename.toLowerCase(Locale.ROOT);
2401+
enameLC = (fNamesElems == NAMES_LOWERCASE) ? ename : ename.toLowerCase(Locale.ROOT);
23962402
}
23972403
else {
23982404
ename = null;
@@ -2430,7 +2436,7 @@ else if ("title".equals(enameLC)
24302436
setScannerState(STATE_CONTENT);
24312437
}
24322438
else if ("plaintext".equals(enameLC)) {
2433-
setScanner(new PlainTextScanner());
2439+
setScanner(fPlainTextScanner);
24342440
}
24352441
else if (ename != null) {
24362442
final Element elem =
@@ -3000,7 +3006,9 @@ protected int scanStartElement(final boolean[] empty) throws IOException {
30003006
fBeginCharacterOffset = beginCharacterOffset;
30013007
if (fElementDepth == -1) {
30023008
if (fByteStream != null) {
3003-
final String enameLC = scanStartElement_.toLowerCase(Locale.ROOT);
3009+
final String enameLC = (fNamesElems == NAMES_LOWERCASE)
3010+
? scanStartElement_
3011+
: scanStartElement_.toLowerCase(Locale.ROOT);
30043012

30053013
if (!fIgnoreSpecifiedCharset_ && "meta".equals(enameLC)) {
30063014
if (DEBUG_CHARSET) {
@@ -3053,7 +3061,9 @@ else if ("body".equals(enameLC)) {
30533061
if (DEBUG_CALLBACKS) {
30543062
System.out.println("startElement(" + qName_ + ',' + attributes_ + ")");
30553063
}
3056-
if (empty[0] && !"br".equalsIgnoreCase(scanStartElement_)) {
3064+
if (empty[0] && !(fNamesElems == NAMES_LOWERCASE
3065+
? "br".equals(scanStartElement_)
3066+
: "br".equalsIgnoreCase(scanStartElement_))) {
30573067
fDocumentHandler.emptyElement(qName_, attributes_, locationAugs(fCurrentEntity));
30583068
}
30593069
else {
@@ -3664,11 +3674,12 @@ public class PlainTextScanner implements Scanner {
36643674

36653675
@Override
36663676
public int scan(final boolean complete) throws IOException {
3677+
xmlString_.clear();
36673678
scanCharacters(xmlString_, complete);
36683679
return SCAN_FALSE;
36693680
}
36703681

3671-
protected void scanCharacters(final XMLString buffer, final boolean complete) throws IOException {
3682+
private void scanCharacters(final XMLString buffer, final boolean complete) throws IOException {
36723683
while (true) {
36733684
final int c = fCurrentEntity.read();
36743685

0 commit comments

Comments
 (0)