1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
package org.jsoup.parser;
import org.jsoup.helper.DescendableLinkedList;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for parsers that pull tokens from a {@link Tokeniser} and build a
 * {@link Document} from them. Subclasses provide the per-token handling by
 * implementing {@link #process(Token)}.
 *
 * @author Jonathan Hedley
 */
abstract class TreeBuilder {
    /** Character source over the input string; created in {@code initialiseParse}. */
    CharacterReader reader;

    /** Token source reading from {@link #reader}; created in {@code initialiseParse}. */
    Tokeniser tokeniser;

    /** The document currently being built into. */
    protected Document doc;

    /** The stack of open elements. */
    protected DescendableLinkedList<Element> stack;

    /** Current base URI, used when creating new elements. */
    protected String baseUri;

    /** Used only for error tracking; this class never assigns it. */
    protected Token currentToken;

    /**
     * Error list handed to the tokeniser.
     * NOTE(review): historically described as "null when not tracking errors", but the
     * two-argument {@code parse} passes {@code ParseErrorList.noTracking()} - confirm which holds.
     */
    protected ParseErrorList errors;

    /**
     * Resets all parse state for a fresh run: a new document, reader, tokeniser and
     * an empty element stack.
     *
     * @param input   the text to parse; checked with {@code Validate.notNull}
     * @param baseUri the base URI for the new document; checked with {@code Validate.notNull}
     * @param errors  the error list passed on to the tokeniser (not null-checked here)
     */
    protected void initialiseParse(String input, String baseUri, ParseErrorList errors) {
        // Argument checks come first so no state is touched when they fail.
        Validate.notNull(input, "String input must not be null");
        Validate.notNull(baseUri, "BaseURI must not be null");

        this.doc = new Document(baseUri);
        this.reader = new CharacterReader(input);
        this.errors = errors;
        this.tokeniser = new Tokeniser(this.reader, errors);
        this.stack = new DescendableLinkedList<Element>();
        this.baseUri = baseUri;
    }

    /**
     * Parses the input using the list returned by {@code ParseErrorList.noTracking()}.
     *
     * @param input   the text to parse
     * @param baseUri the base URI for the resulting document
     * @return the document that was built
     */
    Document parse(String input, String baseUri) {
        ParseErrorList untracked = ParseErrorList.noTracking();
        return parse(input, baseUri, untracked);
    }

    /**
     * Initialises the parse state, runs the parser to completion and returns the result.
     *
     * @param input   the text to parse
     * @param baseUri the base URI for the resulting document
     * @param errors  the error list passed on to the tokeniser
     * @return the document that was built
     */
    Document parse(String input, String baseUri, ParseErrorList errors) {
        initialiseParse(input, baseUri, errors);
        runParser();
        return this.doc;
    }

    /**
     * Reads tokens one at a time and hands each to {@link #process(Token)}. The EOF
     * token is processed as well; the loop stops once it has been handled.
     */
    protected void runParser() {
        Token next;
        do {
            next = tokeniser.read();
            process(next); // result is intentionally not inspected here
        } while (next.type != Token.TokenType.EOF);
    }

    /**
     * Handles a single token read from the tokeniser.
     *
     * @param token the token to handle
     * @return an implementation-defined flag; {@code runParser} ignores it
     */
    protected abstract boolean process(Token token);

    /**
     * Returns the last element on the stack of open elements.
     * NOTE(review): presumably throws if the stack is empty, as
     * {@code LinkedList.getLast()} does - confirm against DescendableLinkedList.
     *
     * @return the element at the end of {@link #stack}
     */
    protected Element currentElement() {
        Element last = stack.getLast();
        return last;
    }
}
|