/
TreeBuilder.java
145 lines (122 loc) · 4.6 KB
/
TreeBuilder.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for tree builders. Drives a parse by reading tokens from a {@link Tokeniser} and passing each
 * one to {@link #process(Token)}, which subclasses implement.
 *
 * @author Jonathan Hedley
 */
abstract class TreeBuilder {
    protected Parser parser;
    CharacterReader reader;
    Tokeniser tokeniser;
    protected Document doc; // current doc we are building into
    protected ArrayList<Element> stack; // the stack of open elements
    protected String baseUri; // current base uri, for creating new elements
    protected Token currentToken; // currentToken is used only for error tracking.
    protected ParseSettings settings;

    // Recycled tokens used by the processStartTag / processEndTag helpers below. They are never reassigned.
    private final Token.StartTag start = new Token.StartTag(); // start tag to process
    private final Token.EndTag end = new Token.EndTag();

    /**
     * Supplies the default parse settings for this tree builder.
     *
     * @return the default settings
     */
    abstract ParseSettings defaultSettings();

    /**
     * Resets this builder's state ready for a new parse: creates a fresh {@link Document} for the base URI,
     * wraps the input in a {@link CharacterReader}, creates a new {@link Tokeniser} reporting into the parser's
     * error list, and starts with an empty stack of open elements.
     *
     * @param input   the input to parse; must not be null
     * @param baseUri base URI for the new document; must not be null
     * @param parser  the parser supplying settings and the error list; must not be null
     */
    @ParametersAreNonnullByDefault
    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
        Validate.notNull(input, "String input must not be null");
        Validate.notNull(baseUri, "BaseURI must not be null");
        Validate.notNull(parser);

        doc = new Document(baseUri);
        doc.parser(parser);
        this.parser = parser;
        settings = parser.settings();
        reader = new CharacterReader(input);
        currentToken = null;
        tokeniser = new Tokeniser(reader, parser.getErrors());
        stack = new ArrayList<>(32);
        this.baseUri = baseUri;
    }

    /**
     * Runs a complete parse of the input and returns the resulting document. The reader is closed and the
     * per-parse state (reader, tokeniser, stack) is released afterwards, whether or not the parse succeeded.
     *
     * @param input   the input to parse
     * @param baseUri base URI for the new document
     * @param parser  the parser supplying settings and the error list
     * @return the document that was built
     */
    @ParametersAreNonnullByDefault
    Document parse(Reader input, String baseUri, Parser parser) {
        // Initialise outside the try: if validation fails there is no reader to clean up yet.
        initialiseParse(input, baseUri, parser);
        try {
            runParser();
        } finally {
            // tidy up - as the Parser and Treebuilder are retained in document for settings / fragments.
            // Done in a finally so that a parse which throws does not leave the reader open or keep the
            // parse state referenced.
            reader.close();
            reader = null;
            tokeniser = null;
            stack = null;
        }
        return doc;
    }

    /**
     * Create a new copy of this TreeBuilder
     *
     * @return copy, ready for a new parse
     */
    abstract TreeBuilder newInstance();

    /**
     * Parses a fragment of input; implemented by subclasses.
     *
     * @param inputFragment the fragment to parse
     * @param context       the context element for the fragment
     * @param baseUri       base URI for the parse
     * @param parser        the parser in use
     * @return the parsed nodes
     */
    abstract List<Node> parseFragment(String inputFragment, Element context, String baseUri, Parser parser);

    /**
     * Reads tokens from the tokeniser and passes each to {@link #process(Token)} until a token of type EOF
     * has been processed. The EOF token itself is processed before the loop exits.
     */
    protected void runParser() {
        final Tokeniser tokeniser = this.tokeniser;
        final Token.TokenType eof = Token.TokenType.EOF;

        while (true) {
            Token token = tokeniser.read();
            process(token);
            token.reset(); // the token is reset once it has been processed

            if (token.type == eof) {
                break;
            }
        }
    }

    /**
     * Handles a single token; implemented by subclasses.
     *
     * @param token the token to process
     * @return the subclass-defined result of processing the token
     */
    protected abstract boolean process(Token token);

    /**
     * Processes a synthesised start tag with the given name, reusing this builder's recycled start tag token
     * unless that token is the one currently being processed.
     *
     * @param name tag name
     * @return the result of {@link #process(Token)} for the start tag
     */
    protected boolean processStartTag(String name) {
        final Token.StartTag start = this.start;
        if (currentToken == start) { // don't recycle an in-use token
            return process(new Token.StartTag().name(name));
        }
        return process(start.reset().name(name));
    }

    /**
     * Processes a synthesised start tag with the given name and attributes, reusing this builder's recycled
     * start tag token unless that token is the one currently being processed.
     *
     * @param name  tag name
     * @param attrs attributes for the tag
     * @return the result of {@link #process(Token)} for the start tag
     */
    public boolean processStartTag(String name, Attributes attrs) {
        final Token.StartTag start = this.start;
        if (currentToken == start) { // don't recycle an in-use token
            return process(new Token.StartTag().nameAttr(name, attrs));
        }
        start.reset();
        start.nameAttr(name, attrs);
        return process(start);
    }

    /**
     * Processes a synthesised end tag with the given name, reusing this builder's recycled end tag token
     * unless that token is the one currently being processed.
     *
     * @param name tag name
     * @return the result of {@link #process(Token)} for the end tag
     */
    protected boolean processEndTag(String name) {
        final Token.EndTag end = this.end;
        if (currentToken == end) { // don't recycle an in-use token
            return process(new Token.EndTag().name(name));
        }
        return process(end.reset().name(name));
    }

    /**
     * Gets the element on top of the stack of open elements.
     *
     * @return the last element on the stack, or {@code null} if the stack is empty
     */
    @Nullable protected Element currentElement() {
        int size = stack.size();
        return size > 0 ? stack.get(size - 1) : null;
    }

    /**
     * Checks if the Current Element's normal name equals the supplied name.
     *
     * @param normalName name to check
     * @return true if there is a current element on the stack, and its name equals the supplied
     */
    protected boolean currentElementIs(String normalName) {
        Element current = currentElement();
        return current != null && current.normalName().equals(normalName);
    }

    /**
     * If the parser is tracking errors, add an error at the current position.
     *
     * @param msg error message
     */
    protected void error(String msg) {
        ParseErrorList errors = parser.getErrors();
        if (errors.canAddError()) {
            errors.add(new ParseError(reader.pos(), msg));
        }
    }

    /**
     * (An internal method, visible for Element. For HTML parse, signals that script and style text should be
     * treated as Data Nodes).
     *
     * @param normalName the normalised tag name to check
     * @return always {@code false} in this base implementation
     */
    protected boolean isContentForTagData(String normalName) {
        return false;
    }
}