1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
package org.jsoup.nodes;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
/**
 A text node.

 @author Jonathan Hedley, jonathan@hedley.net */
public class TextNode extends Node {
    /*
    TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use
    memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create
    them as needed on the fly.
    */
    private static final String TEXT_KEY = "text";
    String text;

    /**
     Create a new TextNode representing the supplied (unencoded) text.

     @param text raw text
     @param baseUri base uri
     @see #createFromEncoded(String, String)
     */
    public TextNode(String text, String baseUri) {
        this.baseUri = baseUri;
        this.text = text;
    }

    /**
     * Get the node name of this node.
     * @return the constant {@code "#text"}
     */
    public String nodeName() {
        return "#text";
    }

    /**
     * Get the text content of this text node.
     * @return Unencoded, normalised text.
     * @see TextNode#getWholeText()
     */
    public String text() {
        return normaliseWhitespace(getWholeText());
    }

    /**
     * Set the text content of this text node.
     * @param text unencoded text
     * @return this, for chaining
     */
    public TextNode text(String text) {
        this.text = text;
        // Once attributes have been created, getWholeText() reads from them, so keep the attribute in step.
        if (attributes != null) {
            attributes.put(TEXT_KEY, text);
        }
        return this;
    }

    /**
     Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
     <p>
     While no attributes have been created the value comes from the text field; afterwards it is read from the
     {@code "text"} attribute.

     @return text
     */
    public String getWholeText() {
        return attributes == null ? text : attributes.get(TEXT_KEY);
    }

    /**
     Test if this text node is blank -- that is, empty or only whitespace (including newlines).

     @return true if this node is empty or only whitespace, false if it contains any text content.
     */
    public boolean isBlank() {
        return StringUtil.isBlank(getWholeText());
    }

    /**
     * Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
     * original text up to the offset, and will have a new text node sibling containing the text after the offset.
     * <p>
     * If this node has no parent, the new node is returned without being attached anywhere.
     * @param offset string offset point to split node at; must be at least zero and less than the length of
     * {@link #getWholeText()}. Validation is delegated to {@code Validate.isTrue}.
     * @return the newly created text node containing the text after the offset.
     */
    public TextNode splitText(int offset) {
        // Validate against the same string that is split below. The text field can be out of date once the
        // attributes exist and the "text" attribute has been changed through the generic attribute accessors.
        final String wholeText = getWholeText();
        Validate.isTrue(offset >= 0, "Split offset must be not be negative");
        Validate.isTrue(offset < wholeText.length(), "Split offset must not be greater than current text length");

        String head = wholeText.substring(0, offset);
        String tail = wholeText.substring(offset);
        text(head);
        TextNode tailNode = new TextNode(tail, this.baseUri());
        if (parent() != null) {
            parent().addChildren(siblingIndex() + 1, tailNode);
        }

        return tailNode;
    }

    /**
     * Append the escaped text of this node to the accumulator.
     * @param accum buffer receiving the output
     * @param depth depth passed through to {@code indent} when indentation is written
     * @param out output settings used for escaping and to decide whether pretty-printing applies
     */
    void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
        String html = Entities.escape(getWholeText(), out);
        // Only collapse whitespace when pretty-printing and the parent element does not preserve whitespace.
        if (out.prettyPrint() && parent() instanceof Element && !((Element) parent()).preserveWhitespace()) {
            html = normaliseWhitespace(html);
        }

        // Indent only non-blank text that is the first child of an element whose tag formats as a block.
        if (out.prettyPrint() && siblingIndex() == 0 && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock() && !isBlank()) {
            indent(accum, depth, out);
        }
        accum.append(html);
    }

    /**
     * Intentionally empty: this node writes nothing after its text.
     */
    void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}

    /**
     * Get the string form of this node.
     * @return the result of {@code outerHtml()}
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     * Create a new TextNode from HTML encoded (aka escaped) data.
     * @param encodedText Text containing encoded HTML (e.g. {@code &lt;})
     * @param baseUri base uri for the new node
     * @return TextNode containing unencoded data (e.g. {@code <})
     */
    public static TextNode createFromEncoded(String encodedText, String baseUri) {
        String text = Entities.unescape(encodedText);
        return new TextNode(text, baseUri);
    }

    /**
     * Normalise whitespace by delegating to {@code StringUtil.normaliseWhitespace}.
     * @param text text to normalise
     * @return the normalised text
     */
    static String normaliseWhitespace(String text) {
        return StringUtil.normaliseWhitespace(text);
    }

    /**
     * Remove any run of whitespace (regex {@code \s}) at the start of the text.
     * @param text text to strip
     * @return the text without leading whitespace
     */
    static String stripLeadingWhitespace(String text) {
        return text.replaceFirst("^\\s+", "");
    }

    /**
     * Test whether the buffer ends with a space character. Only {@code ' '} is matched; other whitespace
     * characters such as tabs or newlines are not.
     * @param sb buffer to inspect
     * @return true if the buffer is non-empty and its last character is a space
     */
    static boolean lastCharIsWhitespace(StringBuilder sb) {
        return sb.length() != 0 && sb.charAt(sb.length() - 1) == ' ';
    }

    // attribute fiddling. create on first access.
    // The new attribute set is seeded with the current text under TEXT_KEY, so getWholeText() keeps returning the
    // same value after the switch from the field to the attribute.
    private void ensureAttributes() {
        if (attributes == null) {
            attributes = new Attributes();
            attributes.put(TEXT_KEY, text);
        }
    }

    @Override
    public String attr(String attributeKey) {
        ensureAttributes();
        return super.attr(attributeKey);
    }

    @Override
    public Attributes attributes() {
        ensureAttributes();
        return super.attributes();
    }

    @Override
    public Node attr(String attributeKey, String attributeValue) {
        ensureAttributes();
        return super.attr(attributeKey, attributeValue);
    }

    @Override
    public boolean hasAttr(String attributeKey) {
        ensureAttributes();
        return super.hasAttr(attributeKey);
    }

    @Override
    public Node removeAttr(String attributeKey) {
        ensureAttributes();
        return super.removeAttr(attributeKey);
    }

    @Override
    public String absUrl(String attributeKey) {
        ensureAttributes();
        return super.absUrl(attributeKey);
    }
}
|