下面列出了 org.jsoup.nodes.TextNode#getWholeText() 的实例代码;您也可以点击链接到 GitHub 查看源代码,或在右侧发表评论。
/**
 * Copies an allowed node into the destination tree, or counts it as discarded.
 *
 * <ul>
 *   <li>Elements whose tag passes {@code whitelist.isSafeTag} are rebuilt via
 *       {@code createSafeElement}, appended, and become the new {@code destination}.</li>
 *   <li>Text nodes are always copied.</li>
 *   <li>Data nodes are copied only when their parent's node name is a safe tag.</li>
 *   <li>Everything else increments {@code numDiscarded}.</li>
 * </ul>
 *
 * @param source the node being visited
 * @param depth  not used by this method
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element element = (Element) source;
        if (!whitelist.isSafeTag(element.tagName())) {
            // Rejected tag: nothing is appended. The root itself is never counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }

        // Accepted tag: append the safe copy and account for any attributes it dropped.
        ElementMeta safe = createSafeElement(element);
        Element copy = safe.el;
        destination.appendChild(copy);
        numDiscarded += safe.numAttribsDiscarded;
        // Subsequent nodes are appended beneath the copy that was just added.
        destination = copy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // Any other node kind (or a data node under a rejected parent) is dropped and counted.
    numDiscarded++;
}
/**
 * Copies an allowed node into the destination tree, or counts it as discarded.
 *
 * <ul>
 *   <li>Elements whose tag passes {@code whitelist.isSafeTag} are rebuilt via
 *       {@code createSafeElement}, appended, and become the new {@code destination}.</li>
 *   <li>Text nodes are always copied.</li>
 *   <li>Data nodes are copied only when their parent's node name is a safe tag.</li>
 *   <li>Everything else increments {@code numDiscarded}.</li>
 * </ul>
 *
 * @param source the node being visited
 * @param depth  not used by this method
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element element = (Element) source;
        if (!whitelist.isSafeTag(element.tagName())) {
            // Rejected tag: nothing is appended. The root itself is never counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }

        // Accepted tag: append the safe copy and account for any attributes it dropped.
        ElementMeta safe = createSafeElement(element);
        Element copy = safe.el;
        destination.appendChild(copy);
        numDiscarded += safe.numAttribsDiscarded;
        // Subsequent nodes are appended beneath the copy that was just added.
        destination = copy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // Any other node kind (or a data node under a rejected parent) is dropped and counted.
    numDiscarded++;
}
/**
 * Cuts the document's text off once the accumulated text length reaches a size limit.
 *
 * <p>Every element matched by {@code select("*")} is visited and the whole text of its
 * direct text-node children is added to a running total. The text node that crosses the
 * limit is shortened and suffixed with {@code " ..."}; later text nodes of that same element
 * are blanked; elements visited after the limit was reached are removed from the document.
 *
 * @param d        the document to truncate in place
 * @param reformat selects {@code MAX_FORMAT_TEXT_SIZE} when true, otherwise
 *                 {@code MAX_FULL_TEXT_SIZE}
 * @return true when the accumulated length reached the limit
 */
static boolean truncate(Document d, boolean reformat) {
    final int limit = reformat ? MAX_FORMAT_TEXT_SIZE : MAX_FULL_TEXT_SIZE;
    int total = 0;
    int imageCount = 0;

    for (Element element : d.select("*")) {
        if ("img".equals(element.tagName())) {
            imageCount++;
        }

        // Set once a text node of this element has been cut at the limit.
        boolean cutHere = false;
        for (Node node : element.childNodes()) {
            if (!(node instanceof TextNode)) {
                continue;
            }

            TextNode textNode = (TextNode) node;
            String content = textNode.getWholeText();
            if (total >= limit) {
                // Already over the limit: blank the remaining text of the element that was cut.
                if (cutHere) {
                    textNode.text("");
                }
            } else if (total + content.length() >= limit) {
                content = content.substring(0, limit - total) + " ...";
                textNode.text(content);
                cutHere = true;
            }
            // Note: for blanked nodes this still adds the length of the original text.
            total += content.length();
        }

        // Elements reached after the limit, other than the one holding the cut, are dropped.
        if (total >= limit && !cutHere) {
            element.remove();
        }
    }

    Log.i("Message size=" + total + " images=" + imageCount);
    return total >= limit;
}
/**
 * Extracts the whole text of a node when that node is a {@link TextNode}.
 *
 * @param node the node to inspect
 * @return the node's whole text, or {@code null} when the node is not a text node
 */
private String mapToText(final Node node) {
    return (node instanceof TextNode)
            ? ((TextNode) node).getWholeText()
            : null;
}
/**
 * Produces the plain-text representation of a single wrapped HTML node.
 *
 * <p>Text nodes yield their whole text. Elements are mapped by tag: line break to a newline,
 * list containers via {@code convertListElement}, ordered lists and paragraphs to a blank
 * line, list items to an indented {@code "- "} bullet, and images to their alternative text.
 * Anything else yields the empty string.
 *
 * @param htmlNode the wrapper holding the underlying node and its list nesting level
 * @return the text for this node, never {@code null} from the branches visible here
 */
private String convertNodeToText(HTMLNode htmlNode) {
    final Node underlying = htmlNode.underlyingNode;

    if (underlying instanceof TextNode) {
        return ((TextNode) underlying).getWholeText();
    }
    if (!(underlying instanceof Element)) {
        return "";
    }

    final Element el = (Element) underlying;
    final String tag = el.tagName();

    // The checks below are kept in their original order; the list check precedes the OL check.
    if (tag.equals(BR_TAG)) {
        return "\n";
    }
    if (isList(el)) {
        return convertListElement(htmlNode.listNestedLevel);
    }
    if (tag.equals(OL_TAG)) {
        return "\n\n";
    }
    if (tag.equals(LI_TAG)) {
        String indent = StringUtils.repeat(" ", htmlNode.listNestedLevel);
        return "\n" + indent + "- ";
    }
    if (tag.equals(P_TAG)) {
        return "\n\n";
    }
    if (tag.equals(IMG_TAG)) {
        return generateImageAlternativeText(el);
    }
    return "";
}
/**
 * Appends a text node's whole text to the accumulator.
 *
 * <p>The text is appended verbatim when {@code preserveWhitespace} holds for the node's
 * parent or when the node is a {@link CDataNode}; otherwise it is appended through
 * {@code StringUtil.appendNormalisedWhitespace}.
 *
 * @param accum    the builder receiving the text
 * @param textNode the node whose text is appended
 */
private static void appendNormalisedText(StringBuilder accum,
        TextNode textNode) {
    final String wholeText = textNode.getWholeText();
    final boolean verbatim = preserveWhitespace(textNode.parent())
            || textNode instanceof CDataNode;

    if (!verbatim) {
        StringUtil.appendNormalisedWhitespace(accum, wholeText,
                lastCharIsWhitespace(accum));
        return;
    }
    accum.append(wholeText);
}
/**
 * Fetches and emits the configuration of each given job.
 *
 * <p>For every job, the page at {@code <HSBasePath>jobhistory/conf/<jobId>?<anonymous param>}
 * is downloaded and parsed as HTML. Each {@code tr} in the body of {@code table[id=conf]}
 * contributes one entry: the text of its first cell is the key and the second cell supplies
 * the value. The resulting map is emitted only when it contains
 * {@code Constants.HIVE_QUERY_STRING}.
 *
 * @param appInfo the application whose user is attached to the emitted tuple
 * @param mrJobs  the jobs whose configuration should be fetched
 * @return {@code true} when every job was processed without an exception; {@code false} as
 *         soon as fetching or parsing fails for any job (remaining jobs are skipped)
 */
private boolean fetchFinishedConfig(AppInfo appInfo, List<MRJob> mrJobs) {
    for (MRJob mrJob : mrJobs) {
        String urlString = crawlConfig.endPointConfig.HSBasePath + "jobhistory/conf/" + mrJob.getId() + "?" + Constants.ANONYMOUS_PARAMETER;
        // Declared per job so the finally block only ever closes the stream opened for this
        // job, never a stale (already closed) stream left over from a previous iteration.
        InputStream is = null;
        try {
            LOG.info("fetch job conf from {}", urlString);
            is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
            final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
            doc.outputSettings().prettyPrint(false);

            Map<String, String> hiveQueryLog = new HashMap<>();
            for (org.jsoup.nodes.Element row : doc.select("table[id=conf]").select("tbody").select("tr")) {
                org.jsoup.select.Elements tds = row.children();
                String key = tds.get(0).text();
                org.jsoup.nodes.Element valueElement = tds.get(1);

                String value = "";
                if (Constants.HIVE_QUERY_STRING.equals(key)) {
                    // Read the raw whole text of the direct text-node children instead of
                    // text(). If there are several text nodes, the last one wins.
                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
                        if (child instanceof TextNode) {
                            value = StringUtils.strip(((TextNode) child).getWholeText());
                        }
                    }
                } else {
                    value = valueElement.text();
                }
                hiveQueryLog.put(key, value);
            }

            if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {
                collector.emit(new ValuesArray(appInfo.getUser(), mrJob.getId(), Constants.ResourceType.JOB_CONFIGURATION, hiveQueryLog), mrJob.getId());
            }
        } catch (Exception e) {
            // Pass the exception as the trailing argument so the logger records the stack
            // trace itself; this replaces the former printStackTrace() call.
            LOG.warn("fetch job conf from {} failed", urlString, e);
            return false;
        } finally {
            Utils.closeInputStream(is);
        }
    }
    return true;
}