org.jsoup.nodes.TextNode#getWholeText ( )源码实例Demo

下面列出了 org.jsoup.nodes.TextNode#getWholeText() 的实例代码,您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。

源代码1 项目: astor   文件: Cleaner.java
/**
 * Handles a node on entry while copying a source tree into {@code destination}.
 *
 * <ul>
 *   <li>Elements whose tag passes {@code whitelist.isSafeTag} are rebuilt via
 *       {@code createSafeElement}, appended, and become the new {@code destination}.</li>
 *   <li>Other elements are counted as discarded, except {@code root}.</li>
 *   <li>Text nodes are copied using their whole text.</li>
 *   <li>Data nodes are copied only when their parent's node name is a safe tag.</li>
 *   <li>Every other node is counted as discarded.</li>
 * </ul>
 *
 * @param source the node being entered
 * @param depth  depth of the node; not used by this method
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        final Element element = (Element) source;

        if (!whitelist.isSafeTag(element.tagName())) {
            // Unsafe tag: nothing is added. The root itself is not counted as discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }

        final ElementMeta safeMeta = createSafeElement(element);
        final Element safeCopy = safeMeta.el;
        destination.appendChild(safeCopy);

        numDiscarded += safeMeta.numAttribsDiscarded;
        // Subsequent children are appended beneath the freshly added copy.
        destination = safeCopy;
        return;
    }

    if (source instanceof TextNode) {
        final String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        final String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // Anything else (comments, XML processing instructions, ...) is dropped.
    numDiscarded++;
}
 
源代码2 项目: astor   文件: Cleaner.java
/**
 * Visitor entry hook that mirrors permitted nodes from the source tree into
 * {@code destination} and tallies everything that is left out in {@code numDiscarded}.
 *
 * @param source the node being entered
 * @param depth  depth of the node; not used by this method
 */
public void head(Node source, int depth) {
    if (!(source instanceof Element)) {
        if (source instanceof TextNode) {
            // Text is always carried over, using the node's whole text.
            TextNode textCopy = new TextNode(((TextNode) source).getWholeText());
            destination.appendChild(textCopy);
        } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
            // Data is carried over only when the parent's node name is a safe tag.
            DataNode dataCopy = new DataNode(((DataNode) source).getWholeData());
            destination.appendChild(dataCopy);
        } else {
            // Comments, XML processing instructions and the like are not kept.
            numDiscarded++;
        }
        return;
    }

    Element original = (Element) source;
    boolean tagAllowed = whitelist.isSafeTag(original.tagName());

    if (tagAllowed) {
        ElementMeta built = createSafeElement(original);
        Element copy = built.el;
        destination.appendChild(copy);

        numDiscarded += built.numAttribsDiscarded;
        // Descend: children of this element are appended to its copy.
        destination = copy;
    } else if (source != root) {
        // Disallowed tag; the root is exempt from the discard count.
        numDiscarded++;
    }
}
 
源代码3 项目: FairEmail   文件: HtmlHelper.java
/**
 * Limits the amount of text in a document.
 *
 * <p>Walks every element selected by {@code "*"}, summing the length of its direct text
 * children. The text node that crosses the limit is cut and suffixed with {@code " ..."};
 * later text nodes in the same element are emptied, and later elements are removed.
 *
 * @param d        the document to truncate in place
 * @param reformat selects {@code MAX_FORMAT_TEXT_SIZE} when true, otherwise
 *                 {@code MAX_FULL_TEXT_SIZE}, as the limit
 * @return {@code true} if the accumulated text length reached the limit
 */
static boolean truncate(Document d, boolean reformat) {
    final int limit = reformat ? MAX_FORMAT_TEXT_SIZE : MAX_FULL_TEXT_SIZE;

    int total = 0;
    int imageCount = 0;
    for (Element element : d.select("*")) {
        if ("img".equals(element.tagName())) {
            imageCount++;
        }

        // True once this element contained the text node that crossed the limit.
        boolean cutInThisElement = false;
        for (Node node : element.childNodes()) {
            if (!(node instanceof TextNode)) {
                continue;
            }

            TextNode textNode = (TextNode) node;
            String content = textNode.getWholeText();

            if (total >= limit) {
                // Already over the limit: blank out trailing siblings of the cut node.
                if (cutInThisElement) {
                    textNode.text("");
                }
            } else if (total + content.length() >= limit) {
                content = content.substring(0, limit - total) + " ...";
                textNode.text(content);
                cutInThisElement = true;
            }

            // The counted length includes the " ..." suffix when the node was cut.
            total += content.length();
        }

        // Elements visited after the limit was reached are dropped.
        if (total >= limit && !cutInThisElement) {
            element.remove();
        }
    }

    Log.i("Message size=" + total + " images=" + imageCount);

    return total >= limit;
}
 
源代码4 项目: baleen   文件: DocumentToJCasConverter.java
/**
 * Extracts the text carried by a node.
 *
 * @param node the node to inspect
 * @return the whole text of the node when it is a {@link TextNode}; {@code null} for any
 *     other kind of node
 */
private String mapToText(final Node node) {
  return (node instanceof TextNode) ? ((TextNode) node).getWholeText() : null;
}
 
源代码5 项目: james-project   文件: JsoupHtmlTextExtractor.java
/**
 * Produces the plain-text fragment contributed by one HTML node.
 *
 * <p>Text nodes yield their whole text. Elements are matched against the tag constants in
 * a fixed order (BR, list, OL, LI, P, IMG); anything unmatched yields an empty string.
 *
 * @param htmlNode wrapper holding the underlying node and its list nesting level
 * @return the text for this node, never {@code null} from the branches visible here
 */
private String convertNodeToText(HTMLNode htmlNode) {
    final Node wrapped = htmlNode.underlyingNode;

    if (wrapped instanceof TextNode) {
        return ((TextNode) wrapped).getWholeText();
    }
    if (!(wrapped instanceof Element)) {
        return "";
    }

    final Element tag = (Element) wrapped;
    final String tagName = tag.tagName();

    if (tagName.equals(BR_TAG)) {
        return "\n";
    }
    // The list check deliberately runs before the individual OL/LI checks below.
    if (isList(tag)) {
        return convertListElement(htmlNode.listNestedLevel);
    }
    if (tagName.equals(OL_TAG)) {
        return "\n\n";
    }
    if (tagName.equals(LI_TAG)) {
        // One space of indentation per nesting level, followed by a dash bullet.
        final String indent = StringUtils.repeat(" ", htmlNode.listNestedLevel);
        return "\n" + indent + "- ";
    }
    if (tagName.equals(P_TAG)) {
        return "\n\n";
    }
    if (tagName.equals(IMG_TAG)) {
        return generateImageAlternativeText(tag);
    }
    return "";
}
 
源代码6 项目: storm-crawler   文件: TextExtractor.java
/**
 * Appends the text of a text node to the accumulator.
 *
 * <p>The text is appended verbatim when {@code preserveWhitespace} accepts the node's
 * parent or the node is a {@code CDataNode}; otherwise it goes through
 * {@code StringUtil.appendNormalisedWhitespace}.
 *
 * @param accum    buffer receiving the text
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum,
        TextNode textNode) {
    final String raw = textNode.getWholeText();
    final boolean verbatim = preserveWhitespace(textNode.parent())
            || textNode instanceof CDataNode;

    if (!verbatim) {
        final boolean endsWithWhitespace = lastCharIsWhitespace(accum);
        StringUtil.appendNormalisedWhitespace(accum, raw, endsWithWhitespace);
        return;
    }
    accum.append(raw);
}
 
源代码7 项目: eagle   文件: HiveJobFetchSpout.java
/**
 * Fetches the job-history configuration page of each finished MR job, extracts the
 * key/value rows of the {@code conf} table and emits them when the Hive query string
 * is among the keys.
 *
 * @param appInfo application whose user is attached to the emitted tuple
 * @param mrJobs  jobs whose configuration pages are fetched, in order
 * @return {@code true} if every job was processed; {@code false} as soon as fetching or
 *     parsing any job fails (remaining jobs are skipped)
 */
private boolean fetchFinishedConfig(AppInfo appInfo, List<MRJob> mrJobs) {
    for (MRJob mrJob : mrJobs) {
        String urlString = crawlConfig.endPointConfig.HSBasePath + "jobhistory/conf/" + mrJob.getId() + "?" + Constants.ANONYMOUS_PARAMETER;
        // Scoped per job so that a failure while opening this job's stream never
        // re-closes the stream left over from the previous iteration.
        InputStream is = null;
        try {
            LOG.info("fetch job conf from {}", urlString);
            is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
            final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
            doc.outputSettings().prettyPrint(false);
            org.jsoup.select.Elements rows = doc.select("table[id=conf]").select("tbody").select("tr");
            Map<String, String> hiveQueryLog = new HashMap<>();
            for (org.jsoup.nodes.Element row : rows) {
                // NOTE(review): a row with fewer than two cells makes get(1) throw, which
                // is handled by the catch below and aborts the whole fetch — confirm intended.
                org.jsoup.select.Elements tds = row.children();
                String key = tds.get(0).text();
                String value = "";
                org.jsoup.nodes.Element valueElement = tds.get(1);
                if (Constants.HIVE_QUERY_STRING.equals(key)) {
                    // The query is read from the raw text node rather than via text().
                    // Each text child overwrites the previous one, so the last one wins.
                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
                        if (child instanceof TextNode) {
                            TextNode valueTextNode = (TextNode) child;
                            value = valueTextNode.getWholeText();
                            value = StringUtils.strip(value);
                        }
                    }
                } else {
                    value = valueElement.text();
                }
                hiveQueryLog.put(key, value);
            }
            if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {
                collector.emit(new ValuesArray(appInfo.getUser(), mrJob.getId(), Constants.ResourceType.JOB_CONFIGURATION, hiveQueryLog), mrJob.getId());
            }
        } catch (Exception e) {
            // Pass the exception as the trailing argument (no placeholder) so the logger
            // records the stack trace itself; printStackTrace() is no longer needed.
            LOG.warn("fetch job conf from {} failed", urlString, e);
            return false;
        } finally {
            Utils.closeInputStream(is);
        }
    }
    return true;
}
 
 方法所在类
 同类方法