下面列出了org.jsoup.nodes.Comment#org.jsoup.nodes.TextNode 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Emits the plain-text fragment that belongs in front of {@code node}.
 * (Presumably the "entering a node" callback of a jsoup node visitor, given the
 * {@code @Override} - confirm against the enclosing class.)
 *
 * <p>Text nodes contribute their text; {@code ul} only bumps the nesting counter;
 * {@code li} starts a new line, pads one space per extra nesting level and adds a
 * bullet; {@code dt} adds a space; paragraph/heading/row tags add a line break.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the tree (not used here)
 */
@Override
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    switch (name) {
        case "ul":
            listNesting++;
            break;
        case "li":
            append("\n ");
            // One padding space for every level beyond the first.
            for (int level = listNesting; level > 1; level--) {
                append(" ");
            }
            // Top-level items get "*", nested items get "-".
            append(listNesting == 1 ? "* " : "- ");
            break;
        case "dt":
            append(" ");
            break;
        default:
            if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
                append("\n");
            }
            break;
    }
}
/**
 * Recursively flattens {@code nodes} into text appended to {@code sb}.
 *
 * <p>Text nodes contribute their whole (un-normalised) text, {@code a} elements are
 * rendered through {@code getTextLink}, {@code br} becomes a newline, and every other
 * element is descended into. Nodes of any other kind are ignored.
 *
 * @param sb    accumulator receiving the text
 * @param nodes nodes to render, in order
 */
private void generateTextContent(StringBuilder sb, List<Node> nodes) {
    for (Node current : nodes) {
        if (current instanceof TextNode) {
            sb.append(((TextNode) current).getWholeText());
            continue;
        }
        if (!(current instanceof Element)) {
            continue;
        }
        Element el = (Element) current;
        String tag = el.nodeName();
        if (tag.equals("a")) {
            sb.append(getTextLink(el));
        } else if (tag.equals("br")) {
            sb.append('\n');
        } else {
            generateTextContent(sb, el.childNodes());
        }
    }
}
/**
 * Extracts the readable text of one chapter from the EPUB resource addressed by the
 * chapter's URL.
 *
 * <p>The resource bytes are decoded with {@code mCharset}, parsed as HTML, and the
 * direct text nodes of every element are collected. When the document has more than
 * one element, each non-empty piece becomes its own line (CRLF separated) prefixed with
 * two ideographic spaces; otherwise the text is appended as-is.
 *
 * @param chapter chapter whose content is requested
 * @return the assembled chapter text
 * @throws Exception propagated from reading the resource data
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource chapterResource = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    StringBuilder out = new StringBuilder();
    String html = new String(chapterResource.getData(), mCharset);
    Elements allElements = Jsoup.parse(html).getAllElements();
    // The element count never changes while iterating, so evaluate it once.
    boolean multipleElements = allElements.size() > 1;
    for (Element el : allElements) {
        for (TextNode textNode : el.textNodes()) {
            String line = StringUtils.formatHtml(textNode.text().trim());
            if (!multipleElements) {
                out.append(line);
                continue;
            }
            if (line.length() == 0) {
                continue;
            }
            if (out.length() > 0) {
                out.append("\r\n");
            }
            out.append("\u3000\u3000").append(line);
        }
    }
    return out.toString();
}
/**
 * Renders a document as text, either in full or as a length-limited preview.
 *
 * <p>The document is first passed to {@code truncate} (with truncation enabled only for
 * previews), then every {@code blockquote} is wrapped in literal square brackets.
 * Note that the document is modified in place.
 *
 * @param d    document to render; mutated by this call
 * @param full {@code true} for the complete text, {@code false} for a preview of at most
 *             {@code PREVIEW_SIZE} characters followed by an ellipsis when shortened
 * @return the rendered text
 */
private static String _getText(Document d, boolean full) {
    truncate(d, !full);
    for (Element quote : d.select("blockquote")) {
        quote.prependChild(new TextNode("["));
        quote.appendChild(new TextNode("]"));
    }
    final String whole = d.text();
    // Nothing to cut when the full text was requested or it already fits.
    if (full || whole.length() <= PREVIEW_SIZE) {
        return whole;
    }
    return whole.substring(0, PREVIEW_SIZE) + "…";
}
/**
 * Writes the plain-text prefix for {@code node} as it is entered.
 * (Looks like a jsoup node-visitor callback because of the {@code @Override};
 * verify against the enclosing class.)
 *
 * <p>Behaviour by node kind: text nodes append their text; {@code ul} increments the
 * list nesting counter; {@code li} appends a newline, indentation and a bullet;
 * {@code dt} appends a space; {@code p}, {@code h1}-{@code h5} and {@code tr} append a
 * newline. Everything else produces no output.
 *
 * @param node  node being entered
 * @param depth tree depth of the node (unused)
 */
@Override
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    switch (name) {
        case "ul":
            listNesting++;
            break;
        case "li":
            append("\n ");
            // Indent by one space per nesting level above the first.
            for (int level = listNesting; level > 1; level--) {
                append(" ");
            }
            append(listNesting == 1 ? "* " : "- ");
            break;
        case "dt":
            append(" ");
            break;
        default:
            if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
                append("\n");
            }
            break;
    }
}
/**
 * Builds the text of a chapter from its EPUB resource.
 *
 * <p>The resource located by the chapter URL is decoded using {@code mCharset} and
 * parsed with jsoup. For each element's own text nodes the trimmed text is run through
 * {@code StringUtils.formatHtml}. If the parsed document holds more than one element,
 * non-empty pieces are emitted one per line (separated by CRLF) with a two-character
 * ideographic-space indent; otherwise the piece is appended unchanged.
 *
 * @param chapter the chapter to load
 * @return chapter text assembled from the document's text nodes
 * @throws Exception if reading the resource data fails
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource chapterResource = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    StringBuilder out = new StringBuilder();
    String html = new String(chapterResource.getData(), mCharset);
    Elements allElements = Jsoup.parse(html).getAllElements();
    // Loop-invariant: decide once whether paragraph formatting applies.
    boolean multipleElements = allElements.size() > 1;
    for (Element el : allElements) {
        for (TextNode textNode : el.textNodes()) {
            String line = StringUtils.formatHtml(textNode.text().trim());
            if (!multipleElements) {
                out.append(line);
                continue;
            }
            if (line.length() == 0) {
                continue;
            }
            if (out.length() > 0) {
                out.append("\r\n");
            }
            out.append("\u3000\u3000").append(line);
        }
    }
    return out.toString();
}
/**
 * Scrapes the collection entries listed in {@code doc}.
 *
 * <p>Each element matched by the class lookup is expected to contain a title link
 * ({@code div > h2 > a}) and a trailing {@code div} carrying the description. The id is
 * the last path segment of the link's {@code href}.
 *
 * <p>Entries without a title link are skipped, and a missing title or description text
 * yields an empty string instead of an exception. (Previously an empty description
 * still called {@code get(0)} on an empty list and threw
 * {@code IndexOutOfBoundsException}.)
 *
 * @param doc parsed page to scrape
 * @return the collections found, in document order; never {@code null}
 */
private ArrayList<Collection> getBellowCollections(Document doc){
    ArrayList<Collection> collections = new ArrayList<>();
    Elements elements = doc.getElementsByClass(
            "d-flex border-bottom border-gray-light pb-4 mb-5");
    for (Element element : elements) {
        Element titleElement = element.select("div > h2 > a").first();
        if (titleElement == null) {
            // No title link means there is no id to build a collection from.
            continue;
        }

        String id = titleElement.attr("href");
        id = id.substring(id.lastIndexOf("/") + 1);

        List<TextNode> titleTextNodes = titleElement.textNodes();
        String title = titleTextNodes.isEmpty() ? "" : titleTextNodes.get(0).toString();

        // The description is the last direct text node of the last matching div, if any.
        String desc = "";
        Element descElement = element.select("div").last();
        if (descElement != null) {
            List<TextNode> descTextNodes = descElement.textNodes();
            if (!descTextNodes.isEmpty()) {
                desc = descTextNodes.get(descTextNodes.size() - 1).toString().trim();
            }
        }

        collections.add(new Collection(id, title, desc));
    }
    return collections;
}
/**
 * Walks {@code nodeList} depth-first and collects the text of every leaf text node.
 *
 * <p>Nodes that have children are descended into; childless nodes named {@code #text}
 * have everything matched by the {@code newline} pattern removed, and the result is
 * added to {@code textList} unless it is empty.
 *
 * @param nodeList nodes to scan
 * @param textList receives the cleaned text fragments, in document order
 */
private static void readNodes(List<Node> nodeList, List<String> textList)
{
    for (Node current : nodeList)
    {
        if (current.childNodeSize() > 0)
        {
            readNodes(current.childNodes(), textList);
            continue;
        }
        if (!current.nodeName().equals("#text"))
        {
            continue;
        }
        String cleaned = newline.matcher(((TextNode) current).getWholeText()).replaceAll("");
        if (!cleaned.isEmpty())
        {
            textList.add(cleaned);
        }
    }
}
/**
 * Walks {@code nodeList} depth-first and records each leaf text node together with the
 * name of the node that contains it.
 *
 * <p>When recursing, the current node's name becomes the tag for its children. Text
 * found while the tag is {@code body} (case-insensitive) is recorded under {@code p}
 * instead; once that substitution happens it stays in effect for the rest of this call.
 *
 * @param nodeList    nodes to scan
 * @param textListMap receives (tag, whole text) entries in document order
 * @param tag         tag name to attribute direct text nodes of {@code nodeList} to
 */
private static void readNodesWithTags(List<Node> nodeList, List<Map.Entry<String,String>> textListMap, String tag)
{
    for (Node current : nodeList)
    {
        if (current.childNodeSize() > 0)
        {
            readNodesWithTags(current.childNodes(), textListMap, current.nodeName());
            continue;
        }
        if (!current.nodeName().equals("#text"))
        {
            continue;
        }
        // Text sitting directly under <body> is treated as paragraph text.
        if (tag.equalsIgnoreCase("body"))
        {
            tag = "p";
        }
        String wholeText = ((TextNode) current).getWholeText();
        Map.Entry<String, String> entry = new AbstractMap.SimpleEntry<String, String>(tag, wholeText);
        textListMap.add(entry);
    }
}
/**
 * Appends the text beneath {@code e} to {@code accum}, skipping every child for which
 * {@code unlikely} returns true.
 *
 * <p>Before descending into a child element a single space is inserted when either the
 * accumulator is non-empty, the element is a block and the accumulator does not already
 * end in whitespace, or (failing that) the element is a {@code br}.
 *
 * @param e      element whose children are rendered
 * @param accum  accumulator receiving the text
 * @param indent recursion depth; only passed along, incremented per level
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsGap = accum.length() > 0
                && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // Short-circuit keeps the br check from running when the block rule already fired.
        if (blockNeedsGap || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
/**
 * Computes the raw-text summary of this node collection.
 *
 * <p>For every text node reached by iterating over {@code this}: the tag name is set to
 * the node's path, the text (with breaks normalised and then trimmed) is appended to the
 * raw text, and the untrimmed text length is added to {@code charsCountInLinks} when
 * {@code NodeHelper.isLink} reports the node as a link. The concatenation is stored in
 * {@code rawText}.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node n : this) {
        if (!(n instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(n));
        String raw = ((TextNode) n).text();
        collected.append(Utils.normalizeBreaks(raw).trim());
        if (NodeHelper.isLink(n)) {
            // Link statistics use the length before normalisation/trimming.
            charsCountInLinks += raw.length();
        }
    }
    rawText = collected.toString();
}
/**
 * Builds a dot-terminated path of node names from the {@code html} ancestor down to
 * {@code n}, e.g. {@code html.body.div.}.
 *
 * <p>Text nodes and nodes that {@code NodeHelper.isInnerText} reports as inner text are
 * not part of the path; their parent is used instead. The walk stops at the first node
 * named {@code html} (case-insensitive).
 *
 * <p>If the chain of parents ends before an {@code html} node is found (a detached
 * node), the path collected so far is returned. Previously this case dereferenced a
 * null parent and threw {@code NullPointerException}.
 *
 * @param n node to describe; may be {@code null}, which yields an empty path
 * @return the path, each segment followed by a dot; empty when nothing was collected
 */
public String getPath(Node n)
{
    String nodePath = "";
    while (n != null) {
        if (n instanceof TextNode) {
            n = n.parent();
        }
        if (n != null && NodeHelper.isInnerText(n)) {
            n = n.parent();
        }
        if (n == null) {
            // Ran off the top of a detached subtree: keep what has been collected.
            break;
        }
        String parentNodeName = n.nodeName();
        nodePath = parentNodeName + "." + nodePath;
        if (parentNodeName.equalsIgnoreCase("html")) {
            break;
        }
        n = n.parent();
    }
    return nodePath;
}
/**
 * Produces the leading plain-text output for {@code node}.
 * (The {@code @Override} suggests a jsoup node-visitor "head" callback - confirm with
 * the enclosing class.)
 *
 * <p>Text nodes are appended verbatim via {@code text()}. A {@code ul} raises the list
 * nesting level. An {@code li} emits a newline, one space per nesting level above the
 * first, then {@code "* "} at the top level or {@code "- "} when nested. A {@code dt}
 * emits a space, and {@code p}/{@code h1}-{@code h5}/{@code tr} emit a newline.
 *
 * @param node  node being entered
 * @param depth node depth (unused)
 */
@Override
public void head(Node node, int depth) {
    String name = node.nodeName();
    if (node instanceof TextNode) {
        // TextNodes carry all user-readable text in the DOM.
        append(((TextNode) node).text());
        return;
    }
    switch (name) {
        case "ul":
            listNesting++;
            break;
        case "li":
            append("\n ");
            // Extra indentation for nested lists only.
            for (int level = listNesting; level > 1; level--) {
                append(" ");
            }
            append(listNesting == 1 ? "* " : "- ");
            break;
        case "dt":
            append(" ");
            break;
        default:
            if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
                append("\n");
            }
            break;
    }
}
/**
 * Recursively renders {@code node} as plain text into {@code out}.
 *
 * <p>Text nodes are appended, left-trimmed when the output is empty or already ends in
 * whitespace. On entering a {@code p} or {@code br} element the output is right-trimmed
 * and, if anything remains, {@code PARAGRAPH_SEPARATOR} is appended; the element's
 * children are then rendered in order. Other node kinds are ignored.
 *
 * @param node node to render
 * @param out  accumulator receiving the text
 */
private static void renderAsPlaintext(Node node, StringBuilder out) {
    if (node instanceof TextNode) {
        String chunk = ((TextNode) node).text();
        boolean atBoundary = out.length() == 0 || endsWithWhitespace(out);
        out.append(atBoundary ? trimLeft(chunk) : chunk);
        return;
    }
    if (!(node instanceof Element)) {
        return;
    }
    Element el = (Element) node;
    boolean startsParagraph = el.tagName().equals("p") || el.tagName().equals("br");
    if (startsParagraph) {
        trimRight(out);
        // Never open the output with a separator.
        if (out.length() > 0) {
            out.append(PARAGRAPH_SEPARATOR);
        }
    }
    for (Node child : el.childNodes()) {
        renderAsPlaintext(child, out);
    }
}
/**
 * Extracts text from the direct text-node children of {@code element}.
 *
 * <p>With {@code group == 0} the text of all direct text nodes is concatenated.
 * Otherwise the text of the {@code group}-th direct text node (1-based) is returned;
 * if there are fewer text nodes than that, the result is an empty string.
 *
 * @param element element whose direct text nodes are inspected
 * @return the selected text as described above
 */
@Override
public String operate(Element element) {
    StringBuilder joined = new StringBuilder();
    int seen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode textChild = (TextNode) child;
        if (group == 0) {
            joined.append(textChild.text());
            continue;
        }
        seen++;
        if (seen == group) {
            return textChild.text();
        }
    }
    return joined.toString();
}
/**
 * Builds a text representation of {@code element} and its descendants.
 *
 * <p>If the element has the {@code ALT_ATTR} attribute, the text produced by
 * {@code altAttrTextBuilder} comes first. Then, for each child in order, non-blank text
 * nodes contribute their trimmed text and child elements contribute their own
 * recursively built text. Every contribution is preceded by {@code SPACER}, and the
 * final result is trimmed.
 *
 * @param element element to convert to text
 * @return the trimmed text
 */
@Override
public String buildTextFromElement(Element element) {
    StringBuilder out = new StringBuilder();
    if (element.hasAttr(ALT_ATTR)) {
        out.append(SPACER);
        out.append(altAttrTextBuilder.buildTextFromElement(element));
    }
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            TextNode textChild = (TextNode) child;
            // Blank text nodes add nothing, not even a spacer.
            if (!textChild.isBlank()) {
                out.append(SPACER);
                out.append(StringUtils.trim(textChild.text()));
            }
        } else if (child instanceof Element) {
            out.append(SPACER);
            out.append(buildTextFromElement((Element) child));
        }
    }
    String combined = out.toString();
    return StringUtils.trim(combined);
}
/**
 * Creates a JSoup node tree mirroring the given element and its children.
 * <p>
 * A text element becomes a {@link TextNode}. Any other element becomes a JSoup element
 * with the same tag; its {@code innerHTML} property (when present) is applied as HTML
 * content, attributes are copied - an empty-string value is written as a boolean
 * attribute - and the children are converted recursively and appended.
 *
 * @param document
 *            the JSoup document used to create elements and supply the base URI
 * @param element
 *            the element to convert
 * @return the JSoup node representing the element
 */
public static Node toJsoup(Document document, Element element) {
    if (element.isTextNode()) {
        return new TextNode(element.getText(), document.baseUri());
    }

    String tag = element.getTag();
    org.jsoup.nodes.Element converted = document.createElement(tag);

    if (element.hasProperty("innerHTML")) {
        String markup = (String) element.getPropertyRaw("innerHTML");
        converted.html(markup);
    }

    element.getAttributeNames().forEach(attrName -> {
        String value = element.getAttribute(attrName);
        if (!"".equals(value)) {
            converted.attr(attrName, value);
        } else {
            // Empty value: emit as a boolean attribute.
            converted.attr(attrName, true);
        }
    });

    element.getChildren().forEach(child -> {
        Node childNode = toJsoup(document, child);
        converted.appendChild(childNode);
    });

    return converted;
}
/**
 * Builds a {@link com.vaadin.flow.dom.Element} hierarchy that mirrors the given JSoup
 * {@link org.jsoup.nodes.Node} and its descendants.
 * <p>
 * {@link org.jsoup.nodes.TextNode}s become text elements and
 * {@link org.jsoup.nodes.Element}s become elements with the same tag name, attributes
 * and (convertible) children. Any other node type is logged as an error and yields an
 * empty optional; such nodes are silently omitted when they occur as children.
 *
 * @param node
 *            the JSoup node to convert
 * @return the converted element, or an empty optional for unsupported node types
 */
public static Optional<Element> fromJsoup(Node node) {
    if (node instanceof TextNode) {
        String text = ((TextNode) node).text();
        return Optional.of(Element.createText(text));
    }

    if (!(node instanceof org.jsoup.nodes.Element)) {
        LoggerFactory.getLogger(ElementUtil.class).error(
                "Could not convert a {}, '{}' into {}!",
                Node.class.getName(), node, Element.class.getName());
        return Optional.empty();
    }

    Element converted = new Element(((org.jsoup.nodes.Element) node).tagName());

    node.attributes().asList().forEach(
            attribute -> converted.setAttribute(attribute.getKey(), attribute.getValue()));

    // Unsupported children come back empty and are simply not attached.
    for (Node child : node.childNodes()) {
        fromJsoup(child).ifPresent(converted::appendChild);
    }

    return Optional.of(converted);
}
/**
 * Collects the text under {@code e} into {@code accum}, ignoring children that
 * {@code unlikely} flags.
 *
 * <p>Text nodes contribute their text directly. Before a child element is descended
 * into, one space is added if the element is a block following non-whitespace content
 * in a non-empty accumulator, or otherwise if the element is a {@code br}.
 *
 * @param e      element whose children are processed
 * @param accum  accumulator receiving the text
 * @param indent current recursion depth; incremented for each nested call
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        boolean blockNeedsGap = accum.length() > 0
                && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // The br test is only reached when the block rule did not apply.
        if (blockNeedsGap || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
/**
 * Produces an HTML digest of {@code str} limited to roughly {@code size} characters of
 * text.
 *
 * <p>The input is parsed as a body fragment and its text nodes are gathered through
 * {@code getAllTextNode}. For each text node the outer HTML of its parent is appended,
 * and the node's text length is added to a running total; once the total exceeds
 * {@code size}, {@code " ..."} is appended and collection stops. If the input contains
 * a {@code video} element, the first one followed by {@code <br/>} is placed in front
 * of the digest.
 *
 * @param str  HTML source to summarise
 * @param size text-length budget after which the digest is cut off
 * @return the trimmed digest HTML
 */
public static String autoDigest(String str, int size) {
    Document fragment = Jsoup.parseBodyFragment(str);
    List<Node> textNodes = new ArrayList<>();
    getAllTextNode(fragment.childNodes(), textNodes);

    StringBuilder html = new StringBuilder();
    int consumed = 0;
    for (Node candidate : textNodes) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        html.append(candidate.parent().outerHtml());
        consumed += ((TextNode) candidate).text().length();
        if (consumed > size) {
            html.append(" ...");
            break;
        }
    }

    String digest = html.toString();
    Elements videos = Jsoup.parse(str).body().select("video");
    boolean hasVideo = videos != null && !videos.isEmpty();
    if (hasVideo) {
        digest = videos.get(0).toString() + "<br/>" + digest;
    }
    return digest.trim();
}
/**
 * Registers a text node for counting, unless its trimmed text is empty.
 *
 * <p>The node is added to {@code tNodeList}, its xpath (from
 * {@code JsoupHelper.getXpath}) is stored in {@code xpathMap}, and a new
 * {@code CountInfo} for it is appended to the list kept in {@code countMap} under that
 * xpath, creating the list on first use.
 *
 * @param tNode text node to register
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }

    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo info = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // First node seen for this xpath.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(info);
}
/**
 * Copies {@code source} into the destination tree if it passes the whitelist, counting
 * everything that is dropped in {@code numDiscarded}.
 *
 * <ul>
 * <li>Elements with a safe tag are recreated via {@code createSafeElement}, appended,
 * and become the new {@code destination}; discarded attributes are counted.</li>
 * <li>Elements with an unsafe tag are not copied and count as one discard, except for
 * {@code root}.</li>
 * <li>Text nodes are copied with their whole text.</li>
 * <li>Data nodes are copied only when their parent's tag is safe.</li>
 * <li>Anything else (and data nodes under unsafe parents) counts as one discard.</li>
 * </ul>
 *
 * @param source node being entered in the source tree
 * @param depth  depth of the node (unused)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;
        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // not a safe tag, so don't add. don't count root against discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }
        // safe, clone and copy safe attrs
        ElementMeta meta = createSafeElement(sourceEl);
        Element destChild = meta.el;
        destination.appendChild(destChild);
        numDiscarded += meta.numAttribsDiscarded;
        destination = destChild;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // else, we don't care about comments, xml proc instructions, etc
    numDiscarded++;
}
/**
 * Mirrors {@code source} into the cleaned destination tree when the whitelist allows
 * it; every dropped node or attribute is tallied in {@code numDiscarded}.
 *
 * <p>Safe elements are rebuilt with {@code createSafeElement}, appended, and then used
 * as the new {@code destination}. Unsafe elements are skipped (counted unless the node
 * is {@code root}). Text nodes are always copied. Data nodes are copied only beneath a
 * safe parent tag. All remaining cases count as a single discard.
 *
 * @param source node being entered in the source tree
 * @param depth  depth of the node (unused)
 */
public void head(Node source, int depth) {
    if (source instanceof Element) {
        Element sourceEl = (Element) source;
        if (!whitelist.isSafeTag(sourceEl.tagName())) {
            // not a safe tag, so don't add. don't count root against discarded.
            if (source != root) {
                numDiscarded++;
            }
            return;
        }
        // safe, clone and copy safe attrs
        ElementMeta meta = createSafeElement(sourceEl);
        Element destChild = meta.el;
        destination.appendChild(destChild);
        numDiscarded += meta.numAttribsDiscarded;
        destination = destChild;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText));
        return;
    }

    if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData));
        return;
    }

    // else, we don't care about comments, xml proc instructions, etc
    numDiscarded++;
}
/**
 * Records a text node in the bookkeeping structures; nodes whose trimmed text is empty
 * are ignored.
 *
 * <p>Updates {@code tNodeList}, maps the node to its xpath in {@code xpathMap}, and
 * appends a fresh {@code CountInfo} to the per-xpath list in {@code countMap} (the list
 * is created when the xpath is seen for the first time).
 *
 * @param tNode text node to record
 */
private void addTextNode(TextNode tNode) {
    if (tNode.text().trim().isEmpty()) {
        return;
    }

    String xpath = JsoupHelper.getXpath(tNode);
    tNodeList.add(tNode);
    xpathMap.put(tNode, xpath);

    CountInfo info = new CountInfo(tNode);
    ArrayList<CountInfo> bucket = countMap.get(xpath);
    if (bucket == null) {
        // No entries for this xpath yet.
        bucket = new ArrayList<CountInfo>();
        countMap.put(xpath, bucket);
    }
    bucket.add(info);
}
/**
 * Truncates text nodes in place so that the total visited text stays within
 * {@code maxTextLen} characters.
 *
 * <p>Once the budget is used up, further text nodes are emptied. The text node that
 * crosses the limit is cut at the remaining budget - or, when {@code killwords} is
 * false, at the position returned by
 * {@code Functions.movePointerToJustBeforeLastWord} minus one - and {@code ending} is
 * appended. Text nodes that fit entirely just add to the running length. Non-text
 * nodes are left untouched.
 *
 * @param node  node being entered
 * @param depth depth of the node (unused)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    TextNode textNode = (TextNode) node;
    String content = textNode.text();

    if (textLen >= maxTextLen) {
        // Budget already exhausted: drop this text entirely.
        textNode.text("");
        return;
    }
    if (textLen + content.length() <= maxTextLen) {
        textLen += content.length();
        return;
    }

    // This node straddles the limit.
    int cut = maxTextLen - textLen;
    if (!killwords) {
        cut = Functions.movePointerToJustBeforeLastWord(cut, content) - 1;
    }
    textNode.text(content.substring(0, cut) + ending);
    textLen = maxTextLen;
}
/**
 * Registers the Ruby {@code ShellSessionTreeProcessor} through an extension group and
 * checks that the two shell commands in the converted output appear as the text of the
 * elements with class {@code command}.
 */
@Test
public void ruby_treeprocessor_should_be_registered() {
    this.asciidoctor.createGroup()
        .loadRubyClass(getClass().getResourceAsStream("/ruby-extensions/shell-session-tree-processor.rb"))
        .rubyTreeprocessor("ShellSessionTreeProcessor")
        .register();

    // Two literal blocks, each starting with a shell prompt.
    final String source =
        " $ echo \"Hello, World!\"\n" +
        " > Hello, World!\n" +
        "\n" +
        " $ gem install asciidoctor";
    String content = this.asciidoctor.convert(source, options().toFile(false).get());

    final Document document = Jsoup.parse(content);

    final TextNode firstCommand = document.getElementsByClass("command").get(0).textNodes().get(0);
    assertThat(firstCommand.getWholeText(), is("echo \"Hello, World!\""));

    final TextNode secondCommand = document.getElementsByClass("command").get(1).textNodes().get(0);
    assertThat(secondCommand.getWholeText(), is("gem install asciidoctor"));
}
/**
 * Creates a JRuby-backed Asciidoctor with the {@code ruby-extensions} directory on its
 * load path, registers the Ruby {@code ShellSessionTreeProcessor} as a tree processor,
 * and checks that the two shell commands in the converted output appear as the text of
 * the elements with class {@code command}.
 */
@Test
public void ruby_treeprocessor_should_be_registered() {
    final String rubyExtPath = classpath.getResource("ruby-extensions").getAbsolutePath();
    final AsciidoctorJRuby asciidoctor = AsciidoctorJRuby.Factory.create(singletonList(rubyExtPath));
    asciidoctor.rubyExtensionRegistry()
        .requireLibrary("shell-session-tree-processor.rb")
        .treeprocessor("ShellSessionTreeProcessor");

    // Two literal blocks, each starting with a shell prompt.
    final String source =
        " $ echo \"Hello, World!\"\n" +
        " > Hello, World!\n" +
        "\n" +
        " $ gem install asciidoctor";
    String content = asciidoctor.convert(source, options().toFile(false).get());

    final Document document = Jsoup.parse(content);

    final TextNode firstCommand = document.getElementsByClass("command").get(0).textNodes().get(0);
    assertThat(firstCommand.getWholeText(), is("echo \"Hello, World!\""));

    final TextNode secondCommand = document.getElementsByClass("command").get(1).textNodes().get(0);
    assertThat(secondCommand.getWholeText(), is("gem install asciidoctor"));
}
/**
 * Returns text taken from the direct text-node children of {@code element}.
 *
 * <p>When {@code group} is 0, the text of every direct text node is joined together.
 * For any other value the text of the {@code group}-th direct text node (counting from
 * 1) is returned, or an empty string when no such node exists.
 *
 * @param element element whose direct children are scanned
 * @return the joined text, the selected node's text, or an empty string
 */
@Override
public String operate(Element element) {
    StringBuilder joined = new StringBuilder();
    int seen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode textChild = (TextNode) child;
        if (group == 0) {
            joined.append(textChild.text());
            continue;
        }
        seen++;
        if (seen == group) {
            return textChild.text();
        }
    }
    return joined.toString();
}
/**
 * Concatenates the text of the element's direct text-node children, in order and
 * without any separator. Text inside child elements is not included.
 *
 * @param element element whose own text is wanted
 * @return the joined text; empty when the element has no direct text nodes
 */
private String getText(Element element) {
    return element.childNodes()
            .stream()
            .filter(TextNode.class::isInstance)
            .map(TextNode.class::cast)
            .map(TextNode::text)
            .collect(Collectors.joining());
}
/**
 * Tells whether any of the given nodes counts as visible content.
 *
 * <p>A node qualifies when it is a non-blank text node, or a non-block element that has
 * text or contains an {@code a} or {@code img} element. Block elements and other node
 * kinds never qualify.
 *
 * @param nodes nodes to inspect
 * @return {@code true} as soon as one qualifying node is found, otherwise {@code false}
 */
private static boolean hasVisibleContent(List<Node> nodes) {
    for (Node candidate : nodes) {
        if (candidate instanceof TextNode) {
            if (!((TextNode) candidate).isBlank()) {
                return true;
            }
            continue;
        }
        if (!(candidate instanceof Element)) {
            continue;
        }
        Element inline = (Element) candidate;
        if (inline.isBlock()) {
            continue;
        }
        if (inline.hasText()
                || inline.selectFirst("a") != null
                || inline.selectFirst("img") != null) {
            return true;
        }
    }
    return false;
}