org.jsoup.nodes.Comment#org.jsoup.nodes.Node源码实例Demo

下面列出了 org.jsoup.nodes.Comment#org.jsoup.nodes.Node 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: intellij-quarkus   文件: HtmlToPlainText.java
/**
 * Emits the plain-text prefix for {@code node} when the traversal enters it.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the tree (not used by this method)
 */
@Override
public void head(Node node, int depth) {
    final String tag = node.nodeName();

    if (node instanceof TextNode) {
        // Text nodes hold the user-readable content of the DOM.
        TextNode textNode = (TextNode) node;
        append(textNode.text());
        return;
    }
    if (tag.equals("ul")) {
        listNesting++;
        return;
    }
    if (tag.equals("li")) {
        append("\n ");
        // One extra two-space pad per nesting level beyond the first.
        int level = 1;
        while (level < listNesting) {
            append("  ");
            level++;
        }
        // Top-level items get "* ", nested items get "- ".
        append(listNesting == 1 ? "* " : "- ");
        return;
    }
    if (tag.equals("dt")) {
        append("  ");
        return;
    }
    if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
        append("\n");
    }
}
 
源代码2 项目: openemm   文件: ComMailingContentServiceImpl.java
/**
 * Recursively renders {@code nodes} as text into {@code sb}.
 * Anchors are rendered through {@code getTextLink}, {@code br} becomes a newline,
 * any other element is descended into, and text nodes contribute their whole text.
 *
 * @param sb    buffer receiving the generated text
 * @param nodes nodes to render, in order
 */
private void generateTextContent(StringBuilder sb, List<Node> nodes) {
    for (Node current : nodes) {
        if (current instanceof Element) {
            Element element = (Element) current;
            String tag = element.nodeName();

            if (tag.equals("a")) {
                sb.append(getTextLink(element));
            } else if (tag.equals("br")) {
                sb.append('\n');
            } else {
                // Any other element: only its children can contribute text.
                generateTextContent(sb, element.childNodes());
            }
            continue;
        }

        if (current instanceof TextNode) {
            TextNode textNode = (TextNode) current;
            sb.append(textNode.getWholeText());
        }
    }
}
 
源代码3 项目: lemminx   文件: HtmlToPlainText.java
/**
 * Writes the leading plain-text markup for a node as the walk reaches it.
 *
 * @param node  node currently being entered
 * @param depth tree depth of {@code node} (not consulted here)
 */
@Override
public void head(Node node, int depth) {
	String nodeName = node.nodeName();

	// Text nodes carry the readable text; emit it and stop.
	if (node instanceof TextNode) {
		append(((TextNode) node).text());
		return;
	}

	switch (nodeName) {
		case "ul":
			listNesting++;
			break;

		case "li":
			append("\n ");
			// Pad nested items by two spaces for each level past the first.
			for (int pad = 1; pad < listNesting; pad++) {
				append("  ");
			}
			String bullet = (listNesting == 1) ? "* " : "- ";
			append(bullet);
			break;

		case "dt":
			append("  ");
			break;

		default:
			if (StringUtil.in(nodeName, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
				append("\n");
			}
			break;
	}
}
 
源代码4 项目: xsoup   文件: ElementOperator.java
/**
 * Extracts text from the direct text-node children of {@code element}.
 * When {@code group} is 0 the text of every text child is concatenated; otherwise
 * the text of the {@code group}-th text child (1-based) is returned.
 *
 * @param element element whose child nodes are inspected
 * @return the selected text; the accumulated (possibly empty) string when no
 *         single text child was selected
 */
@Override
public String operate(Element element) {
    StringBuilder joined = new StringBuilder();
    int seen = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode textNode = (TextNode) child;
        if (group == 0) {
            joined.append(textNode.text());
            continue;
        }
        // Count text children only while looking for a specific one.
        seen++;
        if (seen == group) {
            return textNode.text();
        }
    }
    return joined.toString();
}
 
源代码5 项目: emotional_analysis   文件: SongTest.java
/**
 * Parses the singer and the album out of a song page and prints them.
 *
 * @throws Exception propagated from the HTTP request or from parsing
 */
@Test
public void test4() throws Exception {
	Document page = Jsoup.connect("http://music.163.com/song?id=63650")
			.ignoreContentType(true)
			.execute()
			.parse();
	Elements candidates = page.getElementsByClass("s-fc7");
	// Singer: first child node of the second "s-fc7" element.
	String singer = candidates.get(1).childNode(0).toString();
	// Album: first child node of the third "s-fc7" element.
	String album = candidates.get(2).childNode(0).toString();
	System.out.println(singer + "--------" + album);
}
 
源代码6 项目: emotional_analysis   文件: SearchUtils.java
/**
 * Looks up the name of a song by fetching its mobile page.
 *
 * @param songId id appended to the song page URL
 * @return the string form of the first child node of the first "f-ff2" element,
 *         or a fixed "not found" message when the page has no such element
 * @throws Exception propagated from the HTTP request or from parsing
 */
public static String getSongNameById(long songId) throws Exception {
	Document page = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache")
			.timeout(2000000000)
			.execute()
			.parse();

	Elements titleMatches = page.getElementsByClass("f-ff2");
	if (titleMatches.isEmpty()) {
		return "ES中歌曲在网易云音乐中找不到";
	}
	Node titleNode = titleMatches.get(0).childNode(0);
	return titleNode.toString();
}
 
源代码7 项目: zrlog   文件: ParseUtil.java
/**
 * Builds an HTML digest of {@code str}.
 * The outer HTML of each collected text node's parent is appended until the
 * accumulated text length exceeds {@code size}, at which point " ..." is added.
 * If the input contains a {@code video} element, the first one is prepended,
 * followed by a line break.
 *
 * @param str  HTML to summarise
 * @param size text-length threshold after which the digest is cut off
 * @return the trimmed digest
 */
public static String autoDigest(String str, int size) {
    List<Node> collected = new ArrayList<>();
    // getAllTextNode is defined elsewhere; it fills `collected` from the parsed fragment.
    getAllTextNode(Jsoup.parseBodyFragment(str).childNodes(), collected);

    StringBuilder html = new StringBuilder();
    int consumed = 0;
    for (Node candidate : collected) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        html.append(candidate.parent().outerHtml());
        consumed += ((TextNode) candidate).text().length();
        if (consumed > size) {
            html.append(" ...");
            break;
        }
    }

    String digest = html.toString();
    Elements videos = Jsoup.parse(str).body().select("video");
    if (videos != null && !videos.isEmpty()) {
        digest = videos.get(0).toString() + "<br/>" + digest;
    }
    return digest.trim();
}
 
源代码8 项目: jsoup-learning   文件: NodeTraversor.java
/**
 * Start a depth-first traverse of the root and all of its descendants.
 * <p>
 * {@code visitor.head} is invoked when a node is first reached and {@code visitor.tail}
 * when the walk leaves it (after its children, if it has any). The walk is iterative:
 * it moves through {@code childNode(0)}, {@code nextSibling()} and {@code parent()}
 * instead of recursing.
 *
 * @param root the root node point to traverse.
 */
public void traverse(Node root) {
    Node node = root;
    int depth = 0;
    
    while (node != null) {
        visitor.head(node, depth);
        if (node.childNodeSize() > 0) {
            // Descend to the first child before considering siblings.
            node = node.childNode(0);
            depth++;
        } else {
            // Leaf: climb while there is no next sibling and we are still below the
            // starting depth, tailing every node we leave on the way up.
            while (node.nextSibling() == null && depth > 0) {
                visitor.tail(node, depth);
                node = node.parent();
                depth--;
            }
            // Tail the node we stopped on (either one with a sibling, or depth 0).
            visitor.tail(node, depth);
            // Back at the root: the walk is complete; do not continue to root's siblings.
            if (node == root)
                break;
            node = node.nextSibling();
        }
    }
}
 
源代码9 项目: scava   文件: HtmlParser.java
/**
 * Collects the text of all childless "#text" nodes reachable from {@code nodeList}.
 * Matches of the class-level {@code newline} pattern are removed from each text, and
 * texts that end up empty are skipped.
 *
 * @param nodeList nodes to inspect; nodes with children are descended into
 * @param textList receives the cleaned texts, in traversal order
 */
private static void readNodes(List<Node> nodeList, List<String> textList)
{
	for(Node current : nodeList)
	{
		if(current.childNodeSize()>0)
		{
			readNodes(current.childNodes(), textList);
			continue;
		}
		if(!current.nodeName().equals("#text"))
		{
			continue;
		}
		String wholeText=((TextNode) current).getWholeText();
		String cleaned=newline.matcher(wholeText).replaceAll("");
		if(!cleaned.isEmpty())
		{
			textList.add(cleaned);
		}
	}
}
 
源代码10 项目: scava   文件: HtmlParser.java
/**
 * Collects (tag, text) pairs for all childless "#text" nodes reachable from
 * {@code nodeList}. The tag is the node name of the node whose children are being
 * read; a tag equal to "body" (ignoring case) is recorded as "p".
 *
 * @param nodeList    nodes to inspect; nodes with children are descended into
 * @param textListMap receives one entry per text node, in traversal order
 * @param tag         tag name to record for text nodes found directly in {@code nodeList}
 */
private static void readNodesWithTags(List<Node> nodeList, List<Map.Entry<String,String>> textListMap, String tag)
{
	for(Node current : nodeList)
	{
		if(current.childNodeSize()>0)
		{
			// Children are labelled with the name of the node that contains them.
			readNodesWithTags(current.childNodes(), textListMap, current.nodeName());
			continue;
		}
		if(!current.nodeName().equals("#text"))
		{
			continue;
		}
		if(tag.equalsIgnoreCase("body"))
		{
			tag="p";
		}
		String wholeText=((TextNode) current).getWholeText();
		textListMap.add(new AbstractMap.SimpleEntry<String,String>(tag, wholeText));
	}
}
 
源代码11 项目: Xndroid   文件: OutputFormatter.java
/**
 * Appends the text found under {@code e} to {@code accum}, skipping every child for
 * which {@code unlikely} returns true. A single space is inserted before a block
 * element when the buffer is non-empty and does not already end in whitespace, and
 * otherwise before a {@code br} element.
 *
 * @param e      element whose children are rendered
 * @param accum  buffer receiving the text
 * @param indent recursion depth; only passed on, incremented, to nested calls
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }

        Element nested = (Element) child;
        boolean blockNeedsGap = accum.length() > 0
                && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        // The "br" test is only reached when the block rule did not already apply.
        if (blockNeedsGap || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
 
源代码12 项目: dkpro-c4corpus   文件: Paragraph.java
/**
 * Rebuilds {@code rawText} from the text nodes this object iterates over.
 * For each text node the tag name is set from the node's path, the node text is
 * appended after break normalisation and trimming, and the untrimmed text length is
 * added to {@code charsCountInLinks} when the node is a link.
 */
public void initRawInfo()
{
    StringBuilder collected = new StringBuilder();
    for (Node current : this) {
        if (!(current instanceof TextNode)) {
            continue;
        }
        this.setTagName(getPath(current));

        String text = ((TextNode) current).text();
        String normalised = Utils.normalizeBreaks(text).trim();
        collected.append(normalised);

        if (NodeHelper.isLink(current)) {
            // Counts the raw (untrimmed) text length.
            charsCountInLinks += text.length();
        }
    }

    rawText = collected.toString();
}
 
源代码13 项目: zongtui-webcrawler   文件: ElementOperator.java
/**
 * Returns text taken from the direct text-node children of {@code element}.
 * With {@code group == 0} all text children are concatenated; with any other value
 * the text of the text child at that 1-based position is returned on its own.
 *
 * @param element element whose children are scanned
 * @return the selected text, or whatever was accumulated (possibly empty) when the
 *         requested position was never reached
 */
@Override
public String operate(Element element) {
    final StringBuilder buffer = new StringBuilder();
    int textChildCount = 0;

    for (Node candidate : element.childNodes()) {
        if (candidate instanceof TextNode) {
            TextNode text = (TextNode) candidate;
            boolean collectAll = (group == 0);
            if (collectAll) {
                buffer.append(text.text());
            } else {
                textChildCount += 1;
                if (textChildCount == group) {
                    return text.text();
                }
            }
        }
    }

    return buffer.toString();
}
 
源代码14 项目: Asqatasun   文件: DeepTextElementBuilder.java
/**
 * Builds the text of {@code element} by walking its children recursively.
 * When the element has the {@code ALT_ATTR} attribute, the text produced by
 * {@code altAttrTextBuilder} comes first. Every contribution is preceded by
 * {@code SPACER}, and blank text nodes are ignored.
 *
 * @param element element to extract text from
 * @return the collected text, trimmed
 */
@Override
public String buildTextFromElement(Element element) {
    StringBuilder collected = new StringBuilder();

    if (element.hasAttr(ALT_ATTR)) {
        collected.append(SPACER);
        String altText = altAttrTextBuilder.buildTextFromElement(element);
        collected.append(altText);
    }

    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            TextNode textChild = (TextNode) child;
            if (!textChild.isBlank()) {
                collected.append(SPACER);
                collected.append(StringUtils.trim(textChild.text()));
            }
            continue;
        }
        if (child instanceof Element) {
            collected.append(SPACER);
            String nestedText = buildTextFromElement((Element) child);
            collected.append(nestedText);
        }
    }

    return StringUtils.trim(collected.toString());
}
 
源代码15 项目: flow   文件: ElementUtil.java
/**
 * Converts the given element, together with its children, into a JSoup node.
 * A text element becomes a {@code TextNode}. Any other element becomes a JSoup
 * element with the same tag, its "innerHTML" property (if present) as HTML content,
 * its attributes copied, and its children converted recursively and appended.
 *
 * @param document
 *            A JSoup document, used to create elements and to supply the base URI
 * @param element
 *            The element to convert
 * @return A JSoup node containing the converted element
 */
public static Node toJsoup(Document document, Element element) {
    if (element.isTextNode()) {
        return new TextNode(element.getText(), document.baseUri());
    }

    final org.jsoup.nodes.Element converted = document
            .createElement(element.getTag());

    if (element.hasProperty("innerHTML")) {
        String innerHtml = (String) element.getPropertyRaw("innerHTML");
        converted.html(innerHtml);
    }

    element.getAttributeNames().forEach(attributeName -> {
        String value = element.getAttribute(attributeName);
        if (!"".equals(value)) {
            converted.attr(attributeName, value);
            return;
        }
        // An empty value is written as a boolean attribute.
        converted.attr(attributeName, true);
    });

    element.getChildren().forEach(child -> {
        Node convertedChild = toJsoup(document, child);
        converted.appendChild(convertedChild);
    });

    return converted;
}
 
源代码16 项目: netcdf-java   文件: NcepHtmlScraper.java
/**
 * Fetches the GRIB2 documentation index and dispatches every "Table 4..." link.
 * The link titled exactly "Table 4.2" goes to {@code parseTable42}; other links whose
 * text starts with "Table 4" go to {@code parseCodeTable}. The title passed along is
 * the link's next sibling rendered as a string, with "-" removed and trimmed, or
 * {@code null} when there is no sibling.
 *
 * @throws IOException if the page cannot be fetched or parsed
 */
void parseTopDoc() throws IOException {
  String source = "https://www.nco.ncep.noaa.gov/pmb/docs/grib2/grib2_doc/";
  Document doc = Jsoup.parse(new URL(source), 5 * 1000); // 5 sec timeout

  for (Element link : doc.select("a[href]")) {
    Node sibling = link.nextSibling();
    String title = (sibling == null)
        ? null
        : StringUtil2.remove(sibling.toString(), "-").trim();

    String label = link.text();
    if (label.equals("Table 4.2")) {
      parseTable42(link.attr("abs:href"), label, title);
    } else if (label.startsWith("Table 4")) {
      parseCodeTable(link.attr("abs:href"), label, title);
    }
  }
}
 
源代码17 项目: astor   文件: Evaluator.java
/**
 * Matches when every child node of {@code element} is a comment, an XML declaration
 * or a document type, which includes the case of no child nodes at all.
 *
 * @param root    root of the evaluation (not used here)
 * @param element element under test
 * @return {@code false} as soon as any other kind of child node is found
 */
@Override
public boolean matches(Element root, Element element) {
    for (Node child : element.childNodes()) {
        boolean ignorable = child instanceof Comment
                || child instanceof XmlDeclaration
                || child instanceof DocumentType;
        if (!ignorable) {
            return false;
        }
    }
    return true;
}
 
源代码18 项目: jsoup-learning   文件: XmlTreeBuilderTest.java
/** Parsing an XML fragment yields its three top-level nodes, with URLs resolved against the base URI. */
@Test public void xmlFragment() {
    List<Node> parsed = Parser.parseXmlFragment(
            "<one src='/foo/' />Two<three><four /></three>", "http://example.com/");
    assertEquals(3, parsed.size());

    Node first = parsed.get(0);
    assertEquals("http://example.com/foo/", first.absUrl("src"));
    assertEquals("one", first.nodeName());

    TextNode second = (TextNode) parsed.get(1);
    assertEquals("Two", second.text());
}
 
源代码19 项目: astor   文件: XmlTreeBuilderTest.java
/** Checks node count, URL resolution, node name and text of a parsed XML fragment. */
@Test public void xmlFragment() {
    final String baseUri = "http://example.com/";
    final String fragment = "<one src='/foo/' />Two<three><four /></three>";

    List<Node> result = Parser.parseXmlFragment(fragment, baseUri);
    assertEquals(3, result.size());

    // First node: the <one> element, whose relative src resolves against the base URI.
    Node oneElement = result.get(0);
    assertEquals("http://example.com/foo/", oneElement.absUrl("src"));
    assertEquals("one", oneElement.nodeName());

    // Second node: the bare text between the elements.
    Node textNode = result.get(1);
    assertEquals("Two", ((TextNode) textNode).text());
}
 
源代码20 项目: FairEmail   文件: HtmlHelper.java
/**
 * Truncates the text of {@code d} in place once a size limit is reached.
 * Elements are visited in the order returned by {@code d.select("*")}. The text node
 * that crosses the limit is cut and suffixed with " ...", later text nodes of that
 * same element are emptied, and elements visited once the limit has been reached
 * (other than the one where the cut happened) are removed.
 *
 * @param d        document to truncate; modified in place
 * @param reformat selects {@code MAX_FORMAT_TEXT_SIZE} when true, otherwise
 *                 {@code MAX_FULL_TEXT_SIZE}, as the limit
 * @return {@code true} when the accumulated text length reached the limit
 */
static boolean truncate(Document d, boolean reformat) {
    final int limit = reformat ? MAX_FORMAT_TEXT_SIZE : MAX_FULL_TEXT_SIZE;

    int total = 0;
    int imageCount = 0;
    for (Element element : d.select("*")) {
        if ("img".equals(element.tagName())) {
            imageCount++;
        }

        // Set when this element contains the text node that was cut.
        boolean cutHere = false;
        for (Node child : element.childNodes()) {
            if (!(child instanceof TextNode)) {
                continue;
            }
            TextNode textNode = (TextNode) child;
            String content = textNode.getWholeText();

            if (total >= limit) {
                if (cutHere) {
                    textNode.text("");
                }
            } else if (total + content.length() >= limit) {
                content = content.substring(0, limit - total) + " ...";
                textNode.text(content);
                cutHere = true;
            }

            total += content.length();
        }

        if (total >= limit && !cutHere) {
            element.remove();
        }
    }

    Log.i("Message size=" + total + " images=" + imageCount);

    return total >= limit;
}
 
源代码21 项目: astor   文件: Parser.java
/**
 * Parses an HTML fragment and places the resulting nodes in the {@code body} of a
 * freshly created shell document.
 *
 * @param bodyHtml fragment of HTML
 * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
 *
 * @return the shell document, with the parsed nodes appended to its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element body = shell.body();

    List<Node> parsed = parseFragment(bodyHtml, body, baseUri);
    // Work on a snapshot: the live list changes as nodes are re-parented.
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // Detach from the last node down to index 1; index 0 is not removed here.
    int index = snapshot.length - 1;
    while (index > 0) {
        snapshot[index].remove();
        index--;
    }

    for (int i = 0; i < snapshot.length; i++) {
        body.appendChild(snapshot[i]);
    }
    return shell;
}
 
源代码22 项目: onedev   文件: TextNodeVisitor.java
/**
 * Records {@code node} in {@code matchedNodes} when it is a text node for which
 * {@code isApplicable} returns true.
 *
 * @param node  node the traversal is leaving
 * @param depth depth of the node (not used)
 */
@Override
public void tail(Node node, int depth) {
	if (!(node instanceof TextNode)) {
		return;
	}
	TextNode textNode = (TextNode) node;
	if (isApplicable(textNode)) {
		matchedNodes.add(textNode);
	}
}
 
源代码23 项目: frameworkAggregate   文件: MyJsoup.java
/**
 * Scrapes the flower categories from the encyclopedia index page.
 * Every child of the first "catelist" element that itself has children yields one
 * {@code FlowerCategory}: an {@code a} child supplies the URL and, from its second
 * child node, the image path; an {@code h2} child supplies the name via its
 * {@code title} attribute.
 *
 * @return the categories found; empty when the page has no "catelist" element or
 *         when fetching fails (the {@code IOException} stack trace is printed)
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Element cates = doc.getElementsByClass("catelist").first();
		if (cates == null) {
			// first() yields null when nothing matched; avoid an NPE on childNodes().
			return categories;
		}

		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs.isEmpty()) {
				continue;
			}

			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				if ("a".equals(child.nodeName())) {
					category.setUrl(child.attr("href"));
					// The image is read from the anchor's second child node; skip it when
					// the anchor is too short instead of throwing IndexOutOfBoundsException.
					if (child.childNodeSize() > 1) {
						category.setImgPath(child.childNode(1).attr("src"));
					}
				} else if ("h2".equals(child.nodeName())) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		e.printStackTrace();
	}

	return categories;
}
 
源代码24 项目: emotional_analysis   文件: IpProxy.java
/**
 * Fetches {@code url} with a fixed set of browser-like request headers and reads
 * proxy IP/port pairs out of the returned page by fixed child-node positions.
 * <p>
 * NOTE(review): the Cookie header below hard-codes what look like real session
 * values in source; confirm whether they should be removed or externalised.
 *
 * @param url address of the page to fetch
 * @return one {@code IpEntity} per visited row index
 * @throws Exception propagated from the HTTP request, from parsing, from the
 *                   positional node lookups, or from {@code Integer.parseInt}
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	ArrayList<IpEntity> ipList = new ArrayList<>();
	Response execute = Jsoup.connect(url)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "max-age=60").header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com").header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie",
					"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
			.method(Method.GET).ignoreContentType(true)
			.timeout(2099999999).execute();
	Document pageJson = execute.parse();
	Element body = pageJson.body();
	// Positional walk down to the node whose children hold the proxy rows; this
	// depends on the exact layout of the fetched page.
	List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();
	// Put the first 10 proxy IPs into the list.
	// NOTE(review): the loop bounds (i = 2, 4, ..., 30) visit 15 indices, not 10;
	// confirm which is intended.
	for(int i = 2;i <= 30;i += 2){
		IpEntity ipEntity = new IpEntity();
		Node node = childNodes.get(i);
		List<Node> nodes = node.childNodes();
		// The IP is read from child 3 and the port from child 5 of the row.
		String ip = nodes.get(3).childNode(0).toString();
		int port = Integer.parseInt(nodes.get(5).childNode(0).toString());
		ipEntity.setIp(ip);
		ipEntity.setPort(port);
		ipList.add(ipEntity);
	}
	return ipList;
}
 
源代码25 项目: emotional_analysis   文件: GetSongName.java
/**
 * Fetches the mobile page of a fixed song and prints its name.
 *
 * @throws Exception propagated from the HTTP request or from parsing
 */
@Test
public void test1() throws Exception {
	Document page = Jsoup.connect("http://music.163.com/m/song?id=" + 91445)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache")
			.timeout(2000000000)
			.execute()
			.parse();

	// Get the song name: first child node of the first "f-ff2" element.
	Node titleNode = page.getElementsByClass("f-ff2").get(0).childNode(0);
	System.out.println(titleNode.toString());
}
 
源代码26 项目: NetDiscovery   文件: CssSelector.java
/**
 * Concatenates the text of the direct text-node children of {@code element};
 * text nested inside child elements is not included.
 *
 * @param element element whose own text is wanted
 * @return the joined text, empty when there are no text children
 */
protected String getText(Element element) {
    StringBuilder ownText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        ownText.append(((TextNode) child).text());
    }
    return ownText.toString();
}
 
源代码27 项目: jsoup-learning   文件: HtmlToPlainText.java
/**
 * Emits the plain-text suffix for {@code node} when the traversal leaves it:
 * one newline after {@code br}, two after a paragraph or heading, and the absolute
 * link target in angle brackets after an anchor.
 *
 * @param node  node being left
 * @param depth depth of the node (not used)
 */
public void tail(Node node, int depth) {
    final String tag = node.nodeName();
    if (tag.equals("br")) {
        append("\n");
        return;
    }
    if (StringUtil.in(tag, "p", "h1", "h2", "h3", "h4", "h5")) {
        append("\n\n");
        return;
    }
    if (tag.equals("a")) {
        String target = node.absUrl("href");
        append(String.format(" <%s>", target));
    }
}
 
源代码28 项目: Xndroid   文件: OutputFormatter.java
/**
 * Decides whether a node should be skipped, based on its {@code class} and
 * {@code style} attributes.
 *
 * @param e node to inspect
 * @return {@code true} when the class contains "caption" (case-insensitively), or
 *         when {@code unlikelyPattern} is found in the style or the class
 */
private boolean unlikely(Node e) {
    String clazz = e.attr("class");
    // Lower-case with Locale.ROOT: under e.g. a Turkish default locale "I" maps to a
    // dotless "ı", so "CAPTION" would no longer contain "caption".
    if (clazz != null && clazz.toLowerCase(java.util.Locale.ROOT).contains("caption"))
        return true;

    String style = e.attr("style");
    return unlikelyPattern.matcher(style).find() || unlikelyPattern.matcher(clazz).find();
}
 
源代码29 项目: astor   文件: HtmlParserTest.java
/** Parsing a fragment with a null context element returns a single {@code html} node. */
@Test public void handleNullContextInParseFragment() {
    List<Node> parsed = Parser.parseFragment("<ol><li>One</li></ol><p>Two</p>", null, "http://example.com/");

    // returns <html> node (not document) -- no context means doc gets created
    assertEquals(1, parsed.size());

    Node htmlNode = parsed.get(0);
    assertEquals("html", htmlNode.nodeName());

    String normalised = StringUtil.normaliseWhitespace(htmlNode.outerHtml());
    assertEquals("<html> <head></head> <body> <ol> <li>One</li> </ol> <p>Two</p> </body> </html>", normalised);
}
 
源代码30 项目: SnowGraph   文件: StackOverflowParser.java
/**
 * Collects the code snippets found beneath {@code node}.
 * {@code p} and {@code li} children are skipped entirely. For a {@code pre} child,
 * each direct {@code code} child yields one {@code CodeInfo} built from its
 * HTML-unescaped text. Any other child is searched recursively.
 *
 * @param node node whose children are scanned
 * @return the snippets in document order; empty when none are found
 */
private static List<CodeInfo> parseHTMLNodeToParagraphs(Node node) {
	List<CodeInfo> snippets = new ArrayList<>();
	for (Node child : node.childNodes()) {
		String tag = child.nodeName();
		if (tag.equals("p") || tag.equals("li")) {
			continue;
		}
		if (!tag.equals("pre")) {
			snippets.addAll(parseHTMLNodeToParagraphs(child));
			continue;
		}
		// <pre>: only its direct <code> children are taken.
		for (Node inner : child.childNodes()) {
			if (inner.nodeName().equals("code")) {
				String code = StringEscapeUtils.unescapeHtml4(((Element) inner).text());
				snippets.add(new CodeInfo(code));
			}
		}
	}
	return snippets;
}