下面列出了 org.jsoup.nodes.Node#childNodes() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。
/**
 * Builds a {@link com.vaadin.flow.dom.Element} tree that mirrors the given
 * JSoup {@link org.jsoup.nodes.Node} and its descendants.
 * <p>
 * A {@link org.jsoup.nodes.TextNode} is turned into a text element. An
 * {@link org.jsoup.nodes.Element} is turned into an element with the same
 * tag name and attributes, and each of its children is converted
 * recursively. Any other node type is logged as an error and produces an
 * empty optional; when such a node is a child, it is left out of the
 * resulting hierarchy.
 *
 * @param node
 *            JSoup node to convert
 * @return the element matching the given node's hierarchy, or an empty
 *         optional if the node type is not supported
 */
public static Optional<Element> fromJsoup(Node node) {
    // Text nodes are leaves: convert and return right away.
    if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        return Optional.of(Element.createText(textNode.text()));
    }

    // Everything that is neither text nor an element is unsupported.
    if (!(node instanceof org.jsoup.nodes.Element)) {
        LoggerFactory.getLogger(ElementUtil.class).error(
                "Could not convert a {}, '{}' into {}!",
                Node.class.getName(), node, Element.class.getName());
        return Optional.empty();
    }

    org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
    final Element converted = new Element(source.tagName());

    // Copy every attribute verbatim.
    node.attributes().asList().forEach(
            attr -> converted.setAttribute(attr.getKey(), attr.getValue()));

    // Convert the children in document order, dropping unsupported ones.
    for (Node child : node.childNodes()) {
        fromJsoup(child).ifPresent(converted::appendChild);
    }
    return Optional.of(converted);
}
/**
 * Downloads the flower encyclopedia index page and builds the list of
 * categories found in the first element carrying the CSS class
 * {@code catelist}.
 * <p>
 * Every child of that element which itself has children yields one
 * {@link FlowerCategory}: an {@code a} child supplies the URL
 * ({@code href}) and the image path ({@code src} of its second child
 * node), and an {@code h2} child supplies the name ({@code title}).
 * <p>
 * This is best-effort: an I/O failure is printed to standard error and the
 * categories collected so far (possibly none) are returned. A page without
 * a {@code catelist} element yields an empty list.
 *
 * @return the categories found; never {@code null}
 */
private static List<FlowerCategory> getCategoryList() {
    List<FlowerCategory> categories = new ArrayList<FlowerCategory>();
    try {
        Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
        Element cates = doc.getElementsByClass("catelist").first();
        if (cates == null) {
            // first() returns null when no element matched; without this
            // check a changed page layout would end in a NullPointerException.
            return categories;
        }
        for (Node node : cates.childNodes()) {
            // Nodes without children (e.g. whitespace text between entries)
            // cannot describe a category.
            if (node.childNodeSize() == 0) {
                continue;
            }
            FlowerCategory category = new FlowerCategory();
            for (Node child : node.childNodes()) {
                if ("a".equals(child.nodeName())) {
                    category.setUrl(child.attr("href"));
                    // The image is read from the link's second child node;
                    // skip it when the link has fewer children instead of
                    // failing with an IndexOutOfBoundsException.
                    if (child.childNodeSize() > 1) {
                        category.setImgPath(child.childNode(1).attr("src"));
                    }
                } else if ("h2".equals(child.nodeName())) {
                    category.setName(child.attr("title"));
                }
            }
            categories.add(category);
        }
    } catch (IOException e) {
        // Best-effort scrape: report the failure and return what we have.
        e.printStackTrace();
    }
    return categories;
}
/**
 * Downloads the flower encyclopedia index page and builds the list of
 * categories found in the first element carrying the CSS class
 * {@code catelist}.
 * <p>
 * Every child of that element which itself has children yields one
 * {@link FlowerCategory}: an {@code a} child supplies the URL
 * ({@code href}) and the image path ({@code src} of its second child
 * node), and an {@code h2} child supplies the name ({@code title}).
 * <p>
 * This is best-effort: an I/O failure is printed to standard error and the
 * categories collected so far (possibly none) are returned. A page without
 * a {@code catelist} element yields an empty list.
 *
 * @return the categories found; never {@code null}
 */
private static List<FlowerCategory> getCategoryList() {
    List<FlowerCategory> categories = new ArrayList<FlowerCategory>();
    try {
        Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
        Element cates = doc.getElementsByClass("catelist").first();
        if (cates == null) {
            // first() returns null when no element matched; without this
            // check a changed page layout would end in a NullPointerException.
            return categories;
        }
        for (Node node : cates.childNodes()) {
            // Nodes without children (e.g. whitespace text between entries)
            // cannot describe a category.
            if (node.childNodeSize() == 0) {
                continue;
            }
            FlowerCategory category = new FlowerCategory();
            for (Node child : node.childNodes()) {
                if ("a".equals(child.nodeName())) {
                    category.setUrl(child.attr("href"));
                    // The image is read from the link's second child node;
                    // skip it when the link has fewer children instead of
                    // failing with an IndexOutOfBoundsException.
                    if (child.childNodeSize() > 1) {
                        category.setImgPath(child.childNode(1).attr("src"));
                    }
                } else if ("h2".equals(child.nodeName())) {
                    category.setName(child.attr("title"));
                }
            }
            categories.add(category);
        }
    } catch (IOException e) {
        // Best-effort scrape: report the failure and return what we have.
        e.printStackTrace();
    }
    return categories;
}
/**
 * Fetches the given page and extracts proxy IP/port pairs from a table
 * located at a fixed position in the document body.
 * <p>
 * The rows are read from the even child indices 2, 4, ..., 30 of the table
 * node, so at most 15 entries are returned. Fewer are returned when the
 * table is shorter, and rows that do not have the expected cells are
 * skipped.
 *
 * @param url
 *            address of the page listing the proxies
 * @return the proxies found, in page order; never {@code null}
 * @throws Exception
 *             if the request fails, the page does not have the expected
 *             outer structure, or a port cell is not a valid integer
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
    List<IpEntity> ipList = new ArrayList<>();
    // NOTE(review): the Referer/Origin/Host headers and the session cookie
    // below are hard-coded for music.163.com and are sent to whatever url
    // the caller passes in. The cookie looks like a captured login session
    // and probably should not live in source code - confirm and remove.
    Response execute = Jsoup.connect(url)
            .header("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
            .header("Cache-Control", "max-age=60").header("Accept", "*/*")
            .header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive")
            .header("Referer", "http://music.163.com/song?id=186016")
            .header("Origin", "http://music.163.com").header("Host", "music.163.com")
            .header("Content-Type", "application/x-www-form-urlencoded")
            .header("Cookie",
                    "UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
            .method(Method.GET).ignoreContentType(true)
            .timeout(2099999999).execute();
    Document pageJson = execute.parse();
    Element body = pageJson.body();
    // Fixed path to the table node whose children are the proxy rows.
    List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();
    // Visit every second child from index 2 to 30 (at most 15 rows);
    // presumably the odd indices are whitespace text nodes between rows -
    // TODO confirm. Stop early when the table has fewer children.
    for (int i = 2; i <= 30 && i < childNodes.size(); i += 2) {
        Node row = childNodes.get(i);
        // The IP is read from child 3 and the port from child 5 of the row.
        if (row.childNodeSize() <= 5) {
            continue;
        }
        List<Node> cells = row.childNodes();
        Node ipCell = cells.get(3);
        Node portCell = cells.get(5);
        if (ipCell.childNodeSize() == 0 || portCell.childNodeSize() == 0) {
            continue;
        }
        // Trim so stray whitespace in the rendered node text neither ends
        // up in the IP nor breaks Integer.parseInt.
        String ip = ipCell.childNode(0).toString().trim();
        int port = Integer.parseInt(portCell.childNode(0).toString().trim());
        IpEntity ipEntity = new IpEntity();
        ipEntity.setIp(ip);
        ipEntity.setPort(port);
        ipList.add(ipEntity);
    }
    return ipList;
}
/**
 * Creates a new table, adds it to the given parent and fills it from the
 * children of the table node.
 * <p>
 * Each direct {@code tr} child becomes a row, and each {@code th} or
 * {@code td} child of that row becomes a cell. Header cells ({@code th})
 * are centered and their contents are walked with a copy of the context
 * that has the bold modifier set; data cells use the context as given.
 * Other children are ignored.
 *
 * @param parent
 *            the parent {@link MList}
 * @param context
 *            the current {@link Context}
 * @param node
 *            the table {@link Node}
 */
private void insertTable(MList parent, Context context, Node node) {
    final MTable table = new MTableImpl();
    parent.add(table);

    for (Node rowNode : node.childNodes()) {
        if (!"tr".equals(rowNode.nodeName())) {
            continue;
        }
        final MRow row = new MRowImpl();
        table.getRows().add(row);

        for (Node cellNode : rowNode.childNodes()) {
            final String cellTag = cellNode.nodeName();
            final boolean isHeader = "th".equals(cellTag);
            if (!isHeader && !"td".equals(cellTag)) {
                continue;
            }

            final MList contents = new MListImpl();
            final MCell cell = new MCellImpl(contents, null);
            Context cellContext = context;
            if (isHeader) {
                // Header cells get their own context so the bold modifier
                // does not leak into the caller's context.
                cell.setHAlignment(HAlignment.CENTER);
                cellContext = context.copy();
                setModifiers(cellContext, MStyle.FONT_BOLD);
            }
            row.getCells().add(cell);

            for (Node contentNode : cellNode.childNodes()) {
                walkNodeTree(contents, cellContext, contentNode);
            }
        }
    }
}
/**
 * Recursively collects the code snippets found below the given node.
 * <p>
 * Children named {@code p} or {@code li} are skipped entirely. For a
 * {@code pre} child, every direct {@code code} child contributes one
 * {@link CodeInfo} built from its HTML-unescaped text. Any other child is
 * searched recursively.
 *
 * @param node
 *            the node whose children are inspected
 * @return the code snippets in document order; never {@code null}
 */
private static List<CodeInfo> parseHTMLNodeToParagraphs(Node node) {
    final List<CodeInfo> collected = new ArrayList<>();
    for (Node current : node.childNodes()) {
        final String tag = current.nodeName();
        if (tag.equals("p") || tag.equals("li")) {
            continue;
        }
        if (!tag.equals("pre")) {
            // Not a code container itself: look further down the tree.
            collected.addAll(parseHTMLNodeToParagraphs(current));
            continue;
        }
        for (Node candidate : current.childNodes()) {
            if (candidate.nodeName().equals("code")) {
                final String rawText = ((Element) candidate).text();
                collected.add(new CodeInfo(StringEscapeUtils.unescapeHtml4(rawText)));
            }
        }
    }
    return collected;
}
/**
 * Visits the given node and its descendants depth-first, appending text to
 * the builder and registering annotations for elements.
 * <p>
 * For each node: the builder offset is recorded, the node's mapped text is
 * appended (when text capture is on and the text is non-empty), the
 * children are walked, and finally any annotations mapped from the element
 * are added so that they span everything appended for the node and its
 * descendants.
 *
 * @param builder the builder receiving text and annotations
 * @param root the node to visit; {@code null} is ignored
 * @param depth the depth passed along with this node's annotations;
 *     children are walked with {@code depth + 1}
 * @param captureText whether node text is appended to the builder
 */
private void walk(
    final JCasBuilder builder, final Node root, final int depth, final boolean captureText) {
  if (root == null) {
    return;
  }

  final int begin = builder.getCurrentOffset();

  if (captureText) {
    final String text = mapToText(root);
    if (!Strings.isNullOrEmpty(text)) {
      builder.addText(text);
    }
  }

  // Only elements can be mapped to annotations; map before descending.
  final List<Annotation> annotations =
      root instanceof Element
          ? mapElementToAnnotations(builder.getJCas(), (Element) root)
          : null;

  // Known issue: with multiple mappers the depth is wrong, as all of their
  // annotations end up at the same depth (they do share begin, end and
  // element, though).

  // Only ask for the child list when the node reports children.
  if (root.childNodeSize() > 0) {
    for (final Node child : root.childNodes()) {
      walk(builder, child, depth + 1, captureText);
    }
  }

  // The end offset now includes everything appended by the descendants.
  final int end = builder.getCurrentOffset();
  if (annotations != null && !annotations.isEmpty()) {
    builder.addAnnotations(annotations, begin, end, depth);
  }
}
/**
 * Recursively appends the children of the given node to the builder,
 * translating recognized node names into the corresponding markup
 * constants (presumably AsciiDoc, judging by the constant names).
 * <p>
 * Text nodes are appended through {@code appendEscapedAsciiDoc}. Nodes
 * whose name is not recognized contribute only their children.
 *
 * @param sb the builder receiving the output
 * @param node the node whose children are rendered
 */
private void appendHtml(StringBuilder sb, Node node) {
    for (Node child : node.childNodes()) {
        final String tag = child.nodeName();

        if (tag.equals(PARAGRAPH_NODE)) {
            sb.append(NEW_LINE);
            appendHtml(sb, child);
        } else if (tag.equals(ORDERED_LIST_NODE) || tag.equals(UN_ORDERED_LIST_NODE)) {
            // The list itself emits nothing; its items carry the markers.
            appendHtml(sb, child);
        } else if (tag.equals(LIST_ITEM_NODE)) {
            // The marker depends on the kind of list the item sits in.
            final String bullet;
            if (child.parent().nodeName().equals(ORDERED_LIST_NODE)) {
                bullet = ORDERED_LIST_ITEM_ASCIDOC_STYLE;
            } else {
                bullet = UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
            }
            sb.append(NEW_LINE).append(bullet);
            appendHtml(sb, child);
        } else if (tag.equals(LINK_NODE)) {
            sb.append("link:").append(child.attr(HREF_ATTRIBUTE));
            // Render the link body separately so it can be trimmed.
            final StringBuilder label = new StringBuilder();
            appendHtml(label, child);
            sb.append(String.format(LINK_ATTRIBUTE_FORMAT, label.toString().trim()));
        } else if (tag.equals(CODE_NODE)) {
            sb.append(BACKTICK);
            appendHtml(sb, child);
            sb.append(BACKTICK);
        } else if (tag.equals(BOLD_NODE) || tag.equals(EMPHASIS_NODE)) {
            // NOTE(review): emphasis is rendered with the same marker as
            // bold rather than with the italics marker - confirm intended.
            sb.append(STAR);
            appendHtml(sb, child);
            sb.append(STAR);
        } else if (tag.equals(ITALICS_NODE)) {
            sb.append(UNDERSCORE);
            appendHtml(sb, child);
            sb.append(UNDERSCORE);
        } else if (tag.equals(UNDERLINE_NODE)) {
            sb.append(UNDERLINE_ASCIDOC_STYLE).append(HASH);
            appendHtml(sb, child);
            sb.append(HASH);
        } else if (tag.equals(SMALL_NODE)) {
            sb.append(SMALL_ASCIDOC_STYLE).append(HASH);
            appendHtml(sb, child);
            sb.append(HASH);
        } else if (tag.equals(BIG_NODE)) {
            sb.append(BIG_ASCIDOC_STYLE).append(HASH);
            appendHtml(sb, child);
            sb.append(HASH);
        } else if (tag.equals(SUB_SCRIPT_NODE)) {
            sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
            appendHtml(sb, child);
            sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
        } else if (tag.equals(SUPER_SCRIPT_NODE)) {
            sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
            appendHtml(sb, child);
            sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
        } else if (tag.equals(DEL_NODE) || tag.equals(S_NODE) || tag.equals(STRIKE_NODE)) {
            sb.append(LINE_THROUGH_ASCIDOC_STYLE).append(HASH);
            appendHtml(sb, child);
            sb.append(HASH);
        } else if (tag.equals(NEW_LINE_NODE)) {
            sb.append(NEW_LINE);
        } else if (tag.equals(TEXT_NODE)) {
            appendEscapedAsciiDoc(sb, ((TextNode) child).text());
        } else {
            // Unknown node: drop the node itself, keep its content.
            appendHtml(sb, child);
        }
    }
}