org.jsoup.nodes.Document#select() 源码实例 Demo

下面列出了 org.jsoup.nodes.Document#select() 的实例代码，或者点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: jsoup-learning   文件: SelectorTest.java
/**
 * Checks that the {@code :has()} pseudo selector can wrap another
 * {@code :has()}, a {@code :matches()} and a {@code :contains()} selector.
 */
@Test public void testNestedHas() {
    Document doc = Jsoup.parse("<div><p><span>One</span></p></div> <div><p>Two</p></div>");

    // :has nested in :has -- only the first div holds a p that holds a span
    Elements withSpan = doc.select("div:has(p:has(span))");
    assertEquals(1, withSpan.size());
    assertEquals("One", withSpan.first().text());

    // :matches nested in :has -- the (?i) regex is expected to hit the "Two" div
    Elements byRegex = doc.select("div:has(p:matches((?i)two))");
    assertEquals(1, byRegex.size());
    assertEquals("div", byRegex.first().tagName());
    assertEquals("Two", byRegex.first().text());

    // :contains nested in :has -- lower-case "two" is expected to hit the "Two" div
    Elements byContains = doc.select("div:has(p:contains(two))");
    assertEquals(1, byContains.size());
    assertEquals("div", byContains.first().tagName());
    assertEquals("Two", byContains.first().text());
}
 
源代码2 项目: guanggoo-android   文件: AuthCheckTask.java
/**
 * Runs the superclass handling, then (unless the task was cancelled) loads
 * the page at {@code ConstantUtil.VERIFY_TELEPHONE_URL} and publishes whether
 * the telephone counts as verified. The telephone is treated as verified
 * when the page contains no {@code button#getSmsCode} element.
 *
 * @param data value forwarded unchanged to {@code super.successOnUI}
 */
@Override
protected void successOnUI(String data) {
    super.successOnUI(data);

    if (mIsCanceled) {
        return;
    }

    Document page;
    try {
        page = get(ConstantUtil.VERIFY_TELEPHONE_URL);
    } catch (IOException e) {
        // Loading failed: report it and leave the stored flag untouched.
        e.printStackTrace();
        return;
    }

    final boolean telephoneVerified = page.select("button#getSmsCode").isEmpty();

    // The value is published through mHandler rather than set directly.
    mHandler.post(() -> {
        App.getInstance().mGlobal.telephoneVerified.setValue(telephoneVerified);
    });
}
 
源代码3 项目: Hentoid   文件: NexusParser.java
/**
 * Opens every reader page of {@code content} and collects the {@code src}
 * of the first {@code section a img} element found on each page.
 * Pages that cannot be loaded, or that contain no such image, contribute
 * nothing to the result; progress is still advanced for them.
 *
 * @param content the content whose reader pages are visited
 * @return the collected image URLs, in page order
 * @throws IOException declared by the signature
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws IOException {
    List<String> imageUrls = new ArrayList<>();

    progressStart(content.getQtyPages());

    for (int pageIndex = 0; pageIndex < content.getQtyPages(); pageIndex++) {
        // The reader URL carries "001"; it is swapped for the 1-based page number
        // as formatted by Helper.formatIntAsStr(n, 3) (presumably zero-padded -- confirm).
        String pageUrl = content.getReaderUrl().replace("001", Helper.formatIntAsStr(pageIndex + 1, 3));
        Document pageDoc = getOnlineDocument(pageUrl);

        Elements images = (pageDoc == null) ? null : pageDoc.select("section a img");
        if (images != null && !images.isEmpty()) {
            imageUrls.add(images.first().attr("src"));
        }

        progressPlus();
    }

    progressComplete();

    return imageUrls;
}
 
源代码4 项目: job   文件: ZhilianEmailResumeParser.java
/**
 * Looks for the first link (selector {@code table table table table tr td a})
 * whose href contains both {@code ldparam=} and {@code url=}, downloads the
 * address that follows the last {@code url=}, and copies the text of the
 * first three {@code div.login_content p a} links of that page into the
 * resume as name, phone and mail.
 * <p>
 * Any failure while downloading or reading the page is printed to
 * {@code System.err}; only the first matching link is ever processed.
 *
 * @param resume the resume to fill in
 * @param doc    the parsed mail document that is searched for the link
 */
protected void tryFetchContact(ZhilianResume resume, Document doc) {
  final String urlMarker = "url=";
  final String ldparamMarker = "ldparam=";

  for (Element link : doc.select("table table table table tr td a")) {
    String href = link.attr("href");
    if (!href.contains(ldparamMarker) || !href.contains(urlMarker)) {
      continue;
    }

    // Everything after the last "url=" is the address to fetch.
    String target = href.substring(href.lastIndexOf(urlMarker) + urlMarker.length());
    try {
      String body = Request.Get(target).execute().returnContent().asString();
      Elements infos = Jsoup.parse(body).select("div.login_content p a");
      resume.setName(infos.get(0).text());
      resume.setPhone(infos.get(1).text());
      resume.setMail(infos.get(2).text());
    } catch (Exception e) {
      e.printStackTrace(System.err);
    }

    // Stop after the first matching link, whether or not the fetch succeeded.
    return;
  }
}
 
源代码5 项目: japicmp   文件: ITClassFileFormatVersion.java
/**
 * Verifies that the generated report
 * {@code target/japicmp/class-file-format-version.html} contains elements
 * with class {@code class_fileFormatVersion}, and that every table cell
 * below them reads {@code MODIFIED}, {@code 50.0} or {@code 52.0}.
 * The test returns silently when the report file does not exist.
 *
 * @throws IOException if the report cannot be read
 */
@Test
public void testClassFileFormatVersionIsPresent() throws IOException {
	Path reportPath = Paths.get(System.getProperty("user.dir"), "target", "japicmp", "class-file-format-version.html");
	if (!Files.exists(reportPath)) {
		// No report to check (the original note attributes this to the JDK 1.7 case).
		return;
	}

	Document report = Jsoup.parse(reportPath.toFile(), Charset.forName("UTF-8").toString());

	Elements versionSections = report.select(".class_fileFormatVersion");
	assertThat(versionSections.isEmpty(), is(false));

	Elements cells = versionSections.select("table > tbody > tr > td");
	assertThat(cells.isEmpty(), is(false));

	for (Element cell : cells) {
		String text = cell.text();
		boolean accepted = "MODIFIED".equals(text) || "50.0".equals(text) || "52.0".equals(text);
		if (!accepted) {
			Assert.fail("text of HTML element does not equal 'MODIFIED' or 50.0 or 52.0: " + text);
		}
	}
}
 
/**
 * Parses {@code htmlPage} and records every followable link in
 * {@code allPages}, keyed by the absolute link URL with {@code referer} as
 * the value. A link is recorded when its absolute URL is non-empty, is not a
 * {@code mailto:} address, is not rejected by
 * {@code SinglePageCheckService.ignoreUrl}, starts with {@code check.getUrl()}
 * and differs from {@code referer}.
 * <p>
 * When the {@code abort} flag is set, "aborted" is appended as a message and
 * processing stops.
 *
 * @param referer  URL of the page being scanned; used as base URI for its links
 * @param htmlPage HTML source of that page
 * @param allPages map receiving {@code url -> referer} entries
 */
protected void findUrls(String referer, String htmlPage, Map<String, String> allPages) {
	log.debug("find urls on this web page: " + referer);
	if (abort) {
		appendMessage("aborted");
		return;
	}

	for (Element anchor : Jsoup.parse(htmlPage).select("a")) {
		if (abort) {
			appendMessage("aborted");
			break;
		}

		// Relative hrefs are resolved against the page they were found on.
		anchor.setBaseUri(referer);
		String url = anchor.absUrl("href").trim();
		log.debug("spider check found url: " + url);

		boolean followable = !url.isEmpty()
				&& !url.startsWith("mailto:")
				&& !SinglePageCheckService.ignoreUrl(url, check.getDoNotFollowUrls())
				&& url.startsWith(check.getUrl())
				&& !url.equals(referer);
		if (followable) {
			log.debug("spider check put to all pages url: " + url);
			allPages.put(url, referer);
		}
	}
}
 
源代码7 项目: FairEmail   文件: HtmlHelper.java
/**
 * Produces the plain text of a document, either complete or as a preview.
 * Before the text is extracted, {@code truncate(d, !full)} is applied and
 * every {@code blockquote} is wrapped in {@code [} ... {@code ]} text nodes;
 * both steps modify the passed document.
 *
 * @param d    the document to read (modified in place)
 * @param full {@code true} to return the whole text, {@code false} to return
 *             at most {@code PREVIEW_SIZE} characters followed by an ellipsis
 *             when something was cut off
 * @return the full text or its preview
 */
private static String _getText(Document d, boolean full) {
    truncate(d, !full);

    // Mark quoted sections so they remain recognisable once the markup is gone.
    for (Element quote : d.select("blockquote")) {
        quote.prependChild(new TextNode("["));
        quote.appendChild(new TextNode("]"));
    }

    String text = d.text();
    if (full || text.length() <= PREVIEW_SIZE) {
        return text;
    }

    return text.substring(0, PREVIEW_SIZE) + "…";
}
 
源代码8 项目: ogham   文件: JsoupCssInliner.java
/**
 * Copies the rules of a stylesheet onto the elements they select, storing
 * the properties in the temporary {@code TEMP_STYLE_ATTR} attribute (the
 * original documentation names it <code>data-cssstyle</code>). A temporary
 * attribute is used because the styles need to be applied sequentially, but
 * before the <code>style</code> defined inline on the element.
 * <p>
 * The stylesheet is first passed through {@code ignoreAtRules} and the
 * {@code NEW_LINES}, {@code COMMENTS} and {@code SPACES} patterns, then
 * split on braces into alternating selector / properties tokens.
 *
 * @param doc
 *            the html document
 * @param stylesheet
 *            the css rules to distribute over the document
 */
private static void extractStyles(Document doc, String stylesheet) {
	String css = ignoreAtRules(stylesheet);
	css = NEW_LINES.matcher(css).replaceAll("");
	css = COMMENTS.matcher(css).replaceAll("");
	css = SPACES.matcher(css).replaceAll(" ");

	StringTokenizer ruleTokens = new StringTokenizer(css.trim(), "{}");
	// A rule needs two tokens (selector + properties); a trailing lone token is ignored.
	while (ruleTokens.countTokens() > 1) {
		String selector = ruleTokens.nextToken().trim();
		String properties = ruleTokens.nextToken();
		for (Element matched : doc.select(selector)) {
			String existing = matched.attr(TEMP_STYLE_ATTR);
			String merged;
			if (existing.length() > 0) {
				merged = concatenateProperties(existing, properties);
			} else {
				merged = properties;
			}
			matched.attr(TEMP_STYLE_ATTR, merged);
		}
	}
}
 
源代码9 项目: PicKing   文件: XiuMM.java
/**
 * Parses a listing page into {@code AlbumInfo} entries, one per
 * {@code div.album} element, and stores them in {@code resultMap} under
 * {@code RESULT} together with {@code currentUrl} under {@code CURRENT_URL}.
 * <p>
 * For each album the title comes from the first {@code span.name}, the album
 * URL from the {@code href} of the {@code .pic_box a} links and the picture
 * URL from the first {@code img} below those links; title and picture are
 * left unset when the corresponding element is missing.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl URL of the page being parsed; echoed into the result map
 * @param result     raw page bytes, decoded as utf-8
 * @param resultMap  map that receives the parsed data
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException declared because the bytes are decoded by charset name
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "utf-8");

    for (Element albumBlock : Jsoup.parse(html).select("div.album")) {
        AlbumInfo info = new AlbumInfo();

        Elements names = albumBlock.select("span.name");
        if (!names.isEmpty()) {
            info.setTitle(names.get(0).text());
        }

        Elements links = albumBlock.select(".pic_box a");
        info.setAlbumUrl(links.attr("href"));

        Elements covers = links.select("img");
        if (!covers.isEmpty()) {
            info.setPicUrl(covers.get(0).attr("src"));
        }

        albums.add(info);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
源代码10 项目: ogham   文件: JsoupCssInliner.java
/**
 * Swaps each stylesheet link for a style element that carries the content of
 * the linked file, so the inclusion order stays the same. A link is left
 * untouched when inline mode {@code STYLE_ATTR} is not allowed for it or
 * when no matching entry is found in {@code cssContents}.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private static void internStyles(Document doc, List<ExternalCss> cssContents) {
	for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
		if (!isInlineModeAllowed(link, InlineModes.STYLE_ATTR)) {
			continue;
		}

		ExternalCss css = getCss(cssContents, link.attr(HREF_ATTR));
		if (css == null) {
			continue;
		}

		// The css text goes into a DataNode child of the new style element.
		Element style = new Element(Tag.valueOf(STYLE_TAG), "");
		style.appendChild(new DataNode(getCssContent(css)));
		link.replaceWith(style);
	}
}
 
源代码11 项目: nju-lib-downloader   文件: BookClass.java
/**
 * Asks the server how many books the current category contains, including
 * the books of all sub-categories.
 * <p>
 * The count is read from the {@code value} of the first
 * {@code input[name=totalnumber]} element of the response page.
 *
 * @return the number of books in the current category, or 0 when the
 *         response contains no {@code totalnumber} input
 * @throws IOException if the query fails
 */
public int queryBooksSize() throws IOException {
    checkCookie();

    String requestBody = "fenlei=" + this.getId() + "&mark=all&Page=1&totalnumber=-1";
    String endpoint = NJULib.baseUrl + "/markbook/classifybookview.jsp";
    String html = MyHttpRequest.postWithCookie(requestBody, endpoint, null, cookie, "UTF-8", "GBK", 1000);

    Elements totalInputs = Jsoup.parse(html).select("input[name=totalnumber]");
    if (totalInputs.isEmpty()) {
        return 0;
    }
    return Integer.parseInt(totalInputs.get(0).attr("value"));
}
 
源代码12 项目: js-dossier   文件: EndToEndTest.java
/**
 * Reads the {@code data-page-data} attribute of the document's single
 * {@code main[data-page-data]} element, parses it as a JSON array and
 * returns it pretty-printed and trimmed.
 *
 * @param document the rendered page
 * @return the pretty-printed page data
 * @throws IllegalStateException if no such {@code main} element exists
 *         (raised by {@code checkState})
 */
private static String extractPageData(Document document) {
  Elements mains = document.select("main[data-page-data]");
  checkState(!mains.isEmpty(), "Main element not found in %s", document);

  // getOnlyElement is applied to the matches, so exactly one <main> is expected here.
  String rawJson = Iterables.getOnlyElement(mains).attributes().dataset().get("page-data");

  Gson prettyGson = new GsonBuilder().setPrettyPrinting().create();
  JsonArray parsed = prettyGson.fromJson(rawJson, JsonArray.class);
  return prettyGson.toJson(parsed).trim();
}
 
源代码13 项目: baleen   文件: Jsp101HeadingsTest.java
/**
 * Runs the manipulator over a fragment of three paragraphs (the first one
 * bold and ending in a colon) and expects that no {@code h2} element is
 * present afterwards.
 */
@Test
public void testNoneHeading() {
  String html =
      "<p><b>This is a group heading:</b></p><p>This is not a group heading</p><p>This is not a group heading.</p>";
  Document document = Jsoup.parseBodyFragment(html);

  manipulator.manipulate(document);

  assertEquals(0, document.select("h2").size());
}
 
源代码14 项目: astor   文件: SelectorTest.java
/**
 * Checks the {@code :eq(n)} pseudo selector, both on the trailing element
 * of a descendant selector and on the ancestor and the descendant at once.
 */
@Test public void testPseudoEquals() {
    Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</>p></div><div><p>Four</p>");

    // First paragraph of every div: one hit per div.
    Elements firstParagraphs = doc.select("div p:eq(0)");
    assertEquals(2, firstParagraphs.size());
    assertEquals("One", firstParagraphs.get(0).text());
    assertEquals("Four", firstParagraphs.get(1).text());

    // Restricting the div to index 0 as well leaves only the first hit.
    Elements firstOfFirstDiv = doc.select("div:eq(0) p:eq(0)");
    assertEquals(1, firstOfFirstDiv.size());
    assertEquals("One", firstOfFirstDiv.get(0).text());
    assertEquals("p", firstOfFirstDiv.get(0).tagName());
}
 
源代码15 项目: ripme   文件: NewsfilterRipper.java
/**
 * Derives the full-size image URLs of a gallery page from its thumbnails
 * ({@code #galleryImages .inner-block img}): every {@code thumbs/} segment
 * is removed from the thumbnail {@code src} and the first {@code https://}
 * is downgraded to {@code http://}.
 *
 * @param page the gallery page
 * @return the image URLs in document order
 */
@Override
protected List<String> getURLsFromPage(Document page) {
    List<String> imageUrls = new ArrayList<>();
    for (Element thumb : page.select("#galleryImages .inner-block img")) {
        String fullSizeUrl = thumb.attr("src").replace("thumbs/", "");
        // use HTTP instead of HTTPS (less headaches)
        imageUrls.add(fullSizeUrl.replaceFirst("https://", "http://"));
    }
    return imageUrls;
}
 
源代码16 项目: ripme   文件: ListalRipper.java
/**
 * Determines the page that follows {@code page}. The superclass result is
 * the default; depending on {@code urlType} it is replaced:
 * <ul>
 *   <li>{@code LIST}: when the page has {@code .loadmoreitems} elements, the
 *       remaining items are requested by POSTing {@code listid} and the last
 *       element's {@code data-offset} to {@code postUrl};</li>
 *   <li>{@code FOLDER}: when the last {@code .pages a} link's text starts
 *       with "Next", that link's absolute href is fetched;</li>
 *   <li>anything else: the superclass result is returned unchanged.</li>
 * </ul>
 *
 * @param page the page currently being processed
 * @return the next page
 * @throws IOException if loading the next page fails
 */
@Override
public Document getNextPage(Document page) throws IOException {
    Document nextPage = super.getNextPage(page);
    switch (urlType) {
        case LIST: {
            Elements loadMoreMarkers = page.select(".loadmoreitems");
            if (!loadMoreMarkers.isEmpty()) {
                // Not all items are loaded yet; fetch the rest through postUrl.
                String offset = loadMoreMarkers.last().attr("data-offset");
                Map<String, String> form = new HashMap<>();
                form.put("listid", listId);
                form.put("offset", offset);
                try {
                    nextPage = Http.url(postUrl).data(form).retries(3).post();
                } catch (IOException e) {
                    LOGGER.error("Failed to load more images after " + offset, e);
                    throw e;
                }
            }
            break;
        }

        case FOLDER: {
            Elements pageLinks = page.select(".pages a");
            boolean hasNextLink = !pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next");
            if (hasNextLink) {
                nextPage = Http.url(pageLinks.last().attr("abs:href")).retries(3).get();
            }
            break;
        }

        case UNKNOWN:
        default:
            break;
    }
    return nextPage;
}
 
源代码17 项目: ankihelper   文件: RenRenCiDianSentence.java
/**
 * Looks up example sentences for a word by downloading {@code wordUrl + key}
 * and turning every {@code ul.slides > li} element of the page into a
 * {@code Definition}.
 * <p>
 * Audio, image and detail link are all optional per slide: a missing
 * {@code audio} or {@code img} element yields an empty URL, and a missing
 * {@code a.viewdetail} link makes the detail URL fall back to the bare site
 * address instead of aborting the whole lookup with an
 * {@code IndexOutOfBoundsException}.
 *
 * @param key the word to look up; appended as-is to {@code wordUrl}
 * @return one definition per slide, or an empty list when the request fails
 *         with an {@code IOException}
 */
public List<Definition> wordLookup(String key) {
    try {
        Request request = new Request.Builder().url(wordUrl + key)
                .addHeader("User-Agent", Constant.UA)
                .build();
        String rawhtml = MyApplication.getOkHttpClient().newCall(request).execute().body().string();
        Document doc = Jsoup.parse(rawhtml);
        List<Definition> definitionList = new ArrayList<>();

        for (Element audioEle : doc.select("ul.slides > li")) {
            HashMap<String, String> eleMap = new HashMap<>();

            Elements audioElements = audioEle.select("audio");
            String audioUrl = audioElements.isEmpty() ? "" : audioElements.get(0).attr("src");
            // File names get a random suffix from Utils.getRandomHexString(8).
            String audioName = key + "_rrcd_" + Utils.getRandomHexString(8) + ".mp3";

            Elements imageElements = audioEle.select("img");
            String imageUrl = imageElements.isEmpty() ? "" : imageElements.get(0).attr("src");
            String imageName = key + "_rrcd_" + Utils.getRandomHexString(8) + ".png";

            String channel = getSingleQueryResult(audioEle, "div.mTop", false).trim();
            // Highlighting arrives as <em>; the card templates receive <b> instead.
            String en = getSingleQueryResult(audioEle, "div.mBottom", true)
                    .replaceAll("<em>", "<b>")
                    .replaceAll("</em>", "</b>");
            String cn = getSingleQueryResult(audioEle, "div.mFoot", true)
                    .replaceAll("<em>", "<b>")
                    .replaceAll("</em>", "</b>");

            // Treat the detail link like audio/image: optional, empty when absent.
            Elements detailLinks = audioEle.select("a.viewdetail");
            String detailPath = detailLinks.isEmpty() ? "" : detailLinks.get(0).attr("href");
            String detailUrl = "http://www.91dict.com" + detailPath;

            String audioTag = String.format("[sound:%s]", Constant.AUDIO_SUB_DIRECTORY + File.separator + audioName);
            String channelHtml = "<font color=grey>" + channel + "</font>";
            String html = String.format(tplt_card,
                    en,
                    audioTag,
                    cn,
                    channelHtml,
                    Constant.IMAGE_SUB_DIRECTORY + File.separator + imageName,
                    detailUrl
            );
            String html_ui = String.format(tplt_ui,
                    en,
                    cn,
                    channelHtml
            );

            eleMap.put(EXP_ELE[0], key);
            eleMap.put(EXP_ELE[1], html);
            definitionList.add(new Definition(eleMap, html_ui, imageUrl, imageName, audioUrl, audioName));
        }

        return definitionList;

    } catch (IOException ioe) {
        // Network failure: callers receive an empty result rather than an exception.
        return new ArrayList<Definition>();
    }
}
 
源代码18 项目: hipda   文件: HiParser.java
/**
 * Parses a private-message detail page into a list of messages.
 * <p>
 * The current user's uid and name are read from the
 * {@code #umenu cite a.noborder} link; each {@code li.s_clear} element then
 * becomes one item with author, uid/avatar (when an {@code a.avatar} link
 * exists), time, info HTML and the "new" flag. Entries lacking a
 * {@code p.cite}, a {@code cite} inside it, or a {@code div.summary} are
 * skipped.
 *
 * @param doc the parsed page, may be {@code null}
 * @return the parsed list, or {@code null} when {@code doc} is {@code null},
 *         the user menu link is missing, or the page has no message entries
 */
private static SimpleListBean parseSmsDetail(Document doc) {
    if (doc == null) {
        return null;
    }

    // Identify the logged-in user: uid from the link's href, name from its text.
    Elements userMenuLinks = doc.select("#umenu cite a.noborder");
    if (userMenuLinks.isEmpty()) {
        return null;
    }
    String mySpaceUrl = Utils.nullToText(userMenuLinks.first().attr("href"));
    String myUid = Utils.getMiddleString(mySpaceUrl, "uid=", "&");
    String myUsername = userMenuLinks.first().text();

    Elements messageEntries = doc.select("li.s_clear");
    if (messageEntries.isEmpty()) {
        return null;
    }

    SimpleListBean list = new SimpleListBean();
    for (Element entry : messageEntries) {
        SimpleListItemBean item = new SimpleListItemBean();

        // author
        Elements citeParagraphs = entry.select("p.cite");
        if (citeParagraphs.isEmpty()) {
            continue;
        }
        Element citeParagraph = citeParagraphs.first();
        Elements authorCites = citeParagraph.select("cite");
        if (authorCites.isEmpty()) {
            continue;
        }
        item.setAuthor(authorCites.first().text());

        // avatar: own messages use the uid found above, others the uid in the avatar link
        Elements avatarLinks = entry.select("a.avatar");
        if (!avatarLinks.isEmpty()) {
            if (item.getAuthor().equals(myUsername)) {
                item.setUid(myUid);
            } else {
                String spaceUrl = Utils.nullToText(avatarLinks.first().attr("href"));
                item.setUid(Utils.getMiddleString(spaceUrl, "uid=", "&"));
            }
            item.setAvatarUrl(HiUtils.getAvatarUrlByUid(item.getUid()));
        }

        // time: the paragraph's own text, excluding child elements
        item.setTime(citeParagraph.ownText());

        // info
        Elements summaries = entry.select("div.summary");
        if (summaries.isEmpty()) {
            continue;
        }
        item.setInfo(summaries.first().html());

        // new: flagged by an image whose src contains HiUtils.NewPMImage
        Elements markerImages = citeParagraph.select("img");
        if (!markerImages.isEmpty() && markerImages.first().attr("src").contains(HiUtils.NewPMImage)) {
            item.setNew(true);
        }

        list.add(item);
    }

    return list;
}
 
源代码19 项目: astor   文件: SelectorTest.java
/**
 * Checks that an attribute-regex selector ({@code [class~=x|y]}) works as
 * the last part of a descendant selector.
 */
@Test public void testByAttributeRegexCombined() {
    String html = "<div><table class=x><td>Hello</td></table></div>";
    Elements tables = Jsoup.parse(html).select("div table[class~=x|y]");

    assertEquals(1, tables.size());
    assertEquals("Hello", tables.text());
}
 
源代码20 项目: astor   文件: SelectorTest.java
/**
 * Checks that a class selector and the {@code :gt(n)} pseudo selector can be
 * combined: of the four paragraphs only "Two" (inside {@code div.foo}) is
 * expected to match.
 */
@Test public void testPseudoCombined() {
    String html = "<div class='foo'><p>One</p><p>Two</p></div><div><p>Three</p><p>Four</p></div>";
    Elements matches = Jsoup.parse(html).select("div.foo p:gt(0)");

    assertEquals(1, matches.size());
    assertEquals("Two", matches.get(0).text());
}