org.jsoup.nodes.Document#getElementsByClass() 源码实例 Demo

下面列出了 org.jsoup.nodes.Document#getElementsByClass() 的实例代码。您可以点击链接到 GitHub 查看完整源代码,也可以在右侧发表评论。

源代码1 项目: zuihou-admin-boot   文件: CityStats.java
/**
 * Downloads the page at {@code url}, turns its village-level rows into {@code Area}
 * objects and hands the resulting list to {@code countyArea.setChildren(...)}.
 *
 * <p>Only elements with class {@code villagetr} that contain exactly three {@code td}
 * cells are used: the first cell supplies the code and the third cell the name. Rows
 * with any other cell count are ignored.
 *
 * @param url        page to download; also stored as the source of every created area
 * @param countyArea parent area; its full name prefixes each child's full name
 */
public static void parseVillagetr(String url, Area countyArea) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> villages = new LinkedList<Area>();
    int order = 1;
    for (Element row : page.getElementsByClass("villagetr")) {
        Elements cells = row.getElementsByTag("td");
        if (cells != null && cells.size() == 3) {
            String code = cells.get(0).text();
            String name = cells.get(2).text();

            Area village = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(order++)
                    .level(new RemoteData<>("VILLAGETR"))
                    .fullName(countyArea.getFullName() + name)
                    .build();
            StaticLog.info("		村级数据:  {}  ", village);

            villages.add(village);
        }
    }
    countyArea.setChildren(villages);
}
 
源代码2 项目: zuihou-admin-boot   文件: CityParser.java
/**
 * Downloads the page at {@code url} and builds one city-level {@code Area} per element
 * with class {@code citytr}.
 *
 * <p>The first anchor of a row supplies the city code and the relative link to the
 * county page; the second anchor supplies the city name. Rows with fewer than two
 * anchors cannot provide both values and are skipped instead of failing with an
 * {@code IndexOutOfBoundsException}.
 *
 * @param provinceName name of the enclosing province, used as prefix of the full name
 * @param url          page to download; also stored as the source of every created area
 * @return the parsed cities in page order, each with its counties set as children
 */
private List<Area> parseCity(String provinceName, String url) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("citytr");

    List<Area> cities = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        // Both the code anchor and the name anchor are required below.
        if (links == null || links.size() < 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        String cityCode = links.get(0).text();
        String cityName = links.get(1).text();

        Area cityArea = Area.builder()
                .label(cityName).code(cityCode).source(url).sortValue(sort++)
                .level(new RemoteData<>("CITY"))
                .fullName(provinceName + cityName)
                .build();
        cityArea.setChildren(parseCounty(provinceName + cityName, COMMON_URL + href));
        StaticLog.info("	市级数据:  {}  ", cityArea);

        cities.add(cityArea);
    }
    return cities;
}
 
源代码3 项目: 4pdaClient-plus   文件: NewDevDbApi.java
/**
 * Fetches {@code devicesTypeUrl + "all"} and builds one brand catalog entry for every
 * {@code li} found inside elements with class {@code word-list}.
 *
 * <p>Each entry is created from the {@code href} of the item's anchor and the item's
 * text, and is typed as {@code DevCatalog.DEVICE_BRAND}.
 *
 * @param client         HTTP client used to download the page
 * @param devicesTypeUrl base URL of the device type; {@code "all"} is appended to it
 * @return the brand entries in page order
 * @throws Throwable whatever the HTTP client or the parsing raises
 */
public static ArrayList<DevCatalog> parseBrands(IHttpClient client, String devicesTypeUrl) throws Throwable {
    Document page = Jsoup.parse(client.performGet(devicesTypeUrl + "all").getResponseBody());
    ArrayList<DevCatalog> brands = new ArrayList<>();

    Elements items = page.getElementsByClass("word-list").select("li");
    for (Element item : items) {
        DevCatalog brand = new DevCatalog(item.getElementsByTag("a").attr("href"), item.text());
        brand.setType(DevCatalog.DEVICE_BRAND);
        brands.add(brand);
    }
    return brands;
}
 
/**
 * Downloads the ACG17 news listing, converts every element with class
 * {@code item-list} into an {@code ACGNew} and passes the list to
 * {@code process(..., "class", "entry")}.
 *
 * <p>Per item: the cover URL is cut out of the {@code style} attribute of the thumbnail
 * (text between {@code "url("} and the first {@code ")"}), the title and link come from
 * the first child of the {@code post-box-title} element, the date is the
 * {@code tie-date} text with the 年/月 markers replaced by dashes and 日 removed, and
 * the intro is the text of the first child of the {@code entry} element.
 *
 * @throws IOException          if sending the HTTP request fails
 * @throws InterruptedException if the HTTP call is interrupted
 */
private void pullACG17News() throws IOException, InterruptedException {
    HttpRequest listRequest = HttpRequest.newBuilder()
            .uri(URI.create("http://acg17.com/category/news/"))
            .GET()
            .build();
    Document page = Jsoup.parse(httpClient.send(listRequest, HttpResponse.BodyHandlers.ofString()).body());
    Elements items = page.getElementsByClass("item-list");
    List<ACGNew> acgNewList = items.stream().map(item -> {
        String coverStyle = item
                .getElementsByClass("attachment-tie-medium size-tie-medium wp-post-image")
                .get(0)
                .attr("style");
        int coverStart = coverStyle.indexOf("url(") + 4;
        int coverEnd = coverStyle.indexOf(")");
        String cover = coverStyle.substring(coverStart, coverEnd);

        Element titleLink = item.getElementsByClass("post-box-title").get(0).child(0);

        String dateText = item.getElementsByClass("tie-date").get(0).text()
                .replaceAll("[年月]", "-")
                .replace("日", "");
        LocalDate createDate = LocalDate.parse(dateText);

        String intro = item.getElementsByClass("entry").get(0).child(0).text();
        String title = titleLink.text();
        String refererUrl = titleLink.attr("href");
        return new ACGNew(title, intro, NewsCrawlerConstant.ACG17, cover, refererUrl, createDate,
                NewsCrawlerConstant.ACG17);
    }).collect(Collectors.toList());
    process(acgNewList, "class", "entry");
}
 
/**
 * Collects subject ids from the first {@code pageNum} pages of the bangumi.tv anime
 * browser (page indexes {@code 0} to {@code pageNum - 1}).
 *
 * <p>For every element matched by the class string {@code "subjectCover cover ll"} the
 * digits of its {@code href} are parsed as the id. The original code appended
 * {@code "\n"} to the digits before calling {@code Integer.parseInt}, which made every
 * parse fail with {@code NumberFormatException}; the digits are now parsed as they are,
 * and links without any digit are skipped.
 *
 * @param pageNum number of pages to crawl
 * @return the ids found, in crawl order
 * @throws IOException          if sending an HTTP request fails
 * @throws InterruptedException if an HTTP call is interrupted
 */
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException {
    List<Integer> idList = new ArrayList<>(24);
    for (int currentIndex = 0; currentIndex < pageNum; currentIndex++) {
        System.out.println("开始爬取第" + currentIndex + "页");
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build();
        String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body();
        // Extract the cover links with jsoup.
        Document doc = Jsoup.parse(body);
        Elements elements = doc.getElementsByClass("subjectCover cover ll");
        elements.forEach(e -> {
            // Keep only the digits of the link; they form the subject id.
            String digits = e.attr("href").replaceAll("\\D", "");
            if (!digits.isEmpty()) {
                idList.add(Integer.parseInt(digits));
            }
        });
    }
    return idList;
}
 
源代码6 项目: MHViewer   文件: JsoupUtils.java
/**
 * Looks up the first element of {@code doc} that carries the class {@code className}.
 *
 * @param doc       document to search
 * @param className class name to look for
 * @return the first matching element, or {@code null} when there is no match
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements matches = doc.getElementsByClass(className);
    if (matches == null || matches.isEmpty()) {
        return null;
    }
    return matches.get(0);
}
 
源代码7 项目: zuihou-admin-cloud   文件: CityStats.java
/**
 * Downloads the page at {@code url}, builds a province-level {@code Area} for every link
 * found inside elements with class {@code provincetr}, descends into each province via
 * {@code parseCity} and finally logs the whole list as pretty-printed JSON.
 *
 * <p>The province name is the link text; the province code is the first two characters
 * of the link's {@code href}.
 *
 * @param url page to download; also stored as the source of every created area
 */
public static void parseProvince(String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    // Rows carrying class 'provincetr' hold the province links.
    Elements rows = page.getElementsByClass("provincetr");
    List<Area> provinces = new LinkedList<Area>();
    int order = 1;
    for (Element row : rows) {
        // Every descendant of the row that has an href attribute is one province.
        for (Element anchor : row.getElementsByAttribute("href")) {
            String name = anchor.text();
            String href = anchor.attr("href");
            String code = href.substring(0, 2);

            StaticLog.info("provinceName: {} , provinceCode: {} .", name, code);

            Area province = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(order++)
                    .fullName(name)
                    .level(new RemoteData<>("PROVINCE"))
                    .build();

            StaticLog.info("省级数据:  {}  ", province);

            parseCity(COMMON_URL + href, province);
            provinces.add(province);
        }
    }
    StaticLog.info(JSONUtil.toJsonPrettyStr(provinces));
}
 
源代码8 项目: zuihou-admin-boot   文件: CityStats.java
/**
 * Downloads the page at {@code url}, builds a township-level {@code Area} for every
 * element with class {@code towntr} that contains exactly two anchors, loads the
 * villages of each town, and sets the towns as children of {@code countyArea}.
 *
 * <p>The first anchor supplies the relative link and the code (first nine characters of
 * its text); the second anchor supplies the town name.
 *
 * <p>The villages are attached to the town that owns them. The previous code passed
 * {@code countyArea} to {@code parseVillagetr}, which stored the villages on the county
 * and had them overwritten by the final {@code countyArea.setChildren(...)} call, so all
 * village data was fetched and then discarded.
 *
 * @param url        page to download; also stored as the source of every created area
 * @param countyArea parent county; its full name prefixes each town's full name
 */
public static void parseTowntr(String url, Area countyArea) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("towntr");

    List<Area> towns = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        if (links == null || links.size() != 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        String towntrCode = links.get(0).text().substring(0, 9);
        String towntrName = links.get(1).text();

        Area towntrArea = Area.builder().label(towntrName).code(towntrCode).source(url)
                .sortValue(sort++).level(new RemoteData<>("TOWNTR")).fullName(countyArea.getFullName() + towntrName)
                .build();

        StaticLog.info("		乡镇级数据:  {}  ", towntrArea);

        // The village page path is rebuilt from fixed character positions of the href.
        String villageUrl = COMMON_URL + href.substring(2, 5) + "/" + href.substring(5, 7) + "/" + href;
        // Villages belong to the town, not to the county.
        parseVillagetr(villageUrl, towntrArea);

        towns.add(towntrArea);
    }
    countyArea.setChildren(towns);
}
 
源代码9 项目: ambiverse-nlu   文件: WikiCorpusTask.java
/**
 * Returns the {@code href} of the first child of the first element whose class is
 * {@code "interwiki-" + language}.
 *
 * @param document page to search
 * @param language language suffix appended to {@code "interwiki-"}
 * @return the link target, or {@code null} when the document has no such element
 */
private String retrieveLinkInLanguage(Document document, String language) {
	Elements interwikiLinks = document.getElementsByClass("interwiki-" + language);
	boolean found = interwikiLinks != null && !interwikiLinks.isEmpty();
	if (!found) {
		// No link for this language on the page.
		return null;
	}
	return interwikiLinks.first().child(0).attr("href");
}
 
源代码10 项目: JavaRushTasks   文件: MoikrugStrategy.java
/**
 * Pages through the search results for {@code searchString}, starting at page 0, and
 * converts every element with class {@code job} into a {@code Vacancy}.
 *
 * <p>Paging stops at the first page without {@code job} elements. It also stops when a
 * page cannot be loaded: the previous code only printed the exception and carried on,
 * which either dereferenced a {@code null} document or re-read the last loaded page
 * forever.
 *
 * @param searchString query passed to {@code getDocument}
 * @return the vacancies collected so far; never {@code null}
 */
@Override
public List<Vacancy> getVacancies(String searchString)
{
    List<Vacancy> result = new ArrayList<>();
    int pageNum = 0;
    while (true)
    {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            // Without a fresh page there is nothing new to parse; return what we have.
            break;
        }
        if (doc == null) break;

        Elements vacancies = doc.getElementsByClass("job");
        if (vacancies.isEmpty()) break;
        for (Element element : vacancies)
        {
            if (element == null) continue;

            Vacancy vac = new Vacancy();
            vac.setTitle(element.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(element.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + element.select("a[class=job_icon]").attr("href"));
            // Set as-is: the original empty-string ternaries returned the same value.
            vac.setSalary(element.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(element.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
        pageNum++;
    }
    return result;
}
 
源代码11 项目: Toutiao   文件: SearchArticleVideoViewBinder.java
/**
 * Extracts the video id and poster URL from the first element with class
 * {@code tt-video-box} in the given HTML fragment.
 *
 * <p>The result contains the key {@code "id"} (attribute {@code tt-videoid}) and the key
 * {@code "imageUrl"} (attribute {@code tt-poster}); a key is omitted when its attribute
 * is empty. When the content has no {@code tt-video-box} element, an empty map is
 * returned; previously this case threw {@code IndexOutOfBoundsException}.
 *
 * @param content HTML to parse
 * @return a map with zero, one or both of the keys described above
 */
private Map<String, String> parseJson(String content) {
    Map<String, String> map = new HashMap<>();
    Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("tt-video-box");
    if (elements.isEmpty()) {
        // No video box in this content: nothing to extract.
        return map;
    }
    String id = elements.get(0).attr("tt-videoid");
    String imageUrl = elements.get(0).attr("tt-poster");
    if (!TextUtils.isEmpty(id)) {
        map.put("id", id);
    }
    if (!TextUtils.isEmpty(imageUrl)) {
        map.put("imageUrl", imageUrl);
    }
    return map;
}
 
源代码12 项目: EhViewer   文件: JsoupUtils.java
/**
 * Finds the first element in {@code doc} with the class {@code className}.
 *
 * @param doc       document to search
 * @param className class name to match
 * @return the first match, or {@code null} if no element has that class
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements found = doc.getElementsByClass(className);
    boolean nothingFound = found == null || found.isEmpty();
    return nothingFound ? null : found.get(0);
}
 
源代码13 项目: newblog   文件: HTTPStudy.java
/**
 * Runs a search for {@code keyword} and, for every result whose displayed URL contains
 * {@code www.wenzhihuai.com}, requests the result's link and logs it.
 *
 * <p>The search URL is {@code baseURL} with the literal text {@code keyword} replaced
 * by the argument. The replacement is literal: the previous {@code replaceAll} call
 * interpreted {@code $} and {@code \} in the keyword as replacement syntax, which
 * corrupted the URL or threw for such keywords.
 *
 * @param keyword search term, inserted into the URL as given
 * @throws Exception whatever the HTTP helper raises
 */
public static void baidu(String keyword) throws Exception {
    String content = HttpHelper.getInstance().get(baseURL.replace("keyword", keyword));
    Document jsoup = Jsoup.parse(content);
    Elements elements = jsoup.getElementsByClass("result");
    for (Element element : elements) {
        // ".c-showurl" holds the URL shown under the result title.
        String str = element.select(".c-showurl").text();
        if (str.contains("www.wenzhihuai.com")) {
            String wenzhihuai = element.select(".t").select("a").attr("href");
            HttpHelper.getInstance().get(wenzhihuai);
            logger.info("百度->温志怀URL:" + wenzhihuai);
        }
    }
}
 
源代码14 项目: TrackRay   文件: ConfluenceServerRCE.java
/**
 * Interactive loop: reads a file path from the user, sends the read-file payload for
 * it through {@code attack} and prints the HTML of the {@code wiki-content} elements of
 * the response, until the user enters {@code exit}.
 *
 * <p>A single leading {@code /} is stripped from the input before it is appended to
 * {@code file:///}. Nothing is printed when {@code attack} returns {@code null} or the
 * response has no text in {@code wiki-content}.
 *
 * @return always the empty string
 */
@Override
public Object start() {

    println("请输入要读取的文件 如/etc/passwd,输入exit退出");

    for (String input = getInput(); !input.equals("exit"); input = getInput()) {
        String path = input.startsWith("/") ? input.substring(1) : input;
        String payload = String.format(readFilePayload, "file:///".concat(path));
        Document response = attack(payload);
        if (response == null) {
            continue;
        }

        Elements wiki = response.getElementsByClass("wiki-content");
        if (wiki == null || !wiki.hasText()) {
            continue;
        }

        String text = wiki.html();
        println("=========================");
        sendColorMsg(Message.RED(HtmlUtils.htmlEscape(text)));
        println("=========================");
    }

    return "";
}
 
源代码15 项目: zuihou-admin-cloud   文件: CityParser.java
/**
 * Parses township-level data.
 *
 * <p>Downloads the page at {@code url} and builds one {@code Area} for every element
 * with class {@code towntr} that contains exactly two anchors: the text of the first
 * anchor is the code, the text of the second is the name. Village-level data is not
 * loaded here.
 *
 * @param fullName full name of the parent area, used as prefix of each town's full name
 * @param url      page to download; also stored as the source of every created area
 * @return the parsed towns in page order
 */
public List<Area> parseTowntr(String fullName, String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> towns = new LinkedList<Area>();
    int order = 1;
    for (Element row : page.getElementsByClass("towntr")) {
        Elements anchors = row.getElementsByTag("a");
        if (anchors != null && anchors.size() == 2) {
            String code = anchors.get(0).text();
            String name = anchors.get(1).text();

            Area town = Area.builder()
                    .label(name)
                    .code(code)
                    .source(url)
                    .fullName(fullName + name)
                    .level(new RemoteData<>("TOWNTR"))
                    .sortValue(order++)
                    .build();

            StaticLog.info("			乡镇级数据:  {}  ", town);

            towns.add(town);
        }
    }
    return towns;
}
 
源代码16 项目: MtgDesktopCompanion   文件: TCGPlayerDeckSniffer.java
/**
 * Reads pages {@code 1..MAX_PAGE} of the deck search for the configured format and
 * converts every element with class {@code gradeA} into a {@code RetrievableDeck}.
 *
 * <p>Per row: the first cell is scanned for the mana marker class names to build the
 * colour string, the second cell gives the deck name and link, the third the author and
 * the fourth the description. A link that is not a valid URI leaves the deck URL
 * {@code null}.
 *
 * @return the decks of all pages, in page order
 * @throws IOException if a page cannot be read
 */
@Override
public List<RetrievableDeck> getDeckList() throws IOException {
	String url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT);
	logger.debug("get List deck at " + url);
	List<RetrievableDeck> decks = new ArrayList<>();
	int maxPage = getInt(MAX_PAGE);

	// Marker class name in the first cell -> mana symbol, in output order.
	String[][] manaMarkers = {
			{"white-mana", "{W}"},
			{"blue-mana", "{U}"},
			{"black-mana", "{B}"},
			{"red-mana", "{R}"},
			{"green-mana", "{G}"}
	};

	for (int page = 1; page <= maxPage; page++) {
		url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT) + "&page=" + page;
		Document d = Jsoup.parse(IncapsulaParser.readUrl(url));

		for (Element row : d.getElementsByClass("gradeA")) {
			RetrievableDeck deck = new RetrievableDeck();

			String manaHtml = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(0).toString();
			StringBuilder mana = new StringBuilder();
			for (String[] marker : manaMarkers) {
				if (manaHtml.contains(marker[0])) {
					mana.append(marker[1]);
				}
			}

			Element nameCell = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(1);
			String deckName = nameCell.text();
			String link = getString(URL) + nameCell.getElementsByTag("a").attr("href");
			String deckPlayer = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(2).text();
			String deckDesc = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(3).text();

			deck.setColor(mana.toString());
			deck.setAuthor(deckPlayer);
			deck.setName(deckName);
			deck.setDescription(deckDesc);

			try {
				deck.setUrl(new URI(link));
			} catch (URISyntaxException e) {
				deck.setUrl(null);
			}

			decks.add(deck);
		}
	}

	return decks;
}
 
源代码17 项目: mamute   文件: CustomVRaptorIntegration.java
/**
 * Parses {@code html} and returns every element carrying the class {@code cssClass}.
 *
 * @param html     markup to parse
 * @param cssClass class name to look for
 * @return the matching elements
 */
protected Elements getElementsByClass(String html, String cssClass) {
	return Jsoup.parse(html).getElementsByClass(cssClass);
}
 
源代码18 项目: zhangshangwuda   文件: OneKeyWifi.java
/**
 * Returns the combined text of all elements with class {@code msg} in {@code html}.
 *
 * @param html markup to parse
 * @return the text of the {@code msg} elements
 */
public static String getErrorMessage(String html) {
	Document page = Jsoup.parse(html);
	Elements messages = page.getElementsByClass("msg");
	return messages.text();
}
 
源代码19 项目: xmpp   文件: addPingLun.java
/**
 * Reads {@code index.html} from the working directory, walks its {@code comment_item}
 * elements from last to first, prints each comment as a tab-separated line and saves
 * it through {@code News_pinglunDaoImpl.save}.
 *
 * <p>The stored user field is the user name and the avatar {@code src} joined by
 * {@code ";"}. An {@code IOException} while reading the file is printed and otherwise
 * ignored.
 *
 * @param args not used
 */
public static void main(String[] args) {
	File source = new File("index.html");
	News_pinglunDaoImpl commentDao = new News_pinglunDaoImpl();
	try {
		Document page = Jsoup.parse(source, "UTF-8", "");
		Elements comments = page.getElementsByClass("comment_item");

		int index = comments.size() - 1;
		while (index >= 0) {
			// NOTE(review): the pattern below is mis-encoded in the source file (replacement
			// characters); the intended text cannot be recovered from here - confirm.
			String ptime = comments.get(index).getElementsByClass("ptime").text().replaceAll("����", "");
			String userName = comments.get(index).getElementsByClass("username").text();
			String avatar = comments.get(index).getElementsByTag("img").attr("src");
			String pcontent = comments.get(index).getElementsByClass("comment_body").text();

			System.out.println(ptime + "\t" + userName + "\t" + (avatar) + "\t" + pcontent);

			// NOTE(review): the original comment here was mis-encoded; presumably this is the
			// id of the news item the comments belong to - confirm.
			int id = 30;
			String user = userName + ";" + avatar;
			String plocation = "";
			String zan = "0";
			News_pinglun news = new News_pinglun(id, user, plocation, ptime, pcontent, zan);
			// The result of save() is not used.
			commentDao.save(news);

			index--;
		}
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
源代码20 项目: KaellyBot   文件: Character.java
/**
 * Loads the character page at {@code url} and builds a {@code Character} from the
 * values scraped out of it.
 *
 * <p>Mandatory values (skins, pseudo, level, class, server, score) are read from the
 * first (or last) element of their CSS class without any presence check. Guild,
 * alliance and the three ladder texts are optional and stay {@code null} / empty when
 * their elements are missing.
 *
 * @param url address of the character page; also passed on to the created object
 * @param lg  language used to look up the labels that are stripped from the level and
 *            kolizeum texts
 * @return the character described by the page
 * @throws IOException declared for the document download
 */
public static Character getCharacter(String url, Language lg) throws IOException {
    Document doc = JSoupManager.getDocument(url);
    // NOTE(review): the first()/last() results below are dereferenced unchecked; presumably
    // they are null when the class is absent from the page - confirm.
    // Big skin: taken from the style attribute of the first "ak-entitylook" element,
    // from "https://" up to the first ")".
    String bigSkinURL = doc.getElementsByClass("ak-entitylook").first().attr("style");
    bigSkinURL = bigSkinURL.substring(bigSkinURL.indexOf("https://"), bigSkinURL.indexOf(")"));
    // Little skin: same extraction, applied to the whole markup of the last "ak-entitylook" element.
    String littleSkinURL = doc.getElementsByClass("ak-entitylook").last().toString();
    littleSkinURL = littleSkinURL.substring(littleSkinURL.indexOf("https://"), littleSkinURL.indexOf(")"));
    String pseudo = doc.getElementsByClass("ak-return-link").first().text();
    // The translated "level" label is removed so only the value remains.
    String level = doc.getElementsByClass("ak-directories-level").first().text()
            .replace(Translator.getLabel(lg, "whois.extract.level"), "").trim();
    String classe = doc.getElementsByClass("ak-directories-breed").first().text();
    String server = doc.getElementsByClass("ak-directories-server-name").first().text();
    // Score is rendered as "<score text> (<progress bar text>)".
    String score = doc.getElementsByClass("ak-score-text").first().text() + " ("
            + doc.getElementsByClass("ak-progress-bar-text").first().text() + ")";

    // Optional
    String guildName = null;
    String guildUrl = null;
    String alliName = null;
    String alliUrl = null;

    Elements elem = doc.getElementsByClass("ak-infos-guildname");

    if (!elem.isEmpty()) {
        guildName = elem.first().text();
        guildUrl = elem.first().select("a").attr("abs:href");

        // The alliance is only looked up when a guild element was found.
        elem = doc.getElementsByClass("ak-infos-alliancename");

        if (!elem.isEmpty()) {
            alliName = elem.first().text();
            alliUrl = elem.first().select("a").attr("abs:href");
        }
    }

    StringBuilder ladderXP = new StringBuilder();
    StringBuilder ladderKoli = new StringBuilder();
    StringBuilder ladderSuccess = new StringBuilder();

    // Ladder data is only read when the ladder table is present on the page.
    elem = doc.getElementsByClass("ak-container ak-table ak-responsivetable");
    if (!elem.isEmpty()) {
        ladderXP.append(doc.getElementsByClass("ak-total-xp").first().text()).append("\n");

        // Kolizeum totals whose text ends with "-1" are left out.
        for(Element cote : doc.getElementsByClass("ak-total-kolizeum"))
            if (! cote.text().endsWith("-1"))
                ladderKoli.append(cote.text().replace(Translator.getLabel(lg, "whois.extract.koli"), "").trim()).append("\n");

        Elements trs = elem.first().getElementsByTag("tbody").first().getElementsByTag("tr");
        for (Element tr : trs) {
            // The first cell is the row label; it is removed from the row right after being
            // read, so first()/get(1)/last() below address the remaining cells.
            String ladderText = tr.getElementsByTag("td").first().text() + " : ";
            tr.getElementsByTag("td").first().remove();
            // A cell containing "-" is skipped.
            if (!tr.getElementsByTag("td").first().text().equals("-"))
                ladderXP.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").first().text())).append("\n");
            if (!tr.getElementsByTag("td").get(1).text().equals("-"))
                ladderKoli.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").get(1).text())).append("\n");
            if (!tr.getElementsByTag("td").last().text().equals("-"))
                ladderSuccess.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").last().text())).append("\n");
        }
    }

    return new Character(pseudo, level, classe, server, score,
            guildName, guildUrl, alliName, alliUrl, littleSkinURL, bigSkinURL, url,
            ladderXP.toString(), ladderKoli.toString(), ladderSuccess.toString());
}