下面列出了 org.jsoup.nodes.Document#selectFirst() 的实例代码;您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Extracts the numeric rating from the raw HTML of an instructor page.
 *
 * @param rawData raw HTML; may be {@code null} or blank
 * @return the parsed rating, or {@code null} when the input is null/blank, when any of the
 *         expected page elements is missing, or when the rating text is not a number
 */
private static Float parseRating(String rawData) {
    // The null check must come before trim(); trimming first would throw on null input.
    if (rawData == null || rawData.trim().isEmpty()) {
        logger.warn("Got bad data: empty string");
        return null;
    }
    Document doc = Jsoup.parse(rawData.trim());
    Element body = doc.selectFirst("div#root");
    if (body == null) {
        return null;
    }
    Element ratingBody =
        body.selectFirst("div.TeacherInfo__StyledTeacher-ti1fio-1.fIlNyU");
    if (ratingBody == null) {
        return null;
    }
    Element firstDiv = ratingBody.selectFirst("div");
    if (firstDiv == null) {
        return null;
    }
    Element ratingInnerBody =
        firstDiv.selectFirst("div.RatingValue__AvgRating-qw8sqy-1.gIgExh");
    if (ratingInnerBody == null) {
        return null;
    }
    Element numerator =
        ratingInnerBody.selectFirst("div.RatingValue__Numerator-qw8sqy-2.gxuTRq");
    if (numerator == null) {
        return null;
    }
    String ratingValue = numerator.html().trim();
    try {
        return Float.parseFloat(ratingValue);
    } catch (NumberFormatException exception) {
        // The numerator element exists but does not hold a parsable number.
        logger.warn("The instructor exist but having N/A rating");
        return null;
    }
}
/**
 * Downloads the page at {@code MY_IP_API} and returns the text of the first link found
 * inside the {@code div.tableNormal} element.
 *
 * @return the link text, or {@code null} when the page cannot be fetched or the expected
 *         elements are not present
 */
public static String getMyIp() {
    try {
        String html = HttpUtils.getResponseContent(MY_IP_API);
        Document doc = Jsoup.parse(html);
        Element container = doc.selectFirst("div.tableNormal");
        if (container == null) {
            return null;
        }
        // NOTE(review): the original also looked up the second <td> of the table but never
        // used it; that lookup could only make the method fail, so it was removed. Confirm the
        // value was not meant to be read from that cell instead.
        Element anchor = container.selectFirst("a");
        if (anchor == null) {
            return null;
        }
        return anchor.text();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Builds an {@code ApkVersion} from an HTML page.
 *
 * <p>The version name is the first dotted number sequence found in the page's
 * {@code <title>}; it stays {@code "-1"} when none is found. The version info is taken from
 * the {@code .apk_left_title_info} element inside the {@code .apk_left_title} block that
 * contains the text 新版特性, converted through {@code HtmlCompat.fromHtml} and trimmed; it stays
 * {@code null} when that block is absent.
 *
 * @param html the page HTML
 * @return a new {@code ApkVersion} holding the extracted name and info
 */
static ApkVersion parseFromCoolApk(String html) {
    // Jsoup.parse returns a Document, so no null check on it is needed.
    Document document = Jsoup.parse(html);
    String versionName = "-1";
    String versionInfo = null;

    Element titleEle = document.selectFirst("title");
    if (titleEle != null) {
        // "\\d+" (not "\\d") so multi-digit components such as "10.2.13" are captured whole;
        // the single-digit form would have yielded "0.2.1" for that input.
        Pattern p = Pattern.compile("\\d+(\\.\\d+)+");
        Matcher m = p.matcher(titleEle.text());
        if (m.find()) {
            versionName = m.group();
        }
    }

    Element rootInfoEle = document.selectFirst(".apk_left_title:contains(新版特性)");
    if (rootInfoEle != null) {
        Element infoEle = rootInfoEle.selectFirst(".apk_left_title_info");
        if (infoEle != null) {
            versionInfo = HtmlCompat.fromHtml(infoEle.toString(), HtmlCompat.FROM_HTML_MODE_COMPACT)
                    .toString().trim();
        }
    }
    return new ApkVersion(versionName, versionInfo);
}
/**
 * Builds a {@code Topic} from an HTML page by reading the elements with the
 * {@code topic_*} CSS classes (author, avatar, title, content, hits, created, node, stats).
 *
 * <p>Every looked-up element is dereferenced directly, so a page missing any of them results
 * in a {@code NullPointerException}.
 *
 * @param ampHtml the page HTML
 * @return the populated topic
 */
public static Topic getTopicWithReply(String ampHtml) {
    Topic result = new Topic();
    Document page = Jsoup.parse(ampHtml);
    Element nodeElement = page.selectFirst(".topic_node");

    String authorText = page.selectFirst(".topic_author").text();
    String avatarSrc = page.selectFirst(".topic_author_avatar").attr("src");
    Member author = new Member(authorText, avatarSrc);

    result.setTitle(page.selectFirst(".topic_title").text());
    result.setMember(author);
    result.setContent(page.selectFirst(".topic_content").html());

    Pattern firstNumber = Pattern.compile("(\\d+)");
    String hitsText = page.selectFirst(".topic_hits").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(firstNumber, hitsText));

    result.setAgo(page.selectFirst(".topic_created").text());

    String nodePathPart = HtmlUtil.matcherGroup1(Pattern.compile("/go/(\\w+)"), nodeElement.html());
    String nodeLinkText = nodeElement.selectFirst("a").text();
    result.setNode(new Node(nodePathPart, nodeLinkText));

    // NOTE(review): this second setClicks call overwrites the value taken from .topic_hits
    // above with the first number in .topic_stats. Possibly a different setter was intended;
    // behaviour is kept as-is until confirmed.
    String statsText = page.selectFirst(".topic_stats").text();
    result.setClicks(HtmlUtil.matcherGroup1Int(firstNumber, statsText));
    return result;
}
/**
 * Renders the status-report template for a mocked cluster with one node and one pod, and
 * checks that the first link in the table body points at the agent status report for the
 * pod's job id.
 */
@Test
public void shouldBuildStatusReportHtmlWithAgentStatusReportLink() throws IOException, TemplateException {
    // Mocked object graph: cluster -> node -> pod.
    final KubernetesPod podMock = mock(KubernetesPod.class);
    final KubernetesNode nodeMock = mock(KubernetesNode.class);
    final KubernetesCluster clusterMock = mock(KubernetesCluster.class);

    when(podMock.getJobIdentifier()).thenReturn(new JobIdentifier(3243546575676657L));
    when(podMock.getCreationTimestamp()).thenReturn(new Date());
    when(nodeMock.getPods()).thenReturn(singletonList(podMock));
    when(clusterMock.getNodes()).thenReturn(singletonList(nodeMock));
    when(clusterMock.getPluginId()).thenReturn("cd.go.contrib.elastic.agent.kubernetes");

    final PluginStatusReportViewBuilder viewBuilder = PluginStatusReportViewBuilder.instance();
    final String renderedHtml =
            viewBuilder.build(viewBuilder.getTemplate("status-report.template.ftlh"), clusterMock);

    final Element firstLink = Jsoup.parse(renderedHtml).selectFirst("tbody tr td a");
    System.out.println(firstLink);
    assertThat(
            firstLink.attr("href"),
            is("/go/admin/status_reports/cd.go.contrib.elastic.agent.kubernetes/agent/?job_id=3243546575676657"));
}
/**
 * Builds an audio track from the JSON stored in the {@code data-api-data} attribute of the
 * {@code #js-initial-watch-data} element.
 *
 * @param videoId  identifier passed through to the track info and used to build the watch URL
 * @param document the parsed page
 * @return the track, or {@code null} when the element or its {@code data-api-data} attribute
 *         is missing or empty
 */
private AudioTrack extractTrackFromHtml(String videoId, Document document) {
    Element element = document.selectFirst("#js-initial-watch-data");
    if (element == null) {
        return null;
    }
    String data = element.attributes().get("data-api-data");
    // jsoup returns an empty string (not null) for an attribute that is not set, so the
    // emptiness check is what actually guards the JSON parsing below.
    if (data == null || data.isEmpty()) {
        return null;
    }
    JsonObject object = new JsonObject(data);
    JsonObject video = object.getJsonObject("video");
    String uploader = object.getJsonObject("owner").getString("nickname");
    String title = video.getString("title");
    // NOTE(review): presumably converts seconds to milliseconds - confirm against the API.
    long duration = video.getLong("duration") * 1000;
    return new NicoAudioTrack(new AudioTrackInfo(title, uploader, duration, videoId, false, getWatchUrl(videoId)), this);
}
/**
 * Finds, in a search-results page, the first professor listing whose {@code span.sub}
 * markup mentions "New York University" or "NYU", and returns the part of that listing's
 * link {@code href} between the first and second {@code '='}.
 *
 * @param rawData raw HTML of the search-results page; may be {@code null} or blank
 * @return the extracted value, or {@code null} when the input is null/blank, when the page
 *         does not have the expected structure, or when no matching listing is found
 */
private static String parseLink(String rawData) {
    logger.debug("parsing raw RMP data to link...");
    // The null check must come before trim(); trimming first would throw on null input.
    if (rawData == null || rawData.trim().isEmpty()) {
        logger.warn("Got bad data: empty string");
        return null;
    }

    // Walk down the expected nesting one level at a time; a missing level means the page
    // holds no usable results, which is reported as null instead of a NullPointerException.
    final String[] nesting = {
        "body.search_results",
        "div#container",
        "div#body",
        "div#mainContent",
        "div#searchResultsBox",
        "div.listings-wrap",
        "ul.listings"
    };
    Element current = Jsoup.parse(rawData.trim());
    for (String selector : nesting) {
        current = current.selectFirst(selector);
        if (current == null) {
            return null;
        }
    }

    Elements professors = current.select("li.listing.PROFESSOR");
    for (Element professor : professors) {
        // Some listings have no span.sub; skip them (this used to throw).
        Element schoolElement = professor.selectFirst("span.sub");
        if (schoolElement == null) {
            continue;
        }
        String school = schoolElement.toString();
        if (!school.contains("New York University") && !school.contains("NYU")) {
            continue;
        }
        Element anchor = professor.selectFirst("a");
        if (anchor == null) {
            continue;
        }
        String[] hrefParts = anchor.attr("href").split("=");
        if (hrefParts.length < 2) {
            continue; // href carries no "=value" part
        }
        return hrefParts[1];
    }
    return null;
}
/**
 * Parses the raw HTML of a catalog section into a {@code SectionAttribute}.
 *
 * <p>Steps: reject blank input and pages containing a {@code div.alert.alert-info} element;
 * remember the {@code href} of the last link containing "mapBuilding"; unwrap all
 * {@code a}, {@code i} and {@code b} tags; then read the course name and the section content
 * blocks from {@code body > section.main > section} and hand them to the helper parsers.
 *
 * @param rawData raw HTML, must not be {@code null}
 * @return the parsed attribute, or {@code null} for blank input or an alert page
 */
public static SectionAttribute parse(@NotNull String rawData) {
    logger.debug("parsing raw catalog section data into SectionAttribute...");
    final String html = rawData.trim();
    if (html.isEmpty()) {
        logger.warn("Got bad data: empty string");
        return null; // the course doesn't exist
    }

    final Document page = Jsoup.parse(html);
    final Element alert = page.selectFirst("div.alert.alert-info");
    if (alert != null) {
        logger.warn("Got bad data: " + alert.text());
        return null; // the course doesn't exist
    }

    // No early exit: when several links match, the last one is kept.
    String mapLink = null;
    for (Element anchor : page.select("a")) {
        final String href = anchor.attr("href");
        if (href.contains("mapBuilding")) {
            mapLink = href;
        }
    }

    // Strip the inline wrappers but keep their content, in the order a, i, b.
    for (String tag : new String[] {"a", "i", "b"}) {
        page.select(tag).unwrap();
    }

    final Element section = page.selectFirst("body > section.main").selectFirst("> section");
    final String courseName = section.selectFirst("> div.primary-head").text();
    final Elements contentDivs = section.select("> div.section-content.clearfix");
    final Map<String, String> attributes = parseSectionAttributes(contentDivs);
    return parsingElements(attributes, courseName, mapLink);
}
/**
 * Prints to standard output the text of every {@code div} found under the first
 * {@code section.main > section} of the page, skipping those whose text is exactly
 * "Results" or "Okay".
 *
 * @param data the page HTML
 */
public static void parseRegistrationNumber(String data) {
    Element container = Jsoup.parse(data)
            .selectFirst("body")
            .selectFirst("section.main > section");
    for (Element div : container.select("div")) {
        String text = div.text();
        boolean skipped = text.equals("Results") || text.equals("Okay");
        if (!skipped) {
            System.out.println(text);
        }
    }
}
/**
 * Fetches the page at the given URL and reads the {@code value} attribute of its
 * {@code input[name='csrfToken']} element.
 *
 * @param controllerUrl URL of the page to fetch
 * @return the attribute value, or {@code null} when the page has no such input
 * @throws IOException if fetching the page fails
 */
private String retrieveCsrfToken(String controllerUrl) throws IOException {
    Element tokenInput = Jsoup.connect(controllerUrl)
            .get()
            .selectFirst("input[name='csrfToken']");
    return tokenInput == null ? null : tokenInput.attributes().get("value");
}
/**
 * Loads the page at the given URL and builds a magnet URI from the {@code onclick}
 * attribute of the first {@code .btn.btn-success.btn-xs} element.
 *
 * <p>The attribute must consist entirely of a {@code magnet_link('...');} call
 * (case-insensitive); the quoted argument is appended to {@code magnet:?xt=urn:btih:}.
 *
 * @param urlString URL of the page to load
 * @return the magnet URI, or {@code null} when the element is missing or its
 *         {@code onclick} attribute does not have the expected form
 * @throws Exception propagated from loading the page
 */
private String getMagnetString1(String urlString) throws Exception {
    Document doc = getDoc(urlString);
    Element el = doc.selectFirst(".btn.btn-success.btn-xs");
    if (el == null) {
        // No button on the page: report "not found" the same way as a non-matching onclick.
        return null;
    }
    Pattern pattern = Pattern.compile("magnet_link\\(\\'(.{1,})\\'\\);", Pattern.CASE_INSENSITIVE);
    Matcher matcher = pattern.matcher(el.attr("onclick"));
    if (matcher.matches()) {
        return "magnet:?xt=urn:btih:" + matcher.group(1);
    }
    return null;
}
/**
 * Sends a signed GET request built from the {@code abcyzf.*} configuration values and the
 * given order data, and returns the URL to continue with.
 *
 * <p>When the response contains a {@code <script>} element, the result is that script's data
 * with the {@code window.location.href='} prefix removed and the last two characters cut off;
 * otherwise the raw response is returned.
 *
 * @param orderNo     value sent as {@code out_trade_no}
 * @param amount      value sent (as a string) as {@code money}
 * @param channelCode value sent as {@code type}
 * @return the extracted URL, or the raw response when it holds no script element
 * @throws BizException when the response is blank or extracting the URL fails
 */
@Override
public String startPay(String orderNo, Double amount, String channelCode) {
    final String configuredPid = ConfigHolder.getConfigValue("abcyzf.pid");
    final String configuredNotifyUrl = ConfigHolder.getConfigValue("abcyzf.notifyUrl");
    final String configuredReturnUrl = ConfigHolder.getConfigValue("abcyzf.returnUrl");
    final String configuredName = ConfigHolder.getConfigValue("abcyzf.name");
    final String amountText = String.valueOf(amount);

    final Map<String, Object> query = new HashMap<>();
    query.put("pid", configuredPid);
    query.put("type", channelCode);
    query.put("out_trade_no", orderNo);
    query.put("notify_url", configuredNotifyUrl);
    query.put("return_url", configuredReturnUrl);
    query.put("name", configuredName);
    query.put("money", amountText);
    query.put("sign_type", "MD5");
    query.put("sign", generateRequestSign(orderNo, amountText, channelCode));

    final String response = HttpUtil.get(ConfigHolder.getConfigValue("abcyzf.payUrl"), query);
    System.err.println(response);
    if (StrUtil.isBlank(response)) {
        throw new BizException(BizError.发起支付异常);
    }

    try {
        final Element script = Jsoup.parse(response).selectFirst("script");
        if (script == null) {
            // No redirect script: the response itself is handed back.
            return response;
        }
        final String withoutPrefix = script.data().replace("window.location.href='", "");
        // Drop the two trailing characters of the script data.
        return withoutPrefix.substring(0, withoutPrefix.length() - 2);
    } catch (Exception e) {
        throw new BizException(BizError.发起支付异常);
    }
}
/**
 * Requests {@code /test} and verifies the HTTP status, content type and body, the model
 * object stored under {@code XsltConfiguration.XML_SOURCE_TAG}, and the {@code <h1>} text of
 * the rendered HTML.
 */
@Test
public void basicXSLTTestIsProcessedCorrectly() throws Exception {
    // Obtain the response and run the basic checks
    MvcResult response = this.mvc
            .perform(get("/test"))
            .andExpect(status().isOk())
            .andExpect(content().contentTypeCompatibleWith(MediaType.TEXT_HTML))
            .andExpect(content().string(containsString("Test label")))
            .andReturn();

    // Check the model
    final Object model = response.getModelAndView().getModel().get(XsltConfiguration.XML_SOURCE_TAG);
    assertNotNull("Model object returned is not null", model);
    assertThat("Model object is of the appropriate class", model, instanceOf(App.class));

    // Check the response
    Document html = Jsoup.parse(response.getResponse().getContentAsString());
    Element headerElement = html.selectFirst("h1");
    assertNotNull("We have a title", headerElement);
    // Actual value first, expected matcher second, so a failure reports
    // 'Expected: "TEST" but: was <actual text>' rather than the reverse.
    assertThat("We have a title", headerElement.text(), equalTo("TEST"));
}
/**
 * Builds a {@code Topic}, including its reply list, from a topic page.
 *
 * <p>Values come from three places: the page's {@code meta[property=...]} tags (published
 * time, URL, tag, section), the first {@code #Main > .box} element (title, content and the
 * values matched by the {@code PATTERN_TOPIC_*} patterns against its markup), and the optional
 * {@code #Main > .box > .cell > span} element (last-touched time and reply count).
 *
 * <p>Apart from that optional span and the content/subtle elements, looked-up elements are
 * dereferenced directly, so a page missing one of them results in a
 * {@code NullPointerException}.
 *
 * @param html the page HTML
 * @return the populated topic
 */
public static Topic getTopicAndReplies(String html) {
    Topic result = new Topic();
    Document page = Jsoup.parse(html);

    Element headerBox = page.selectFirst("#Main > .box");
    String headerMarkup = headerBox.toString();
    Element repliesSummary = page.selectFirst("#Main > .box > .cell > span");
    Element contentElement = headerBox.selectFirst(".topic_content");
    Element subtleElement = headerBox.selectFirst(".subtle");

    // The 'T' and 'Z' characters of the timestamp are replaced by spaces before conversion.
    String publishedRaw = page.selectFirst("meta[property=article:published_time]").attr("content");
    String publishedTime = publishedRaw.replaceAll("[TZ]", " ");
    result.setCreated(TimeUtil.strToTimestamp(publishedTime, null));

    String pageUrl = page.selectFirst("meta[property=og:url]").attr("content");
    result.setId(matcherGroup1Int(Pattern.compile("(\\d{2,})"), pageUrl));

    result.setTitle(headerBox.selectFirst(".header > h1").text());
    result.setClicks(matcherGroup1Int(PATTERN_TOPIC_CLICK, headerMarkup));

    String smallMarkup = headerBox.selectFirst(".header > small").toString();
    result.setAgo(matcherGroup1(Pattern.compile("· ([^·]+) ·"), smallMarkup));

    result.setFavors(matcherGroup1Int(PATTERN_TOPIC_FAVORS, headerMarkup));

    // Missing content falls back to "<br>", missing subtle block to a single space.
    String contentPart = (contentElement == null) ? "<br>" : contentElement.toString();
    String subtlePart = (subtleElement == null) ? " " : subtleElement.toString();
    result.setContent_rendered("\n" + contentPart + subtlePart + "\n\t---");

    String username = matcherGroup1(PATTERN_TOPIC_USERNAME, headerMarkup);
    String avatar = matcherGroup1(PATTERN_TOPIC_USER_AVATAR, headerMarkup);
    result.setMember(new Member(username, avatar));

    String tagContent = page.selectFirst("meta[property=article:tag]").attr("content");
    String sectionContent = page.selectFirst("meta[property=article:section]").attr("content");
    result.setNode(new Node(tagContent, sectionContent));

    if (repliesSummary != null) {
        String summaryMarkup = repliesSummary.toString();
        String lastTouched = matcherGroup1(Pattern.compile("直到 ([^+]+)"), summaryMarkup);
        result.setLast_touched(lastTouched.isEmpty() ? 0 : TimeUtil.strToTimestamp(lastTouched, null));
        result.setReplies(matcherGroup1Int(PATTERN_TOPIC_REPLY_COUNT, summaryMarkup));
    }

    result.setReplyList(getReplies(page, result.getMember().getUsername()));
    return result;
}
/**
 * Parses the text of a mail message as HTML, takes the first element matching the given CSS
 * query and issues an HTTP GET on its {@code href}.
 *
 * @param subjectMail   mail subject, used only in the failure message
 * @param firstCssQuery CSS query locating the link element in the message
 * @param message       the mail message to inspect
 */
private void validateActivationLink(String subjectMail, String firstCssQuery, Message message) throws MessagingException, IOException, TechnicalException, FailureException {
    final Element anchor = Jsoup.parse(getTextFromMessage(message)).selectFirst(firstCssQuery);
    try {
        final String target = anchor.attr("href");
        final String body = httpService.get(target);
        log.debug("response is {}.", body);
    } catch (final HttpServiceException e) {
        log.error(Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail), e);
        // NOTE(review): the Failure instance is created and not used further; presumably its
        // constructor reports the failure itself - confirm against Result.Failure.
        new Result.Failure<>("", Messages.format(Messages.getMessage(Messages.FAIL_MESSAGE_MAIL_ACTIVATION), subjectMail), false, Context.getCallBack(Callbacks.RESTART_WEB_DRIVER));
    }
}
/**
 * Downloads the gallery page of the given content and delegates to the four-argument
 * {@code parseImages} overload with the elements found by four different selectors.
 *
 * @param content the content whose gallery URL is fetched
 * @return the result of the delegated {@code parseImages} call
 * @throws Exception a {@code ParseException} when the page cannot be retrieved, or whatever
 *                   the download / delegated parsing throws
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws Exception {
    // Fetch the book gallery page
    final Document galleryDoc = getOnlineDocument(content.getGalleryUrl());
    if (galleryDoc == null) {
        throw new ParseException("Document unreachable : " + content.getGalleryUrl());
    }

    final Element scriptContainer = galleryDoc.selectFirst(".reading-content script");
    final List<Element> jgLinks = galleryDoc.select("#dgwt-jg-2 a"); // same for zone
    final List<Element> uniteImages = galleryDoc.select(".unite-gallery img"); // same for zone
    final List<Element> galleryLinks = galleryDoc.select("#gallery-2 a");

    return parseImages(scriptContainer, jgLinks, uniteImages, galleryLinks);
}
/**
 * Converts {@code sample.adoc} with the revealjs backend and the asciidoctor-diagram
 * extension, then checks that the generated HTML references the expected stylesheets and
 * that the {@code #diagram} slide embeds its image as a base64 SVG data URI.
 */
@Test
public void should_create_simple_slides() throws IOException {
    String filename = "sample";
    File inputFile = new File("build/resources/test/" + filename + ".adoc");
    File outputFile1 = new File(inputFile.getParentFile(), filename + ".html");
    removeFileIfItExists(outputFile1);

    AsciidoctorInvoker.main(new String[]{
        "-b", "revealjs",
        "-r", "asciidoctor-diagram",
        "-a", "revealjsdir=https://cdn.jsdelivr.net/npm/[email protected]",
        inputFile.getAbsolutePath()
    });

    // Assert the output exists BEFORE parsing it, so a missing file is reported by this
    // assertion rather than by an IOException from the parser.
    assertThat(outputFile1.exists(), is(true));
    Document doc = Jsoup.parse(outputFile1, "UTF-8");

    List<String> stylesheets = doc.head().getElementsByTag("link").stream()
        .filter(element -> "stylesheet".equals(element.attr("rel")))
        .map(element -> element.attr("href"))
        .collect(toList());
    assertThat(stylesheets,
        hasItems(
            "https://cdn.jsdelivr.net/npm/[email protected]/css/reveal.css",
            "https://cdn.jsdelivr.net/npm/[email protected]/css/theme/black.css"));

    Element diagramSlide = doc.selectFirst("#diagram");
    assertThat(diagramSlide, notNullValue());
    Element diagram = diagramSlide.selectFirst("div.imageblock img");
    assertThat(diagram, notNullValue());
    assertThat(diagram.attr("src"), startsWith("data:image/svg+xml;base64,"));
}