org.jsoup.nodes.Document#body ( )源码实例Demo

下面列出了org.jsoup.nodes.Document#body ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。

源代码1 项目: NClientV2   文件: LoginWebView.java
/**
 * Patches the fetched page before displaying it: the first {@code form} is renamed to a
 * {@code div}, an interception script is inserted in front of it, and the element with
 * {@code type="submit"} gets an {@code onclick} handler calling that script. The patched
 * document is then loaded as base64-encoded HTML.
 *
 * @param url  address of the fetched page (not read by this method)
 * @param html markup of the fetched page
 */
@Override
public void fetchUrl(String url, String html) {
    // Script placed right before the former form; it reads the login fields and
    // forwards them to the Interceptor object.
    final String interceptScript = "<script>\n" +
            "document.getElementsByClassName('lead')[0].innerHTML='test';\n"+
            "alert('test');\n"+
            "function intercept(){\n" +
            "    password=document.getElementById('id_password').value;\n" +
            "    email=document.getElementById('id_username_or_email').value;\n" +
            "    token=document.getElementsByName('csrfmiddlewaretoken')[0].value;\n" +
            "    captcha=document.getElementById('g-recaptcha-response').value;\n" +
            "     Interceptor.intercept(email,password,token,captcha);\n" +
            "}\n" +
            "</script>";

    Document page = Jsoup.parse(html);
    Element pageBody = page.body();
    Element loginForm = pageBody.getElementsByTag("form").first();

    Element leadText = pageBody.getElementsByClass("lead").first();
    leadText.text("Tested");

    loginForm.tagName("div");
    loginForm.before(interceptScript);

    Element submitButton = loginForm.getElementsByAttributeValue("type", "submit").first();
    submitButton.attr("onclick", "intercept()");

    removeFetcher(fetcher);

    byte[] pageBytes = page.outerHtml().getBytes();
    String encodedHtml = Base64.encodeToString(pageBytes, Base64.NO_PADDING);
    loadDataWithBaseURL(Utility.getBaseUrl(), encodedHtml, "text/html", "base64", null);
}
 
源代码2 项目: flow   文件: BootstrapHandlerTest.java
/**
 * Builds the bootstrap page for a second, separately initialised UI and checks that the
 * page body has two child nodes and that its first child element is {@code noscript}.
 */
@Test
public void renderUI() throws IOException {
    TestUI secondUi = new TestUI();
    initUI(testUI);
    secondUi.getInternals().setSession(session);

    VaadinRequest bootstrapRequest = createVaadinRequest();
    secondUi.doInit(bootstrapRequest, 0);
    secondUi.getRouter().initializeUI(secondUi, request);
    secondUi.getInternals().setContextRoot(contextRootRelativePath(request));

    BootstrapContext context = new BootstrapContext(bootstrapRequest, null,
            session, secondUi, this::contextRootRelativePath);

    Element pageBody = pageBuilder.getBootstrapPage(context).body();

    assertEquals(2, pageBody.childNodeSize());
    assertEquals("noscript", pageBody.child(0).tagName());
}
 
源代码3 项目: astor   文件: HtmlParserTest.java
/**
 * Parses markup without explicit html/head/body tags and checks the resulting tree:
 * child counts at each level, that the meta element ends up in the head and not in the
 * body, the document title, and the body text.
 */
@Test public void createsDocumentStructure() {
    Document parsed = Jsoup.parse(
        "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headEl = parsed.head();
    Element bodyEl = parsed.body();

    // the document root wraps a single html element
    assertEquals(1, parsed.children().size());
    // the html element holds head and body
    Element htmlEl = parsed.child(0);
    assertEquals(2, htmlEl.children().size());
    assertEquals(3, headEl.children().size());
    assertEquals(1, bodyEl.children().size());

    String metaName = headEl.getElementsByTag("meta").get(0).attr("name");
    assertEquals("keywords", metaName);
    assertEquals(0, bodyEl.getElementsByTag("meta").size());
    assertEquals("jsoup", parsed.title());
    assertEquals("Hello world", bodyEl.text());

    Element firstBodyChild = bodyEl.children().get(0);
    assertEquals("Hello world", firstBodyChild.text());
}
 
源代码4 项目: astor   文件: SelectorTest.java
/**
 * Runs several {@code :containsData} selectors against a body holding a paragraph, a
 * script, a style and a span with a comment, and checks the number, order and identity
 * of the matched elements.
 */
@Test @MultiLocaleTest public void containsData() {
    Document doc = Jsoup.parse(
        "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>");
    Element root = doc.body();

    Elements anyWithFunction = root.select(":containsData(function)");
    Elements scriptWithFunction = root.select("script:containsData(function)");
    Elements spanWithComments = root.select("span:containsData(comments)");
    Elements anyWithLetterO = root.select(":containsData(o)");
    Elements styleWithItem = root.select("style:containsData(ITEM)");

    // unqualified search hits the body and the script
    assertEquals(2, anyWithFunction.size());
    assertEquals(1, scriptWithFunction.size());
    assertEquals(anyWithFunction.last(), scriptWithFunction.first());
    assertEquals("<script>FUNCTION</script>", scriptWithFunction.outerHtml());

    assertEquals(1, spanWithComments.size());
    assertEquals("span", spanWithComments.first().tagName());

    assertEquals(3, anyWithLetterO.size());
    assertEquals("body", anyWithLetterO.first().tagName());
    assertEquals("script", anyWithLetterO.get(1).tagName());
    assertEquals("span", anyWithLetterO.get(2).tagName());

    assertEquals(1, styleWithItem.size());
}
 
源代码5 项目: astor   文件: HtmlParserTest.java
/**
 * Parses markup without explicit html/head/body tags and checks the resulting tree:
 * child counts at each level, that the meta element ends up in the head and not in the
 * body, the document title, and the body text.
 */
@Test public void createsDocumentStructure() {
    Document parsed = Jsoup.parse(
        "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headEl = parsed.head();
    Element bodyEl = parsed.body();

    // the document root wraps a single html element
    assertEquals(1, parsed.children().size());
    // the html element holds head and body
    Element htmlEl = parsed.child(0);
    assertEquals(2, htmlEl.children().size());
    assertEquals(3, headEl.children().size());
    assertEquals(1, bodyEl.children().size());

    String metaName = headEl.getElementsByTag("meta").get(0).attr("name");
    assertEquals("keywords", metaName);
    assertEquals(0, bodyEl.getElementsByTag("meta").size());
    assertEquals("jsoup", parsed.title());
    assertEquals("Hello world", bodyEl.text());

    Element firstBodyChild = bodyEl.children().get(0);
    assertEquals("Hello world", firstBodyChild.text());
}
 
源代码6 项目: astor   文件: SelectorTest.java
/**
 * Runs several {@code :containsData} selectors against a body holding a paragraph, a
 * script, a style and a span with a comment, and checks the number, order and identity
 * of the matched elements.
 */
@Test @MultiLocaleTest public void containsData() {
    Document doc = Jsoup.parse(
        "<p>function</p><script>FUNCTION</script><style>item</style><span><!-- comments --></span>");
    Element root = doc.body();

    Elements anyWithFunction = root.select(":containsData(function)");
    Elements scriptWithFunction = root.select("script:containsData(function)");
    Elements spanWithComments = root.select("span:containsData(comments)");
    Elements anyWithLetterO = root.select(":containsData(o)");
    Elements styleWithItem = root.select("style:containsData(ITEM)");

    // unqualified search hits the body and the script
    assertEquals(2, anyWithFunction.size());
    assertEquals(1, scriptWithFunction.size());
    assertEquals(anyWithFunction.last(), scriptWithFunction.first());
    assertEquals("<script>FUNCTION</script>", scriptWithFunction.outerHtml());

    assertEquals(1, spanWithComments.size());
    assertEquals("span", spanWithComments.first().tagName());

    assertEquals(3, anyWithLetterO.size());
    assertEquals("body", anyWithLetterO.first().tagName());
    assertEquals("script", anyWithLetterO.get(1).tagName());
    assertEquals("span", anyWithLetterO.get(2).tagName());

    assertEquals(1, styleWithItem.size());
}
 
源代码7 项目: emotional_analysis   文件: IpProxy.java
/**
 * Downloads the page at {@code url} and reads proxy IP/port pairs from a node list found
 * at a fixed position inside the page body.
 *
 * <p>Rows are taken from every second child node, starting at index 2 and stopping at
 * index 30 or at the end of the node list, whichever comes first, so at most 15 entries
 * are returned.
 *
 * <p>NOTE(review): the request carries a hard-coded session cookie and Host/Referer/Origin
 * headers for music.163.com regardless of {@code url} - confirm this is intended; session
 * data should not live in source.
 *
 * @param url address of the proxy listing page
 * @return the extracted entries, in page order
 * @throws Exception if the request fails, the page does not have the expected node
 *                   layout, or a port cell is not a number
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	List<IpEntity> ipList = new ArrayList<>();
	Response execute = Jsoup.connect(url)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "max-age=60").header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com").header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie",
					"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
			.method(Method.GET).ignoreContentType(true)
			.timeout(2099999999).execute();
	Document pageJson = execute.parse();
	Element body = pageJson.body();
	// Fixed path down to the node whose children are the rows; tied to the page's current layout.
	List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();
	// Collect the entries at the even indexes 2..30 (up to 15 proxies). The upper bound is
	// capped by the actual node count so a shorter listing yields fewer entries instead of
	// an IndexOutOfBoundsException.
	int lastRowIndex = Math.min(30, childNodes.size() - 1);
	for(int i = 2;i <= lastRowIndex;i += 2){
		List<Node> nodes = childNodes.get(i).childNodes();
		String ip = nodes.get(3).childNode(0).toString();
		// Trim before parsing: surrounding whitespace in the cell would otherwise make parseInt fail.
		int port = Integer.parseInt(nodes.get(5).childNode(0).toString().trim());
		IpEntity ipEntity = new IpEntity();
		ipEntity.setIp(ip);
		ipEntity.setPort(port);
		ipList.add(ipEntity);
	}
	return ipList;
}
 
源代码8 项目: ShareBox   文件: BaseSoup.java
/**
 * Parses the stored HTML, keeps its head and body elements, and passes them on to
 * {@code parse} together with the value map.
 *
 * @param arg caller-supplied arguments; stored as-is before parsing starts
 * @return the value map, created on first use, after {@code parse} has run
 */
public Map<String, Object> doParse(Object... arg) {
    mArguments = arg;

    // the value map is created lazily and reused by later calls
    if (mValues == null) {
        mValues = new HashMap<>();
    }

    final Document parsed = Jsoup.parse(mHtml);
    mHeader = parsed.head();
    mBody = parsed.body();

    parse(parsed, mHeader, mBody, mValues);
    return mValues;
}
 
源代码9 项目: springboot-admin   文件: JsoupUtils.java
/**
 * Returns the inner HTML of the body of the given markup.
 *
 * @param html markup to parse
 * @return the body's inner HTML; the input itself when it is blank or when no body is
 *         available after parsing
 */
public static String getBodyHtml(String html) {
	if (!StringUtils.isNotBlank(html)) {
		return html;
	}

	Document parsed = Jsoup.parse(html);
	if (null == parsed || parsed.body() == null) {
		return html;
	}
	return parsed.body().html();
}
 
源代码10 项目: baleen   文件: RemoveEmptyText.java
/**
 * Calls {@code removeEmpty} on the document body over and over until it returns
 * {@code true}.
 *
 * @param document the document whose body is processed
 */
@Override
public void manipulate(Document document) {
  final Element root = document.body();

  boolean finished;
  do {
    // all the work happens inside removeEmpty; its result decides whether to go again
    finished = removeEmpty(root);
  } while (!finished);
}
 
源代码11 项目: zeppelin   文件: ZeppelinRDisplay.java
/**
 * Chooses how the given HTML output is displayed, based on what its body contains.
 *
 * <p>A body without any {@code p} element is returned as HTML unchanged. Otherwise a body
 * that holds none of {@code <img}, {@code <script}, {@code "%html "}, {@code "%table "} or
 * {@code "%img "} goes to the text display; a body containing {@code %table} goes to the
 * table display, one containing {@code %img} to the image display, and everything else to
 * the HTML display.
 *
 * @param html       markup to inspect
 * @param imageWidth width forwarded to the HTML display
 * @return the display chosen for the markup
 */
public static RDisplay render(String html, String imageWidth) {
    Document parsed = Jsoup.parse(html);
    parsed.outputSettings().prettyPrint(false);
    Element content = parsed.body();

    // no paragraph at all: hand the body markup through untouched
    if (content.getElementsByTag("p").isEmpty()) {
      return new RDisplay(content.html(), Type.HTML, Code.SUCCESS);
    }

    String markup = content.html();

    boolean hasMarker = markup.contains("<img")
        || markup.contains("<script")
        || markup.contains("%html ")
        || markup.contains("%table ")
        || markup.contains("%img ");
    if (!hasMarker) {
      return textDisplay(content);
    }

    if (markup.contains("%table")) {
      return tableDisplay(content);
    } else if (markup.contains("%img")) {
      return imgDisplay(content);
    }
    return htmlDisplay(content, imageWidth);
}
 
源代码12 项目: astor   文件: Parser.java
/**
 * Builds a shell document for {@code baseUri} and places the nodes parsed from the given
 * HTML fragment into its {@code body}.
 *
 * @param bodyHtml HTML fragment to parse
 * @param baseUri base URI of the document (its original fetch location), used to resolve relative URLs
 *
 * @return the shell document with the parsed nodes appended to its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element shellBody = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, shellBody, baseUri);

    // work on an array copy: the parsed list changes while nodes are re-parented
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // detach from the last node down to index 1; index 0 is not detached here
    int idx = snapshot.length - 1;
    while (idx > 0) {
        snapshot[idx].remove();
        idx--;
    }

    for (int k = 0; k < snapshot.length; k++) {
        shellBody.appendChild(snapshot[k]);
    }
    return shell;
}
 
源代码13 项目: astor   文件: Parser.java
/**
 * Builds a shell document for {@code baseUri} and places the nodes parsed from the given
 * HTML fragment into its {@code body}.
 *
 * @param bodyHtml HTML fragment to parse
 * @param baseUri base URI of the document (its original fetch location), used to resolve relative URLs
 *
 * @return the shell document with the parsed nodes appended to its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element shellBody = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, shellBody, baseUri);

    // work on an array copy: the parsed list changes while nodes are re-parented
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // detach from the last node down to index 1; index 0 is not detached here
    int idx = snapshot.length - 1;
    while (idx > 0) {
        snapshot[idx].remove();
        idx--;
    }

    for (int k = 0; k < snapshot.length; k++) {
        shellBody.appendChild(snapshot[k]);
    }
    return shell;
}
 
源代码14 项目: astor   文件: HtmlParserTest.java
/**
 * Checks that a comment following an {@code img} is parsed as a child node of the body
 * with its data kept verbatim, and that the paragraph after it keeps its text.
 */
@Test public void parsesComments() {
    Document doc = Jsoup.parse(
        "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>");
    Element bodyEl = doc.body();

    // img is an empty tag, so the comment must sit next to it in the body, not inside it
    Comment markupComment = (Comment) bodyEl.childNode(1);
    assertEquals(" <table><tr><td></table> ", markupComment.getData());

    TextNode paragraphText = (TextNode) bodyEl.child(1).childNode(0);
    assertEquals("Hello", paragraphText.getWholeText());
}
 
源代码15 项目: jsoup-learning   文件: Parser.java
/**
 * Builds a shell document for {@code baseUri} and appends the nodes parsed from the given
 * HTML fragment to its {@code body}.
 *
 * @param bodyHtml HTML fragment to parse
 * @param baseUri base URI of the document (its original fetch location), used to resolve relative URLs
 *
 * @return the shell document with the parsed nodes appended to its body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    Document shell = Document.createShell(baseUri);
    Element shellBody = shell.body();
    List<Node> parsed = parseFragment(bodyHtml, shellBody, baseUri);

    // iterate over an array copy: the parsed list changes while nodes are re-parented
    Node[] snapshot = parsed.toArray(new Node[parsed.size()]);
    for (int k = 0; k < snapshot.length; k++) {
        shellBody.appendChild(snapshot[k]);
    }
    return shell;
}
 
源代码16 项目: firing-range   文件: TagServlet.java
/**
 * Parses the {@code q} parameter as an HTML body fragment and passes the resulting
 * elements to {@code handleRequest}, together with an optional allowed tag and allowed
 * attribute taken from the request's path info.
 *
 * <p>Path info of the form {@code /tag} sets only the allowed tag. With
 * {@code /tag/attribute[/...]} the first segment is the allowed tag and the second the
 * allowed attribute; a missing second segment (e.g. {@code /tag/}) gives an empty
 * attribute.
 *
 * <p>An error with code 400 is reported through {@code Responses.sendError} when
 * {@code q} is missing or yields no elements.
 *
 * @param request  the incoming request
 * @param response the response to write to
 * @throws IOException declared for the response-writing helpers this method calls
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
  String q = request.getParameter("q");
  if (q == null) {
    Responses.sendError(response, "Missing q parameter", 400);
    return;
  }

  Document doc = Jsoup.parseBodyFragment(q);
  Element body = doc.body();
  Elements elements = body.getAllElements();
  // The wrapping body element is dropped unless the raw input mentions "body".
  if (!(q.contains("body"))){
    elements.remove(body);
  }

  if (elements.isEmpty()) {
    Responses.sendError(response, "Invalid input, no tags", 400);
    return;
  }

  String allowedTag = "";
  String allowedAttribute = "";
  String rawPathInfo = request.getPathInfo();
  if (rawPathInfo != null) {
    String pathInfo = rawPathInfo.substring(1);
    if (pathInfo.contains("/")) {
      // A positive limit keeps trailing empty segments, so "tag/" yields ["tag", ""]
      // instead of a one-element array that would fail on index 1. Limit 3 keeps the
      // second segment free of any further "/segments".
      String[] segments = pathInfo.split("/", 3);
      allowedTag = segments[0];
      allowedAttribute = segments[1];
    } else {
      allowedTag = pathInfo;
    }
  }
  handleRequest(elements, response, allowedTag, allowedAttribute);
}
 
源代码17 项目: rebuild   文件: SMSender.java
/**
 * Loads the mail template from the {@code locales/email_zh-CN.html} resource file and
 * parses it as UTF-8.
 *
 * @return the body element of the parsed template
 * @throws IOException declared by the file-based parse call
 */
protected static Element getMailTemplate() throws IOException {
	File templateFile = SysConfiguration.getFileOfRes("locales/email_zh-CN.html");
	return Jsoup.parse(templateFile, "utf-8").body();
}
 
源代码18 项目: baleen   文件: AbstractHtmlConsumer.java
/**
 * Renders the given JCas as an HTML file.
 *
 * <p>An empty HTML document is created with the language of the document annotation. Its
 * head receives an optional stylesheet link (when {@code css} is set), a utf-8 charset
 * meta element and the document's metadata via {@code appendMeta}. The value of a
 * {@code documentTitle} metadata entry, if present, becomes the title. The body is
 * filled by {@code writeBody} and the result is written to the file returned by
 * {@code getFileName}.
 *
 * <p>The file is always written as UTF-8 so that its bytes agree with the charset
 * declared in the document head.
 *
 * @param jCas the CAS to render
 * @throws AnalysisEngineProcessException if the file cannot be written
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final File f = getFileName(jCas);
  final DocumentAnnotation da = getDocumentAnnotation(jCas);

  final Document doc =
      Jsoup.parse("<!DOCTYPE html>\n<html lang=\"" + da.getLanguage() + "\"></html>");
  doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
  final Element head = doc.head();

  if (!Strings.isNullOrEmpty(css)) {
    final Element cssLink = head.appendElement("link");
    cssLink.attr("rel", "stylesheet");
    cssLink.attr("href", css);
  }

  final Element charset = head.appendElement("meta");
  charset.attr("charset", "utf-8");

  appendMeta(head, "document.type", da.getDocType());
  appendMeta(head, "document.sourceUri", da.getSourceUri());
  appendMeta(head, "externalId", da.getHash());

  appendMeta(head, "document.classification", da.getDocumentClassification());
  appendMeta(
      head,
      "document.caveats",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentCaveats())));
  appendMeta(
      head,
      "document.releasability",
      String.join(",", UimaTypesUtils.toArray(da.getDocumentReleasability())));

  // Copy every metadata entry into the head; the last documentTitle entry seen wins as title.
  String title = null;
  for (final Metadata md : JCasUtil.select(jCas, Metadata.class)) {
    appendMeta(head, md.getKey(), md.getValue());
    if ("documentTitle".equalsIgnoreCase(md.getKey())) {
      title = md.getValue();
    }
  }

  if (!Strings.isNullOrEmpty(title)) {
    doc.title(title);
  }

  final Element body = doc.body();

  writeBody(jCas, body);

  try {
    // Must match the <meta charset="utf-8"> written above; the platform default charset
    // would produce a file whose bytes contradict its own declaration.
    FileUtils.writeStringToFile(f, doc.html(), java.nio.charset.StandardCharsets.UTF_8);
  } catch (final IOException e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
源代码19 项目: something.apk   文件: ThreadPageRequest.java
/**
 * Turns a fetched thread page into a {@link ThreadPage}: reads paging, bookmark, title,
 * forum/thread ids and reply state from the document, parses the posts, and renders
 * header, posts and footer templates into a single HTML string.
 *
 * @param document                 parsed thread page
 * @param showImages               forwarded to {@code parsePosts}
 * @param showAvatars              forwarded to {@code parsePosts}
 * @param hidePreviouslyReadImages forwarded to {@code parsePosts}
 * @param jumpToPost               id of the post to jump to; values of 0 or less mean none
 * @param redirectedUrl            URL the request was redirected to, may be empty or null;
 *                                 its fragment is forwarded to {@code parsePosts}, except
 *                                 for {@code lastpost}, which becomes the jump target
 * @return the assembled page data
 */
public static ThreadPage processThreadPage(Document document, boolean showImages, boolean showAvatars, boolean hidePreviouslyReadImages, long jumpToPost, String redirectedUrl){
    ArrayList<HashMap<String, String>> posts = new ArrayList<HashMap<String, String>>();

    int currentPage, maxPage = 1, threadId, forumId, unread;
    String jumpToId = jumpToPost > 0 ? "#post"+jumpToPost : null;

    String ptiFragment = null;
    if(!TextUtils.isEmpty(redirectedUrl)){
        Uri url = Uri.parse(redirectedUrl);
        ptiFragment = url.getFragment();
        // Plain equality, with the literal on the left: the fragment is null when the URL
        // has none, and it must not be interpreted as a regular expression.
        if("lastpost".equals(ptiFragment)){
            ptiFragment = null;
            jumpToId = "#lastpost";
        }
    }


    Element pages = document.getElementsByClass("pages").first();
    currentPage = FastUtils.safeParseInt(pages.getElementsByAttribute("selected").attr("value"), 1);
    Element lastPage = pages.getElementsByTag("option").last();
    if(lastPage != null){
        maxPage = FastUtils.safeParseInt(lastPage.attr("value"), 1);
    }

    boolean bookmarked = document.getElementsByClass("unbookmark").size() > 0;

    String threadTitle = TextUtils.htmlEncode(document.getElementsByClass("bclast").first().text());

    Element body = document.body();
    forumId = Integer.parseInt(body.attr("data-forum"));
    threadId = Integer.parseInt(body.attr("data-thread"));

    // Replies are possible outside archive forums as long as the first thread bar shows no "closed" image.
    Elements threadbars = document.getElementsByClass("threadbar");
    boolean canReply = !Constants.isArchiveForum(forumId) && threadbars.first().getElementsByAttributeValueContaining("src", "images/forum-closed.gif").size() == 0;

    unread = parsePosts(document, posts, showImages, showAvatars, hidePreviouslyReadImages, ptiFragment, canReply, currentPage == maxPage, forumId);

    StringBuilder builder = new StringBuilder(2048);

    int previouslyRead = posts.size()-unread;

    HashMap<String, String> headerArgs = new HashMap<String, String>();
    headerArgs.put("jumpToPostId", jumpToId);
    headerArgs.put("fontSize", SomePreferences.fontSize);
    headerArgs.put("theme", getTheme(forumId));
    // The "previous posts" label is only set when the page mixes read and unread posts.
    headerArgs.put("previouslyRead", previouslyRead > 0 && unread > 0 ? previouslyRead+" Previous Post"+(previouslyRead > 1 ? "s":"") : null);
    MustCache.applyHeaderTemplate(builder, headerArgs);

    for(HashMap<String, String> post : posts){
        MustCache.applyPostTemplate(builder, post);
    }

    MustCache.applyFooterTemplate(builder, null);

    ThreadItem cachedThread = ThreadManager.getThread(threadId);
    if(cachedThread != null){
        cachedThread.updateUnreadCount(currentPage, maxPage, SomePreferences.threadPostPerPage);
    }

    return new ThreadPage(builder.toString(), currentPage, maxPage, threadId, forumId, threadTitle, -unread, bookmarked, canReply);

}
 
源代码20 项目: firing-range   文件: Expression.java
/**
 * Parses the {@code q} parameter as an HTML body fragment and echoes back, through
 * {@code Responses.sendXssed}, every parsed element that passes the attribute filter.
 *
 * <p>An element is dropped when any of its attributes has a key starting with
 * {@code on}, a key equal to {@code href} or {@code src}, or is a {@code style}
 * attribute whose value contains {@code expression} (all compared in lower case).
 *
 * <p>An error with code 400 is reported through {@code Responses.sendError} when
 * {@code q} is missing or contains no elements.
 */
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response) throws IOException {
  String q = request.getParameter("q");
  if (q == null) {
    Responses.sendError(response, "Missing q parameter", 400);
    return;
  }

  Element fragmentBody = Jsoup.parseBodyFragment(q).body();
  Elements candidates = fragmentBody.getAllElements();
  // the wrapping body itself is never echoed
  candidates.remove(fragmentBody);
  if (candidates.isEmpty()) {
    Responses.sendError(response, "Invalid input, no tags", 400);
    return;
  }

  StringBuilder output = new StringBuilder();
  for (Element candidate : candidates) {
    boolean rejected = false;

    for (Attribute attr : candidate.attributes()) {
      String key = attr.getKey().toLowerCase();

      boolean blockedKey = key.startsWith("on") || key.equals("href") || key.equals("src");
      boolean blockedStyle =
          key.equals("style") && attr.getValue().toLowerCase().contains("expression");

      if (blockedKey || blockedStyle) {
        rejected = true;
      }
    }

    if (!rejected) {
      output.append(candidate.toString());
    }
  }
  Responses.sendXssed(response, output.toString());
}