org.jsoup.nodes.Document#toString ( )源码实例Demo

下面列出了 org.jsoup.nodes.Document#toString() 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: MMDownloader   文件: Downloader.java
/**
 * Downloads an archive page with Jsoup and returns its HTML.
 *
 * @param eachArchiveAddress address of the archive page that actually holds the comic
 * @return the HTML of the fetched page, serialized from the parsed document
 * @throws Exception if the request or parsing fails; a RuntimeException is thrown when
 *                   the page contains no {@code div.gallery-template} element
 */
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
	print.info("고속 연결 시도중...\n");

	// The password is sent as request data from the very first request.
	// NOTE(review): the original comment calls this a POST, but no request method is
	// set anywhere in this chain — confirm which method is intended.
	Document fetched = Jsoup.connect(eachArchiveAddress)
			.userAgent(UserAgent.getUserAgent())
			.header("charset", "utf-8")
			.header("Accept-Encoding", "gzip") // gzip added 2017-11-26
			.timeout(MAX_WAIT_TIME)
			.data("pass", PASSWORD) // original note (2018-04-29): meaningless, the site uses reCaptcha
			.followRedirects(true)
			.execute()
			.parse();

	// The comic lives inside <div class="gallery-template">; without it the fetch is a failure.
	if (fetched.select("div.gallery-template").isEmpty()) {
		throw new RuntimeException("Jsoup Parsing Failed: No tag found");
	}

	String pageSource = fetched.toString();
	print.info("고속 연결 성공!\n");
	return pageSource;
}
 
源代码2 项目: jpress   文件: TemplateRender.java
/**
 * Parses the given HTML, passes its script/img/link elements through {@code replace}
 * and returns the document serialized without pretty-printing.
 *
 * @param content HTML text; returned unchanged when blank
 * @return the re-serialized document, or {@code content} itself when it is blank
 */
public String buildNormalHtml(String content) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document parsed = Jsoup.parse(content);
    // Serialize compactly: no pretty-printing, no outline mode.
    parsed.outputSettings().prettyPrint(false).outline(false);

    replace(parsed.select("script"), "src");
    replace(parsed.select("img"), "src");
    replace(parsed.select("link"), "href");

    // Template preview: anchors are only rewritten while a preview template is active.
    boolean previewActive = templatePreviewEnable
            && TemplateManager.me().getPreviewTemplate() != null;
    if (previewActive) {
        replacePreviewHref(parsed.select("a"));
    }

    return parsed.toString();
}
 
源代码3 项目: jpress   文件: _WechatArticleImport.java
/**
 * Rewrites the {@code src} of every {@code img} in the content to a local
 * {@code /attachment/...} path and collects the rewritten paths.
 *
 * @param content   HTML of the article
 * @param imageUrls list that receives each rewritten image path, in document order
 * @return the document serialized back to HTML after rewriting
 */
private String processContentImages(String content, List<String> imageUrls) {
    Document doc = Jsoup.parse(content);
    Elements images = doc.select("img");
    if (images == null) {
        return doc.toString();
    }

    for (Element img : images) {
        // Prefer src; fall back to data-src when src is absent.
        String path;
        if (img.hasAttr("src")) {
            path = img.attr("src");
        } else {
            path = img.attr("data-src");
        }

        // Example input (one URL, wrapped here for width):
        // http://mmbiz.qpic.cn/mmbiz/4gZTdZfnQeDvQqCZFuVvYv8scGS7sEQTRETgISib1blz5iclAtnsccaJhaugmKc
        // hhm8mFOtjnicibibumazy8wPS6Xg/640?tp=webp&wxfrom=5&wx_lazy=1&wx_co=1
        path = replaceLast(path, "/", "__");

        // http:// maps to /attachment/, anything else has https:// mapped to /attachment/s.
        if (path.startsWith("http://")) {
            path = path.replace("http://", "/attachment/");
        } else {
            path = path.replace("https://", "/attachment/s");
        }

        // Insert a .png extension in front of the query string.
        path = path.replace("?", ".png?");

        img.removeAttr("data-src");
        img.attr("src", path);
        imageUrls.add(path);
    }

    return doc.toString();
}
 
/**
 * Rewrites the links of an HTML string: the {@code href} of every anchor and the
 * {@code content} of every meta element are passed through {@code updateAttribute}.
 *
 * @param html        The HTML string.
 * @param requestHost The host name from the request.
 * @return The document serialized back to HTML after the attributes were updated.
 */
public String rewriteAllLinks(final String html, final String requestHost) {
    final Document parsed = Jsoup.parse(html);

    // Both selections are taken before any attribute is touched.
    final Elements anchors = parsed.select("a[href]");
    final Elements metaTags = parsed.select("meta[content]");

    updateAttribute(anchors, "href", requestHost);
    updateAttribute(metaTags, "content", requestHost);

    return parsed.toString();
}
 
源代码5 项目: astor   文件: HtmlParserTest.java
/**
 * Parses the invalid-table fixture and checks that, in the pretty-printed output,
 * the text "Why am I here?" appears after the text "Comment".
 */
@Test
public void testInvalidTableContents() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document parsed = Jsoup.parse(fixture, "UTF-8");
    parsed.outputSettings().prettyPrint(true);

    String html = parsed.toString();
    int commentAt = html.indexOf("Comment");
    int searchTextAt = html.indexOf("Why am I here?");

    assertTrue("Comment not found", commentAt >= 0);
    assertTrue("Search text not found", searchTextAt >= 0);
    assertTrue("Search text did not come after comment", commentAt < searchTextAt);
}
 
源代码6 项目: astor   文件: HtmlParserTest.java
/**
 * Verifies ordering in the rendered invalid-table fixture: both "Comment" and
 * "Why am I here?" must be present, and the latter must come later in the output.
 */
@Test
public void testInvalidTableContents() throws IOException {
    Document document = Jsoup.parse(
            ParseTest.getFile("/htmltests/table-invalid-elements.html"), "UTF-8");
    document.outputSettings().prettyPrint(true);
    String output = document.toString();

    int commentIndex = output.indexOf("Comment");
    int questionIndex = output.indexOf("Why am I here?");

    assertTrue("Comment not found", commentIndex != -1);
    assertTrue("Search text not found", questionIndex != -1);
    assertTrue("Search text did not come after comment", questionIndex > commentIndex);
}
 
/**
 * Downloads the page at {@code f_url[0]} and scrapes three values out of its raw HTML
 * text: a caption (published through {@code publishProgress}), whether the post looks
 * like a video (stored in {@code type}) and the image URL (returned).
 *
 * NOTE(review): none of the {@code indexOf} results is checked for -1. When a marker is
 * missing, the fixed offset is still added and the next quoted string after that
 * position is extracted. The {@code "en"} comparison below presumably depends on that
 * behaviour — confirm before tightening any of the index handling.
 *
 * @param f_url the page URL is read from element 0
 * @return the extracted image URL, or {@code null} if anything failed
 */
@Override
        protected String doInBackground(String... f_url) {
            try {
                Document doc = Jsoup.connect(f_url[0]).get();
                String html = doc.toString();

                type = false;

                // Caption: skip a fixed 48 characters past the marker, then take the next
                // double-quoted string.
                int captionFrom = html.indexOf("edge_media_to_caption") + 48;
                int startCaption = html.indexOf("\"", captionFrom) + 1;
                int endCaption = html.indexOf("\"", startCaption);
                String strCaption = html.substring(startCaption, endCaption);

                // First argument is a kind flag; per the original note:
                // 0 = caption, 1 = video, 2 = image.
                publishProgress("0", strCaption);

                // Video: take the quoted string that follows the "video_url" key (11 chars long).
                int videoFrom = html.indexOf("\"video_url\"") + 11;
                int startVid = html.indexOf("\"", videoFrom) + 1;
                int endVid = html.indexOf("\"", startVid);
                String urlVid = html.substring(startVid, endVid);

                if (!urlVid.equalsIgnoreCase("en")) {
                    // It is a video: show the play button.
                    type = true;
                }

                // Image URL: quoted string that follows the display_url key.
                int imageFrom = html.indexOf("display_url") + 13;
                int start = html.indexOf("\"", imageFrom) + 1;
                int end = html.indexOf("\"", start);
                return html.substring(start, end);

            } catch (Exception e) {
                // getMessage() may be null, and Log.e rejects a null message; log a
                // non-null string and attach the throwable so the stack trace is kept.
                Log.e("Error: ", String.valueOf(e.getMessage()), e);
            }

            return null;
        }
 
/**
 * Fetches the page at {@code f_url[0]} and pulls three values out of its HTML text by
 * plain string searching: the caption (sent via {@code publishProgress}), a video flag
 * (written to {@code type}) and the image URL (the return value).
 *
 * NOTE(review): the {@code indexOf} results are never compared with -1, so a missing
 * marker does not fail; the fixed offset is added anyway and the next quoted string is
 * used. The check against {@code "en"} presumably relies on this — verify before
 * changing the index arithmetic.
 *
 * @param f_url the page URL is taken from element 0
 * @return the image URL, or {@code null} when an exception occurred
 */
@Override
protected String doInBackground(String... f_url) {
    try {
        Document doc = Jsoup.connect(f_url[0]).get();
        String html = doc.toString();

        type = false;

        // Caption: fixed 48-character skip after the marker, then the next quoted string.
        int indexCaption = html.indexOf("edge_media_to_caption") + 48;
        int startCaption = html.indexOf("\"", indexCaption) + 1;
        int endCaption = html.indexOf("\"", startCaption);
        String strCaption = html.substring(startCaption, endCaption);

        // Kind flag per the original note: 0 = caption, 1 = video, 2 = image.
        publishProgress("0", strCaption);

        // Video: quoted string after the "video_url" key (the key itself is 11 chars).
        int indexVid = html.indexOf("\"video_url\"") + 11;
        int startVid = html.indexOf("\"", indexVid) + 1;
        int endVid = html.indexOf("\"", startVid);
        String urlVid = html.substring(startVid, endVid);

        if (!urlVid.equalsIgnoreCase("en")) {
            // It is a video: show the play button.
            type = true;
        }

        // Image URL: quoted string after the display_url key.
        int index = html.indexOf("display_url") + 13;
        int start = html.indexOf("\"", index) + 1;
        int end = html.indexOf("\"", start);
        return html.substring(start, end);

    } catch (Exception e) {
        // Log.e rejects a null message and getMessage() can be null; pass a non-null
        // string plus the throwable so the failure is logged instead of crashing here.
        Log.e("Error: ", String.valueOf(e.getMessage()), e);
    }

    return null;
}
 
源代码9 项目: V2EX   文件: HtmlUtil.java
/**
 * Wraps an HTML fragment with theme-aware styling: injects a style block whose colors
 * come from the current theme, adds UTF-8 meta tags, forces every image to full width,
 * and returns the resulting document as a string.
 *
 * @param html    HTML to style; {@code null} or empty yields an empty string
 * @param context context whose theme supplies the text, link and code colors
 * @return the styled document as HTML, or {@code ""} when there is nothing to style
 */
public static String applyHtmlStyle(String html, Context context){
    // Nothing to style: return before doing any theme lookups.
    if (html == null || html.equals("")){
        return "";
    }

    // One TypedValue is reused for all lookups; its data field is copied out after each.
    TypedValue typedColor = new TypedValue();
    context.getTheme().resolveAttribute(R.attr.attr_color_text, typedColor, true);
    String textColorStr = toCssHex(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_text_link, typedColor, true);
    String linkColorStr = toCssHex(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_accent, typedColor, true);
    String codeColorStr = toCssHex(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_text_secondary, typedColor, true);
    String codeBackgroundStr = toCssHex(typedColor.data);

    Document document = Jsoup.parse(html);
    document.head()
            .append(
            "<style type=\"text/css\">" +
            "body{width:95%;}" +
                    "* {" +
            "   color:" + textColorStr + ";" +
            "}" +
            "a {" +
            "   color:" + linkColorStr + ";" +
                    "word-wrap:break-word;" +
            "}" +
            "code,pre {" +
            "    color: " + codeColorStr + ";" +
            "    background: " + codeBackgroundStr + ";" +
            "    padding: 3px;" +
            "    border-radius: 5px;" +
                    "word-wrap:normal;" +
            "} img { border:1px solid grey;}" +
            "</style>");
    // The second tag uses the http-equiv attribute (it was misspelled "http-equlv").
    document.head()
            .append("<meta name=\"content-type\" content=\"text/html; charset=utf-8\">" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">");
    // Make every image span the available width while keeping its aspect ratio.
    for (Element img : document.select("img")){
        img.attr("width","100%");
        img.attr("height","auto");
    }
    document.charset(Charset.forName("utf-8"));
    return document.toString();
}

/**
 * Formats the red, green and blue channels of a color int as a CSS {@code #rrggbb}
 * string. Each channel is zero-padded to two hex digits; unpadded digits would produce
 * a malformed color whenever a channel is below 0x10.
 */
private static String toCssHex(int color) {
    return String.format("#%02x%02x%02x",
            Color.red(color), Color.green(color), Color.blue(color));
}
 
源代码10 项目: jboot   文件: RenderHelpler.java
/**
 * Parses the given HTML and passes the URL attribute of its scripts, images and links
 * through {@code replace} together with the given domain.
 *
 * @param content HTML text; returned unchanged when blank
 * @param domain  domain handed to {@code replace} for each element group
 * @return the document serialized back to HTML, or {@code content} itself when blank
 */
public static String processCDN(String content, String domain) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document parsed = Jsoup.parse(content);

    // Only elements that actually carry the attribute are selected.
    replace(parsed.select("script[src]"), "src", domain);
    replace(parsed.select("img[src]"), "src", domain);
    replace(parsed.select("link[href]"), "href", domain);

    return parsed.toString();
}
 
源代码11 项目: cute-proxy   文件: HtmlBeautifier.java
/**
 * Re-serializes the given HTML through Jsoup with an indent of four spaces.
 *
 * @param s       HTML text to format
 * @param charset not used by this implementation
 * @return the parsed document rendered as a string
 */
@Override
public String beautify(String s, Charset charset) {
    Document parsed = Jsoup.parse(s);
    Document.OutputSettings settings = parsed.outputSettings();
    settings.indentAmount(4);
    return parsed.toString();
}