org.jsoup.nodes.Document#outerHtml() 源码实例 Demo

下面列出了 org.jsoup.nodes.Document#outerHtml() 的实例代码，你可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: v9porn   文件: GoogleRecaptchaVerifyPresenter.java
/**
 * Parses the given HTML, appends a {@code <script>} element defining
 * {@code getPostData()} to the document head, and returns the re-serialized
 * document.
 *
 * @param oldHtml the HTML to inject the script into
 * @return the serialized document including the script, or an empty string
 *         when {@code TextUtils.isEmpty(oldHtml)} is true
 */
private String injectJs(String oldHtml) {
    if (TextUtils.isEmpty(oldHtml)) {
        return "";
    }

    // getPostData() returns "action,r,id,recaptcha" read from the page's form fields.
    final String postDataScript = "<script type=\"text/javascript\">\n" +
            "        function getPostData() {\n" +
            "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
            "            if (!recaptcha || recaptcha === '') {\n" +
            "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
            "            }\n" +
            "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
            "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
            "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
            "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
            "        }\n" +
            "    </script>";

    Document page = Jsoup.parse(oldHtml);
    page.head().append(postDataScript);

    String injectedHtml = page.outerHtml();
    Log.d(TAG, "JS注入完成");
    return injectedHtml;
}
 
源代码2 项目: v9porn   文件: GoogleRecaptchaVerifyPresenter.java
/**
 * Adds a helper script to the head of the supplied HTML page.
 * <p>
 * The script declares {@code getPostData()}, which concatenates the challenge
 * form's action, the value of the element named {@code r}, the value of the
 * element with id {@code id} and the reCAPTCHA response, separated by commas.
 *
 * @param oldHtml the page markup to augment
 * @return the augmented markup, or {@code ""} if {@code oldHtml} is empty
 *         according to {@code TextUtils.isEmpty}
 */
private String injectJs(String oldHtml) {
    if (!TextUtils.isEmpty(oldHtml)) {
        Document parsed = Jsoup.parse(oldHtml);
        Element head = parsed.head();
        head.append("<script type=\"text/javascript\">\n" +
                "        function getPostData() {\n" +
                "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
                "            if (!recaptcha || recaptcha === '') {\n" +
                "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
                "            }\n" +
                "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
                "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
                "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
                "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
                "        }\n" +
                "    </script>");

        // Serialize before logging, so the log line is only written once serialization has succeeded.
        String result = parsed.outerHtml();
        Log.d(TAG, "JS注入完成");
        return result;
    }
    return "";
}
 
源代码3 项目: ripme   文件: VkRipper.java
/**
 * Downloads the page at {@code url} and extracts the best available video
 * URL embedded in its HTML.
 * <p>
 * The HTML is searched for the markers {@code url1080\":\"}, {@code url720\":\"},
 * {@code url480\":\"} and {@code url240\":\"}, in that order. The text between
 * the first usable marker and the next double quote is returned with all
 * backslashes removed.
 *
 * @param url the address of the page to inspect
 * @return the extracted video URL
 * @throws IOException if the page cannot be fetched, or if no marker with a
 *         terminating double quote is present in the page
 */
public static String getVideoURLAtPage(String url) throws IOException {
    Document doc = Http.url(url)
                       .userAgent(USER_AGENT)
                       .get();
    String html = doc.outerHtml();
    for (String quality : new String[] {"1080", "720", "480", "240"}) {
        String marker = "url" + quality + "\\\":\\\"";
        int markerIndex = html.indexOf(marker);
        if (markerIndex < 0) {
            continue;
        }
        int start = markerIndex + marker.length();
        int end = html.indexOf('"', start);
        if (end < 0) {
            // Truncated or malformed page: no closing quote after this marker.
            // Try the next quality rather than failing with an unchecked
            // StringIndexOutOfBoundsException.
            continue;
        }
        // The value is JSON-escaped in the page; drop the escaping backslashes.
        return html.substring(start, end).replace("\\", "");
    }
    throw new IOException("Could not find video URL at " + url);
}
 
/**
 * Formats {@code code} by parsing it with jsoup and serializing it again
 * using {@code formatter} as the output settings.
 * <p>
 * The parser is chosen from {@code formatter.syntax()}: the HTML parser for
 * {@code html}, the XML parser for {@code xml}. The {@code ending} argument is
 * not used by this method.
 *
 * @param code the source text to format
 * @param ending the requested line ending (unused here)
 * @return the formatted text, or {@code null} when it equals {@code code}
 * @throws IllegalArgumentException if the syntax is neither html nor xml
 */
@Override
public String doFormat(String code, LineEnding ending) {
    final Parser parser;
    switch (formatter.syntax()) {
    case html:
        parser = Parser.htmlParser();
        break;
    case xml:
        parser = Parser.xmlParser();
        break;
    default:
        throw new IllegalArgumentException(formatter.syntax() + " is not allowed as syntax");
    }

    Document parsed = Jsoup.parse(code, "", parser);
    parsed.outputSettings(formatter);

    String result = parsed.outerHtml();
    // null signals "nothing changed" to the caller.
    return code.equals(result) ? null : result;
}
 
源代码5 项目: ogham   文件: JsoupAttachImageInliner.java
/**
 * Rewrites the {@code <img>} elements of the HTML that match the provided
 * images so they reference generated content ids, and builds one inline
 * attachment per image that is actually used.
 *
 * @param htmlContent the HTML to process
 * @param images the candidate images
 * @return the updated HTML together with the generated attachments
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document document = Jsoup.parse(htmlContent);
	List<Attachment> inlineAttachments = new ArrayList<>(images.size());
	for (ImageResource resource : images) {
		// images in the HTML that reference this resource and are not skipped
		Elements matches = getImagesToAttach(document, resource);
		if (matches.isEmpty()) {
			// resource is not referenced: no attachment needed
			continue;
		}
		String cid = idGenerator.generate(resource.getName());
		Attachment attachment = new Attachment(new ByteResource(resource.getName(), resource.getContent()), null, INLINE, format(CONTENT_ID, cid));
		// point every matching image at the generated content id and mark
		// it as already inlined
		for (Element match : matches) {
			match.attr(SRC_ATTR, format(SRC_VALUE, cid));
			match.attr(INLINED_ATTR, true);
		}
		inlineAttachments.add(attachment);
	}
	return new ContentWithImages(document.outerHtml(), inlineAttachments);
}
 
源代码6 项目: Natty   文件: CheckUtils.java
/**
 * Wraps the post body in a {@code <body>} element, parses it and removes all
 * {@code a}, {@code code}, {@code img}, {@code pre} and {@code blockquote}
 * elements.
 *
 * @param post the post whose body is stripped
 * @return the serialized HTML of the whole parsed document after removal
 */
public static String stripBody(Post post) {
    Document doc = Jsoup.parse("<body>" + post.getBody() + "</body>");
    // Same tags, same order as a sequence of individual remove() calls.
    for (String tag : new String[] {"a", "code", "img", "pre", "blockquote"}) {
        doc.getElementsByTag(tag).remove();
    }
    return doc.outerHtml();
}
 
源代码7 项目: Asqatasun   文件: Rgaa3Extractor.java
/**
 * Rewrites the HTML testcase files found under {@code RGAA3_TESTCASE_PATH}.
 * <p>
 * Each sub-directory name is reduced to a six-digit theme/criterion/test
 * code. For every file in it whose name contains {@code "html"}, the first
 * {@code .test-detail} element is turned into an empty {@code div}, given a
 * {@code lang="fr"} attribute if it has none, and filled with the raw rule
 * HTML looked up in {@code RGAA3}. Occurrences of the zero-padded dotted key
 * (for example {@code 01.02.03}) in the output are replaced with the rule's
 * dotted form, and the file is overwritten. Documents without a
 * {@code .test-detail} element are printed to standard output and left
 * untouched.
 *
 * @throws IOException if the testcase directory cannot be listed or a file
 *         cannot be read or written
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list testcase directory " + srcDir);
    }
    for (File file : ruleDirs) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): nothing to process here.
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Key without leading zeros, e.g. "1-2-3".
        String testKey = Integer.parseInt(theme) + "-" + Integer.parseInt(crit) + "-" + Integer.parseInt(test);
        // Zero-padded dotted form, e.g. "01.02.03", which must not stay in the output.
        String wrongKey = theme + "." + crit + "." + test;
        for (File testcase : testcases) {
            if (!testcase.isFile() || !testcase.getName().contains("html")) {
                continue;
            }
            Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
            Element detail = doc.select(".test-detail").first();
            if (detail == null) {
                System.out.println(doc.outerHtml());
                continue;
            }
            detail.tagName("div");
            detail.text("");
            // NOTE(review): text("") presumably already clears all child nodes,
            // which would make this loop redundant — confirm against the jsoup
            // version in use before removing it.
            for (Element el : detail.children()) {
                el.remove();
            }
            if (!detail.hasAttr("lang")) {
                detail.attr("lang", "fr");
            }
            detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");
            doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
            doc.outputSettings().outline(false);
            doc.outputSettings().indentAmount(4);
            String outputHtml = doc.outerHtml();
            if (outputHtml.contains(wrongKey)) {
                // Literal replacement: replaceAll() would treat the dots in
                // wrongKey as regex wildcards and '$' or '\' in the replacement
                // as group references.
                outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
            }
            FileUtils.writeStringToFile(testcase, outputHtml);
        }
    }
}
 
源代码8 项目: Asqatasun   文件: HTMLJsoupCleanerImpl.java
/**
 * Cleans {@code dirtyHTML} and stores the outcome in {@code result}.
 * <p>
 * The input first goes through {@code removeBadNamespaceDefinition}, then is
 * parsed with jsoup. Comments and malformed attributes are removed, and the
 * document is serialized with XHTML escaping, outline mode and a two-space
 * indent.
 */
@Override
public void run() {
    dirtyHTML = removeBadNamespaceDefinition(dirtyHTML);
    Document parsed = Jsoup.parse(dirtyHTML);

    // Output settings only affect serialization, so they can be set before the cleanup passes.
    parsed.outputSettings()
            .escapeMode(Entities.EscapeMode.xhtml)
            .outline(true)
            .indentAmount(2);

    removeComments(parsed);
    removeMalformedAttributes(parsed);

    result = parsed.outerHtml();
}
 
源代码9 项目: ogham   文件: JsoupCssInliner.java
/**
 * Parses the HTML and runs the inlining steps on it in order:
 * {@code internStyles}, {@code fetchStyles}, {@code extractStyles} and
 * {@code applyStyles}.
 *
 * @param htmlContent the HTML to process
 * @param cssContents the external stylesheets passed to {@code internStyles}
 * @return the serialized document after all steps have run
 */
@Override
public String inline(String htmlContent, List<ExternalCss> cssContents) {
	Document document = Jsoup.parse(htmlContent);

	internStyles(document, cssContents);
	// the stylesheet text is fetched first and then handed to extractStyles
	extractStyles(document, fetchStyles(document));
	applyStyles(document);

	return document.outerHtml();
}
 
源代码10 项目: ogham   文件: JsoupBase64ImageInliner.java
/**
 * Replaces the {@code src} of every {@code <img>} element selected by
 * {@code getImagesToInline} with a value built from {@code BASE64_URI}, the
 * image mimetype and the base64-encoded image content, and marks the element
 * as inlined.
 *
 * @param htmlContent the HTML to process
 * @param images the images that may be inlined
 * @return the updated HTML with an empty attachment list
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document document = Jsoup.parse(htmlContent);
	for (ImageResource resource : images) {
		for (Element target : getImagesToInline(document, resource)) {
			String dataUri = MessageFormat.format(
					BASE64_URI,
					resource.getMimetype(),
					Base64Utils.encodeToString(resource.getContent()));
			target.attr(SRC_ATTR, dataUri);
			target.attr(INLINED_ATTR, true);
		}
	}
	// the content is embedded in the HTML itself, so no attachments are produced
	return new ContentWithImages(document.outerHtml(), new ArrayList<Attachment>(0));
}
 
源代码11 项目: wisdom   文件: BrowserWatchFilter.java
/**
 * Parses the page and inserts a {@code <script>} element that loads
 * {@code /assets/javascript/browserWatch.js}.
 * <p>
 * The element is inserted after the last element sibling of {@code <body>}.
 * NOTE(review): that sibling is presumably {@code <body>} itself, which would
 * put the script after the closing body tag — confirm this is intended.
 *
 * @param content the HTML of the page
 * @return the serialized page including the script element
 */
private String addJavascript(String content) {
	Document page = Jsoup.parse(content);
	String scriptTag = String.format("<script src=\"%s\"></script>", "/assets/javascript/browserWatch.js");
	page.body().lastElementSibling().after(scriptTag);
	return page.outerHtml();
}
 
源代码12 项目: apogen   文件: DomDistance.java
/**
 * Parses the given file and returns its serialized DOM with every string
 * held in {@code stringsToRemove} deleted.
 * <p>
 * {@code stringsToRemove} is reset to a fresh list on every call.
 * {@code getStringsToRemove} is then invoked with all elements of the
 * document; presumably it fills that list — verify against its definition.
 *
 * @param f the HTML file to read (the charset argument passed to jsoup is
 *          {@code null})
 * @return the document HTML with the collected strings removed
 * @throws IOException if the file cannot be read or parsed
 */
public static String cleanDomFromText(File f) throws IOException {
	Document dom = Jsoup.parse(f, null);

	stringsToRemove = new LinkedList<String>();
	getStringsToRemove(dom.getAllElements());

	String stripped = dom.outerHtml();
	for (String fragment : stringsToRemove) {
		stripped = stripped.replace(fragment, "");
	}
	return stripped;
}
 
源代码13 项目: astor   文件: HtmlParserTest.java
/**
 * Checks that a legacy {@code <isindex>} element is expanded by the parser
 * into a form containing a label, an input named {@code isindex} and two
 * horizontal rules.
 */
@Test public void testNormalisesIsIndex() {
    Document doc = Jsoup.parse("<body><isindex action='/submit'></body>");
    // The full document is still serialized once; its value is not inspected.
    doc.outerHtml();

    String expected = "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>";
    String actual = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals(expected, actual);
}
 
源代码14 项目: astor   文件: HtmlParserTest.java
/**
 * Verifies how {@code <isindex action='/submit'>} is normalised: the body
 * must serialize, after whitespace normalisation, to a {@code <form>} with the
 * same action that wraps a labelled {@code isindex} input between two
 * {@code <hr>} elements.
 */
@Test public void testNormalisesIsIndex() {
    final String input = "<body><isindex action='/submit'></body>";
    Document parsed = Jsoup.parse(input);
    // Serialize the whole document as well; the result itself is not asserted on.
    parsed.outerHtml();

    String bodyHtml = parsed.body().html();
    assertEquals(
            "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>",
            StringUtil.normaliseWhitespace(bodyHtml));
}
 
源代码15 项目: ogham   文件: CssInlineUtils.java
/**
 * Strip the Ogham-specific marker attributes from every element that carries
 * at least one of them:
 * <ul>
 * <li>{@link CssInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link CssInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the serialized document without those attributes
 */
public static String removeOghamAttributes(String html) {
	Document document = Jsoup.parse(html);
	// select any element having either attribute
	String selector = "["+INLINE_MODE_ATTR+"], ["+INLINED_ATTR+"]";
	for (Element marked : document.select(selector)) {
		marked.removeAttr(INLINE_MODE_ATTR);
		marked.removeAttr(INLINED_ATTR);
	}
	return document.outerHtml();
}
 
源代码16 项目: ogham   文件: ImageInlineUtils.java
/**
 * Strip the Ogham-specific marker attributes from every {@code img} element
 * of the document:
 * <ul>
 * <li>{@link ImageInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link ImageInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the serialized document without those attributes on its images
 */
public static String removeOghamAttributes(String html) {
	Document document = Jsoup.parse(html);
	for (Element image : document.select("img")) {
		// removal is attempted on every image, whether or not the attribute is present
		image.removeAttr(INLINE_MODE_ATTR);
		image.removeAttr(INLINED_ATTR);
	}
	return document.outerHtml();
}