org.jsoup.nodes.Entities.EscapeMode#org.jsoup.nodes.Document.OutputSettings源码实例Demo

下面列出了 org.jsoup.nodes.Entities.EscapeMode#org.jsoup.nodes.Document.OutputSettings 的实例代码，您可以点击链接到 GitHub 查看源代码，也可以在右侧发表评论。

源代码1 项目: docx4j-template   文件: XHTMLDocumentHandler.java
/**
 * Parses an HTML stream into a jsoup {@link Document} using
 * {@code Jsoup.parse(in, charsetName, baseUri)} and switches pretty-printing off
 * on the document's output settings.
 *
 * @param input stream holding the HTML to parse
 * @return the parsed document, configured with fresh output settings whose
 *         pretty-printing is disabled
 * @throws IOException declared by this method; propagated from the parse call
 */
@Override
public Document handle(InputStream input) throws IOException {
	// Parse parameters are read from the docx4j properties: the charset name falls back to
	// Docx4jConstants.DEFAULT_CHARSETNAME and the base URI to the empty string.
	final String encoding = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME);
	final String base = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");

	// Let jsoup turn the HTML into a Document.
	final Document parsed = Jsoup.parse(input, encoding, base);

	// Replace the output settings with a new instance that only disables pretty-printing;
	// syntax and charset are left at whatever a new OutputSettings provides.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	return parsed;
}
 
源代码2 项目: crawler-jsoup-maven   文件: JsoupTest.java
/**
 * Small jsoup demo: parses a fragment, edits the first {@code <div>}, wraps the first
 * {@code <span>}, prints the resulting document, then prints the same HTML after cleaning
 * it with the relaxed whitelist and pretty-printing disabled.
 */
public static void main(String[] args) {
    final String markup = "<span><div>test</div></span>";
    final Document document = Jsoup.parse(markup);

    // Replace the inner HTML of the first <div>, then add one paragraph before
    // and one after that content.
    final Element firstDiv = document.select("div").first();
    firstDiv.html("<p>lorem ipsum</p>");
    firstDiv.prepend("<p>First</p>");
    firstDiv.append("<p>Last</p>");
    // expected so far: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>

    // NOTE(review): appendElement is handed the whole markup string here, although its
    // parameter is a tag name; append(markup) may have been intended. Confirm before changing.
    firstDiv.appendElement(markup);

    // Wrap the first <span> inside <li><a href=...>.
    final Element firstSpan = document.select("span").first();
    firstSpan.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(document.html());

    // Clean the serialized document with the relaxed whitelist, without re-formatting.
    final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
    final String cleaned = Jsoup.clean(document.html(), "", Whitelist.relaxed(), unformatted);

    System.out.println(cleaned);
}
 
源代码3 项目: astor   文件: DocumentTest.java
/**
 * Serializes one parsed document with HTML syntax and then with XML syntax and
 * compares each result with its expected text.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document parsed = Jsoup.parse(input);

    // HTML syntax: lower-case doctype, valueless boolean attributes, no self-closing <img>.
    final String expectedHtml = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    // XML syntax: upper-case DOCTYPE, every attribute carries a value, <img ... /> is self-closed.
    final String expectedXml = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    parsed.outputSettings().syntax(Syntax.html);
    assertEquals(expectedHtml, parsed.html());

    parsed.outputSettings().syntax(Syntax.xml);
    assertEquals(expectedXml, parsed.html());
}
 
源代码4 项目: astor   文件: DocumentTest.java
/**
 * Checks the serialized form of the same document under HTML syntax first and
 * XML syntax second.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String source = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document document = Jsoup.parse(source);

    // Expected text when the output syntax is html.
    final String htmlForm = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    // Expected text when the output syntax is xml.
    final String xmlForm = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    document.outputSettings().syntax(Syntax.html);
    assertEquals(htmlForm, document.html());

    document.outputSettings().syntax(Syntax.xml);
    assertEquals(xmlForm, document.html());
}
 
源代码5 项目: astor   文件: DocumentTest.java
/**
 * Verifies document serialization for both output syntaxes (html, then xml)
 * against fixed expected strings.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String rawHtml = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document dom = Jsoup.parse(rawHtml);

    // Output expected with Syntax.html selected.
    final String asHtml = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    // Output expected with Syntax.xml selected.
    final String asXml = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";

    dom.outputSettings().syntax(Syntax.html);
    assertEquals(asHtml, dom.html());

    dom.outputSettings().syntax(Syntax.xml);
    assertEquals(asXml, dom.html());
}
 
源代码6 项目: bbs   文件: TextFilterManage.java
/**
 * Filters the tags of rich-text content.
 *
 * @param request current request; handed to {@code Configuration.getUrl} to obtain the
 *                base URI used while cleaning
 * @param html rich-text HTML to filter
 * @return the cleaned HTML, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	// No editor tag applies here, so the whitelist is built from filterParameter(null).
	final Whitelist allowedTags = this.filterParameter(null);
	final String baseUri = Configuration.getUrl(request);

	// prettyPrint(false): the cleaned output is not re-formatted.
	final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
	return Jsoup.clean(html, baseUri, allowedTags, unformatted);
}
 
/**
 * Cleans the given HTML with the {@code simpleText} whitelist (relative links not
 * preserved, no pretty-printing, indent amount 0) and then unescapes HTML entities
 * in the cleaned result.
 *
 * @param unsafeHtmlContent HTML to sanitize
 * @return the cleaned text with HTML entities unescaped
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	final Whitelist simpleTextOnly = Whitelist.simpleText().preserveRelativeLinks(false);
	final OutputSettings compact = new OutputSettings().indentAmount(0).prettyPrint(false);

	final String cleaned = Jsoup.clean(unsafeHtmlContent, "", simpleTextOnly, compact);

	// NOTE(review): unescaping after cleaning can turn escaped entities back into raw
	// markup characters; presumably the result is meant to be used as plain text, not
	// re-embedded as HTML. Confirm with callers.
	return StringEscapeUtils.unescapeHtml(cleaned);
}
 
/**
 * Initializes the configuration via {@code super.initCfg(cfg)} and builds the output
 * formatter from the supplied options. Each option falls back to a default when absent:
 * {@code charset} to UTF-8, {@code escapeMode} to xhtml, {@code indentAmount} to 4,
 * {@code outlineMode} to true, {@code pretty} to true and {@code syntax} to html.
 *
 * @param options option name to raw string value
 * @param cfg configuration source forwarded to {@code initCfg}
 */
@Override
public void init(Map<String, String> options, ConfigurationSource cfg) {
    super.initCfg(cfg);

    // The field is assigned first and configured afterwards, one setter at a time,
    // in the order: charset, escape mode, indent, outline, pretty-print, syntax.
    formatter = new OutputSettings();
    formatter
            .charset(Charset.forName(options.getOrDefault("charset", StandardCharsets.UTF_8.name())))
            .escapeMode(EscapeMode.valueOf(options.getOrDefault("escapeMode", EscapeMode.xhtml.name())))
            .indentAmount(Integer.parseInt(options.getOrDefault("indentAmount", "4")))
            .outline(Boolean.parseBoolean(options.getOrDefault("outlineMode", Boolean.TRUE.toString())))
            .prettyPrint(Boolean.parseBoolean(options.getOrDefault("pretty", Boolean.TRUE.toString())))
            .syntax(Syntax.valueOf(options.getOrDefault("syntax", Syntax.html.name())));
}
 
源代码9 项目: astor   文件: DocumentTest.java
/**
 * Writes a parsed document into a {@link StringWriter} with pretty-printing off and
 * expects the written text to equal the original markup.
 */
@Test public void testHtmlAppendable() {
	final String markup = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document parsed = Jsoup.parse(markup);

	// Install fresh output settings with pretty-printing disabled.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	final String written = parsed.html(new StringWriter()).toString();
	assertEquals(markup, written);
}
 
源代码10 项目: astor   文件: DocumentTest.java
/**
 * Checks that, with pretty-printing disabled, serializing the document into an
 * appendable yields exactly the input markup.
 */
@Test public void testHtmlAppendable() {
	final String source = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document dom = Jsoup.parse(source);

	// New settings object; only prettyPrint is changed.
	final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
	dom.outputSettings(unformatted);

	final StringWriter sink = new StringWriter();
	assertEquals(source, dom.html(sink).toString());
}
 
源代码11 项目: astor   文件: DocumentTest.java
/**
 * Serializes a parsed document to a {@link StringWriter} without pretty-printing and
 * asserts that the output matches the original HTML string.
 */
@Test public void testHtmlAppendable() {
	final String original = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document doc = Jsoup.parse(original);

	// Pretty-printing is switched off through a freshly created settings instance.
	doc.outputSettings(new OutputSettings().prettyPrint(false));

	final String serialized = doc.html(new StringWriter()).toString();
	assertEquals(original, serialized);
}
 
源代码12 项目: doov   文件: HtmlSampleRulesTest.java
/**
 * Renders the document preceded by an HTML comment line that contains the textual form
 * of the context's root metadata. The document's output settings are replaced with
 * pretty-printing enabled and an indent amount of 2.
 *
 * @param context supplies the root metadata for the leading comment
 * @param doc document to render; its output settings are overwritten
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings pretty = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(pretty).toString();
}
 
源代码13 项目: doov   文件: HtmlAnyMatchTest.java
/**
 * Builds the text form of a document: first an HTML comment with the string form of the
 * context's root metadata, then the document itself, pretty-printed with an indent of 2.
 *
 * @param context source of the root metadata placed in the comment
 * @param doc document to serialize; receives new output settings as a side effect
 * @return comment line plus serialized document
 */
static String format(Context context, Document doc) {
    final String header = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings indented = new OutputSettings().prettyPrint(true).indentAmount(2);
    final String body = doc.outputSettings(indented).toString();
    return header + body;
}
 
源代码14 项目: SkyTube   文件: NewPipeService.java
/**
 * Cleans the given HTML with the basic whitelist, an empty base URI and
 * pretty-printing disabled.
 *
 * @param content HTML to clean
 * @return the cleaned HTML
 */
private String filterHtml(String content) {
    final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
    final String cleaned = Jsoup.clean(content, "", Whitelist.basic(), unformatted);
    return cleaned;
}
 
源代码15 项目: zeppelin   文件: ZeppelinRDisplay.java
/**
 * Builds a TEXT display from the inner HTML of the given element by cleaning it with
 * a whitelist that allows no tags.
 *
 * @param body element whose inner HTML is converted
 * @return a successful {@code RDisplay} of type TEXT carrying the cleaned content
 */
private static RDisplay textDisplay(Element body) {
  // Pretty-printing stays off so whitespace and newlines are preserved while tags are removed.
  final OutputSettings keepLayout = new OutputSettings().prettyPrint(false);
  final String plainText = Jsoup.clean(body.html(), "", Whitelist.none(), keepLayout);
  return new RDisplay(plainText, Type.TEXT, Code.SUCCESS);
}
 
源代码16 项目: bbs   文件: TextFilterManage.java
/**
 * Filters the tags of rich-text content.
 *
 * @param request current request; handed to {@code Configuration.getUrl} to obtain the
 *                base URI used while cleaning
 * @param html content to filter
 * @param editorTag comment-editor tag settings, passed to {@code filterParameter} to
 *                  build the whitelist
 * @return the cleaned HTML, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html,EditorTag editorTag) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	final Whitelist allowedTags = this.filterParameter(editorTag);
	final String baseUri = Configuration.getUrl(request);

	// prettyPrint(false): the cleaned output is not re-formatted.
	final OutputSettings unformatted = new OutputSettings().prettyPrint(false);
	return Jsoup.clean(html, baseUri, allowedTags, unformatted);
}
 
/**
 * Builds the document-data page: a header label and, when content exists for the page
 * id, a form panel showing the cleaned document text plus a word cloud if a word-count
 * map is returned. The layout is set as the panel's content and a page-visit event
 * is created before returning.
 *
 * @param parameters raw page parameters; the page id is derived from them via {@code getPageId}
 * @param menuBar menu bar handed to the document menu item factory
 * @param panel panel that receives the created layout as its content
 * @return the layout that was set on {@code panel}
 */
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
	final VerticalLayout content = createPanelContent();

	final String pageId = getPageId(parameters);

	getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);

	LabelFactory.createHeader2Label(content, DOCUMENT_DATA);

	// Fetch the stored content entries whose id matches the page id.
	final DataContainer<DocumentContentData, String> contentContainer = getApplicationManager()
			.getDataContainer(DocumentContentData.class);
	final List<DocumentContentData> contentEntries = contentContainer
			.getAllBy(DocumentContentData_.id, pageId);

	if (!contentEntries.isEmpty()) {

		final Panel formPanel = new Panel();
		formPanel.setSizeFull();
		content.addComponent(formPanel);

		final FormLayout form = new FormLayout();
		formPanel.setContent(form);

		// Only the first entry is shown; it is cleaned with the simpleText whitelist
		// and an indent amount of 4 before being displayed as preformatted text.
		final String cleanedText = Jsoup.clean(contentEntries.get(0).getContent(), "", Whitelist.simpleText(),
				new OutputSettings().indentAmount(4));
		final Label textLabel = new Label(cleanedText, ContentMode.PREFORMATTED);
		form.addComponent(textLabel);

		// Request word counts for this document through the application manager.
		final DocumentWordCountRequest wordCountRequest = new DocumentWordCountRequest();
		wordCountRequest.setDocumentId(pageId);
		wordCountRequest.setMaxResults(MAX_RESULTS);
		wordCountRequest.setSessionId(RequestContextHolder.currentRequestAttributes().getSessionId());
		final DocumentWordCountResponse wordCountResponse = (DocumentWordCountResponse) getApplicationManager()
				.service(wordCountRequest);

		// The word cloud is added only when the response carries a word-count map.
		if (wordCountResponse.getWordCountMap() != null) {
			final Label cloudLabel = new Label(createWordCloud(wordCountResponse.getWordCountMap()), ContentMode.HTML);
			form.addComponent(cloudLabel);
		}

		content.setExpandRatio(formPanel, ContentRatio.GRID);

	}

	panel.setContent(content);
	getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW, ApplicationEventGroup.USER, NAME,
			parameters, pageId);

	return content;

}