下面列出了 org.jsoup.nodes.Document#html() 的实例代码,您可以点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Renders a {@code Spanned} as HTML and rewrites relative font-size spans as
 * {@code <small>} / {@code <big>} elements.
 *
 * <p>Every {@code <span>} whose style attribute starts with {@code font-size:} and contains an
 * {@code em;} unit has its size parsed (a decimal comma is accepted). Sizes below 1.0 turn the
 * span into {@code <small>}, all other sizes into {@code <big>}, and the style attribute is
 * removed. A size that cannot be parsed is logged and the span is left untouched.
 *
 * @param spanned the styled text to convert
 * @param context handed to the {@code HtmlEx} converter
 * @return the serialized HTML of the processed document
 */
static String toHtml(Spanned spanned, Context context) {
    String markup = new HtmlEx(context).toHtml(spanned, TO_HTML_PARAGRAPH_LINES_CONSECUTIVE);

    // @Google: why convert size to and from in a different way?
    Document doc = JsoupEx.parse(markup);
    for (Element span : doc.select("span")) {
        String css = span.attr("style");
        if (!css.startsWith("font-size:")) {
            continue;
        }

        int colonAt = css.indexOf(':');
        int unitAt = css.indexOf("em;", colonAt);
        if (unitAt <= colonAt) {
            // No "em;" unit after the colon: not a relative size, leave the span alone.
            continue;
        }

        try {
            float factor = Float.parseFloat(css.substring(colonAt + 1, unitAt).replace(',', '.'));
            span.tagName(factor < 1.0f ? "small" : "big");
            span.attributes().remove("style");
        } catch (NumberFormatException ex) {
            Log.e(ex);
        }
    }

    return doc.html();
}
/**
 * Turns externally linked CSS in an HTML document into inline {@code <style>} blocks and
 * removes the external links.
 *
 * <p>For each {@code <link href>} whose href ends with {@code .css}, the file is read from the
 * Freemarker loader path and prepended to the {@code <html>} element as a {@code <style>}
 * block. Afterwards every {@code <link href>} element is removed, whether or not it pointed
 * at a stylesheet. If anything fails, the error is logged and the input is returned unchanged.
 *
 * @param html the HTML to process; must not be null
 * @return the rewritten HTML, or the original {@code html} on failure
 * @author Frodez
 * @date 2019-03-21
 */
@Override
public String revert(String html) {
    Assert.notNull(html, "html must not be null");
    try {
        Document document = Jsoup.parse(html);
        Elements links = document.select("link[href]");
        Elements root = document.select("html");
        for (Element link : links) {
            String href = link.attr("href");
            if (href.endsWith(".css")) {
                String cssLocation = StrUtil.concat(FreemarkerRender.getLoaderPath(), href);
                root.prepend(StrUtil.concat("<style type=\"text/css\">",
                    FileUtil.readString(ResourceUtils.getFile(cssLocation)), "</style>"));
            }
        }
        links.remove();
        return document.html();
    } catch (Exception e) {
        log.error("[frodez.util.renderer.reverter.CSSReverter.revert]", e);
        return html;
    }
}
/**
 * Rewrites locally referenced images as inline ({@code cid:}) resources, sets the resulting
 * HTML as the message body and attaches each referenced file to the message.
 *
 * <p>An {@code <img>} is treated as local when it has a {@code src} attribute that does not
 * start with {@code http}. Its {@code src} becomes {@code cid:<original src>}, and the file
 * {@code templatesPath/<original src>} is added inline under that content id. A source used by
 * several images is attached only once, so the message does not carry duplicate parts with the
 * same Content-ID.
 *
 * @param mailMessage the helper that receives the HTML body and the inline resources
 * @param htmlText the HTML to process
 * @return the HTML that was set as the message body
 * @throws Exception if setting the text or adding an inline resource fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document document = Jsoup.parse(htmlText);

    // Distinct local sources in document order; the list stays small, so contains() is fine.
    final List<String> resources = new ArrayList<>();
    document.getElementsByTag("img").stream()
        .filter(image -> image.hasAttr("src"))
        .filter(image -> !image.attr("src").startsWith("http"))
        .forEach(image -> {
            final String src = image.attr("src");
            image.attr("src", "cid:" + src);
            if (!resources.contains(src)) {
                resources.add(src);
            }
        });

    final String html = document.html();
    // The body must be set before the inline parts are added.
    mailMessage.setText(html, true);
    for (final String res : resources) {
        final FileSystemResource templateResource = new FileSystemResource(new File(templatesPath, res));
        mailMessage.addInline(res, templateResource, getContentTypeByFileName(res));
    }
    return html;
}
/**
 * Rewrites locally referenced images as inline ({@code cid:}) resources, sets the resulting
 * HTML as the message body and attaches each referenced file to the message.
 *
 * <p>An {@code <img>} is treated as local when it has a {@code src} attribute that does not
 * start with {@code http}. Its {@code src} becomes {@code cid:<original src>}, and the file
 * {@code templatesPath/<original src>} is added inline under that content id. A source used by
 * several images is attached only once, so the message does not carry duplicate parts with the
 * same Content-ID.
 *
 * @param mailMessage the helper that receives the HTML body and the inline resources
 * @param htmlText the HTML to process
 * @return the HTML that was set as the message body
 * @throws Exception if setting the text or adding an inline resource fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document document = Jsoup.parse(htmlText);

    // Distinct local sources in document order; the list stays small, so contains() is fine.
    final List<String> resources = new ArrayList<>();
    document.getElementsByTag("img").stream()
        .filter(image -> image.hasAttr("src"))
        .filter(image -> !image.attr("src").startsWith("http"))
        .forEach(image -> {
            final String src = image.attr("src");
            image.attr("src", "cid:" + src);
            if (!resources.contains(src)) {
                resources.add(src);
            }
        });

    final String html = document.html();
    // The body must be set before the inline parts are added.
    mailMessage.setText(html, true);
    for (final String res : resources) {
        final FileSystemResource templateResource = new FileSystemResource(new File(templatesPath, res));
        mailMessage.addInline(res, templateResource, getContentTypeByFileName(res));
    }
    return html;
}
/**
 * Writes the document's HTML to {@code basePath/saveName} unless the HTML is too short.
 *
 * <p>URLs in the document are first rewritten via {@code convertURLsToBase}. If the serialized
 * HTML is no longer than {@code skipLength} characters, nothing is written. Otherwise the HTML
 * is encoded with the document's own output charset (the charset jsoup serialized and escaped
 * for) and written to the file, and the latch, when present, is counted down.
 *
 * <p>NOTE(review): the latch is not counted down when the document is skipped or when writing
 * fails — confirm that waiters do not depend on it in those cases.
 *
 * @param document the document to save
 * @return {@code true} if the file was written, {@code false} if the document was skipped
 * @throws Exception if URL conversion or file I/O fails
 */
public boolean save(@NotNull Document document) throws Exception {
    convertURLsToBase(document);
    String documentAsString = document.html();
    if (documentAsString.length() <= skipLength) {
        System.out.println("Skipping:" + url);
        return false;
    }

    File outputFile = new File((basePath != null ? basePath + "/" : "") + saveName);
    outputFile.createNewFile();
    System.out.println("Saving " + url + " to " + outputFile);

    // try-with-resources closes the stream even when write() throws.
    try (FileOutputStream fos = new FileOutputStream(outputFile)) {
        fos.write(documentAsString.getBytes(document.outputSettings().charset()));
    }

    if (latch != null) {
        latch.countDown();
    }
    return true;
}
/**
 * Rewrites locally referenced images as inline ({@code cid:}) resources, sets the resulting
 * HTML as the message body and attaches each referenced file to the message.
 *
 * <p>An {@code <img>} is treated as local when it has a {@code src} attribute that does not
 * start with {@code http}. Its {@code src} becomes {@code cid:<original src>}, and the file
 * {@code templatesPath/<original src>} is added inline under that content id. A source used by
 * several images is attached only once, so the message does not carry duplicate parts with the
 * same Content-ID.
 *
 * @param mailMessage the helper that receives the HTML body and the inline resources
 * @param htmlText the HTML to process
 * @return the HTML that was set as the message body
 * @throws Exception if setting the text or adding an inline resource fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document document = Jsoup.parse(htmlText);

    // Distinct local sources in document order; the list stays small, so contains() is fine.
    final List<String> resources = new ArrayList<>();
    document.getElementsByTag("img").stream()
        .filter(image -> image.hasAttr("src"))
        .filter(image -> !image.attr("src").startsWith("http"))
        .forEach(image -> {
            final String src = image.attr("src");
            image.attr("src", "cid:" + src);
            if (!resources.contains(src)) {
                resources.add(src);
            }
        });

    final String html = document.html();
    // The body must be set before the inline parts are added.
    mailMessage.setText(html, true);
    for (final String res : resources) {
        final FileSystemResource templateResource = new FileSystemResource(new File(templatesPath, res));
        mailMessage.addInline(res, templateResource, getContentTypeByFileName(res));
    }
    return html;
}
/**
 * Writes the document's HTML to {@code basePath/saveName} unless the HTML is too short.
 *
 * <p>URLs in the document are first rewritten via {@code convertURLsToBase}. If the serialized
 * HTML is no longer than {@code skipLength} characters, nothing is written. Otherwise the HTML
 * is encoded with the document's own output charset (the charset jsoup serialized and escaped
 * for) and written to the file, and the latch, when present, is counted down.
 *
 * <p>NOTE(review): the latch is not counted down when the document is skipped or when writing
 * fails — confirm that waiters do not depend on it in those cases.
 *
 * @param document the document to save
 * @return {@code true} if the file was written, {@code false} if the document was skipped
 * @throws Exception if URL conversion or file I/O fails
 */
public boolean save(@NotNull Document document) throws Exception {
    convertURLsToBase(document);
    String documentAsString = document.html();
    if (documentAsString.length() <= skipLength) {
        System.out.println("Skipping:" + url);
        return false;
    }

    File outputFile = new File((basePath != null ? basePath + "/" : "") + saveName);
    outputFile.createNewFile();
    System.out.println("Saving " + url + " to " + outputFile);

    // try-with-resources closes the stream even when write() throws.
    try (FileOutputStream fos = new FileOutputStream(outputFile)) {
        fos.write(documentAsString.getBytes(document.outputSettings().charset()));
    }

    if (latch != null) {
        latch.countDown();
    }
    return true;
}
/**
 * Parses the input with jsoup's XML parser and returns it in the configured output form.
 *
 * @param str the markup to parse
 * @return the document's HTML when {@code outType} is {@code TYPE_HTML}; otherwise
 *         (including {@code TYPE_TEXT}) the document's text content
 */
private String parse(String str) {
    Document parsed = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return parsed.html();
        case TYPE_TEXT:
        default:
            return parsed.text();
    }
}
/**
 * Extracts the profile form from a page body and adapts its markup.
 *
 * <p>If the body contains a {@code <fieldset>...</form>} section following a {@code br />},
 * that section is wrapped in a {@code <form>}, the contents of the two labelled cells matched
 * by the patterns below are reduced to the label only, {@code legend}/{@code fieldset} markup
 * is replaced by {@code h2}/{@code div.field}, the original buttons in
 * {@code .formbuttonrow} are replaced by a single button calling {@code jsonElem()}, and the
 * first {@code <textarea>}, if any, is limited to 500 characters. If the section is not found,
 * the body is returned unchanged.
 *
 * @param body the raw page body
 * @return the rewritten form document, or {@code body} itself when no form section matches
 */
private String parseBody(String body) {
    Matcher m = PatternExtensions.compile("br \\/>\\s*(<fieldset>[\\S\\s]*<.form>)").matcher(body);
    if (!m.find()) {
        return body;
    }

    String form = "<form>" + m.group(1);
    form = form.replaceAll("<td class=\"row1\" width=\"30%\"><b>О себе:</b>[\\s\\S]*?</td>",
            "<td class=\"row1\" width=\"30%\"><b>О себе</b></td>");
    form = form.replaceAll("<td width=\"30%\" class=\"row1\" style='padding:6px;'><b>Город</b>[\\s\\S]*?</td>",
            "<td class=\"row1\" width=\"30%\" style='padding:6px;'><b>Город</b></td>");
    // These are literal substitutions, so no regex engine is needed.
    form = form.replace("legend", "h2")
            .replace("<fieldset>", "<div class=\"field\">")
            .replace("</fieldset>", "</div>");

    Document doc = Jsoup.parse(form);
    doc.select(".formbuttonrow .button").remove();
    doc.select(".formbuttonrow").append("<input type=\"button\" value=\"Сохранить\" onclick=\"jsonElem();\">");
    // eq(0) yields an empty selection when there is no textarea, avoiding the NPE that
    // first().attr(...) would raise.
    doc.select("textarea").eq(0).attr("maxlength", "500");
    return doc.html();
}
/**
 * Processes the images in a chat message.
 *
 * <p>Every {@code <img>} with a {@code src} attribute containing {@code /res/image} gets its
 * {@code class} attribute set to {@code ukefu-media-image}.
 *
 * @param message the chat message HTML
 * @return the HTML of the parsed and processed message document
 */
public static String filterChatMessage(String message) {
    Document parsed = Jsoup.parse(message);
    for (Element image : parsed.select("img[src]")) {
        if (image.attr("src").contains("/res/image")) {
            image.attr("class", "ukefu-media-image");
        }
    }
    return parsed.html();
}
/**
 * Rebuilds the vacancy rows of the document from the given vacancies.
 *
 * <p>The first element with class {@code template} serves as the row template. Existing
 * {@code tr[class=vacancy]} rows are removed, and for each vacancy a copy of the template
 * (without its {@code style} attribute and {@code template} class) is filled in and inserted
 * before the template element.
 *
 * <p>NOTE(review): a document without a {@code template} element, or a template lacking the
 * {@code city}/{@code companyName}/{@code salary} elements or an {@code <a>}, fails with a
 * {@code NullPointerException} — confirm the page always provides them.
 *
 * @param vacancies the vacancies to render, in output order
 * @return the updated document HTML, or {@code "Some exception occurred"} if the document
 *         could not be loaded
 */
private String getUpdatedFileContent(List<Vacancy> vacancies) {
    try {
        Document document = getDocument();
        Element templateOriginal = document.getElementsByClass("template").first();
        Element copyTemplate = templateOriginal.clone();
        copyTemplate.removeAttr("style");
        copyTemplate.removeClass("template");

        // The former trailing .not("tr[class=vacancy template") call was dropped: its result
        // was discarded and its selector was missing the closing bracket, which newer jsoup
        // versions reject with an exception.
        document.select("tr[class=vacancy]").remove();

        for (Vacancy vacancy : vacancies) {
            Element localClone = copyTemplate.clone();
            localClone.getElementsByClass("city").first().text(vacancy.getCity());
            localClone.getElementsByClass("companyName").first().text(vacancy.getCompanyName());
            localClone.getElementsByClass("salary").first().text(vacancy.getSalary());
            Element link = localClone.getElementsByTag("a").first();
            link.text(vacancy.getTitle());
            link.attr("href", vacancy.getUrl());
            templateOriginal.before(localClone.outerHtml());
        }
        return document.html();
    } catch (IOException e) {
        e.printStackTrace();
        return "Some exception occurred";
    }
}
/**
 * Removes unknown-macro placeholders that refer to livingdoc.
 *
 * <p>Every element with class {@code wysiwyg-unknown-macro} whose {@code src} attribute
 * contains {@code livingdoc} is removed from the document.
 *
 * @param result the HTML to clean
 * @return the HTML of the parsed document after removal
 */
private String removeUnknownMacroElements(String result) {
    Document parsed = Jsoup.parse(result);
    parsed.getElementsByClass("wysiwyg-unknown-macro").forEach(macro -> {
        if (macro.attr("src").contains("livingdoc")) {
            macro.remove();
        }
    });
    return parsed.html();
}
/**
 * Sets sizing styles on the {@code <html>} and {@code <body>} elements of the given markup.
 *
 * @param htmlString the HTML to parse and adjust
 * @return the document HTML with {@code style="height: 100%"} on the first {@code <html>}
 *         element and {@code style="height: 90%; margin:0;"} on the first {@code <body>}
 */
private static String addStyleAttributes(String htmlString) {
    Document page = Jsoup.parse(htmlString);
    page.getElementsByTag("html").get(0).attr("style", "height: 100%");
    page.getElementsByTag("body").get(0).attr("style", "height: 90%; margin:0;");
    return page.html();
}
/**
 * Converting only page range 0..1 of a three-page PDF must yield HTML containing the marker
 * {@code #1} but neither {@code #2} nor {@code #3}.
 */
@Test
public void givenMultiPagePdf_renderOnlyFirstPage_outputHtmlOnlyHasFirstPage() throws Exception {
    String renderedHtml = convertWithPageRange(testPath + "3-page-document.pdf", 0, 1).html();

    // Presumably each page of the fixture carries its own "#<n>" marker — verify against the PDF.
    Assert.assertThat(renderedHtml, containsString("#1"));
    Assert.assertThat(renderedHtml, not(containsString("#2")));
    Assert.assertThat(renderedHtml, not(containsString("#3")));
}