下面列出了 org.jsoup.nodes.Document#toString() 的实例代码,或者点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Fetches an archive page with Jsoup and returns its HTML.
 *
 * @param eachArchiveAddress address of the archive that holds the actual comic
 * @return the HTML of the parsed page on success
 * @throws Exception if the request or parsing fails; a {@link RuntimeException} is thrown when
 *         the parsed page contains no {@code div.gallery-template} element
 */
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
    print.info("고속 연결 시도중...\n");

    // The password is attached to the request from the very start. As of 2018-04-29 MaruMaru
    // uses reCaptcha, so sending it no longer achieves anything.
    // NOTE(review): the original comment describes this as a POST, but no request method is set
    // on the connection here — confirm which method is intended.
    // (Original note: the domain is http://wasabisyrup.com, without a trailing slash.)
    Response response = Jsoup.connect(eachArchiveAddress)
            .userAgent(UserAgent.getUserAgent())
            .header("charset", "utf-8")
            .header("Accept-Encoding", "gzip") // gzip added on 2017-11-26
            .timeout(MAX_WAIT_TIME)
            .data("pass", PASSWORD)
            .followRedirects(true)
            .execute();

    // Keep the parsed HTML that came back.
    Document parsedPage = response.parse();

    // The comic lives inside <div class="gallery-template">; its absence means parsing failed.
    boolean galleryMissing = parsedPage.select("div.gallery-template").isEmpty();
    if (galleryMissing) {
        throw new RuntimeException("Jsoup Parsing Failed: No tag found");
    }

    // The page source, including the HTML markup, is what the caller receives.
    String pageSource = parsedPage.toString();
    print.info("고속 연결 성공!\n");
    return pageSource;
}
/**
 * Parses {@code content}, hands its script/img/link elements to {@code replace} and returns
 * the re-serialized document with pretty-printing and outline mode switched off.
 *
 * @param content the HTML to process; blank input is returned unchanged
 * @return the serialized document, or {@code content} itself when it is blank
 */
public String buildNormalHtml(String content) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document document = Jsoup.parse(content);
    document.outputSettings().prettyPrint(false).outline(false);

    replace(document.select("script"), "src");
    replace(document.select("img"), "src");
    replace(document.select("link"), "href");

    // Template preview is switched on and a preview template is available.
    boolean previewActive = templatePreviewEnable
            && TemplateManager.me().getPreviewTemplate() != null;
    if (previewActive) {
        replacePreviewHref(document.select("a"));
    }

    return document.toString();
}
/**
 * Rewrites the image references in {@code content} to {@code /attachment/...} paths and
 * records every rewritten path.
 *
 * <p>For each {@code <img>} the URL is taken from {@code src}, falling back to {@code data-src}
 * when {@code src} is missing or empty. Images with no URL at all are left untouched and are
 * not added to {@code imageUrls}.
 *
 * @param content   the HTML to process
 * @param imageUrls output list; each rewritten image path is appended to it
 * @return the serialized document with rewritten {@code src} attributes
 */
private String processContentImages(String content, List<String> imageUrls) {
    Document doc = Jsoup.parse(content);
    for (Element img : doc.select("img")) {
        // attr() yields "" for a missing attribute, so an empty check covers both the
        // "absent" and the "present but empty" case.
        String imageUrl = img.attr("src");
        if (imageUrl.isEmpty()) {
            imageUrl = img.attr("data-src");
        }
        if (imageUrl.isEmpty()) {
            // Nothing to rewrite: do not write src="" or record an empty path.
            continue;
        }

        // Example input:
        // http://mmbiz.qpic.cn/mmbiz/4gZTdZfnQeDvQqCZFuVvYv8scGS7sEQTRETgISib1blz5iclAtnsccaJhaugmKc
        // hhm8mFOtjnicibibumazy8wPS6Xg/640?tp=webp&wxfrom=5&wx_lazy=1&wx_co=1
        imageUrl = replaceLast(imageUrl, "/", "__");
        imageUrl = imageUrl.startsWith("http://")
                ? imageUrl.replace("http://", "/attachment/")
                : imageUrl.replace("https://", "/attachment/s");
        imageUrl = imageUrl.replace("?", ".png?");

        img.removeAttr("data-src");
        img.attr("src", imageUrl);
        imageUrls.add(imageUrl);
    }
    return doc.toString();
}
/**
 * Rewrite all links in an HTML string based on the extensionless URLs settings.
 *
 * <p>The {@code href} of every {@code a[href]} element and the {@code content} of every
 * {@code meta[content]} element are passed to {@code updateAttribute}.
 *
 * @param html The HTML string.
 * @param requestHost The host name from the request.
 * @return The HTML string with rewritten URLs.
 */
public String rewriteAllLinks(final String html, final String requestHost) {
    final Document parsed = Jsoup.parse(html);

    // Both selections are taken before any attribute is touched.
    final Elements anchors = parsed.select("a[href]");
    final Elements metaTags = parsed.select("meta[content]");

    updateAttribute(anchors, "href", requestHost);
    updateAttribute(metaTags, "content", requestHost);

    return parsed.toString();
}
/**
 * Parses the table-invalid-elements fixture, renders it pretty-printed, and checks that the
 * text "Why am I here?" appears after "Comment" in the rendered output.
 */
@Test
public void testInvalidTableContents() throws IOException {
    File fixture = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document parsed = Jsoup.parse(fixture, "UTF-8");
    parsed.outputSettings().prettyPrint(true);

    String output = parsed.toString();
    int commentPos = output.indexOf("Comment");
    int searchTextPos = output.indexOf("Why am I here?");

    assertTrue("Comment not found", commentPos > -1);
    assertTrue("Search text not found", searchTextPos > -1);
    assertTrue("Search text did not come after comment", searchTextPos > commentPos);
}
/**
 * Renders the table-invalid-elements fixture with pretty-printing enabled and verifies the
 * ordering of two marker texts: "Comment" must be present, "Why am I here?" must be present,
 * and the latter must come later in the output.
 */
@Test
public void testInvalidTableContents() throws IOException {
    File htmlFile = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    Document document = Jsoup.parse(htmlFile, "UTF-8");
    document.outputSettings().prettyPrint(true);
    String html = document.toString();

    int commentAt = html.indexOf("Comment");
    assertTrue("Comment not found", commentAt > -1);

    int questionAt = html.indexOf("Why am I here?");
    assertTrue("Search text not found", questionAt > -1);

    assertTrue("Search text did not come after comment", questionAt > commentAt);
}
/**
 * Downloads the page at {@code f_url[0]} and scrapes three things out of its serialized HTML
 * by searching for key markers: the caption, whether the post is a video, and the image URL.
 * (The markers look like Instagram post JSON — presumably that is the target; confirm.)
 *
 * <p>The caption, when found, is published via {@code publishProgress("0", caption)}. The
 * {@code type} field is set to {@code true} when the page contains a {@code "video_url"} key
 * and {@code false} otherwise.
 *
 * @param f_url the page URL in element 0
 * @return the value following the {@code display_url} marker, or {@code null} when the page
 *         cannot be fetched or the marker is missing
 */
@Override
protected String doInBackground(String... f_url) {
    try {
        Document doc = Jsoup.connect(f_url[0]).get();
        String html = doc.toString();
        type = false;

        // Caption: the quoted value starts at the first quote found 48 characters past the
        // marker. A missing marker or quote simply means no caption is published, instead of
        // slicing an unrelated part of the page.
        int captionMarker = html.indexOf("edge_media_to_caption");
        if (captionMarker >= 0) {
            int openQuote = html.indexOf("\"", captionMarker + 48);
            int closeQuote = openQuote < 0 ? -1 : html.indexOf("\"", openQuote + 1);
            if (closeQuote >= 0) {
                // progress flags: 0 = caption, 1 = video, 2 = image
                publishProgress("0", html.substring(openQuote + 1, closeQuote));
            }
        }

        // Video: the presence of the key is the signal. The previous code sliced a value even
        // when the key was absent and compared that accidental text with "en".
        if (html.contains("\"video_url\"")) {
            // it is a video, show the play button
            type = true;
        }

        // Image URL: skip past display_url": (13 characters) to the opening quote of the value.
        int imageMarker = html.indexOf("display_url");
        if (imageMarker < 0) {
            Log.e("Error: ", "display_url not found in page");
            return null;
        }
        int start = html.indexOf("\"", imageMarker + 13);
        int end = start < 0 ? -1 : html.indexOf("\"", start + 1);
        if (end < 0) {
            Log.e("Error: ", "display_url value is not terminated");
            return null;
        }
        return html.substring(start + 1, end);
    } catch (Exception e) {
        // getMessage() may be null, and Log.e(tag, null) itself throws; pass the throwable too
        // so the stack trace is kept.
        Log.e("Error: ", String.valueOf(e.getMessage()), e);
    }
    return null;
}
/**
 * Fetches the page at {@code f_url[0]} and extracts the caption, the video flag and the image
 * URL from its serialized HTML by locating key markers in the text.
 * (The markers resemble Instagram post JSON — presumably the intended source; confirm.)
 *
 * <p>A caption, if one is found, is reported through {@code publishProgress("0", caption)}.
 * The {@code type} field ends up {@code true} exactly when the page contains a
 * {@code "video_url"} key.
 *
 * @param f_url the page URL in element 0
 * @return the quoted value after the {@code display_url} marker, or {@code null} if the page
 *         could not be loaded or the marker is absent
 */
@Override
protected String doInBackground(String... f_url) {
    try {
        Document doc = Jsoup.connect(f_url[0]).get();
        String html = doc.toString();
        type = false;

        // Caption: value begins at the first quote 48 characters beyond the marker. When the
        // marker or either quote is missing nothing is published, rather than publishing an
        // unrelated slice of the page.
        int captionMarker = html.indexOf("edge_media_to_caption");
        if (captionMarker >= 0) {
            int openQuote = html.indexOf("\"", captionMarker + 48);
            int closeQuote = openQuote < 0 ? -1 : html.indexOf("\"", openQuote + 1);
            if (closeQuote >= 0) {
                // progress flags: 0 = caption, 1 = video, 2 = image
                publishProgress("0", html.substring(openQuote + 1, closeQuote));
            }
        }

        // Video: decided by whether the key exists. Previously a value was sliced even without
        // the key, and the result was compared against "en".
        if (html.contains("\"video_url\"")) {
            // it is a video, show the play button
            type = true;
        }

        // Image URL: display_url": is 13 characters; the next quote opens the value.
        int imageMarker = html.indexOf("display_url");
        if (imageMarker < 0) {
            Log.e("Error: ", "display_url not found in page");
            return null;
        }
        int start = html.indexOf("\"", imageMarker + 13);
        int end = start < 0 ? -1 : html.indexOf("\"", start + 1);
        if (end < 0) {
            Log.e("Error: ", "display_url value is not terminated");
            return null;
        }
        return html.substring(start + 1, end);
    } catch (Exception e) {
        // A null message passed to Log.e(tag, msg) throws on its own; guard it and attach the
        // throwable so the cause is not lost.
        Log.e("Error: ", String.valueOf(e.getMessage()), e);
    }
    return null;
}
/**
 * Wraps {@code html} in a document styled with colours taken from the current theme.
 *
 * <p>A {@code <style>} block (text, link and code colours) and content-type meta tags are
 * appended to the head, every {@code <img>} is given {@code width="100%"} and
 * {@code height="auto"}, and the document charset is set to UTF-8.
 *
 * @param html    the HTML fragment or document to style; may be {@code null}
 * @param context supplies the theme whose colour attributes are resolved
 * @return the serialized styled document, or {@code ""} when {@code html} is null or empty
 */
public static String applyHtmlStyle(String html, Context context){
    // Nothing to style: return before touching the theme.
    if (html == null || html.equals("")){
        return "";
    }

    // typedColor is reused; each colour is converted right after its attribute is resolved.
    TypedValue typedColor = new TypedValue();
    context.getTheme().resolveAttribute(R.attr.attr_color_text, typedColor, true);
    String textColorStr = toCssHexColor(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_text_link, typedColor, true);
    String linkColorStr = toCssHexColor(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_accent, typedColor, true);
    String codeColorStr = toCssHexColor(typedColor.data);
    context.getTheme().resolveAttribute(R.attr.attr_color_text_secondary, typedColor, true);
    String codeBackgroundStr = toCssHexColor(typedColor.data);

    Document document = Jsoup.parse(html);
    document.head()
            .append(
                    "<style type=\"text/css\">" +
                            "body{width:95%;}" +
                            "* {" +
                            "    color:" + textColorStr + ";" +
                            "}" +
                            "a {" +
                            "    color:" + linkColorStr + ";" +
                            "word-wrap:break-word;" +
                            "}" +
                            "code,pre {" +
                            "    color: " + codeColorStr + ";" +
                            "    background: " + codeBackgroundStr + ";" +
                            "    padding: 3px;" +
                            "    border-radius: 5px;" +
                            "word-wrap:normal;" +
                            "} img { border:1px solid grey;}" +
                            "</style>");
    // "http-equiv" was previously misspelled, which made the second meta tag inert.
    document.head()
            .append("<meta name=\"content-type\" content=\"text/html; charset=utf-8\">" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">");
    for (Element img : document.select("img")){
        img.attr("width","100%");
        img.attr("height","auto");
    }
    document.charset(Charset.forName("utf-8"));
    return document.toString();
}

/**
 * Formats the RGB part of a colour int as a CSS {@code #rrggbb} string.
 *
 * <p>Each channel is zero-padded to two hex digits; concatenating unpadded
 * {@code Integer.toHexString} values yields a malformed colour whenever a channel is below
 * 0x10. The alpha byte is dropped.
 */
private static String toCssHexColor(int color) {
    return String.format("#%06x", color & 0xFFFFFF);
}
/**
 * Parses {@code content} and hands its URL-bearing elements to {@code replace} together with
 * {@code domain}: {@code script[src]} and {@code img[src]} for their {@code src} attribute,
 * {@code link[href]} for its {@code href} attribute.
 *
 * @param content the HTML to process; blank input is returned unchanged
 * @param domain  the domain passed through to {@code replace}
 * @return the serialized document, or {@code content} itself when it is blank
 */
public static String processCDN(String content, String domain) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document document = Jsoup.parse(content);
    replace(document.select("script[src]"), "src", domain);
    replace(document.select("img[src]"), "src", domain);
    replace(document.select("link[href]"), "href", domain);
    return document.toString();
}
/**
 * Re-serializes {@code s} as an HTML document using an indent of 4 for the output.
 *
 * @param s       the HTML to beautify
 * @param charset not used by this implementation
 *                (NOTE(review): confirm whether the output charset should follow it)
 * @return the serialized document
 */
@Override
public String beautify(String s, Charset charset) {
    final Document parsed = Jsoup.parse(s);
    parsed.outputSettings().indentAmount(4);
    final String beautified = parsed.toString();
    return beautified;
}