下面列出了org.jsoup.nodes.Document#setBaseUri ( ) 实例代码,或者点击链接到github查看源代码,也可以在右侧发表评论。
/**
 * Posts the request data and cookies carried by {@code dataMap} to {@code url} using
 * {@code Jsoup.connect(url)...post()} and returns the parsed HTML document.
 *
 * <p>The timeout and an optional base URI override are read from {@code Docx4jProperties}.
 * The override is applied only when it is non-empty, so an unset property no longer replaces
 * the base URI jsoup assigned to the fetched document with an empty string (which would make
 * relative links unresolvable).
 *
 * <p>{@code Jsoup.connect} only supports web URLs (http and https).
 *
 * @param url     address the request is posted to
 * @param dataMap supplies the two request-data sets and the cookies sent with the request
 * @return the parsed document
 * @throws IOException if fetching the HTML from {@code url} fails
 */
@Override
public Document handle(String url, DataMap dataMap) throws IOException {
    // Read the Jsoup settings.
    String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");
    String userAgent = "Mozilla/5.0 (jsoup)";
    int timeout = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_TIMEOUTMILLIS, Docx4jConstants.DEFAULT_TIMEOUTMILLIS);
    // Fetch the specified URL and parse it into an HTML DOM.
    Document doc = Jsoup.connect(url)
            .data(dataMap.getData1())
            .data(dataMap.getData2())
            .userAgent(userAgent)
            .cookies(dataMap.getCookies())
            .timeout(timeout)
            .post();
    // Only override the base URI when one is actually configured.
    if (baseUri != null && !baseUri.isEmpty()) {
        doc.setBaseUri(baseUri);
    }
    return doc;
}
/**
 * Parses the user information out of an HTML response.
 *
 * <p>Parsing is best-effort: any exception raised while reading the page is logged as a
 * warning and the user object is returned with whatever fields were filled in up to that point.
 *
 * @param responseBody HTML of the page containing the "userinfo" block
 * @return a {@code User}, never {@code null}; possibly only partially populated
 */
public static User parseUserInfo(String responseBody) {
    final User result = new User();
    try {
        final Document page = Jsoup.parse(responseBody);
        page.setBaseUri(Constants.BASE_URL);

        // Avatar, display name and info box all live inside the "userinfo" element.
        final Element userInfo = page.getElementsByClass("userinfo").first();
        final Element avatarImage = userInfo.getElementsByTag("img").first();
        result.setAvatarUrl(avatarImage.attr("src"));
        result.setName(userInfo.getElementsByClass("name").first().text());
        result.setInfo(userInfo.getElementsByClass("user_box").html());

        // The form hash is taken from the query string of the first child link of "btn_exit".
        final Element exitButton = page.getElementsByClass("btn_exit").first();
        final String exitHref = exitButton.child(0).attr("href");
        final String formHash = new UrlParamsMap(exitHref).get("formhash");
        result.setFormHash(formHash);
        LogMessage.i("formHash", formHash);
    } catch (Exception e) {
        LogMessage.w(TAG + "#parseUserInfo", e);
    }
    return result;
}
/**
 * Parses an album page.
 *
 * <p>Collects the absolute {@code orig} URL of every element with class {@code postalbum_i}
 * and reads the current picture number from the element with id {@code curpic}. The stored
 * position is that number minus one. When the {@code curpic} element is missing or its text
 * is not an integer, the position falls back to 0 instead of throwing, so the collected URLs
 * are still returned.
 *
 * @param responseBody HTML of the album page
 * @return the album URLs together with the current position
 */
public static AlbumWrap parseAubum(String responseBody) {
    AlbumWrap albumWrap = new AlbumWrap();
    List<String> albums = new ArrayList<String>();
    Document document = Jsoup.parse(responseBody);
    // Base URI is required for absUrl() to resolve relative picture links.
    document.setBaseUri(Constants.BASE_URL);
    Elements elements = document.getElementsByClass("postalbum_i");
    for (Element album : elements) {
        albums.add(album.absUrl("orig"));
    }
    albumWrap.setUrls(albums);

    int curpic = 0;
    Element curpicElement = document.getElementById("curpic");
    if (curpicElement != null) {
        try {
            // The page number is converted to an index by subtracting one.
            curpic = Integer.parseInt(curpicElement.text().trim()) - 1;
        } catch (NumberFormatException ignored) {
            // Counter text is not numeric; keep the default position rather than losing the URLs.
        }
    }
    albumWrap.setCurPosition(curpic);
    return albumWrap;
}
/**
 * Fetches {@code url} with {@code Jsoup.parse(URL, int timeoutMillis)} and returns the parsed
 * HTML document.
 *
 * <p>The timeout and an optional base URI override are read from {@code Docx4jProperties}.
 * The override is applied only when it is non-empty, so an unset property no longer replaces
 * the document's base URI with an empty string (which would make relative links unresolvable).
 *
 * <p>{@code Jsoup.parse(URL, int)} only supports web URLs (http and https).
 *
 * @param url address of the page to fetch
 * @return the parsed document
 * @throws IOException if fetching the HTML from {@code url} fails
 */
@Override
public Document handle(URL url) throws IOException {
    // Read the Jsoup settings.
    String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");
    int timeout = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_TIMEOUTMILLIS, Docx4jConstants.DEFAULT_TIMEOUTMILLIS);
    // Fetch the specified URL and parse it into an HTML DOM.
    Document doc = Jsoup.parse(url, timeout);
    // Only override the base URI when one is actually configured.
    if (baseUri != null && !baseUri.isEmpty()) {
        doc.setBaseUri(baseUri);
    }
    return doc;
}
/**
 * Parses an HTML string and wraps the resulting document in a {@code DomPage}.
 *
 * @param html markup to parse
 * @param url  base URI to assign to the parsed document; ignored when {@code null}
 * @return a {@code DomPage} built from the parsed document
 */
public static DomPage getDomPageByHtml(String html, String url) {
    final Document parsed = Jsoup.parse(html);
    if (url == null) {
        // No base URI supplied: keep the document exactly as parsed.
        return new DomPage(parsed);
    }
    parsed.setBaseUri(url);
    return new DomPage(parsed);
}
/**
 * Builds a {@code DomPage} from raw HTML.
 *
 * @param html markup to parse
 * @param url  base URI for the parsed document, or {@code null} to leave it unset
 * @return the page wrapper around the parsed document
 */
public static DomPage getDomPageByHtml(String html, String url) {
    Document document = Jsoup.parse(html);
    boolean hasBaseUri = url != null;
    if (hasBaseUri) {
        document.setBaseUri(url);
    }
    return new DomPage(document);
}
/**
 * Parses the data needed to prepare a quoted reply.
 *
 * <p>All values are read from the element with id {@code postform}: the absolute form action,
 * a set of named input values, and the first {@code blockquote}. The result is populated only
 * after every value has been read; if anything fails along the way, the stack trace is printed
 * and the result is returned without those values.
 *
 * @param responseBody HTML of the reply page
 * @return a {@code PrepareQuoteReply}, never {@code null}
 */
public static PrepareQuoteReply parsePrepareQuoteReply(String responseBody) {
    final PrepareQuoteReply result = new PrepareQuoteReply();
    try {
        final Document page = Jsoup.parse(responseBody);
        page.setBaseUri(Constants.BASE_URL);
        final Element form = page.getElementById("postform");

        // Read everything first so a failure leaves the result untouched.
        final String actionUrl = form.absUrl("action");
        final String formHash = readFormInputValue(form, "formhash");
        final String postTime = readFormInputValue(form, "posttime");
        final String noticeAuthor = readFormInputValue(form, "noticeauthor");
        final String noticeTrimStr = readFormInputValue(form, "noticetrimstr");
        final String noticeAuthorMsg = readFormInputValue(form, "noticeauthormsg");
        final String repPid = readFormInputValue(form, "reppid");
        final String repPost = readFormInputValue(form, "reppost");
        final String quotedHtml = form.getElementsByTag("blockquote").first().toString();

        result.setNoticeauthor(noticeAuthor);
        result.setNoticeauthormsg(noticeAuthorMsg);
        result.setNoticetrimstr(noticeTrimStr);
        result.setPosttime(postTime);
        result.setQuoteBody(quotedHtml);
        result.setReppid(repPid);
        result.setUrl(actionUrl);
        result.setFormhash(formHash);
        result.setReppost(repPost);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

/** Returns the {@code value} attribute of the first element in {@code form} whose {@code name} attribute equals {@code inputName}. */
private static String readFormInputValue(Element form, String inputName) {
    return form.getElementsByAttributeValue("name", inputName).first().attr("value");
}
/**
 * Parses a single HTML file as UTF-8 and delegates topic extraction to
 * {@code extractTopicsFrom(Document, String, TopicMap)}, using the file's absolute path both
 * as the document base URI and as the location argument.
 *
 * @param f file to read; must not be a directory
 * @param t topic map passed through to the extraction
 * @return the result of the delegated extraction
 * @throws Exception if {@code f} is a directory, or if parsing or extraction fails
 */
@Override
public boolean _extractTopicsFrom(File f, TopicMap t) throws Exception {
    if (f.isDirectory()) {
        throw new Exception("Directories are not supported.");
    }
    final Document document = Jsoup.parse(f, "UTF-8");
    document.setBaseUri(f.getAbsolutePath());
    final String location = f.getAbsolutePath();
    return extractTopicsFrom(document, location, t);
}
/**
 * Loads the encyclopedia page at {@code url} and extracts an {@code Item} from it.
 *
 * <p>Name, level, type and illustration are read from fixed CSS classes. The remaining fields
 * are filled from the {@code ak-panel-title} panels, each matched by comparing its text with a
 * label obtained from {@code Translator.getLabel(lg, ...)}. Fields whose panel is absent stay
 * {@code null}.
 *
 * <p>The level label is stripped with {@code String.replace} (literal match) rather than
 * {@code replaceAll}, so a translated label containing regex metacharacters cannot corrupt
 * the result or throw.
 *
 * @param lg  language used to look up the panel labels
 * @param url address of the item page; also used as the document base URI
 * @return the extracted item
 * @throws IOException if the page cannot be loaded
 */
public static Item getItem(Language lg, String url) throws IOException {
    Document doc = JSoupManager.getDocument(url);
    // Base URI is needed for the "abs:href" lookups below.
    doc.setBaseUri(url);

    String name = doc.getElementsByClass("ak-return-link").first().text();
    String level = doc.getElementsByClass("ak-encyclo-detail-level").first().text()
            .replace(Translator.getLabel(lg, "item.extract.level") + " ", "");
    String type = doc.getElementsByClass("ak-encyclo-detail-type").last().children().last().text();
    String skinURL = doc.getElementsByClass("ak-encyclo-detail-illu").first()
            .getElementsByTag("img").first().attr("src");

    String description = null;
    String effects = null;
    String caracteristics = null;
    String conditions = null;
    String set = null;
    String setURL = null;
    String recipe = null;

    Elements titles = doc.getElementsByClass("ak-panel-title");
    for (Element title : titles) {
        String titleText = title.text();
        if (titleText.equals(Translator.getLabel(lg, "item.extract.description"))) {
            description = title.parent().getElementsByClass("ak-panel-content").first().text();
        } else if (titleText.equals(Translator.getLabel(lg, "item.extract.effets"))) {
            effects = extractStatsFromTitle(lg, title);
        } else if (titleText.equals(Translator.getLabel(lg, "item.extract.caracteristiques"))) {
            caracteristics = extractLinesFromTitle(title);
        } else if (titleText.equals(Translator.getLabel(lg, "item.extract.evolution_effects"))) {
            effects = extractEvolutionEffectsFromTitle(lg, url);
        } else if (titleText.equals(Translator.getLabel(lg, "item.extract.conditions"))) {
            conditions = extractLinesFromTitle(title);
        } else if (titleText.contains(Translator.getLabel(lg, "item.extract.panoplie"))) {
            // The set panel title embeds a link to the set page.
            Element setLink = title.getElementsByTag("a").first();
            set = setLink.text();
            setURL = setLink.attr("abs:href");
        } else if (titleText.equals(Translator.getLabel(lg, "item.extract.recette"))) {
            // One line per ingredient: "<quantity> [<name>](<absolute link>)".
            StringBuilder recipeText = new StringBuilder();
            for (Element line : title.parent().getElementsByClass("ak-column")) {
                String quantity = line.getElementsByClass("ak-front").text();
                Element ingredient = line.getElementsByClass("ak-title").first();
                recipeText.append(quantity).append(" [")
                        .append(ingredient.text()).append("](")
                        .append(ingredient.children().first().attr("abs:href")).append(")\n");
            }
            recipe = recipeText.toString();
        }
    }

    return new Item(name, type, level, description, effects, URLManager.abs(skinURL), url,
            caracteristics, conditions, set, setURL, recipe);
}
/**
 * Loads the encyclopedia page at {@code url} and extracts a {@code Resource} from it.
 *
 * <p>Name, type and illustration are read from fixed CSS classes; the level is read only when
 * the page has an {@code ak-encyclo-detail-level} element and is {@code null} otherwise. The
 * remaining fields are filled from the {@code ak-panel-title} panels, each matched by comparing
 * its text with a label obtained from {@code Translator.getLabel(lg, ...)}. Text fields whose
 * panel is absent stay {@code null}; the monster drop list stays empty.
 *
 * <p>The level label is stripped with {@code String.replace} (literal match) rather than
 * {@code replaceAll}, so a translated label containing regex metacharacters cannot corrupt
 * the result or throw.
 *
 * @param lg  language used to look up the panel labels
 * @param url address of the resource page; also used as the document base URI
 * @return the extracted resource
 * @throws IOException if the page cannot be loaded
 */
public static Resource getResource(Language lg, String url) throws IOException {
    Document doc = JSoupManager.getDocument(url);
    // Base URI is needed for the "abs:href" lookups below.
    doc.setBaseUri(url);

    String name = doc.getElementsByClass("ak-return-link").first().text();

    // Not every resource page shows a level.
    String level = null;
    Elements levelElements = doc.getElementsByClass("ak-encyclo-detail-level");
    if (!levelElements.isEmpty()) {
        level = levelElements.first().text()
                .replace(Translator.getLabel(lg, "resource.extract.level") + " ", "");
    }

    String type = doc.getElementsByClass("ak-encyclo-detail-type").last().children().last().text();
    String skinURL = doc.getElementsByClass("ak-encyclo-detail-illu").first()
            .getElementsByTag("img").first().attr("src");

    String description = null;
    String effects = null;
    String bonus = null;
    String sorts = null;
    String recipe = null;
    List<String> monsterDrops = new ArrayList<>();

    Elements titles = doc.getElementsByClass("ak-panel-title");
    for (Element title : titles) {
        String titleText = title.text();
        if (titleText.equals(Translator.getLabel(lg, "resource.extract.description"))) {
            description = title.parent().getElementsByClass("ak-panel-content").first().text();
        } else if (titleText.equals(Translator.getLabel(lg, "resource.extract.effets"))) {
            effects = extractStatsFromTitle(lg, title);
        } else if (titleText.equals(Translator.getLabel(lg, "resource.extract.bonus"))) {
            bonus = extractLinesFromTitle(title);
        } else if (titleText.equals(Translator.getLabel(lg, "resource.extract.sorts"))) {
            sorts = title.parent().getElementsByClass("ak-panel-content").first().text();
        } else if (titleText.equals(Translator.getLabel(lg, "resource.extract.monsterDrop"))) {
            monsterDrops = extractDrops(title.parent());
        } else if (titleText.equals(Translator.getLabel(lg, "resource.extract.recette"))) {
            // One line per ingredient: "<quantity> [<name>](<absolute link>)".
            StringBuilder recipeText = new StringBuilder();
            for (Element line : title.parent().getElementsByClass("ak-column")) {
                String quantity = line.getElementsByClass("ak-front").text();
                Element ingredient = line.getElementsByClass("ak-title").first();
                recipeText.append(quantity).append(" [")
                        .append(ingredient.text()).append("](")
                        .append(ingredient.children().first().attr("abs:href")).append(")\n");
            }
            recipe = recipeText.toString();
        }
    }

    return new Resource(name, type, level, description, effects, URLManager.abs(skinURL), url,
            bonus, sorts, recipe, monsterDrops);
}
/**
 * Logs in, downloads the schedule page at {@code url} and parses it into a
 * {@code SubstitutionSchedule}.
 *
 * <p>Three page layouts are handled:
 * <ul>
 *   <li>day headings found directly under {@code #vertretung};</li>
 *   <li>the schedule embedded in a frame whose {@code src} contains {@code w00}: the frame
 *       target is downloaded and parsed instead. Its base URI is now set to the frame URL,
 *       matching the outer document, so relative links in the framed page stay resolvable;</li>
 *   <li>day headings found under {@code center > font > p > b}.</li>
 * </ul>
 * If no day headings are found at all, the page is parsed as a substitution table when it
 * contains matching table rows.
 *
 * @return the parsed schedule with classes and teachers set
 * @throws IOException                if a download fails
 * @throws JSONException              if reading the configuration fails
 * @throws CredentialInvalidException if the login is rejected
 */
@Override
public SubstitutionSchedule getSubstitutionSchedule()
        throws IOException, JSONException, CredentialInvalidException {
    new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);
    SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData);

    Document doc = Jsoup.parse(httpGet(url, data.optString(PARAM_ENCODING, null)));
    doc.setBaseUri(url);
    Elements dayElems = doc.select("#vertretung > p > b, #vertretung > b");
    Elements frames = doc.select("frame[src*=w00]");
    if (dayElems.isEmpty() && !frames.isEmpty()) {
        // doc is embedded in frame
        String frameUrl = frames.get(0).absUrl("src");
        doc = Jsoup.parse(httpGet(frameUrl, data.optString(PARAM_ENCODING, null)));
        // Give the framed document a base URI too, as is done for the outer document.
        doc.setBaseUri(frameUrl);
        dayElems = doc.select("#vertretung > p > b, #vertretung > b");
    } else if (dayElems.isEmpty()) {
        // seen at GHS Berlin, different kinds of center > font > center ... stacked (sometimes within #vertretung)
        dayElems = doc.select("center > font > p > b");
    }

    final List<String> allClasses = getAllClasses();
    if (!dayElems.isEmpty()) {
        // untis-info days
        for (Element dayElem : dayElems) {
            SubstitutionScheduleDay day = new SubstitutionScheduleDay();
            day.setLastChangeString("");
            String date = dayElem.text();
            day.setDateString(date);
            day.setDate(ParserUtils.parseDate(date));

            // Locate the element handed to parseDay; its position depends on whether the
            // heading is wrapped in a <p>.
            Element next;
            if (dayElem.parent().tagName().equals("p")) {
                next = dayElem.parent().nextElementSibling().nextElementSibling();
            } else {
                next = dayElem.parent().select("p").first().nextElementSibling();
            }
            parseDay(day, next, v, null, allClasses);
        }
    } else if (doc.select("tr:has(td[align=center]):gt(0)").size() > 0) {
        // untis-subst table
        parseSubstitutionTable(v, null, doc);
    }

    v.setClasses(allClasses);
    v.setTeachers(getAllTeachers());
    return v;
}
/**
 * Parses the list of plate (board) groups from an HTML page.
 *
 * <p>Every element with class {@code bm} becomes one group, titled by the text of its first
 * {@code bm_h} element. Every {@code bm_c} element inside it becomes one plate.
 *
 * @param content HTML of the page listing the plate groups
 * @return the parsed groups in document order
 */
public static List<PlateGroup> parsePlateGroupList(String content) {
    Document page = Jsoup.parse(content);
    page.setBaseUri(Constants.BASE_URL);

    List<PlateGroup> result = new ArrayList<PlateGroup>();
    for (Element groupElement : page.getElementsByClass("bm")) {
        PlateGroup group = new PlateGroup();
        group.setTitle(groupElement.getElementsByClass("bm_h").first().text());

        List<Plate> groupPlates = new ArrayList<Plate>();
        for (Element row : groupElement.getElementsByClass("bm_c")) {
            Plate entry = new Plate();

            // Links: the first one is the plate link; a second one, if present, is the
            // "remove from favorites" link.
            Elements links = row.getElementsByTag("a");
            Element plateLink = links.first();
            String plateTitle = plateLink.text();
            String plateUrl = plateLink.absUrl("href");

            // Counter text, defaulting to "(0)" when the row has none.
            Elements counters = row.getElementsByClass("xg1");
            String counterText = counters.size() == 0 ? "(0)" : counters.first().text();

            // A second link means the plate is a favorite; its id is in that link's query string.
            String favoriteId = null;
            if (links.size() > 1) {
                String removeFavoriteUrl = links.get(1).absUrl("href");
                favoriteId = new UrlParamsMap(removeFavoriteUrl).get("favid");
            }

            entry.setTitle(plateTitle);
            entry.setUrl(plateUrl);
            entry.setXg1(counterText);
            entry.setFavoriteId(favoriteId);
            groupPlates.add(entry);
        }

        group.setPlates(groupPlates);
        result.add(group);
    }
    return result;
}