下面列出了 org.jsoup.nodes.Document#text() 的实例代码,或者点击链接到 GitHub 查看源代码,也可以在右侧发表评论。
/**
 * Renders a document as plain text, either in full or as a short preview.
 *
 * <p>The document is modified in place: it is handed to {@code truncate}
 * (with the flag {@code !full}) and every {@code blockquote} element gets a
 * leading "[" and a trailing "]" text node before the text is extracted.
 *
 * @param d    the document to render; mutated by this call
 * @param full {@code true} to return the whole text, {@code false} to return
 *             at most {@code PREVIEW_SIZE} characters
 * @return the full text, or the preview followed by "…" when text was cut off
 */
private static String _getText(Document d, boolean full) {
    truncate(d, !full);

    // Mark quoted passages so they remain recognisable once the tags are gone.
    for (Element quote : d.select("blockquote")) {
        quote.prependChild(new TextNode("["));
        quote.appendChild(new TextNode("]"));
    }

    final String whole = d.text();
    if (full) {
        return whole;
    }

    final int limit = Math.min(whole.length(), PREVIEW_SIZE);
    final String head = whole.substring(0, limit);
    return head.length() < whole.length() ? head + "…" : head;
}
/**
 * Looks up this machine's public IP address via checkip.amazonaws.com.
 *
 * <p>The request is best-effort: an {@link IOException} is printed and treated
 * as "no address". Previously a failed request left the document reference
 * {@code null} and the subsequent length check threw a
 * {@code NullPointerException}; the check now runs on the extracted string.
 *
 * @return the response text, or an empty string when the request failed or
 *         the response is shorter than 7 characters (the length of the
 *         shortest dotted-quad IPv4 address)
 */
private static String getip() {
    final String url = "http://checkip.amazonaws.com";
    String ip = "";
    try {
        Document doc = Jsoup.connect(url)
                .header("Cache-Control", "no-cache")
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .timeout(10 * 1000)
                .get();
        ip = doc.text();
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (ip.length() < 7) {
        // can't get ip address, let DuckDNS resolve it
        ip = "";
    }
    return ip;
}
/**
 * Parses the given markup with jsoup's XML parser and renders it according to
 * the configured {@code outType}.
 *
 * @param str the markup to parse
 * @return the document's HTML when {@code outType} is {@code TYPE_HTML};
 *         otherwise (including {@code TYPE_TEXT}) the document's plain text
 */
private String parse(String str) {
    final Document parsed = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return parsed.html();
        case TYPE_TEXT:
        default:
            // Plain text is both the explicit TYPE_TEXT result and the fallback.
            return parsed.text();
    }
}
/**
 * Extracts account entries from an SSR subscription document.
 *
 * <p>The document text is Base64-decoded, split on newlines, and every
 * non-blank line is parsed with {@code parseLink}. Each entry is stamped with
 * fixed title/remarks/group values, probed with {@code isReachable}, and added
 * to the result whether or not it is reachable. Lines are processed on a
 * parallel stream; a failure on one line is logged and does not stop the rest.
 *
 * @param document the fetched subscription document
 * @return a synchronized set of the entries that could be parsed
 */
@Override
protected Set<ShadowSocksDetailsEntity> parse(Document document) {
    // Content of the SSR subscription address: one Base64 blob of links.
    final byte[] decoded = Base64.decodeBase64(document.text());
    final String[] links = StringUtils.toEncodedString(decoded, StandardCharsets.UTF_8).split("\n");

    final Set<ShadowSocksDetailsEntity> results = Collections.synchronizedSet(new HashSet<>(links.length));
    Arrays.stream(links).parallel().forEach(link -> {
        if (StringUtils.isBlank(link)) {
            return;
        }
        try {
            ShadowSocksDetailsEntity entry = parseLink(link.trim());
            entry.setValid(false);
            entry.setValidTime(new Date());
            entry.setTitle("免费账号 | 云端框架");
            entry.setRemarks("https://cloudfra.com/");
            entry.setGroup("ShadowSocks-Share");

            // Test the network.
            if (isReachable(entry)) {
                entry.setValid(true);
            }

            // Store the entry whether or not it is usable.
            results.add(entry);
            log.debug("*************** 第 {} 条 ***************{}{}", results.size(), System.lineSeparator(), entry);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    });
    return results;
}
/**
 * Parses the page.
 * The process function is expected to:
 * 1. Extract the useful information and put it into the Page's items, which
 *    the subsequent save step will persist.
 *
 * <p>This implementation stores the document title, the document text and the
 * seed URL under the keys "title", "text" and "url".
 *
 * @param page page
 */
public void process(Page page) {
    final Document document = page.getDocument();

    final Map<String, String> extracted = new HashMap<>();
    extracted.put("title", document.title());
    extracted.put("text", document.text());
    extracted.put("url", page.getUrlSeed().getUrl());

    page.setItems(extracted);
}
/**
 * Fetches an English insult from evilinsult.com and stores its text in
 * {@code title}. On any failure the stack trace is printed and the exception
 * message is stored in {@code title} instead.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("http://evilinsult.com/generate_insult.php?lang=en").get().text();
    } catch (Exception ex) {
        ex.printStackTrace();
        title = ex.getMessage();
    }
    return null;
}
/**
 * Downloads an English insult over HTTPS from evilinsult.com and assigns the
 * response text to {@code title}. If the request fails, the stack trace is
 * printed and {@code title} receives the exception message.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        final Document response =
                Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=en").get();
        title = response.text();
    } catch (Exception ex) {
        ex.printStackTrace();
        title = ex.getMessage();
    }
    return null;
}
/**
 * Fetches an insult in the configured {@code language} from evilinsult.com and
 * stores its text in {@code title}. On failure the stack trace is printed and
 * {@code title} is left unchanged.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        title = Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=" + language).get().text();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return null;
}
/**
 * Requests an insult for the current {@code language} from evilinsult.com and
 * assigns the response text to {@code title}. A failed request only prints the
 * stack trace; {@code title} keeps its previous value.
 *
 * @param params unused
 * @return always {@code null}
 */
@Override
protected Void doInBackground(Void... params) {
    try {
        final Document response =
                Jsoup.connect("https://evilinsult.com/generate_insult.php?lang=" + language).get();
        title = response.text();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return null;
}
/**
 * Sends an update request to DuckDNS for the given domain(s).
 *
 * <p>The request is best-effort: an {@link IOException} is printed and an
 * empty string is returned. Previously a failed request left the document
 * reference {@code null} and the final {@code doc.text()} call threw a
 * {@code NullPointerException}.
 *
 * @param domain    value for the {@code domains} query parameter
 * @param token     value for the {@code token} query parameter
 * @param ipaddress value for the {@code ip} query parameter; the sibling
 *                  {@code getip} passes an empty string when the address is
 *                  unknown
 * @return the text of the server response, or an empty string when the
 *         request failed
 */
private static String updateDuckDNS(String domain, String token, String ipaddress) {
    // NOTE(review): the parameters are concatenated without URL encoding — confirm
    // callers only pass URL-safe values.
    final String url = "http://www.duckdns.org/update?domains=" + domain + "&token=" + token + "&ip=" + ipaddress;
    final String ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2";
    try {
        Document doc = Jsoup.connect(url)
                .userAgent(ua)
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .timeout(10 * 1000)
                .get();
        return doc.text();
    } catch (IOException e) {
        e.printStackTrace();
        return "";
    }
}
/**
 * Downloads the trends feed and splits its entries into individual trends.
 *
 * <p>The URL from {@code constructQuery()} is read as JSON; for each object in
 * {@code responseData.feed.entries} the {@code content} HTML is parsed, a "."
 * is appended to every anchor so that link texts become separate segments,
 * and the resulting plain text is split on ".". The collected segments are
 * passed through {@code getNonEmptyTrends}.
 *
 * @return the value returned by {@code getNonEmptyTrends} for the collected segments
 * @throws Exception if the download or the JSON parsing fails
 */
private static String[] downloadTrends() throws Exception
{
    String query = constructQuery();
    InputStream is = new URL(query).openStream();
    try
    {
        StringWriter writer = new StringWriter();
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        IOUtils.copy(is, writer, "UTF-8");
        JSONObject json = new JSONObject(writer.toString());
        json = json.getJSONObject("responseData");
        json = json.getJSONObject("feed");
        JSONArray arr = json.getJSONArray("entries");
        List<String> res = new ArrayList<String>();
        for (int i = 0; i < arr.length(); i++)
        {
            JSONObject tmp = arr.getJSONObject(i);
            String en = tmp.getString("content");
            Document doc = Jsoup.parse(en);
            // Terminate each link text with "." so the split below isolates it.
            for (Element t : doc.select("a"))
            {
                t.append(".");
            }
            en = doc.text();
            res.addAll(Arrays.asList(en.split("\\.")));
        }
        return getNonEmptyTrends(res);
    }
    finally
    {
        is.close();
    }
}
/**
 * Feeds every space-separated token of an HTML file's text to
 * {@code addStringToCharRep}.
 *
 * @param inputFile the HTML file to read, decoded using {@code ENCODING}
 * @throws IOException if the file cannot be read or parsed
 */
private void addHtmlFileToCharRep(File inputFile) throws IOException {
    final String pageText = Jsoup.parse(inputFile, ENCODING).text();
    for (java.util.StringTokenizer words = new java.util.StringTokenizer(pageText, " ");
            words.hasMoreTokens(); ) {
        addStringToCharRep(words.nextToken());
    }
}
/**
 * Removes all {@code iframe} elements from the given HTML and returns the
 * remaining content as plain text.
 *
 * <p>Note that the result is the document's text, so all other markup is
 * dropped as well, not only the iframes.
 *
 * <p>The second argument of {@code Jsoup.parse(String, String)} is a base URI,
 * not a character set, so the former {@code "UTF-8"} argument was being used
 * as a bogus base URI. The input is already a decoded {@code String}; the
 * single-argument overload is used instead. The returned text is unaffected.
 *
 * @param htmlContent the HTML markup to clean
 * @return the text of the document with iframes removed
 */
public static String stripIframes(String htmlContent) {
    Document doc = Jsoup.parse(htmlContent);
    doc.select("iframe").remove();
    return doc.text();
}
/**
 * Fetches the RSS feed at {@code this.url} and saves every entry whose
 * detected language reliably matches the configured language.
 *
 * <p>For each entry the first content element is reduced to plain text and
 * run through language detection. Matching entries are stored via
 * {@code DAO.saveEntry} and, when that succeeds, written to disk; this
 * increments {@code numOfFiles} and resets {@code wrongCount}. The first
 * non-matching entry increments {@code wrongCount} and ends processing of the
 * feed, as does any exception raised while handling an entry.
 *
 * @return a pair of ({@code numOfFiles}, {@code wrongCount}) after processing
 * @throws Exception if the feed cannot be fetched or parsed
 */
public AbstractMap.SimpleEntry<Integer, Integer> fetchAndSave() throws Exception {
    final URL feedUrl = new URL(this.url);
    final SyndFeed feed = new SyndFeedInput().build(new XmlReader(feedUrl));

    final int items = feed.getEntries().size();
    if (items > 0) {
        log.info("Attempting to parse rss feed: " + this.url);
        log.info("This Feed has " + items + " items");
    }

    final List<SyndEntry> entries = feed.getEntries();
    for (SyndEntry entry : entries) {
        log.info("Title: " + entry.getTitle());
        log.info("Link: " + entry.getLink());

        // NOTE(review): get(0) throws for an entry without contents — confirm feeds always carry one.
        final SyndContentImpl holder = (SyndContentImpl) entry.getContents().get(0);
        // The content might contain HTML, so reduce it to plain text.
        final String content = Jsoup.parse(holder.getValue()).text();

        try {
            final Result detected = ld.detectLanguage(content, language);
            final boolean matches = detected.languageCode.equals(language) && detected.isReliable;
            if (!matches) {
                log.info("Item " + entry.getTitle() + "is in a diff languageCode, skipping this post " + detected.languageCode);
                wrongCount++;
                if (wrongCount > 3) {
                    log.info("Already found 3 posts in the wrong languageCode, skipping this blog");
                }
                // NOTE(review): this break runs on every mismatch, not only past the threshold — confirm intent.
                break;
            }

            final FileSaver file = new FileSaver(content, this.language, "bs", entry.getLink(), entry.getUri(),
                    String.valueOf(content.hashCode()));
            final String fileName = file.getFileName();
            final BlogPost post = new BlogPost(content, this.language, null, "bs", entry.getLink(), entry.getUri(),
                    fileName);
            if (DAO.saveEntry(post)) {
                file.save(this.logDb);
                numOfFiles++;
                wrongCount = 0;
            }
        } catch (Exception e) {
            log.error(e);
            break;
        }
    }
    return new AbstractMap.SimpleEntry<>(numOfFiles, wrongCount);
}
/**
 * Returns the plain text of an HTML block by parsing its characters as a
 * body fragment.
 *
 * @param node the HTML block whose markup should be reduced to text
 * @return the text content of the parsed fragment
 */
String getText(final HtmlBlock node) {
    final String markup = node.getChars().toString();
    return Jsoup.parseBodyFragment(markup).text();
}
/**
 * Demo entry point: fetches http://www.bluetata.com and prints the page's
 * plain text to standard output. An {@link IOException} during the fetch is
 * printed as a stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        // Connect, download the HTML document, and reduce it to its text.
        final String pageText = Jsoup.connect("http://www.bluetata.com").get().text();
        System.out.println(pageText);
    } catch (IOException ioex) {
        ioex.printStackTrace();
    }
}
/**
 * Handles a newly logged chat message: optionally speaks it via
 * text-to-speech, optionally shows a notification, and appends it to the
 * message log.
 *
 * <p>The spoken text is the message with all HTML removed. When short TTS
 * messages are enabled, anchors whose visible text equals their {@code href}
 * are first replaced by a short form containing only the link's hostname.
 *
 * @param message the message that was logged
 */
@Override
public void onMessageLogged(IMessage message) {
    // Strip all HTML tags.
    final Document body = Jsoup.parseBodyFragment(message.getMessage());
    final String plainText = body.text();

    String spoken = plainText;
    if (mShortTtsMessagesEnabled) {
        for (Element link : body.getElementsByTag("A")) {
            final String target = link.attr("href");
            // Only shorten anchors without custom text.
            if (target == null || !target.equals(link.text())) {
                continue;
            }
            // Keep just the domain portion of the link.
            final String host = HtmlUtils.getHostnameFromLink(target);
            if (host != null) {
                link.text(getString(R.string.chat_message_tts_short_link, host));
            }
        }
        spoken = body.text();
    }

    final String announcement = getString(R.string.notification_message,
            message.getActorName(), spoken);

    // Speak only if TTS is enabled and available, the text is within the
    // threshold, and there is a session user who is not self-deafened.
    final boolean shouldSpeak = mSettings.isTextToSpeechEnabled()
            && mTTS != null
            && announcement.length() <= TTS_THRESHOLD
            && getSessionUser() != null
            && !getSessionUser().isSelfDeafened();
    if (shouldSpeak) {
        mTTS.speak(announcement, TextToSpeech.QUEUE_ADD, null);
    }

    // TODO: create a customizable notification sieve
    if (mSettings.isChatNotifyEnabled()) {
        mMessageNotification.show(message);
    }

    mMessageLog.add(new IChatMessage.TextMessage(message));
}
/**
 * Removes all markup from an HTML string.
 *
 * @param html the HTML to strip
 * @return the text content of the parsed document
 */
public static String stripTags(String html) {
    return Jsoup.parse(html).text();
}
/**
 * Collects the outcome of today's 1A0001 activity for a user.
 *
 * <p>The user's latest 1A0001 point transfer supplies the guess (the part of
 * its data id after the first "-") and the staked sum. The current quote is
 * fetched from stockpage.10jqka.com.cn and its last decimal digit decides the
 * result: 0-4 maps to "0", 5-9 maps to "1". When guess and result match,
 * twice the staked sum is transferred from the system account to the user.
 *
 * @param userId the specified user id
 * @return a result object; its status code is set to {@code true} once the
 *         outcome could be determined, and its message describes the outcome
 *         or the reason collection was refused or failed
 */
public synchronized JSONObject collect1A0001(final String userId) {
final JSONObject ret = Results.falseResult();
// Refuse when the user did not take part in the activity today.
if (!activityQueryService.is1A0001Today(userId)) {
ret.put(Keys.MSG, langPropsService.get("activityNotParticipatedLabel"));
return ret;
}
// Refuse when today's result was already collected.
if (activityQueryService.isCollected1A0001Today(userId)) {
ret.put(Keys.MSG, langPropsService.get("activityParticipatedLabel"));
return ret;
}
// Latest 1A0001 transfer of the user; the trailing 1 is presumably the fetch size — TODO confirm.
final List<JSONObject> records = pointtransferQueryService.getLatestPointtransfers(userId,
Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001, 1);
// NOTE(review): assumes at least one record exists and that the data id contains a "-";
// otherwise get(0) / [1] below throw — confirm the participation check guarantees this.
final JSONObject pointtransfer = records.get(0);
final String data = pointtransfer.optString(Pointtransfer.DATA_ID);
final String smallOrLarge = data.split("-")[1];
final int sum = pointtransfer.optInt(Pointtransfer.SUM);
String smallOrLargeResult = null;
try {
// Fetch the quote header with a 5000 ms timeout; the response body text is parsed as JSON.
final Document doc = Jsoup.parse(new URL("http://stockpage.10jqka.com.cn/1A0001/quote/header/"), 5000);
final JSONObject result = new JSONObject(doc.text());
final String price = result.optJSONObject("data").optJSONObject("1A0001").optString("10");
if (!price.contains(".")) {
// A price without a decimal point counts as result "0".
smallOrLargeResult = "0";
} else {
// Use the last digit only when there is more than one decimal place; otherwise treat it as 0.
int endInt = 0;
if (price.split("\\.")[1].length() > 1) {
final String end = price.substring(price.length() - 1);
endInt = Integer.valueOf(end);
}
if (0 <= endInt && endInt <= 4) {
smallOrLargeResult = "0";
} else if (5 <= endInt && endInt <= 9) {
smallOrLargeResult = "1";
} else {
// Out-of-range digit: the result stays null, so the comparison below cannot match.
LOGGER.error("Activity 1A0001 collect result [" + endInt + "]");
}
}
} catch (final Exception e) {
// Any fetch or parse failure aborts the collection with a failure message.
LOGGER.log(Level.ERROR, "Collect 1A0001 failed", e);
ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
return ret;
}
if (Strings.isEmptyOrNull(smallOrLarge)) {
ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
return ret;
}
// From here on the outcome is known, so the call is reported as successful.
ret.put(Keys.STATUS_CODE, true);
if (StringUtils.equals(smallOrLarge, smallOrLargeResult)) {
// Correct guess: pay out double the staked sum from the system account.
final int amount = sum * 2;
final boolean succ = null != pointtransferMgmtService.transfer(Pointtransfer.ID_C_SYS, userId,
Pointtransfer.TRANSFER_TYPE_C_ACTIVITY_1A0001_COLLECT, amount,
DateFormatUtils.format(new Date(), "yyyyMMdd") + "-" + smallOrLargeResult);
if (succ) {
String msg = langPropsService.get("activity1A0001CollectSucc1Label");
msg = msg.replace("{point}", String.valueOf(amount));
ret.put(Keys.MSG, msg);
} else {
ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectFailLabel"));
}
} else {
// Wrong guess: nothing is transferred.
ret.put(Keys.MSG, langPropsService.get("activity1A0001CollectSucc0Label"));
}
return ret;
}
/**
 * Passes the complete text of an HTML file to {@code addStringToTermRep}.
 *
 * @param inputFile the HTML file to read, decoded using {@code ENCODING}
 * @throws IOException if the file cannot be read or parsed
 */
private void addHtmlFileToTermRep(File inputFile) throws IOException {
    final String pageText = Jsoup.parse(inputFile, ENCODING).text();
    addStringToTermRep(pageText);
}