use of org.jsoup.select.Elements in project CodeUtils by boredream.
the class ZhiHuReptile method parseAnswer.
/**
 * Extracts a single answer entry from one answer container element.
 *
 * <p>The question link, the vote-count holder and the summary block are looked up with
 * {@code get(0)}, so a container that lacks any of them fails with an
 * {@link IndexOutOfBoundsException}. The answer link is optional: when no element with an
 * {@code href} containing "answer" exists, {@code answerLink} stays empty and a diagnostic line
 * is printed to standard output.
 *
 * @param answerRootElement element that wraps the markup of one answer
 * @return a populated {@code Answer}; {@code voteCount} is the raw {@code data-votecount}
 *         attribute text
 * @throws Exception declared for callers; the visible code only raises unchecked exceptions
 *         when required markup is missing
 */
public static Answer parseAnswer(Element answerRootElement) throws Exception {
    // Expected markup:
    // <a class="question_link" target="_blank" href="/question/25504353">question title</a>
    Element questionElement = answerRootElement.getElementsByAttributeValue("class", "question_link").get(0);
    String questionName = questionElement.text();
    String questionLink = hostUrl + questionElement.attr("href");

    // Expected markup: <div class="zm-item-vote-info " data-votecount="40947">
    Element voteCountElement = answerRootElement.getElementsByAttributeValueContaining("class", "zm-item-vote-info").get(0);
    String voteCount = voteCountElement.attr("data-votecount");

    // Expected markup:
    // <span class="answer-date-link-wrap">
    //   <a class="answer-date-link last_updated meta-item" target="_blank"
    //      href="/question/25504353/answer/30949097">edit date</a>
    // </span>
    String answerLink = "";
    Elements answerLinkElements = answerRootElement.getElementsByAttributeValueContaining("href", "answer");
    if (!answerLinkElements.isEmpty()) {
        answerLink = hostUrl + answerLinkElements.get(0).attr("href");
    } else {
        System.out.println(answerRootElement + " ... no answer href");
    }

    // Expected markup:
    // <div class="zh-summary summary clearfix">
    //   summary text ...
    //   <a href="/question/25504353/answer/30949097" class="toggle-expand">expand link</a>
    // </div>
    Element summaryElement = answerRootElement.getElementsByAttributeValue("class", "zh-summary summary clearfix").get(0);
    String summary = summaryElement.text();

    // The expand link contributes the trailing label "显示全部" ("show all") to the text.
    // Strip it only when it is really there: short summaries have no expand link, and
    // blindly cutting four characters would drop real content or throw on very short text.
    final String expandLabel = "显示全部";
    if (summary.endsWith(expandLabel)) {
        summary = summary.substring(0, summary.length() - expandLabel.length());
    }

    Answer answer = new Answer();
    answer.questionName = questionName;
    answer.questionLink = questionLink;
    answer.voteCount = voteCount;
    answer.answerLink = answerLink;
    answer.summary = summary;
    return answer;
}
use of org.jsoup.select.Elements in project CodeUtils by boredream.
the class ZhiHuReptile method getAnsersOfPage.
/**
 * Downloads the "top-answers" page of a topic and parses every element whose class attribute
 * is exactly {@code content} into an {@code Answer}.
 *
 * @param topicUrl topic base URL, e.g. http://www.zhihu.com/topic/19550517 (no trailing slash)
 * @return the parsed answers in document order
 * @throws Exception if the download or the parsing of any element fails
 */
private static List<Answer> getAnsersOfPage(String topicUrl) throws Exception {
    // Resulting URL looks like http://www.zhihu.com/topic/19550517/top-answers
    String html = HttpUtils.getString(topicUrl + "/top-answers");

    // Each answer lives in a <div class="content"> container.
    Elements containers = Jsoup.parse(html).getElementsByAttributeValue("class", "content");

    List<Answer> result = new ArrayList<Answer>();
    for (int i = 0; i < containers.size(); i++) {
        result.add(parseAnswer(containers.get(i)));
    }
    return result;
}
use of org.jsoup.select.Elements in project CodeUtils by boredream.
the class ZhiHuReptile method getAllTopics.
/**
 * Downloads the page at {@code allTopicsUrl} and turns every element whose {@code href}
 * starts with "#" into a {@code Topic}.
 *
 * <p>Expected markup: {@code <li data-id="99" class="current"><a href="#name">name</a></li>}.
 * The topic name is the link text and the id is the {@code data-id} attribute of the link's
 * parent element.
 *
 * @return the topics in document order
 * @throws Exception if the download fails
 */
public static List<Topic> getAllTopics() throws Exception {
    String html = HttpUtils.getString(allTopicsUrl);
    Elements anchorLinks = Jsoup.parse(html).getElementsByAttributeValueStarting("href", "#");

    List<Topic> topics = new ArrayList<Topic>();
    for (int i = 0; i < anchorLinks.size(); i++) {
        Element link = anchorLinks.get(i);

        Topic current = new Topic();
        current.name = link.text();
        // The id is stored on the enclosing element, not on the link itself.
        current.topic_id = link.parent().attr("data-id");
        topics.add(current);
    }
    return topics;
}
use of org.jsoup.select.Elements in project CodeUtils by boredream.
the class BCGMUtils method getMainUrl.
/**
 * Builds the list of {@code CaoMain} entries from the saved index page at
 * {@code temp/reptile/bcgm_main.txt}.
 *
 * <p>The document is walked in order. An element whose class attribute is exactly
 * {@code mw-headline} sets the current type to its text. Every later element whose
 * {@code title} starts with "本草纲目/" becomes an entry with that type, the element text as
 * name and {@code DOMAIN_URL + href} as link. Matching elements that appear before the first
 * headline are skipped.
 *
 * @return the collected entries in document order
 */
public static List<CaoMain> getMainUrl() {
    // The page is read from a local snapshot file rather than fetched over HTTP.
    File snapshot = new File("temp" + File.separator + "reptile" + File.separator + "bcgm_main.txt");
    String html = FileUtils.readToString(snapshot, "UTF-8");

    List<CaoMain> entries = new ArrayList<CaoMain>();
    String currentType = null;

    for (Element node : Jsoup.parse(html).getAllElements()) {
        // Section headline, e.g. <span class="mw-headline" id="...">section name</span>
        if ("mw-headline".equals(node.attr("class"))) {
            currentType = node.text();
            continue;
        }

        // Entry link, e.g. <a href="/w/..." title="本草纲目/entry name">entry name</a>
        String title = node.attr("title");
        String href = node.attr("href");
        if (currentType == null || href == null || title == null || !title.startsWith("本草纲目/")) {
            continue;
        }

        CaoMain entry = new CaoMain();
        entry.setType(currentType);
        entry.setName(node.text());
        entry.setHref(DOMAIN_URL + href);
        entries.add(entry);
    }
    return entries;
}
use of org.jsoup.select.Elements in project CodeUtils by boredream.
the class BCGMUtils method getDetailData.
/**
 * Downloads the detail page for the given name and joins the text of its {@code <p>}
 * elements, each followed by {@code <br/>}.
 *
 * <p>Collection stops at the first paragraph whose text starts with "--"; that paragraph and
 * everything after it are left out. The joined text is trimmed before it is returned.
 *
 * @param caoName page name; it is URL-encoded as UTF-8 and appended to {@code DOMAIN_URL}
 * @return the concatenated paragraph text, or an empty string when nothing was collected
 * @throws Exception if the download or the URL encoding fails
 */
public static String getDetailData(String caoName) throws Exception {
    String pageUrl = DOMAIN_URL + URLEncoder.encode(caoName, "UTF-8");
    Document page = Jsoup.parse(HttpUtils.getString(pageUrl));

    StringBuilder collected = new StringBuilder();
    for (Element paragraph : page.getElementsByTag("p")) {
        String paragraphText = paragraph.text();
        if (paragraphText.startsWith("--")) {
            // A paragraph starting with "--" ends the part of the page that is kept.
            break;
        }
        collected.append(paragraphText).append("<br/>");
    }
    return collected.toString().trim();
}
Aggregations