Search in sources :

Example 51 with Elements

use of org.jsoup.select.Elements in project CodeUtils by boredream.

the class ZhiHuReptile method parseAnswer.

/**
 * Builds an {@link Answer} from the root element of a single answer entry.
 *
 * <p>Extracts the question title and link, the vote count, the answer link and the
 * summary text from the descendants of {@code answerRootElement}.
 *
 * @param answerRootElement element that wraps one answer entry
 * @return the populated answer; {@code answerLink} is an empty string when no element with an
 *         {@code href} containing "answer" is found
 * @throws Exception if the question link, vote info or summary element is missing
 *         (the {@code get(0)} lookups fail on an empty result)
 */
public static Answer parseAnswer(Element answerRootElement) throws Exception {
    // Example markup: <a class="question_link" target="_blank" href="/question/25504353">question title</a>
    Element questionElement = answerRootElement.getElementsByAttributeValue("class", "question_link").get(0);
    String questionName = questionElement.text();
    String questionLink = hostUrl + questionElement.attr("href");

    // Example markup: <div class="zm-item-vote-info " data-votecount="40947">
    Element voteCountElement = answerRootElement.getElementsByAttributeValueContaining("class", "zm-item-vote-info").get(0);
    String voteCount = voteCountElement.attr("data-votecount");

    // Example markup:
    // <span class="answer-date-link-wrap">
    //   <a class="answer-date-link last_updated meta-item" target="_blank"
    //      href="/question/25504353/answer/30949097">edited on 2014-09-24</a>
    // </span>
    String answerLink = "";
    Elements answerLinkElements = answerRootElement.getElementsByAttributeValueContaining("href", "answer");
    if (!answerLinkElements.isEmpty()) {
        answerLink = hostUrl + answerLinkElements.get(0).attr("href");
    } else {
        System.out.println(answerRootElement + " ... no answer href");
    }

    // Example markup:
    // <div class="zh-summary summary clearfix">
    //   ...summary text...
    //   <a href="/question/25504353/answer/30949097" class="toggle-expand">显示全部</a>
    // </div>
    Element summaryElement = answerRootElement.getElementsByAttributeValue("class", "zh-summary summary clearfix").get(0);
    String summary = summaryElement.text();
    // Strip the trailing "show all" link text only when it is actually present; cutting a fixed
    // four characters would otherwise eat real summary text or fail on very short summaries.
    final String expandLinkText = "显示全部";
    if (summary.endsWith(expandLinkText)) {
        summary = summary.substring(0, summary.length() - expandLinkText.length());
    }

    Answer answer = new Answer();
    answer.questionName = questionName;
    answer.questionLink = questionLink;
    answer.voteCount = voteCount;
    answer.answerLink = answerLink;
    answer.summary = summary;
    return answer;
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Example 52 with Elements

use of org.jsoup.select.Elements in project CodeUtils by boredream.

the class ZhiHuReptile method getAnsersOfPage.

/**
 * Downloads the "top answers" page of a topic and parses every answer entry on it.
 *
 * @param topicUrl base URL of the topic; "/top-answers" is appended to it
 * @return one {@link Answer} per element whose {@code class} attribute equals "content"
 * @throws Exception if the page cannot be fetched or an entry cannot be parsed
 */
private static List<Answer> getAnsersOfPage(String topicUrl) throws Exception {
    // e.g. http://www.zhihu.com/topic/19550517/top-answers
    String html = HttpUtils.getString(topicUrl + "/top-answers");
    Document page = Jsoup.parse(html);

    List<Answer> result = new ArrayList<Answer>();
    // Each <div class="content"> is handed to parseAnswer as one answer entry.
    for (Element entry : page.getElementsByAttributeValue("class", "content")) {
        result.add(parseAnswer(entry));
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 53 with Elements

use of org.jsoup.select.Elements in project CodeUtils by boredream.

the class ZhiHuReptile method getAllTopics.

/**
 * Fetches the page at {@code allTopicsUrl} and collects one {@link Topic} per element whose
 * {@code href} attribute starts with "#".
 *
 * @return the topics in document order; the name is the element's text and the id is the
 *         {@code data-id} attribute of its parent element
 * @throws Exception if the page cannot be fetched
 */
public static List<Topic> getAllTopics() throws Exception {
    Document page = Jsoup.parse(HttpUtils.getString(allTopicsUrl));
    // Example markup: <li data-id="99" class="current"><a href="#topic-name">topic name</a></li>
    Elements anchors = page.getElementsByAttributeValueStarting("href", "#");

    List<Topic> result = new ArrayList<Topic>();
    for (int i = 0; i < anchors.size(); i++) {
        Element anchor = anchors.get(i);
        Topic item = new Topic();
        item.name = anchor.text();
        // The id lives on the enclosing element, not on the anchor itself.
        item.topic_id = anchor.parent().attr("data-id");
        result.add(item);
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 54 with Elements

use of org.jsoup.select.Elements in project CodeUtils by boredream.

the class BCGMUtils method getMainUrl.

/**
 * Parses the locally cached main page (temp/reptile/bcgm_main.txt) and collects every entry link
 * together with the section headline it appears under.
 *
 * <p>The document is walked in order: an element whose class is exactly "mw-headline" sets the
 * current section type, and any later element whose title starts with "本草纲目/" becomes an entry
 * of that type.
 *
 * @return the collected entries in document order; elements seen before the first headline are
 *         ignored
 */
public static List<CaoMain> getMainUrl() {
    // The page is read from a local copy rather than fetched from mainUrl.
    File cachedPage = new File("temp" + File.separator + "reptile" + File.separator + "bcgm_main.txt");
    String html = FileUtils.readToString(cachedPage, "UTF-8");

    List<CaoMain> result = new ArrayList<CaoMain>();
    String currentType = null;
    for (Element node : Jsoup.parse(html).getAllElements()) {
        // Section headline, e.g. <span class="mw-headline" id="...">section name</span>
        if ("mw-headline".equals(node.attr("class"))) {
            currentType = node.text();
            continue;
        }

        // Entry link, e.g. <a href="/w/..." title="本草纲目/entry name">entry name</a>
        String title = node.attr("title");
        String href = node.attr("href");
        if (currentType == null || href == null || title == null || !title.startsWith("本草纲目/")) {
            continue;
        }

        CaoMain entry = new CaoMain();
        entry.setType(currentType);
        entry.setName(node.text());
        entry.setHref(DOMAIN_URL + href);
        result.add(entry);
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) File(java.io.File)

Example 55 with Elements

use of org.jsoup.select.Elements in project CodeUtils by boredream.

the class BCGMUtils method getDetailData.

/**
 * Fetches the detail page for an entry and joins the text of its paragraphs.
 *
 * <p>The text of each {@code <p>} element is appended followed by "&lt;br/&gt;". Collection stops
 * at the first paragraph whose text starts with "--".
 *
 * @param caoName entry name; it is URL-encoded as UTF-8 and appended to {@code DOMAIN_URL}
 * @return the concatenated paragraph text, trimmed
 * @throws Exception if the page cannot be fetched or the name cannot be encoded
 */
public static String getDetailData(String caoName) throws Exception {
    String html = HttpUtils.getString(DOMAIN_URL + URLEncoder.encode(caoName, "UTF-8"));
    Document page = Jsoup.parse(html);

    StringBuilder collected = new StringBuilder();
    for (Element paragraph : page.getElementsByTag("p")) {
        String line = paragraph.text();
        // A paragraph starting with "--" ends the part of the page that is collected.
        if (line.startsWith("--")) {
            break;
        }
        collected.append(line).append("<br/>");
    }
    return collected.toString().trim();
}
Also used : Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Aggregations

Elements (org.jsoup.select.Elements) 709, Element (org.jsoup.nodes.Element) 490, Document (org.jsoup.nodes.Document) 362, ArrayList (java.util.ArrayList) 213, IOException (java.io.IOException) 151, Test (org.junit.Test) 110, URL (java.net.URL) 58, List (java.util.List) 47, Matcher (java.util.regex.Matcher) 42, Pattern (java.util.regex.Pattern) 34, HashMap (java.util.HashMap) 30, InputStream (java.io.InputStream) 29, Jsoup (org.jsoup.Jsoup) 28, Configuration (com.vaadin.addon.charts.model.Configuration) 27, File (java.io.File) 26, JSONObject (org.json.JSONObject) 26, JSONException (org.json.JSONException) 25, Collectors (java.util.stream.Collectors) 23, URISyntaxException (java.net.URISyntaxException) 22, BootstrapContext (com.vaadin.flow.server.BootstrapHandler.BootstrapContext) 20