Search in sources:

Example 61 with Element

use of org.jsoup.nodes.Element in project mzzb-server by mingzuozhibi.

the class SakuraSpeedSpider method updateSakuraDiscs.

/**
 * Rebuilds the disc list of {@code sakura} from a stream of scraped table rows.
 *
 * <p>For every row the disc is looked up (or created) from the last ten characters of the
 * link found in the sixth cell. Discs whose update type is {@code Sakura} get their ranks,
 * points and timestamps refreshed from the row. When the list key is {@code "9999-99"} the
 * resulting discs replace the list as-is; otherwise previously attached discs that did not
 * appear in the rows are carried over (subject to {@code noExpiredSakura}) and the merged
 * set is stored in sorted order.
 *
 * @param sakura    the list being refreshed; its discs are replaced by this call
 * @param tableRows the scraped rows, one per disc
 */
private void updateSakuraDiscs(Sakura sakura, Stream<Element> tableRows) {
    Set<Disc> merged = new LinkedHashSet<>(sakura.getDiscs().size());
    boolean isTop100 = "9999-99".equals(sakura.getKey());

    tableRows.forEach(row -> {
        String link = row.child(5).child(0).attr("href");
        Disc current = getOrCreateDisc(link.substring(link.length() - 10), row);

        // Outside the "9999-99" list, a disc tracked by both sources is handed to Sakura.
        if (current.getUpdateType() == UpdateType.Both && !isTop100) {
            current.setUpdateType(UpdateType.Sakura);
        }

        if (current.getUpdateType() == UpdateType.Sakura) {
            // First cell holds two values separated by "/": this rank, then previous rank.
            String[] rankParts = row.child(0).text().split("/");
            current.setThisRank(parseInteger(rankParts[0]));
            current.setPrevRank(parseInteger(rankParts[1]));
            current.setTotalPt(parseInteger(row.child(2).text()));
            current.setNicoBook(parseInteger(row.child(3).text()));
            current.setUpdateTime(sakura.getModifyTime());

            boolean rankChanged = !Objects.equals(current.getThisRank(), current.getPrevRank());
            if (rankChanged) {
                current.setModifyTime(sakura.getModifyTime());
            }
        }

        merged.add(current);
    });

    if (isTop100) {
        sakura.setDiscs(new LinkedList<>(merged));
    } else {
        boolean noExpiredSakura = noExpiredSakura(sakura);
        for (Disc existing : sakura.getDiscs()) {
            // Sakura-only discs are carried over only while noExpiredSakura holds.
            boolean eligible = existing.getUpdateType() != UpdateType.Sakura || noExpiredSakura;
            if (!eligible) {
                continue;
            }
            if (merged.contains(existing)) {
                continue;
            }
            // A carried-over disc was absent from the rows, so it reverts to Both.
            if (existing.getUpdateType() == UpdateType.Sakura) {
                existing.setUpdateType(UpdateType.Both);
            }
            merged.add(existing);
        }
        sakura.setDiscs(merged.stream().sorted().collect(Collectors.toList()));
    }

    LOGGER.debug("成功更新[{}]列表", sakura.getTitle());
}
Also used : java.util(java.util) Logger(org.slf4j.Logger) Util(mingzuozhibi.service.SakuraSpeedSpider.Util) LoggerFactory(org.slf4j.LoggerFactory) LocalDateTime(java.time.LocalDateTime) Autowired(org.springframework.beans.factory.annotation.Autowired) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Disc(mingzuozhibi.persist.disc.Disc) SakuraHelper.noExpiredSakura(mingzuozhibi.support.SakuraHelper.noExpiredSakura) Stream(java.util.stream.Stream) Dao(mingzuozhibi.support.Dao) ViewType(mingzuozhibi.persist.disc.Sakura.ViewType) Service(org.springframework.stereotype.Service) UpdateType(mingzuozhibi.persist.disc.Disc.UpdateType) Document(org.jsoup.nodes.Document) Element(org.jsoup.nodes.Element) LocalDate(java.time.LocalDate) DateTimeFormatter(java.time.format.DateTimeFormatter) Sakura(mingzuozhibi.persist.disc.Sakura) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) DiscType(mingzuozhibi.persist.disc.Disc.DiscType) Disc(mingzuozhibi.persist.disc.Disc)

Example 62 with Element

use of org.jsoup.nodes.Element in project Palm300Heroes by nicolite.

the class NewsModel method getTYAVideoContent.

/**
 * Rewrites {@code document} into a minimal HTML5 page that contains only the elements with
 * class {@code inner}, and returns the resulting markup.
 *
 * <p>Inline {@code style} attributes are stripped from images, spans, paragraphs, divs and
 * headings inside the content, image sources are made absolute, the head is replaced with a
 * fixed set of meta tags plus an image-fitting stylesheet, and the XHTML doctype/namespace
 * are swapped for plain HTML5. The passed document is modified in place.
 *
 * @param document the parsed page; mutated by this call
 * @return the rewritten page as an HTML string
 */
public static String getTYAVideoContent(Document document) {
    Element body = document.body();
    Element head = document.head();
    Elements inner = body.getElementsByClass("inner");

    for (Element image : inner.select("img[src]")) {
        image.removeAttr("style");
        // absUrl yields "" when no absolute URL can be built; keep the original src then
        // instead of blanking it.
        String absoluteSrc = image.absUrl("src");
        if (!absoluteSrc.isEmpty()) {
            image.attr("src", absoluteSrc);
        }
    }

    // One grouped selector replaces the per-tag loops that each only removed "style".
    inner.select("span, p, div, h1, h2, h3, h4, h5, h6").removeAttr("style");

    head.empty();
    head.append("  <meta charset=\"utf-8\">" + "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">" + "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">" + "<style type=\"text/css\">img{width: 100%; height: 100%; object-fit: contain}</style>");
    body.empty();
    // "inner" still references the detached elements, so their markup can be re-attached.
    body.append(inner.toString());
    String html = document.toString().replace("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", "<!DOCTYPE html>").replace("xmlns=\"http://www.w3.org/1999/xhtml\"", "");
    LogUtils.d(TAG, html);
    return html;
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Example 63 with Element

use of org.jsoup.nodes.Element in project Palm300Heroes by nicolite.

the class NewsModel method getNewsList.

/**
 * Collects news entries from every element with class {@code newsList} in the document body.
 *
 * <p>Within each such element the i-th {@code a} tag is paired with the i-th {@code h2} tag:
 * the link supplies title and absolute URL, the heading supplies the date text. Surplus
 * links or headings without a counterpart are ignored.
 *
 * @param document the parsed page to read from
 * @param newsType the type value stored on every created entry
 * @return the entries in document order; empty when nothing matches
 */
public static List<News> getNewsList(Document document, String newsType) {
    List<News> result = new ArrayList<>();
    for (Element listBlock : document.body().getElementsByClass("newsList")) {
        Elements links = listBlock.getElementsByTag("a");
        Elements dateHeadings = listBlock.getElementsByTag("h2");
        // Only indices present in both collections form a complete entry.
        int pairCount = Math.min(links.size(), dateHeadings.size());
        for (int index = 0; index < pairCount; index++) {
            News item = new News();
            Element link = links.get(index);
            item.setNewsTitle(link.text());
            item.setNewsUrl(link.attr("abs:href"));
            item.setNewsDate(dateHeadings.get(index).text());
            item.setNewsType(newsType);
            result.add(item);
        }
    }
    return result;
}
Also used : News(cn.nicolite.palm300heroes.model.bean.News) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Elements(org.jsoup.select.Elements)

Example 64 with Element

use of org.jsoup.nodes.Element in project Palm300Heroes by nicolite.

the class NewsModel method getTYACOSContent.

/**
 * Rewrites {@code document} into a minimal HTML5 page that contains only the images found
 * inside elements with class {@code filmstrip}, and returns the resulting markup.
 *
 * <p>Each image loses its {@code style}, {@code width} and {@code height} attributes and has
 * its source made absolute; the head is replaced with a fixed set of meta tags plus an
 * image-fitting stylesheet, and the XHTML doctype/namespace are swapped for plain HTML5.
 * The passed document is modified in place.
 *
 * @param document the parsed page; mutated by this call
 * @return the rewritten page as an HTML string
 */
public static String getTYACOSContent(Document document) {
    Element body = document.body();
    Element head = document.head();
    Elements img = body.getElementsByClass("filmstrip").select("img");

    for (Element image : img) {
        // absUrl yields "" when src is missing or cannot be made absolute; in that case
        // leave the element untouched rather than writing an empty src attribute.
        String absoluteSrc = image.absUrl("src");
        if (!absoluteSrc.isEmpty()) {
            image.attr("src", absoluteSrc);
        }
    }
    // Sizing is delegated to the stylesheet injected below, so drop per-image overrides.
    img.removeAttr("style").removeAttr("width").removeAttr("height");

    head.empty();
    head.append("  <meta charset=\"utf-8\">" + "<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">" + "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">" + "<style type=\"text/css\">img{width: 100%; height: 100%; object-fit: contain}</style>");
    body.empty();
    // "img" still references the detached elements, so their markup can be re-attached.
    body.append(img.toString());
    String html = document.toString().replace("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", "<!DOCTYPE html>").replace("xmlns=\"http://www.w3.org/1999/xhtml\"", "");
    LogUtils.d(TAG, html);
    return html;
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Example 65 with Element

use of org.jsoup.nodes.Element in project nixmash-blog by mintster.

the class JsoupUI method displayImages.

/**
 * Prints a summary line for every element in {@code doc} that has a {@code src} attribute.
 *
 * <p>Images are listed with their absolute source, width, height and a shortened alt text;
 * any other element is listed with its tag name and absolute source only.
 */
private void displayImages() {
    Elements sources = doc.select("[src]");
    print("\nMedia: (%d)", sources.size());
    for (Element node : sources) {
        if (!node.tagName().equals("img")) {
            print(" * %s: <%s>", node.tagName(), node.attr("abs:src"));
            continue;
        }
        print(" * %s: <%s> %sx%s (%s)",
                node.tagName(),
                node.attr("abs:src"),
                node.attr("width"),
                node.attr("height"),
                trim(node.attr("alt"), 60));
    }
}
Also used : Element(org.jsoup.nodes.Element) Elements(org.jsoup.select.Elements)

Aggregations

Element (org.jsoup.nodes.Element): 1237, Document (org.jsoup.nodes.Document): 559, Elements (org.jsoup.select.Elements): 529, ArrayList (java.util.ArrayList): 316, IOException (java.io.IOException): 220, Test (org.junit.Test): 144, ElementHandlerImpl (org.asqatasun.ruleimplementation.ElementHandlerImpl): 90, File (java.io.File): 87, URL (java.net.URL): 82, Matcher (java.util.regex.Matcher): 73, List (java.util.List): 60, HashMap (java.util.HashMap): 57, Pattern (java.util.regex.Pattern): 54, Node (org.jsoup.nodes.Node): 50, TextNode (org.jsoup.nodes.TextNode): 48, InputStream (java.io.InputStream): 38, JSONException (org.json.JSONException): 36, BasicNameValuePair (org.apache.http.message.BasicNameValuePair): 35, Map (java.util.Map): 34, JSONObject (org.json.JSONObject): 34