Search in sources:

Example 81 with Elements

use of org.jsoup.select.Elements in project EhViewer by seven332.

the class GalleryDetailParser method parseLargePreviewSet.

/**
 * Parse large previews from the parsed HTML document.
 *
 * <p>Looks up the element with id {@code gdt} and reads every descendant with class
 * {@code gdtl}. For each of them, the first child supplies the page url ({@code href}), and
 * that child's first child supplies the image url ({@code src}) and the item index (the
 * integer value of {@code alt} minus one).
 *
 * @param d the parsed document
 * @param body the page text, passed along to the {@link ParseException} on failure
 * @return the collected previews; at least one item has been added when this returns
 * @throws ParseException if the {@code gdt} element is missing, contains no {@code gdtl}
 *         element, or any preview entry can't be read
 */
private static LargePreviewSet parseLargePreviewSet(Document d, String body) throws ParseException {
    try {
        Element gdt = d.getElementById("gdt");
        // Fail explicitly instead of relying on a NullPointerException being caught below
        if (null == gdt) {
            throw new ParseException("Can't parse large preview", body);
        }
        Elements gdtls = gdt.getElementsByClass("gdtl");
        if (gdtls.isEmpty()) {
            throw new ParseException("Can't parse large preview", body);
        }
        LargePreviewSet largePreviewSet = new LargePreviewSet();
        for (Element gdtl : gdtls) {
            // First child carries the link to the page
            Element link = gdtl.child(0);
            String pageUrl = link.attr("href");
            // The link's first child carries the thumbnail
            Element image = link.child(0);
            String imageUrl = image.attr("src");
            if (Settings.getFixThumbUrl()) {
                imageUrl = EhUrl.getFixedPreviewThumbUrl(imageUrl);
            }
            int index = Integer.parseInt(image.attr("alt")) - 1;
            largePreviewSet.addItem(index, imageUrl, pageUrl);
        }
        return largePreviewSet;
    } catch (Exception e) {
        // Any failure (including the explicit ones above) is reported as a ParseException
        e.printStackTrace();
        throw new ParseException("Can't parse large preview", body);
    }
}
Also used : LargePreviewSet(com.hippo.ehviewer.client.data.LargePreviewSet) Element(org.jsoup.nodes.Element) ParseException(com.hippo.ehviewer.client.exception.ParseException) Elements(org.jsoup.select.Elements) EhException(com.hippo.ehviewer.client.exception.EhException) ParseException(com.hippo.ehviewer.client.exception.ParseException) OffensiveException(com.hippo.ehviewer.client.exception.OffensiveException) PiningException(com.hippo.ehviewer.client.exception.PiningException)

Example 82 with Elements

use of org.jsoup.select.Elements in project EhViewer by seven332.

the class GalleryDetailParser method parseTagGroups.

/**
 * Parse tag groups with html parser.
 *
 * <p>The candidates are the children of the first child of the first child of the element
 * with id {@code taglist}. Each candidate is handed to {@code parseTagGroup}; candidates for
 * which it returns {@code null} are skipped.
 *
 * @param document the parsed document
 * @return the parsed groups, or {@code EMPTY_GALLERY_TAG_GROUP_ARRAY} if anything throws
 */
@NonNull
public static GalleryTagGroup[] parseTagGroups(Document document) {
    try {
        Elements groupElements = document.getElementById("taglist").child(0).child(0).children();
        List<GalleryTagGroup> parsed = new ArrayList<>(groupElements.size());
        for (Element groupElement : groupElements) {
            GalleryTagGroup tagGroup = parseTagGroup(groupElement);
            if (tagGroup == null) {
                continue;
            }
            parsed.add(tagGroup);
        }
        return parsed.toArray(new GalleryTagGroup[parsed.size()]);
    } catch (Exception e) {
        e.printStackTrace();
        return EMPTY_GALLERY_TAG_GROUP_ARRAY;
    }
}
Also used : GalleryTagGroup(com.hippo.ehviewer.client.data.GalleryTagGroup) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Elements(org.jsoup.select.Elements) EhException(com.hippo.ehviewer.client.exception.EhException) ParseException(com.hippo.ehviewer.client.exception.ParseException) OffensiveException(com.hippo.ehviewer.client.exception.OffensiveException) PiningException(com.hippo.ehviewer.client.exception.PiningException) NonNull(android.support.annotation.NonNull)

Example 83 with Elements

use of org.jsoup.select.Elements in project EhViewer by seven332.

the class GalleryDetailParser method parseDetailInfo.

/**
 * Stores one key/value entry of the detail table into {@code gd}.
 *
 * <p>The key is the trimmed text of the first child of {@code e}; the value is the trimmed
 * own text of the second child. The key prefix selects the field to write. Entries with
 * fewer than two children, or with an unrecognized key, leave {@code gd} untouched.
 *
 * @param gd the gallery detail to update
 * @param e the element holding the key and value children
 * @param body not used by this method
 */
private static void parseDetailInfo(GalleryDetail gd, Element e, String body) {
    Elements cells = e.children();
    if (cells.size() < 2) {
        return;
    }

    String label = StringUtils.trim(cells.get(0).text());
    String content = StringUtils.trim(cells.get(1).ownText());

    if (label.startsWith("Posted")) {
        gd.posted = content;
        return;
    }
    if (label.startsWith("Parent")) {
        gd.parent = content;
        return;
    }
    if (label.startsWith("Visible")) {
        gd.visible = content;
        return;
    }
    if (label.startsWith("Language")) {
        gd.language = content;
        return;
    }
    if (label.startsWith("File Size")) {
        gd.size = content;
        return;
    }
    if (label.startsWith("Length")) {
        // The page count is the text before the first space; default to 1
        int space = content.indexOf(' ');
        gd.pages = space < 0 ? 1 : NumberUtils.parseIntSafely(content.substring(0, space), 1);
        return;
    }
    if (!label.startsWith("Favorited")) {
        return;
    }

    // "Never" and "Once" are spelled out, otherwise the count precedes the first space
    if (content.equals("Never")) {
        gd.favoriteCount = 0;
    } else if (content.equals("Once")) {
        gd.favoriteCount = 1;
    } else {
        int space = content.indexOf(' ');
        gd.favoriteCount = space == -1 ? 0 : NumberUtils.parseIntSafely(content.substring(0, space), 0);
    }
}
Also used : Elements(org.jsoup.select.Elements)

Example 84 with Elements

use of org.jsoup.select.Elements in project EhViewer by seven332.

the class GalleryDetailParser method parseComment.

/**
 * Builds a {@link GalleryComment} from a comment element.
 *
 * <p>The id comes from the {@code name} attribute of the previous sibling element, the rest
 * from the descendants with classes {@code c3} to {@code c7}. The {@code c4}, {@code c5} and
 * {@code c7} parts are skipped when their element is not found.
 *
 * @param element the comment element
 * @return the parsed comment, or {@code null} if anything throws while parsing
 */
@Nullable
@SuppressWarnings("ConstantConditions")
public static GalleryComment parseComment(Element element) {
    try {
        GalleryComment result = new GalleryComment();

        // Id: the "name" attribute of the previous sibling, without its first character
        String anchorName = element.previousElementSibling().attr("name");
        result.id = Integer.parseInt(StringUtils.trim(anchorName).substring(1));

        // Vote up and vote down: a non-empty "style" attribute marks the vote as set
        Element voteBox = JsoupUtils.getElementByClass(element, "c4");
        if (voteBox != null) {
            Elements voteButtons = voteBox.children();
            if (voteButtons.size() == 2) {
                String upStyle = StringUtils.trim(voteButtons.get(0).attr("style"));
                result.voteUp = !TextUtils.isEmpty(upStyle);
                String downStyle = StringUtils.trim(voteButtons.get(1).attr("style"));
                result.voteDown = !TextUtils.isEmpty(downStyle);
            }
        }

        // Vote state
        Element voteStateBox = JsoupUtils.getElementByClass(element, "c7");
        if (voteStateBox != null) {
            result.voteState = StringUtils.trim(voteStateBox.text());
        }

        // Score: text of the first child, 0 if it is not a number
        Element scoreBox = JsoupUtils.getElementByClass(element, "c5");
        if (scoreBox != null) {
            Elements scoreParts = scoreBox.children();
            if (scoreParts.size() > 0) {
                String scoreText = StringUtils.trim(scoreParts.get(0).text());
                result.score = NumberUtils.parseIntSafely(scoreText, 0);
            }
        }

        // Time: strip the "Posted on " prefix and the " by:" suffix before parsing the date
        Element header = JsoupUtils.getElementByClass(element, "c3");
        String postedLine = header.ownText();
        int from = "Posted on ".length();
        int to = postedLine.length() - " by:".length();
        String dateText = postedLine.substring(from, to);
        result.time = WEB_COMMENT_DATE_FORMAT.parse(dateText).getTime();

        // User
        result.user = header.child(0).text();

        // Comment
        Element content = JsoupUtils.getElementByClass(element, "c6");
        result.comment = content.html();

        return result;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
Also used : Element(org.jsoup.nodes.Element) GalleryComment(com.hippo.ehviewer.client.data.GalleryComment) Elements(org.jsoup.select.Elements) EhException(com.hippo.ehviewer.client.exception.EhException) ParseException(com.hippo.ehviewer.client.exception.ParseException) OffensiveException(com.hippo.ehviewer.client.exception.OffensiveException) PiningException(com.hippo.ehviewer.client.exception.PiningException) Nullable(android.support.annotation.Nullable)

Example 85 with Elements

use of org.jsoup.select.Elements in project EhViewer by seven332.

the class GalleryDetailParser method parseDetail.

/**
 * Fills {@code gd} with the gallery information found in a gallery detail page.
 *
 * <p>The gid, token, api uid and api key, the torrent info and the archive url are matched
 * against the raw {@code body} with {@code PATTERN_DETAIL}, {@code PATTERN_TORRENT} and
 * {@code PATTERN_ARCHIVE}. Everything else is read from elements looked up by id under the
 * element returned by {@code JsoupUtils.getElementByClass(d, "gm")}. Fields whose element is
 * missing or unreadable fall back to an empty string, zero, {@code EhUtils.UNKNOWN}
 * (category) or {@code -1.0f} (rating).
 *
 * @param gd the gallery detail to fill
 * @param d the parsed document
 * @param body the page text, used for the regular expressions and passed along to the
 *        {@link ParseException} on failure
 * @throws ParseException if {@code PATTERN_DETAIL} doesn't match {@code body}, or if an
 *         exception escapes the element parsing (for example because the {@code gm} lookup
 *         yielded no element)
 */
@SuppressWarnings("ConstantConditions")
private static void parseDetail(GalleryDetail gd, Document d, String body) throws ParseException {
    // Ids and keys are mandatory
    Matcher matcher = PATTERN_DETAIL.matcher(body);
    if (matcher.find()) {
        gd.gid = Long.parseLong(matcher.group(1));
        gd.token = matcher.group(2);
        gd.apiUid = NumberUtils.parseLongSafely(matcher.group(3), -1L);
        gd.apiKey = matcher.group(4);
    } else {
        throw new ParseException("Can't parse gallery detail", body);
    }
    // Torrent info is optional
    matcher = PATTERN_TORRENT.matcher(body);
    if (matcher.find()) {
        gd.torrentUrl = StringUtils.unescapeXml(StringUtils.trim(matcher.group(1)));
        gd.torrentCount = NumberUtils.parseIntSafely(matcher.group(2), 0);
    } else {
        gd.torrentCount = 0;
        gd.torrentUrl = "";
    }
    // Archive url is optional
    matcher = PATTERN_ARCHIVE.matcher(body);
    if (matcher.find()) {
        gd.archiveUrl = StringUtils.unescapeXml(StringUtils.trim(matcher.group(1)));
    } else {
        gd.archiveUrl = "";
    }
    try {
        Element gm = JsoupUtils.getElementByClass(d, "gm");
        // Thumb url
        Element gd1 = gm.getElementById("gd1");
        try {
            gd.thumb = parseCoverStyle(StringUtils.trim(gd1.child(0).attr("style")));
        } catch (Exception e) {
            gd.thumb = "";
        }
        // Title
        Element gn = gm.getElementById("gn");
        if (null != gn) {
            gd.title = StringUtils.trim(gn.text());
        } else {
            gd.title = "";
        }
        // Jpn title
        Element gj = gm.getElementById("gj");
        if (null != gj) {
            gd.titleJpn = StringUtils.trim(gj.text());
        } else {
            gd.titleJpn = "";
        }
        // Category: the last path segment of the link in the first child
        Element gdc = gm.getElementById("gdc");
        try {
            String href = gdc.child(0).attr("href");
            String category = href.substring(href.lastIndexOf('/') + 1);
            gd.category = EhUtils.getCategory(category);
        } catch (Exception e) {
            gd.category = EhUtils.UNKNOWN;
        }
        // Uploader
        Element gdn = gm.getElementById("gdn");
        if (null != gdn) {
            gd.uploader = StringUtils.trim(gdn.text());
        } else {
            gd.uploader = "";
        }
        // Detail table: reset every field parseDetailInfo may fill, then read the entries
        Element gdd = gm.getElementById("gdd");
        gd.posted = "";
        gd.parent = "";
        gd.visible = "";
        gd.language = "";
        gd.size = "";
        gd.pages = 0;
        gd.favoriteCount = 0;
        try {
            Elements es = gdd.child(0).child(0).children();
            for (int i = 0, n = es.size(); i < n; i++) {
                parseDetailInfo(gd, es.get(i), body);
            }
        } catch (Exception ignored) {
            // Best effort: keep the defaults set above (and any entries already read)
        }
        // Rating count
        Element rating_count = gm.getElementById("rating_count");
        if (null != rating_count) {
            gd.ratingCount = NumberUtils.parseIntSafely(StringUtils.trim(rating_count.text()), 0);
        } else {
            gd.ratingCount = 0;
        }
        // Rating: the number follows the first space of the label
        Element rating_label = gm.getElementById("rating_label");
        if (null != rating_label) {
            String ratingStr = StringUtils.trim(rating_label.text());
            if ("Not Yet Rated".equals(ratingStr)) {
                gd.rating = -1.0f;
            } else {
                int index = ratingStr.indexOf(' ');
                if (index == -1) {
                    gd.rating = 0f;
                } else {
                    gd.rating = NumberUtils.parseFloatSafely(ratingStr.substring(index + 1), 0f);
                }
            }
        } else {
            gd.rating = -1.0f;
        }
        // isFavorited
        Element gdf = gm.getElementById("gdf");
        gd.isFavorited = null != gdf && !StringUtils.trim(gdf.text()).equals("Add to Favorites");
    } catch (Exception e) {
        throw new ParseException("Can't parse gallery detail", body);
    }
}
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ParseException(com.hippo.ehviewer.client.exception.ParseException) Elements(org.jsoup.select.Elements) EhException(com.hippo.ehviewer.client.exception.EhException) ParseException(com.hippo.ehviewer.client.exception.ParseException) OffensiveException(com.hippo.ehviewer.client.exception.OffensiveException) PiningException(com.hippo.ehviewer.client.exception.PiningException)

Aggregations

Elements (org.jsoup.select.Elements)709 Element (org.jsoup.nodes.Element)490 Document (org.jsoup.nodes.Document)362 ArrayList (java.util.ArrayList)213 IOException (java.io.IOException)151 Test (org.junit.Test)110 URL (java.net.URL)58 List (java.util.List)47 Matcher (java.util.regex.Matcher)42 Pattern (java.util.regex.Pattern)34 HashMap (java.util.HashMap)30 InputStream (java.io.InputStream)29 Jsoup (org.jsoup.Jsoup)28 Configuration (com.vaadin.addon.charts.model.Configuration)27 File (java.io.File)26 JSONObject (org.json.JSONObject)26 JSONException (org.json.JSONException)25 Collectors (java.util.stream.Collectors)23 URISyntaxException (java.net.URISyntaxException)22 BootstrapContext (com.vaadin.flow.server.BootstrapHandler.BootstrapContext)20