Search in sources:

Example 16 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

The method parse_search of the class Pica.

/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * <p>If the page contains an element with class {@code error}, its text is compared against
 * the known "nothing found" messages: a match yields an empty result, anything else is
 * rethrown as an {@link OpacErrorException}. Otherwise the raw HTML is stored in
 * {@code reusehtml}, the total hit count is read from the {@code .pages} element, and every
 * row of the hit list table is converted into a {@link SearchResult}. When the total is
 * exactly one, the page is additionally parsed as a detail page via {@code parse_result}.
 *
 * @param html raw HTML of the result page
 * @param page page number; used to compute each hit's running number as
 *             {@code 10 * (page - 1) + rowIndex}
 * @return the parsed hits together with the total count and the page number
 * @throws OpacErrorException if the page shows an error other than "nothing found"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    updateSearchSetValue(doc);

    Elements errors = doc.select(".error");
    if (errors.size() > 0) {
        String error = errors.first().text().trim();
        if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found") || error.equals("Er is niets gevonden.") || error.equals("Rien n'a été trouvé.")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        }
        // any other message is a real error
        throw new OpacErrorException(error);
    }

    reusehtml = html;

    // NOTE(review): a page without a ".pages" element still fails with a
    // NullPointerException here, as before -- confirm whether that can occur.
    String resultnumstr = doc.select(".pages").first().text();
    // Prefer a number at the very end of the text; otherwise fall back to the
    // "(n)" and ": n" formats handled below.
    Matcher m = Pattern.compile("[0-9]+$").matcher(resultnumstr);
    if (m.find()) {
        resultnumstr = m.group();
    }
    int results_total;
    if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    } else {
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();
    if (results_total == 1) {
        // Only one result: the page is parsed as a detail page
        DetailedItem singleResult = parse_result(html);
        SearchResult sr = new SearchResult();
        sr.setType(getMediaTypeInSingleResult(html));
        String innerhtml = "<b>" + singleResult.getTitle() + "</b><br>";
        // Guard against items without any detail instead of failing on get(0).
        if (singleResult.getDetails().size() > 0) {
            innerhtml += singleResult.getDetails().get(0).getContent();
        }
        sr.setInnerhtml(innerhtml);
        results.add(sr);
    }

    // The former scan over the hit list links was removed: it parsed the first
    // "SHW?" link but never stored the identifier it looked for.
    Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]");
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        if (tr.select("td.hit img").size() > 0) {
            // The media type is derived from the icon's file name.
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String fallbackKey = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) library-specific mapping: use the built-in defaults
                    sr.setType(defaulttypes.get(fallbackKey));
                }
            } else {
                sr.setType(defaulttypes.get(fallbackKey));
            }
        }

        // Collect {tag, text} pairs for every text fragment longer than three
        // characters found in the third cell, looking at most two levels deep.
        Element middlething = tr.child(2);
        List<String[]> strings = new ArrayList<>();
        for (Node node : middlething.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", text });
                }
            } else if (node instanceof Element) {
                String tagName = ((Element) node).tag().getName();
                for (Node subnode : node.childNodes()) {
                    String text = null;
                    if (subnode instanceof TextNode) {
                        text = ((TextNode) subnode).text().trim();
                    } else if (subnode instanceof Element) {
                        text = ((Element) subnode).text().trim();
                    }
                    if (text != null && text.length() > 3) {
                        strings.add(new String[] { tagName, text });
                    }
                }
            }
        }

        // Only the first three fragments are shown; a leading link becomes the bold title.
        StringBuilder description = new StringBuilder();
        int k = 0;
        for (String[] part : strings) {
            if (part[0].equals("a") && k == 0) {
                description.append("<b>").append(part[1]).append("</b>");
            } else if (k < 3) {
                description.append("<br />").append(part[1]);
            }
            k++;
        }
        sr.setInnerhtml(description.toString());
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) ArrayList(java.util.ArrayList) List(java.util.List) Pattern(java.util.regex.Pattern) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) NameValuePair(org.apache.http.NameValuePair) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) TextNode(org.jsoup.nodes.TextNode) JSONException(org.json.JSONException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 17 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

The method loadDetail of the class SISIS.

/**
 * Loads the additional pages belonging to a detail view and parses them into a
 * {@link DetailedItem}.
 *
 * <p>Besides the given HTML, the title tab and the availability tab are fetched. If the
 * given HTML contains an AJAX call to {@code jsp/result/cover.jsp}, that resource is
 * fetched as well. Everything is then handed to {@code parseDetail}. Finally, unless the
 * cover URL contains {@code "amazon"}, {@code downloadCover} is called on a best-effort basis.
 *
 * @param html HTML of the detail page that was already loaded
 * @return the parsed item
 * @throws IOException if one of the HTTP requests fails
 */
protected DetailedItem loadDetail(String html) throws IOException {
    String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING);
    String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive", ENCODING);

    String coverJs = null;
    // The capture group must end before the closing quote; previously the quote was
    // captured too and ended up in the requested URL. The dot in "cover.jsp" is escaped
    // so it only matches a literal dot.
    Pattern coverPattern = Pattern.compile("\\$\\.ajax\\(\\{[\\n\\s]*url: '(jsp/result/cover\\.jsp\\?[^']+)'");
    Matcher coverMatcher = coverPattern.matcher(html);
    if (coverMatcher.find()) {
        coverJs = httpGet(opac_url + "/" + coverMatcher.group(1), ENCODING);
    }

    DetailedItem result = parseDetail(html, html2, html3, coverJs, data, stringProvider);
    try {
        String cover = result.getCover();
        // Items without a cover are skipped explicitly instead of relying on a
        // swallowed NullPointerException.
        if (cover != null && !cover.contains("amazon")) {
            downloadCover(result);
        }
    } catch (Exception e) {
        // The cover is optional: a failed download must not break the detail view.
        e.printStackTrace();
    }
    return result;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ClientProtocolException(org.apache.http.client.ClientProtocolException) IOException(java.io.IOException)

Example 18 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

The method parse_result of the class TouchPoint.

/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>The following information is extracted, in this order:
 * <ul>
 * <li>the cover URL (via {@code findCoverUrl}) and, if a {@code #permalink-link} exists, an
 * id consisting of a JSON object with the permalink under {@code "url"};</li>
 * <li>name/value details from the rows of {@code .titleinfo}, or, if that yields nothing,
 * from the {@code <strong>} labels and following nodes inside {@code #details};</li>
 * <li>the title, falling back to the first {@code h1};</li>
 * <li>copies, loaded through the URL given in the holdings element's {@code ajaxParameter}
 * attribute;</li>
 * <li>reservation info, loaded from the same URL with {@code showHoldings} replaced by
 * {@code showDocument};</li>
 * <li>volume search parameters, if a link with {@code methodToCall=volumeSearch} exists.</li>
 * </ul>
 *
 * @param html raw HTML of the detail page
 * @return the parsed item
 * @throws IOException if loading the copies page fails
 */
protected DetailedItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    DetailedItem result = new DetailedItem();
    result.setCover(findCoverUrl(doc, false));

    if (doc.select("#permalink-link").size() > 0) {
        String href = doc.select("#permalink-link").first().attr("href");
        JSONObject id = new JSONObject();
        try {
            id.put("url", href);
            result.setId(id.toString());
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }

    for (Element tr : doc.select(".titleinfo tr")) {
        // Sometimes there is one th and one td, sometimes two tds
        Elements cells = tr.select("th, td");
        Elements valueCells = tr.select("td");
        if (cells.isEmpty() || valueCells.isEmpty()) {
            // rows without a value cell carry no detail
            continue;
        }
        String detailName = cells.first().text().trim();
        if (detailName.endsWith(":")) {
            detailName = detailName.substring(0, detailName.length() - 1);
        }
        String detailValue = valueCells.last().text().trim();
        result.addDetail(new Detail(detailName, detailValue));
        if (detailName.contains("ID in diesem Katalog") && result.getId() == null) {
            result.setId(detailValue);
        }
        if (detailName.equals("Titel")) {
            result.setTitle(detailValue);
        }
    }

    if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) {
        // e.g. Bayreuth_Uni
        // Labels are <strong> elements; everything up to the next label is the value.
        String dname = "";
        String dval = "";
        boolean in_value = true;
        for (Node n : doc.select("#details").first().childNodes()) {
            if (n instanceof Element && ((Element) n).tagName().equals("strong")) {
                if (in_value) {
                    // a new label starts: store the pair collected so far
                    if (dname.length() > 0 && dval.length() > 0) {
                        result.addDetail(new Detail(dname, dval));
                        if (dname.equals("Titel")) {
                            result.setTitle(dval);
                        }
                    }
                    dname = ((Element) n).text();
                    in_value = false;
                } else {
                    // consecutive <strong> elements form one label
                    dname += ((Element) n).text();
                }
            } else {
                String t = null;
                if (n instanceof TextNode) {
                    t = ((TextNode) n).text();
                } else if (n instanceof Element) {
                    t = ((Element) n).text();
                }
                if (t != null) {
                    if (in_value) {
                        dval += t;
                    } else {
                        in_value = true;
                        dval = t;
                    }
                }
            }
        }
        // Store the last pair as well; it has no following label that would trigger
        // the flush inside the loop. If the list ended on a label (in_value is false),
        // dval still belongs to the previous pair and must not be stored again.
        if (in_value && dname.length() > 0 && dval.length() > 0) {
            result.addDetail(new Detail(dname, dval));
            if (dname.equals("Titel")) {
                result.setTitle(dval);
            }
        }
    }

    if (result.getTitle() == null) {
        Element heading = doc.select("h1").first();
        if (heading != null) {
            result.setTitle(heading.text());
        }
    }

    // Copies
    // (the attribute selector was missing its closing bracket)
    String copiesParameter = doc.select("div[id^=ajax_holdings_url]").attr("ajaxParameter").replace("&amp;", "");
    if (!"".equals(copiesParameter)) {
        String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING);
        Document copiesDoc = Jsoup.parse(copiesHtml);
        // Maps each column to a copy field; null marks columns that are ignored.
        List<String> table_keys = new ArrayList<>();
        for (Element th : copiesDoc.select(".data tr th")) {
            if (th.text().contains("Zweigstelle")) {
                table_keys.add("branch");
            } else if (th.text().contains("Status")) {
                table_keys.add("status");
            } else if (th.text().contains("Signatur")) {
                table_keys.add("signature");
            } else {
                table_keys.add(null);
            }
        }
        for (Element tr : copiesDoc.select(".data tr:has(td)")) {
            Copy copy = new Copy();
            int i = 0;
            for (Element td : tr.select("td")) {
                // cells beyond the known header columns are ignored
                if (i < table_keys.size() && table_keys.get(i) != null) {
                    copy.set(table_keys.get(i), td.text().trim());
                }
                i++;
            }
            result.addCopy(copy);
        }
    }

    // Reservation Info, only works if the code above could find a URL
    if (!"".equals(copiesParameter)) {
        String reservationParameter = copiesParameter.replace("showHoldings", "showDocument");
        try {
            String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING);
            Document reservationDoc = Jsoup.parse(reservationHtml);
            reservationDoc.setBaseUri(opac_url);
            if (reservationDoc.select("a[href*=requestItem.do]").size() == 1) {
                result.setReservable(true);
                // NOTE(review): this takes the first link of the document, not
                // necessarily the requestItem.do link matched above -- confirm.
                result.setReservation_info(reservationDoc.select("a").first().attr("abs:href"));
            }
        } catch (Exception e) {
            // reservation info is optional, so failures are only logged
            e.printStackTrace();
        }
    }

    try {
        // catKey/dbIdentifier are collected from every link visited up to and
        // including the first volumeSearch link.
        boolean isVolume = false;
        Map<String, String> volume = new HashMap<>();
        for (Element link : doc.select(".data td a")) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(link.attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isVolume = true;
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isVolume) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
Also used : NameValuePair(org.apache.http.NameValuePair) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 19 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

The method parseDetail of the class VuFind.

/**
 * Parses a record page into a {@link DetailedItem}.
 *
 * <p>Title and cover are read from the {@code .record} area. The first table inside
 * {@code .record} supplies the details: rows with two or more cells are stored as
 * name/value pairs, while a single {@code th} starts a section whose following single-cell
 * rows are joined with line breaks into one value. Afterwards either volumes (if a
 * {@code #Volumes} element exists) or copies are parsed.
 *
 * @param id   identifier to store in the item
 * @param doc  parsed record page
 * @param data library configuration; {@code "baseurl"} is read when a detail value contains
 *             a link
 * @return the parsed item
 * @throws OpacErrorException if the page contains an error message
 * @throws JSONException      if {@code "baseurl"} is needed but missing from {@code data}
 */
static DetailedItem parseDetail(String id, Document doc, JSONObject data) throws OpacErrorException, JSONException {
    Elements errors = doc.select("p.error, p.errorMsg, .alert-error");
    if (errors.size() > 0) {
        throw new OpacErrorException(errors.text());
    }

    DetailedItem res = new DetailedItem();
    res.setId(id);

    Elements title = doc.select(".record h1, .record [itemprop=name], .record [property=name]");
    if (title.size() > 0) {
        res.setTitle(title.first().text());
    }

    for (Element img : doc.select(".record img, #cover img")) {
        String src = img.absUrl("src");
        // "over" matches both "cover" and "Cover"; only the first such image is considered
        if (src.contains("over")) {
            if (!src.contains("Unavailable")) {
                res.setCover(src);
            }
            break;
        }
    }

    String head = null;
    StringBuilder value = new StringBuilder();
    // Pages without a details table are tolerated instead of failing on first().
    Element detailTable = doc.select(".record table").first();
    if (detailTable != null) {
        for (Element tr : detailTable.select("tr")) {
            int cellCount = tr.children().size();
            if (cellCount == 0) {
                // an empty row has neither a name nor a value
                continue;
            }
            if (cellCount == 1) {
                Element cell = tr.child(0);
                if (cell.tagName().equals("th")) {
                    // a new section starts: store the previous one first
                    if (head != null) {
                        res.addDetail(new Detail(head, value.toString()));
                        value = new StringBuilder();
                    }
                    head = cell.text();
                } else {
                    if (value.length() > 0) {
                        value.append("\n");
                    }
                    value.append(cell.text());
                }
            } else {
                Element valueCell = tr.child(1);
                String text = valueCell.text();
                if (valueCell.select("a").size() > 0) {
                    String href = valueCell.select("a").attr("href");
                    // append external link targets unless the text already shows the base URL
                    if (!href.startsWith("/") && !text.contains(data.getString("baseurl"))) {
                        text += " " + href;
                    }
                }
                res.addDetail(new Detail(tr.child(0).text(), text));
            }
        }
    }
    if (head != null) {
        res.addDetail(new Detail(head, value.toString()));
    }

    try {
        if (doc.select("#Volumes").size() > 0) {
            parseVolumes(res, doc, data);
        } else {
            parseCopies(res, doc, data);
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return res;
}
Also used : Element(org.jsoup.nodes.Element) JSONException(org.json.JSONException) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Elements(org.jsoup.select.Elements) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 20 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

The method parse_result of the class WinBiap.

/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>Cover and media type are taken from the first {@code .cover} element, the id from the
 * {@code Id} query parameter of the permalink, title and details from the rows of the first
 * {@code .DetailInformation} element, and the copies from the rows of the copies table.
 *
 * @param html raw HTML of the detail page
 * @return the parsed item
 */
private DetailedItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    DetailedItem item = new DetailedItem();

    Element cover = doc.select(".cover").first();
    if (cover != null) {
        if (cover.hasAttr("data-src")) {
            item.setCover(cover.attr("data-src"));
        } else if (cover.hasAttr("src") && !cover.attr("src").equals("images/empty.gif")) {
            item.setCover(cover.attr("src"));
        }
        item.setMediaType(getMediaType(cover, data));
    }

    String permalink = doc.select(".PermalinkTextarea").text();
    item.setId(getQueryParamsFirst(permalink).get("Id"));

    // Pages without a details block are tolerated instead of failing on first().
    Element detailInformation = doc.select(".DetailInformation").first();
    if (detailInformation != null) {
        for (Element tr : detailInformation.select("tr")) {
            String name = tr.select(".DetailInformationEntryName").text().replace(":", "");
            String value = tr.select(".DetailInformationEntryContent").text();
            switch (name) {
                case "Titel":
                    item.setTitle(value);
                    break;
                case "Stücktitel": {
                    // Avoid producing "null ..." when no title has been set yet.
                    String currentTitle = item.getTitle();
                    item.setTitle(currentTitle == null ? value : currentTitle + " " + value);
                    break;
                }
                default:
                    item.addDetail(new Detail(name, value));
                    break;
            }
        }
    }

    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    Elements copyRows = doc.select(".detailCopies .tableCopies > tbody > tr:not(.headerCopies)");
    for (Element tr : copyRows) {
        Copy copy = new Copy();
        copy.setBarcode(tr.select(".mediaBarcode").text().replace("#", ""));
        copy.setStatus(tr.select(".mediaStatus").text());
        if (tr.select(".DateofReturn .borrowUntil").size() > 0) {
            String returntime = tr.select(".DateofReturn .borrowUntil").text();
            try {
                copy.setReturnDate(fmt.parseLocalDate(returntime));
            } catch (IllegalArgumentException e) {
                // an unparseable date only leaves the return date unset
                e.printStackTrace();
            }
        }
        if (tr.select(".mediaBranch").size() > 0) {
            copy.setBranch(tr.select(".mediaBranch").text());
        }
        copy.setLocation(tr.select(".cellMediaItemLocation span").text());
        if (tr.select("#HyperLinkReservation").size() > 0) {
            // a single reservable copy makes the whole item reservable
            copy.setResInfo(tr.select("#HyperLinkReservation").attr("href"));
            item.setReservable(true);
            item.setReservation_info("reservable");
        }
        item.addCopy(copy);
    }
    return item;
}
Also used : Copy(de.geeksfactory.opacclient.objects.Copy) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Aggregations

DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)27 Detail (de.geeksfactory.opacclient.objects.Detail)18 Copy (de.geeksfactory.opacclient.objects.Copy)17 Element (org.jsoup.nodes.Element)15 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)10 IOException (java.io.IOException)8 JSONObject (org.json.JSONObject)7 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)6 HashMap (java.util.HashMap)6 UnsupportedEncodingException (java.io.UnsupportedEncodingException)5 ArrayList (java.util.ArrayList)5 Matcher (java.util.regex.Matcher)5 Pattern (java.util.regex.Pattern)5 NameValuePair (org.apache.http.NameValuePair)5 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)5 Node (org.jsoup.nodes.Node)5 TextNode (org.jsoup.nodes.TextNode)5