Search in sources :

Example 21 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

Method searchGetPage of the class Open.

/**
 * Returns the requested page of the search whose most recent result document is
 * held in {@code searchResultDoc}.
 *
 * <p>Two pager layouts are handled:
 * <ul>
 *   <li>No {@code span[id$=DataPager1]} present ("new style"): page buttons are
 *       plain links, so the {@code page=N} parameter of one of them is rewritten
 *       and the page is fetched directly with a GET request.</li>
 *   <li>{@code span[id$=DataPager1]} present ("old style"): page buttons trigger
 *       {@code __doPostBack}. Only a window of page links is shown, so the first
 *       or last visible link is "clicked" (form POST) repeatedly, via recursion,
 *       until the requested page is inside the window.</li>
 * </ul>
 *
 * @param page number of the result page to load
 * @return the parsed results of that page, or an empty result list when the
 *         new-style pager offers no link to follow
 * @throws NotReachableException if no search result document is available
 * @throws OpacErrorException    if the pager is empty, the result form is missing,
 *                               or the postback link cannot be parsed
 * @throws IOException           propagated from the HTTP helpers
 * @throws JSONException         declared by the overridden method
 */
@Override
public SearchRequestResult searchGetPage(int page) throws IOException, OpacErrorException, JSONException {
    if (searchResultDoc == null) {
        throw new NotReachableException();
    }
    Document doc = searchResultDoc;
    Element pager = doc.select("span[id$=DataPager1]").first();

    if (pager == null) {
        /*
            New style: Page buttons using normal links
            We can go directly to the correct page
         */
        // Select the link we actually need in one step. Previously the existence check
        // used a broader selector than the lookup, so a pager whose links carry no
        // "page" parameter caused a NullPointerException.
        Element pageLink = doc.select("a[id*=LinkButtonPageN][href*=page]").first();
        if (pageLink != null) {
            String url = pageLink.attr("href").replaceFirst("page=\\d+", "page=" + page);
            Document doc2 = Jsoup.parse(httpGet(url, getDefaultEncoding()));
            doc2.setBaseUri(url);
            return parse_search(doc2, page);
        }

        // Best effort: a missing or non-numeric label counts as zero.
        int totalCount = 0;
        Element totalLabel = doc.select("span[id$=TotalItemsLabel]").first();
        if (totalLabel != null) {
            try {
                totalCount = Integer.parseInt(totalLabel.text());
            } catch (NumberFormatException ignored) {
                // label text is not a plain number; keep 0
            }
        }
        // Next page does not exist
        // NOTE(review): the argument order (0, totalCount) differs from the other
        // SearchRequestResult call sites, which pass (results, total, page) - confirm
        // whether this is intentional before changing it.
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, totalCount);
    }

    /*
        Old style: Page buttons using Javascript
        When there are many pages of results, there will only be links to the next 4 and
        previous 4 pages, so we will click links until it gets to the correct page.
     */
    Elements pageLinks = pager.select("a[id*=LinkButtonPageN], span[id*=LabelPageN]");
    if (pageLinks.isEmpty()) {
        // A pager without any page entries cannot be navigated.
        throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
    }
    int from = Integer.parseInt(pageLinks.first().text());
    int to = Integer.parseInt(pageLinks.last().text());

    Element linkToClick;
    boolean willBeCorrectPage;
    if (page < from) {
        // Requested page lies before the visible window: move the window backwards.
        linkToClick = pageLinks.first();
        willBeCorrectPage = false;
    } else if (page > to) {
        // Requested page lies after the visible window: move the window forwards.
        linkToClick = pageLinks.last();
        willBeCorrectPage = false;
    } else {
        linkToClick = pageLinks.get(page - from);
        willBeCorrectPage = true;
    }

    if (linkToClick.tagName().equals("span")) {
        // we are trying to get the page we are already on
        return parse_search(searchResultDoc, page);
    }

    // Extract event target and argument from the javascript postback link.
    Pattern pattern = Pattern.compile("javascript:__doPostBack\\('([^,]*)','([^\\)]*)'\\)");
    Matcher matcher = pattern.matcher(linkToClick.attr("href"));
    if (!matcher.find()) {
        throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
    }

    FormElement form = (FormElement) doc.select("form").first();
    if (form == null) {
        // Without the form there is nothing to post back to.
        throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
    }
    MultipartBody data = formData(form, null)
            .addFormDataPart("__EVENTTARGET", matcher.group(1))
            .addFormDataPart("__EVENTARGUMENT", matcher.group(2))
            .build();
    String postUrl = form.attr("abs:action");
    String html = httpPost(postUrl, data, "UTF-8");

    if (willBeCorrectPage) {
        // We clicked on the correct link
        Document doc2 = Jsoup.parse(html);
        doc2.setBaseUri(postUrl);
        return parse_search(doc2, page);
    }

    // There was no correct link, so try to find one again
    searchResultDoc = Jsoup.parse(html);
    searchResultDoc.setBaseUri(postUrl);
    return searchGetPage(page);
}
Also used : Pattern(java.util.regex.Pattern) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) FormElement(org.jsoup.nodes.FormElement) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) MultipartBody(okhttp3.MultipartBody)

Example 22 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

Method parse_search of the class Heidi.

/**
 * Parses a result list page into a {@link SearchRequestResult}.
 *
 * <p>The total hit count is read from {@code #heiditreffer}; every row of
 * {@code table.treffer} becomes one {@link SearchResult}. Title, author and date
 * are taken from the row's {@code span.Z3988} element when exactly one is present;
 * status and media type are derived from the text of the {@code .kurzstat} and
 * {@code .typbild} elements.
 *
 * @param html HTML source of the result page
 * @param page page index that is passed through to the returned result
 * @return the parsed results together with the total hit count and {@code page}
 */
private SearchRequestResult parse_search(String html, int page) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    int results_total = 0;
    if (doc.select("#heiditreffer").size() > 0) {
        String resstr = doc.select("#heiditreffer").text();
        // Reduce "(1.234 ...)" to "1234": keep the number, drop thousands separators.
        String resnum = resstr.replaceAll("\\(([0-9.]+)([^0-9]*)\\)", "$1").replace(".", "");
        results_total = Integer.parseInt(resnum);
    }

    Elements table = doc.select("table.treffer tr");
    List<SearchResult> results = new ArrayList<>();
    for (Element tr : table) {
        SearchResult sr = new SearchResult();
        // Always start with an empty builder: rows without exactly one span.Z3988
        // previously left this null and crashed in setInnerhtml() below.
        StringBuilder description = new StringBuilder();
        String author = "";

        // The record id is the "katkey" query parameter of the first link that has one.
        for (Element link : tr.select("a")) {
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            if (kk != null) {
                sr.setId(kk);
                break;
            }
        }

        Elements z3988 = tr.select("span.Z3988");
        if (z3988.size() == 1) {
            // Luckily there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            String zdata = z3988.attr("title").replace(";", "%3B").replace(":", "%3A").replace("/", "%2F");
            boolean hastitle = false;
            for (NameValuePair nv : parse_z3988data(zdata)) {
                String value = nv.getValue();
                if (value == null || value.trim().equals("")) {
                    continue;
                }
                String name = nv.getName();
                if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                    // Only the first title field encountered is used.
                    description.append("<b>").append(value).append("</b>");
                    hastitle = true;
                } else if (name.equals("rft.au")) {
                    author = value;
                } else if (name.equals("rft.aufirst")) {
                    author = author + ", " + value;
                } else if (name.equals("rft.aulast")) {
                    author = value;
                } else if (name.equals("rft.date")) {
                    description.append("<br />").append(value);
                }
            }
        }

        if (!"".equals(author)) {
            author = author + "<br />";
        }
        sr.setInnerhtml(author + description.toString());

        if (tr.select(".kurzstat").size() > 0) {
            String stattext = tr.select(".kurzstat").first().text();
            if (stattext.contains("ausleihbar") || stattext.contains("online")) {
                sr.setStatus(Status.GREEN);
            } else if (stattext.contains("entliehen")) {
                sr.setStatus(Status.RED);
            } else if (stattext.contains("Präsenznutzung") || stattext.contains("bestellen")) {
                sr.setStatus(Status.YELLOW);
            }
        }

        if (tr.select(".typbild").size() > 0) {
            String typtext = tr.select(".typbild").first().text();
            // Order matters: "DVD-ROM" must be tested before "DVD",
            // "Zeitschrift" before "Zeitung" is irrelevant but kept as in the original.
            if (typtext.contains("Buch")) {
                sr.setType(MediaType.BOOK);
            } else if (typtext.contains("DVD-ROM")) {
                sr.setType(MediaType.CD_SOFTWARE);
            } else if (typtext.contains("Online-Ressource")) {
                sr.setType(MediaType.EDOC);
            } else if (typtext.contains("DVD")) {
                sr.setType(MediaType.DVD);
            } else if (typtext.contains("Film")) {
                sr.setType(MediaType.MOVIE);
            } else if (typtext.contains("Zeitschrift")) {
                sr.setType(MediaType.MAGAZINE);
            } else if (typtext.contains("Musiknoten")) {
                sr.setType(MediaType.SCORE_MUSIC);
            } else if (typtext.contains("Bildliche Darstellung")) {
                sr.setType(MediaType.ART);
            } else if (typtext.contains("Zeitung")) {
                sr.setType(MediaType.NEWSPAPER);
            } else if (typtext.contains("Karte")) {
                sr.setType(MediaType.MAP);
            } else if (typtext.contains("Mehrteilig")) {
                sr.setType(MediaType.PACKAGE_BOOKS);
            }
        }
        results.add(sr);
    }
    // TODO
    return new SearchRequestResult(results, results_total, page);
}
Also used : BasicNameValuePair(org.apache.http.message.BasicNameValuePair) NameValuePair(org.apache.http.NameValuePair) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 23 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

Method parse_search of the class Adis.

/**
 * Parses a result list document into a {@link SearchRequestResult} and records
 * paging state for later requests.
 *
 * <p>Besides building the result list this method calls {@code updatePageform(doc)},
 * stores {@code page} in {@code s_lastpage} and, when the corresponding buttons are
 * found in the document, updates {@code s_nextbutton} and {@code s_previousbutton}.
 *
 * @param doc  the result list document
 * @param page page index that is stored and passed through to the returned result
 * @return the parsed results; the total count is {@code -1} when it could not be read
 * @throws OpacErrorException if the page shows an error message or reports no results
 * @throws SingleResultFound  if the page indicates that exactly one hit was found
 */
private SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException, SingleResultFound {
    if (doc.select(".message h1").size() > 0 && doc.select("#right #R06").size() == 0) {
        throw new OpacErrorException(doc.select(".message h1").text());
    }
    if (doc.select("#OPACLI").text().contains("nicht gefunden")) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.NO_RESULTS));
    }

    int total_result_count = -1;
    List<SearchResult> results = new ArrayList<>();
    if (doc.select("#R06").size() > 0) {
        String hitText = doc.select("#R06").text().trim();
        Pattern patNum = Pattern.compile(".*Treffer: .* von ([0-9]+)[^0-9]*");
        Matcher matcher = patNum.matcher(hitText);
        if (matcher.matches()) {
            total_result_count = Integer.parseInt(matcher.group(1));
        } else if (hitText.endsWith("Treffer: 1")) {
            total_result_count = 1;
        }
    }
    if (doc.select("#R03").size() == 1 && doc.select("#R03").text().trim().endsWith("Treffer: 1")) {
        throw new SingleResultFound();
    }

    Pattern patId = Pattern.compile("javascript:.*htmlOnLink\\('([0-9A-Za-z]+)'\\)");
    int nr = 1;
    String selector_row, selector_link, selector_img, selector_num, selector_text;
    if (doc.select("table.rTable_table tbody").size() > 0) {
        selector_row = "table.rTable_table tbody tr";
        selector_link = ".rTable_td_text a";
        selector_text = ".rList_name";
        selector_img = ".rTable_td_img img, .rTable_td_text img";
        selector_num = "tr td:first-child";
    } else {
        // New version, e.g. Berlin
        selector_row = ".rList li.rList_li_even, .rList li.rList_li_odd";
        selector_link = ".rList_titel a";
        selector_text = ".rList_name";
        selector_img = ".rlist_icon img, .rList_titel img, .rList_medium .icon, .rList_availability .icon, .rList_img img";
        selector_num = ".rList_num";
    }

    for (Element tr : doc.select(selector_row)) {
        Element innerele = tr.select(selector_link).first();
        if (innerele == null) {
            // A row without a title link carries neither description nor id;
            // skip it instead of failing with a NullPointerException.
            continue;
        }
        SearchResult res = new SearchResult();

        // The link's HTML (without icons) is the first line of the description.
        innerele.select("img").remove();
        StringBuilder descr = new StringBuilder(innerele.html());
        for (Element n : tr.select(selector_text)) {
            String t = n.text().replace("\u00a0", " ").trim();
            if (t.length() > 0) {
                descr.append("<br />").append(t);
            }
        }
        res.setInnerhtml(descr.toString());

        try {
            res.setNr(Integer.parseInt(tr.select(selector_num).text().trim()));
        } catch (NumberFormatException e) {
            // No usable number in the row: fall back to the running counter.
            res.setNr(nr);
        }

        Matcher matcher = patId.matcher(innerele.attr("href"));
        if (matcher.matches()) {
            res.setId(matcher.group(1));
        }

        for (Element img : tr.select(selector_img)) {
            String ttext = img.attr("title");
            // Icons are usually <img> elements, whose file name is in "src"; the
            // original only looked at "href", so both attributes are checked now.
            String src = img.attr("src");
            String href = img.attr("href");

            // Text before the first '+', for combined titles such as "A + B".
            // indexOf/substring avoids the empty array split() returns for "+".
            int plus = ttext.indexOf('+');
            String firstPart = plus >= 0 ? ttext.substring(0, plus).trim() : null;

            if (types.containsKey(ttext)) {
                res.setType(types.get(ttext));
            } else if (firstPart != null && types.containsKey(firstPart)) {
                res.setType(types.get(firstPart));
            } else if (ttext.matches(".*ist verf.+gbar") || ttext.contains("is available")
                    || href.contains("verfu_ja") || src.contains("verfu_ja")) {
                res.setStatus(SearchResult.Status.GREEN);
            } else if (ttext.matches(".*nicht verf.+gbar") || ttext.contains("not available")
                    || href.contains("verfu_nein") || src.contains("verfu_nein")) {
                res.setStatus(SearchResult.Status.RED);
            }
        }
        results.add(res);
        nr++;
    }

    updatePageform(doc);
    s_lastpage = page;

    String nextButton = doc.select("input[title=nächster], input[title=Vorwärts blättern]").attr("name");
    // The previous-button selector used to contain "nächster" (the *next* button's
    // title), which could store the next button's name as the previous button.
    // NOTE(review): "vorheriger" is assumed to be the matching title of the previous
    // button in the "nächster" layout - confirm against a live page.
    String previousButton = doc.select("input[title=vorheriger], input[title=Rückwärts blättern]").attr("name");
    if (!nextButton.equals("")) {
        s_nextbutton = nextButton;
    }
    if (!previousButton.equals("")) {
        s_previousbutton = previousButton;
    }
    return new SearchRequestResult(results, total_result_count, page);
}
Also used : Pattern(java.util.regex.Pattern) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult)

Aggregations

SearchResult (de.geeksfactory.opacclient.objects.SearchResult)23 SearchRequestResult (de.geeksfactory.opacclient.objects.SearchRequestResult)21 ArrayList (java.util.ArrayList)17 Element (org.jsoup.nodes.Element)16 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 Matcher (java.util.regex.Matcher)9 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)8 IOException (java.io.IOException)8 Pattern (java.util.regex.Pattern)8 URISyntaxException (java.net.URISyntaxException)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)4 MalformedURLException (java.net.MalformedURLException)4 URI (java.net.URI)4 HashMap (java.util.HashMap)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4 SearchQuery (de.geeksfactory.opacclient.searchfields.SearchQuery)3 TextSearchField (de.geeksfactory.opacclient.searchfields.TextSearchField)3