Search in sources :

Example 16 with SearchRequestResult

use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.

the class TouchPoint method parse_search.

/**
 * Parses a TouchPoint hit list page into a {@link SearchRequestResult}.
 *
 * <p>If the given page contains the refine form, the hit list itself is fetched with an
 * additional request. For each row that carries a loan status script, one more request is
 * made to determine the availability of the item.
 *
 * <p>Side effects: the {@code identifier} field is reset and then taken from the first
 * {@code singleHit.do} link found in the result table (if it has an {@code identifier}
 * parameter), and {@code resultcount} is set to the number of parsed rows.
 *
 * @param html HTML of the search result page
 * @param page page number; used to compute the running number of each hit (the numbering
 *             assumes 10 hits per page)
 * @return the parsed results; the total count is {@code -1} when it could not be read from
 *         the page header
 * @throws SingleResultFound if the header indicates that the search yielded exactly one hit
 * @throws IOException       declared for the additional HTTP requests
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + String.valueOf(System.currentTimeMillis() / 1000) + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }
    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }
    doc.setBaseUri(opac_url + "/searchfoo");

    // Read the total number of hits from the header; the header format differs between
    // installations, so several patterns are tried.
    int results_total = -1;
    String resultnumstr = doc.select(".box-header h2, .box-header h1").first().text();
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    } else if (resultnumstr.contains("Treffer")) {
        try {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".* ([0-9]+)$", "$1"));
        } catch (NumberFormatException ignored) {
            // No trailing number in the header: leave the total at -1
        }
    }

    Elements table = doc.select("table.data > tbody > tr");

    // Take the hit list identifier from the first link to a detail page. Only that first
    // link is inspected, whether or not it actually carries an identifier parameter.
    identifier = null;
    for (Element node : doc.select("table.data a")) {
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do")) {
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            break;
        }
    }

    // File names of media type icons can look like this: "20_DVD_Video.gif"
    Pattern numericPrefix = Pattern.compile("(\\d+)_.*");

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type, derived from the file name of the icon
        Elements icons = tr.select(".icn, img[width=32]");
        if (icons.size() > 0) {
            String[] fparts = icons.first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            Matcher matcher = numericPrefix.matcher(changedFname);
            if (matcher.find()) {
                changedFname = matcher.group(1);
            }
            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                // A library-specific mapping takes precedence over the defaults
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }

        String title;
        String text;
        if (tr.select(".results table").size() > 0) {
            // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else {
            // e.g. Schaffhausen, BSB München
            title = tr.select(".title, .hitlistTitle").text();
            text = tr.select(".results, .hitlistMetadata").first().ownText();
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item
        // get cover
        sr.setCover(findCoverUrl(tr, true));

        // get loan status and media ID
        Elements loanScripts = tr.select("div[id^=loanstatus] + script");
        if (loanScripts.size() > 0) {
            String js = loanScripts.first().html();
            String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier", "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit", "context" };
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());

                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING).replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung möglich")) || loanstatus.contains("Keine Exemplare verfügbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || loanstatus.contains("heute zurückgebucht")
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (loanstatus.equals("")) {
                    // In special databases (like "Handschriften" in Winterthur) ID lookup is
                    // not possible, which we try to detect this way. We therefore also cannot
                    // use getResultById when accessing the results.
                    sr.setId(null);
                }

                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);
        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) URL(java.net.URL) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) MediaType(de.geeksfactory.opacclient.objects.SearchResult.MediaType) NameValuePair(org.apache.http.NameValuePair) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) Pattern(java.util.regex.Pattern) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) JSONObject(org.json.JSONObject)

Example 17 with SearchRequestResult

use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.

the class ResultsAdapterEndless method cacheInBackground.

/**
 * Loads the next page of search results in the background if more results are expected.
 *
 * <p>Another page is requested when the current page is below {@code maxPage}, when the
 * wrapped adapter holds fewer items than {@code resultCount}, or when the result count is
 * unknown ({@code -1}), some items are already present and the end has not been reached.
 * Otherwise {@code endReached} is set and nothing is loaded.
 *
 * @return {@code true} if a page was loaded and its items are stored in
 *         {@code itemsToAppend}; {@code false} if nothing more is to be loaded or the loaded
 *         page carried no result list
 * @throws Exception declared by the signature; the load itself is delegated to
 *                   {@code listener.onLoadMore}
 */
@Override
protected boolean cacheInBackground() throws Exception {
    boolean morePagesExpected = page < maxPage
            || getWrappedAdapter().getCount() < resultCount
            || (resultCount == -1 && objects.size() > 0 && !endReached);
    if (!morePagesExpected) {
        endReached = true;
        return false;
    }

    page++;
    SearchRequestResult result = listener.onLoadMore(page);
    itemsToAppend = result.getResults();
    /* When IOpac finds more than 200 results, the real result count is
        not known until the second page is loaded */
    maxPage = result.getPage_count();
    resultCount = result.getTotal_result_count();

    // Check for a missing result list BEFORE iterating over it; otherwise the loop would
    // fail with a NullPointerException and the null check in the return value could never
    // take effect.
    if (itemsToAppend == null) {
        return false;
    }
    for (SearchResult item : itemsToAppend) {
        item.setPage(page);
    }
    return true;
}
Also used : SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) SearchResult(de.geeksfactory.opacclient.objects.SearchResult)

Example 18 with SearchRequestResult

use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.

the class WebOpacNet method parse_search.

/**
 * Converts the JSON body of a search response into a {@link SearchRequestResult}.
 *
 * <p>The total number of hits is read from {@code totalcount} and the individual hits from
 * the {@code mobmeds} array. The media type of a hit is looked up in {@code defaulttypes}
 * using the single character at index 12 of its {@code iconurl}.
 *
 * @param text response body; an empty string yields an empty result
 * @param page page number that is passed on to the returned result
 * @return the parsed hits together with the total hit count
 * @throws OpacErrorException if the response is not valid JSON or lacks an expected field
 */
private SearchRequestResult parse_search(String text, int page) throws OpacErrorException {
    if (text.equals("")) {
        // Nothing to parse: report zero hits for this page
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, page);
    }

    try {
        List<SearchResult> hits = new ArrayList<>();
        JSONObject response = new JSONObject(text);
        int totalCount = Integer.parseInt(response.getString("totalcount"));
        JSONArray media = response.getJSONArray("mobmeds");

        for (int idx = 0; idx < media.length(); idx++) {
            JSONObject entry = media.getJSONObject(idx);
            SearchResult hit = new SearchResult();
            hit.setId(entry.getString("medid"));

            // Title in bold, followed by "publisher, series" on the next line
            StringBuilder markup = new StringBuilder("<b>");
            markup.append(entry.getString("titel")).append("</b><br />");
            markup.append(entry.getString("verlag")).append(", ");
            markup.append(entry.getString("reihe"));

            String iconUrl = entry.getString("iconurl");
            hit.setType(defaulttypes.get(iconUrl.substring(12, 13)));
            hit.setInnerhtml(markup.toString());

            String coverUrl = entry.getString("imageurl");
            if (!coverUrl.isEmpty()) {
                hit.setCover(coverUrl);
            }
            hits.add(hit);
        }
        return new SearchRequestResult(hits, totalCount, page);
    } catch (JSONException e) {
        e.printStackTrace();
        throw new OpacErrorException(stringProvider.getFormattedString(StringProvider.INTERNAL_ERROR_WITH_DESCRIPTION, e.getMessage()));
    }
}
Also used : JSONObject(org.json.JSONObject) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult)

Example 19 with SearchRequestResult

use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.

the class WinBiap method parse_search.

/**
 * Parses a WinBiap result list page into a {@link SearchRequestResult}.
 *
 * <p>For every {@code .ResultItem} the method collects author, title and description, the
 * cover and media type, the catalogue ID (decoded from the Base64 {@code data} parameter of
 * the detail link) and the availability status.
 *
 * @param html HTML of the result list page
 * @param page page number that is passed on to the returned result
 * @return the parsed results together with the total hit count from the result header
 * @throws OpacErrorException if the page contains an {@code errortype} element, or if the
 *                            hit count or a catalogue ID cannot be found
 * @throws IOException        declared for decoding the Base64 data as UTF-8
 */
private SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException {
    Document doc = Jsoup.parse(html);

    // no media found
    if (doc.select(".alert h4").text().contains("Keine Treffer gefunden")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, page);
    }

    // Error (e.g. 404)
    Elements errorNodes = doc.select("errortype");
    if (!errorNodes.isEmpty()) {
        throw new OpacErrorException(errorNodes.text());
    }

    // Total count
    Matcher countMatcher = Pattern.compile("Die Suche ergab (\\d*) Treffer").matcher(doc.select(".ResultHeader").text());
    if (!countMatcher.find()) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.INTERNAL_ERROR));
    }
    int results_total = Integer.parseInt(countMatcher.group(1));

    // Results
    Pattern idPattern = Pattern.compile("CatalogueId=(\\d*)");
    List<SearchResult> results = new ArrayList<>();
    for (Element row : doc.select("#listview .ResultItem")) {
        SearchResult sr = new SearchResult();

        String author = row.select(".autor").text();
        String title = row.select(".title").text();
        String titleAddition = row.select(".titleZusatz").text();
        String desc = row.select(".smallDescription").text();

        StringBuilder markup = new StringBuilder("<b>");
        if (!author.isEmpty()) {
            markup.append(author).append("<br />");
        }
        markup.append(title);
        if (!titleAddition.isEmpty()) {
            markup.append(" - <i>").append(titleAddition).append("</i>");
        }
        markup.append("</b><br /><small>").append(desc).append("</small>");
        sr.setInnerhtml(markup.toString());

        Elements coverNodes = row.select(".coverWrapper input, .coverWrapper img");
        if (!coverNodes.isEmpty()) {
            Element cover = coverNodes.first();
            if (cover.hasAttr("data-src")) {
                sr.setCover(cover.attr("data-src"));
            } else if (cover.hasAttr("src")) {
                // Placeholder images are not used as covers
                String src = cover.attr("src");
                if (!(src.contains("empty.gif") || src.contains("leer.gif"))) {
                    sr.setCover(src);
                }
            }
            sr.setType(getMediaType(cover, data));
        }

        String link = row.select("a[href*=detail.aspx]").attr("href");
        String base64 = getQueryParamsFirst(link).get("data");
        // Most of the time, the base64 string is followed by a hyphen and some mysterious
        // letters that we don't want
        int hyphen = base64.indexOf("-");
        if (hyphen != -1) {
            base64 = base64.substring(0, hyphen - 1);
        }
        String decoded = new String(Base64.decode(base64), "UTF-8");
        Matcher idMatcher = idPattern.matcher(decoded);
        if (!idMatcher.find()) {
            throw new OpacErrorException(stringProvider.getString(StringProvider.INTERNAL_ERROR));
        }
        sr.setId(idMatcher.group(1));

        Elements statusNodes = row.select(".mediaStatus");
        if (!statusNodes.isEmpty()) {
            Element status = statusNodes.first();
            if (status.hasClass("StatusNotAvailable")) {
                sr.setStatus(Status.RED);
            } else if (status.hasClass("StatusAvailable")) {
                sr.setStatus(Status.GREEN);
            } else {
                sr.setStatus(Status.YELLOW);
            }
        } else if (!row.select(".showCopies").isEmpty()) {
            // Multiple copies: their status is read from the element following this row
            Element copies = row.nextElementSibling();
            if (copies.select(".StatusNotAvailable").isEmpty()) {
                sr.setStatus(Status.GREEN);
            } else if (copies.select(".StatusAvailable").isEmpty()) {
                sr.setStatus(Status.RED);
            } else {
                sr.setStatus(Status.YELLOW);
            }
        }

        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 20 with SearchRequestResult

use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.

the class Open method searchGetPage.

/**
 * Returns the given page of the current search, starting from the stored
 * {@code searchResultDoc}.
 *
 * <p>Two pager variants are handled: pages whose pager consists of normal links are fetched
 * directly; pages with a Javascript pager ({@code span[id$=DataPager1]}) are reached by
 * submitting postbacks, repeating via recursion until the requested page is within the
 * range of the visible page links.
 *
 * @param page number of the page to load, compared against the page numbers shown in the
 *             pager
 * @return the parsed results of the requested page, or an empty result if no page links
 *         exist
 * @throws NotReachableException if no search result document is stored
 * @throws OpacErrorException    if the postback target cannot be extracted from the link
 * @throws IOException           declared for the HTTP requests
 * @throws JSONException         declared by the signature; not thrown directly in this method
 */
@Override
public SearchRequestResult searchGetPage(int page) throws IOException, OpacErrorException, JSONException {
    if (searchResultDoc == null)
        throw new NotReachableException();
    Document doc = searchResultDoc;
    if (doc.select("span[id$=DataPager1]").size() == 0) {
        /*
                New style: Page buttons using normal links
                We can go directly to the correct page
            */
        if (doc.select("a[id*=LinkButtonPageN]").size() > 0) {
            // Take any page link and replace its page number with the requested one.
            // NOTE(review): the check above does not require [href*=page], so first() could
            // return null here if the page links have no such href — confirm.
            String href = doc.select("a[id*=LinkButtonPageN][href*=page]").first().attr("href");
            String url = href.replaceFirst("page=\\d+", "page=" + page);
            Document doc2 = Jsoup.parse(httpGet(url, getDefaultEncoding()));
            doc2.setBaseUri(url);
            return parse_search(doc2, page);
        } else {
            int totalCount;
            try {
                totalCount = Integer.parseInt(doc.select("span[id$=TotalItemsLabel]").first().text());
            } catch (Exception e) {
                // Label missing or not numeric
                totalCount = 0;
            }
            // Next page does not exist
            // NOTE(review): 0 is passed as the second argument and totalCount as the third —
            // confirm against the SearchRequestResult constructor that this order is intended.
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, totalCount);
        }
    } else {
        /*
                Old style: Page buttons using Javascript
                When there are many pages of results, there will only be links to the next 4 and
                previous 4 pages, so we will click links until it gets to the correct page.
            */
        Elements pageLinks = doc.select("span[id$=DataPager1]").first().select("a[id*=LinkButtonPageN], span[id*=LabelPageN]");
        // Lowest and highest page number currently shown in the pager
        int from = Integer.valueOf(pageLinks.first().text());
        int to = Integer.valueOf(pageLinks.last().text());
        Element linkToClick;
        boolean willBeCorrectPage;
        if (page < from) {
            // Requested page is before the visible range: move towards it via the first link
            linkToClick = pageLinks.first();
            willBeCorrectPage = false;
        } else if (page > to) {
            // Requested page is after the visible range: move towards it via the last link
            linkToClick = pageLinks.last();
            willBeCorrectPage = false;
        } else {
            linkToClick = pageLinks.get(page - from);
            willBeCorrectPage = true;
        }
        if (linkToClick.tagName().equals("span")) {
            // we are trying to get the page we are already on
            return parse_search(searchResultDoc, page);
        }
        // The link is a Javascript postback; extract its event target and argument
        Pattern pattern = Pattern.compile("javascript:__doPostBack\\('([^,]*)','([^\\)]*)'\\)");
        Matcher matcher = pattern.matcher(linkToClick.attr("href"));
        if (!matcher.find())
            // NOTE(review): the StringProvider key constant is passed directly as the
            // message — confirm whether it should be resolved to a display string first.
            throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
        // Submit the page's form with the postback fields set, i.e. "click" the link
        FormElement form = (FormElement) doc.select("form").first();
        MultipartBody data = formData(form, null).addFormDataPart("__EVENTTARGET", matcher.group(1)).addFormDataPart("__EVENTARGUMENT", matcher.group(2)).build();
        String postUrl = form.attr("abs:action");
        String html = httpPost(postUrl, data, "UTF-8");
        if (willBeCorrectPage) {
            // We clicked on the correct link
            Document doc2 = Jsoup.parse(html);
            doc2.setBaseUri(postUrl);
            return parse_search(doc2, page);
        } else {
            // There was no correct link, so try to find one again
            // (the stored document is replaced by the response before recursing)
            searchResultDoc = Jsoup.parse(html);
            searchResultDoc.setBaseUri(postUrl);
            return searchGetPage(page);
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) FormElement(org.jsoup.nodes.FormElement) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) MultipartBody(okhttp3.MultipartBody)

Aggregations

SearchRequestResult (de.geeksfactory.opacclient.objects.SearchRequestResult)23 SearchResult (de.geeksfactory.opacclient.objects.SearchResult)21 ArrayList (java.util.ArrayList)19 Element (org.jsoup.nodes.Element)16 Document (org.jsoup.nodes.Document)13 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 Matcher (java.util.regex.Matcher)9 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)8 IOException (java.io.IOException)8 Pattern (java.util.regex.Pattern)8 URISyntaxException (java.net.URISyntaxException)5 NameValuePair (org.apache.http.NameValuePair)5 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)5 SearchQuery (de.geeksfactory.opacclient.searchfields.SearchQuery)4 TextSearchField (de.geeksfactory.opacclient.searchfields.TextSearchField)4 MalformedURLException (java.net.MalformedURLException)4 URI (java.net.URI)4 HashMap (java.util.HashMap)4 OpacErrorException (de.geeksfactory.opacclient.apis.OpacApi.OpacErrorException)3