Search in sources :

Example 1 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class BibliothecaSearchTest method testParseSearch.

/**
 * Parses the stored Bibliotheca result list for the current {@code file} and checks the basic
 * invariants of the parsed result: a positive page or total count, the requested page index,
 * non-null ID and type on every item, and the expected HTML of the first item.
 *
 * <p>The test returns without asserting anything when the resource for {@code file} is missing.
 */
@Test
public void testParseSearch() throws OpacApi.OpacErrorException, JSONException, NotReachableException {
    String html = readResource("/bibliotheca/resultlist/" + file);
    // we may not have all files for all libraries
    if (html == null) {
        return;
    }
    int page = 1;
    SearchRequestResult result = Bibliotheca.parseSearch(html, page, getData(file));
    assertTrue(result.getPage_count() > 0 || result.getTotal_result_count() > 0);
    assertTrue(result.getPage_index() == page);
    for (SearchResult item : result.getResults()) {
        assertNotNull(item.getId());
        assertNotNull(item.getType());
    }
    SearchResult firstItem = result.getResults().get(0);
    // JUnit's contract is assertEquals(expected, actual): the fixture value is the expectation,
    // the parsed value is what is under test. Keeping this order makes failure messages correct.
    assertEquals(getFirstResultHtml(file), firstItem.getInnerhtml());
}
Also used : SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Test(org.junit.Test)

Example 2 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Open method parse_search.

/**
 * Parses an OPEN result list page into a {@link SearchRequestResult}.
 *
 * <p>For every result element the cover, media type, descriptive text and ID are extracted.
 * For each result that has an ID, an availability request is posted asynchronously; this method
 * waits for all of those requests before returning, so the returned results carry their status
 * where the server answered with a known {@code IsAvail} value.
 *
 * @param doc  the parsed result page; it is also stored in {@code searchResultDoc}
 * @param page the page index to report in the returned result
 * @return the parsed results; an empty result if the page's info message contains
 *         "keine Treffer"
 * @throws OpacErrorException if the page shows any other info message, or if no total item
 *                            count label is present
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    searchResultDoc = doc;
    if (doc.select("#Label1, span[id$=LblInfoMessage]").size() > 0) {
        String message = doc.select("#Label1, span[id$=LblInfoMessage]").text();
        if (message.contains("keine Treffer")) {
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, page);
        } else {
            throw new OpacErrorException(message);
        }
    }
    int totalCount;
    if (doc.select("span[id$=TotalItemsLabel]").size() > 0) {
        // The label holds the count followed by text; take the part before the first
        // (possibly non-breaking / Unicode) space character.
        totalCount = Integer.parseInt(doc.select("span[id$=TotalItemsLabel]").first().text().split("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]")[0]);
    } else {
        throw new OpacErrorException(stringProvider.getString(StringProvider.UNKNOWN_ERROR));
    }
    Pattern idPattern = Pattern.compile("\\$(mdv|civ|dcv)(\\d+)\\$");
    Pattern weakIdPattern = Pattern.compile("(mdv|civ|dcv)(\\d+)[^\\d]");
    // Compiled once here instead of once per script tag per result; the expression is constant.
    Pattern portalIdPattern = Pattern.compile(".*LoadSharedCatalogueViewAvailabilityAsync\\([^,]*,[^,]*," + "[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");
    Elements elements = doc.select("div[id$=divMedium], div[id$=divComprehensiveItem], div[id$=divDependentCatalogue]");
    List<SearchResult> results = new ArrayList<>();
    int i = 0;
    List<CompletableFuture<Void>> futures = new ArrayList<>();
    for (Element element : elements) {
        final SearchResult result = new SearchResult();
        // Cover
        if (element.select("input[id$=mediumImage]").size() > 0) {
            result.setCover(element.select("input[id$=mediumImage]").first().attr("src"));
        } else if (element.select("img[id$=CoverView_Image]").size() > 0) {
            assignBestCover(result, getCoverUrlList(element.select("img[id$=CoverView_Image]").first()));
        }
        Element catalogueContent = element.select(".catalogueContent, .oclc-searchmodule-mediumview-content, .oclc-searchmodule-comprehensiveitemview-content, .oclc-searchmodule-dependentitemview-content").first();
        // Media Type
        if (catalogueContent.select("#spanMediaGrpIcon, .spanMediaGrpIcon").size() > 0) {
            String mediatype = catalogueContent.select("#spanMediaGrpIcon, .spanMediaGrpIcon").attr("class");
            if (mediatype.startsWith("itemtype ")) {
                mediatype = mediatype.substring("itemtype ".length());
            }
            SearchResult.MediaType defaulttype = defaulttypes.get(mediatype);
            if (defaulttype == null) {
                defaulttype = SearchResult.MediaType.UNKNOWN;
            }
            if (data.has("mediatypes")) {
                // A library-specific mapping takes precedence; fall back to the default mapping
                // when the class name is not listed there.
                try {
                    result.setType(SearchResult.MediaType.valueOf(data.getJSONObject("mediatypes").getString(mediatype)));
                } catch (JSONException e) {
                    result.setType(defaulttype);
                }
            } else {
                result.setType(defaulttype);
            }
        } else {
            result.setType(SearchResult.MediaType.UNKNOWN);
        }
        // Text
        String title = catalogueContent.select("a[id$=LbtnShortDescriptionValue], a[id$=LbtnTitleValue]").text();
        String subtitle = catalogueContent.select("span[id$=LblSubTitleValue]").text();
        String author = catalogueContent.select("span[id$=LblAuthorValue]").text();
        String year = catalogueContent.select("span[id$=LblProductionYearValue]").text();
        String series = catalogueContent.select("span[id$=LblSeriesValue]").text();
        // Some libraries, such as Bern, have labels but no <span id="..Value"> tags
        int j = 0;
        for (Element div : catalogueContent.children()) {
            if (subtitle.equals("") && div.select("span").size() == 0 && j > 0 && j < 3) {
                subtitle = div.text().trim();
            }
            if (author.equals("") && div.select("span[id$=LblAuthor]").size() == 1) {
                author = div.text().trim();
                if (author.contains(":")) {
                    author = author.split(":")[1];
                }
            }
            if (year.equals("") && div.select("span[id$=LblProductionYear]").size() == 1) {
                year = div.text().trim();
                if (year.contains(":")) {
                    year = year.split(":")[1];
                }
            }
            j++;
        }
        StringBuilder text = new StringBuilder();
        text.append("<b>").append(title).append("</b>");
        if (!subtitle.equals("")) {
            text.append("<br/>").append(subtitle);
        }
        if (!author.equals("")) {
            text.append("<br/>").append(author);
        }
        if (!year.equals("")) {
            text.append("<br/>").append(year);
        }
        if (!series.equals("")) {
            text.append("<br/>").append(series);
        }
        result.setInnerhtml(text.toString());
        // ID: prefer the strict "$mdv123$" form, fall back to the weaker "mdv123" form
        Matcher matcher = idPattern.matcher(element.html());
        if (matcher.find()) {
            result.setId(matcher.group(2));
        } else {
            matcher = weakIdPattern.matcher(element.html());
            if (matcher.find()) {
                result.setId(matcher.group(2));
            }
        }
        // Availability
        if (result.getId() != null) {
            String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService" + ".asmx/GetAvailability";
            String culture = element.select("input[name$=culture]").val();
            JSONObject data = new JSONObject();
            try {
                // Determine portalID value
                int portalId = 1;
                for (Element scripttag : doc.select("script")) {
                    String scr = scripttag.html();
                    if (scr.contains("LoadSharedCatalogueViewAvailabilityAsync")) {
                        Matcher portalIdMatcher = portalIdPattern.matcher(scr);
                        if (portalIdMatcher.find()) {
                            portalId = Integer.parseInt(portalIdMatcher.group(1));
                        }
                    }
                }
                data.put("portalId", portalId).put("mednr", result.getId()).put("culture", culture).put("requestCopyData", false).put("branchFilter", "");
                RequestBody entity = RequestBody.create(MEDIA_TYPE_JSON, data.toString());
                futures.add(asyncPost(url, entity, false).handle((response, throwable) -> {
                    // A failed availability request leaves the result's status unset.
                    if (throwable != null) {
                        return null;
                    }
                    try {
                        JSONObject availabilityData = new JSONObject(response.body().string());
                        String isAvail = availabilityData.getJSONObject("d").getString("IsAvail");
                        switch(isAvail) {
                            case "true":
                                result.setStatus(SearchResult.Status.GREEN);
                                break;
                            case "false":
                                result.setStatus(SearchResult.Status.RED);
                                break;
                            case "digital":
                                result.setStatus(SearchResult.Status.UNKNOWN);
                                break;
                        }
                    } catch (JSONException | IOException e) {
                        e.printStackTrace();
                    }
                    return null;
                }));
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
        result.setNr(i);
        // Advance the running position; without this every result was numbered 0.
        i++;
        results.add(result);
    }
    // Block until every availability lookup has completed (successfully or not).
    CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])).join();
    return new SearchRequestResult(results, totalCount, page);
}
Also used : URL(java.net.URL) Detail(de.geeksfactory.opacclient.objects.Detail) HashMap(java.util.HashMap) Volume(de.geeksfactory.opacclient.objects.Volume) CoverHolder(de.geeksfactory.opacclient.objects.CoverHolder) TextSearchField(de.geeksfactory.opacclient.searchfields.TextSearchField) Headers(okhttp3.Headers) Filter(de.geeksfactory.opacclient.objects.Filter) CheckboxSearchField(de.geeksfactory.opacclient.searchfields.CheckboxSearchField) Part.create(okhttp3.MultipartBody.Part.create) ArrayList(java.util.ArrayList) StringProvider(de.geeksfactory.opacclient.i18n.StringProvider) RequestBody(okhttp3.RequestBody) JSONException(org.json.JSONException) JSONObject(org.json.JSONObject) Matcher(java.util.regex.Matcher) Locale(java.util.Locale) Element(org.jsoup.nodes.Element) Map(java.util.Map) HttpClientFactory(de.geeksfactory.opacclient.networking.HttpClientFactory) Account(de.geeksfactory.opacclient.objects.Account) MediaType(okhttp3.MediaType) DateTimeFormat(org.joda.time.format.DateTimeFormat) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) MalformedURLException(java.net.MalformedURLException) BarcodeSearchField(de.geeksfactory.opacclient.searchfields.BarcodeSearchField) Set(java.util.Set) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) IOException(java.io.IOException) SearchQuery(de.geeksfactory.opacclient.searchfields.SearchQuery) DropdownSearchField(de.geeksfactory.opacclient.searchfields.DropdownSearchField) Library(de.geeksfactory.opacclient.objects.Library) List(java.util.List) CompletableFuture(java8.util.concurrent.CompletableFuture) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) MultipartBody(okhttp3.MultipartBody) Document(org.jsoup.nodes.Document) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) SearchField(de.geeksfactory.opacclient.searchfields.SearchField) Jsoup(org.jsoup.Jsoup) 
Elements(org.jsoup.select.Elements) Pattern(java.util.regex.Pattern) AccountData(de.geeksfactory.opacclient.objects.AccountData) JSONArray(org.json.JSONArray) Copy(de.geeksfactory.opacclient.objects.Copy) FormElement(org.jsoup.nodes.FormElement) Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Elements(org.jsoup.select.Elements) CompletableFuture(java8.util.concurrent.CompletableFuture) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) JSONObject(org.json.JSONObject) RequestBody(okhttp3.RequestBody)

Example 3 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Primo method parse_search.

/**
 * Parses a Primo result list page into a {@link SearchRequestResult}.
 *
 * <p>Rows for which no link with a {@code doc} query parameter is found are skipped, because
 * no ID can be assigned to them.
 *
 * @param doc  the parsed result page; its base URI is set so that relative links resolve
 * @param page the page index stored on every result and reported in the returned result
 * @return the parsed results with the total result count, or -1 as the count when it could
 *         not be determined
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException, IOException {
    doc.setBaseUri(opac_url + "/action/search.do");
    List<SearchResult> reslist = new ArrayList<>();
    int resnum = -1;
    if (doc.select(".EXLResultsNumbers").size() > 0) {
        // The container may exist without a matching count element; in that case leave the
        // count at -1 instead of failing with a NullPointerException.
        Element countElement = doc.select(".EXLResultsNumbers em, .PaginationLabel strong").first();
        if (countElement != null) {
            try {
                resnum = Integer.parseInt(countElement.text().trim().replace(".", "").replace(",", "").replace(" ", "").replace("Ergebnisse", ""));
            } catch (NumberFormatException e) {
                e.printStackTrace();
            }
        }
    }
    for (Element resrow : doc.select(".EXLResult")) {
        SearchResult res = new SearchResult();
        StringBuilder description = new StringBuilder();
        description.append("<b>").append(resrow.select(".EXLResultTitle").text()).append("</b>");
        if (resrow.select(".EXLResultAuthor").size() > 0) {
            description.append("<br />").append(resrow.select(".EXLResultAuthor").text());
        }
        if (resrow.select(".EXLResultDetails").size() > 0) {
            description.append("<br />").append(resrow.select(".EXLResultDetails").text());
        }
        String availSelect = ".EXLResultAvailability span, .EXLResultAvailability em";
        if (resrow.select(availSelect).size() > 0) {
            description.append("<br />").append(resrow.select(availSelect).first().ownText());
        }
        res.setInnerhtml(description.toString());
        if (resrow.select(".EXLResultStatusAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (resrow.select(".EXLResultStatusNotAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        } else if (resrow.select(".EXLResultStatusMaybeAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.YELLOW);
        }
        res.setPage(page);
        // The ID is the "doc" query parameter of the first suitable link in the row.
        for (Element a : resrow.select(".EXLResultTitle a, a.EXLThumbnailLinkMarker, .EXLDetailsTab a")) {
            Map<String, String> q = getQueryParamsFirst(a.absUrl("href"));
            if (q.containsKey("doc")) {
                res.setId(q.get("doc"));
                break;
            }
        }
        if (res.getId() == null) {
            continue;
        }
        if (resrow.select("img.EXLBriefResultsCover").size() > 0) {
            String src = resrow.select("img.EXLBriefResultsCover").first().absUrl("src");
            // "pixel.png" is treated as "no cover"
            if (!src.contains("pixel.png")) {
                res.setCover(src);
            }
        }
        for (Map.Entry<String, SearchResult.MediaType> cls : mediaTypeClasses.entrySet()) {
            if (resrow.hasClass(cls.getKey()) || resrow.select("." + cls.getKey()).size() > 0) {
                res.setType(cls.getValue());
                break;
            }
        }
        if (resrow.select("a.EXLBriefResultsDisplayMultipleLink").size() > 0) {
            // Rows that link to multiple versions get a child query carrying that link's URL
            // in a hidden "url" field.
            String url = resrow.select("a.EXLBriefResultsDisplayMultipleLink").first().absUrl("href");
            List<SearchQuery> query = new ArrayList<>();
            TextSearchField field = new TextSearchField("url", "url", false, false, "url", false, false);
            field.setVisible(false);
            query.add(new SearchQuery(field, url));
            res.setChildQuery(query);
        }
        reslist.add(res);
    }
    return new SearchRequestResult(reslist, resnum, page);
}
Also used : SearchQuery(de.geeksfactory.opacclient.searchfields.SearchQuery) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) TextSearchField(de.geeksfactory.opacclient.searchfields.TextSearchField) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Bibliotheca method parseSearch.

/**
 * Parses the HTML of a Bibliotheca result list.
 *
 * @param html the raw HTML of the result page
 * @param page the page index reported in the returned result
 * @param data library configuration; {@code baseurl} is used as the document's base URI and an
 *             optional {@code mediatypes} object maps icon file names to media type names
 * @return the parsed rows together with the total result count, or -1 as the count when it is
 *         absent or cannot be parsed
 * @throws OpacErrorException if the page has no {@code .result_gefunden} element but does have
 *                            a {@code .resultzeile} element; its text becomes the message
 */
public static SearchRequestResult parseSearch(String html, int page, JSONObject data) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(data.optString("baseurl"));
    Elements rows = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer");
    List<SearchResult> results = new ArrayList<>();
    for (int index = 0; index < rows.size(); index++) {
        Element row = rows.get(index);
        SearchResult item = new SearchResult();
        int contentColumn = 1;

        // Media type: derived from the file name of the row's icon, if there is one
        Elements icons = row.select("td a img");
        if (!icons.isEmpty()) {
            String[] pathSegments = icons.get(0).attr("src").split("/");
            String iconName = pathSegments[pathSegments.length - 1];
            String fallbackKey = iconName.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            boolean mapped = false;
            if (data.has("mediatypes")) {
                try {
                    item.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(iconName)));
                    mapped = true;
                } catch (JSONException | IllegalArgumentException ignored) {
                    // no usable library-specific mapping; the default mapping is applied below
                }
            }
            if (!mapped) {
                item.setType(defaulttypes.get(fallbackKey));
            }
        } else if (row.children().size() == 3) {
            contentColumn = 2;
        }

        Element contentCell = row.child(contentColumn);
        item.setInnerhtml(contentCell.child(0).html());
        item.setNr(index);

        // ID taken from the detail link's query parameters
        Element link = contentCell.select("a").first();
        try {
            if (link != null && link.attr("href").contains("detmediennr")) {
                Map<String, String> params = getQueryParamsFirst(link.attr("abs:href"));
                String nr = params.get("detmediennr");
                if (Integer.parseInt(nr) > index + 1) {
                    // Seems to be an ID…
                    StringBuilder id = new StringBuilder("&detmediennr=").append(nr);
                    String db = params.get("detDB");
                    if (db != null) {
                        id.append("&detDB=").append(db);
                    }
                    item.setId(id.toString());
                }
            }
        } catch (Exception ignored) {
            // deliberately silent: any failure here just leaves the ID unset by this step
        }

        // ID taken from an HTML comment at the start of the second cell; overrides the one above
        try {
            Object firstNode = row.child(1).childNode(0);
            if (firstNode instanceof Comment) {
                String commentText = ((Comment) firstNode).getData().trim();
                item.setId(commentText.split(": ")[1]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        results.add(item);
    }

    int results_total = -1;
    Elements foundLabel = doc.select(".result_gefunden");
    if (!foundLabel.isEmpty()) {
        try {
            results_total = Integer.parseInt(foundLabel.text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1"));
        } catch (NumberFormatException e) {
            // the count stays at -1
            e.printStackTrace();
        }
    } else {
        Elements errorLine = doc.select(".resultzeile");
        if (!errorLine.isEmpty()) {
            throw new OpacErrorException(errorLine.text());
        }
    }
    return new SearchRequestResult(results, results_total, page);
}
Also used : Comment(org.jsoup.nodes.Comment) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 5 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class VuFind method parse_search.

/**
 * Parses a VuFind result list page into a {@link SearchRequestResult}.
 *
 * <p>When a row (or its enclosing {@code li}) carries a {@code span.Z3988} element, the
 * description is built from the key/value pairs encoded in that span's {@code title}
 * attribute; otherwise the text of the row's {@code a.title} link is used.
 *
 * @param doc  the parsed result page; its base URI is set so that relative links resolve
 * @param page the page index stored on every result and reported in the returned result
 * @return the parsed results with the total result count, or -1 as the count when the page has
 *         no single {@code .resulthead} element
 * @throws OpacErrorException if the page contains an error element, or has no results but a
 *                            paragraph inside {@code .main}
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    doc.setBaseUri(opac_url + "/Search/Results");
    if (doc.select("p.error, p.errorMsg, .alert-error").size() > 0) {
        throw new OpacErrorException(doc.select("p.error, p.errorMsg, .alert-error").text());
    } else if (doc.select("div.result").size() == 0 && doc.select(".main p").size() > 0) {
        throw new OpacErrorException(doc.select(".main p").first().text());
    }
    int rescount = -1;
    if (doc.select(".resulthead").size() == 1) {
        // The third <strong> in the header holds the count; strip thousands separators.
        rescount = Integer.parseInt(doc.select(".resulthead strong").get(2).text().replace(",", "").replace(".", ""));
    }
    List<SearchResult> reslist = new ArrayList<>();
    for (Element row : doc.select("div.result")) {
        SearchResult res = new SearchResult();
        // Select the same class that is tested for ("Z3988"). Selecting "span.3988" never
        // matched, which left z3988el null and made the metadata branch below unreachable.
        Element z3988el = null;
        if (row.select("span.Z3988").size() == 1) {
            z3988el = row.select("span.Z3988").first();
        } else if (row.parent().tagName().equals("li") && row.parent().select("span.Z3988").size() > 0) {
            z3988el = row.parent().select("span.Z3988").first();
        }
        if (z3988el != null) {
            List<NameValuePair> z3988data;
            try {
                StringBuilder description = new StringBuilder();
                // The title attribute is a URL-encoded query string; prefix a dummy URL so it
                // can be parsed as a URI.
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + z3988el.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() != null) {
                        if (!nv.getValue().trim().equals("")) {
                            if (nv.getName().equals("rft.btitle")) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                            } else if (nv.getName().equals("rft.atitle")) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                            } else if (nv.getName().equals("rft.au")) {
                                description.append("<br />").append(nv.getValue());
                            } else if (nv.getName().equals("rft.date")) {
                                description.append("<br />").append(nv.getValue());
                            }
                        }
                    }
                }
                res.setInnerhtml(description.toString());
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
        } else {
            res.setInnerhtml(row.select("a.title").text());
        }
        // Status: classes on the row itself take precedence over nested status markers
        if (row.hasClass("available") || row.hasClass("internet")) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.hasClass("reservable")) {
            res.setStatus(SearchResult.Status.YELLOW);
        } else if (row.hasClass("not-available")) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.available").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-success").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-important").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.checkedout").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        }
        for (Map.Entry<String, SearchResult.MediaType> entry : mediaTypeSelectors.entrySet()) {
            if (row.select(entry.getKey()).size() > 0) {
                res.setType(entry.getValue());
                break;
            }
        }
        // Cover: the first image whose URL contains "over" decides; URLs containing
        // "Unavailable" are not used as a cover.
        for (Element img : row.select("img")) {
            String src = img.absUrl("src");
            if (src.contains("over")) {
                if (!src.contains("Unavailable")) {
                    res.setCover(src);
                }
                break;
            }
        }
        res.setPage(page);
        // ID: extracted from the path of the title link
        String href = row.select("a.title").first().absUrl("href");
        try {
            URL idurl = new URL(href);
            String path = idurl.getPath();
            Matcher matcher = idPattern.matcher(path);
            if (matcher.find()) {
                if (matcher.group().contains("/OpacrlRecord/")) {
                    res.setId("Opacrl:" + matcher.group(1));
                } else {
                    res.setId(matcher.group(1));
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        reslist.add(res);
    }
    return new SearchRequestResult(reslist, rescount, page);
}
Also used : BasicNameValuePair(org.apache.http.message.BasicNameValuePair) NameValuePair(org.apache.http.NameValuePair) MalformedURLException(java.net.MalformedURLException) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) URL(java.net.URL) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SearchResult (de.geeksfactory.opacclient.objects.SearchResult)23 SearchRequestResult (de.geeksfactory.opacclient.objects.SearchRequestResult)21 ArrayList (java.util.ArrayList)17 Element (org.jsoup.nodes.Element)16 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 Matcher (java.util.regex.Matcher)9 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)8 IOException (java.io.IOException)8 Pattern (java.util.regex.Pattern)8 URISyntaxException (java.net.URISyntaxException)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)4 MalformedURLException (java.net.MalformedURLException)4 URI (java.net.URI)4 HashMap (java.util.HashMap)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4 SearchQuery (de.geeksfactory.opacclient.searchfields.SearchQuery)3 TextSearchField (de.geeksfactory.opacclient.searchfields.TextSearchField)3