Search in sources :

Example 11 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class IOpac method parse_search.

/**
 * Parses an IOPAC search result page.
 *
 * <p>The total result count is taken from the page's {@code h4} heading, the column layout
 * from the header row of the first table, and one {@link SearchResult} is created for every
 * table row that contains {@code td} cells.</p>
 *
 * @param html raw HTML of the result page
 * @param page page number of this result page; used for the fallback result number and
 *             passed on to the returned {@link SearchRequestResult}
 * @return the parsed results; an empty result if the heading starts with
 *         "0 gefundene Medien"; {@code null} if the page has neither an {@code h4} nor an
 *         {@code h1} element
 * @throws OpacErrorException    if the page shows an error text instead of a result heading
 * @throws NotReachableException if the page's {@code h1} contains "RUNTIME ERROR"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, NotReachableException {
    Document doc = Jsoup.parse(html);
    if (doc.select("h4").size() > 0) {
        String heading = doc.select("h4").text().trim();
        if (heading.startsWith("0 gefundene Medien")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else if (!heading.contains("gefundene Medien") && !heading.contains("Es wurden mehr als")) {
            // any other h4 text is treated as an error message
            throw new OpacErrorException(heading);
        }
    } else if (doc.select("h1").size() > 0) {
        String heading = doc.select("h1").text().trim();
        if (heading.contains("RUNTIME ERROR")) {
            // Server Error
            throw new NotReachableException("IOPAC RUNTIME ERROR");
        } else {
            throw new OpacErrorException(stringProvider.getFormattedString(StringProvider.UNKNOWN_ERROR_WITH_DESCRIPTION, heading));
        }
    } else {
        return null;
    }
    updateRechnr(doc);
    reusehtml = html;
    results_total = -1;
    if (doc.select("h4").text().trim().contains("Es wurden mehr als")) {
        // the heading only states a lower bound, so 200 is used as a placeholder
        results_total = 200;
    } else {
        // the count is the first whitespace-delimited token of the first h4
        String resultnumstr = doc.select("h4").first().text();
        resultnumstr = resultnumstr.substring(0, resultnumstr.indexOf(" ")).trim();
        results_total = Integer.parseInt(resultnumstr);
    }
    List<SearchResult> results = new ArrayList<>();
    Element table = doc.select("table").first();
    Elements tables = table.select("tr:has(td)");

    // Map logical column names to their index, based on the header captions.
    Map<String, Integer> colmap = new HashMap<>();
    Element thead = table.select("tr:has(th)").first();
    if (thead != null) {
        // first() yields null when the table has no header row; in that case the default
        // layout below is used instead of failing here
        int j = 0;
        for (Element th : thead.select("th")) {
            String text = th.text().trim().toLowerCase(Locale.GERMAN);
            if (text.contains("cover")) {
                colmap.put("cover", j);
            } else if (text.contains("titel")) {
                colmap.put("title", j);
            } else if (text.contains("verfasser")) {
                colmap.put("author", j);
            } else if (text.contains("mtyp")) {
                colmap.put("category", j);
            } else if (text.contains("jahr")) {
                colmap.put("year", j);
            } else if (text.contains("signatur")) {
                colmap.put("shelfmark", j);
            } else if (text.contains("info")) {
                colmap.put("info", j);
            } else if (text.contains("abteilung")) {
                colmap.put("department", j);
            } else if (text.contains("verliehen") || text.contains("verl.")) {
                colmap.put("returndate", j);
            } else if (text.contains("anz.res")) {
                colmap.put("reservations", j);
            }
            j++;
        }
    }
    if (colmap.isEmpty()) {
        // no usable header: fall back to a fixed column layout
        colmap.put("cover", 0);
        colmap.put("title", 1);
        colmap.put("author", 2);
        colmap.put("publisher", 3);
        colmap.put("year", 4);
        colmap.put("department", 5);
        colmap.put("shelfmark", 6);
        colmap.put("returndate", 7);
        colmap.put("category", 8);
    }

    // created once; only used inside this method call
    SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy", Locale.GERMAN);
    for (int i = 0; i < tables.size(); i++) {
        Element tr = tables.get(i);
        Elements cells = tr.select("td");
        SearchResult sr = new SearchResult();

        // Cover (the column is optional when the layout comes from the header row)
        Integer coverCol = colmap.get("cover");
        if (coverCol != null) {
            Elements coverImages = cells.get(coverCol).select("img");
            if (coverImages.size() > 0) {
                sr.setCover(coverImages.first().attr("src"));
            }
        }

        // Media Type
        Integer categoryCol = colmap.get("category");
        if (categoryCol != null) {
            String mType = cells.get(categoryCol).text().trim().replace("\u00a0", "");
            String mTypeKey = mType.toLowerCase(Locale.GERMAN);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(mTypeKey)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) library-specific mapping: use the built-in defaults
                    sr.setType(defaulttypes.get(mTypeKey));
                }
            } else {
                sr.setType(defaulttypes.get(mTypeKey));
            }
        }

        // Title and additional info
        String title;
        String additionalInfo = "";
        Integer infoCol = colmap.get("info");
        if (infoCol != null) {
            Element info = cells.get(infoCol);
            title = info.select("a[title=Details-Info], a[title=Details-Info1]").text().trim();
            // text in front of the title link; empty if the title is not found in the cell text
            String infoText = info.text();
            int titlePos = infoText.indexOf(title);
            String authorIn = titlePos > 0 ? infoText.substring(0, titlePos) : "";
            if (authorIn.contains(":")) {
                authorIn = authorIn.replaceFirst("^([^:]*):(.*)$", "$1");
                additionalInfo += " - " + authorIn;
            }
        } else {
            Integer titleCol = colmap.get("title");
            title = titleCol != null ? cells.get(titleCol).text().trim().replace("\u00a0", "") : "";
            int paren = title.indexOf("(");
            if (paren > 0) {
                // move a trailing "(...)" part from the title into the additional info
                additionalInfo += title.substring(paren);
                title = title.substring(0, paren - 1).trim();
            }
            // Author
            if (colmap.containsKey("author")) {
                String author = cells.get(colmap.get("author")).text().trim().replace("\u00a0", "");
                additionalInfo += " - " + author;
            }
        }
        // Publisher
        // NOTE(review): the parentheses are only balanced when both the publisher and the
        // year column exist; output format intentionally left unchanged here.
        if (colmap.containsKey("publisher")) {
            String publisher = cells.get(colmap.get("publisher")).text().trim().replace("\u00a0", "");
            additionalInfo += " (" + publisher;
        }
        // Year
        if (colmap.containsKey("year")) {
            String year = cells.get(colmap.get("year")).text().trim().replace("\u00a0", "");
            additionalInfo += ", " + year + ")";
        }
        sr.setInnerhtml("<b>" + title + "</b><br>" + additionalInfo);

        // Status (left unset when the table has no return date column)
        Integer returnCol = colmap.get("returndate");
        if (returnCol != null) {
            String status = cells.get(returnCol).text().trim().replace("\u00a0", "");
            try {
                df.parse(status);
                // this is a return date
                sr.setStatus(Status.RED);
                sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + stringProvider.getString(StringProvider.LENT_UNTIL) + " " + status + "</i>");
            } catch (ParseException e) {
                // this is a different status text
                String lc = status.toLowerCase(Locale.GERMAN);
                boolean looksAvailable = lc.equals("") || lc.contains("onleihe") || lc.contains("verleihbar") || lc.contains("entleihbar") || lc.contains("ausleihbar");
                if (looksAvailable && !lc.contains("nicht")) {
                    sr.setStatus(Status.GREEN);
                } else {
                    sr.setStatus(Status.YELLOW);
                    sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + status + "</i>");
                }
            }
        }

        // In some libraries (for example search for "atelier" in Preetz)
        // the results are sorted differently than their numbers suggest, so
        // we need to detect the number ("recno") from the link
        String link = tr.select("a[href^=/cgi-bin/di.exe?page=]").attr("href");
        Map<String, String> params = getQueryParamsFirst(link);
        if (params.containsKey("recno")) {
            int recno = Integer.parseInt(params.get("recno"));
            sr.setNr(recno - 1);
        } else {
            // the above should work, but fall back to this if it doesn't
            sr.setNr(10 * (page - 1) + i);
        }

        // In some libraries (for example Preetz) we can detect the media ID
        // here using another link present in the search results
        Elements idLinks = tr.select("a[href^=/cgi-bin/di.exe?cMedNr]");
        if (idLinks.size() > 0) {
            Map<String, String> idParams = getQueryParamsFirst(idLinks.first().attr("href"));
            sr.setId(idParams.get("cMedNr"));
        } else {
            sr.setId(null);
        }
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
Also used : NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat)

Example 12 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class SearchResultListFragment method setSearchResult.

/**
 * Displays the given search result in this list fragment.
 *
 * <p>Tags every result with the page index of {@code searchresult}, shows the total result
 * count as action bar subtitle (if it is non-negative), shows the "no results" text for an
 * empty result, and installs a {@link ResultsAdapterEndless} that loads further pages on
 * demand.</p>
 *
 * @param searchresult the search result to display
 */
public void setSearchResult(final SearchRequestResult searchresult) {
    for (SearchResult item : searchresult.getResults()) {
        item.setPage(searchresult.getPage_index());
    }

    final int total = searchresult.getTotal_result_count();
    if (total >= 0) {
        ((AppCompatActivity) getActivity()).getSupportActionBar().setSubtitle(getResources().getQuantityString(R.plurals.result_number, total, total));
    }
    if (searchresult.getResults().isEmpty() && total <= 0) {
        setEmptyText(getString(R.string.no_results));
    }
    this.searchresult = searchresult;

    // The adapter is handed a null API when the library has been removed.
    OpacApi api = null;
    try {
        api = app.getApi();
    } catch (OpacClient.LibraryRemovedException ignored) {
    }

    adapter = new ResultsAdapterEndless(getActivity(), searchresult, new OnLoadMoreListener() {

        @Override
        public SearchRequestResult onLoadMore(int page) throws Exception {
            SearchRequestResult nextPage = app.getApi().searchGetPage(page);
            setLastLoadedPage(page);
            return nextPage;
        }

        @Override
        public void onError(Exception e) {
            if (getActivity() == null) {
                // no activity to show the error in
                return;
            }
            if (e instanceof OpacErrorException) {
                showConnectivityError(e.getMessage());
            } else if (e instanceof SSLSecurityException) {
                showConnectivityError(getResources().getString(R.string.connection_error_detail_security));
            } else if (e instanceof NotReachableException) {
                showConnectivityError(getResources().getString(R.string.connection_error_detail_nre));
            } else {
                e.printStackTrace();
                showConnectivityError();
            }
        }

        @Override
        public void updateResultCount(int resultCount) {
            // When IOpac finds more than 200 results, the real result count
            // is not known until the second page is loaded.
            if (resultCount < 0 || getActivity() == null) {
                return;
            }
            ((AppCompatActivity) getActivity()).getSupportActionBar().setSubtitle(getResources().getQuantityString(R.plurals.result_number, resultCount, resultCount));
        }
    }, api);
    setListAdapter(adapter);
    getListView().setTextFilterEnabled(true);
    setListShown(true);
}
Also used : OnLoadMoreListener(de.geeksfactory.opacclient.frontend.ResultsAdapterEndless.OnLoadMoreListener) OpacClient(de.geeksfactory.opacclient.OpacClient) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) OpacApi(de.geeksfactory.opacclient.apis.OpacApi) SSLSecurityException(de.geeksfactory.opacclient.networking.SSLSecurityException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) OpacErrorException(de.geeksfactory.opacclient.apis.OpacApi.OpacErrorException) JSONException(org.json.JSONException) OpacErrorException(de.geeksfactory.opacclient.apis.OpacApi.OpacErrorException) IOException(java.io.IOException) SSLSecurityException(de.geeksfactory.opacclient.networking.SSLSecurityException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException)

Example 13 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class LibraryApiTestCases method scrollTestHelper.

/**
 * Runs a search for {@code q} and checks that detail pages can be loaded for results of the
 * first page, both directly after the search and after the second result page was requested.
 *
 * @param q the query string, searched in the field returned by
 *          {@code findFreeSearchOrTitle(fields)}
 * @throws OpacErrorException if no suitable search field exists or the API reports an error
 * @throws IOException        declared for the API calls made here
 * @throws JSONException      declared for the API calls made here
 */
public void scrollTestHelper(String q) throws OpacErrorException, IOException, JSONException {
    List<SearchQuery> query = new ArrayList<>();
    SearchField field = findFreeSearchOrTitle(fields);
    if (field == null) {
        // TODO: prevent this
        throw new OpacErrorException("There is no free or title search field");
    }
    query.add(new SearchQuery(field, q));
    SearchRequestResult res = api.search(query);
    List<SearchResult> hits = res.getResults();
    // a total of -1 means the count is unknown
    assertTrue(res.getTotal_result_count() == -1 || hits.size() <= res.getTotal_result_count());
    assertTrue(hits.size() > 0);

    // Use the third result, or the last one if the page has fewer than three.
    SearchResult third = hits.get(Math.min(2, hits.size() - 1));
    DetailedItem detail;
    if (third.getId() != null) {
        detail = api.getResultById(third.getId(), "");
    } else {
        detail = api.getResult(third.getNr());
    }
    assertNotNull(detail);
    confirmDetail(third, detail);

    if (hits.size() < res.getTotal_result_count()) {
        // Request page 2, then load the detail of a first-page result again.
        api.searchGetPage(2);
        // Use the second result, or the only one if the first page has a single entry
        // (previously get(1) threw IndexOutOfBoundsException in that case).
        SearchResult second = hits.get(Math.min(1, hits.size() - 1));
        DetailedItem detail2;
        if (second.getId() != null) {
            detail2 = api.getResultById(second.getId(), "");
        } else {
            detail2 = api.getResult(second.getNr());
        }
        confirmDetail(second, detail2);
    }
}
Also used : SearchQuery(de.geeksfactory.opacclient.searchfields.SearchQuery) TextSearchField(de.geeksfactory.opacclient.searchfields.TextSearchField) SearchField(de.geeksfactory.opacclient.searchfields.SearchField) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) ArrayList(java.util.ArrayList) OpacErrorException(de.geeksfactory.opacclient.apis.OpacApi.OpacErrorException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem)

Example 14 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Pica method parse_search.

/**
 * Parses a Pica search result page.
 *
 * <p>The total result count is read from the {@code .pages} element, and one
 * {@link SearchResult} is built for each row of the {@code hitlist} table. If exactly one
 * result is reported, the page is additionally parsed as a detail page via
 * {@code parse_result}.</p>
 *
 * @param html raw HTML of the result page
 * @param page page number of this result page; used to compute the result numbers
 * @return the parsed results; an empty result if the page shows one of the known
 *         "nothing found" messages
 * @throws OpacErrorException if the page contains any other {@code .error} message
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    updateSearchSetValue(doc);
    if (doc.select(".error").size() > 0) {
        String error = doc.select(".error").first().text().trim();
        if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found") || error.equals("Er is niets gevonden.") || error.equals("Rien n'a été trouvé.")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else {
            // error
            throw new OpacErrorException(error);
        }
    }
    reusehtml = html;

    // -1 = total unknown; used when the page has no ".pages" element
    int results_total = -1;
    Element pages = doc.select(".pages").first();
    if (pages != null) {
        String resultnumstr = pages.text();
        // prefer a number at the very end of the text
        Pattern p = Pattern.compile("[0-9]+$");
        Matcher m = p.matcher(resultnumstr);
        if (m.find()) {
            resultnumstr = m.group();
        }
        if (resultnumstr.contains("(")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
        } else if (resultnumstr.contains(": ")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
        } else {
            results_total = Integer.parseInt(resultnumstr);
        }
    }

    List<SearchResult> results = new ArrayList<>();
    if (results_total == 1) {
        // Only one result
        DetailedItem singleResult = parse_result(html);
        SearchResult sr = new SearchResult();
        sr.setType(getMediaTypeInSingleResult(html));
        sr.setInnerhtml("<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent());
        results.add(sr);
    }
    Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]");

    // NOTE(review): this scan parses the first "SHW?" link but no longer stores the
    // "identifier" parameter it finds; it is kept so that behaviour stays unchanged.
    Elements links = doc.select("table[summary=hitlist] a");
    boolean haslink = false;
    for (int i = 0; i < links.size() && !haslink; i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") && node.attr("href").contains("SHW?")) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href")), getDefaultEncoding());
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        // identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type, derived from the file name of the row's icon
        if (tr.select("td.hit img").size() > 0) {
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String defaultKey = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) library-specific mapping: use the built-in defaults
                    sr.setType(defaulttypes.get(defaultKey));
                }
            } else {
                sr.setType(defaulttypes.get(defaultKey));
            }
        }

        // Collect the text fragments of the third cell. Each entry is
        // { parent tag, child tag, text[, parent class, parent style] }.
        Element middlething = tr.child(2);
        List<Node> children = middlething.childNodes();
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), "text", text, parent.className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), ((Element) subnode).tag().getName(), text, parent.className(), node.attr("style") });
                        }
                    }
                }
            }
        }

        // The first fragment becomes the bold title if it came from a link; of the remaining
        // fragments only those among the first three are appended.
        StringBuilder description = new StringBuilder();
        int k = 0;
        for (String[] part : strings) {
            if (part[0].equals("a") && k == 0) {
                description.append("<b>").append(part[2]).append("</b>");
            } else if (k < 3) {
                description.append("<br />").append(part[2]);
            }
            k++;
        }
        sr.setInnerhtml(description.toString());
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) ArrayList(java.util.ArrayList) List(java.util.List) Pattern(java.util.regex.Pattern) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) NameValuePair(org.apache.http.NameValuePair) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) TextNode(org.jsoup.nodes.TextNode) JSONException(org.json.JSONException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 15 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class SISIS method parse_search.

/**
 * Parses a SISIS search result page.
 *
 * <p>The total result count is read from the {@code .box-header h2} heading (it stays -1 if
 * it cannot be determined), and one {@link SearchResult} is built for each row of the
 * {@code table.data} table.</p>
 *
 * @param html raw HTML of the result page
 * @param page page number of this result page; used to compute the result numbers
 * @return the parsed results; an empty result if the page reports "keine Treffer"
 * @throws OpacErrorException if the page contains an {@code .error} or {@code .nohits} element
 * @throws SingleResultFound  if the heading indicates that there is exactly one result (1/1)
 */
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");
    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    int results_total = -1;
    // first() is null when the heading is missing; the total then stays unknown (-1)
    Element heading = doc.select(".box-header h2").first();
    String resultnumstr = heading != null ? heading.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");

    // Remember the "identifier" parameter of the first singleHit.do link.
    identifier = null;
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (int i = 0; i < links.size() && !haslink; i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do")) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type from the row's icon (title attribute or file name)
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) library-specific mapping: use the built-in defaults
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords in the row text override the icon-based media type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        // Cover
        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // Locate the cell (and, if present, the inner container) holding the title data.
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }
        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.select("a").size() > 0 && indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Collect text fragments. Each entry is
        // { parent tag, child tag, text[, parent class, parent style] }.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), "text", text, parent.className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), ((Element) subnode).tag().getName(), text, parent.className(), node.attr("style") });
                        }
                    }
                }
            }
        }

        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() == null || nv.getValue().trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        description.append("<b>").append(nv.getValue()).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(nv.getValue());
                    }
                }
            } catch (URISyntaxException e) {
                // fall back to the text fragments collected above
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound) {
                    // second fragment: always shown (this also covers a "(yyyy)" fragment,
                    // which the former separate branches handled identically)
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }

            // Status from CSS class / inline style of the parent element.
            // NOTE(review): the fragments built above have 3 or 5 elements, so the
            // "length == 4" branch never runs and textgruen/textrot are currently ignored.
            // Left as is because enabling it would also bypass the e-media override below;
            // confirm the intended precedence before changing it.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            // Status from the fragment text, as long as none has been set yet.
            if (sr.getStatus() == null) {
                String text = part[2];
                if ((text.contains("entliehen") && text.startsWith("Vormerkung ist leider nicht möglich")) || text.contains("Alle Exemplare des gewählten Titels sind entliehen") || text.contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (text.startsWith("entliehen") || text.contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((text.startsWith("bestellbar") && !text.contains("nicht bestellbar")) || (text.startsWith("vorbestellbar") && !text.contains("nicht vorbestellbar")) || (text.startsWith("vormerkbar") && !text.contains("nicht vormerkbar")) || (text.contains("heute zurückgebucht")) || (text.contains("ausleihbar") && !text.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Also used : Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) URISyntaxException(java.net.URISyntaxException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) MediaType(de.geeksfactory.opacclient.objects.SearchResult.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) NameValuePair(org.apache.http.NameValuePair) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) TextNode(org.jsoup.nodes.TextNode) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ClientProtocolException(org.apache.http.client.ClientProtocolException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Aggregations

SearchResult (de.geeksfactory.opacclient.objects.SearchResult)23 SearchRequestResult (de.geeksfactory.opacclient.objects.SearchRequestResult)21 ArrayList (java.util.ArrayList)17 Element (org.jsoup.nodes.Element)16 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 Matcher (java.util.regex.Matcher)9 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)8 IOException (java.io.IOException)8 Pattern (java.util.regex.Pattern)8 URISyntaxException (java.net.URISyntaxException)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)4 MalformedURLException (java.net.MalformedURLException)4 URI (java.net.URI)4 HashMap (java.util.HashMap)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4 SearchQuery (de.geeksfactory.opacclient.searchfields.SearchQuery)3 TextSearchField (de.geeksfactory.opacclient.searchfields.TextSearchField)3