Search in sources :

Example 6 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Zones method parse_search.

/**
 * Parses a Zones search result page.
 * <p>
 * Reads the total number of hits, stores the search object path found on the page in
 * {@code searchobj} and converts every result row into a {@link SearchResult}. Two page
 * layouts are handled: the one selected by {@code version18} ("Zones 1.8" in the comments
 * below) and the other one ("Zones 2.2").
 *
 * @param html raw HTML of the result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed hits and the total hit count; the count is {@code -1} when it could
 *         not be read from the page
 * @throws OpacErrorException if the page contains an {@code #ErrorAdviceRow} element; its
 *                            trimmed text is used as the message
 */
private SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    // A base URI is required so that "abs:href" lookups further down can be resolved
    doc.setBaseUri(opac_url + "/APS_PRESENT_BIB");

    Elements errorRow = doc.select("#ErrorAdviceRow");
    if (errorRow.size() > 0) {
        throw new OpacErrorException(errorRow.text().trim());
    }

    int results_total = -1;
    String searchHitsQuery = version18 ? "td:containsOwn(Total)" : ".searchHits";
    Elements searchHits = doc.select(searchHitsQuery);
    if (searchHits.size() > 0) {
        // The count is the last number in parentheses. Only parse it when the pattern
        // really matches; an element without "(N)" leaves the total at -1 instead of
        // aborting the whole parse with a NumberFormatException.
        Matcher hitsMatcher = Pattern.compile(".*\\(([0-9]+)\\).*")
                .matcher(searchHits.first().text().trim());
        if (hitsMatcher.matches()) {
            results_total = Integer.parseInt(hitsMatcher.group(1));
        }
    } else {
        // Zones 1.8 - searchGetPage
        Elements pageCounter = doc.select("span:matches(\\[\\d+/\\d+\\])");
        if (pageCounter.size() > 0) {
            Matcher counterMatcher = Pattern.compile("\\[\\d+/(\\d+)\\]").matcher(pageCounter.text());
            if (counterMatcher.find()) {
                results_total = Integer.parseInt(counterMatcher.group(1));
            }
        }
    }

    Elements pageNavLinks = doc.select(".pageNavLink");
    Elements targetDivs = doc.select("div[targetObject]");
    if (pageNavLinks.size() > 0) {
        // Zones 2.2
        searchobj = pageNavLinks.first().attr("href").split("\\?")[0];
    } else if (targetDivs.size() > 0) {
        // Zones 1.8 - search
        searchobj = targetDivs.attr("targetObject").split("\\?")[0];
    } else {
        // Zones 1.8 - searchGetPage
        // The page contains a data structure that at first glance seems to be JSON, but uses
        // "=" instead of ":". So we parse it using regex...
        Matcher targetMatcher = Pattern.compile("targetObject = \"([^\\?]+)[^\"]+\"").matcher(doc.html());
        if (targetMatcher.find()) {
            searchobj = targetMatcher.group(1);
        }
    }

    Elements table = doc.select(
            "#BrowseList > tbody > tr," // Zones 2.2
                    + " .inRoundBox1"); // Zones 1.8

    // Both selectors depend only on the layout, so they are built once for all rows
    String childrenQuery = version18 ? "table[cellpadding=1] tr" : ".SummaryDataCell tr, .SummaryDataCellStripe tr";
    String linkSelector = version18 ? "a[href*=ShowStock], a[href*=APS_CAT_IDENTIFY]" : ".SummaryFieldData a.SummaryFieldLink";

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type: the icon file name (without ".gif") or the text of the type field
        String typetext;
        if (version18) {
            String[] parts = tr.select("img[src^=IMG/MAT]").attr("src").split("/");
            typetext = parts[parts.length - 1].replace(".gif", "");
        } else {
            typetext = tr.select(".SummaryMaterialTypeField").text().replace("\n", " ").trim();
        }
        if (data.has("mediatypes")) {
            try {
                sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(typetext)));
            } catch (JSONException | IllegalArgumentException e) {
                // No usable mapping in "mediatypes" for this text: use the default table
                sr.setType(defaulttypes.get(typetext));
            }
        } else {
            sr.setType(defaulttypes.get(typetext));
        }

        // Cover image (stays null when none is found)
        String imgUrl = null;
        if (version18) {
            Elements coverLink = tr.select("a[title=Titelbild]");
            Elements smallImage = tr.select("img[width=50]");
            if (coverLink.size() > 0) {
                imgUrl = coverLink.attr("href");
            } else if (smallImage.size() > 0) {
                // TODO: better way to select these cover images? (found in Hannover)
                imgUrl = smallImage.attr("src");
            }
        } else {
            Elements bookcover = tr.select(".SummaryImageCell img[id^=Bookcover]");
            if (bookcover.size() > 0) {
                imgUrl = bookcover.first().attr("src");
            }
        }
        sr.setCover(imgUrl);

        if (version18) {
            if (tr.select("img[src$=oci_1.gif]").size() > 0) {
                // probably can only appear when searching the catalog on a terminal in
                // the library.
                sr.setStatus(SearchResult.Status.GREEN);
            } else if (tr.select("img[src$=blob_amber.gif]").size() > 0) {
                sr.setStatus(SearchResult.Status.YELLOW);
            }
        }

        // Description lines plus the id, which is taken from the first matching link only
        StringBuilder desc = new StringBuilder();
        Elements children = tr.select(childrenQuery);
        boolean haslink = false;
        for (Element node : children) {
            String name = getName(node);
            if (name.equals("Titel")) {
                desc.append("<b>").append(getValue(node).trim()).append("</b><br />");
            } else if (name.equals("Verfasser") || name.equals("Jahr")) {
                desc.append(getValue(node).trim()).append("<br />");
            }
            if (!haslink) {
                Elements links = node.select(linkSelector);
                if (links.size() > 0) {
                    Map<String, String> hrefq = getQueryParamsFirst(links.attr("abs:href"));
                    if (hrefq.containsKey("no")) {
                        sr.setId(hrefq.get("no"));
                    } else if (hrefq.containsKey("Key")) {
                        sr.setId(hrefq.get("Key"));
                    }
                    haslink = true;
                }
            }
        }
        String innerHtml = desc.toString();
        if (innerHtml.endsWith("<br />")) {
            // Drop the trailing line break ("<br />" is 6 characters long)
            innerHtml = innerHtml.substring(0, innerHtml.length() - 6);
        }
        sr.setInnerhtml(innerHtml);
        sr.setNr(i);
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 7 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class Littera method executeSearch.

/**
 * Runs a search and parses the returned result page.
 * <p>
 * Calls {@code start()} first if the instance is not yet initialised, builds the search URL
 * with {@code buildSearchUrl}, fetches it and converts every entry of the result list into
 * a {@link SearchResult}. List entries carrying the CSS class {@code zugangsmonat} are
 * skipped.
 *
 * @param query     the search query, handed to {@code buildSearchUrl}
 * @param pageIndex the requested page; stored on every result and on the returned object
 * @return the parsed hits and the total hit count; the count is {@code 0} when the page has
 *         no navigation element, and the hit list is empty when the page has no result list
 * @throws RuntimeException if {@code buildSearchUrl} fails with a {@link URISyntaxException}
 *                          (the original exception is kept as the cause)
 */
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex) throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }
    final String searchUrl;
    try {
        searchUrl = buildSearchUrl(query, pageIndex);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
    final String html = httpGet(searchUrl, getDefaultEncoding());
    final Document doc = Jsoup.parse(html);

    final Element navigation = doc.select(".result_view .navigation").first();
    final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0;

    final List<SearchResult> results = new ArrayList<>();
    final Element ul = doc.select(".result_view ul.list").first();
    if (ul == null) {
        // No result list on the page: report an empty result, just like a missing
        // navigation element is reported as zero hits, instead of failing with a
        // NullPointerException.
        return new SearchRequestResult(results, totalResults, pageIndex);
    }

    for (final Element li : ul.children()) {
        if (li.hasClass("zugangsmonat")) {
            continue;
        }
        final SearchResult result = new SearchResult();
        final Element title = li.select(".titelinfo a").first();
        result.setId(getQueryParamsFirst(title.attr("href")).get("id"));
        // Second line is the text of the element following the title link's parent
        result.setInnerhtml(title.text() + "<br>" + title.parent().nextElementSibling().text());
        result.setNr(results.size());
        result.setPage(pageIndex);
        result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text()));
        result.setCover(getCover(li));

        // The status is derived from the file name of the status icon; null if neither matches
        final String statusImg = li.select(".status img").attr("src");
        final SearchResult.Status status;
        if (statusImg.contains("-yes")) {
            status = SearchResult.Status.GREEN;
        } else if (statusImg.contains("-no")) {
            status = SearchResult.Status.RED;
        } else {
            status = null;
        }
        result.setStatus(status);
        results.add(result);
    }
    return new SearchRequestResult(results, totalResults, pageIndex);
}
Also used : SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) URISyntaxException(java.net.URISyntaxException) Document(org.jsoup.nodes.Document)

Example 8 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class SRU method parse_result.

/**
 * Converts an SRU response document into a {@link SearchRequestResult}.
 * <p>
 * The parsed document is kept in {@code searchDoc}. Every {@code zs:record} becomes one
 * {@link SearchResult}; when a record carries no cover URL, a cover URL is derived from its
 * ISBN via {@code ISBNTools.getAmazonCoverURL}.
 *
 * @param xml the raw XML response
 * @return the hits together with the value of {@code zs:numberOfRecords}; the page is always 1
 * @throws OpacErrorException if the response contains a {@code diag:diagnostic} element; the
 *                            text of {@code diag:message} is used as the message
 */
private SearchRequestResult parse_result(String xml) throws OpacErrorException {
    searchDoc = Jsoup.parse(xml, "", Parser.xmlParser());
    if (!searchDoc.select("diag|diagnostic").isEmpty()) {
        throw new OpacErrorException(searchDoc.select("diag|message").text());
    }

    final List<SearchResult> hits = new ArrayList<>();
    final int totalHits = Integer.parseInt(searchDoc.select("zs|numberOfRecords").text());
    final Elements recordList = searchDoc.select("zs|records > zs|record");

    for (int index = 0; index < recordList.size(); index++) {
        final Element record = recordList.get(index);

        // Fields read from the record
        final String title = getDetail(record, "titleInfo title");
        final String givenName = getDetail(record, "name > namePart[type=given]");
        final String familyName = getDetail(record, "name > namePart[type=family]");
        final String issued = getDetail(record, "dateIssued");
        final String form = getDetail(record, "physicalDescription > form");
        final String isbn = getDetail(record, "identifier[type=isbn]");
        final String cover = getDetail(record, "url[displayLabel=C Cover]");

        final SearchResult hit = new SearchResult();
        // Bold title on the first line, "given family, year" on the second
        hit.setInnerhtml("<b>" + title + "</b><br>" + givenName + " " + familyName + ", " + issued);
        hit.setType(defaulttypes.get(form));
        hit.setNr(index);
        hit.setId(getDetail(record, "recordIdentifier"));
        if (cover.isEmpty()) {
            hit.setCover(ISBNTools.getAmazonCoverURL(isbn, false));
        } else {
            hit.setCover(cover);
        }
        hits.add(hit);
    }
    return new SearchRequestResult(hits, totalHits, 1);
}
Also used : SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Elements(org.jsoup.select.Elements)

Example 9 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class TestApi method makeSearchResult.

/**
 * Creates one dummy search result plus a matching detailed item and appends them to
 * {@code list} and {@code detailList}.
 *
 * @param name       title, used in the result's inner HTML and as the item title
 * @param url        cover URL, set on both the search result and the detailed item
 * @param reservable value passed to {@code setReservable} on the detailed item
 */
private void makeSearchResult(String name, String url, boolean reservable) {
    // Search result entry; its number is the current size of the list it is appended to
    SearchResult res = new SearchResult();
    res.setNr(list.size());
    res.setInnerhtml("<b>" + name + "</b><br/>Lorem ipsum <i>dolor</i> sit amet.");
    res.setCover(url);
    res.setType(SearchResult.MediaType.BOOK);
    list.add(res);
    // Detailed item with the same title and cover, filled with fixed placeholder details
    DetailedItem item = new DetailedItem();
    item.setTitle(name);
    item.setReservable(reservable);
    item.setCover(url);
    item.addDetail(new Detail("Autor", "Max Mustermann"));
    // German filler text used as the description detail.
    // NOTE(review): in this copy the literal below is split by a raw line break in the middle
    // of a string, which javac rejects. Presumably a special character of the original
    // file was turned into a newline - confirm against the original source before changing it.
    item.addDetail(new Detail("Beschreibung", "Weit hinten, hinter den Wortbergen, " + "fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden " + "wohnen sie in Buchstabhausen an der Küste des Semantik, " + "eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren " + "Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches " + "Land, in dem einem gebratene Satzteile in den Mund fliegen.\n" + "\n" + "Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht" + " – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine " + "Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite " + "Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen " + "Kommata, wilden Fragezeichen und hinterhältigen Semikoli, " + "doch das Blindtextchen ließ sich nicht beirren.\n" + "Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und " + "machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen " + "hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt " + "Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen " + "Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die " + "Wange, dann setzte es seinen Weg fort.\n" + "\n" + "Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, " + "wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, " + "was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das " + "Blindtextchen solle umkehren und wieder in sein eigenes, " + "sicheres Land zurückkehren.\n" + "\n" + "Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, " + "bis ihm ein paar heimtückische Werbetexter auflauerten, " + "es mit Longe und Parole betrunken machten und es dann in ihre Agentur " + "schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. 
Und wenn" + " es nicht umgeschrieben wurde, dann benutzen Sie es immernoch." + ""));
    detailList.add(item);
}
Also used : SearchResult(de.geeksfactory.opacclient.objects.SearchResult) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 10 with SearchResult

use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.

the class BiBer1992 method parse_search.

/*
 * Parses a search result page into a SearchRequestResult.
 *
 * Result table format:
 *   JSON "rows_per_hit" = 1: one <tr> per hit
 *   JSON "rows_per_hit" = 2: two <tr> per hit (default)
 *
 * <form> <table>
 *   <tr valign="top">
 *     <td class="td3" ...><a href=...><img ...></a></td>   (optional, only in some bibs)
 *     <td class="td2" ...><input ...></td>
 *     <td width="34%">TITEL</td>
 *     <td width="34%">&nbsp;</td>
 *     <td width="6%" align="center">2009</td>
 *     <td width="*" align="left">DVD0 Seew</td>
 *   </tr>
 *   <tr valign="top">
 *     <td class="td3" ...>&nbsp;...</td>
 *     <td class="td2" ...>&nbsp;...</td>
 *     <td colspan="4" ...><font size="-1"><font class="p1">Erwachsenenbibliothek</font></font>
 *       <div class="hr4"></div></td>
 *   </tr>
 *
 * Parameters: html is the raw result page, page is passed through to the returned object.
 * Returns the parsed hits and the total hit count (-1 if the page does not state it, 0 with
 * an empty list if the page reports that nothing was found).
 */
private SearchRequestResult parse_search(String html, int page) {
    List<SearchResult> results = new ArrayList<>();
    Document doc = Jsoup.parse(html);
    if (doc.select("h3").text().contains("Es wurde nichts gefunden")) {
        return new SearchRequestResult(results, 0, page);
    }

    // <tr valign="top">
    Elements trList = doc.select("form table tr[valign]");
    if (trList.size() == 0) {
        // Schwieberdingen
        trList = doc.select("table:has(input[type=checkbox]) tr");
    }

    // Guess the layout: a single row, or checkboxes in both of the first two rows, means
    // one row per hit. An explicit "rows_per_hit" value in data overrides the guess.
    int rows_per_hit = 2;
    if (trList.size() == 1 || (trList.size() > 1 && trList.get(0).select("input[type=checkbox]").size() > 0 && trList.get(1).select("input[type=checkbox]").size() > 0)) {
        rows_per_hit = 1;
    }
    try {
        rows_per_hit = data.getInt("rows_per_hit");
    } catch (JSONException ignored) {
        // No usable "rows_per_hit" entry in data: keep the value detected above
    }

    // Overall search results
    // are very differently layouted, but have always the text:
    // "....Treffer Gesamt (nnn)"
    int results_total = -1;
    Matcher totalMatcher = Pattern.compile("Treffer Gesamt \\(([0-9]+)\\)").matcher(html);
    if (totalMatcher.find()) {
        results_total = Integer.parseInt(totalMatcher.group(1));
    }

    // Each hit spans rows_per_hit rows; never return more than numOfResultsPerPage hits
    int numOfEntries = Math.min(trList.size() / rows_per_hit, numOfResultsPerPage);
    for (int i = 0; i < numOfEntries; i++) {
        // First row of the i-th hit
        Element tr = trList.get(i * rows_per_hit);
        SearchResult sr = new SearchResult();

        // ID as href tag
        Elements elem = tr.select("td a");
        if (elem.size() > 0 && !elem.get(0).attr("href").contains("ISBN")) {
            // Exclude the cover links in Ludwigsburg as they lead to a page that misses the
            // reservation button
            sr.setId(elem.get(0).attr("href"));
        } else {
            // no ID as href found, look for the ID in the input form
            elem = tr.select("td input");
            if (elem.size() > 0) {
                String nameID = elem.get(0).attr("name").trim();
                String hrefID = "/" + opacDir + "/ftitle" + opacSuffix + "?LANG=de&FUNC=full&" + nameID + "=YES";
                sr.setId(hrefID);
            }
        }

        // media type
        elem = tr.select("td img");
        if (elem.size() > 0) {
            sr.setType(getMediaTypeFromImageFilename(sr, elem.get(0).attr("src"), data));
        }

        // description
        StringBuilder descBuilder = new StringBuilder();
        try {
            // array "searchtable" list the column numbers of the
            // description
            JSONArray searchtable = data.getJSONArray("searchtable");
            for (int j = 0; j < searchtable.length(); j++) {
                int colNum = searchtable.getInt(j);
                if (j > 0) {
                    descBuilder.append("<br />");
                }
                Element cell = tr.child(colNum);
                String c = cell.html();
                if (cell.childNodes().size() == 1 && cell.select("a[href*=ftitle.]").size() > 0) {
                    // The cell consists of the title link only: use the link text
                    c = tr.select("a[href*=ftitle.]").text();
                }
                descBuilder.append(c);
            }
        } catch (Exception e) {
            // Best effort: whatever was collected up to the failure is still used below
            e.printStackTrace();
        }
        String desc = descBuilder.toString();
        // remove links "<a ...>...</a>
        // needed for Friedrichshafen: "Warenkorb", "Vormerkung"
        // Herford: "Medienkorb"
        desc = desc.replaceAll("<a .*?</a>", "");
        // remove newlines (useless in HTML)
        desc = desc.replaceAll("\\n", "");
        // remove hidden divs ("Titel übernommen!" in Wuerzburg)
        desc = desc.replaceAll("<div[^>]*style=\"display:none\">.*</div>", "");
        // remove all invalid HTML tags
        desc = desc.replaceAll("</?(tr|td|font|table|tbody|div)[^>]*>", "");
        // replace multiple line breaks by one
        desc = desc.replaceAll("(<br( /)?>\\s*)+", "<br>");
        sr.setInnerhtml(desc);

        // Status from the font classes in the row: only p04x09b -> GREEN,
        // only p02x09b -> RED, both -> YELLOW, neither -> status left unset
        boolean hasP04 = tr.select("font.p04x09b").size() > 0;
        boolean hasP02 = tr.select("font.p02x09b").size() > 0;
        if (hasP04 && hasP02) {
            sr.setStatus(Status.YELLOW);
        } else if (hasP04) {
            sr.setStatus(Status.GREEN);
        } else if (hasP02) {
            sr.setStatus(Status.RED);
        }

        // number: i already counts hits (not table rows), so it must not be divided by
        // rows_per_hit again, otherwise two-row layouts yield 0, 0, 1, 1, ...
        sr.setNr(i);
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Aggregations

SearchResult (de.geeksfactory.opacclient.objects.SearchResult)23 SearchRequestResult (de.geeksfactory.opacclient.objects.SearchRequestResult)21 ArrayList (java.util.ArrayList)17 Element (org.jsoup.nodes.Element)16 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 Matcher (java.util.regex.Matcher)9 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)8 IOException (java.io.IOException)8 Pattern (java.util.regex.Pattern)8 URISyntaxException (java.net.URISyntaxException)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)4 MalformedURLException (java.net.MalformedURLException)4 URI (java.net.URI)4 HashMap (java.util.HashMap)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4 SearchQuery (de.geeksfactory.opacclient.searchfields.SearchQuery)3 TextSearchField (de.geeksfactory.opacclient.searchfields.TextSearchField)3