Search in sources :

Example 16 with Detail

use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.

the class SISISSearchTest method testParseDetail.

/**
 * Parses the three detail-page fixtures for the current {@code file} (plus the cover
 * script resource, which is passed through without a null check) and verifies the
 * resulting {@link DetailedItem}: at least one copy with plausible fields, well-formed
 * volumes, the expected title and, for {@code berlin_htw.html}, a known detail and a cover.
 *
 * <p>The test returns silently when any of the three HTML fixtures is missing, because
 * not every library ships all fixture files.
 */
@Test
public void testParseDetail() throws OpacApi.OpacErrorException, JSONException, IOException {
    String html1 = readResource("/sisis/result_detail/" + file.replace(".html", "_1.html"));
    String html2 = readResource("/sisis/result_detail/" + file.replace(".html", "_2.html"));
    String html3 = readResource("/sisis/result_detail/" + file.replace(".html", "_3.html"));
    String coverJs = readResource("/sisis/result_detail/" + file.replace(".html", ".js"));
    if (html1 == null || html2 == null || html3 == null) {
        // we may not have all files for all libraries
        return;
    }
    DetailedItem result = SISIS.parseDetail(html1, html2, html3, coverJs, new JSONObject(), new DummyStringProvider());
    assertTrue(result.getCopies().size() > 0);
    for (Copy copy : result.getCopies()) {
        assertContainsData(copy.getStatus());
        assertNullOrNotEmpty(copy.getBarcode());
        assertNullOrNotEmpty(copy.getBranch());
        assertNullOrNotEmpty(copy.getDepartment());
        assertNullOrNotEmpty(copy.getLocation());
        assertNullOrNotEmpty(copy.getReservations());
        assertNullOrNotEmpty(copy.getShelfmark());
        assertNullOrNotEmpty(copy.getUrl());
        // A copy that is on loan must carry a return date.
        if ("Entliehen".equals(copy.getStatus())) {
            assertNotNull(copy.getReturnDate());
        }
    }
    for (Volume volume : result.getVolumes()) {
        assertContainsData(volume.getId());
        assertContainsData(volume.getTitle());
    }
    // JUnit's signature is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    assertEquals(getDetailTitle(file), result.getTitle());
    if (file.equals("berlin_htw.html")) {
        assertTrue(result.getDetails().contains(new Detail("Signatur:", "15/2322")));
        assertNotNull(result.getCover());
    }
}
Also used : DummyStringProvider(de.geeksfactory.opacclient.i18n.DummyStringProvider) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) Volume(de.geeksfactory.opacclient.objects.Volume) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Detail(de.geeksfactory.opacclient.objects.Detail) Test(org.junit.Test)

Example 17 with Detail

use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.

the class Open method parse_result.

/**
 * Builds a {@link DetailedItem} from a parsed detail page.
 *
 * <p>Reads, in this order: title and subtitle, cover, id, annotation, parent collection,
 * the name/value detail rows, the content description, the copies table and - when the
 * page contains a dependent-catalogue container - the dependent volumes, which are fetched
 * with an additional HTTP POST.
 *
 * <p>Failures while fetching or decoding the dependent volumes are printed and otherwise
 * ignored, so the item is still returned without volumes.
 *
 * @param doc the parsed detail page
 * @return the item populated with everything that could be extracted
 */
protected DetailedItem parse_result(Document doc) {
    DetailedItem item = new DetailedItem();
    // Title and Subtitle
    item.setTitle(doc.select("span[id$=LblShortDescriptionValue], span[id$=LblTitleValue]").text());
    String subtitle = doc.select("span[id$=LblSubTitleValue]").text();
    if (subtitle.equals("") && doc.select("span[id$=LblShortDescriptionValue]").size() > 0) {
        // Subtitle detection for Bern: the element following the short description's parent
        // is taken as subtitle when it contains no span. There may be no following element
        // at all, in which case nextElementSibling() returns null.
        Element next = doc.select("span[id$=LblShortDescriptionValue]").first().parent().nextElementSibling();
        if (next != null && next.select("span").size() == 0) {
            subtitle = next.text().trim();
        }
    }
    if (!subtitle.equals("")) {
        item.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
    }
    // Cover
    if (doc.select("input[id$=mediumImage]").size() > 0) {
        item.setCover(doc.select("input[id$=mediumImage]").attr("src"));
    } else if (doc.select("img[id$=CoverView_Image]").size() > 0) {
        assignBestCover(item, getCoverUrlList(doc.select("img[id$=CoverView_Image]").first()));
    }
    // ID
    item.setId(doc.select("input[id$=regionmednr]").val());
    // Description
    if (doc.select("span[id$=ucCatalogueContent_LblAnnotation]").size() > 0) {
        String name = doc.select("span[id$=lblCatalogueContent]").text();
        String value = doc.select("span[id$=ucCatalogueContent_LblAnnotation]").text();
        item.addDetail(new Detail(name, value));
    }
    // Parent
    if (doc.select("a[id$=HyperLinkParent]").size() > 0) {
        item.setCollectionId(doc.select("a[id$=HyperLinkParent]").first().attr("href"));
    }
    // Details: every matched row holds a label span followed by a span or link(s)
    String detailSelector = "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+span)," + "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+a), " + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+span), " + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+a)";
    for (Element detail : doc.select(detailSelector)) {
        String name = detail.select("span").get(0).text().replace(": ", "");
        Elements links = detail.select("a");
        String value;
        if (links.size() > 1) {
            // Several links: join their texts with ", "
            StringBuilder joined = new StringBuilder();
            for (int i = 0; i < links.size(); i++) {
                if (i != 0) {
                    joined.append(", ");
                }
                joined.append(links.get(i).text().trim());
            }
            value = joined.toString();
        } else {
            value = detail.select("span, a").get(1).text();
            if (value.contains("hier klicken") && links.size() > 0) {
                // "click here" is useless without its target, so append the link
                value = value + " " + links.first().attr("href");
            }
        }
        item.addDetail(new Detail(name, value));
    }
    // Description
    if (doc.select("div[id$=CatalogueContent]").size() > 0) {
        String name = doc.select("div[id$=CatalogueContent] .oclc-module-header").text();
        String value = doc.select("div[id$=CatalogueContent] .oclc-searchmodule-detail-annotation").text();
        item.addDetail(new Detail(name, value));
    }
    // Copies
    Element table = doc.select("table[id$=grdViewMediumCopies]").first();
    if (table != null) {
        Elements trs = table.select("tr");
        // A table without any row has no header to map columns from
        if (!trs.isEmpty()) {
            List<String> columnmap = new ArrayList<>();
            for (Element th : trs.first().select("th")) {
                columnmap.add(getCopyColumnKey(th.text()));
            }
            DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
            for (int i = 1; i < trs.size(); i++) {
                Elements tds = trs.get(i).select("td");
                Copy copy = new Copy();
                for (int j = 0; j < tds.size(); j++) {
                    // Skip cells beyond the header and columns without a known key
                    if (j >= columnmap.size() || columnmap.get(j) == null) {
                        continue;
                    }
                    Element td = tds.get(j);
                    String text = td.text().replace("\u00a0", "");
                    if (td.select(".oclc-module-label").size() > 0 && td.select("span").size() == 2) {
                        // The cell repeats its label; the value is the second span
                        text = td.select("span").get(1).text();
                    }
                    if (text.equals("")) {
                        continue;
                    }
                    try {
                        copy.set(columnmap.get(j), text, fmt);
                    } catch (IllegalArgumentException e) {
                        // NOTE(review): the other parsers guard copy.set the same way,
                        // presumably against unparseable dates - confirm against Copy.set.
                        e.printStackTrace();
                    }
                }
                item.addCopy(copy);
            }
        }
    }
    // Dependent (e.g. Verden)
    if (doc.select("div[id$=DivDependentCatalogue]").size() > 0) {
        String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService.asmx/GetDependantCatalogues";
        JSONObject postData = new JSONObject();
        // Determine portalID value from the inline script call; defaults to 1
        int portalId = 1;
        Pattern portalIdPattern = Pattern.compile(".*LoadCatalogueViewDependantCataloguesAsync\\([^,]*,[^,]*," + "[^,]*,[^,]*,[^,]*,[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");
        for (Element scripttag : doc.select("script")) {
            String scr = scripttag.html();
            if (scr.contains("LoadCatalogueViewDependantCataloguesAsync")) {
                Matcher portalIdMatcher = portalIdPattern.matcher(scr);
                if (portalIdMatcher.find()) {
                    portalId = Integer.parseInt(portalIdMatcher.group(1));
                }
            }
        }
        try {
            postData.put("portalId", portalId).put("mednr", item.getId()).put("tabUrl", opac_url + "/" + data.getJSONObject("urls").getString("simple_search") + NO_MOBILE + "&id=").put("branchFilter", "");
            RequestBody entity = RequestBody.create(MEDIA_TYPE_JSON, postData.toString());
            String json = httpPost(url, entity, getDefaultEncoding());
            JSONObject volumeData = new JSONObject(json);
            JSONArray cat = volumeData.getJSONObject("d").getJSONArray("Catalogues");
            for (int i = 0; i < cat.length(); i++) {
                JSONObject obj = cat.getJSONObject(i);
                Map<String, String> params = getQueryParamsFirst(obj.getString("DependantUrl"));
                item.addVolume(new Volume(params.get("id"), obj.getString("DependantTitle")));
            }
        } catch (JSONException | IOException e) {
            // Volumes are optional: report the failure and return the item without them
            e.printStackTrace();
        }
    }
    return item;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) IOException(java.io.IOException) Elements(org.jsoup.select.Elements) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) Volume(de.geeksfactory.opacclient.objects.Volume) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail) RequestBody(okhttp3.RequestBody)

Example 18 with Detail

use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.

the class Primo method parse_detail.

/**
 * Assembles the {@link DetailedItem} for one record.
 *
 * <p>Bibliographic details come from {@code doc}. Copies are read from the locations tab,
 * which is fetched separately; when no copy is found, the online-access tab is fetched as
 * well and its links are added as details.
 *
 * @param id  record id, stored on the item and inserted into the tab URLs
 * @param doc parsed detail page of the record
 * @return the populated item
 * @throws OpacErrorException declared by the signature; not thrown directly in this body
 * @throws IOException        declared by the signature; presumably raised by the HTTP
 *                            requests - confirm against {@code httpGet}
 */
protected DetailedItem parse_detail(String id, Document doc) throws OpacErrorException, IOException {
    DetailedItem item = new DetailedItem();
    item.setId(id);
    item.setTitle(doc.select(".EXLResultTitle").text());

    // Each list entry holds a label element followed by the value as mixed child nodes.
    for (Element entry : doc.select(".EXLDetailsContent li")) {
        String label = null;
        StringBuilder content = new StringBuilder();
        for (Node child : entry.childNodes()) {
            if (child instanceof Element) {
                Element el = (Element) child;
                boolean isLabel = el.tagName().equals("strong") || el.hasClass("bib-EXLDetailsContent-item-title");
                if (isLabel) {
                    label = el.text();
                } else if (label != null) {
                    content.append(el.text());
                }
            } else if (child instanceof TextNode && label != null) {
                content.append(((TextNode) child).text());
            }
        }
        if (label != null) {
            item.addDetail(new Detail(label, content.toString().trim()));
        }
    }

    // Both extra tabs share everything in the URL except the trailing "tabs" value.
    String tabUrlPrefix = opac_url + "/action/display.do?ct=display&fn=search&vid=" + vid + "&doc=" + id;

    Document locations = Jsoup.parse(httpGet(tabUrlPrefix + "&tabs=locationsTab", getDefaultEncoding()));
    if (locations.select(".EXLLocationTitlesRow").size() > 0) {
        // Tabular layout: map column index -> copy field by looking at the header text.
        Map<Integer, String> fieldByColumn = new HashMap<>();
        int headerIndex = 0;
        for (Element header : locations.select(".EXLLocationTitlesRow th")) {
            String caption = header.text().toLowerCase(Locale.GERMAN).trim();
            String field = null;
            if (caption.contains("library") || caption.contains("bibliothek") || caption.contains("branch")) {
                field = "branch";
            } else if (caption.contains("location") || caption.contains("ort")) {
                field = "location";
            } else if (caption.contains("call number") || caption.contains("signatur")) {
                field = "signature";
            } else if (caption.contains("due date") || caption.contains("llig am") || caption.contains("ausgeliehen bis") || caption.contains("lligkeit") || caption.contains("ausleihstatus")) {
                field = "returndate";
            } else if (caption.contains("loan to") || caption.contains("bezugsmodalit") || caption.contains("ausleihm") || caption.contains("status")) {
                field = "status";
            } else if (caption.contains("queue") || caption.contains("vormerker")) {
                field = "reservations";
            }
            if (field != null) {
                fieldByColumn.put(headerIndex, field);
            }
            headerIndex++;
        }

        DateTimeFormatter dottedDate = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        DateTimeFormatter slashedDate = DateTimeFormat.forPattern("dd/MM/yyyy").withLocale(Locale.GERMAN);
        for (Element dataRow : locations.select(".EXLLocationTable tr:not(.EXLLocationTitlesRow):not(.EXLAdditionalFieldsRow)")) {
            Copy copy = new Copy();
            int column = 0;
            for (Element cell : dataRow.children()) {
                String field = fieldByColumn.get(column);
                column++;
                String text = cell.text().replace("\u00a0", " ").trim();
                if (field == null || text.equals("")) {
                    continue;
                }
                try {
                    copy.set(field, text, dottedDate);
                } catch (IllegalArgumentException dottedFailure) {
                    // Retry with the slash-separated date format before giving up.
                    try {
                        copy.set(field, text, slashedDate);
                    } catch (IllegalArgumentException slashedFailure) {
                        slashedFailure.printStackTrace();
                    }
                }
            }
            item.addCopy(copy);
        }
    } else if (locations.select(".EXLLocationList").size() > 0) {
        // e.g. University of South Wales
        for (Element entry : locations.select(".EXLLocationList")) {
            Copy copy = new Copy();
            copy.setBranch(entry.select(".EXLLocationsTitle").text());
            copy.setDepartment(entry.select(".EXLLocationInfo strong").text());
            copy.setShelfmark(entry.select(".EXLLocationInfo cite").text());
            copy.setStatus(entry.select(".EXLLocationInfo em").text());
            item.addCopy(copy);
        }
    }

    if (item.getCopies().size() == 0) {
        // No copies at all: possibly an online medium, so collect its access links.
        Document online = Jsoup.parse(httpGet(tabUrlPrefix + "&tabs=viewOnlineTab", getDefaultEncoding()));
        // Base URI is needed so that absUrl() can resolve relative hrefs.
        online.setBaseUri(opac_url + "/action/display.do");
        Element headerLink = online.select(".EXLTabHeaderContent a").first();
        if (headerLink != null) {
            item.addDetail(new Detail(headerLink.text().trim(), cleanUrl(headerLink.absUrl("href"))));
        }
        for (Element onlineLink : online.select(".EXLViewOnlineLinksTitle a")) {
            item.addDetail(new Detail(onlineLink.text().trim(), cleanUrl(onlineLink.absUrl("href"))));
        }
    }
    return item;
}
Also used : HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 19 with Detail

use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.

the class Heidi method getResultById.

/**
 * Loads the detail page for {@code id} and converts it into a {@link DetailedItem}.
 *
 * <p>Starts a session first when none exists, and sends the home branch as the
 * {@code zweig} cookie when one is given. The result carries the title and details from
 * the record table, the copies from the copies table, the reservation flag when a
 * reservation link is present, and a volume search query when a volumes link is present.
 *
 * @param id         catalogue key of the record
 * @param homebranch home branch to send with the request; ignored when null or empty
 * @return the populated item
 * @throws IOException declared by the signature; presumably raised by the HTTP request -
 *                     confirm against {@code httpGet}
 */
@Override
public DetailedItem getResultById(String id, final String homebranch) throws IOException {
    if (sessid == null) {
        start();
    }
    // Homebranch
    if (homebranch != null && !"".equals(homebranch)) {
        cookieStore.addCookie(new BasicClientCookie("zweig", homebranch));
    }
    String html = httpGet(opac_url + "/titel.cgi?katkey=" + id + "&sess=" + sessid, ENCODING, false, cookieStore);
    Document doc = Jsoup.parse(html);
    DetailedItem item = new DetailedItem();
    item.setId(id);

    // Record table: header cell is the label, data cell the value
    for (Element tr : doc.select(".titelsatz tr")) {
        if (tr.select("th").size() == 0 || tr.select("td").size() == 0) {
            continue;
        }
        String d = tr.select("th").first().text();
        String c = tr.select("td").first().text();
        if (d.equals("Titel:")) {
            item.setTitle(c);
        } else if ((d.contains("URL") || d.contains("Link")) && tr.select("td a").size() > 0) {
            // For link rows keep the target rather than the link text
            item.addDetail(new Detail(d, tr.select("td a").first().attr("href")));
        } else {
            item.addDetail(new Detail(d, c));
        }
    }

    // Copies
    Elements copyRows = doc.select(".ex table tr");
    if (copyRows.size() > 0) {
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        // Fully qualified so that this method does not depend on additional imports.
        java.util.regex.Pattern returnDatePattern = java.util.regex.Pattern.compile("entliehen bis ([0-9.]+)");
        java.util.regex.Pattern reservationsPattern = java.util.regex.Pattern.compile("\\(.*Vormerkungen: ([0-9]+)\\)");
        for (Element tr : copyRows) {
            if (tr.hasClass("exueber") || tr.select(".exsig").size() == 0 || tr.select(".exso").size() == 0 || tr.select(".exstatus").size() == 0) {
                continue;
            }
            Copy copy = new Copy();
            copy.setShelfmark(tr.select(".exsig").first().text());
            copy.setBranch(tr.select(".exso").first().text());
            String status = tr.select(".exstatus").first().text();
            if (status.contains("entliehen bis")) {
                // Extract the parts explicitly: a replaceAll-based extraction hands back the
                // whole status text whenever the pattern does not match, which would be
                // stored as the reservation count or fed to the date parser.
                java.util.regex.Matcher dateMatcher = returnDatePattern.matcher(status);
                if (dateMatcher.find()) {
                    try {
                        copy.setReturnDate(fmt.parseLocalDate(dateMatcher.group(1)));
                    } catch (IllegalArgumentException e) {
                        // Unparseable date: keep the copy, just without a return date
                        e.printStackTrace();
                    }
                }
                java.util.regex.Matcher reservationsMatcher = reservationsPattern.matcher(status);
                if (reservationsMatcher.find()) {
                    copy.setReservations(reservationsMatcher.group(1));
                }
                copy.setStatus("entliehen");
            } else {
                copy.setStatus(status);
            }
            item.addCopy(copy);
        }
    }

    // Reservable when the status area links to the order script
    for (Element a : doc.select(".status1 a")) {
        if (a.attr("href").contains("bestellung.cgi")) {
            item.setReservable(true);
            item.setReservation_info(id);
            break;
        }
    }

    // Volumes link: remember its query for a follow-up volume search
    for (Element a : doc.select(".titelsatz a")) {
        if (a.text().trim().matches("B.+nde")) {
            Map<String, String> volumesearch = new HashMap<>();
            volumesearch.put("query", getQueryParamsFirst(a.attr("href")).get("query"));
            item.setVolumesearch(volumesearch);
        }
    }
    return item;
}
Also used : Copy(de.geeksfactory.opacclient.objects.Copy) HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) BasicClientCookie(org.apache.http.impl.cookie.BasicClientCookie) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 20 with Detail

use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.

the class BiBer1992 method parse_result.

/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>The page holds a two-column table inside of a form: the 1st column is the category,
 * e.g. "Verfasser", the 2nd column is the content, e.g. "Bach, Johann Sebastian". In some
 * rows the 1st column is empty; then the 2nd column is continued text from the row above.
 *
 * <p>Some libraries have a second section for the copies in stock (Exemplare). This 2nd
 * section has a reverse layout (header row first, then one row per copy).
 *
 * <pre>
 * |-------------------|
 * | Subject | Content |
 * |-------------------|
 * | Subject | Content |
 * |-------------------|
 * |         | Content |
 * |-------------------|
 * | Subject | Content |
 * |-------------------------------------------------|
 * |         | Site    | Signatur| ID      | State   |
 * |-------------------------------------------------|
 * |         | Content | Content | Content | Content |
 * |-------------------------------------------------|
 * </pre>
 *
 * <p>Which table column feeds which copy field can be overridden through the optional
 * {@code copiestable} object of the library configuration.
 *
 * @param html raw HTML of the detail page
 * @return the populated item
 */
private DetailedItem parse_result(String html) {
    DetailedItem item = new DetailedItem();
    Document document = Jsoup.parse(html);
    Elements rows = document.select("html body form table tr");
    // Detail that a following row with an empty category continues
    Detail detail = null;
    // prepare copiestable
    Copy copy_last_content = null;
    int copy_row = 0;
    String[] copy_keys = new String[] { "barcode", "branch", "department", "location", "status", "returndate", "reservations" };
    // Column index per key, -1 meaning "not present"
    int[] copy_map = new int[] { 3, 1, -1, 1, 4, -1, -1 };
    try {
        JSONObject map = data.getJSONObject("copiestable");
        for (int i = 0; i < copy_keys.length; i++) {
            if (map.has(copy_keys[i])) {
                copy_map[i] = map.getInt(copy_keys[i]);
            }
        }
    } catch (Exception ignored) {
        // "copiestable" is optional; keep the defaults
    }
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    // go through all rows
    for (Element row : rows) {
        Elements columns = row.children();
        if (columns.size() == 2) {
            // HTML tag "&nbsp;" is encoded as 0xA0
            String firstColumn = columns.get(0).text().replace("\u00a0", " ").trim();
            String secondColumn = columns.get(1).text().replace("\u00a0", " ").trim();
            if (firstColumn.length() > 0) {
                // 1st column is category
                if (firstColumn.equalsIgnoreCase("titel")) {
                    detail = null;
                    item.setTitle(secondColumn);
                } else {
                    if (secondColumn.contains("hier klicken") && columns.get(1).select("a").size() > 0) {
                        // "click here" is useless without its target, so append the link
                        secondColumn += " " + columns.get(1).select("a").first().attr("href");
                    }
                    detail = new Detail(firstColumn, secondColumn);
                    item.getDetails().add(detail);
                }
            } else {
                // 1st column is empty, so it is an extension to the last category
                if (detail != null) {
                    String content = detail.getContent() + "\n" + secondColumn;
                    detail.setContent(content);
                } else {
                    // check if there is an amazon image; read src from the same element
                    // that the check matched
                    Element cover = columns.get(0).select("a img[src]").first();
                    if (cover != null) {
                        item.setCover(cover.attr("src"));
                    }
                }
            }
        } else if (columns.size() > 3) {
            // copies section; the 1st such row is the header and is skipped
            if (copy_row > 0) {
                Copy copy = new Copy();
                for (int j = 0; j < copy_keys.length; j++) {
                    int col = copy_map[j];
                    // The mapping may point past the end of a short row (the default status
                    // column is 4, while a row only needs 4 cells to get here)
                    if (col > -1 && col < columns.size()) {
                        String text = "";
                        if (copy_keys[j].equals("branch")) {
                            // for "Standort" only use ownText() to suppress
                            // Link "Wegweiser"
                            text = columns.get(col).ownText().replace("\u00a0", " ").trim();
                        }
                        if (text.length() == 0) {
                            // text of children
                            text = columns.get(col).text().replace("\u00a0", " ").trim();
                        }
                        if (text.length() == 0) {
                            // empty cell means "same as in the row above";
                            // this is sometimes the case for "Standort"
                            if (copy_keys[j].equals("status")) {
                                // but do it not for Status
                                text = " ";
                            } else if (copy_last_content != null) {
                                String inherited = copy_last_content.get(copy_keys[j]);
                                // The previous copy may not have that field either
                                text = inherited != null ? inherited : "";
                            } else {
                                text = "";
                            }
                        }
                        if (copy_keys[j].equals("reservations")) {
                            text = text.replace("Vorgemerkt: ", "").replace("Vorbestellt: ", "");
                        }
                        try {
                            copy.set(copy_keys[j], text, fmt);
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                }
                // branch and location default to the same column; drop the duplicate
                if (copy.getBranch() != null && copy.getLocation() != null && copy.getLocation().equals(copy.getBranch())) {
                    copy.setLocation(null);
                }
                item.addCopy(copy);
                copy_last_content = copy;
            }
            copy_row++;
        }
    }
    // We cannot check if media is reservable
    item.setReservable(true);
    if (opacDir.contains("opax")) {
        if (document.select("input[type=checkbox]").size() > 0) {
            item.setReservation_info(document.select("input[type=checkbox]").first().attr("name"));
        } else if (document.select("a[href^=reserv" + opacSuffix + "]").size() > 0) {
            String href = document.select("a[href^=reserv" + opacSuffix + "]").first().attr("href");
            int resStart = href.indexOf("resF_");
            if (resStart >= 0) {
                item.setReservation_info(href.substring(resStart));
            } else {
                // NOTE(review): a reservation link without the "resF_" marker yields no
                // usable reservation info, so the item is treated as not reservable -
                // confirm this is the desired fallback.
                item.setReservable(false);
            }
        } else {
            item.setReservable(false);
        }
    } else {
        item.setReservation_info(document.select("input[name=ID]").attr("value"));
    }
    return item;
}
Also used : Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) IOException(java.io.IOException) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Aggregations

Detail (de.geeksfactory.opacclient.objects.Detail): 20; DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem): 18; Copy (de.geeksfactory.opacclient.objects.Copy): 16; Element (org.jsoup.nodes.Element): 14; Document (org.jsoup.nodes.Document): 11; Elements (org.jsoup.select.Elements): 11; DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 10; JSONException (org.json.JSONException): 9; JSONObject (org.json.JSONObject): 7; IOException (java.io.IOException): 6; HashMap (java.util.HashMap): 6; NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException): 5; UnsupportedEncodingException (java.io.UnsupportedEncodingException): 5; NameValuePair (org.apache.http.NameValuePair): 4; BasicNameValuePair (org.apache.http.message.BasicNameValuePair): 4; Node (org.jsoup.nodes.Node): 4; TextNode (org.jsoup.nodes.TextNode): 4; Volume (de.geeksfactory.opacclient.objects.Volume): 3; URI (java.net.URI): 3; URISyntaxException (java.net.URISyntaxException): 3