Search in sources :

Example 21 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

the class Zones method parse_result.

/**
 * Parses the HTML of a Zones detail page into a {@link DetailedItem}.
 *
 * <p>The page is read in this order: the label/value rows of the detail table (the first row
 * labelled "Titel" supplies the title), the "Vormerken" action link, the free-text
 * {@code div.record-item-new} blocks, the optional {@code span.z3988} metadata, the cover image
 * and finally the {@code div[id^=stock_]} blocks that describe the copies.</p>
 *
 * @param id   identifier stored on the returned item
 * @param html raw HTML of the detail page
 * @return the populated item, never {@code null}
 */
private DetailedItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);
    DetailedItem result = new DetailedItem();
    result.setTitle("");
    boolean title_is_set = false;
    result.setId(id);

    // The selector for the detail rows depends on the version18 flag.
    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr" : ".DetailDataCell table table:not(.inRecordHeader) tr";
    for (Element tr : doc.select(detailTrsQuery)) {
        int s = tr.children().size();
        if (s == 0) {
            // A row without cells carries no data. Without this guard tr.child(0) throws
            // IndexOutOfBoundsException and the whole page fails to parse.
            continue;
        }
        String label = tr.child(0).text().trim();
        if (label.equals("Titel") && !title_is_set) {
            // Only the first "Titel" row is used as the title; later ones become details.
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // Value cells that wrap a nested table are skipped.
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(label, val));
                }
            }
        }
    }

    // Reservation link: if several links match, the last one wins.
    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    // Free-text blocks: each one becomes a detail with an empty label, <br> becomes a newline.
    for (Element dd : doc.select("div.record-item-new")) {
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                Element child = (Element) node;
                if (child.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(child.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    Elements z3988 = doc.select("span.z3988");
    if (z3988.size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        // NOTE(review): the values are used exactly as found in the title attribute; if they
        // are URL-encoded they are not decoded here - confirm against real pages.
        String z3988data = z3988.first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length != 2 || nv[1].trim().equals("")) {
                continue;
            }
            boolean isTitleKey = nv[0].equals("rft.btitle") || nv[0].equals("rft.atitle");
            if (isTitleKey && result.getTitle().length() == 0) {
                // Only used as a fallback when the detail table provided no title.
                result.setTitle(nv[1]);
            } else if (nv[0].equals("rft.au")) {
                result.addDetail(new Detail("Author", nv[1]));
            }
        }
    }

    // Cover
    Elements covers = doc.select(".BookCover, .LargeBookCover");
    if (covers.size() > 0) {
        result.setCover(covers.first().attr("src"));
    }

    // Copies: "stock_head*" blocks name the branch for the stock blocks that follow them.
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    String pop = "";
    for (Element div : doc.select("div[id^=stock_]")) {
        if (div.attr("id").startsWith("stock_head")) {
            pop = div.text().trim();
            continue;
        }
        // NOTE(review): the same Copy instance is reused for every <br>-terminated record of
        // this block. If a block can hold more than one record, the entries added to the
        // result share state - confirm against real markup before changing.
        Copy copy = new Copy();
        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j is the position of the current node within the record; a <br> ends the record.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    Element el = (Element) node;
                    String tagName = el.tag().getName();
                    if (tagName.equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        // Becomes 0 again after the increment below.
                        j = -1;
                    } else if (tagName.equals("b") && j == 1) {
                        copy.setLocation(el.text());
                    } else if (tagName.equals("b") && j > 1) {
                        copy.setStatus(el.text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    if (j == 0) {
                        copy.setDepartment(textNode.text());
                    }
                    if (j == 2) {
                        // Only the first line of the raw text is used as the barcode.
                        copy.setBarcode(textNode.getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        // The last ten characters are expected to be a dd.MM.yyyy date.
                        String text = textNode.text().trim();
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                // Best effort: a node that does not match the expected layout is skipped.
                e.printStackTrace();
            }
        }
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) IOException(java.io.IOException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 22 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

the class SISISSearchTest method testParseDetail.

/**
 * Parses the recorded SISIS detail pages belonging to {@code file} and checks the copies,
 * volumes and title of the resulting item.
 *
 * <p>The test returns without asserting anything when one of the three HTML resources is
 * missing.</p>
 */
@Test
public void testParseDetail() throws OpacApi.OpacErrorException, JSONException, IOException {
    final String dir = "/sisis/result_detail/";
    String html1 = readResource(dir + file.replace(".html", "_1.html"));
    String html2 = readResource(dir + file.replace(".html", "_2.html"));
    String html3 = readResource(dir + file.replace(".html", "_3.html"));
    String coverJs = readResource(dir + file.replace(".html", ".js"));
    if (html1 == null || html2 == null || html3 == null) {
        // we may not have all files for all libraries
        return;
    }
    DetailedItem result = SISIS.parseDetail(html1, html2, html3, coverJs, new JSONObject(), new DummyStringProvider());
    assertTrue(result.getCopies().size() > 0);
    for (Copy copy : result.getCopies()) {
        assertContainsData(copy.getStatus());
        assertNullOrNotEmpty(copy.getBarcode());
        assertNullOrNotEmpty(copy.getBranch());
        assertNullOrNotEmpty(copy.getDepartment());
        assertNullOrNotEmpty(copy.getLocation());
        assertNullOrNotEmpty(copy.getReservations());
        assertNullOrNotEmpty(copy.getShelfmark());
        assertNullOrNotEmpty(copy.getUrl());
        // A copy that is on loan ("Entliehen") must carry a return date.
        if ("Entliehen".equals(copy.getStatus())) {
            assertNotNull(copy.getReturnDate());
        }
    }
    for (Volume volume : result.getVolumes()) {
        assertContainsData(volume.getId());
        assertContainsData(volume.getTitle());
    }
    // JUnit expects (expected, actual); getDetailTitle(file) presumably supplies the expected
    // title for this fixture, so it goes first to keep failure messages the right way round.
    assertEquals(getDetailTitle(file), result.getTitle());
    if (file.equals("berlin_htw.html")) {
        assertTrue(result.getDetails().contains(new Detail("Signatur:", "15/2322")));
        assertNotNull(result.getCover());
    }
}
Also used : DummyStringProvider(de.geeksfactory.opacclient.i18n.DummyStringProvider) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) Volume(de.geeksfactory.opacclient.objects.Volume) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Detail(de.geeksfactory.opacclient.objects.Detail) Test(org.junit.Test)

Example 23 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

the class Open method parse_result.

/**
 * Builds a {@link DetailedItem} from a parsed detail page.
 *
 * <p>Reads title, subtitle, cover, id, annotation, parent link, the label/value detail rows and
 * the copies table from {@code doc}. When the page contains a
 * {@code div[id$=DivDependentCatalogue]} element, the dependent catalogues are additionally
 * requested via HTTP POST and added as volumes; failures of that request are printed and
 * otherwise ignored.</p>
 *
 * @param doc parsed detail page
 * @return the populated item, never {@code null}
 */
protected DetailedItem parse_result(Document doc) {
    DetailedItem item = new DetailedItem();
    // Title and Subtitle
    item.setTitle(doc.select("span[id$=LblShortDescriptionValue], span[id$=LblTitleValue]").text());
    String subtitle = doc.select("span[id$=LblSubTitleValue]").text();
    if (subtitle.equals("") && doc.select("span[id$=LblShortDescriptionValue]").size() > 0) {
        // Subtitle detection for Bern
        Element next = doc.select("span[id$=LblShortDescriptionValue]").first().parent().nextElementSibling();
        // nextElementSibling() returns null when there is no following element.
        if (next != null && next.select("span").size() == 0) {
            subtitle = next.text().trim();
        }
    }
    if (!subtitle.equals("")) {
        item.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
    }
    // Cover
    if (doc.select("input[id$=mediumImage]").size() > 0) {
        item.setCover(doc.select("input[id$=mediumImage]").attr("src"));
    } else if (doc.select("img[id$=CoverView_Image]").size() > 0) {
        assignBestCover(item, getCoverUrlList(doc.select("img[id$=CoverView_Image]").first()));
    }
    // ID
    item.setId(doc.select("input[id$=regionmednr]").val());
    // Description
    if (doc.select("span[id$=ucCatalogueContent_LblAnnotation]").size() > 0) {
        String name = doc.select("span[id$=lblCatalogueContent]").text();
        String value = doc.select("span[id$=ucCatalogueContent_LblAnnotation]").text();
        item.addDetail(new Detail(name, value));
    }
    // Parent
    if (doc.select("a[id$=HyperLinkParent]").size() > 0) {
        item.setCollectionId(doc.select("a[id$=HyperLinkParent]").first().attr("href"));
    }
    // Details
    // Every selector alternative requires a span followed by a span or a link, so the
    // get(0)/get(1) lookups below always find their element.
    String DETAIL_SELECTOR = "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+span),"
            + "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+a), "
            + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+span), "
            + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+a)";
    for (Element detail : doc.select(DETAIL_SELECTOR)) {
        String name = detail.select("span").get(0).text().replace(": ", "");
        String value;
        Elements links = detail.select("a");
        if (links.size() > 1) {
            // Several links: join their texts with ", ".
            StringBuilder joined = new StringBuilder();
            boolean first = true;
            for (Element a : links) {
                if (!first) {
                    joined.append(", ");
                }
                joined.append(a.text().trim());
                first = false;
            }
            value = joined.toString();
        } else {
            value = detail.select("span, a").get(1).text();
            // "hier klicken" ("click here") alone is useless, so the link target is appended.
            if (value.contains("hier klicken") && links.size() > 0) {
                value = value + " " + links.first().attr("href");
            }
        }
        item.addDetail(new Detail(name, value));
    }
    // Description
    if (doc.select("div[id$=CatalogueContent]").size() > 0) {
        String name = doc.select("div[id$=CatalogueContent] .oclc-module-header").text();
        String value = doc.select("div[id$=CatalogueContent] .oclc-searchmodule-detail-annotation").text();
        item.addDetail(new Detail(name, value));
    }
    // Copies
    Element table = doc.select("table[id$=grdViewMediumCopies]").first();
    if (table != null) {
        Elements trs = table.select("tr");
        // Column index -> copy field key, taken from the header cells of the first row.
        List<String> columnmap = new ArrayList<>();
        Element headerRow = trs.first();
        // first() returns null for a table without rows.
        if (headerRow != null) {
            for (Element th : headerRow.select("th")) {
                columnmap.add(getCopyColumnKey(th.text()));
            }
        }
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        for (int i = 1; i < trs.size(); i++) {
            Elements tds = trs.get(i).select("td");
            Copy copy = new Copy();
            for (int j = 0; j < tds.size(); j++) {
                // A row may hold more cells than there are header cells; such cells have no
                // key and are skipped instead of raising IndexOutOfBoundsException.
                if (j >= columnmap.size() || columnmap.get(j) == null) {
                    continue;
                }
                Element td = tds.get(j);
                String text = td.text().replace("\u00a0", "");
                // Cells with an embedded label consist of two spans; the second is the value.
                if (td.select(".oclc-module-label").size() > 0 && td.select("span").size() == 2) {
                    text = td.select("span").get(1).text();
                }
                if (text.equals("")) {
                    continue;
                }
                copy.set(columnmap.get(j), text, fmt);
            }
            item.addCopy(copy);
        }
    }
    // Dependent (e.g. Verden)
    if (doc.select("div[id$=DivDependentCatalogue]").size() > 0) {
        String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService.asmx/GetDependantCatalogues";
        JSONObject postData = new JSONObject();
        // Determine portalID value
        // The first run of digits in the sixth call argument is captured; 1 is the fallback
        // when no script contains the call.
        Pattern portalIdPattern = Pattern.compile(".*LoadCatalogueViewDependantCataloguesAsync\\([^,]*,[^,]*," + "[^,]*,[^,]*,[^,]*,[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");
        int portalId = 1;
        for (Element scripttag : doc.select("script")) {
            String scr = scripttag.html();
            if (scr.contains("LoadCatalogueViewDependantCataloguesAsync")) {
                Matcher portalIdMatcher = portalIdPattern.matcher(scr);
                if (portalIdMatcher.find()) {
                    portalId = Integer.parseInt(portalIdMatcher.group(1));
                }
            }
        }
        try {
            postData.put("portalId", portalId).put("mednr", item.getId()).put("tabUrl", opac_url + "/" + data.getJSONObject("urls").getString("simple_search") + NO_MOBILE + "&id=").put("branchFilter", "");
            RequestBody entity = RequestBody.create(MEDIA_TYPE_JSON, postData.toString());
            String json = httpPost(url, entity, getDefaultEncoding());
            JSONObject volumeData = new JSONObject(json);
            JSONArray cat = volumeData.getJSONObject("d").getJSONArray("Catalogues");
            for (int i = 0; i < cat.length(); i++) {
                JSONObject obj = cat.getJSONObject(i);
                Map<String, String> params = getQueryParamsFirst(obj.getString("DependantUrl"));
                item.addVolume(new Volume(params.get("id"), obj.getString("DependantTitle")));
            }
        } catch (JSONException | IOException e) {
            // Volumes are optional: the item is still returned without them.
            e.printStackTrace();
        }
    }
    return item;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) FormElement(org.jsoup.nodes.FormElement) ArrayList(java.util.ArrayList) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) IOException(java.io.IOException) Elements(org.jsoup.select.Elements) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) Volume(de.geeksfactory.opacclient.objects.Volume) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail) RequestBody(okhttp3.RequestBody)

Example 24 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

the class Primo method parse_detail.

/**
 * Assembles the detail view of a single record.
 *
 * <p>Title and label/value details come from {@code doc}. The copies are read from a second
 * page (the "locationsTab"), which is fetched here. If that yields no copies, a third page
 * (the "viewOnlineTab") is fetched and its links are added as details.</p>
 *
 * @param id  record identifier; stored on the item and used in the follow-up requests
 * @param doc parsed detail page of the record
 * @return the populated item, never {@code null}
 * @throws OpacErrorException declared by the signature; not thrown directly in this method
 * @throws IOException        declared by the signature; presumably raised by the HTTP requests
 */
protected DetailedItem parse_detail(String id, Document doc) throws OpacErrorException, IOException {
    DetailedItem item = new DetailedItem();
    item.setId(id);
    item.setTitle(doc.select(".EXLResultTitle").text());

    // Details: within each list entry, everything after the label element is the value.
    for (Element entry : doc.select(".EXLDetailsContent li")) {
        String label = null;
        StringBuilder content = new StringBuilder();
        for (Node child : entry.childNodes()) {
            if (child instanceof Element) {
                Element el = (Element) child;
                boolean isLabel = el.tagName().equals("strong") || el.hasClass("bib-EXLDetailsContent-item-title");
                if (isLabel) {
                    label = el.text();
                } else if (label != null) {
                    content.append(el.text());
                }
            } else if (child instanceof TextNode && label != null) {
                content.append(((TextNode) child).text());
            }
        }
        if (label != null) {
            item.addDetail(new Detail(label, content.toString().trim()));
        }
    }

    // Both follow-up pages share the same URL apart from the tab name.
    String tabUrlPrefix = opac_url + "/action/display.do?ct=display&fn=search&vid=" + vid + "&doc=" + id + "&tabs=";
    String locationsHtml = httpGet(tabUrlPrefix + "locationsTab", getDefaultEncoding());
    Document locations = Jsoup.parse(locationsHtml);

    if (!locations.select(".EXLLocationTitlesRow").isEmpty()) {
        // Tabular layout: map each column index to a copy field by its heading.
        Elements headings = locations.select(".EXLLocationTitlesRow th");
        Map<Integer, String> fieldByColumn = new HashMap<>();
        for (int col = 0; col < headings.size(); col++) {
            String heading = headings.get(col).text().toLowerCase(Locale.GERMAN).trim();
            if (heading.contains("library") || heading.contains("bibliothek") || heading.contains("branch")) {
                fieldByColumn.put(col, "branch");
            } else if (heading.contains("location") || heading.contains("ort")) {
                fieldByColumn.put(col, "location");
            } else if (heading.contains("call number") || heading.contains("signatur")) {
                fieldByColumn.put(col, "signature");
            } else if (heading.contains("due date") || heading.contains("llig am") || heading.contains("ausgeliehen bis") || heading.contains("lligkeit") || heading.contains("ausleihstatus")) {
                fieldByColumn.put(col, "returndate");
            } else if (heading.contains("loan to") || heading.contains("bezugsmodalit") || heading.contains("ausleihm") || heading.contains("status")) {
                fieldByColumn.put(col, "status");
            } else if (heading.contains("queue") || heading.contains("vormerker")) {
                fieldByColumn.put(col, "reservations");
            }
        }

        DateTimeFormatter dottedDate = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        DateTimeFormatter slashedDate = DateTimeFormat.forPattern("dd/MM/yyyy").withLocale(Locale.GERMAN);
        Elements rows = locations.select(".EXLLocationTable tr:not(.EXLLocationTitlesRow):not(.EXLAdditionalFieldsRow)");
        for (Element row : rows) {
            Copy copy = new Copy();
            Elements cells = row.children();
            for (int col = 0; col < cells.size(); col++) {
                String cellText = cells.get(col).text().replace("\u00a0", " ").trim();
                String field = fieldByColumn.get(col);
                if (field == null || cellText.equals("")) {
                    continue;
                }
                try {
                    copy.set(field, cellText, dottedDate);
                } catch (IllegalArgumentException e) {
                    // Retry with the slash-separated date pattern.
                    try {
                        copy.set(field, cellText, slashedDate);
                    } catch (IllegalArgumentException e2) {
                        e2.printStackTrace();
                    }
                }
            }
            item.addCopy(copy);
        }
    } else if (!locations.select(".EXLLocationList").isEmpty()) {
        // e.g. University of South Wales
        for (Element entry : locations.select(".EXLLocationList")) {
            Copy copy = new Copy();
            copy.setBranch(entry.select(".EXLLocationsTitle").text());
            copy.setDepartment(entry.select(".EXLLocationInfo strong").text());
            copy.setShelfmark(entry.select(".EXLLocationInfo cite").text());
            copy.setStatus(entry.select(".EXLLocationInfo em").text());
            item.addCopy(copy);
        }
    }

    if (item.getCopies().size() == 0) {
        // Online-Medium?
        String onlineHtml = httpGet(tabUrlPrefix + "viewOnlineTab", getDefaultEncoding());
        Document online = Jsoup.parse(onlineHtml);
        // Needed so that absUrl() can resolve relative links.
        online.setBaseUri(opac_url + "/action/display.do");
        Elements headerLinks = online.select(".EXLTabHeaderContent a");
        if (!headerLinks.isEmpty()) {
            Element headerLink = headerLinks.first();
            item.addDetail(new Detail(headerLink.text().trim(), cleanUrl(headerLink.absUrl("href"))));
        }
        for (Element onlineLink : online.select(".EXLViewOnlineLinksTitle a")) {
            item.addDetail(new Detail(onlineLink.text().trim(), cleanUrl(onlineLink.absUrl("href"))));
        }
    }
    return item;
}
Also used : HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 25 with DetailedItem

use of de.geeksfactory.opacclient.objects.DetailedItem in project opacclient by opacapp.

the class Bibliotheca method getResultById.

/**
 * Fetches the detail page for one medium and parses it.
 *
 * <p>Calls {@code start()} first when {@code initialised} is not set. If parsing yields an item
 * without an id, the requested number is stored as its id.</p>
 *
 * @param a          medium number; appended to the {@code MedienNr} query parameter
 * @param homebranch not used by this method
 * @return the parsed item
 * @throws IOException declared by the signature; presumably raised by the HTTP request
 */
@Override
public DetailedItem getResultById(String a, String homebranch) throws IOException {
    if (!initialised) {
        start();
    }
    final String detailUrl = opac_url + "/index.asp?MedienNr=" + a;
    final DetailedItem item = parseResult(httpGet(detailUrl, getDefaultEncoding()), data);
    if (item.getId() == null) {
        // Fall back to the requested number when the page did not provide an id.
        item.setId(a);
    }
    return item;
}
Also used : DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem)

Aggregations

DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)27 Detail (de.geeksfactory.opacclient.objects.Detail)18 Copy (de.geeksfactory.opacclient.objects.Copy)17 Element (org.jsoup.nodes.Element)15 Document (org.jsoup.nodes.Document)12 Elements (org.jsoup.select.Elements)12 JSONException (org.json.JSONException)11 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)10 IOException (java.io.IOException)8 JSONObject (org.json.JSONObject)7 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)6 HashMap (java.util.HashMap)6 UnsupportedEncodingException (java.io.UnsupportedEncodingException)5 ArrayList (java.util.ArrayList)5 Matcher (java.util.regex.Matcher)5 Pattern (java.util.regex.Pattern)5 NameValuePair (org.apache.http.NameValuePair)5 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)5 Node (org.jsoup.nodes.Node)5 TextNode (org.jsoup.nodes.TextNode)5