Search in sources :

Example 31 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class PicaOld method parseResList.

/**
 * Parses the reservation list of an account page and appends one {@link ReservedItem} per
 * result row to {@code media}.
 *
 * <p>Rows are selected with {@code table[summary^=list] > tbody > tr[valign=top]}. A row that
 * contains a {@code table[summary=title data]} is read from that nested table; any other row is
 * read from fixed child positions ("like in Kiel").
 *
 * @param media          list the parsed reservations are appended to
 * @param doc            parsed account page
 * @param stringProvider supplies the error message used when no rows are found
 * @throws OpacErrorException if the page contains no matching rows
 */
static void parseResList(List<ReservedItem> media, Document doc, StringProvider stringProvider) throws OpacErrorException {
    Elements copytrs = doc.select("table[summary^=list] > tbody >  tr[valign=top]");
    int trs = copytrs.size();
    if (trs < 1) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
    }
    // Remember the starting size so the sanity check at the end also holds when the caller
    // passes in a list that already contains items.
    int sizeBefore = media.size();
    for (Element tr : copytrs) {
        ReservedItem item = new ReservedItem();
        if (!tr.select("table[summary=title data]").isEmpty()) {
            // Check if there is a checkbox to cancel this item
            Elements inputs = tr.select("input");
            if (!inputs.isEmpty()) {
                item.setCancelData(inputs.attr("value"));
            }
            Elements datatrs = tr.select("table[summary=title data] tr");
            if (!datatrs.isEmpty()) {
                item.setTitle(datatrs.get(0).text());
            }
            // The second data row carries labelled values ("Signatur" / "shelf mark" /
            // "signatuur", "Vormerkdatum"). None of them is supported, so the row is
            // deliberately not read: walking it had no effect on the item and could only
            // fail on pages where the row or its first cell is missing.
        } else {
            // like in Kiel
            // NOTE(review): relies on fixed column positions (1, 5 and 17); a row with fewer
            // cells will throw IndexOutOfBoundsException.
            item.setTitle(tr.child(5).text().trim());
            item.setStatus(tr.child(17).text().trim());
            item.setCancelData(tr.child(1).select("input").attr("value"));
        }
        media.add(item);
    }
    assert (media.size() - sizeBefore == trs);
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) ReservedItem(de.geeksfactory.opacclient.objects.ReservedItem) TextNode(org.jsoup.nodes.TextNode) Elements(org.jsoup.select.Elements)

Example 32 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class SISIS method parse_search.

/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * <p>Side effects visible in this method: the fields {@code identifier} (taken from the first
 * {@code singleHit.do} link, or {@code null}) and {@code resultcount} (number of parsed rows)
 * are overwritten, and {@code downloadCover} is called for covers served by
 * {@code showCover.do}.
 *
 * @param html raw HTML of the result page
 * @param page page number, used to compute each result's number as {@code 10 * (page - 1) + i}
 * @return the parsed results; an empty result when the page reports "keine Treffer"
 * @throws OpacErrorException if the page contains an {@code .error} or {@code .nohits} element
 * @throws SingleResultFound  if the result header contains "(1/1)" or " 1/1"
 */
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");
    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }
    int results_total = -1;
    // first() returns null when the header is missing; treat that as "total unknown"
    // instead of failing with a NullPointerException.
    Element resultHeader = doc.select(".box-header h2").first();
    String resultnumstr = resultHeader != null ? resultHeader.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }
    Elements table = doc.select("table.data tbody tr");
    identifier = null;
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        // Only the first singleHit.do link is inspected for the "identifier" parameter.
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href").replace(" ", "%20").replace("&amp;", "&")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();
        // Media type: derived from the icon's file name / title, optionally overridden by the
        // library's "mediatypes" configuration.
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }
        // Keywords in the row text take precedence over the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }
        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }
        // Pick the cell that holds the title link and the descriptive text.
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }
        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.select("a").size() > 0 && indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }
        // Flatten the cell into text snippets. Each entry is either
        //   { "text", "", text }                                          (3 entries), or
        //   { parentTag, "text" | childTag, text, parentClass, parentStyle } (5 entries).
        // Snippets of three characters or fewer are ignored.
        int childrennum = children.size();
        List<String[]> strings = new ArrayList<>();
        for (int ch = 0; ch < childrennum; ch++) {
            Node node = children.get(ch);
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                List<Node> subchildren = node.childNodes();
                for (int j = 0; j < subchildren.size(); j++) {
                    Node subnode = subchildren.get(j);
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") });
                        }
                    }
                }
            }
        }
        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() != null) {
                        if (!nv.getValue().trim().equals("")) {
                            if (nv.getName().equals("rft.btitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.atitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.au")) {
                                description.append("<br />").append(nv.getValue());
                            } else if (nv.getName().equals("rft.date")) {
                                description.append("<br />").append(nv.getValue());
                            }
                        }
                    }
                }
            } catch (URISyntaxException e) {
                // Fall back to the snippet-based description below.
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }
        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        boolean sigfound = false;
        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound && part[2].matches("^\\s*\\([0-9]{4}\\)$")) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound) {
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && !sigfound && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }
            // NOTE(review): the arrays built above have either 3 or 5 entries, so the
            // 4-entry branch below never matches and the textgruen/textrot classes are never
            // evaluated. Confirm whether "part.length >= 4" was intended before changing it.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }
            if (sr.getStatus() == null) {
                if ((part[2].contains("entliehen") && part[2].startsWith("Vormerkung ist leider nicht möglich")) || part[2].contains("Alle Exemplare des gewählten Titels sind entliehen") || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen") || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar")) || (part[2].contains("heute zurückgebucht")) || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Also used : Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) URISyntaxException(java.net.URISyntaxException) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) MediaType(de.geeksfactory.opacclient.objects.SearchResult.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) NameValuePair(org.apache.http.NameValuePair) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) JSONException(org.json.JSONException) SearchResult(de.geeksfactory.opacclient.objects.SearchResult) TextNode(org.jsoup.nodes.TextNode) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) ClientProtocolException(org.apache.http.client.ClientProtocolException) IOException(java.io.IOException) SearchRequestResult(de.geeksfactory.opacclient.objects.SearchRequestResult)

Example 33 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class SISIS method prolongAll.

/**
 * Requests renewal of all items of an account and returns the per-item outcome.
 *
 * <p>Calls {@code account(account)} first when the session has outlived
 * {@code SESSION_LIFETIME}, when nobody is logged in, or when a different account is logged in.
 * The renewal page is then fetched and every table cell mentioning both "Titel" and "Status"
 * is turned into one map keyed by the {@code ProlongAllResult.KEY_LINE_*} constants.
 *
 * @param account    account whose items are renewed
 * @param useraction not read by this method
 * @param selection  not read by this method
 * @return an OK result with one map per matching cell, or an ERROR result when logging in
 *         fails or the page has no {@code table.data}
 * @throws IOException declared by the method; raised by calls not visible here
 */
@Override
public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException {
    if (!initialised) {
        start();
    }
    // Log in again when the session expired, nobody is logged in, or another account is active.
    boolean sessionExpired = System.currentTimeMillis() - logged_in > SESSION_LIFETIME;
    if (sessionExpired || logged_in_as == null || logged_in_as.getId() != account.getId()) {
        try {
            account(account);
        } catch (JSONException e) {
            e.printStackTrace();
            return new ProlongAllResult(MultiStepResult.Status.ERROR);
        } catch (OpacErrorException e) {
            return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }
    // We have to call the page we originally found the link on first...
    String renewalUrl = opac_url + "/userAccount.do?methodToCall=renewalPossible&renewal=account";
    Document renewalPage = Jsoup.parse(httpGet(renewalUrl, ENCODING));
    if (renewalPage.select("table.data").isEmpty()) {
        return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
    }
    List<Map<String, String>> lines = new ArrayList<>();
    for (Element cell : renewalPage.select("table.data tr td")) {
        String cellText = cell.text();
        // Only cells that mention both labels describe a renewed item.
        if (!(cellText.contains("Titel") && cellText.contains("Status"))) {
            continue;
        }
        Map<String, String> line = new HashMap<>();
        // Key under which the next non-label snippet is stored; empty means "discard it".
        String pendingKey = "";
        for (Node child : cell.childNodes()) {
            String snippet;
            if (child instanceof Element) {
                snippet = ((Element) child).text();
            } else if (child instanceof TextNode) {
                snippet = ((TextNode) child).text();
            } else {
                continue;
            }
            if (snippet.trim().isEmpty()) {
                continue;
            }
            if (snippet.contains("Titel:")) {
                pendingKey = ProlongAllResult.KEY_LINE_TITLE;
            } else if (snippet.contains("Verfasser:")) {
                pendingKey = ProlongAllResult.KEY_LINE_AUTHOR;
            } else if (snippet.contains("Leihfristende:")) {
                pendingKey = ProlongAllResult.KEY_LINE_NEW_RETURNDATE;
            } else if (snippet.contains("Status:")) {
                pendingKey = ProlongAllResult.KEY_LINE_MESSAGE;
            } else if (snippet.contains("Mediennummer:") || snippet.contains("Signatur:")) {
                // Known labels whose values are not reported.
                pendingKey = "";
            } else if (!pendingKey.isEmpty()) {
                line.put(pendingKey, snippet.trim());
                pendingKey = "";
            }
        }
        lines.add(line);
    }
    return new ProlongAllResult(MultiStepResult.Status.OK, lines);
}
Also used : HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Map(java.util.Map) HashMap(java.util.HashMap)

Example 34 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class TouchPoint method parse_result.

/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>Reads, in this order: cover and permalink id, the {@code .titleinfo} detail table (with a
 * fallback to the {@code #details} block when the table yields nothing), the copies table and
 * the reservation link (both fetched through additional {@code httpGet} requests), and the
 * volume-search parameters.
 *
 * @param html raw HTML of the detail page
 * @return the parsed item
 * @throws IOException declared by the method; the {@code httpGet} call for the copies table is
 *                     not wrapped in a try block here
 */
protected DetailedItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    DetailedItem result = new DetailedItem();
    result.setCover(findCoverUrl(doc, false));
    if (doc.select("#permalink-link").size() > 0) {
        String href = doc.select("#permalink-link").first().attr("href");
        JSONObject id = new JSONObject();
        try {
            id.put("url", href);
            result.setId(id.toString());
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
    for (Element tr : doc.select(".titleinfo tr")) {
        // Sometimes there is one th and one td, sometimes two tds
        Element nameCell = tr.select("th, td").first();
        Element valueCell = tr.select("td").last();
        if (nameCell == null || valueCell == null) {
            // Row without a name or value cell: nothing to read.
            continue;
        }
        String detailName = nameCell.text().trim();
        if (detailName.endsWith(":")) {
            detailName = detailName.substring(0, detailName.length() - 1);
        }
        String detailValue = valueCell.text().trim();
        result.addDetail(new Detail(detailName, detailValue));
        if (detailName.contains("ID in diesem Katalog") && result.getId() == null) {
            result.setId(detailValue);
        }
        if (detailName.equals("Titel")) {
            result.setTitle(detailValue);
        }
    }
    if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) {
        // e.g. Bayreuth_Uni
        // Names are <strong> elements; everything up to the next <strong> is the value.
        String dname = "";
        String dval = "";
        boolean in_value = true;
        for (Node n : doc.select("#details").first().childNodes()) {
            if (n instanceof Element && ((Element) n).tagName().equals("strong")) {
                if (in_value) {
                    if (dname.length() > 0 && dval.length() > 0) {
                        result.addDetail(new Detail(dname, dval));
                        if (dname.equals("Titel")) {
                            result.setTitle(dval);
                        }
                    }
                    dname = ((Element) n).text();
                    in_value = false;
                } else {
                    dname += ((Element) n).text();
                }
            } else {
                String t = null;
                if (n instanceof TextNode) {
                    t = ((TextNode) n).text();
                } else if (n instanceof Element) {
                    t = ((Element) n).text();
                }
                if (t != null) {
                    if (in_value) {
                        dval += t;
                    } else {
                        in_value = true;
                        dval = t;
                    }
                }
            }
        }
        // Pairs are only emitted when the next <strong> starts, so the last pair has to be
        // flushed here or it would be lost.
        if (in_value && dname.length() > 0 && dval.length() > 0) {
            result.addDetail(new Detail(dname, dval));
            if (dname.equals("Titel")) {
                result.setTitle(dval);
            }
        }
    }
    if (result.getTitle() == null) {
        Element heading = doc.select("h1").first();
        if (heading != null) {
            result.setTitle(heading.text());
        }
    }
    // Copies
    // NOTE(review): "&amp;" is removed rather than replaced by "&" — confirm this is intended.
    String copiesParameter = doc.select("div[id^=ajax_holdings_url]").attr("ajaxParameter").replace("&amp;", "");
    if (!"".equals(copiesParameter)) {
        String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING);
        Document copiesDoc = Jsoup.parse(copiesHtml);
        // One entry per header cell; null marks a column that is not mapped to a copy field.
        List<String> table_keys = new ArrayList<>();
        for (Element th : copiesDoc.select(".data tr th")) {
            if (th.text().contains("Zweigstelle")) {
                table_keys.add("branch");
            } else if (th.text().contains("Status")) {
                table_keys.add("status");
            } else if (th.text().contains("Signatur")) {
                table_keys.add("signature");
            } else {
                table_keys.add(null);
            }
        }
        for (Element tr : copiesDoc.select(".data tr:has(td)")) {
            Copy copy = new Copy();
            int i = 0;
            for (Element td : tr.select("td")) {
                // A row may have more cells than there are headers; ignore the surplus.
                if (i < table_keys.size() && table_keys.get(i) != null) {
                    copy.set(table_keys.get(i), td.text().trim());
                }
                i++;
            }
            result.addCopy(copy);
        }
    }
    // Reservation Info, only works if the code above could find a URL
    if (!"".equals(copiesParameter)) {
        String reservationParameter = copiesParameter.replace("showHoldings", "showDocument");
        try {
            String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING);
            Document reservationDoc = Jsoup.parse(reservationHtml);
            reservationDoc.setBaseUri(opac_url);
            Elements requestLinks = reservationDoc.select("a[href*=requestItem.do]");
            if (requestLinks.size() == 1) {
                result.setReservable(true);
                // Use the request link itself, not whichever anchor happens to come first.
                result.setReservation_info(requestLinks.first().attr("abs:href"));
            }
        } catch (Exception e) {
            e.printStackTrace();
            // fail silently
        }
    }
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
Also used : NameValuePair(org.apache.http.NameValuePair) BasicNameValuePair(org.apache.http.message.BasicNameValuePair) HashMap(java.util.HashMap) Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) JSONException(org.json.JSONException) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONObject(org.json.JSONObject) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) Detail(de.geeksfactory.opacclient.objects.Detail)

Example 35 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class Zones method parse_result.

/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>Reads the detail table (selector depends on {@code version18}), the reservation link,
 * free-text {@code div.record-item-new} blocks, the optional {@code span.z3988} metadata, the
 * cover image and the copies listed in {@code div[id^=stock_]} elements.
 *
 * @param id   id stored on the returned item
 * @param html raw HTML of the detail page
 * @return the parsed item; its title is the empty string when none could be found
 */
private DetailedItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);
    DetailedItem result = new DetailedItem();
    result.setTitle("");
    boolean title_is_set = false;
    result.setId(id);
    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr" : ".DetailDataCell table table:not(.inRecordHeader) tr";
    Elements detailtrs1 = doc.select(detailTrsQuery);
    for (int i = 0; i < detailtrs1.size(); i++) {
        Element tr = detailtrs1.get(i);
        int s = tr.children().size();
        if (s == 0) {
            // Row without cells: child(0) would throw.
            continue;
        }
        if (tr.child(0).text().trim().equals("Titel") && !title_is_set) {
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // Cells that wrap a nested table are skipped.
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(tr.child(0).text().trim(), val));
                }
            }
        }
    }
    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }
    // Free-text blocks: concatenate their text, turning <br> into line breaks.
    Elements detaildiv = doc.select("div.record-item-new");
    for (int i = 0; i < detaildiv.size(); i++) {
        Element dd = detaildiv.get(i);
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                if (((Element) node).tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(((Element) node).text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }
    if (doc.select("span.z3988").size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        // NOTE(review): values are used as found in the attribute, without URL-decoding —
        // confirm whether they can contain percent-escapes.
        String z3988data = doc.select("span.z3988").first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length == 2) {
                if (!nv[1].trim().equals("")) {
                    if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.au")) {
                        result.addDetail(new Detail("Author", nv[1]));
                    }
                }
            }
        }
    }
    // Cover
    if (doc.select(".BookCover, .LargeBookCover").size() > 0) {
        result.setCover(doc.select(".BookCover, .LargeBookCover").first().attr("src"));
    }
    Elements copydivs = doc.select("div[id^=stock_]");
    // The formatter does not depend on the copy being parsed, so build it once.
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    // Text of the most recent "stock_head" div; used as the branch of the copies after it.
    String pop = "";
    for (int i = 0; i < copydivs.size(); i++) {
        Element div = copydivs.get(i);
        if (div.attr("id").startsWith("stock_head")) {
            pop = div.text().trim();
            continue;
        }
        Copy copy = new Copy();
        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j is the position of the current node since the last <br>; fields are recognised
        // purely by that position.
        // NOTE(review): the same Copy instance is reused after a <br>; if a div can hold
        // several copies they would all share one object — confirm against real pages.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    if (((Element) node).tag().getName().equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        j = -1;
                    } else if (((Element) node).tag().getName().equals("b") && j == 1) {
                        copy.setLocation(((Element) node).text());
                    } else if (((Element) node).tag().getName().equals("b") && j > 1) {
                        copy.setStatus(((Element) node).text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    if (j == 0) {
                        copy.setDepartment(((TextNode) node).text());
                    }
                    if (j == 2) {
                        copy.setBarcode(((TextNode) node).getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        String text = ((TextNode) node).text().trim();
                        // The date is the last ten characters ("dd.MM.yyyy"). Shorter text
                        // cannot contain one; skipping it here keeps j in step with the
                        // nodes instead of bailing out through the catch below.
                        if (text.length() >= 10) {
                            String date = text.substring(text.length() - 10);
                            try {
                                copy.setReturnDate(fmt.parseLocalDate(date));
                            } catch (IllegalArgumentException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) JSONException(org.json.JSONException) IOException(java.io.IOException) NotReachableException(de.geeksfactory.opacclient.networking.NotReachableException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Copy(de.geeksfactory.opacclient.objects.Copy) DetailedItem(de.geeksfactory.opacclient.objects.DetailedItem) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) Detail(de.geeksfactory.opacclient.objects.Detail)

Aggregations

TextNode (org.jsoup.nodes.TextNode)52 Element (org.jsoup.nodes.Element)41 Node (org.jsoup.nodes.Node)37 Document (org.jsoup.nodes.Document)19 ArrayList (java.util.ArrayList)16 Elements (org.jsoup.select.Elements)14 IOException (java.io.IOException)6 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)6 JSONException (org.json.JSONException)6 Copy (de.geeksfactory.opacclient.objects.Copy)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)5 HashMap (java.util.HashMap)5 NameValuePair (org.apache.http.NameValuePair)5 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)5 Test (org.junit.jupiter.api.Test)5 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)4 Detail (de.geeksfactory.opacclient.objects.Detail)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 URI (java.net.URI)4 Matcher (java.util.regex.Matcher)4