Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
Class PicaOld, method parseResList.
/**
 * Parses the list of reservations on an account page and appends one
 * {@link ReservedItem} per matching table row to {@code media}.
 *
 * <p>Two row layouts are handled: rows that embed a nested
 * {@code table[summary=title data]}, and a flat layout ("like in Kiel") whose
 * title, status and cancel data are read from fixed cell positions.</p>
 *
 * @param media          list the parsed items are appended to
 * @param doc            parsed account page
 * @param stringProvider source of the error message used when no rows are found
 * @throws OpacErrorException if the page contains no reservation rows
 */
static void parseResList(List<ReservedItem> media, Document doc, StringProvider stringProvider) throws OpacErrorException {
    Elements copytrs = doc.select("table[summary^=list] > tbody > tr[valign=top]");
    if (copytrs.size() < 1) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
    }

    for (Element tr : copytrs) {
        ReservedItem item = new ReservedItem();
        if (tr.select("table[summary=title data]").size() > 0) {
            // Check if there is a checkbox to cancel this item
            Elements inputs = tr.select("input");
            if (inputs.size() > 0) {
                item.setCancelData(inputs.attr("value"));
            }

            // The first row of the nested table holds the title. Guard against an
            // empty nested table so one odd row does not abort the whole list.
            Elements datatrs = tr.select("table[summary=title data] tr");
            if (!datatrs.isEmpty()) {
                item.setTitle(datatrs.get(0).text());
            }

            // Further labelled fields in the nested table (shelf mark, reservation
            // date) are not supported, so they are deliberately not read.
        } else {
            // like in Kiel
            item.setTitle(tr.child(5).text().trim());
            item.setStatus(tr.child(17).text().trim());
            item.setCancelData(tr.child(1).select("input").attr("value"));
        }
        media.add(item);
    }
}
Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
Class SISIS, method parse_search.
/**
 * Parses a search result page into a {@link SearchRequestResult}.
 *
 * <p>Side effects visible in this method: the {@code identifier} field is reset to
 * {@code null} and, if a {@code singleHit.do} link carrying an {@code identifier}
 * parameter is found, set to that value; {@code resultcount} is set to the number
 * of parsed rows.</p>
 *
 * @param html HTML of the result page
 * @param page page number, used to compute the running number of each result
 * @return the parsed results together with the total hit count (-1 if it could not
 *         be read from the page header)
 * @throws OpacErrorException if the page contains an {@code .error} or
 *                            {@code .nohits} element
 * @throws SingleResultFound  if the page header reports "1/1", i.e. a single hit
 */
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    int results_total = -1;
    // first() returns null when there is no header; treat that as "total unknown"
    // instead of failing with a NullPointerException.
    Element resultHeader = doc.select(".box-header h2").first();
    String resultnumstr = resultHeader != null ? resultHeader.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    } else if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    }

    Elements table = doc.select("table.data tbody tr");
    identifier = null;

    // Only the first singleHit.do link is inspected for the "identifier" parameter.
    Elements links = doc.select("table.data a");
    boolean haslink = false;
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        if (node.hasAttr("href") && node.attr("href").contains("singleHit.do") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href").replace(" ", "%20")), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                // best effort: a malformed link only means the identifier stays unset
                e.printStackTrace();
            }
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type from the icon: configured mapping first, then built-in defaults.
        if (tr.select("td img[title]").size() > 0) {
            String title = tr.select("td img").get(0).attr("title");
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords in the row text override the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        if (tr.children().size() > 3 && tr.child(3).select("img[title*=cover]").size() == 1) {
            sr.setCover(tr.child(3).select("img[title*=cover]").attr("abs:src"));
            if (sr.getCover().contains("showCover.do")) {
                downloadCover(sr);
            }
        }

        // Locate the cell (or a wrapper inside it) whose child nodes carry the description.
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }

        List<Node> children = middlething.childNodes();
        if (middlething.select("div").not("#hlrightblock,.bestellfunktionen").size() == 1) {
            Element indiv = middlething.select("div").not("#hlrightblock,.bestellfunktionen").first();
            if (indiv.select("a").size() > 0 && indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Flatten two levels of nodes into string records:
        //   direct text:   { "text", "", text }                                   (length 3)
        //   inside a tag:  { parentTag, childTag or "text", text, class, style }  (length 5)
        // Snippets of three characters or fewer are ignored.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element el = (Element) node;
                for (Node subnode : el.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { el.tag().getName(), "text", text, el.className(), el.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { el.tag().getName(), ((Element) subnode).tag().getName(), text, el.className(), el.attr("style") });
                        }
                    }
                }
            }
        }

        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() != null) {
                        if (!nv.getValue().trim().equals("")) {
                            if (nv.getName().equals("rft.btitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.atitle") && !hastitle) {
                                description.append("<b>").append(nv.getValue()).append("</b>");
                                hastitle = true;
                            } else if (nv.getName().equals("rft.au")) {
                                description.append("<br />").append(nv.getValue());
                            } else if (nv.getName().equals("rft.date")) {
                                description.append("<br />").append(nv.getValue());
                            }
                        }
                    }
                }
            } catch (URISyntaxException e) {
                // fall back to the heuristic description below
                description = null;
            }
        }

        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;

        for (String[] part : strings) {
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(part[2]).append("</b>");
                    titlefound = true;
                } else if (part[2].matches("\\D*[0-9]{4}\\D*") && part[2].length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(part[2]);
                } else if (k == 1 && !yearfound) {
                    // Any second record is appended, including "(1999)"-style ones.
                    description.append("<br />");
                    description.append(part[2]);
                } else if (k > 1 && k < 4 && part[0].equals("text") && part[2].matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(part[2]);
                }
            }

            // NOTE(review): the records built above have length 3 or 5, so the
            // length == 4 branch never runs and the textgruen/textrot classes are
            // currently ignored. Left unchanged on purpose; confirm the intent
            // before enabling it.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            if (sr.getStatus() == null) {
                if ((part[2].contains("entliehen") && part[2].startsWith("Vormerkung ist leider nicht möglich")) || part[2].contains("Alle Exemplare des gewählten Titels sind entliehen") || part[2].contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (part[2].startsWith("entliehen") || part[2].contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((part[2].startsWith("bestellbar") && !part[2].contains("nicht bestellbar")) || (part[2].startsWith("vorbestellbar") && !part[2].contains("nicht vorbestellbar")) || (part[2].startsWith("vormerkbar") && !part[2].contains("nicht vormerkbar")) || (part[2].contains("heute zurückgebucht")) || (part[2].contains("ausleihbar") && !part[2].contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO) || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }

        if (!described) {
            sr.setInnerhtml(description.toString());
        }
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }

    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
Class SISIS, method prolongAll.
/**
 * Requests the "renew all" page and converts its result table into a
 * {@link ProlongAllResult}.
 *
 * <p>Before the request, {@code account(account)} is called if the session is
 * older than {@code SESSION_LIFETIME}, nobody is logged in, or a different
 * account is logged in. Each table cell whose text contains both "Titel" and
 * "Status" becomes one result line; a label ("Titel:", "Verfasser:",
 * "Leihfristende:", "Status:") decides under which key the next non-empty
 * node is stored.</p>
 *
 * @param account    account whose items are to be renewed
 * @param useraction not read by this method
 * @param selection  not read by this method
 * @return {@code OK} with one map per parsed cell, or {@code ERROR} if logging in
 *         failed or the response contains no {@code table.data}
 * @throws IOException declared by this method; not thrown directly by the visible code
 */
@Override
public ProlongAllResult prolongAll(Account account, int useraction, String selection) throws IOException {
    if (!initialised) {
        start();
    }

    // Short-circuit order matters: getId() is only reached when logged_in_as is non-null.
    boolean loginRequired = System.currentTimeMillis() - logged_in > SESSION_LIFETIME
            || logged_in_as == null
            || logged_in_as.getId() != account.getId();
    if (loginRequired) {
        try {
            account(account);
        } catch (JSONException e) {
            e.printStackTrace();
            return new ProlongAllResult(MultiStepResult.Status.ERROR);
        } catch (OpacErrorException e) {
            return new ProlongAllResult(MultiStepResult.Status.ERROR, e.getMessage());
        }
    }

    // We have to call the page we originally found the link on first...
    String response = httpGet(opac_url + "/userAccount.do?methodToCall=renewalPossible&renewal=account", ENCODING);
    Document page = Jsoup.parse(response);
    if (page.select("table.data").isEmpty()) {
        return new ProlongAllResult(MultiStepResult.Status.ERROR, stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
    }

    List<Map<String, String>> rows = new ArrayList<>();
    for (Element cell : page.select("table.data tr td")) {
        String cellText = cell.text();
        if (!(cellText.contains("Titel") && cellText.contains("Status"))) {
            continue;
        }

        Map<String, String> entry = new HashMap<>();
        // Key under which the next non-empty, non-label node is stored; "" means none pending.
        String pendingKey = "";
        for (Node child : cell.childNodes()) {
            String content;
            if (child instanceof Element) {
                content = ((Element) child).text();
            } else if (child instanceof TextNode) {
                content = ((TextNode) child).text();
            } else {
                continue;
            }
            if (content.trim().isEmpty()) {
                continue;
            }

            if (content.contains("Titel:")) {
                pendingKey = ProlongAllResult.KEY_LINE_TITLE;
            } else if (content.contains("Verfasser:")) {
                pendingKey = ProlongAllResult.KEY_LINE_AUTHOR;
            } else if (content.contains("Leihfristende:")) {
                pendingKey = ProlongAllResult.KEY_LINE_NEW_RETURNDATE;
            } else if (content.contains("Status:")) {
                pendingKey = ProlongAllResult.KEY_LINE_MESSAGE;
            } else if (content.contains("Mediennummer:") || content.contains("Signatur:")) {
                // values following these labels are dropped
                pendingKey = "";
            } else if (pendingKey.length() > 0) {
                entry.put(pendingKey, content.trim());
                pendingKey = "";
            }
        }
        rows.add(entry);
    }
    return new ProlongAllResult(MultiStepResult.Status.OK, rows);
}
Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
Class TouchPoint, method parse_result.
/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>Steps, in order: cover and permalink id; name/value rows of
 * {@code .titleinfo}; as a fallback the {@code #details} block, where
 * {@code <strong>} elements are names and the nodes following them are values;
 * the copies table and the reservation link, each fetched with a further
 * {@code httpGet}; finally a volume-search link among {@code .data td a}.</p>
 *
 * @param html HTML of the detail page
 * @return the parsed item; the title stays unset if neither the details nor an
 *         {@code h1} provide one
 * @throws IOException if loading the copies page fails (failures while loading the
 *                     reservation page are caught and only printed)
 */
protected DetailedItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailedItem result = new DetailedItem();
    result.setCover(findCoverUrl(doc, false));

    if (doc.select("#permalink-link").size() > 0) {
        String href = doc.select("#permalink-link").first().attr("href");
        JSONObject id = new JSONObject();
        try {
            id.put("url", href);
            result.setId(id.toString());
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }

    for (Element tr : doc.select(".titleinfo tr")) {
        // Sometimes there is one th and one td, sometimes two tds
        Element nameCell = tr.select("th, td").first();
        Element valueCell = tr.select("td").last();
        if (nameCell == null || valueCell == null) {
            // A row without a name cell or without any td has no usable pair;
            // skip it instead of failing with a NullPointerException.
            continue;
        }
        String detailName = nameCell.text().trim();
        if (detailName.endsWith(":")) {
            detailName = detailName.substring(0, detailName.length() - 1);
        }
        String detailValue = valueCell.text().trim();
        result.addDetail(new Detail(detailName, detailValue));

        if (detailName.contains("ID in diesem Katalog") && result.getId() == null) {
            result.setId(detailValue);
        }
        if (detailName.equals("Titel")) {
            result.setTitle(detailValue);
        }
    }

    if (result.getDetails().size() == 0 && doc.select("#details").size() > 0) {
        // e.g. Bayreuth_Uni
        String dname = "";
        String dval = "";
        // true while value nodes are being collected, false while adjacent
        // <strong> elements are still being joined into one name
        boolean in_value = true;
        for (Node n : doc.select("#details").first().childNodes()) {
            if (n instanceof Element && ((Element) n).tagName().equals("strong")) {
                if (in_value) {
                    // a new name starts: flush the pair collected so far
                    if (dname.length() > 0 && dval.length() > 0) {
                        result.addDetail(new Detail(dname, dval));
                        if (dname.equals("Titel")) {
                            result.setTitle(dval);
                        }
                    }
                    dname = ((Element) n).text();
                    in_value = false;
                } else {
                    dname += ((Element) n).text();
                }
            } else {
                String t = null;
                if (n instanceof TextNode) {
                    t = ((TextNode) n).text();
                } else if (n instanceof Element) {
                    t = ((Element) n).text();
                }
                if (t != null) {
                    if (in_value) {
                        dval += t;
                    } else {
                        in_value = true;
                        dval = t;
                    }
                }
            }
        }
    }

    if (result.getTitle() == null) {
        // first() is null when the page has no h1; leave the title unset then
        Element heading = doc.select("h1").first();
        if (heading != null) {
            result.setTitle(heading.text());
        }
    }

    // Copies
    // The attribute selector needs its closing bracket to be a valid selector.
    // NOTE(review): replace("&", "") strips every ampersand from the parameter —
    // kept as is; confirm this is intended.
    String copiesParameter = doc.select("div[id^=ajax_holdings_url]").attr("ajaxParameter").replace("&", "");
    if (!"".equals(copiesParameter)) {
        String copiesHtml = httpGet(opac_url + "/" + copiesParameter, ENCODING);
        Document copiesDoc = Jsoup.parse(copiesHtml);

        // One entry per header cell; null marks a column that is ignored.
        List<String> table_keys = new ArrayList<>();
        for (Element th : copiesDoc.select(".data tr th")) {
            if (th.text().contains("Zweigstelle")) {
                table_keys.add("branch");
            } else if (th.text().contains("Status")) {
                table_keys.add("status");
            } else if (th.text().contains("Signatur")) {
                table_keys.add("signature");
            } else {
                table_keys.add(null);
            }
        }

        for (Element tr : copiesDoc.select(".data tr:has(td)")) {
            Copy copy = new Copy();
            int i = 0;
            for (Element td : tr.select("td")) {
                // rows may have more cells than there are header cells
                if (i < table_keys.size() && table_keys.get(i) != null) {
                    copy.set(table_keys.get(i), td.text().trim());
                }
                i++;
            }
            result.addCopy(copy);
        }
    }

    // Reservation Info, only works if the code above could find a URL
    if (!"".equals(copiesParameter)) {
        String reservationParameter = copiesParameter.replace("showHoldings", "showDocument");
        try {
            String reservationHtml = httpGet(opac_url + "/" + reservationParameter, ENCODING);
            Document reservationDoc = Jsoup.parse(reservationHtml);
            reservationDoc.setBaseUri(opac_url);
            Elements requestLinks = reservationDoc.select("a[href*=requestItem.do]");
            if (requestLinks.size() == 1) {
                result.setReservable(true);
                // use the matched request link, not simply the first link of the page
                result.setReservation_info(requestLinks.first().attr("abs:href"));
            }
        } catch (Exception e) {
            e.printStackTrace();
            // fail silently
        }
    }

    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        // best effort: a malformed link only means no volume search is offered
        e.printStackTrace();
    }

    return result;
}
Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
Class Zones, method parse_result.
/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>Reads the detail table (the selector depends on {@code version18}), the
 * "Vormerken" reservation link, free-text {@code div.record-item-new} blocks,
 * an optional {@code span.z3988} metadata element, the cover image and the
 * copies listed in {@code div[id^=stock_]} elements.</p>
 *
 * @param id   id that is stored on the returned item
 * @param html HTML of the detail page
 * @return the parsed item; its title is the empty string if none was found
 */
private DetailedItem parse_result(String id, String html) {
    Document doc = Jsoup.parse(html);

    DetailedItem result = new DetailedItem();
    result.setTitle("");
    boolean title_is_set = false;
    result.setId(id);

    String detailTrsQuery = version18 ? ".inRoundBox1 table table tr" : ".DetailDataCell table table:not(.inRecordHeader) tr";
    Elements detailtrs1 = doc.select(detailTrsQuery);
    for (int i = 0; i < detailtrs1.size(); i++) {
        Element tr = detailtrs1.get(i);
        int s = tr.children().size();
        if (s == 0) {
            // child(0) would throw on a row without cells; nothing to read here
            continue;
        }
        if (tr.child(0).text().trim().equals("Titel") && !title_is_set) {
            result.setTitle(tr.child(s - 1).text().trim());
            title_is_set = true;
        } else if (s > 1) {
            Element valchild = tr.child(s - 1);
            // values that contain nested tables are skipped
            if (valchild.select("table").isEmpty()) {
                String val = valchild.text().trim();
                if (val.length() > 0) {
                    result.addDetail(new Detail(tr.child(0).text().trim(), val));
                }
            }
        }
    }

    for (Element a : doc.select("a.SummaryActionLink")) {
        if (a.text().contains("Vormerken")) {
            result.setReservable(true);
            result.setReservation_info(a.attr("href"));
        }
    }

    // Each record-item-new block becomes one unnamed detail; <br> turns into a line break.
    for (Element dd : doc.select("div.record-item-new")) {
        StringBuilder text = new StringBuilder();
        for (Node node : dd.childNodes()) {
            if (node instanceof TextNode) {
                text.append(((TextNode) node).text());
            } else if (node instanceof Element) {
                Element el = (Element) node;
                if (el.tagName().equals("br")) {
                    text.append("\n");
                } else {
                    text.append(el.text().trim());
                }
            }
        }
        result.addDetail(new Detail("", text.toString()));
    }

    if (doc.select("span.z3988").size() > 0) {
        // Sometimes there is a <span class="Z3988"> item which provides
        // data in a standardized format.
        String z3988data = doc.select("span.z3988").first().attr("title").trim();
        for (String pair : z3988data.split("&")) {
            String[] nv = pair.split("=", 2);
            if (nv.length == 2) {
                if (!nv[1].trim().equals("")) {
                    if (nv[0].equals("rft.btitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.atitle") && result.getTitle().length() == 0) {
                        result.setTitle(nv[1]);
                    } else if (nv[0].equals("rft.au")) {
                        result.addDetail(new Detail("Author", nv[1]));
                    }
                }
            }
        }
    }

    // Cover
    if (doc.select(".BookCover, .LargeBookCover").size() > 0) {
        result.setCover(doc.select(".BookCover, .LargeBookCover").first().attr("src"));
    }

    Elements copydivs = doc.select("div[id^=stock_]");
    // The formatter does not depend on the div, so it is built once for all copies.
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    // text of the most recent stock_head div; used as branch of the copies that follow
    String pop = "";
    for (int i = 0; i < copydivs.size(); i++) {
        Element div = copydivs.get(i);

        if (div.attr("id").startsWith("stock_head")) {
            pop = div.text().trim();
            continue;
        }

        Copy copy = new Copy();

        // This is getting very ugly - check if it is valid for libraries which are not Hamburg.
        // Seems to also work in Kiel (Zones 1.8, checked 10.10.2015)
        // j counts the element and text nodes seen since the last <br>; the field a
        // node is written to depends on that position.
        // NOTE(review): the same Copy instance is added again on every <br> inside
        // one div — confirm whether a fresh Copy per <br> was intended.
        int j = 0;
        for (Node node : div.childNodes()) {
            try {
                if (node instanceof Element) {
                    if (((Element) node).tag().getName().equals("br")) {
                        copy.setBranch(pop);
                        result.addCopy(copy);
                        j = -1;
                    } else if (((Element) node).tag().getName().equals("b") && j == 1) {
                        copy.setLocation(((Element) node).text());
                    } else if (((Element) node).tag().getName().equals("b") && j > 1) {
                        copy.setStatus(((Element) node).text());
                    }
                    j++;
                } else if (node instanceof TextNode) {
                    if (j == 0) {
                        copy.setDepartment(((TextNode) node).text());
                    }
                    if (j == 2) {
                        copy.setBarcode(((TextNode) node).getWholeText().trim().split("\n")[0].trim());
                    }
                    if (j == 6) {
                        // the last ten characters are expected to be a dd.MM.yyyy date
                        String text = ((TextNode) node).text().trim();
                        String date = text.substring(text.length() - 10);
                        try {
                            copy.setReturnDate(fmt.parseLocalDate(date));
                        } catch (IllegalArgumentException e) {
                            e.printStackTrace();
                        }
                    }
                    j++;
                }
            } catch (Exception e) {
                // A failure inside a branch also skips that branch's j++, so the
                // position counter is not advanced for the failing node.
                e.printStackTrace();
            }
        }
    }

    return result;
}
Aggregations