use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class Heidi method parse_reservations.
/**
 * Parses one account listing page into reserved items.
 *
 * <p>Each row of {@code table.kontopos} is read as follows: cell 0 may hold an
 * {@code input} whose value becomes the cancel data, cell 1 holds a {@code label}
 * whose first two text nodes are author and title (and optionally a link whose
 * {@code katkey} query parameter becomes the id), and cell 3 holds the ready
 * date or status as first text node and the branch as second text node.
 *
 * <p>Rows that do not have at least four cells, or whose second cell has no
 * {@code label}, are skipped instead of aborting the whole parse with an
 * {@link IndexOutOfBoundsException} / {@link NullPointerException}.
 *
 * @param html raw HTML of the listing page
 * @return the parsed items in document order, never {@code null}
 */
protected List<ReservedItem> parse_reservations(String html) {
    Document doc = Jsoup.parse(html);
    List<ReservedItem> reservations = new ArrayList<>();
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    for (Element tr : doc.select("table.kontopos tr")) {
        // Header or spacer rows do not have the expected cell layout.
        if (tr.children().size() < 4) {
            continue;
        }
        Element descCell = tr.child(1);
        Element desc = descCell.select("label").first();
        if (desc == null) {
            continue;
        }
        Element pos = tr.child(3);

        ReservedItem item = new ReservedItem();

        Element link = descCell.select("a").first();
        if (link != null) {
            // NOTE(review): no base URI is set on this document, so absUrl() yields an
            // empty string for relative hrefs - confirm the links here are absolute.
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            item.setId(kk);
        }

        Element cancelInput = tr.child(0).select("input").first();
        if (cancelInput != null) {
            item.setCancelData(cancelInput.val());
        }

        // First text node of the label is the author, second one the title.
        int i = 0;
        for (Node node : desc.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    item.setAuthor(text);
                } else if (i == 1) {
                    item.setTitle(text);
                }
                i++;
            }
        }

        // First text node is either a date (item is ready) or a free-text status,
        // the second one is the branch.
        i = 0;
        for (Node node : pos.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    // The former extra condition text.contains("") was always true.
                    try {
                        item.setReadyDate(fmt.parseLocalDate(text));
                    } catch (IllegalArgumentException e) {
                        // Not a date: keep the text as status.
                        item.setStatus(text);
                    }
                } else if (i == 1) {
                    item.setBranch(text);
                }
                i++;
            }
        }
        reservations.add(item);
    }
    return reservations;
}
use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class Heidi method account.
/**
 * Logs in and loads the account data: pending fees and lent items from
 * {@code konto.cgi}, plus the reservations from the two listings
 * {@code konto=v} and {@code konto=b}, both parsed with
 * {@link #parse_reservations(String)}.
 *
 * <p>Rows of {@code table.kontopos} with fewer than three cells or without a
 * {@code label} in the second cell are skipped. A date cell that is too short
 * to contain a {@code dd.MM.yyyy} date, or that cannot be parsed, leaves the
 * item's deadline unset instead of throwing.
 *
 * @param account the account to log in with
 * @return the collected account data
 * @throws IOException        declared for the HTTP helpers used here
 * @throws JSONException      declared by the overridden method
 * @throws OpacErrorException declared by the overridden method
 */
@Override
public AccountData account(Account account) throws IOException, JSONException, OpacErrorException {
    login(account);
    String html;
    Document doc;
    AccountData adata = new AccountData(account.getId());
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    html = httpGet(opac_url + "/konto.cgi?sess=" + sessid, getDefaultEncoding());
    doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/");

    // Pending fees: reduce the cell text to "<amount> <currency>".
    for (Element td : doc.select("table.konto td")) {
        if (td.text().contains("Offene")) {
            String text = td.text().trim().replaceAll("Offene[^0-9]+Geb.+hren:[^0-9]+([0-9.," + "]+)[^0-9€A-Z]*(€|EUR|CHF|Fr.)", "$1 $2");
            adata.setPendingFees(text);
        }
    }

    List<LentItem> lent = new ArrayList<>();
    for (Element tr : doc.select("table.kontopos tr")) {
        // Skip rows that do not have the checkbox / description / date layout.
        if (tr.children().size() < 3) {
            continue;
        }
        Element descCell = tr.child(1);
        Element desc = descCell.select("label").first();
        if (desc == null) {
            continue;
        }
        String dates = tr.child(2).text().trim();

        LentItem item = new LentItem();

        Element link = descCell.select("a").first();
        if (link != null) {
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            item.setId(kk);
        }

        // Text nodes of the label: author, title, then optional extra lines
        // of which only the "Mediennummer" one is used.
        int i = 0;
        for (Node node : desc.childNodes()) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (i == 0) {
                    item.setAuthor(text);
                } else if (i == 1) {
                    item.setTitle(text);
                } else if (text.contains("Mediennummer")) {
                    item.setBarcode(text.replace("Mediennummer: ", ""));
                }
                i++;
            }
        }

        Element firstCell = tr.child(0);
        if (firstCell.select("input").size() == 1) {
            item.setProlongData(firstCell.select("input").first().val());
            item.setRenewable(true);
        } else {
            // No checkbox: remember the class of the marker span, prefixed with "§".
            Element marker = firstCell.select("span").first();
            String markerClass = marker != null ? marker.attr("class") : "";
            item.setProlongData("§" + markerClass);
            item.setRenewable(false);
        }

        // The cell holds either a single date or "<from> - <to>"; the part after
        // the dash is taken as the deadline.
        String todate = dates;
        if (todate.contains("-")) {
            String[] datesplit = todate.split("-");
            todate = datesplit.length > 1 ? datesplit[1].trim() : "";
        }
        // substring(0, 10) would throw on shorter text, which the catch below
        // does not cover.
        if (todate.length() >= 10) {
            try {
                item.setDeadline(fmt.parseLocalDate(todate.substring(0, 10)));
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            }
        }
        lent.add(item);
    }
    adata.setLent(lent);

    List<ReservedItem> reservations = new ArrayList<>();
    html = httpGet(opac_url + "/konto.cgi?konto=v&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    html = httpGet(opac_url + "/konto.cgi?konto=b&sess=" + sessid, getDefaultEncoding());
    reservations.addAll(parse_reservations(html));
    adata.setReservations(reservations);

    return adata;
}
use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class VuFind method parseCopies.
/**
 * Reads the holdings (copies) of a record page and adds them to {@code res}.
 *
 * <p>The layout is selected by {@code data.optString("copystyle")}:
 * <ul>
 * <li>{@code "doublestacked"}: inside {@code .tab-container}, an {@code h5}
 * names the branch and each following {@code table} lists copies; the first
 * row supplies the call number (second cell), every other row a barcode
 * (first cell) and status (second cell).</li>
 * <li>{@code "stackedtable"}: inside {@code .recordsubcontent} or
 * {@code .tab-container}, an {@code h3} (or a table {@code caption}) names
 * the branch and the columns are mapped through the {@code copytable} JSON
 * object: keys starting with {@code _} are options ({@code _offset} = number
 * of leading rows to skip), a value of the form {@code "col/line"} selects the
 * {@code line}-th {@code <br>}-separated part of cell {@code col}, and a plain
 * integer selects a whole cell.</li>
 * </ul>
 * Any other value leaves {@code res} untouched. If the expected container
 * element is missing from the page, no copies are added.
 *
 * @param res  item that receives the parsed copies
 * @param doc  parsed record page
 * @param data library configuration
 * @throws JSONException if {@code copytable} is missing or malformed in the
 *                       {@code "stackedtable"} layout
 */
static void parseCopies(DetailedItem res, Document doc, JSONObject data) throws JSONException {
    if ("doublestacked".equals(data.optString("copystyle"))) {
        // e.g. http://vopac.nlg.gr/Record/393668/Holdings#tabnav
        // for Athens_GreekNationalLibrary
        Element container = doc.select(".tab-container").first();
        if (container == null) {
            // Page has no holdings container: nothing to parse.
            return;
        }
        String branch = "";
        for (Element child : container.children()) {
            if (child.tagName().equals("h5")) {
                branch = child.text();
            } else if (child.tagName().equals("table")) {
                int i = 0;
                String callNumber = "";
                for (Element row : child.select("tr")) {
                    if (i == 0) {
                        // First row carries the call number shared by all copies below.
                        callNumber = row.child(1).text();
                    } else {
                        Copy copy = new Copy();
                        copy.setBranch(branch);
                        copy.setShelfmark(callNumber);
                        copy.setBarcode(row.child(0).text());
                        copy.setStatus(row.child(1).text());
                        res.addCopy(copy);
                    }
                    i++;
                }
            }
        }
    } else if ("stackedtable".equals(data.optString("copystyle"))) {
        // e.g. http://search.lib.auth.gr/Record/376356
        // or https://katalog.ub.uni-leipzig.de/Record/0000196115
        // or https://www.stadt-muenster.de/opac2/Record/0367968
        Element container = doc.select(".recordsubcontent, .tab-container").first();
        // .tab-container is used in Muenster.
        if (container == null) {
            // Page has no holdings container: nothing to parse.
            return;
        }
        String branch = "";
        JSONObject copytable = data.getJSONObject("copytable");
        // Number of leading rows (headers) to skip in every table.
        int offset = copytable.optInt("_offset", 0);
        for (Element child : container.children()) {
            Element current = child;
            if (current.tagName().equals("div")) {
                // A wrapping div is replaced by its first child; an empty div has nothing to offer.
                if (current.children().size() == 0) {
                    continue;
                }
                current = current.child(0);
            }
            if (current.tagName().equals("h3")) {
                branch = current.text();
            } else if (current.tagName().equals("table")) {
                if (current.select("caption").size() > 0) {
                    // Leipzig_Uni
                    branch = current.select("caption").first().ownText();
                }
                int i = 0;
                String callNumber = null;
                if ("headrow".equals(copytable.optString("signature"))) {
                    callNumber = current.select("tr").get(0).child(1).text();
                }
                for (Element row : current.select("tr")) {
                    if (i < offset) {
                        i++;
                        continue;
                    }
                    Copy copy = new Copy();
                    if (callNumber != null) {
                        copy.setShelfmark(callNumber);
                    }
                    copy.setBranch(branch);
                    Iterator<?> keys = copytable.keys();
                    while (keys.hasNext()) {
                        String key = (String) keys.next();
                        if (key.startsWith("_")) {
                            continue;
                        }
                        if (copytable.optString(key, "").contains("/")) {
                            // Leipzig_Uni
                            String[] splitted = copytable.getString(key).split("/");
                            int col = Integer.parseInt(splitted[0]);
                            int line = Integer.parseInt(splitted[1]);
                            // j counts the <br> elements passed so far, i.e. the current line.
                            int j = 0;
                            for (Node node : row.child(col).childNodes()) {
                                if (node instanceof Element) {
                                    if (((Element) node).tagName().equals("br")) {
                                        j++;
                                    } else if (j == line) {
                                        copy.set(key, ((Element) node).text());
                                    }
                                } else if (node instanceof TextNode && j == line && !((TextNode) node).text().trim().equals("")) {
                                    copy.set(key, ((TextNode) node).text());
                                }
                            }
                        } else {
                            // Thessaloniki_University
                            // Non-numeric values (e.g. "headrow") are not column indices.
                            if (copytable.optInt(key, -1) == -1) {
                                continue;
                            }
                            String value = row.child(copytable.getInt(key)).text();
                            copy.set(key, value);
                        }
                    }
                    res.addCopy(copy);
                    i++;
                }
            }
        }
    }
}
use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class PicaOld method parseMediaList.
/**
 * Parses the list of lent media from an account page and appends one
 * {@link LentItem} per matching row to {@code media}.
 *
 * <p>Two row layouts are handled:
 * <ul>
 * <li>rows containing a nested {@code table[summary=title data]}: the title is
 * the first inner row, and the second inner row holds label/value pairs
 * (status, expiry date, ...) in German, English, Dutch or French;</li>
 * <li>flat rows ("like in Kiel"): fixed cell indices are used - 4 or 5 for the
 * title, 13 for status and reminder count, 21 for the deadline, 25 (if
 * present) for the number of reservations, 1 for the renewal checkbox.</li>
 * </ul>
 * For flat rows the status is assembled from the status cell, the reminder
 * count, the renewal count and the reservation count, joined by a single
 * {@code ", "} each.
 *
 * @param media          list receiving the parsed items
 * @param doc            parsed account page
 * @param stringProvider source of localised strings
 * @param renewalCounts  renewal count per row; only used when its size equals
 *                       the number of rows
 * @throws OpacErrorException if the page contains no media rows at all
 */
static void parseMediaList(List<LentItem> media, Document doc, StringProvider stringProvider, List<String> renewalCounts) throws OpacErrorException {
    Elements copytrs = doc.select("table[summary^=list] > tbody > tr[valign=top]");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN);
    int trs = copytrs.size();
    if (trs < 1) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.COULD_NOT_LOAD_ACCOUNT));
    }
    for (int i = 0; i < trs; i++) {
        Element tr = copytrs.get(i);
        if (tr.select("table[summary=title data]").size() > 0) {
            // According to HTML code from Bug reports (Server TU Darmstadt,
            // Berlin Ibero-Amerikanisches Institut)
            LentItem item = new LentItem();
            // Check if there is a checkbox to prolong this item
            if (tr.select("input").size() > 0) {
                item.setProlongData(tr.select("input").attr("value"));
            } else {
                item.setRenewable(false);
            }
            Elements datatrs = tr.select("table[summary=title data] tr");
            item.setTitle(datatrs.get(0).text());
            for (Element td : datatrs.get(1).select("td")) {
                List<TextNode> textNodes = td.textNodes();
                Elements titles = td.select("span.label-small");
                List<String> values = new ArrayList<>();
                if (td.select("span[name=xxxxx]").size() > 0) {
                    for (Element span : td.select("span[name=xxxxx]")) {
                        values.add(span.text());
                    }
                } else {
                    for (TextNode node : textNodes) {
                        if (!node.text().equals(" ")) {
                            values.add(node.text());
                        }
                    }
                }
                assert (values.size() == titles.size());
                // With assertions disabled a mismatch must not run past the shorter list.
                int pairs = Math.min(values.size(), titles.size());
                for (int j = 0; j < pairs; j++) {
                    String title = titles.get(j).text();
                    String value = values.get(j).trim().replace(";", "");
                    if (title.contains("Status") || title.contains("status") || title.contains("statut")) {
                        item.setStatus(value);
                    } else if (title.contains("Leihfristende") || title.contains("expiry date") || title.contains("vervaldatum") || title.contains("date d'expiration")) {
                        try {
                            item.setDeadline(fmt.parseLocalDate(value));
                        } catch (IllegalArgumentException e1) {
                            e1.printStackTrace();
                        }
                    }
                    // Shelf mark ("Signatur"/"shelf mark"/"signatuur") and reservation
                    // count ("Vormerkungen"/"reservations"/...) are recognised by the
                    // page but not stored on the item.
                }
            }
            media.add(item);
        } else {
            // like in Kiel
            String prolongCount = "";
            if (renewalCounts.size() == trs && renewalCounts.get(i) != null) {
                prolongCount = renewalCounts.get(i);
            }
            // The reminder count is the text between "(" and " Mahn" in the status cell.
            String reminderCount = tr.child(13).text().trim();
            if (reminderCount.contains(" Mahn") && reminderCount.contains("(") && reminderCount.indexOf("(") < reminderCount.indexOf(" Mahn")) {
                reminderCount = reminderCount.substring(reminderCount.indexOf("(") + 1, reminderCount.indexOf(" Mahn"));
            } else {
                reminderCount = "";
            }
            LentItem item = new LentItem();
            if (tr.child(4).text().trim().length() < 5 && tr.child(5).text().trim().length() > 4) {
                item.setTitle(tr.child(5).text().trim());
            } else {
                item.setTitle(tr.child(4).text().trim());
            }
            String status = tr.child(13).text().trim();
            if (!reminderCount.equals("0") && !reminderCount.equals("")) {
                if (!status.equals("")) {
                    status += ", ";
                }
                // No trailing separator here: the next fragment adds its own, and a
                // trailing one would otherwise dangle or double up.
                status += reminderCount + " " + stringProvider.getString(StringProvider.REMINDERS);
            }
            if (!"".equals(prolongCount)) {
                if (!status.equals("")) {
                    status += ", ";
                }
                status += prolongCount + "x " + stringProvider.getString(StringProvider.PROLONGED_ABBR);
            }
            if (tr.children().size() >= 26 && !"".equals(tr.child(25).text().trim())) {
                try {
                    // Parse first so that a non-numeric cell does not leave a dangling separator.
                    int reservationCount = Integer.parseInt(tr.child(25).text().trim());
                    String reservationText = stringProvider.getQuantityString(StringProvider.RESERVATIONS_NUMBER, reservationCount, reservationCount);
                    if (!status.equals("")) {
                        status += ", ";
                    }
                    status += reservationText;
                } catch (NumberFormatException ignored) {
                    // Cell is not a number: leave the reservation count out of the status.
                }
            }
            item.setStatus(status);
            try {
                item.setDeadline(fmt.parseLocalDate(tr.child(21).text().trim()));
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            }
            if (tr.child(1).select("input").size() > 0) {
                // If there is no checkbox, the medium is not renewable
                item.setProlongData(tr.child(1).select("input").attr("value"));
            }
            media.add(item);
        }
    }
    assert (media.size() == trs);
}
use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class SISIS method parseDetail.
/**
 * Builds a {@link DetailedItem} from the three HTML fragments of a record page.
 *
 * <p>As used below: {@code html} supplies id, cover, title, the copy table and
 * the volume-search link; {@code html2} supplies the label/value details;
 * {@code html3} supplies reservation links, download links and a status note.
 * All three documents get {@code data.optString("baseurl", "")} as base URI.
 *
 * <p>Steps, in order: id, reservation links, cover, title, details from
 * {@code .box-container .data td} and {@code #tab-content .data td} (or the
 * {@code .fulltitle} table as fallback), id fallback, download links, status
 * note, copies, volume search. A missing {@code .box-container .data td}
 * cell is tolerated, in the same way as a missing {@code #tab-content .data td}
 * cell.
 *
 * @param html           first fragment of the record page
 * @param html2          second fragment (details)
 * @param html3          third fragment (links and status)
 * @param coverJs        optional script snippet containing an {@code <img>} tag
 *                       with the cover URL; may be {@code null}
 * @param data           library configuration
 * @param stringProvider source of localised strings
 * @return the populated item
 * @throws IOException declared in the signature; not thrown directly by this body
 */
static DetailedItem parseDetail(String html, String html2, String html3, String coverJs, JSONObject data, StringProvider stringProvider) throws IOException {
    Document doc = Jsoup.parse(html);
    String opac_url = data.optString("baseurl", "");
    doc.setBaseUri(opac_url);
    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);
    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);

    DetailedItem result = new DetailedItem();

    // NOTE(review): Elements.text() yields an empty string, not null, when nothing
    // matches, so the "getId() == null" fallbacks below presumably only fire if
    // setId() normalises empty ids - confirm against DetailedItem.
    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
        // Vormerken
        if (hrefq.get("methodToCall") != null) {
            if (hrefq.get("methodToCall").equals("doVormerkung") || hrefq.get("methodToCall").equals("doBestellung")) {
                // Only the query string is kept as reservation info.
                reservationlinks.add(href.split("\\?")[1]);
            }
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.size() == 0) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    if (result.getId() == null && doc.select("#permalink_link").size() > 0) {
        result.setId(doc.select("#permalink_link").text());
    }

    if (coverJs != null) {
        Pattern srcPattern = Pattern.compile("<img .* src=\"([^\"]+)\">");
        Matcher matcher = srcPattern.matcher(coverJs);
        if (matcher.find()) {
            result.setCover(matcher.group(1));
        }
    } else if (doc.select(".data td img").size() == 1) {
        result.setCover(doc.select(".data td img").first().attr("abs:src"));
    }

    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }

    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    // title/text accumulate the current label and its value while walking the
    // child nodes; a pair is flushed whenever the next label starts.
    String title = "";
    String text = "";
    // Set when the first pass stops at a link; the pending pair is then carried
    // over into the second pass instead of being discarded.
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element element = (Element) node;
                if (element.tagName().equals("strong")) {
                    if (element.hasClass("c2")) {
                        // <strong class="c2"> starts a new label.
                        if (!title.equals("")) {
                            result.addDetail(new Detail(title, text.trim()));
                        }
                        title = element.text().trim();
                        text = "";
                    } else {
                        text = text + element.text();
                    }
                } else {
                    if (element.tagName().equals("a")) {
                        if (element.text().trim().contains("hier klicken") || title.contains("Link")) {
                            text = text + node.attr("href");
                            takeover = true;
                            break;
                        } else {
                            text = text + element.text();
                        }
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        text = "";
        title = "";
    }

    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }
                    title = ((Element) node).text().trim();
                } else {
                    if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) {
                        text = text + node.attr("href");
                    } else {
                        text = text + ((Element) node).text();
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR), stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last pending label/value pair.
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }

    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }

    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }

    if (doc3.select("#tab-content .textrot").size() > 0) {
        result.addDetail(new Detail(stringProvider.getString(StringProvider.STATUS), doc3.select("#tab-content .textrot").text()));
    }

    // Column indices of the copy table; overridden below from the header row.
    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);

    Element table = doc.select("#tab-content .data").first();
    Elements copy_columns = table != null ? table.select("tr#bg2 th") : new Elements();
    for (int i = 0; i < copy_columns.size(); i++) {
        Element th = copy_columns.get(i);
        String head = th.text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }

    Pattern status_lent = Pattern.compile("^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");

    Elements exemplartrs = table != null ? table.select("tr").not("#bg2") : new Elements();
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        // Any failure on a single row is logged and the row is skipped.
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");

            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            // Status and barcode share one column: split "<status> <barcode>".
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }

            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext.trim().replace(" Wegweiser", ""));
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }

            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);

            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }
            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }
            result.addCopy(copy);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    // Volume search: look for a link whose query has methodToCall=volumeSearch
    // and remember the catKey/dbIdentifier parameters seen up to that link.
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}
Aggregations