use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.
the class SISISSearchTest method testParseDetail.
/**
 * Parses the stored SISIS detail pages belonging to {@code file} and checks the
 * resulting {@link DetailedItem}: at least one copy must be present, every copy
 * and volume must carry plausible data, and the title must equal the expected
 * title for the fixture. The test returns early when any of the three HTML
 * fixtures is unavailable.
 */
@Test
public void testParseDetail() throws OpacApi.OpacErrorException, JSONException, IOException {
    String html1 = readResource("/sisis/result_detail/" + file.replace(".html", "_1.html"));
    String html2 = readResource("/sisis/result_detail/" + file.replace(".html", "_2.html"));
    String html3 = readResource("/sisis/result_detail/" + file.replace(".html", "_3.html"));
    String coverJs = readResource("/sisis/result_detail/" + file.replace(".html", ".js"));
    if (html1 == null || html2 == null || html3 == null) {
        // we may not have all files for all libraries
        return;
    }

    DetailedItem result = SISIS.parseDetail(html1, html2, html3, coverJs, new JSONObject(),
            new DummyStringProvider());

    assertTrue(result.getCopies().size() > 0);
    for (Copy copy : result.getCopies()) {
        assertContainsData(copy.getStatus());
        assertNullOrNotEmpty(copy.getBarcode());
        assertNullOrNotEmpty(copy.getBranch());
        assertNullOrNotEmpty(copy.getDepartment());
        assertNullOrNotEmpty(copy.getLocation());
        assertNullOrNotEmpty(copy.getReservations());
        assertNullOrNotEmpty(copy.getShelfmark());
        assertNullOrNotEmpty(copy.getUrl());
        // A copy reported as lent ("Entliehen") must come with a return date
        if (copy.getStatus().equals("Entliehen")) {
            assertNotNull(copy.getReturnDate());
        }
    }
    for (Volume volume : result.getVolumes()) {
        assertContainsData(volume.getId());
        assertContainsData(volume.getTitle());
    }

    // JUnit convention is (expected, actual); the swapped order produced
    // misleading "expected ... but was ..." messages on failure.
    assertEquals(getDetailTitle(file), result.getTitle());

    if (file.equals("berlin_htw.html")) {
        assertTrue(result.getDetails().contains(new Detail("Signatur:", "15/2322")));
        assertNotNull(result.getCover());
    }
}
use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.
the class Open method parse_result.
/**
 * Builds a {@link DetailedItem} from a parsed detail page.
 *
 * <p>The method extracts, in this order: title and subtitle, cover, media id,
 * annotation/description, parent collection link, the generic name/value detail
 * rows, the copies table and, if the page contains a
 * {@code DivDependentCatalogue} element, the dependent volumes. Volumes are
 * loaded with an additional HTTP POST to the {@code GetDependantCatalogues}
 * web service; a failure of that request is printed and otherwise ignored, so
 * the item is still returned without volumes.
 *
 * @param doc the parsed detail page
 * @return the item populated with everything that could be found in {@code doc}
 */
protected DetailedItem parse_result(Document doc) {
    DetailedItem item = new DetailedItem();

    // Title and Subtitle
    item.setTitle(doc.select("span[id$=LblShortDescriptionValue], span[id$=LblTitleValue]").text());
    String subtitle = doc.select("span[id$=LblSubTitleValue]").text();
    if (subtitle.equals("") && doc.select("span[id$=LblShortDescriptionValue]").size() > 0) {
        // Subtitle detection for Bern
        Element next = doc.select("span[id$=LblShortDescriptionValue]").first().parent().nextElementSibling();
        // nextElementSibling() yields null when the title container has no following sibling
        if (next != null && next.select("span").size() == 0) {
            subtitle = next.text().trim();
        }
    }
    if (!subtitle.equals("")) {
        item.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
    }

    // Cover
    if (doc.select("input[id$=mediumImage]").size() > 0) {
        item.setCover(doc.select("input[id$=mediumImage]").attr("src"));
    } else if (doc.select("img[id$=CoverView_Image]").size() > 0) {
        assignBestCover(item, getCoverUrlList(doc.select("img[id$=CoverView_Image]").first()));
    }

    // ID
    item.setId(doc.select("input[id$=regionmednr]").val());

    // Description
    if (doc.select("span[id$=ucCatalogueContent_LblAnnotation]").size() > 0) {
        String name = doc.select("span[id$=lblCatalogueContent]").text();
        String value = doc.select("span[id$=ucCatalogueContent_LblAnnotation]").text();
        item.addDetail(new Detail(name, value));
    }

    // Parent
    if (doc.select("a[id$=HyperLinkParent]").size() > 0) {
        item.setCollectionId(doc.select("a[id$=HyperLinkParent]").first().attr("href"));
    }

    // Details
    String detailSelector = "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+span),"
            + "div[id$=CatalogueDetailView] .spacingBottomSmall:has(span+a), "
            + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+span), "
            + "div[id$=CatalogueDetailView] .oclc-searchmodule-detail-data div:has" + "(span+a)";
    for (Element detail : doc.select(detailSelector)) {
        String name = detail.select("span").get(0).text().replace(": ", "");
        String value;
        Elements links = detail.select("a");
        if (links.size() > 1) {
            // Several links in one row: join their texts with ", "
            StringBuilder joined = new StringBuilder();
            int i = 0;
            for (Element a : links) {
                if (i != 0) {
                    joined.append(", ");
                }
                joined.append(a.text().trim());
                i++;
            }
            value = joined.toString();
        } else {
            // The selector guarantees a span followed by a span or a link,
            // so the second match is the value element.
            value = detail.select("span, a").get(1).text();
            if (value.contains("hier klicken") && links.size() > 0) {
                // "click here" links are useless without their target
                value = value + " " + links.first().attr("href");
            }
        }
        item.addDetail(new Detail(name, value));
    }

    // Description
    if (doc.select("div[id$=CatalogueContent]").size() > 0) {
        String name = doc.select("div[id$=CatalogueContent] .oclc-module-header").text();
        String value = doc.select("div[id$=CatalogueContent] .oclc-searchmodule-detail-annotation").text();
        item.addDetail(new Detail(name, value));
    }

    // Copies
    Element table = doc.select("table[id$=grdViewMediumCopies]").first();
    if (table != null) {
        Elements trs = table.select("tr");
        // A table without any row has no header to build the column map from
        if (trs.size() > 0) {
            List<String> columnmap = new ArrayList<>();
            for (Element th : trs.first().select("th")) {
                columnmap.add(getCopyColumnKey(th.text()));
            }
            DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
            for (int i = 1; i < trs.size(); i++) {
                Elements tds = trs.get(i).select("td");
                Copy copy = new Copy();
                // Cells beyond the header width have no known meaning and are skipped
                for (int j = 0; j < tds.size() && j < columnmap.size(); j++) {
                    if (columnmap.get(j) == null) {
                        continue;
                    }
                    String text = tds.get(j).text().replace("\u00a0", "");
                    if (tds.get(j).select(".oclc-module-label").size() > 0
                            && tds.get(j).select("span").size() == 2) {
                        // Cell repeats its label in the first span; the value is the second one
                        text = tds.get(j).select("span").get(1).text();
                    }
                    if (text.equals("")) {
                        continue;
                    }
                    copy.set(columnmap.get(j), text, fmt);
                }
                item.addCopy(copy);
            }
        }
    }

    // Dependent (e.g. Verden)
    if (doc.select("div[id$=DivDependentCatalogue]").size() > 0) {
        String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService.asmx/GetDependantCatalogues";
        JSONObject postData = new JSONObject();

        // Determine portalID value: the first numeric argument in the sixth position
        // of the LoadCatalogueViewDependantCataloguesAsync(...) call, defaulting to 1
        int portalId = 1;
        Pattern portalIdPattern = Pattern.compile(".*LoadCatalogueViewDependantCataloguesAsync\\([^,]*,[^,]*,"
                + "[^,]*,[^,]*,[^,]*,[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");
        for (Element scripttag : doc.select("script")) {
            String scr = scripttag.html();
            if (scr.contains("LoadCatalogueViewDependantCataloguesAsync")) {
                Matcher portalIdMatcher = portalIdPattern.matcher(scr);
                if (portalIdMatcher.find()) {
                    portalId = Integer.parseInt(portalIdMatcher.group(1));
                }
            }
        }

        try {
            postData.put("portalId", portalId)
                    .put("mednr", item.getId())
                    .put("tabUrl", opac_url + "/" + data.getJSONObject("urls").getString("simple_search")
                            + NO_MOBILE + "&id=")
                    .put("branchFilter", "");
            RequestBody entity = RequestBody.create(MEDIA_TYPE_JSON, postData.toString());
            String json = httpPost(url, entity, getDefaultEncoding());
            JSONObject volumeData = new JSONObject(json);
            JSONArray cat = volumeData.getJSONObject("d").getJSONArray("Catalogues");
            for (int i = 0; i < cat.length(); i++) {
                JSONObject obj = cat.getJSONObject(i);
                Map<String, String> params = getQueryParamsFirst(obj.getString("DependantUrl"));
                item.addVolume(new Volume(params.get("id"), obj.getString("DependantTitle")));
            }
        } catch (JSONException | IOException e) {
            // Volumes are optional extra data: report the problem and return the item without them
            e.printStackTrace();
        }
    }
    return item;
}
use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.
the class Primo method parse_detail.
/**
 * Assembles the {@link DetailedItem} for one record.
 *
 * <p>Detail rows are read from {@code doc}. Copies come from a second request
 * for the record's locations tab, which is understood in two layouts: a table
 * with a header row, or a list of location blocks. If no copy was found, a
 * third request for the "view online" tab is made and every link found there
 * is added as a detail.
 *
 * @param id  identifier of the record; stored on the item and used in the follow-up requests
 * @param doc parsed details page of the record
 * @return the populated item
 * @throws OpacErrorException declared by the signature; not thrown directly by this method
 * @throws IOException        if one of the follow-up HTTP requests fails
 */
protected DetailedItem parse_detail(String id, Document doc) throws OpacErrorException, IOException {
    DetailedItem res = new DetailedItem();
    res.setId(id);
    res.setTitle(doc.select(".EXLResultTitle").text());

    for (Element detrow : doc.select(".EXLDetailsContent li")) {
        String label = null;
        StringBuilder content = new StringBuilder();
        for (Node child : detrow.childNodes()) {
            if (child instanceof Element) {
                Element el = (Element) child;
                if (el.tagName().equals("strong") || el.hasClass("bib-EXLDetailsContent-item-title")) {
                    // A label element starts (or restarts) the row's title
                    label = el.text();
                } else if (label != null) {
                    content.append(el.text());
                }
            } else if (child instanceof TextNode && label != null) {
                content.append(((TextNode) child).text());
            }
        }
        if (label != null) {
            res.addDetail(new Detail(label, content.toString().trim()));
        }
    }

    // Both follow-up requests differ only in the requested tab
    String displayBase = opac_url + "/action/display.do?ct=display&fn=search&vid=" + vid + "&doc=" + id;

    String html2 = httpGet(displayBase + "&tabs=locationsTab", getDefaultEncoding());
    Document doc2 = Jsoup.parse(html2);
    if (!doc2.select(".EXLLocationTitlesRow").isEmpty()) {
        // Table layout: map each recognised header column to a copy field
        Map<Integer, String> copymap = new HashMap<>();
        int headerIndex = 0;
        for (Element th : doc2.select(".EXLLocationTitlesRow th")) {
            String heading = th.text().toLowerCase(Locale.GERMAN).trim();
            String field = null;
            if (heading.contains("library") || heading.contains("bibliothek") || heading.contains("branch")) {
                field = "branch";
            } else if (heading.contains("location") || heading.contains("ort")) {
                field = "location";
            } else if (heading.contains("call number") || heading.contains("signatur")) {
                field = "signature";
            } else if (heading.contains("due date") || heading.contains("llig am")
                    || heading.contains("ausgeliehen bis") || heading.contains("lligkeit")
                    || heading.contains("ausleihstatus")) {
                field = "returndate";
            } else if (heading.contains("loan to") || heading.contains("bezugsmodalit")
                    || heading.contains("ausleihm") || heading.contains("status")) {
                field = "status";
            } else if (heading.contains("queue") || heading.contains("vormerker")) {
                field = "reservations";
            }
            if (field != null) {
                copymap.put(headerIndex, field);
            }
            headerIndex++;
        }

        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        DateTimeFormatter fmt2 = DateTimeFormat.forPattern("dd/MM/yyyy").withLocale(Locale.GERMAN);
        for (Element tr : doc2.select(".EXLLocationTable tr:not(.EXLLocationTitlesRow):not("
                + ".EXLAdditionalFieldsRow)")) {
            Copy copy = new Copy();
            int column = 0;
            for (Element td : tr.children()) {
                String field = copymap.get(column);
                column++;
                String value = td.text().replace("\u00a0", " ").trim();
                if (field == null || value.equals("")) {
                    continue;
                }
                try {
                    copy.set(field, value, fmt);
                } catch (IllegalArgumentException e) {
                    // First format rejected the value: retry with the slash-separated one
                    try {
                        copy.set(field, value, fmt2);
                    } catch (IllegalArgumentException e2) {
                        e2.printStackTrace();
                    }
                }
            }
            res.addCopy(copy);
        }
    } else if (!doc2.select(".EXLLocationList").isEmpty()) {
        // e.g. University of South Wales
        for (Element row : doc2.select(".EXLLocationList")) {
            Copy copy = new Copy();
            copy.setBranch(row.select(".EXLLocationsTitle").text());
            copy.setDepartment(row.select(".EXLLocationInfo strong").text());
            copy.setShelfmark(row.select(".EXLLocationInfo cite").text());
            copy.setStatus(row.select(".EXLLocationInfo em").text());
            res.addCopy(copy);
        }
    }

    if (res.getCopies().isEmpty()) {
        // Online-Medium?
        String html3 = httpGet(displayBase + "&tabs=viewOnlineTab", getDefaultEncoding());
        Document doc3 = Jsoup.parse(html3);
        // Needed so that absUrl() can resolve relative links
        doc3.setBaseUri(opac_url + "/action/display.do");
        if (!doc3.select(".EXLTabHeaderContent a").isEmpty()) {
            Element headerLink = doc3.select(".EXLTabHeaderContent a").first();
            res.addDetail(new Detail(headerLink.text().trim(), cleanUrl(headerLink.absUrl("href"))));
        }
        for (Element onlineLink : doc3.select(".EXLViewOnlineLinksTitle a")) {
            res.addDetail(new Detail(onlineLink.text().trim(), cleanUrl(onlineLink.absUrl("href"))));
        }
    }
    return res;
}
use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.
the class Heidi method getResultById.
/**
 * Loads the detail page of the record {@code id} and converts it into a
 * {@link DetailedItem}.
 *
 * <p>A session is started first if none exists. When {@code homebranch} is a
 * non-empty string it is sent along as the {@code zweig} cookie. From the
 * page the method reads the title and detail rows ({@code .titelsatz}), the
 * copies ({@code .ex table}), whether an order link
 * ({@code bestellung.cgi}) is offered, and the query of a volumes link if one
 * is present.
 *
 * @param id         record key, used as {@code katkey} in the request
 * @param homebranch branch to pass to the catalogue, or {@code null}/empty for none
 * @return the parsed item
 * @throws IOException if the HTTP request fails
 */
@Override
public DetailedItem getResultById(String id, final String homebranch) throws IOException {
    if (sessid == null) {
        start();
    }

    // Homebranch
    if (homebranch != null && !"".equals(homebranch)) {
        cookieStore.addCookie(new BasicClientCookie("zweig", homebranch));
    }

    String html = httpGet(opac_url + "/titel.cgi?katkey=" + id + "&sess=" + sessid, ENCODING, false, cookieStore);
    Document doc = Jsoup.parse(html);

    DetailedItem item = new DetailedItem();
    item.setId(id);

    Elements table = doc.select(".titelsatz tr");
    for (Element tr : table) {
        // Only rows with both a heading and a value cell carry a detail
        if (tr.select("th").size() == 0 || tr.select("td").size() == 0) {
            continue;
        }
        String d = tr.select("th").first().text();
        String c = tr.select("td").first().text();
        if (d.equals("Titel:")) {
            item.setTitle(c);
        } else if ((d.contains("URL") || d.contains("Link")) && tr.select("td a").size() > 0) {
            // For link rows the target is more useful than the link text
            item.addDetail(new Detail(d, tr.select("td a").first().attr("href")));
        } else {
            item.addDetail(new Detail(d, c));
        }
    }

    if (doc.select(".ex table tr").size() > 0) {
        table = doc.select(".ex table tr");
        DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
        // Fully qualified so that no additional import is required.
        // Extracting with find() instead of replaceAll() means that a status text
        // which does not have the expected shape yields "no value" rather than the
        // whole status string.
        java.util.regex.Pattern dueDatePattern =
                java.util.regex.Pattern.compile("entliehen bis ([0-9.]+)");
        java.util.regex.Pattern reservationsPattern =
                java.util.regex.Pattern.compile("\\(.*Vormerkungen: ([0-9]+)\\)");
        for (Element tr : table) {
            if (tr.hasClass("exueber") || tr.select(".exsig").size() == 0 || tr.select(".exso").size() == 0
                    || tr.select(".exstatus").size() == 0) {
                continue;
            }
            Copy copy = new Copy();
            copy.setShelfmark(tr.select(".exsig").first().text());
            copy.setBranch(tr.select(".exso").first().text());
            String status = tr.select(".exstatus").first().text();
            if (status.contains("entliehen bis")) {
                boolean dateParsed = false;
                java.util.regex.Matcher dueDateMatcher = dueDatePattern.matcher(status);
                if (dueDateMatcher.find()) {
                    try {
                        copy.setReturnDate(fmt.parseLocalDate(dueDateMatcher.group(1)));
                        dateParsed = true;
                    } catch (IllegalArgumentException e) {
                        // Not a valid dd.MM.yyyy date: keep the raw status text below
                    }
                }
                java.util.regex.Matcher reservationsMatcher = reservationsPattern.matcher(status);
                if (reservationsMatcher.find()) {
                    copy.setReservations(reservationsMatcher.group(1));
                }
                // Only shorten the status when the date it contained was actually extracted,
                // otherwise that information would be lost
                copy.setStatus(dateParsed ? "entliehen" : status);
            } else {
                copy.setStatus(status);
            }
            item.addCopy(copy);
        }
    }

    for (Element a : doc.select(".status1 a")) {
        if (a.attr("href").contains("bestellung.cgi")) {
            item.setReservable(true);
            item.setReservation_info(id);
            break;
        }
    }

    for (Element a : doc.select(".titelsatz a")) {
        // Link text "B.+nde" (presumably "Bände", matched independent of the umlaut encoding)
        if (a.text().trim().matches("B.+nde")) {
            Map<String, String> volumesearch = new HashMap<>();
            volumesearch.put("query", getQueryParamsFirst(a.attr("href")).get("query"));
            item.setVolumesearch(volumesearch);
        }
    }
    return item;
}
use of de.geeksfactory.opacclient.objects.Detail in project opacclient by opacapp.
the class BiBer1992 method parse_result.
/**
 * Parses a detail page into a {@link DetailedItem}.
 *
 * <p>The page is a two-column table inside of a form. The 1st column is the
 * category, e.g. "Verfasser", the 2nd column is the content, e.g.
 * "Bach, Johann Sebastian". In some rows the 1st column is empty; then the
 * 2nd column is continued text from the row above.
 *
 * <p>Some libraries have a second section for the copies in stock
 * (Exemplare). This 2nd section has reverse layout.
 *
 * <pre>
 * |-------------------|
 * | Subject | Content |
 * |-------------------|
 * | Subject | Content |
 * |-------------------|
 * |         | Content |
 * |-------------------|
 * | Subject | Content |
 * |-------------------------------------------------|
 * |         | Site    | Signatur| ID      | State   |
 * |-------------------------------------------------|
 * |         | Content | Content | Content | Content |
 * |-------------------------------------------------|
 * </pre>
 *
 * <p>Which column of the copies section feeds which copy field is given by a
 * built-in default that the optional {@code "copiestable"} object of the
 * library configuration may override; {@code -1} disables a field.
 *
 * @param html source of the detail page
 * @return the parsed item
 */
private DetailedItem parse_result(String html) {
    DetailedItem item = new DetailedItem();
    Document document = Jsoup.parse(html);
    Elements rows = document.select("html body form table tr");

    // Most recently added detail; continuation rows append to it
    Detail detail = null;

    // prepare copiestable
    Copy copy_last_content = null;
    int copy_row = 0;
    String[] copy_keys = new String[] { "barcode", "branch", "department", "location", "status",
            "returndate", "reservations" };
    int[] copy_map = new int[] { 3, 1, -1, 1, 4, -1, -1 };
    try {
        JSONObject map = data.getJSONObject("copiestable");
        for (int i = 0; i < copy_keys.length; i++) {
            if (map.has(copy_keys[i])) {
                copy_map[i] = map.getInt(copy_keys[i]);
            }
        }
    } catch (Exception ignored) {
        // "copiestable" is optional
    }
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);

    // go through all rows
    for (Element row : rows) {
        Elements columns = row.children();
        if (columns.size() == 2) {
            // HTML tag "&nbsp;" is encoded as 0xA0
            String firstColumn = columns.get(0).text().replace("\u00a0", " ").trim();
            String secondColumn = columns.get(1).text().replace("\u00a0", " ").trim();
            if (firstColumn.length() > 0) {
                // 1st column is category
                if (firstColumn.equalsIgnoreCase("titel")) {
                    detail = null;
                    item.setTitle(secondColumn);
                } else {
                    if (secondColumn.contains("hier klicken") && columns.get(1).select("a").size() > 0) {
                        // "click here" links are useless without their target
                        secondColumn += " " + columns.get(1).select("a").first().attr("href");
                    }
                    detail = new Detail(firstColumn, secondColumn);
                    item.getDetails().add(detail);
                }
            } else {
                // 1st column is empty: continuation of the previous category
                if (detail != null) {
                    String content = detail.getContent() + "\n" + secondColumn;
                    detail.setContent(content);
                } else {
                    // check if there is an amazon image
                    if (columns.get(0).select("a img[src]").size() > 0) {
                        item.setCover(columns.get(0).select("a img").first().attr("src"));
                    }
                }
            }
        } else if (columns.size() > 3) {
            // Row of the copies section; the first such row is the header
            if (copy_row > 0) {
                Copy copy = new Copy();
                for (int j = 0; j < copy_keys.length; j++) {
                    int col = copy_map[j];
                    // Skip disabled fields and columns this row does not have
                    // (the mapping may point past the end of a short row)
                    if (col < 0 || col >= columns.size()) {
                        continue;
                    }
                    String text = "";
                    if (copy_keys[j].equals("branch")) {
                        // for "Standort" only use ownText() to suppress
                        // Link "Wegweiser"
                        text = columns.get(col).ownText().replace("\u00a0", " ").trim();
                    }
                    if (text.length() == 0) {
                        // text of children
                        text = columns.get(col).text().replace("\u00a0", " ").trim();
                    }
                    if (text.length() == 0) {
                        // this is sometimes the case for "Standort"
                        if (copy_keys[j].equals("status")) {
                            // but do it not for Status
                            text = " ";
                        } else if (copy_last_content != null) {
                            // Inherit the value from the previous copy; it may not have one either
                            text = copy_last_content.get(copy_keys[j]);
                            if (text == null) {
                                text = "";
                            }
                        }
                    }
                    if (copy_keys[j].equals("reservations")) {
                        text = text.replace("Vorgemerkt: ", "").replace("Vorbestellt: ", "");
                    }
                    try {
                        copy.set(copy_keys[j], text, fmt);
                    } catch (IllegalArgumentException e) {
                        e.printStackTrace();
                    }
                }
                // Branch and location may be mapped to the same column; do not show it twice
                if (copy.getBranch() != null && copy.getLocation() != null
                        && copy.getLocation().equals(copy.getBranch())) {
                    copy.setLocation(null);
                }
                item.addCopy(copy);
                copy_last_content = copy;
            }
            // ignore 1st row
            copy_row++;
        }
    }

    // We cannot check if media is reservable
    item.setReservable(true);
    if (opacDir.contains("opax")) {
        if (document.select("input[type=checkbox]").size() > 0) {
            item.setReservation_info(document.select("input[type=checkbox]").first().attr("name"));
        } else if (document.select("a[href^=reserv" + opacSuffix + "]").size() > 0) {
            String href = document.select("a[href^=reserv" + opacSuffix + "]").first().attr("href");
            int markerPos = href.indexOf("resF_");
            if (markerPos >= 0) {
                item.setReservation_info(href.substring(markerPos));
            } else {
                // Link without the "resF_" marker carries no usable reservation info
                item.setReservable(false);
            }
        } else {
            item.setReservable(false);
        }
    } else {
        item.setReservation_info(document.select("input[name=ID]").attr("value"));
    }
    return item;
}
Aggregations