use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.
the class Zones method parse_search.
/**
 * Parses a Zones result list page into a {@link SearchRequestResult}.
 *
 * <p>Besides building the result list, this method stores the search object reference found
 * on the page in {@code searchobj}. Which selectors are used depends on {@code version18}.
 *
 * @param html raw HTML of the result page
 * @param page page number that is passed through unchanged to the returned result
 * @return the parsed hits together with the total hit count; the total is -1 when no count
 *         could be read from the page
 * @throws OpacErrorException if the page contains an {@code #ErrorAdviceRow} element; its
 *                            text becomes the exception message
 */
private SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/APS_PRESENT_BIB");

    Elements errorRow = doc.select("#ErrorAdviceRow");
    if (errorRow.size() > 0) {
        throw new OpacErrorException(errorRow.text().trim());
    }

    // -1 is the "total unknown" sentinel; it is kept whenever no count can be extracted.
    int results_total = -1;
    Elements searchHits = doc.select(version18 ? "td:containsOwn(Total)" : ".searchHits");
    if (searchHits.size() > 0) {
        // The count is the last "(n)" in the text. Matching instead of blindly parsing the
        // replaced string avoids a NumberFormatException when the text has no such part.
        Matcher hitsMatcher = Pattern.compile(".*\\(([0-9]+)\\)")
                .matcher(searchHits.first().text().trim());
        if (hitsMatcher.find()) {
            results_total = Integer.parseInt(hitsMatcher.group(1));
        }
    } else {
        // Zones 1.8 - searchGetPage
        Elements pageCounter = doc.select("span:matches(\\[\\d+/\\d+\\])");
        if (pageCounter.size() > 0) {
            Matcher counterMatcher = Pattern.compile("\\[\\d+/(\\d+)\\]")
                    .matcher(pageCounter.text());
            if (counterMatcher.find()) {
                results_total = Integer.parseInt(counterMatcher.group(1));
            }
        }
    }

    Elements pageNavLinks = doc.select(".pageNavLink");
    Elements targetDivs = doc.select("div[targetObject]");
    if (pageNavLinks.size() > 0) {
        // Zones 2.2
        searchobj = pageNavLinks.first().attr("href").split("\\?")[0];
    } else if (targetDivs.size() > 0) {
        // Zones 1.8 - search
        searchobj = targetDivs.attr("targetObject").split("\\?")[0];
    } else {
        // Zones 1.8 - searchGetPage
        // The page contains a data structure that at first glance seems to be JSON, but uses
        // "=" instead of ":". So we parse it using regex...
        Matcher targetMatcher = Pattern.compile("targetObject = \"([^\\?]+)[^\"]+\"")
                .matcher(doc.html());
        if (targetMatcher.find()) {
            searchobj = targetMatcher.group(1);
        }
    }

    // First selector: Zones 2.2, second selector: Zones 1.8
    Elements table = doc.select("#BrowseList > tbody > tr, .inRoundBox1");

    // These selectors only depend on the catalogue version, so build them once.
    String childrenQuery = version18
            ? "table[cellpadding=1] tr"
            : ".SummaryDataCell tr, .SummaryDataCellStripe tr";
    String linkSelector = version18
            ? "a[href*=ShowStock], a[href*=APS_CAT_IDENTIFY]"
            : ".SummaryFieldData a.SummaryFieldLink";

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Media type: in 1.8 it is encoded in the icon file name, in 2.2 it is plain text.
        String typetext;
        if (version18) {
            String[] parts = tr.select("img[src^=IMG/MAT]").attr("src").split("/");
            // split() yields an empty array for a src consisting only of slashes
            typetext = parts.length > 0 ? parts[parts.length - 1].replace(".gif", "") : "";
        } else {
            typetext = tr.select(".SummaryMaterialTypeField").text().replace("\n", " ").trim();
        }
        if (data.has("mediatypes")) {
            try {
                sr.setType(MediaType.valueOf(
                        data.getJSONObject("mediatypes").getString(typetext)));
            } catch (JSONException | IllegalArgumentException e) {
                // no (valid) library specific mapping for this type, use the defaults
                sr.setType(defaulttypes.get(typetext));
            }
        } else {
            sr.setType(defaulttypes.get(typetext));
        }

        // Cover
        String imgUrl = null;
        if (version18) {
            Elements coverLinks = tr.select("a[title=Titelbild]");
            Elements smallImages = tr.select("img[width=50]");
            if (coverLinks.size() > 0) {
                imgUrl = coverLinks.attr("href");
            } else if (smallImages.size() > 0) {
                // TODO: better way to select these cover images? (found in Hannover)
                imgUrl = smallImages.attr("src");
            }
        } else {
            Elements covers = tr.select(".SummaryImageCell img[id^=Bookcover]");
            if (covers.size() > 0) {
                imgUrl = covers.first().attr("src");
            }
        }
        sr.setCover(imgUrl);

        // Status (only derived from icons in 1.8)
        if (version18) {
            if (tr.select("img[src$=oci_1.gif]").size() > 0) {
                // probably can only appear when searching the catalog on a terminal in
                // the library.
                sr.setStatus(SearchResult.Status.GREEN);
            } else if (tr.select("img[src$=blob_amber.gif]").size() > 0) {
                sr.setStatus(SearchResult.Status.YELLOW);
            }
        }

        // Description and ID, both taken from the detail rows of this hit
        StringBuilder desc = new StringBuilder();
        boolean haslink = false;
        for (Element node : tr.select(childrenQuery)) {
            String name = getName(node);
            if ("Titel".equals(name)) {
                desc.append("<b>").append(getValue(node).trim()).append("</b><br />");
            } else if ("Verfasser".equals(name) || "Jahr".equals(name)) {
                desc.append(getValue(node).trim()).append("<br />");
            }

            // Only the first detail link of a hit is used to determine its ID.
            if (!haslink) {
                Elements links = node.select(linkSelector);
                if (links.size() > 0) {
                    Map<String, String> hrefq = getQueryParamsFirst(links.attr("abs:href"));
                    if (hrefq.containsKey("no")) {
                        sr.setId(hrefq.get("no"));
                    } else if (hrefq.containsKey("Key")) {
                        sr.setId(hrefq.get("Key"));
                    }
                    haslink = true;
                }
            }
        }

        // Drop the trailing line break ("<br />" is 6 characters long).
        String descHtml = desc.toString();
        if (descHtml.endsWith("<br />")) {
            descHtml = descHtml.substring(0, descHtml.length() - 6);
        }
        sr.setInnerhtml(descHtml);
        sr.setNr(i);
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.
the class Littera method executeSearch.
/**
 * Runs a search against the catalogue and parses the returned result page.
 *
 * <p>Calls {@code start()} first if the API has not been initialised yet.
 *
 * @param query     search criteria that are turned into the request URL by
 *                  {@code buildSearchUrl}
 * @param pageIndex page to request; also stored on every result and on the returned object
 * @return the hits of the requested page; the list is empty when the page contains no result
 *         list, and the total is 0 when the page contains no navigation element
 * @throws IOException        declared for the HTTP request and initialisation
 * @throws OpacErrorException declared for catalogue side errors
 * @throws JSONException      declared for configuration access
 * @throws RuntimeException   wrapping the {@link URISyntaxException} if no valid search URL
 *                            can be built
 */
protected SearchRequestResult executeSearch(List<SearchQuery> query, int pageIndex)
        throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }

    final String searchUrl;
    try {
        searchUrl = buildSearchUrl(query, pageIndex);
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }

    final Document doc = Jsoup.parse(httpGet(searchUrl, getDefaultEncoding()));

    final Element navigation = doc.select(".result_view .navigation").first();
    final int totalResults = navigation != null ? parseTotalResults(navigation.text()) : 0;

    final List<SearchResult> results = new ArrayList<>();
    final Element ul = doc.select(".result_view ul.list").first();
    if (ul == null) {
        // No result list on the page (e.g. nothing found): report an empty page instead of
        // failing with a NullPointerException.
        return new SearchRequestResult(results, totalResults, pageIndex);
    }

    for (final Element li : ul.children()) {
        if (li.hasClass("zugangsmonat")) {
            // entries with this class are not hits
            continue;
        }
        final Element title = li.select(".titelinfo a").first();
        if (title == null) {
            // Without the title link there is neither an ID nor a title to show.
            continue;
        }

        final SearchResult result = new SearchResult();
        result.setId(getQueryParamsFirst(title.attr("href")).get("id"));

        // The element following the title's parent holds the second line of the description.
        final Element titleParent = title.parent();
        final Element details = titleParent != null ? titleParent.nextElementSibling() : null;
        result.setInnerhtml(title.text() + "<br>" + (details != null ? details.text() : ""));

        result.setNr(results.size());
        result.setPage(pageIndex);
        result.setType(MEDIA_TYPES.get(li.select(".statusinfo .ma").text()));
        result.setCover(getCover(li));

        // The availability is encoded in the file name of the status icon.
        final String statusImg = li.select(".status img").attr("src");
        if (statusImg.contains("-yes")) {
            result.setStatus(SearchResult.Status.GREEN);
        } else if (statusImg.contains("-no")) {
            result.setStatus(SearchResult.Status.RED);
        } else {
            result.setStatus(null);
        }
        results.add(result);
    }
    return new SearchRequestResult(results, totalResults, pageIndex);
}
use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.
the class SRU method parse_result.
/**
 * Parses an SRU response into a {@link SearchRequestResult}.
 *
 * <p>The parsed document is kept in {@code searchDoc}. The returned result always reports
 * page 1.
 *
 * @param xml the raw XML response
 * @return all records of the response together with the reported number of records
 * @throws OpacErrorException if the response contains a diagnostic element; the diagnostic
 *                            message text is used as the exception message
 */
private SearchRequestResult parse_result(String xml) throws OpacErrorException {
    searchDoc = Jsoup.parse(xml, "", Parser.xmlParser());
    if (searchDoc.select("diag|diagnostic").size() > 0) {
        throw new OpacErrorException(searchDoc.select("diag|message").text());
    }

    List<SearchResult> hits = new ArrayList<>();
    int total = Integer.parseInt(searchDoc.select("zs|numberOfRecords").text());

    for (Element record : searchDoc.select("zs|records > zs|record")) {
        String title = getDetail(record, "titleInfo title");
        String givenName = getDetail(record, "name > namePart[type=given]");
        String familyName = getDetail(record, "name > namePart[type=family]");
        String issued = getDetail(record, "dateIssued");
        String form = getDetail(record, "physicalDescription > form");
        String isbn = getDetail(record, "identifier[type=isbn]");
        String coverUrl = getDetail(record, "url[displayLabel=C Cover]");

        SearchResult hit = new SearchResult();
        hit.setInnerhtml("<b>" + title + "</b><br>" + givenName + " " + familyName + ", " + issued);
        hit.setType(defaulttypes.get(form));
        // the position within the response equals the number of hits collected so far
        hit.setNr(hits.size());
        hit.setId(getDetail(record, "recordIdentifier"));
        // fall back to a cover looked up by ISBN when the record does not link one itself
        hit.setCover(coverUrl.isEmpty() ? ISBNTools.getAmazonCoverURL(isbn, false) : coverUrl);
        hits.add(hit);
    }
    return new SearchRequestResult(hits, total, 1);
}
use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.
the class BiBer1992 method parse_search.
/**
 * Parses a BiBer result list page into a {@link SearchRequestResult}.
 *
 * <p>Result table format. The JSON setting "rows_per_hit" selects the layout:
 * 1 = one {@code <tr>} per hit, 2 = two {@code <tr>} per hit (default).
 *
 * <pre>{@code
 * <form> <table>
 * <tr valign="top">
 *   <td class="td3" ...><a href=...><img ...></a></td>   (row is optional, only in some bibs)
 *   <td class="td2" ...><input ...></td>
 *   <td width="34%">TITEL</td>
 *   <td width="34%"> </td>
 *   <td width="6%" align="center">2009</td>
 *   <td width="*" align="left">DVD0 Seew</td>
 * </tr>
 * <tr valign="top">
 *   <td class="td3" ...> ...</td>
 *   <td class="td2" ...> ...</td>
 *   <td colspan="4" ...><font size="-1"><font class="p1">Erwachsenenbibliothek</font></font>
 *       <div class="hr4"></div></td>
 * </tr>
 * }</pre>
 *
 * @param html raw HTML of the result page
 * @param page page number that is passed through unchanged to the returned result
 * @return the parsed hits (at most {@code numOfResultsPerPage}) and the total hit count;
 *         the total is 0 when the page states that nothing was found and -1 when no count
 *         could be read
 */
private SearchRequestResult parse_search(String html, int page) {
    List<SearchResult> results = new ArrayList<>();
    Document doc = Jsoup.parse(html);
    if (doc.select("h3").text().contains("Es wurde nichts gefunden")) {
        return new SearchRequestResult(results, 0, page);
    }

    // result rows: <tr valign="top">
    Elements trList = doc.select("form table tr[valign]");
    if (trList.size() == 0) {
        // Schwieberdingen
        trList = doc.select("table:has(input[type=checkbox]) tr");
    }

    // Guess the layout: a single row, or checkboxes in both of the first two rows, means
    // that every row is a hit of its own.
    int rows_per_hit = 2;
    if (trList.size() == 1 || (trList.size() > 1
            && trList.get(0).select("input[type=checkbox]").size() > 0
            && trList.get(1).select("input[type=checkbox]").size() > 0)) {
        rows_per_hit = 1;
    }
    try {
        rows_per_hit = data.getInt("rows_per_hit");
    } catch (JSONException ignored) {
        // The setting is optional; without it the layout detected above is used.
    }

    // Overall search results
    // are very differently layouted, but have always the text:
    // "....Treffer Gesamt (nnn)"
    int results_total = -1;
    Matcher matcher = Pattern.compile("Treffer Gesamt \\(([0-9]+)\\)").matcher(html);
    if (matcher.find()) {
        results_total = Integer.parseInt(matcher.group(1));
    }

    // limit to numOfResultsPerPage entries, each spanning rows_per_hit rows
    int numOfEntries = Math.min(trList.size() / rows_per_hit, numOfResultsPerPage);

    for (int i = 0; i < numOfEntries; i++) {
        // first row of hit number i
        Element tr = trList.get(i * rows_per_hit);
        SearchResult sr = new SearchResult();

        // ID as href tag
        Elements links = tr.select("td a");
        if (links.size() > 0 && !links.get(0).attr("href").contains("ISBN")) {
            // Exclude the cover links in Ludwigsburg as they lead to a page that misses the
            // reservation button
            sr.setId(links.get(0).attr("href"));
        } else {
            // no ID as href found, look for the ID in the input form
            Elements inputs = tr.select("td input");
            if (inputs.size() > 0) {
                String nameID = inputs.get(0).attr("name").trim();
                sr.setId("/" + opacDir + "/ftitle" + opacSuffix + "?LANG=de&FUNC=full&"
                        + nameID + "=YES");
            }
        }

        // media type
        Elements images = tr.select("td img");
        if (images.size() > 0) {
            sr.setType(getMediaTypeFromImageFilename(sr, images.get(0).attr("src"), data));
        }

        // description
        StringBuilder descBuilder = new StringBuilder();
        try {
            // array "searchtable" list the column numbers of the
            // description
            JSONArray searchtable = data.getJSONArray("searchtable");
            for (int j = 0; j < searchtable.length(); j++) {
                int colNum = searchtable.getInt(j);
                if (j > 0) {
                    descBuilder.append("<br />");
                }
                Element cell = tr.child(colNum);
                String c = cell.html();
                if (cell.childNodes().size() == 1
                        && cell.select("a[href*=ftitle.]").size() > 0) {
                    // the cell consists only of the title link, use its text
                    c = tr.select("a[href*=ftitle.]").text();
                }
                descBuilder.append(c);
            }
        } catch (Exception e) {
            // Best effort: a missing setting or a short row leaves the description
            // incomplete, but the hit is still returned.
            e.printStackTrace();
        }
        String desc = descBuilder.toString();
        // remove links "<a ...>...</a>
        // needed for Friedrichshafen: "Warenkorb", "Vormerkung"
        // Herford: "Medienkorb"
        desc = desc.replaceAll("<a .*?</a>", "");
        // remove newlines (useless in HTML)
        desc = desc.replaceAll("\\n", "");
        // remove hidden divs ("Titel übernommen!" in Wuerzburg)
        desc = desc.replaceAll("<div[^>]*style=\"display:none\">.*</div>", "");
        // remove all invalid HTML tags
        desc = desc.replaceAll("</?(tr|td|font|table|tbody|div)[^>]*>", "");
        // replace multiple line breaks by one
        desc = desc.replaceAll("(<br( /)?>\\s*)+", "<br>");
        sr.setInnerhtml(desc);

        // status, derived from which of the two font classes occur in the row
        boolean hasP04 = tr.select("font.p04x09b").size() > 0;
        boolean hasP02 = tr.select("font.p02x09b").size() > 0;
        if (hasP04 && hasP02) {
            sr.setStatus(Status.YELLOW);
        } else if (hasP04) {
            sr.setStatus(Status.GREEN);
        } else if (hasP02) {
            sr.setStatus(Status.RED);
        }

        // number: i already counts hits (not table rows), so it must not be divided by
        // rows_per_hit again - that produced duplicate numbers for two-row layouts
        sr.setNr(i);
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}
use of de.geeksfactory.opacclient.objects.SearchRequestResult in project opacclient by opacapp.
the class IOpac method parse_search.
/**
 * Parses an IOPAC result list page into a {@link SearchRequestResult}.
 *
 * <p>Side effects: calls {@code updateRechnr(doc)}, stores the page in {@code reusehtml} and
 * the total hit count in {@code results_total}.
 *
 * @param html raw HTML of the result page
 * @param page page number; used for the returned result and for the fallback numbering
 * @return the parsed hits, an empty result if the page reports 0 hits, or {@code null} if
 *         the page contains neither an {@code h4} nor an {@code h1} heading
 * @throws OpacErrorException    if the heading contains an error message
 * @throws NotReachableException if the page reports a "RUNTIME ERROR"
 */
protected SearchRequestResult parse_search(String html, int page)
        throws OpacErrorException, NotReachableException {
    Document doc = Jsoup.parse(html);
    if (doc.select("h4").size() > 0) {
        String heading = doc.select("h4").text().trim();
        if (heading.startsWith("0 gefundene Medien")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else if (!heading.contains("gefundene Medien")
                && !heading.contains("Es wurden mehr als")) {
            // error
            throw new OpacErrorException(heading);
        }
    } else if (doc.select("h1").size() > 0) {
        String heading = doc.select("h1").text().trim();
        if (heading.contains("RUNTIME ERROR")) {
            // Server Error
            throw new NotReachableException("IOPAC RUNTIME ERROR");
        } else {
            throw new OpacErrorException(stringProvider.getFormattedString(
                    StringProvider.UNKNOWN_ERROR_WITH_DESCRIPTION, heading));
        }
    } else {
        return null;
    }

    updateRechnr(doc);
    reusehtml = html;

    results_total = -1;
    if (doc.select("h4").text().trim().contains("Es wurden mehr als")) {
        // The page only states "more than ..." here; 200 is the total this code assumes.
        results_total = 200;
    } else {
        // the heading starts with the number of hits, followed by a space
        String resultnumstr = doc.select("h4").first().text();
        resultnumstr = resultnumstr.substring(0, resultnumstr.indexOf(" ")).trim();
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();
    Element table = doc.select("table").first();
    if (table == null) {
        // no result table on the page, so there are no rows to parse
        return new SearchRequestResult(results, results_total, page);
    }
    Elements rows = table.select("tr:has(td)");

    // Map logical column names to column indexes using the header row, if there is one.
    Map<String, Integer> colmap = new HashMap<>();
    Element thead = table.select("tr:has(th)").first();
    if (thead != null) {
        int j = 0;
        for (Element th : thead.select("th")) {
            String text = th.text().trim().toLowerCase(Locale.GERMAN);
            if (text.contains("cover")) {
                colmap.put("cover", j);
            } else if (text.contains("titel")) {
                colmap.put("title", j);
            } else if (text.contains("verfasser")) {
                colmap.put("author", j);
            } else if (text.contains("mtyp")) {
                colmap.put("category", j);
            } else if (text.contains("jahr")) {
                colmap.put("year", j);
            } else if (text.contains("signatur")) {
                colmap.put("shelfmark", j);
            } else if (text.contains("info")) {
                colmap.put("info", j);
            } else if (text.contains("abteilung")) {
                colmap.put("department", j);
            } else if (text.contains("verliehen") || text.contains("verl.")) {
                colmap.put("returndate", j);
            } else if (text.contains("anz.res")) {
                colmap.put("reservations", j);
            }
            j++;
        }
    }
    if (colmap.isEmpty()) {
        // No (recognisable) header row: fall back to the default column layout.
        colmap.put("cover", 0);
        colmap.put("title", 1);
        colmap.put("author", 2);
        colmap.put("publisher", 3);
        colmap.put("year", 4);
        colmap.put("department", 5);
        colmap.put("shelfmark", 6);
        colmap.put("returndate", 7);
        colmap.put("category", 8);
    }

    // Only used to test whether a status text is a date; the same format applies to all rows.
    SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy", Locale.GERMAN);

    for (int i = 0; i < rows.size(); i++) {
        Element tr = rows.get(i);
        Elements tds = tr.select("td");
        SearchResult sr = new SearchResult();

        // Cover
        Element coverCell = columnCell(tds, colmap, "cover");
        if (coverCell != null && coverCell.select("img").size() > 0) {
            sr.setCover(coverCell.select("img").first().attr("src"));
        }

        // Media Type
        Element categoryCell = columnCell(tds, colmap, "category");
        if (categoryCell != null) {
            String mType = cellText(categoryCell).toLowerCase(Locale.GERMAN);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(
                            data.getJSONObject("mediatypes").getString(mType)));
                } catch (JSONException | IllegalArgumentException e) {
                    // no (valid) library specific mapping for this type, use the defaults
                    sr.setType(defaulttypes.get(mType));
                }
            } else {
                sr.setType(defaulttypes.get(mType));
            }
        }

        // Title and additional info
        String title;
        String additionalInfo = "";
        Element info = columnCell(tds, colmap, "info");
        if (info != null) {
            title = info.select("a[title=Details-Info], a[title=Details-Info1]").text().trim();
            // The text in front of the title may contain the author, terminated by a colon.
            String infoText = info.text();
            int titleStart = infoText.indexOf(title);
            String authorIn = titleStart > 0 ? infoText.substring(0, titleStart) : "";
            if (authorIn.contains(":")) {
                authorIn = authorIn.replaceFirst("^([^:]*):(.*)$", "$1");
                additionalInfo += " - " + authorIn;
            }
        } else {
            Element titleCell = columnCell(tds, colmap, "title");
            title = titleCell != null ? cellText(titleCell) : "";
            // A parenthesised suffix of the title is moved to the additional info.
            int paren = title.indexOf("(");
            if (paren > 0) {
                additionalInfo += title.substring(paren);
                title = title.substring(0, paren - 1).trim();
            }
            // Author
            Element authorCell = columnCell(tds, colmap, "author");
            if (authorCell != null) {
                additionalInfo += " - " + cellText(authorCell);
            }
        }

        // Publisher and year, shown as " (publisher, year)". Empty or missing parts are
        // left out so that the parentheses are always balanced.
        Element publisherCell = columnCell(tds, colmap, "publisher");
        Element yearCell = columnCell(tds, colmap, "year");
        StringBuilder imprint = new StringBuilder();
        if (publisherCell != null) {
            imprint.append(cellText(publisherCell));
        }
        if (yearCell != null) {
            String year = cellText(yearCell);
            if (imprint.length() > 0 && !year.isEmpty()) {
                imprint.append(", ");
            }
            imprint.append(year);
        }
        if (imprint.length() > 0) {
            additionalInfo += " (" + imprint + ")";
        }
        sr.setInnerhtml("<b>" + title + "</b><br>" + additionalInfo);

        // Status (left unset if the table has no return date column)
        Element statusCell = columnCell(tds, colmap, "returndate");
        if (statusCell != null) {
            String status = cellText(statusCell);
            try {
                df.parse(status);
                // this is a return date
                sr.setStatus(Status.RED);
                sr.setInnerhtml(sr.getInnerhtml() + "<br><i>"
                        + stringProvider.getString(StringProvider.LENT_UNTIL) + " " + status
                        + "</i>");
            } catch (ParseException e) {
                // this is a different status text
                String lc = status.toLowerCase(Locale.GERMAN);
                boolean available = lc.equals("") || lc.contains("onleihe")
                        || lc.contains("verleihbar") || lc.contains("entleihbar")
                        || lc.contains("ausleihbar");
                if (available && !lc.contains("nicht")) {
                    sr.setStatus(Status.GREEN);
                } else {
                    sr.setStatus(Status.YELLOW);
                    sr.setInnerhtml(sr.getInnerhtml() + "<br><i>" + status + "</i>");
                }
            }
        }

        // In some libraries (for example search for "atelier" in Preetz)
        // the results are sorted differently than their numbers suggest, so
        // we need to detect the number ("recno") from the link
        String link = tr.select("a[href^=/cgi-bin/di.exe?page=]").attr("href");
        Map<String, String> params = getQueryParamsFirst(link);
        if (params.containsKey("recno")) {
            sr.setNr(Integer.parseInt(params.get("recno")) - 1);
        } else {
            // the above should work, but fall back to this if it doesn't
            sr.setNr(10 * (page - 1) + i);
        }

        // In some libraries (for example Preetz) we can detect the media ID
        // here using another link present in the search results
        Elements idLinks = tr.select("a[href^=/cgi-bin/di.exe?cMedNr]");
        if (idLinks.size() > 0) {
            Map<String, String> idParams = getQueryParamsFirst(idLinks.first().attr("href"));
            sr.setId(idParams.get("cMedNr"));
        } else {
            sr.setId(null);
        }
        results.add(sr);
    }
    return new SearchRequestResult(results, results_total, page);
}

/**
 * Returns the cell of the column registered under {@code key}, or {@code null} if that
 * column is not mapped or the row has too few cells.
 */
private static Element columnCell(Elements cells, Map<String, Integer> colmap, String key) {
    Integer index = colmap.get(key);
    if (index == null || index < 0 || index >= cells.size()) {
        return null;
    }
    return cells.get(index);
}

/** Returns the trimmed text of a cell with all non-breaking spaces removed. */
private static String cellText(Element cell) {
    return cell.text().trim().replace("\u00a0", "");
}
Aggregations