Use of de.geeksfactory.opacclient.objects.SearchResult in the project opacclient by opacapp.
Class Open, method searchGetPage.
/**
 * Loads one page of the result list belonging to the search whose last result document is
 * cached in {@code searchResultDoc}.
 *
 * <p>Two pager layouts are handled:
 * <ul>
 * <li><b>New style</b> (no {@code span[id$=DataPager1]} present): page buttons are normal links
 * carrying a {@code page=N} parameter, so the requested page is fetched directly.</li>
 * <li><b>Old style</b>: page buttons are {@code __doPostBack} Javascript links and only a window
 * of pages around the current one is linked. The closest link is "clicked" through a form POST
 * and the method calls itself again until the requested page is inside the window.</li>
 * </ul>
 *
 * @param page number of the result page to load
 * @return the parsed results of the requested page; an empty result list (with the total count
 *         read from the page, or 0 if it cannot be read) when the new-style page has no page link
 * @throws NotReachableException if {@code searchResultDoc} is {@code null}
 * @throws OpacErrorException    if the old-style pager has no entries or the link to click is not
 *                               a {@code __doPostBack} link
 */
@Override
public SearchRequestResult searchGetPage(int page) throws IOException, OpacErrorException, JSONException {
    if (searchResultDoc == null) {
        throw new NotReachableException();
    }
    Document doc = searchResultDoc;
    Element pager = doc.select("span[id$=DataPager1]").first();

    if (pager == null) {
        /*
        New style: Page buttons using normal links
        We can go directly to the correct page
        */
        // Look the link up with the same selector that is dereferenced afterwards, so that page
        // buttons without a "page" parameter are treated as "no link" instead of causing an NPE.
        Element pageLink = doc.select("a[id*=LinkButtonPageN][href*=page]").first();
        if (pageLink != null) {
            String url = pageLink.attr("href").replaceFirst("page=\\d+", "page=" + page);
            Document doc2 = Jsoup.parse(httpGet(url, getDefaultEncoding()));
            doc2.setBaseUri(url);
            return parse_search(doc2, page);
        }

        // Next page does not exist
        int totalCount = 0;
        Element totalLabel = doc.select("span[id$=TotalItemsLabel]").first();
        if (totalLabel != null) {
            try {
                totalCount = Integer.parseInt(totalLabel.text());
            } catch (NumberFormatException ignored) {
                // Label is present but not numeric: keep the fallback of 0.
            }
        }
        // Argument order is (results, total result count, page), matching the other
        // SearchRequestResult constructions in this code base.
        return new SearchRequestResult(new ArrayList<SearchResult>(), totalCount, page);
    }

    /*
    Old style: Page buttons using Javascript
    When there are many pages of results, there will only be links to the next 4 and
    previous 4 pages, so we will click links until it gets to the correct page.
    */
    Elements pageLinks = pager.select("a[id*=LinkButtonPageN], span[id*=LabelPageN]");
    if (pageLinks.isEmpty()) {
        // A pager without any entries gives us nothing to click on.
        throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
    }
    int from = Integer.parseInt(pageLinks.first().text());
    int to = Integer.parseInt(pageLinks.last().text());

    Element linkToClick;
    boolean willBeCorrectPage;
    if (page < from) {
        // Requested page lies before the linked window: move the window backwards.
        linkToClick = pageLinks.first();
        willBeCorrectPage = false;
    } else if (page > to) {
        // Requested page lies behind the linked window: move the window forwards.
        linkToClick = pageLinks.last();
        willBeCorrectPage = false;
    } else {
        // NOTE(review): indexing by (page - from) assumes the pager lists consecutive page
        // numbers without gaps.
        linkToClick = pageLinks.get(page - from);
        willBeCorrectPage = true;
    }

    if (linkToClick.tagName().equals("span")) {
        // we are trying to get the page we are already on
        return parse_search(searchResultDoc, page);
    }

    Pattern pattern = Pattern.compile("javascript:__doPostBack\\('([^,]*)','([^\\)]*)'\\)");
    Matcher matcher = pattern.matcher(linkToClick.attr("href"));
    if (!matcher.find()) {
        throw new OpacErrorException(StringProvider.INTERNAL_ERROR);
    }

    // Emulate the Javascript postback by submitting the page form with the event fields set.
    FormElement form = (FormElement) doc.select("form").first();
    MultipartBody data = formData(form, null)
            .addFormDataPart("__EVENTTARGET", matcher.group(1))
            .addFormDataPart("__EVENTARGUMENT", matcher.group(2))
            .build();
    String postUrl = form.attr("abs:action");
    String html = httpPost(postUrl, data, "UTF-8");

    if (willBeCorrectPage) {
        // We clicked on the correct link
        Document doc2 = Jsoup.parse(html);
        doc2.setBaseUri(postUrl);
        return parse_search(doc2, page);
    }

    // There was no correct link, so try to find one again
    searchResultDoc = Jsoup.parse(html);
    searchResultDoc.setBaseUri(postUrl);
    return searchGetPage(page);
}
Use of de.geeksfactory.opacclient.objects.SearchResult in the project opacclient by opacapp.
Class Heidi, method parse_search.
/**
 * Parses the HTML of a result list page into a {@link SearchRequestResult}.
 *
 * <p>The total hit count is read from {@code #heiditreffer}. Every {@code table.treffer tr} row
 * becomes one {@link SearchResult}:
 * <ul>
 * <li>the id is the {@code katkey} query parameter of the first link in the row that has one;</li>
 * <li>title, author and date come from the row's {@code span.Z3988} title attribute, if the row
 * contains exactly one such span;</li>
 * <li>the status is derived from the text of {@code .kurzstat}, the media type from the text of
 * {@code .typbild}.</li>
 * </ul>
 *
 * @param html HTML source of the result list page
 * @param page page number, passed through to the returned {@link SearchRequestResult}
 * @return the results found on the page together with the total hit count (0 if the page has no
 *         {@code #heiditreffer} element)
 */
private SearchRequestResult parse_search(String html, int page) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    int results_total = 0;
    Elements hitCounter = doc.select("#heiditreffer");
    if (hitCounter.size() > 0) {
        String resstr = hitCounter.text();
        // Keep only the number inside the parentheses and drop the "." thousands separators.
        String resnum = resstr.replaceAll("\\(([0-9.]+)([^0-9]*)\\)", "$1").replace(".", "");
        results_total = Integer.parseInt(resnum);
    }

    List<SearchResult> results = new ArrayList<>();
    for (Element tr : doc.select("table.treffer tr")) {
        SearchResult sr = new SearchResult();
        // Always start with an empty builder: rows that do not carry exactly one span.Z3988
        // used to leave this null and crashed with a NullPointerException further down.
        StringBuilder description = new StringBuilder();
        String author = "";

        // The record id is the "katkey" parameter of the first link that has one.
        for (Element link : tr.select("a")) {
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            if (kk != null) {
                sr.setId(kk);
                break;
            }
        }

        Elements z3988 = tr.select("span.Z3988");
        if (z3988.size() == 1) {
            // Luckily there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            String zdata = z3988.attr("title")
                    .replace(";", "%3B")
                    .replace(":", "%3A")
                    .replace("/", "%2F");
            boolean hastitle = false;
            for (NameValuePair nv : parse_z3988data(zdata)) {
                String value = nv.getValue();
                if (value == null || value.trim().equals("")) {
                    continue;
                }
                String name = nv.getName();
                if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                    // Only the first title field (book or article title) is shown.
                    description.append("<b>").append(value).append("</b>");
                    hastitle = true;
                } else if (name.equals("rft.au")) {
                    author = value;
                } else if (name.equals("rft.aufirst")) {
                    author = author + ", " + value;
                } else if (name.equals("rft.aulast")) {
                    author = value;
                } else if (name.equals("rft.date")) {
                    description.append("<br />").append(value);
                }
            }
        }

        if (!"".equals(author)) {
            author = author + "<br />";
        }
        sr.setInnerhtml(author + description.toString());

        Elements statusCells = tr.select(".kurzstat");
        if (statusCells.size() > 0) {
            String stattext = statusCells.first().text();
            if (stattext.contains("ausleihbar") || stattext.contains("online")) {
                sr.setStatus(Status.GREEN);
            } else if (stattext.contains("entliehen")) {
                sr.setStatus(Status.RED);
            } else if (stattext.contains("Präsenznutzung") || stattext.contains("bestellen")) {
                sr.setStatus(Status.YELLOW);
            }
        }

        Elements typeCells = tr.select(".typbild");
        if (typeCells.size() > 0) {
            String typtext = typeCells.first().text();
            // Order matters: "DVD-ROM" has to be tested before the more general "DVD".
            if (typtext.contains("Buch")) {
                sr.setType(MediaType.BOOK);
            } else if (typtext.contains("DVD-ROM")) {
                sr.setType(MediaType.CD_SOFTWARE);
            } else if (typtext.contains("Online-Ressource")) {
                sr.setType(MediaType.EDOC);
            } else if (typtext.contains("DVD")) {
                sr.setType(MediaType.DVD);
            } else if (typtext.contains("Film")) {
                sr.setType(MediaType.MOVIE);
            } else if (typtext.contains("Zeitschrift")) {
                sr.setType(MediaType.MAGAZINE);
            } else if (typtext.contains("Musiknoten")) {
                sr.setType(MediaType.SCORE_MUSIC);
            } else if (typtext.contains("Bildliche Darstellung")) {
                sr.setType(MediaType.ART);
            } else if (typtext.contains("Zeitung")) {
                sr.setType(MediaType.NEWSPAPER);
            } else if (typtext.contains("Karte")) {
                sr.setType(MediaType.MAP);
            } else if (typtext.contains("Mehrteilig")) {
                sr.setType(MediaType.PACKAGE_BOOKS);
            }
        }

        results.add(sr);
    }
    // TODO
    return new SearchRequestResult(results, results_total, page);
}
Use of de.geeksfactory.opacclient.objects.SearchResult in the project opacclient by opacapp.
Class Adis, method parse_search.
/**
 * Parses a result list document into a {@link SearchRequestResult} and records the paging state
 * needed for later requests.
 *
 * <p>Two result list layouts are supported: a table layout ({@code table.rTable_table}) and a
 * list layout ({@code .rList}). For every result row the title link, the additional text lines,
 * the running number, the record id (taken from the {@code htmlOnLink('...')} Javascript link)
 * and, from the row's icons, media type and availability are extracted.
 *
 * <p>Side effects: calls {@code updatePageform(doc)}, stores {@code page} in {@code s_lastpage}
 * and, when the corresponding buttons are found, updates {@code s_nextbutton} and
 * {@code s_previousbutton}.
 *
 * @param doc  the parsed result list page
 * @param page page number, stored as last page and passed through to the result
 * @return the results of this page; the total count is -1 when it cannot be read from
 *         {@code #R06}
 * @throws OpacErrorException if the page shows an error message or reports "nicht gefunden"
 * @throws SingleResultFound  if {@code #R03} exists exactly once and its text ends with
 *                            "Treffer: 1"
 */
private SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException, SingleResultFound {
    if (doc.select(".message h1").size() > 0 && doc.select("#right #R06").size() == 0) {
        throw new OpacErrorException(doc.select(".message h1").text());
    }
    if (doc.select("#OPACLI").text().contains("nicht gefunden")) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.NO_RESULTS));
    }

    int total_result_count = -1;
    List<SearchResult> results = new ArrayList<>();
    if (doc.select("#R06").size() > 0) {
        String counterText = doc.select("#R06").text().trim();
        Pattern patNum = Pattern.compile(".*Treffer: .* von ([0-9]+)[^0-9]*");
        Matcher numMatcher = patNum.matcher(counterText);
        if (numMatcher.matches()) {
            total_result_count = Integer.parseInt(numMatcher.group(1));
        } else if (counterText.endsWith("Treffer: 1")) {
            total_result_count = 1;
        }
    }
    if (doc.select("#R03").size() == 1 && doc.select("#R03").text().trim().endsWith("Treffer: 1")) {
        throw new SingleResultFound();
    }

    Pattern patId = Pattern.compile("javascript:.*htmlOnLink\\('([0-9A-Za-z]+)'\\)");
    int nr = 1;
    String selector_row, selector_link, selector_img, selector_num, selector_text;
    if (doc.select("table.rTable_table tbody").size() > 0) {
        selector_row = "table.rTable_table tbody tr";
        selector_link = ".rTable_td_text a";
        selector_text = ".rList_name";
        selector_img = ".rTable_td_img img, .rTable_td_text img";
        selector_num = "tr td:first-child";
    } else {
        // New version, e.g. Berlin
        selector_row = ".rList li.rList_li_even, .rList li.rList_li_odd";
        selector_link = ".rList_titel a";
        selector_text = ".rList_name";
        selector_img = ".rlist_icon img, .rList_titel img, .rList_medium .icon, .rList_availability .icon, .rList_img img";
        selector_num = ".rList_num";
    }

    for (Element tr : doc.select(selector_row)) {
        Element link = tr.select(selector_link).first();
        if (link == null) {
            // A row without a title link cannot be turned into a result; skipping it avoids
            // a NullPointerException for such rows.
            continue;
        }

        SearchResult res = new SearchResult();

        // Images inside the link are dropped so that only the textual title ends up in the HTML.
        link.select("img").remove();
        StringBuilder descr = new StringBuilder(link.html());
        for (Element n : tr.select(selector_text)) {
            String t = n.text().replace("\u00a0", " ").trim();
            if (t.length() > 0) {
                descr.append("<br />").append(t);
            }
        }
        res.setInnerhtml(descr.toString());

        try {
            res.setNr(Integer.parseInt(tr.select(selector_num).text().trim()));
        } catch (NumberFormatException e) {
            // No usable number in the row: fall back to our own running counter.
            res.setNr(nr);
        }

        Matcher idMatcher = patId.matcher(link.attr("href"));
        if (idMatcher.matches()) {
            res.setId(idMatcher.group(1));
        }

        for (Element img : tr.select(selector_img)) {
            String ttext = img.attr("title");
            // The icon's file name can encode the availability. It is looked for in "src"
            // (for <img> icons) as well as in "href", which the original check used.
            String src = img.attr("abs:src");
            String href = img.attr("href");

            // For titles like "A + B" the part before the first "+" is tried as type name.
            String baseType = null;
            if (ttext.contains("+")) {
                String[] parts = ttext.split("\\+");
                if (parts.length > 0) {
                    baseType = parts[0].trim();
                }
            }

            if (types.containsKey(ttext)) {
                res.setType(types.get(ttext));
            } else if (baseType != null && types.containsKey(baseType)) {
                res.setType(types.get(baseType));
            } else if (ttext.matches(".*ist verf.+gbar") || ttext.contains("is available")
                    || href.contains("verfu_ja") || src.contains("verfu_ja")) {
                res.setStatus(SearchResult.Status.GREEN);
            } else if (ttext.matches(".*nicht verf.+gbar") || ttext.contains("not available")
                    || href.contains("verfu_nein") || src.contains("verfu_nein")) {
                res.setStatus(SearchResult.Status.RED);
            }
        }

        results.add(res);
        nr++;
    }

    updatePageform(doc);
    s_lastpage = page;

    String nextButton = doc.select("input[title=nächster], input[title=Vorwärts blättern]").attr("name");
    // The previous-button lookup must not match the "nächster" (next) button, otherwise
    // s_previousbutton would be overwritten with the name of the next button.
    // NOTE(review): "vorheriger" is assumed to be the title of the old-layout previous button;
    // if it is not, nothing matches and the existing s_previousbutton value is kept.
    String previousButton = doc.select("input[title=vorheriger], input[title=Rückwärts blättern]").attr("name");
    if (!nextButton.equals("")) {
        s_nextbutton = nextButton;
    }
    if (!previousButton.equals("")) {
        s_previousbutton = previousButton;
    }

    return new SearchRequestResult(results, total_result_count, page);
}
Aggregations