use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.
the class BibliothecaSearchTest method testParseSearch.
/**
 * Parses the stored Bibliotheca result list for the current {@code file} and checks the
 * parsed result: a page or result count is present, the page index matches the requested
 * page, every item has an ID and a media type, and the first item's HTML matches the
 * expected snippet for this file.
 *
 * The test returns without asserting anything when no result list resource exists for the
 * file.
 */
@Test
public void testParseSearch() throws OpacApi.OpacErrorException, JSONException, NotReachableException {
    String html = readResource("/bibliotheca/resultlist/" + file);
    // we may not have all files for all libraries
    if (html == null) {
        return;
    }
    int page = 1;
    SearchRequestResult result = Bibliotheca.parseSearch(html, page, getData(file));
    assertTrue(result.getPage_count() > 0 || result.getTotal_result_count() > 0);
    assertTrue(result.getPage_index() == page);
    for (SearchResult item : result.getResults()) {
        assertNotNull(item.getId());
        assertNotNull(item.getType());
    }
    SearchResult firstItem = result.getResults().get(0);
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    assertEquals(getFirstResultHtml(file), firstItem.getInnerhtml());
}
use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.
the class Open method parse_search.
/**
 * Parses an OPEN result list page into a {@link SearchRequestResult}.
 *
 * The document is stored in {@code searchResultDoc}. For every result element the cover,
 * media type, description HTML and ID are extracted. For each result with an ID an
 * asynchronous availability request is started, and this method blocks until all of those
 * requests have completed. A failed availability request leaves the result's status unset.
 *
 * @param doc  the parsed result list page
 * @param page the page number to report in the returned result
 * @return the parsed results; an empty result if the page's info message contains
 *         "keine Treffer"
 * @throws OpacErrorException if the page shows any other info message, or if no total
 *                            item count label is present
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    searchResultDoc = doc;
    if (doc.select("#Label1, span[id$=LblInfoMessage]").size() > 0) {
        String message = doc.select("#Label1, span[id$=LblInfoMessage]").text();
        if (message.contains("keine Treffer")) {
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, page);
        } else {
            throw new OpacErrorException(message);
        }
    }
    int totalCount;
    if (doc.select("span[id$=TotalItemsLabel]").size() > 0) {
        // The label starts with the count, followed by some kind of whitespace and more text.
        totalCount = Integer.parseInt(doc.select("span[id$=TotalItemsLabel]").first().text().split("[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000]")[0]);
    } else {
        throw new OpacErrorException(stringProvider.getString(StringProvider.UNKNOWN_ERROR));
    }
    Pattern idPattern = Pattern.compile("\\$(mdv|civ|dcv)(\\d+)\\$");
    Pattern weakIdPattern = Pattern.compile("(mdv|civ|dcv)(\\d+)[^\\d]");
    Elements elements = doc.select("div[id$=divMedium], div[id$=divComprehensiveItem], div[id$=divDependentCatalogue]");
    // The portal ID depends only on the document, so it is determined once instead of
    // re-scanning every script tag for each result.
    int portalId = findPortalId(doc);
    List<SearchResult> results = new ArrayList<>();
    List<CompletableFuture<Void>> futures = new ArrayList<>();
    for (Element element : elements) {
        final SearchResult result = new SearchResult();
        // Cover
        if (element.select("input[id$=mediumImage]").size() > 0) {
            result.setCover(element.select("input[id$=mediumImage]").first().attr("src"));
        } else if (element.select("img[id$=CoverView_Image]").size() > 0) {
            assignBestCover(result, getCoverUrlList(element.select("img[id$=CoverView_Image]").first()));
        }
        Element catalogueContent = element.select(".catalogueContent, .oclc-searchmodule-mediumview-content, .oclc-searchmodule-comprehensiveitemview-content, .oclc-searchmodule-dependentitemview-content").first();
        // Media Type
        if (catalogueContent.select("#spanMediaGrpIcon, .spanMediaGrpIcon").size() > 0) {
            String mediatype = catalogueContent.select("#spanMediaGrpIcon, .spanMediaGrpIcon").attr("class");
            if (mediatype.startsWith("itemtype ")) {
                mediatype = mediatype.substring("itemtype ".length());
            }
            SearchResult.MediaType defaulttype = defaulttypes.get(mediatype);
            if (defaulttype == null) {
                defaulttype = SearchResult.MediaType.UNKNOWN;
            }
            if (data.has("mediatypes")) {
                try {
                    result.setType(SearchResult.MediaType.valueOf(data.getJSONObject("mediatypes").getString(mediatype)));
                } catch (JSONException | IllegalArgumentException e) {
                    // No mapping configured, or the configured name is not a MediaType
                    // constant: fall back to the built-in default.
                    result.setType(defaulttype);
                }
            } else {
                result.setType(defaulttype);
            }
        } else {
            result.setType(SearchResult.MediaType.UNKNOWN);
        }
        // Text
        String title = catalogueContent.select("a[id$=LbtnShortDescriptionValue], a[id$=LbtnTitleValue]").text();
        String subtitle = catalogueContent.select("span[id$=LblSubTitleValue]").text();
        String author = catalogueContent.select("span[id$=LblAuthorValue]").text();
        String year = catalogueContent.select("span[id$=LblProductionYearValue]").text();
        String series = catalogueContent.select("span[id$=LblSeriesValue]").text();
        // Some libraries, such as Bern, have labels but no <span id="..Value"> tags
        int j = 0;
        for (Element div : catalogueContent.children()) {
            if (subtitle.equals("") && div.select("span").size() == 0 && j > 0 && j < 3) {
                subtitle = div.text().trim();
            }
            if (author.equals("") && div.select("span[id$=LblAuthor]").size() == 1) {
                author = valueAfterLabel(div.text().trim());
            }
            if (year.equals("") && div.select("span[id$=LblProductionYear]").size() == 1) {
                year = valueAfterLabel(div.text().trim());
            }
            j++;
        }
        StringBuilder text = new StringBuilder();
        text.append("<b>").append(title).append("</b>");
        if (!subtitle.equals("")) {
            text.append("<br/>").append(subtitle);
        }
        if (!author.equals("")) {
            text.append("<br/>").append(author);
        }
        if (!year.equals("")) {
            text.append("<br/>").append(year);
        }
        if (!series.equals("")) {
            text.append("<br/>").append(series);
        }
        result.setInnerhtml(text.toString());
        // ID: prefer the "$mdv123$" form, fall back to the weaker pattern
        Matcher matcher = idPattern.matcher(element.html());
        if (matcher.find()) {
            result.setId(matcher.group(2));
        } else {
            matcher = weakIdPattern.matcher(element.html());
            if (matcher.find()) {
                result.setId(matcher.group(2));
            }
        }
        // Availability
        if (result.getId() != null) {
            String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService" + ".asmx/GetAvailability";
            String culture = element.select("input[name$=culture]").val();
            try {
                // Named "payload" so it does not shadow the "data" field read above.
                JSONObject payload = new JSONObject();
                payload.put("portalId", portalId).put("mednr", result.getId()).put("culture", culture).put("requestCopyData", false).put("branchFilter", "");
                RequestBody entity = RequestBody.create(MEDIA_TYPE_JSON, payload.toString());
                futures.add(asyncPost(url, entity, false).handle((response, throwable) -> {
                    if (throwable != null) {
                        // Availability is optional; a failed request leaves the status unset.
                        return null;
                    }
                    try {
                        JSONObject availabilityData = new JSONObject(response.body().string());
                        String isAvail = availabilityData.getJSONObject("d").getString("IsAvail");
                        switch (isAvail) {
                            case "true":
                                result.setStatus(SearchResult.Status.GREEN);
                                break;
                            case "false":
                                result.setStatus(SearchResult.Status.RED);
                                break;
                            case "digital":
                                result.setStatus(SearchResult.Status.UNKNOWN);
                                break;
                        }
                    } catch (JSONException | IOException e) {
                        e.printStackTrace();
                    }
                    return null;
                }));
            } catch (JSONException e) {
                e.printStackTrace();
            }
        }
        // Position of the result within this page (0-based); previously a counter that
        // was never incremented, so every result was numbered 0.
        result.setNr(results.size());
        results.add(result);
    }
    // Wait for all availability lookups before handing out the results.
    CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()])).join();
    return new SearchRequestResult(results, totalCount, page);
}

/**
 * Looks for a {@code LoadSharedCatalogueViewAvailabilityAsync(...)} call in the document's
 * script tags and returns the number found in its third argument. If several scripts
 * match, the last one wins. Returns 1 when no script matches.
 */
private static int findPortalId(Document doc) {
    Pattern portalIdPattern = Pattern.compile(".*LoadSharedCatalogueViewAvailabilityAsync\\([^,]*,[^,]*," + "[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");
    int portalId = 1;
    for (Element scripttag : doc.select("script")) {
        String scr = scripttag.html();
        if (scr.contains("LoadSharedCatalogueViewAvailabilityAsync")) {
            Matcher portalIdMatcher = portalIdPattern.matcher(scr);
            if (portalIdMatcher.find()) {
                portalId = Integer.parseInt(portalIdMatcher.group(1));
            }
        }
    }
    return portalId;
}

/**
 * Returns the part of a "Label: value" text between the first and second colon. Text
 * without a colon is returned unchanged. A label with nothing after the colon yields an
 * empty string, because {@code String.split} drops trailing empty parts and indexing
 * {@code [1]} would otherwise throw.
 */
private static String valueAfterLabel(String text) {
    if (!text.contains(":")) {
        return text;
    }
    String[] parts = text.split(":");
    return parts.length > 1 ? parts[1] : "";
}
use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.
the class Primo method parse_search.
/**
 * Parses a Primo result list page into a {@link SearchRequestResult}.
 *
 * Rows for which no link with a {@code doc} query parameter is found are skipped, so
 * every returned result has an ID.
 *
 * @param doc  the parsed result list page; its base URI is set to the search action URL
 *             so that relative links can be resolved
 * @param page the page number stored on every result and on the returned object
 * @return the parsed results; the total count is -1 if it could not be determined
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException, IOException {
    doc.setBaseUri(opac_url + "/action/search.do");
    List<SearchResult> reslist = new ArrayList<>();
    int resnum = -1;
    if (doc.select(".EXLResultsNumbers").size() > 0) {
        // The count element is selected with a different query than the guard above, so
        // it may be missing even though .EXLResultsNumbers exists.
        Element countElement = doc.select(".EXLResultsNumbers em, .PaginationLabel strong").first();
        if (countElement != null) {
            try {
                resnum = Integer.parseInt(countElement.text().trim().replace(".", "").replace(",", "").replace(" ", "").replace("Ergebnisse", ""));
            } catch (NumberFormatException e) {
                // Leave the total at -1 when the text is not a plain number.
                e.printStackTrace();
            }
        }
    }
    for (Element resrow : doc.select(".EXLResult")) {
        SearchResult res = new SearchResult();
        // Description: title in bold, then author, details and availability text
        StringBuilder description = new StringBuilder();
        description.append("<b>").append(resrow.select(".EXLResultTitle").text()).append("</b>");
        if (resrow.select(".EXLResultAuthor").size() > 0) {
            description.append("<br />").append(resrow.select(".EXLResultAuthor").text());
        }
        if (resrow.select(".EXLResultDetails").size() > 0) {
            description.append("<br />").append(resrow.select(".EXLResultDetails").text());
        }
        String availSelect = ".EXLResultAvailability span, .EXLResultAvailability em";
        if (resrow.select(availSelect).size() > 0) {
            description.append("<br />").append(resrow.select(availSelect).first().ownText());
        }
        res.setInnerhtml(description.toString());
        // Status
        if (resrow.select(".EXLResultStatusAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (resrow.select(".EXLResultStatusNotAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        } else if (resrow.select(".EXLResultStatusMaybeAvailable").size() > 0) {
            res.setStatus(SearchResult.Status.YELLOW);
        }
        res.setPage(page);
        // ID: first candidate link carrying a "doc" query parameter
        for (Element a : resrow.select(".EXLResultTitle a, a.EXLThumbnailLinkMarker, .EXLDetailsTab a")) {
            Map<String, String> q = getQueryParamsFirst(a.absUrl("href"));
            if (q.containsKey("doc")) {
                res.setId(q.get("doc"));
                break;
            }
        }
        if (res.getId() == null) {
            continue;
        }
        // Cover, unless it is the placeholder pixel image
        if (resrow.select("img.EXLBriefResultsCover").size() > 0) {
            String src = resrow.select("img.EXLBriefResultsCover").first().absUrl("src");
            if (!src.contains("pixel.png")) {
                res.setCover(src);
            }
        }
        // Media type: first configured CSS class found on the row or inside it
        for (Map.Entry<String, SearchResult.MediaType> cls : mediaTypeClasses.entrySet()) {
            if (resrow.hasClass(cls.getKey()) || resrow.select("." + cls.getKey()).size() > 0) {
                res.setType(cls.getValue());
                break;
            }
        }
        // Rows linking to multiple versions get a child query pointing at that link
        if (resrow.select("a.EXLBriefResultsDisplayMultipleLink").size() > 0) {
            String url = resrow.select("a.EXLBriefResultsDisplayMultipleLink").first().absUrl("href");
            List<SearchQuery> query = new ArrayList<>();
            TextSearchField field = new TextSearchField("url", "url", false, false, "url", false, false);
            field.setVisible(false);
            query.add(new SearchQuery(field, url));
            res.setChildQuery(query);
        }
        reslist.add(res);
    }
    return new SearchRequestResult(reslist, resnum, page);
}
use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.
the class Bibliotheca method parseSearch.
/**
 * Parses a Bibliotheca result list page into a {@link SearchRequestResult}.
 *
 * @param html the raw HTML of the result list
 * @param page the page number passed on to the returned result
 * @param data library configuration; "baseurl" is used as the document base URI and an
 *             optional "mediatypes" object maps icon file names to media type names
 * @return the parsed rows; the total count is -1 if it could not be read from the page
 * @throws OpacErrorException if the page has no ".result_gefunden" element but does have
 *                            a ".resultzeile" element, whose text becomes the message
 */
public static SearchRequestResult parseSearch(String html, int page, JSONObject data) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(data.optString("baseurl"));
    Elements rows = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer");
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < rows.size(); i++) {
        Element tr = rows.get(i);
        SearchResult sr = new SearchResult();
        int contentindex = 1;
        Elements icons = tr.select("td a img");
        if (icons.isEmpty()) {
            // Rows without a media type icon keep their content in the third cell when
            // they have exactly three cells.
            if (tr.children().size() == 3) {
                contentindex = 2;
            }
        } else {
            // The icon's file name identifies the media type.
            String[] fparts = icons.get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String defaultKey = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            if (!data.has("mediatypes")) {
                sr.setType(defaulttypes.get(defaultKey));
            } else {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttypes.get(defaultKey));
                }
            }
        }
        Element contentCell = tr.child(contentindex);
        sr.setInnerhtml(contentCell.child(0).html());
        sr.setNr(i);
        Element link = contentCell.select("a").first();
        // First ID source: the "detmediennr" parameter of the row's first link.
        try {
            if (link != null && link.attr("href").contains("detmediennr")) {
                Map<String, String> params = getQueryParamsFirst(link.attr("abs:href"));
                String nr = params.get("detmediennr");
                if (Integer.parseInt(nr) > i + 1) {
                    // Seems to be an ID…
                    String detDb = params.get("detDB");
                    if (detDb == null) {
                        sr.setId("&detmediennr=" + nr);
                    } else {
                        sr.setId("&detmediennr=" + nr + "&detDB=" + detDb);
                    }
                }
            }
        } catch (Exception ignored) {
        }
        // Second ID source: an HTML comment at the start of the second cell; it replaces
        // any ID set above.
        try {
            if (tr.child(1).childNode(0) instanceof Comment) {
                Comment c = (Comment) tr.child(1).childNode(0);
                String[] commentParts = c.getData().trim().split(": ");
                sr.setId(commentParts[1]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        results.add(sr);
    }
    int results_total = -1;
    Elements foundLabel = doc.select(".result_gefunden");
    if (!foundLabel.isEmpty()) {
        try {
            String digits = foundLabel.text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1");
            results_total = Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            e.printStackTrace();
            results_total = -1;
        }
    } else if (doc.select(".resultzeile").size() > 0) {
        throw new OpacErrorException(doc.select(".resultzeile").text());
    }
    return new SearchRequestResult(results, results_total, page);
}
use of de.geeksfactory.opacclient.objects.SearchResult in project opacclient by opacapp.
the class VuFind method parse_search.
/**
 * Parses a VuFind result list page into a {@link SearchRequestResult}.
 *
 * For each {@code div.result} row the description is built from the COinS metadata in a
 * {@code span.Z3988} element when one is found in the row (or in its parent {@code li}),
 * and from the title link text otherwise. Status, media type, cover and ID are filled in
 * where the row provides them.
 *
 * @param doc  the parsed result list page; its base URI is set to the results URL so that
 *             relative links can be resolved
 * @param page the page number stored on every result and on the returned object
 * @return the parsed results; the total count is -1 if it could not be determined
 * @throws OpacErrorException if the page contains an error element, or has no results
 *                            but a paragraph inside {@code .main}
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    doc.setBaseUri(opac_url + "/Search/Results");
    if (doc.select("p.error, p.errorMsg, .alert-error").size() > 0) {
        throw new OpacErrorException(doc.select("p.error, p.errorMsg, .alert-error").text());
    } else if (doc.select("div.result").size() == 0 && doc.select(".main p").size() > 0) {
        throw new OpacErrorException(doc.select(".main p").first().text());
    }
    int rescount = -1;
    // The total is read from the third <strong> of the result header; stay at -1 if the
    // header has fewer of them or the text is not a number.
    if (doc.select(".resulthead").size() == 1 && doc.select(".resulthead strong").size() > 2) {
        try {
            rescount = Integer.parseInt(doc.select(".resulthead strong").get(2).text().replace(",", "").replace(".", ""));
        } catch (NumberFormatException e) {
            e.printStackTrace();
        }
    }
    List<SearchResult> reslist = new ArrayList<>();
    for (Element row : doc.select("div.result")) {
        SearchResult res = new SearchResult();
        // The class is "Z3988"; selecting "span.3988" never matched, so the metadata
        // branch below was unreachable.
        Element z3988el = null;
        if (row.select("span.Z3988").size() == 1) {
            z3988el = row.select("span.Z3988").first();
        } else if (row.parent().tagName().equals("li") && row.parent().select("span.Z3988").size() > 0) {
            z3988el = row.parent().select("span.Z3988").first();
        }
        if (z3988el != null) {
            List<NameValuePair> z3988data;
            try {
                StringBuilder description = new StringBuilder();
                // The title attribute is parsed as the query string of a dummy URL.
                z3988data = URLEncodedUtils.parse(new URI("http://dummy/?" + z3988el.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    String value = nv.getValue();
                    if (value == null || value.trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if (name.equals("rft.btitle") || name.equals("rft.atitle")) {
                        description.append("<b>").append(value).append("</b>");
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(value);
                    }
                }
                res.setInnerhtml(description.toString());
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
        } else {
            res.setInnerhtml(row.select("a.title").text());
        }
        // Status: classes on the row itself take precedence over nested status markers
        if (row.hasClass("available") || row.hasClass("internet")) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.hasClass("reservable")) {
            res.setStatus(SearchResult.Status.YELLOW);
        } else if (row.hasClass("not-available")) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.available").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-success").size() > 0) {
            res.setStatus(SearchResult.Status.GREEN);
        } else if (row.select(".status .label-important").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        } else if (row.select(".status.checkedout").size() > 0) {
            res.setStatus(SearchResult.Status.RED);
        }
        // Media type: first configured selector that matches inside the row
        for (Map.Entry<String, SearchResult.MediaType> entry : mediaTypeSelectors.entrySet()) {
            if (row.select(entry.getKey()).size() > 0) {
                res.setType(entry.getValue());
                break;
            }
        }
        // Cover: first image whose URL contains "over", unless it is an "Unavailable" one
        for (Element img : row.select("img")) {
            String src = img.absUrl("src");
            if (src.contains("over")) {
                if (!src.contains("Unavailable")) {
                    res.setCover(src);
                }
                break;
            }
        }
        res.setPage(page);
        // ID: taken from the path of the title link; a row without a title link keeps a
        // null ID instead of aborting the whole parse with a NullPointerException.
        Element titleLink = row.select("a.title").first();
        if (titleLink != null) {
            String href = titleLink.absUrl("href");
            try {
                URL idurl = new URL(href);
                String path = idurl.getPath();
                Matcher matcher = idPattern.matcher(path);
                if (matcher.find()) {
                    if (matcher.group().contains("/OpacrlRecord/")) {
                        res.setId("Opacrl:" + matcher.group(1));
                    } else {
                        res.setId(matcher.group(1));
                    }
                }
            } catch (MalformedURLException e) {
                e.printStackTrace();
            }
        }
        reslist.add(res);
    }
    return new SearchRequestResult(reslist, rescount, page);
}
Aggregations