Use of de.geeksfactory.opacclient.objects.SearchResult.MediaType in project opacclient by opacapp.
Class Pica, method getMediaTypeInSingleResult.
/**
 * Determines the media type of a single-result page from the file name of the
 * first icon inside the {@code table[summary=presentation switch]} element.
 *
 * <p>The icon's file name is looked up in the {@code mediatypes} object of
 * {@code data} first. If that object is absent, has no entry for the file name,
 * or the entry is not a valid {@link MediaType} constant, the lower-cased file
 * name without its {@code .jpg}/{@code .gif}/{@code .png} suffix is looked up
 * in {@code defaulttypes} instead.
 *
 * @param html HTML source of the single-result page
 * @return {@link MediaType#UNKNOWN} if the page contains no such icon; otherwise
 *         the configured type or the result of the {@code defaulttypes} lookup.
 *         NOTE(review): the {@code defaulttypes} lookup result is returned as-is,
 *         so this is presumably {@code null} for unknown icons — confirm that
 *         callers expect that.
 */
public MediaType getMediaTypeInSingleResult(String html) {
    final String iconSelector = "table[summary=presentation switch] img";
    Document doc = Jsoup.parse(html);
    if (doc.select(iconSelector).size() == 0) {
        return MediaType.UNKNOWN;
    }

    // Only the last path segment of the icon URL identifies the media type.
    String[] fparts = doc.select(iconSelector).get(0).attr("src").split("/");
    String fname = fparts[fparts.length - 1];

    if (data.has("mediatypes")) {
        try {
            return MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname));
        } catch (JSONException | IllegalArgumentException ignored) {
            // No usable library-specific mapping for this icon: fall back to
            // the built-in defaults below.
        }
    }

    String defaultKey = fname.toLowerCase(Locale.GERMAN)
            .replace(".jpg", "").replace(".gif", "").replace(".png", "");
    return defaulttypes.get(defaultKey);
}
Use of de.geeksfactory.opacclient.objects.SearchResult.MediaType in project opacclient by opacapp.
Class SISIS, method parse_search.
/**
 * Parses a search result list page into a {@link SearchRequestResult}.
 *
 * <p>Besides building the result list, this method updates two fields of the
 * enclosing object: {@code identifier} (taken from the first
 * {@code singleHit.do} link in the result table, or {@code null}) and
 * {@code resultcount} (number of rows parsed).
 *
 * @param html HTML source of the result list page
 * @param page page number, used to compute each result's number as
 *             {@code 10 * (page - 1) + rowIndex}
 * @return the parsed results; the total is {@code -1} when it cannot be read
 *         from the page header
 * @throws OpacErrorException if the page contains an {@code .error} or
 *                            {@code .nohits} element; its text is the message
 * @throws SingleResultFound  if the header indicates a single hit ("1/1")
 */
public SearchRequestResult parse_search(String html, int page) throws OpacErrorException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/searchfoo");

    if (doc.select(".error").size() > 0) {
        throw new OpacErrorException(doc.select(".error").text().trim());
    } else if (doc.select(".nohits").size() > 0) {
        throw new OpacErrorException(doc.select(".nohits").text().trim());
    } else if (doc.select(".box-header h2, #nohits").text().contains("keine Treffer")) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }

    // Read the total number of hits from the header, if there is one.
    int results_total = -1;
    Element header = doc.select(".box-header h2").first();
    String resultnumstr = header != null ? header.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    }
    try {
        if (resultnumstr.contains("(")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
        } else if (resultnumstr.contains(": ")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
        }
    } catch (NumberFormatException ignored) {
        // The header did not carry a number where expected; the total stays
        // at -1 instead of aborting the whole parse.
    }

    Elements table = doc.select("table.data tbody tr");

    // Extract the "identifier" parameter from the first singleHit.do link.
    identifier = null;
    for (Element link : doc.select("table.data a")) {
        if (link.hasAttr("href") && link.attr("href").contains("singleHit.do")) {
            try {
                // The second replace was a no-op ("&" -> "&") in the code this
                // replaces; un-escaping a leftover "&amp;" is presumably what
                // was intended.
                String href = link.attr("href").replace(" ", "%20").replace("&amp;", "&");
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(href), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                // Best effort: a malformed link must not break result parsing.
                e.printStackTrace();
            }
            // Only the first matching link is ever considered.
            break;
        }
    }

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // --- media type from the row's icon ---
        if (tr.select("td img[title]").size() > 0) {
            Element icon = tr.select("td img").get(0);
            String title = icon.attr("title");
            String[] fparts = icon.attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            MediaType default_by_fname = defaulttypes.get(fname.toLowerCase(Locale.GERMAN)
                    .replace(".jpg", "").replace(".gif", "").replace(".png", ""));
            MediaType default_by_title = defaulttypes.get(title);
            MediaType default_name = default_by_title != null ? default_by_title : default_by_fname;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(default_name);
                }
            } else {
                sr.setType(default_name);
            }
        }

        // Keywords in the row text override the icon-based type.
        String alltext = tr.text();
        if (alltext.contains("eAudio") || alltext.contains("eMusic")) {
            sr.setType(MediaType.MP3);
        } else if (alltext.contains("eVideo")) {
            sr.setType(MediaType.EVIDEO);
        } else if (alltext.contains("eBook")) {
            sr.setType(MediaType.EBOOK);
        } else if (alltext.contains("Munzinger")) {
            sr.setType(MediaType.EDOC);
        }

        // --- cover ---
        if (tr.children().size() > 3) {
            Elements covers = tr.child(3).select("img[title*=cover]");
            if (covers.size() == 1) {
                sr.setCover(covers.attr("abs:src"));
                if (sr.getCover().contains("showCover.do")) {
                    downloadCover(sr);
                }
            }
        }

        // --- locate the cell (and, inside it, the container) holding the text ---
        Element middlething;
        if (tr.children().size() > 2 && tr.child(2).select("a").size() > 0) {
            middlething = tr.child(2);
        } else {
            middlething = tr.child(1);
        }
        List<Node> children = middlething.childNodes();
        Elements contentDivs = middlething.select("div").not("#hlrightblock,.bestellfunktionen");
        if (contentDivs.size() == 1) {
            Element indiv = contentDivs.first();
            if (indiv.select("a").size() > 0 && indiv.children().size() > 1) {
                children = indiv.childNodes();
            }
        } else if (middlething.select("span.titleData").size() == 1) {
            children = middlething.select("span.titleData").first().childNodes();
        }

        // Flatten the nodes into "parts". A part from a bare text node is
        // { "text", "", text }; a part from inside an element is
        // { parentTag, childTag-or-"text", text, parentClass, parentStyle }.
        // Texts of three characters or fewer are dropped.
        List<String[]> strings = new ArrayList<>();
        for (Node node : children) {
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                Element parent = (Element) node;
                for (Node subnode : node.childNodes()) {
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(), "text", text,
                                    parent.className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { parent.tag().getName(),
                                    ((Element) subnode).tag().getName(), text,
                                    parent.className(), node.attr("style") });
                        }
                    }
                }
            }
        }

        // --- description from <span class="Z3988">, if the row has exactly one ---
        StringBuilder description = null;
        if (tr.select("span.Z3988").size() == 1) {
            // Sometimes there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            boolean hastitle = false;
            try {
                description = new StringBuilder();
                List<NameValuePair> z3988data = URLEncodedUtils.parse(
                        new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    if (nv.getValue() == null || nv.getValue().trim().equals("")) {
                        continue;
                    }
                    String name = nv.getName();
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        description.append("<b>").append(nv.getValue()).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au") || name.equals("rft.date")) {
                        description.append("<br />").append(nv.getValue());
                    }
                }
            } catch (URISyntaxException e) {
                // Unusable data: fall back to the text-based description below.
                description = null;
            }
        }
        boolean described = false;
        if (description != null && description.length() > 0) {
            sr.setInnerhtml(description.toString());
            described = true;
        } else {
            description = new StringBuilder();
        }

        // --- text-based description (only if none yet) and loan status ---
        int k = 0;
        boolean yearfound = false;
        boolean titlefound = false;
        for (String[] part : strings) {
            String partText = part[2];
            if (!described) {
                if (part[0].equals("a") && (k == 0 || !titlefound)) {
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append("<b>").append(partText).append("</b>");
                    titlefound = true;
                } else if (partText.matches("\\D*[0-9]{4}\\D*") && partText.length() <= 10) {
                    yearfound = true;
                    if (k != 0) {
                        description.append("<br />");
                    }
                    description.append(partText);
                } else if (k == 1 && !yearfound) {
                    // Also covers the former "(yyyy)" special case, which
                    // appended exactly the same output.
                    description.append("<br />");
                    description.append(partText);
                } else if (k > 1 && k < 4 && part[0].equals("text")
                        && partText.matches("^[A-Za-z0-9,\\- ]+$")) {
                    description.append("<br />");
                    description.append(partText);
                }
            }

            // NOTE(review): parts are built above with length 3 or 5 only, so
            // the length == 4 branch can never match. It is left untouched
            // because enabling it would set a status before the e-media
            // override below gets a chance to run — needs a decision.
            if (part.length == 4) {
                if (part[0].equals("span") && part[3].equals("textgruen")) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (part[0].equals("span") && part[3].equals("textrot")) {
                    sr.setStatus(SearchResult.Status.RED);
                }
            } else if (part.length == 5) {
                if (part[4].contains("purple")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                }
            }

            if (sr.getStatus() == null) {
                if ((partText.contains("entliehen") && partText.startsWith("Vormerkung ist leider nicht möglich"))
                        || partText.contains("Alle Exemplare des gewählten Titels sind entliehen")
                        || partText.contains("nur in anderer Zweigstelle ausleihbar und nicht bestellbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (partText.startsWith("entliehen")
                        || partText.contains("Ein Exemplar finden Sie in einer anderen Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((partText.startsWith("bestellbar") && !partText.contains("nicht bestellbar"))
                        || (partText.startsWith("vorbestellbar") && !partText.contains("nicht vorbestellbar"))
                        || (partText.startsWith("vormerkbar") && !partText.contains("nicht vormerkbar"))
                        || partText.contains("heute zurückgebucht")
                        || (partText.contains("ausleihbar") && !partText.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
            k++;
        }
        if (!described) {
            sr.setInnerhtml(description.toString());
        }

        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Use of de.geeksfactory.opacclient.objects.SearchResult.MediaType in project opacclient by opacapp.
Class TouchPoint, method parse_search.
/**
 * Parses a search result list page into a {@link SearchRequestResult}.
 *
 * <p>This method performs HTTP requests: one for the hit list itself when the
 * given page only contains a {@code #RefineHitListForm}, and one per result row
 * that has a loan-status script, to fetch that row's loan status.
 *
 * <p>It also updates two fields of the enclosing object: {@code identifier}
 * (taken from the first {@code singleHit.do} link in the result table, or
 * {@code null}) and {@code resultcount} (number of rows parsed).
 *
 * @param html HTML source of the result list page
 * @param page page number, used to compute each result's number as
 *             {@code 10 * (page - 1) + rowIndex + 1}
 * @return the parsed results; the total is {@code -1} when it cannot be read
 *         from the page header
 * @throws OpacErrorException declared by the signature; not thrown directly by
 *                            the code in this method
 * @throws IOException        if one of the HTTP requests fails
 * @throws SingleResultFound  if the header indicates a single hit ("1/1")
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException, IOException, SingleResultFound {
    Document doc = Jsoup.parse(html);
    if (doc.select("#RefineHitListForm").size() > 0) {
        // the results are located on a different page loaded via AJAX
        html = httpGet(opac_url + "/speedHitList.do?_=" + (System.currentTimeMillis() / 1000)
                + "&hitlistindex=0&exclusionList=", ENCODING);
        doc = Jsoup.parse(html);
    }
    if (doc.select(".nodata").size() > 0) {
        return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
    }
    doc.setBaseUri(opac_url + "/searchfoo");

    // Read the total number of hits from the header, if there is one.
    int results_total = -1;
    Element header = doc.select(".box-header h2, .box-header h1").first();
    String resultnumstr = header != null ? header.text() : "";
    if (resultnumstr.contains("(1/1)") || resultnumstr.contains(" 1/1")) {
        throw new SingleResultFound();
    }
    try {
        if (resultnumstr.contains("(")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
        } else if (resultnumstr.contains(": ")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
        } else if (resultnumstr.contains("Treffer")) {
            results_total = Integer.parseInt(resultnumstr.replaceAll(".* ([0-9]+)$", "$1"));
        }
    } catch (NumberFormatException ignored) {
        // The header did not carry a number where expected; the total stays
        // at -1 instead of aborting the whole parse.
    }

    Elements table = doc.select("table.data > tbody > tr");

    // Extract the "identifier" parameter from the first singleHit.do link.
    identifier = null;
    for (Element link : doc.select("table.data a")) {
        if (link.hasAttr("href") && link.attr("href").contains("singleHit.do")) {
            try {
                // The second replace was a no-op ("&" -> "&") in the code this
                // replaces; un-escaping a leftover "&amp;" is presumably what
                // was intended.
                String href = link.attr("href").replace(" ", "%20").replace("&amp;", "&");
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(href), ENCODING);
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                // Best effort: a malformed link must not break result parsing.
                e.printStackTrace();
            }
            // Only the first matching link is ever considered.
            break;
        }
    }

    // File names can look like this: "20_DVD_Video.gif"
    Pattern numberedIcon = Pattern.compile("(\\d+)_.*");
    String[] variables = new String[] { "loanstateDBId", "itemIdentifier", "hitlistIdentifier",
            "hitlistPosition", "duplicateHitlistIdentifier", "itemType", "titleStatus", "typeofHit",
            "context" };

    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // --- media type from the row's icon ---
        Elements icons = tr.select(".icn, img[width=32]");
        if (icons.size() > 0) {
            String[] fparts = icons.first().attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            String changedFname = fname.toLowerCase(Locale.GERMAN)
                    .replace(".jpg", "").replace(".gif", "").replace(".png", "");
            Matcher matcher = numberedIcon.matcher(changedFname);
            if (matcher.find()) {
                // Only the leading number is used as the lookup key.
                changedFname = matcher.group(1);
            }
            MediaType defaulttype = defaulttypes.get(changedFname);
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    sr.setType(defaulttype);
                }
            } else {
                sr.setType(defaulttype);
            }
        }

        // --- title and descriptive text ---
        String title;
        String text;
        if (tr.select(".results table").size() > 0) {
            // e.g. RWTH Aachen
            title = tr.select(".title a").text();
            text = tr.select(".title div").text();
        } else {
            // e.g. Schaffhausen, BSB München
            title = tr.select(".title, .hitlistTitle").text();
            Element meta = tr.select(".results, .hitlistMetadata").first();
            // Rows without a metadata element get an empty text instead of
            // failing the whole parse.
            text = meta != null ? meta.ownText() : "";
        }

        // we need to do some evil javascript parsing here to get the cover
        // and loan status of the item

        // get cover
        sr.setCover(findCoverUrl(tr, true));

        // get loan status and media ID
        Elements loanScripts = tr.select("div[id^=loanstatus] + script");
        if (loanScripts.size() > 0) {
            String js = loanScripts.first().html();
            String ajaxUrl = matchJSVariable(js, "ajaxUrl");
            if (!"".equals(ajaxUrl)) {
                JSONObject id = new JSONObject();
                List<NameValuePair> map = new ArrayList<>();
                for (String variable : variables) {
                    String value = matchJSVariable(js, variable);
                    if (!"".equals(value)) {
                        map.add(new BasicNameValuePair(variable, value));
                    }
                    try {
                        if (variable.equals("itemIdentifier")) {
                            id.put("id", value);
                        } else if (variable.equals("loanstateDBId")) {
                            id.put("db", value);
                        }
                    } catch (JSONException e) {
                        e.printStackTrace();
                    }
                }
                sr.setId(id.toString());

                String url = new URL(new URL(opac_url + "/"), ajaxUrl).toString();
                String loanStatusHtml = httpGet(url + "?" + URLEncodedUtils.format(map, "UTF-8"), ENCODING)
                        .replace("\r\n", "").trim();
                Document loanStatusDoc = Jsoup.parse(loanStatusHtml);
                String loanstatus = loanStatusDoc.text().replace("\u00bb", "").trim();

                if ((loanstatus.startsWith("entliehen") && loanstatus.contains("keine Vormerkung möglich"))
                        || loanstatus.contains("Keine Exemplare verfügbar")) {
                    sr.setStatus(SearchResult.Status.RED);
                } else if (loanstatus.startsWith("entliehen") || loanstatus.contains("andere Zweigstelle")) {
                    sr.setStatus(SearchResult.Status.YELLOW);
                } else if ((loanstatus.startsWith("bestellbar") && !loanstatus.contains("nicht bestellbar"))
                        || (loanstatus.startsWith("vorbestellbar") && !loanstatus.contains("nicht vorbestellbar"))
                        || (loanstatus.startsWith("vormerkbar") && !loanstatus.contains("nicht vormerkbar"))
                        || loanstatus.contains("heute zurückgebucht")
                        || (loanstatus.contains("ausleihbar") && !loanstatus.contains("nicht ausleihbar"))) {
                    sr.setStatus(SearchResult.Status.GREEN);
                } else if (loanstatus.equals("")) {
                    // In special databases (like "Handschriften" in Winterthur) ID lookup is
                    // not possible, which we try to detect this way. We therefore also cannot
                    // use getResultById when accessing the results.
                    sr.setId(null);
                }
                if (sr.getType() != null) {
                    // Especially Onleihe.de ebooks are often marked
                    // green though they are not available.
                    if (sr.getType().equals(MediaType.EBOOK) || sr.getType().equals(MediaType.EVIDEO)
                            || sr.getType().equals(MediaType.MP3)) {
                        sr.setStatus(SearchResult.Status.UNKNOWN);
                    }
                }
            }
        }

        sr.setInnerhtml(("<b>" + title + "</b><br/>") + text);
        sr.setNr(10 * (page - 1) + i + 1);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Aggregations