use of org.jsoup.nodes.Node in project opacclient by opacapp.
the class SISIS method parseDetail.
/**
 * Builds a {@link DetailedItem} from the three HTML pages of a SISIS detail view.
 *
 * <p>How the inputs are used by this method:
 * <ul>
 * <li>{@code html}: item id ({@code #bibtip_id} / {@code #permalink_link}), cover image,
 * title, the copies table and the volume-search link.</li>
 * <li>{@code html2}: the labelled detail fields.</li>
 * <li>{@code html3}: reservation links, download links and the status text.</li>
 * </ul>
 *
 * @param html           first detail page
 * @param html2          second detail page
 * @param html3          third detail page
 * @param coverJs        optional snippet searched for an {@code <img ... src="...">} tag; when
 *                       {@code null} the cover is looked up in {@code html} instead
 * @param data           configuration object; only the {@code "baseurl"} key is read here and
 *                       it is used as base URI for resolving relative links
 * @param stringProvider source of the localized labels used for generated details
 * @return the populated item; never {@code null}
 * @throws IOException declared by the signature; no statement in this method visibly throws it
 */
static DetailedItem parseDetail(String html, String html2, String html3, String coverJs, JSONObject data, StringProvider stringProvider) throws IOException {
    Document doc = Jsoup.parse(html);
    String opac_url = data.optString("baseurl", "");
    doc.setBaseUri(opac_url);
    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);
    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);
    DetailedItem result = new DetailedItem();
    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    // Collect the query strings of all reservation / order links.
    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                // NOTE(review): this break also ends the reservation-link scan below;
                // confirm that this is intended.
                break;
            }
        }
        // Reservation ("Vormerkung") or order ("Bestellung") link
        if (hrefq.get("methodToCall") != null) {
            if (hrefq.get("methodToCall").equals("doVormerkung") || hrefq.get("methodToCall").equals("doBestellung")) {
                reservationlinks.add(href.split("\\?")[1]);
            }
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.size() == 0) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    if (result.getId() == null && doc.select("#permalink_link").size() > 0) {
        result.setId(doc.select("#permalink_link").text());
    }

    // Cover: prefer the image referenced by coverJs, otherwise a single image in the data table.
    if (coverJs != null) {
        Pattern srcPattern = Pattern.compile("<img .* src=\"([^\"]+)\">");
        Matcher matcher = srcPattern.matcher(coverJs);
        if (matcher.find()) {
            result.setCover(matcher.group(1));
        }
    } else if (doc.select(".data td img").size() == 1) {
        result.setCover(doc.select(".data td img").first().attr("abs:src"));
    }

    // Title: teaser title if unique, else the first bold text of the data table, else empty.
    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }
    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    // First detail block: <strong class="c2"> starts a new label, everything else is value text.
    String title = "";
    String text = "";
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    // first() yields null when nothing matches; skip the block instead of failing with an NPE.
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                Element element = (Element) node;
                if (element.tagName().equals("strong")) {
                    if (element.hasClass("c2")) {
                        if (!title.equals("")) {
                            result.addDetail(new Detail(title, text.trim()));
                        }
                        title = element.text().trim();
                        text = "";
                    } else {
                        text = text + element.text();
                    }
                } else {
                    if (element.tagName().equals("a")) {
                        if (element.text().trim().contains("hier klicken") || title.contains("Link")) {
                            text = text + node.attr("href");
                            // Keep the pending label/value so the next block can emit it.
                            takeover = true;
                            break;
                        } else {
                            text = text + element.text();
                        }
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        // Nothing to carry over: the pending label/value pair is discarded.
        text = "";
        title = "";
    }

    // Second detail block: every <strong> is a label; a pending pair is flushed when the next label starts.
    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }
                    title = ((Element) node).text().trim();
                } else {
                    if (((Element) node).tagName().equals("a") && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) {
                        text = text + node.attr("href");
                    } else {
                        text = text + ((Element) node).text();
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        // Alternative layout: a two-column table of label / value rows.
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR), stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last pending label/value pair.
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }

    // Fallback ID retrieval from the "katkey" parameter of the tab-content links.
    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }
    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }
    if (doc3.select("#tab-content .textrot").size() > 0) {
        result.addDetail(new Detail(stringProvider.getString(StringProvider.STATUS), doc3.select("#tab-content .textrot").text()));
    }

    // Copies table: map logical column names to cell indexes.
    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);
    Element table = doc.select("#tab-content .data").first();
    Elements copy_columns = table != null ? table.select("tr#bg2 th") : new Elements();
    // Header cells override the defaults when they contain a known caption.
    for (int i = 0; i < copy_columns.size(); i++) {
        Element th = copy_columns.get(i);
        String head = th.text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }
    // Group 1: status word, group 2: return date, group 3: number of reservations.
    Pattern status_lent = Pattern.compile("^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    // Used when status and barcode share one cell: group 1 status, group 2 barcode.
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");
    Elements exemplartrs = table != null ? table.select("tr").not("#bg2") : new Elements();
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        // Best effort per row: a row that cannot be parsed is logged and skipped.
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");
            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }
            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext.trim().replace(" Wegweiser", ""));
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }
            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);
            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }
            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }
            result.addCopy(copy);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    // Volume search: look for a link whose query contains methodToCall=volumeSearch.
    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
use of org.jsoup.nodes.Node in project k-9 by k9mail.
the class AdvancedNodeTraversor method filter.
/**
 * Start a depth-first filtering of the root and all of its descendants.
 *
 * <p>For every visited node {@code filter.head(node, depth)} is called first. Children are only
 * visited when the head decision is {@code CONTINUE}. {@code filter.tail(node, depth)} is called
 * for nodes whose head decision was {@code CONTINUE} or {@code SKIP_CHILDREN}. A node is removed
 * from its parent when either its head or its tail decision is {@code REMOVE}; removal happens
 * only after the next node to visit has been determined.
 *
 * @param root
 *         the root node point to traverse.
 *
 * @return {@code STOPPED} as soon as a head or tail callback returns {@code STOP},
 *         {@code ROOT_REMOVED} when the head decision for the root was {@code REMOVE},
 *         otherwise {@code ENDED}.
 */
public FilterResult filter(Node root) {
    Node node = root;
    int depth = 0;
    while (node != null) {
        HeadFilterDecision headResult = filter.head(node, depth);
        if (headResult == HeadFilterDecision.STOP) {
            return FilterResult.STOPPED;
        }
        // Descend into the first child.
        if (headResult == HeadFilterDecision.CONTINUE && node.childNodeSize() > 0) {
            node = node.childNode(0);
            ++depth;
            continue;
        }
        TailFilterDecision tailResult = TailFilterDecision.CONTINUE;
        // No further siblings: finish this node and climb back towards the root.
        while (node.nextSibling() == null && depth > 0) {
            if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) {
                tailResult = filter.tail(node, depth);
                if (tailResult == TailFilterDecision.STOP) {
                    return FilterResult.STOPPED;
                }
            }
            Node prev = node;
            node = node.parentNode();
            depth--;
            // Remove only after the parent has been looked up.
            if (headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE) {
                prev.remove();
            }
            // The parent's head() already returned CONTINUE, otherwise we would not be below it.
            headResult = HeadFilterDecision.CONTINUE;
        }
        // Finish the current node, then move on to its next sibling.
        if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) {
            tailResult = filter.tail(node, depth);
            if (tailResult == TailFilterDecision.STOP) {
                return FilterResult.STOPPED;
            }
        }
        Node prev = node;
        node = node.nextSibling();
        // Honour a REMOVE from tail() as well, exactly like the climbing loop above does;
        // remove only after the sibling has been looked up.
        if (headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE) {
            prev.remove();
        }
        if (prev == root) {
            return headResult == HeadFilterDecision.REMOVE ? FilterResult.ROOT_REMOVED : FilterResult.ENDED;
        }
    }
    return FilterResult.ENDED;
}
use of org.jsoup.nodes.Node in project ez-vcard by mangstadt.
the class HCardElement method visitForValue.
/**
 * Appends the textual value found below {@code element} to {@code value}, walking the child
 * nodes recursively in document order.
 *
 * <ul>
 * <li>text nodes contribute their text,</li>
 * <li>elements carrying the class name "type" are ignored together with their content,</li>
 * <li>{@code <br>} elements contribute {@code NEWLINE},</li>
 * <li>{@code <del>} elements are ignored together with their content,</li>
 * <li>every other element is descended into.</li>
 * </ul>
 *
 * @param element the element whose children are visited
 * @param value   the buffer that receives the collected text
 */
private void visitForValue(Element element, StringBuilder value) {
    for (Node child : element.childNodes()) {
        if (child instanceof TextNode) {
            value.append(((TextNode) child).text());
        } else if (child instanceof Element) {
            Element childElement = (Element) child;
            boolean isTypeElement = childElement.classNames().contains("type");
            if (!isTypeElement) {
                String tag = childElement.tagName();
                if ("br".equals(tag)) {
                    // a line break in the markup becomes a newline in the value
                    value.append(NEWLINE);
                } else if (!"del".equals(tag)) {
                    // deleted content is dropped; everything else is searched recursively
                    visitForValue(childElement, value);
                }
            }
        }
    }
}
use of org.jsoup.nodes.Node in project Java-readability by basis-technology-corp.
the class XmlDataMap method recurse.
/**
 * Visits {@code element} and its descendants depth-first and feeds their text to
 * {@code append}, surrounding the content according to the element's classification.
 *
 * <p>{@code Whitespace} elements get a space before and after their content,
 * {@code Sentence} elements get a space before and a period after. Text nodes that are direct
 * children of a {@code Banned} element are not appended; child elements are still visited.
 *
 * @param element the element to process
 */
private void recurse(Element element) {
    final ElementAction action = classifyElement(element);
    final boolean isWhitespace = action == ElementAction.Whitespace;
    final boolean isSentence = action == ElementAction.Sentence;
    final boolean textAllowed = action != ElementAction.Banned;

    if (isWhitespace || isSentence) {
        appendSpace();
    }

    for (Node child : element.childNodes()) {
        if (child instanceof Element) {
            recurse((Element) child);
        } else if (textAllowed && child instanceof TextNode) {
            TextNode textNode = (TextNode) child;
            append(textNode, textNode.text());
        }
    }

    if (isWhitespace) {
        appendSpace();
        return;
    }
    if (isSentence) {
        appendPeriod();
        return;
    }
    if (action == ElementAction.Mark) {
        // NOTE(review): the mark is populated but not stored anywhere in this method;
        // confirm whether it should be added to a collection.
        Mark mark = new Mark();
        mark.setOffset(pcDataOffset);
        mark.setTag(element.tagName());
    }
}
use of org.jsoup.nodes.Node in project jsoup by jhy.
the class Parser method parseBodyFragment.
/**
 * Parses an HTML fragment and places the resulting nodes into the {@code body} of a freshly
 * created shell document.
 *
 * @param bodyHtml fragment of HTML
 * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
 *
 * @return Document, with empty head, and HTML parsed into body
 */
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
    final Document shell = Document.createShell(baseUri);
    final Element bodyElement = shell.body();
    final List<Node> parsed = parseFragment(bodyHtml, bodyElement, baseUri);

    // Work on a snapshot: the list returned by the parser changes while nodes are re-parented.
    final Node[] snapshot = parsed.toArray(new Node[parsed.size()]);

    // Detach from last to first; the node at index 0 is deliberately not detached here.
    int index = snapshot.length - 1;
    while (index >= 1) {
        snapshot[index].remove();
        index--;
    }

    // Attach every node, including the first one, to the body in original order.
    for (int position = 0; position < snapshot.length; position++) {
        bodyElement.appendChild(snapshot[position]);
    }
    return shell;
}
Aggregations