Use of org.jsoup.nodes.TextNode in project jsoup by jhy.
In class TraversorTest, method canRemoveDuringHead.
/**
 * Checks that nodes may be removed from inside the traversal callback: every node whose
 * {@code id} attribute equals "1" and the text node reading "Three" are removed while the
 * document is being walked, and the remaining body HTML is compared to the expected markup.
 */
@Test
public void canRemoveDuringHead() {
    Document doc = Jsoup.parse("<div><p id=1>Zero<p id=1>One<p id=2>Two<p>Three</div>");

    NodeTraversor.traverse((node, depth) -> {
        // Same short-circuit order as two separate branches: the id check runs first,
        // the text check only when the id check fails.
        boolean shouldRemove = node.attr("id").equals("1")
            || (node instanceof TextNode && ((TextNode) node).text().equals("Three"));
        if (shouldRemove) {
            node.remove();
        }
    }, doc);

    String remainingHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<div><p id=\"2\">Two</p><p></p></div>", remainingHtml);
}
Use of org.jsoup.nodes.TextNode in project jsoup by jhy.
In class TokeniserStateTest, method testCommentEndCoverage.
/**
 * Parses a document whose comment contains the sequences {@code --!} and {@code ---} before
 * the closing {@code -->}, then asserts that the comment data is kept intact and that the
 * paragraph following the comment still holds the text "Hello".
 */
@Test
public void testCommentEndCoverage() {
    Element body = Jsoup.parse(
        "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --! --- --><p>Hello</p></body></html>")
        .body();

    // Child node 0 is the <img>; the comment is the second child node of the body.
    String expectedComment = " <table><tr><td></table> --! --- ";
    assertEquals(expectedComment, ((Comment) body.childNode(1)).getData());

    // Child element 1 (elements only) is the <p> after the comment.
    Element paragraph = body.child(1);
    String expectedText = "Hello";
    assertEquals(expectedText, ((TextNode) paragraph.childNode(0)).getWholeText());
}
Use of org.jsoup.nodes.TextNode in project webmagic by code4craft.
In class CssSelector, method getText.
/**
 * Concatenates the text of the direct {@link TextNode} children of {@code element}.
 * Child nodes of any other type are skipped, so text nested inside child elements is
 * not included.
 *
 * @param element the element whose direct text nodes are collected
 * @return the joined text of the direct text-node children, in document order
 */
protected String getText(Element element) {
    StringBuilder ownText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        ownText.append(((TextNode) child).text());
    }
    return ownText.toString();
}
Use of org.jsoup.nodes.TextNode in project flow by vaadin.
In class TemplateParser, method splitInclude.
/**
 * Replaces every include statement found in a text node with the element loaded for it.
 * An include statement starts with {@code INCLUDE_PREFIX} and ends at the next {@code '@'}.
 * Processing stops at the first position where no further complete statement is found.
 *
 * @param nodeToSplit the text node to scan; may be {@code null}, in which case nothing happens
 * @param resolver the resolver handed to {@code loadInclude} for each include file name
 */
private static void splitInclude(TextNode nodeToSplit, TemplateResolver resolver) {
    TextNode current = nodeToSplit;
    while (current != null) {
        String wholeText = current.getWholeText();

        int statementStart = wholeText.indexOf(INCLUDE_PREFIX);
        if (statementStart < 0) {
            return;
        }
        int nameStart = statementStart + INCLUDE_PREFIX.length();
        int statementEnd = wholeText.indexOf('@', nameStart);
        if (statementEnd < 0) {
            return;
        }

        // Load the include before touching the DOM.
        String fileName = wholeText.substring(nameStart, statementEnd).trim();
        Element replacement = loadInclude(fileName, resolver);

        // Length of the statement including its terminating '@'.
        int statementLength = statementEnd - statementStart + 1;

        // Cut the node into: untouched prefix | include statement | optional remainder.
        TextNode statement = current.splitText(statementStart);
        TextNode rest = statement.getWholeText().length() > statementLength
            ? statement.splitText(statementLength)
            : null;
        statement.replaceWith(replacement);

        // Keep scanning whatever text followed the statement.
        current = rest;
    }
}
Use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
In class Pica, method parse_search.
/**
 * Parses the HTML of a search result page into a {@link SearchRequestResult}.
 *
 * <p>If the page contains an {@code .error} element whose text is one of the known
 * "nothing found" messages, an empty result is returned; any other error text is thrown
 * as an {@link OpacErrorException}. Otherwise the total hit count is read from the
 * {@code .pages} element and one {@link SearchResult} is built per row of the hit list table.
 *
 * @param html the raw HTML of the result page; also stored in {@code reusehtml}
 * @param page the page number, used to compute each result's running number
 * @return the parsed results together with the total count and the page number
 * @throws OpacErrorException if the page shows an error other than "nothing found"
 */
protected SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    Document doc = Jsoup.parse(html);
    updateSearchSetValue(doc);

    Elements errors = doc.select(".error");
    if (errors.size() > 0) {
        String error = errors.first().text().trim();
        if (error.equals("Es wurde nichts gefunden.") || error.equals("Nothing has been found") || error.equals("Er is niets gevonden.") || error.equals("Rien n'a été trouvé.")) {
            // nothing found
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, 1);
        } else {
            // error
            throw new OpacErrorException(error);
        }
    }
    reusehtml = html;

    // Extract the total number of hits. A trailing run of digits wins; otherwise the
    // number is taken from "(...)" or from after ": ", and as a last resort the whole
    // string is parsed.
    int results_total;
    String resultnumstr = doc.select(".pages").first().text();
    Pattern p = Pattern.compile("[0-9]+$");
    Matcher m = p.matcher(resultnumstr);
    if (m.find()) {
        resultnumstr = m.group();
    }
    if (resultnumstr.contains("(")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else if (resultnumstr.contains(": ")) {
        results_total = Integer.parseInt(resultnumstr.replaceAll(".*: ([0-9]+)$", "$1"));
    } else {
        results_total = Integer.parseInt(resultnumstr);
    }

    List<SearchResult> results = new ArrayList<>();
    if (results_total == 1) {
        // Only one result
        DetailedItem singleResult = parse_result(html);
        SearchResult sr = new SearchResult();
        sr.setType(getMediaTypeInSingleResult(html));
        sr.setInnerhtml("<b>" + singleResult.getTitle() + "</b><br>" + singleResult.getDetails().get(0).getContent());
        results.add(sr);
    }

    Elements table = doc.select("table[summary=hitlist] tbody tr[valign=top]");
    // identifier = null;
    Elements links = doc.select("table[summary=hitlist] a");
    boolean haslink = false;
    // NOTE(review): with the identifier assignments commented out, this loop has no
    // effect other than printing a stack trace when the first "SHW?" link cannot be parsed.
    for (int i = 0; i < links.size(); i++) {
        Element node = links.get(i);
        // Short-circuit && instead of the bitwise & originally used between booleans.
        if (node.hasAttr("href") && node.attr("href").contains("SHW?") && !haslink) {
            haslink = true;
            try {
                List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(node.attr("href")), getDefaultEncoding());
                for (NameValuePair nv : anyurl) {
                    if (nv.getName().equals("identifier")) {
                        // identifier = nv.getValue();
                        break;
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // The media type is derived from the file name of the row's icon image.
        if (tr.select("td.hit img").size() > 0) {
            String[] fparts = tr.select("td img").get(0).attr("src").split("/");
            String fname = fparts[fparts.length - 1];
            // Key for the built-in defaults: lower-cased file name without image extension.
            String defaultKey = fname.toLowerCase(Locale.GERMAN).replace(".jpg", "").replace(".gif", "").replace(".png", "");
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                } catch (JSONException | IllegalArgumentException e) {
                    // No usable configured mapping for this icon; fall back to the defaults.
                    sr.setType(defaulttypes.get(defaultKey));
                }
            } else {
                sr.setType(defaulttypes.get(defaultKey));
            }
        }

        // Collect the text fragments of the third cell. Each entry starts with
        // { outer tag name or "text", inner tag name / "text" / "", fragment text };
        // entries coming from child elements also carry the element's class and style.
        // Fragments of three characters or fewer are ignored.
        Element middlething = tr.child(2);
        List<Node> children = middlething.childNodes();
        int childrennum = children.size();
        List<String[]> strings = new ArrayList<>();
        for (int ch = 0; ch < childrennum; ch++) {
            Node node = children.get(ch);
            if (node instanceof TextNode) {
                String text = ((TextNode) node).text().trim();
                if (text.length() > 3) {
                    strings.add(new String[] { "text", "", text });
                }
            } else if (node instanceof Element) {
                List<Node> subchildren = node.childNodes();
                for (int j = 0; j < subchildren.size(); j++) {
                    Node subnode = subchildren.get(j);
                    if (subnode instanceof TextNode) {
                        String text = ((TextNode) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), "text", text, ((Element) node).className(), node.attr("style") });
                        }
                    } else if (subnode instanceof Element) {
                        String text = ((Element) subnode).text().trim();
                        if (text.length() > 3) {
                            strings.add(new String[] { ((Element) node).tag().getName(), ((Element) subnode).tag().getName(), text, ((Element) node).className(), node.attr("style") });
                        }
                    }
                }
            }
        }

        // Build the description from the first three fragments; a leading fragment
        // that came from an <a> element is rendered bold.
        StringBuilder description = new StringBuilder();
        int k = 0;
        for (String[] part : strings) {
            if (part[0].equals("a") && k == 0) {
                description.append("<b>").append(part[2]).append("</b>");
            } else if (k < 3) {
                description.append("<br />").append(part[2]);
            }
            k++;
        }
        sr.setInnerhtml(description.toString());
        // Running number across pages — presumably assumes 10 hits per page; TODO confirm.
        sr.setNr(10 * (page - 1) + i);
        sr.setId(null);
        results.add(sr);
    }
    resultcount = results.size();
    return new SearchRequestResult(results, results_total, page);
}
Aggregations