use of org.jsoup.select.NodeVisitor in project jsoup by jhy.
the class Node method setBaseUri.
/**
 * Sets the base URI of this node and of every node reached by traversing from it.
 *
 * @param baseUri the base URI to apply; checked with {@code Validate.notNull} before anything is changed
 */
public void setBaseUri(final String baseUri) {
    Validate.notNull(baseUri);
    final NodeVisitor baseUriSetter = new NodeVisitor() {
        public void head(Node visited, int depth) {
            // assign on the way down so each visited node receives the new value
            visited.baseUri = baseUri;
        }

        public void tail(Node visited, int depth) {
            // nothing to do when leaving a node
        }
    };
    traverse(baseUriSetter);
}
use of org.jsoup.select.NodeVisitor in project symphony by b3log.
the class Markdowns method toHTML.
/**
 * Converts the specified markdown text to HTML.
 * <p>
 * A previously cached result (looked up via {@code getHTML}) is returned directly. Otherwise the conversion runs on
 * a dedicated single-thread executor and is abandoned when it takes longer than {@code MD_TIMEOUT} milliseconds.
 * A successful result is stored via {@code putHTML} before it is returned.
 * </p>
 *
 * @param markdownText the specified markdown text
 * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns
 * the 'contentRenderFailedLabel' language label if the conversion times out, is interrupted or fails
 */
public static String toHTML(final String markdownText) {
    if (Strings.isEmptyOrNull(markdownText)) {
        return "";
    }

    final String cachedHTML = getHTML(markdownText);
    if (null != cachedHTML) {
        return cachedHTML;
    }

    final ExecutorService pool = Executors.newSingleThreadExecutor();
    // Filled in by the worker so that the timeout handler can locate the worker thread
    final long[] threadId = new long[1];
    final Callable<String> call = () -> {
        threadId[0] = Thread.currentThread().getId();

        final Document doc = Jsoup.parse(markdownToRawHtml(markdownText));
        linkUserMentions(doc);
        doc.select("pre>code").addClass("hljs");
        rewriteExternalLinks(doc);
        doc.outputSettings().prettyPrint(false);

        final String ret = StringUtils.trim(doc.select("body").html());
        // cache it
        putHTML(markdownText, ret);

        return ret;
    };

    Stopwatchs.start("Md to HTML");
    try {
        final Future<String> future = pool.submit(call);

        return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS);
    } catch (final TimeoutException e) {
        LOGGER.log(Level.ERROR, "Markdown timeout [md=" + StringUtils.substring(markdownText, 0, 256) + "]");
        Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null);
        stopWorkerThread(threadId[0]);
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so that callers further up the stack can still observe the interruption
        Thread.currentThread().interrupt();
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + StringUtils.substring(markdownText, 0, 256) + "]", e);
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + StringUtils.substring(markdownText, 0, 256) + "]", e);
    } finally {
        pool.shutdownNow();
        Stopwatchs.end();
    }

    return LANG_PROPS_SERVICE.get("contentRenderFailedLabel");
}

/**
 * Renders markdown to HTML, using [marked] when it is available and flexmark otherwise or when [marked] fails.
 *
 * @param markdownText the markdown text to render
 * @return the rendered HTML, wrapped in a paragraph if it does not already start with one
 */
private static String markdownToRawHtml(final String markdownText) {
    if (MARKED_AVAILABLE) {
        try {
            return wrapInParagraph(toHtmlByMarked(markdownText));
        } catch (final Exception e) {
            LOGGER.log(Level.WARN, "Failed to use [marked] for markdown [md=" + StringUtils.substring(markdownText, 0, 256) + "]: " + e.getMessage());
            // fall through to flexmark
        }
    }

    final com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText);

    return wrapInParagraph(RENDERER.render(document));
}

/**
 * Wraps the specified HTML in {@code <p>...</p>} unless it already starts with {@code <p>}.
 *
 * @param html the HTML to wrap
 * @return the possibly wrapped HTML
 */
private static String wrapInParagraph(final String html) {
    return StringUtils.startsWith(html, "<p>") ? html : "<p>" + html + "</p>";
}

/**
 * Turns {@code @userName} mentions found in text nodes into member links; text nodes without any mention are passed
 * through {@code Pangu.spacingText} instead. Text whose direct parent is a {@code code} element is left untouched.
 *
 * @param doc the document to process in place
 */
private static void linkUserMentions(final Document doc) {
    // Replaced text nodes are only detached after the traversal so the tree is not shrunk while it is being walked
    final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>();

    doc.traverse(new NodeVisitor() {
        @Override
        public void head(final org.jsoup.nodes.Node node, int depth) {
            if (!(node instanceof org.jsoup.nodes.TextNode)) {
                return;
            }

            final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
            final org.jsoup.nodes.Node parent = textNode.parent();
            if (!(parent instanceof Element)) {
                return;
            }

            final Element parentElem = (Element) parent;
            if (parentElem.tagName().equals("code")) {
                return;
            }

            String text = textNode.getWholeText();
            boolean nextIsBr = false;
            final org.jsoup.nodes.Node nextSibling = textNode.nextSibling();
            if (nextSibling instanceof Element) {
                nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName());
            }

            if (null != userQueryService) {
                try {
                    final Set<String> userNames = userQueryService.getUserNames(text);
                    for (final String userName : userNames) {
                        // A mention directly followed by <br> has no trailing space to match against
                        text = text.replace('@' + userName + (nextIsBr ? "" : " "), "@<a href='" + Latkes.getServePath() + "/member/" + userName + "'>" + userName + "</a> ");
                    }
                    text = text.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
                } finally {
                    JdbcRepository.dispose();
                }
            }

            if (text.contains("@<a href=")) {
                // The text now carries markup: parse it and insert the resulting nodes in front of the text node
                final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem, "");
                final int index = textNode.siblingIndex();
                parentElem.insertChildren(index, nodes);
                toRemove.add(node);
            } else {
                textNode.text(Pangu.spacingText(text));
            }
        }

        @Override
        public void tail(org.jsoup.nodes.Node node, int depth) {
        }
    });

    toRemove.forEach(node -> node.remove());
}

/**
 * Routes every link that does not start with the serve path through the {@code /forward?goto=} endpoint and makes
 * it open in a new window.
 *
 * @param doc the document to process in place
 */
private static void rewriteExternalLinks(final Document doc) {
    doc.select("a").forEach(a -> {
        String src = a.attr("href");
        if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) {
            try {
                src = URLEncoder.encode(src, "UTF-8");
            } catch (final Exception e) {
                // Best effort: keep the unencoded target, but do not hide the failure
                LOGGER.log(Level.WARN, "Failed to encode link target [href=" + src + "], using it unencoded", e);
            }

            a.attr("href", Latkes.getServePath() + "/forward?goto=" + src);
            a.attr("target", "_blank");
        }
    });
}

/**
 * Forcibly stops the thread with the specified id, if it is still alive.
 * <p>
 * NOTE(review): {@code Thread.stop()} is deprecated and throws {@code UnsupportedOperationException} on JDK 20+.
 * That failure is logged here rather than propagated, so the caller can still return its failure label.
 * </p>
 *
 * @param workerThreadId the id of the thread to stop
 */
private static void stopWorkerThread(final long workerThreadId) {
    final Set<Thread> threads = Thread.getAllStackTraces().keySet();
    for (final Thread thread : threads) {
        if (thread.getId() == workerThreadId) {
            try {
                thread.stop();
            } catch (final UnsupportedOperationException e) {
                LOGGER.log(Level.WARN, "Thread.stop() is not supported by this JVM: " + e.getMessage());
            }

            break;
        }
    }
}
use of org.jsoup.select.NodeVisitor in project jsoup by jhy.
the class Element method text.
/**
 * Returns the <b>normalized, combined text</b> of this element and everything beneath it. The result is trimmed and
 * its whitespace is normalized.
 * <p>For example, for the HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} yields {@code "Hello there
 * now!"}
 * <p>Use {@link #wholeText()} when normalization is not wanted, and {@link #ownText()} for the text of this node
 * alone (without children).
 * <p>Only content that would be presented to a reader is returned. The contents of data nodes (such as
 * {@code <script>} tags) are not considered text; use {@link #data()} or {@link #html()} to retrieve that content.
 *
 * @return unencoded, normalized text, or empty string if none.
 * @see #wholeText()
 * @see #ownText()
 * @see #textNodes()
 */
public String text() {
    final StringBuilder sb = StringUtil.borrowBuilder();
    final NodeVisitor textCollector = new NodeVisitor() {
        public void head(Node node, int depth) {
            if (node instanceof TextNode) {
                appendNormalisedText(sb, (TextNode) node);
                return;
            }
            if (!(node instanceof Element) || sb.length() == 0) {
                return;
            }
            // entering a block element or a <br>: separate it from the text gathered so far
            final Element el = (Element) node;
            final boolean startsNewLine = el.isBlock() || el.tag.normalName().equals("br");
            if (startsNewLine && !TextNode.lastCharIsWhitespace(sb)) {
                sb.append(' ');
            }
        }

        public void tail(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            // keep a space between a block tag and text that follows it directly: <div>One</div>Two gives "One Two"
            final Element el = (Element) node;
            final boolean textFollowsBlock = el.isBlock() && (node.nextSibling() instanceof TextNode);
            if (textFollowsBlock && !TextNode.lastCharIsWhitespace(sb)) {
                sb.append(' ');
            }
        }
    };
    NodeTraversor.traverse(textCollector, this);
    return StringUtil.releaseBuilder(sb).trim();
}
use of org.jsoup.select.NodeVisitor in project jsoup by jhy.
the class ElementTest method testTraverse.
// Checks that Element.traverse calls head() once per node and returns the element it was invoked on.
@Test
public void testTraverse() {
    final Document parsed = Jsoup.parse("<div><p>One<p>Two<p>Three");
    final Element root = parsed.selectFirst("div");
    assertNotNull(root);

    final AtomicInteger headCalls = new AtomicInteger(0);
    final NodeVisitor countingVisitor = new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            headCalls.incrementAndGet();
        }

        @Override
        public void tail(Node node, int depth) {
            // only head() visits are counted
        }
    };
    final Element returned = root.traverse(countingVisitor);

    // seven head() calls are expected for this markup
    assertEquals(7, headCalls.get());
    assertEquals(returned, root);
}
use of org.jsoup.select.NodeVisitor in project structr by structr.
the class MicroformatParser method unwrap.
/**
 * Unwraps every block element found while traversing the given element whose class-name set is empty after
 * {@code removeEmpty} has been applied to it.
 *
 * @param element the element whose tree is traversed
 */
private void unwrap(final Element element) {
    final Set<Element> classless = new LinkedHashSet<>();

    element.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            final Element candidate = (Element) node;
            if (!candidate.isBlock()) {
                return;
            }
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);
            if (classNames.isEmpty()) {
                classless.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // nothing to do when leaving a node
        }
    });

    // unwrap only after the traversal has finished, in the order the elements were collected
    for (final Element target : classless) {
        target.unwrap();
    }
}
Aggregations