Search in sources :

Example 6 with NodeVisitor

use of org.jsoup.select.NodeVisitor in project jsoup by jhy.

Method setBaseUri of class Node.

/**
     Update the base URI of this node and all of its descendants.
     @param baseUri base URI to set; checked with {@code Validate.notNull} before anything is modified
     */
public void setBaseUri(final String baseUri) {
    Validate.notNull(baseUri);
    // The visitor only acts on the way down; every visited node gets the same base URI.
    final NodeVisitor baseUriSetter = new NodeVisitor() {

        @Override
        public void head(Node visited, int depth) {
            visited.baseUri = baseUri;
        }

        @Override
        public void tail(Node visited, int depth) {
            // nothing to do when leaving a node
        }
    };
    traverse(baseUriSetter);
}
Also used : NodeVisitor(org.jsoup.select.NodeVisitor)

Example 7 with NodeVisitor

use of org.jsoup.select.NodeVisitor in project symphony by b3log.

Method toHTML of class Markdowns.

/**
 * Converts the specified markdown text to HTML.
 *
 * <p>The conversion runs on a dedicated single-thread executor and is abandoned after {@code MD_TIMEOUT}
 * milliseconds. The rendered HTML is post-processed before it is cached and returned:
 * <ul>
 * <li>{@code @userName} mentions in text outside {@code <code>} elements become member links;</li>
 * <li>other text outside {@code <code>} elements is passed through {@code Pangu.spacingText};</li>
 * <li>{@code pre>code} elements get the {@code hljs} class;</li>
 * <li>links that do not start with the serve path are routed through {@code /forward?goto=}.</li>
 * </ul>
 *
 * @param markdownText the specified markdown text
 * @return converted HTML, returns an empty string "" if the specified markdown text is "" or {@code null}, returns
 * the 'contentRenderFailedLabel' language label if the conversion times out or fails
 */
public static String toHTML(final String markdownText) {
    if (Strings.isEmptyOrNull(markdownText)) {
        return "";
    }
    final String cachedHTML = getHTML(markdownText);
    if (null != cachedHTML) {
        return cachedHTML;
    }
    final ExecutorService pool = Executors.newSingleThreadExecutor();
    // Filled in by the worker so that the timeout handler can find the rendering thread.
    final long[] threadId = new long[1];
    final Callable<String> call = () -> {
        threadId[0] = Thread.currentThread().getId();
        String html;
        if (MARKED_AVAILABLE) {
            try {
                html = toHtmlByMarked(markdownText);
            } catch (final Exception e) {
                LOGGER.log(Level.WARN, "Failed to use [marked] for markdown [md=" + StringUtils.substring(markdownText, 0, 256) + "]: " + e.getMessage());
                html = renderWithFlexmarkFallback(markdownText);
            }
        } else {
            html = renderWithFlexmarkFallback(markdownText);
        }
        // Whichever renderer produced the HTML, make sure it starts with a paragraph.
        if (!StringUtils.startsWith(html, "<p>")) {
            html = "<p>" + html + "</p>";
        }
        final Document doc = Jsoup.parse(html);
        // Text nodes replaced by parsed fragments are removed only after the traversal has finished.
        final List<org.jsoup.nodes.Node> toRemove = new ArrayList<>();
        doc.traverse(new NodeVisitor() {

            @Override
            public void head(final org.jsoup.nodes.Node node, int depth) {
                if (node instanceof org.jsoup.nodes.TextNode) {
                    final org.jsoup.nodes.TextNode textNode = (org.jsoup.nodes.TextNode) node;
                    final org.jsoup.nodes.Node parent = textNode.parent();
                    if (parent instanceof Element) {
                        final Element parentElem = (Element) parent;
                        // Text directly inside <code> is left untouched.
                        if (!parentElem.tagName().equals("code")) {
                            String text = textNode.getWholeText();
                            boolean nextIsBr = false;
                            final org.jsoup.nodes.Node nextSibling = textNode.nextSibling();
                            if (nextSibling instanceof Element) {
                                nextIsBr = "br".equalsIgnoreCase(((Element) nextSibling).tagName());
                            }
                            if (null != userQueryService) {
                                try {
                                    final Set<String> userNames = userQueryService.getUserNames(text);
                                    for (final String userName : userNames) {
                                        // A mention followed by <br> has no trailing space to match.
                                        text = text.replace('@' + userName + (nextIsBr ? "" : " "), "@<a href='" + Latkes.getServePath() + "/member/" + userName + "'>" + userName + "</a> ");
                                    }
                                    text = text.replace("@participants ", "@<a href='https://hacpai.com/article/1458053458339' class='ft-red'>participants</a> ");
                                } finally {
                                    JdbcRepository.dispose();
                                }
                            }
                            if (text.contains("@<a href=")) {
                                // The text now carries markup: parse it and insert the nodes at the text node's position.
                                final List<org.jsoup.nodes.Node> nodes = Parser.parseFragment(text, parentElem, "");
                                final int index = textNode.siblingIndex();
                                parentElem.insertChildren(index, nodes);
                                toRemove.add(node);
                            } else {
                                textNode.text(Pangu.spacingText(text));
                            }
                        }
                    }
                }
            }

            @Override
            public void tail(org.jsoup.nodes.Node node, int depth) {
            }
        });
        toRemove.forEach(node -> node.remove());
        doc.select("pre>code").addClass("hljs");
        doc.select("a").forEach(a -> {
            String src = a.attr("href");
            if (!StringUtils.startsWithIgnoreCase(src, Latkes.getServePath())) {
                try {
                    src = URLEncoder.encode(src, "UTF-8");
                } catch (final Exception e) {
                    // Best effort: keep the un-encoded link, but do not hide the failure.
                    LOGGER.log(Level.WARN, "Failed to encode link [href=" + src + "]", e);
                }
                a.attr("href", Latkes.getServePath() + "/forward?goto=" + src);
                a.attr("target", "_blank");
            }
        });
        doc.outputSettings().prettyPrint(false);
        String ret = doc.select("body").html();
        ret = StringUtils.trim(ret);
        // cache it
        putHTML(markdownText, ret);
        return ret;
    };
    Stopwatchs.start("Md to HTML");
    try {
        final Future<String> future = pool.submit(call);
        return future.get(MD_TIMEOUT, TimeUnit.MILLISECONDS);
    } catch (final TimeoutException e) {
        LOGGER.log(Level.ERROR, "Markdown timeout [md=" + StringUtils.substring(markdownText, 0, 256) + "]");
        Callstacks.printCallstack(Level.ERROR, new String[] { "org.b3log" }, null);
        final Set<Thread> threads = Thread.getAllStackTraces().keySet();
        for (final Thread thread : threads) {
            if (thread.getId() == threadId[0]) {
                try {
                    // NOTE(review): Thread.stop() is deprecated and unsafe; kept because the renderer may ignore interrupts.
                    thread.stop();
                } catch (final UnsupportedOperationException unsupported) {
                    // Newer JDKs refuse Thread.stop(); fall back to an interrupt instead of failing the caller.
                    thread.interrupt();
                }
                break;
            }
        }
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + StringUtils.substring(markdownText, 0, 256) + "]", e);
    } catch (final Exception e) {
        LOGGER.log(Level.ERROR, "Markdown failed [md=" + StringUtils.substring(markdownText, 0, 256) + "]", e);
    } finally {
        pool.shutdownNow();
        Stopwatchs.end();
    }
    return LANG_PROPS_SERVICE.get("contentRenderFailedLabel");
}

/**
 * Renders the specified markdown text with flexmark; used when [marked] is unavailable or has failed.
 *
 * @param markdownText the specified markdown text
 * @return the HTML produced by the flexmark renderer
 */
private static String renderWithFlexmarkFallback(final String markdownText) {
    final com.vladsch.flexmark.ast.Node document = PARSER.parse(markdownText);
    return RENDERER.render(document);
}
Also used : NodeVisitor(org.jsoup.select.NodeVisitor) HttpURLConnection(java.net.HttpURLConnection) StringUtils(org.apache.commons.lang.StringUtils) DataHolder(com.vladsch.flexmark.util.options.DataHolder) URL(java.net.URL) LatkeBeanManagerImpl(org.b3log.latke.ioc.LatkeBeanManagerImpl) Parser(org.jsoup.parser.Parser) ArrayList(java.util.ArrayList) Cache(org.b3log.latke.cache.Cache) JSONObject(org.json.JSONObject) Level(org.b3log.latke.logging.Level) Element(org.jsoup.nodes.Element) Logger(org.b3log.latke.logging.Logger) Whitelist(org.jsoup.safety.Whitelist) OutputStream(java.io.OutputStream) Common(org.b3log.symphony.model.Common) java.util.concurrent(java.util.concurrent) Set(java.util.Set) LangPropsService(org.b3log.latke.service.LangPropsService) LatkeBeanManager(org.b3log.latke.ioc.LatkeBeanManager) Lifecycle(org.b3log.latke.ioc.Lifecycle) Extensions(com.vladsch.flexmark.profiles.pegdown.Extensions) IOUtils(org.apache.commons.io.IOUtils) Latkes(org.b3log.latke.Latkes) Callstacks(org.b3log.latke.util.Callstacks) URLEncoder(java.net.URLEncoder) List(java.util.List) PegdownOptionsAdapter(com.vladsch.flexmark.profiles.pegdown.PegdownOptionsAdapter) Strings(org.b3log.latke.util.Strings) Document(org.jsoup.nodes.Document) LangPropsServiceImpl(org.b3log.latke.service.LangPropsServiceImpl) UserQueryService(org.b3log.symphony.service.UserQueryService) Stopwatchs(org.b3log.latke.util.Stopwatchs) Jsoup(org.jsoup.Jsoup) Elements(org.jsoup.select.Elements) DigestUtils(org.apache.commons.codec.digest.DigestUtils) CacheFactory(org.b3log.latke.cache.CacheFactory) JdbcRepository(org.b3log.latke.repository.jdbc.JdbcRepository) HtmlRenderer(com.vladsch.flexmark.html.HtmlRenderer) InputStream(java.io.InputStream) Set(java.util.Set) Element(org.jsoup.nodes.Element) Document(org.jsoup.nodes.Document) NodeVisitor(org.jsoup.select.NodeVisitor) ArrayList(java.util.ArrayList) List(java.util.List)

Example 8 with NodeVisitor

use of org.jsoup.select.NodeVisitor in project jsoup by jhy.

Method text of class Element.

/**
 * Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
 * trimmed.
 * <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns
 * {@code "Hello there now!"}
 * <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
 * children), use {@link #ownText()}
 * <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
 * nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
 * that content.
 *
 * @return unencoded, normalized text, or empty string if none.
 * @see #wholeText()
 * @see #ownText()
 * @see #textNodes()
 */
public String text() {
    final StringBuilder accum = StringUtil.borrowBuilder();
    final NodeVisitor textCollector = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            if (node instanceof TextNode) {
                appendNormalisedText(accum, (TextNode) node);
                return;
            }
            if (!(node instanceof Element)) {
                return;
            }
            // Nothing collected yet: there is no preceding text to separate from.
            if (accum.length() == 0) {
                return;
            }
            final Element entered = (Element) node;
            if (!entered.isBlock() && !entered.tag.normalName().equals("br")) {
                return;
            }
            // Entering a block or <br>: separate it from the preceding text with a single space.
            if (!TextNode.lastCharIsWhitespace(accum)) {
                accum.append(' ');
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // make sure there is a space between block tags and immediately following text nodes <div>One</div>Two should be "One Two".
            if (!(node instanceof Element)) {
                return;
            }
            final Element left = (Element) node;
            if (!left.isBlock()) {
                return;
            }
            if (!(node.nextSibling() instanceof TextNode)) {
                return;
            }
            if (!TextNode.lastCharIsWhitespace(accum)) {
                accum.append(' ');
            }
        }
    };
    NodeTraversor.traverse(textCollector, this);
    final String collected = StringUtil.releaseBuilder(accum);
    return collected.trim();
}
Also used : NodeVisitor(org.jsoup.select.NodeVisitor)

Example 9 with NodeVisitor

use of org.jsoup.select.NodeVisitor in project jsoup by jhy.

Method testTraverse of class ElementTest.

/**
 * Checks that {@code Element.traverse} calls {@code head} once per visited node and returns the element it was
 * invoked on.
 */
@Test
public void testTraverse() {
    final Document parsed = Jsoup.parse("<div><p>One<p>Two<p>Three");
    final Element root = parsed.selectFirst("div");
    assertNotNull(root);

    final AtomicInteger visits = new AtomicInteger(0);
    final NodeVisitor countingVisitor = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            visits.incrementAndGet();
        }

        @Override
        public void tail(Node node, int depth) {
            // only head visits are counted
        }
    };
    final Element returned = root.traverse(countingVisitor);

    // Presumably the div itself, its three <p> children and their three text nodes.
    assertEquals(7, visits.get());
    assertEquals(returned, root);
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NodeVisitor(org.jsoup.select.NodeVisitor) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 10 with NodeVisitor

use of org.jsoup.select.NodeVisitor in project structr by structr.

Method unwrap of class MicroformatParser.

/**
 * Unwraps every block element reached by traversing the given element whose class-name set is empty after
 * {@code removeEmpty} has been applied to it.
 *
 * <p>Candidates are collected first and unwrapped afterwards, in the order they were encountered, so the tree
 * is not modified while it is being traversed.
 *
 * @param element the element whose subtree is traversed
 */
private void unwrap(final Element element) {
    final Set<Element> pending = new LinkedHashSet<>();
    final NodeVisitor collector = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            final Element candidate = (Element) node;
            if (!candidate.isBlock()) {
                return;
            }
            final Set<String> classNames = candidate.classNames();
            // presumably drops blank entries from the set; see removeEmpty for the exact rule
            removeEmpty(classNames);
            if (classNames.isEmpty()) {
                pending.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // nothing to do when leaving a node
        }
    };
    element.traverse(collector);
    pending.forEach(Element::unwrap);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) NodeVisitor(org.jsoup.select.NodeVisitor)

Aggregations

NodeVisitor (org.jsoup.select.NodeVisitor)10 Node (org.jsoup.nodes.Node)5 Element (org.jsoup.nodes.Element)4 ArrayList (java.util.ArrayList)3 Set (java.util.Set)3 LinkedHashSet (java.util.LinkedHashSet)2 NonNull (androidx.annotation.NonNull)1 GalleryComment (com.hippo.ehviewer.client.data.GalleryComment)1 GalleryCommentList (com.hippo.ehviewer.client.data.GalleryCommentList)1 MutableBoolean (com.hippo.util.MutableBoolean)1 TemplateNode (com.vaadin.flow.template.angular.TemplateNode)1 HtmlRenderer (com.vladsch.flexmark.html.HtmlRenderer)1 Extensions (com.vladsch.flexmark.profiles.pegdown.Extensions)1 PegdownOptionsAdapter (com.vladsch.flexmark.profiles.pegdown.PegdownOptionsAdapter)1 DataHolder (com.vladsch.flexmark.util.options.DataHolder)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 HttpURLConnection (java.net.HttpURLConnection)1 URL (java.net.URL)1 URLEncoder (java.net.URLEncoder)1