Search in sources :

Example 1 with TextNode

use of org.jsoup.nodes.TextNode in project sppanblog4springboot by whoismy8023.

the class HtmlFilter method truncateHTML.

/**
 * Builds a preview using Jsoup.
 * @param source the element to be filtered
 * @param dest   the element that holds the filtered result
 * @param len    number of characters to keep when truncating
 *               <p>
 *               Document dirtyDocument = Jsoup.parse(sb.toString());<br />
 *               Element source = dirtyDocument.body();<br />
 *               Document clean = Document.createShell(dirtyDocument.baseUri());<br />
 *               Element dest = clean.body();<br />
 *               int len = 6;<br />
 *               truncateHTML(source,dest,len);<br />
 *               System.out.println(dest.html());<br />
 */
private static void truncateHTML(Element source, Element dest, int len) {
    // Walks the direct children of source: Element children are handled by a recursive
    // call, TextNode children by copying (or cutting) their text. len is compared against
    // the length of dest.text() throughout.
    // NOTE(review): this excerpt appears to have lost lines (closing braces, the bodies of
    // the "limit reached" branches, and any call that attaches the new nodes to dest) --
    // restore them from the original project before relying on it.
    List<Node> sourceChildren = source.childNodes();
    for (Node sourceChild : sourceChildren) {
        if (sourceChild instanceof Element) {
            Element sourceEl = (Element) sourceChild;
            // createSafeElement is defined outside this excerpt; presumably it builds the
            // destination counterpart of sourceEl -- TODO confirm.
            Element destChild = createSafeElement(sourceEl);
            // Length of the text currently held by dest.
            int txt = dest.text().length();
            if (txt >= len) {
                // dest already holds at least len characters; no statement is visible here.
            } else {
                // Reduce the limit by what dest already holds before recursing.
                len = len - txt;
            truncateHTML(sourceEl, destChild, len);
        } else if (sourceChild instanceof TextNode) {
            // Length of the text currently held by dest.
            int destLeng = dest.text().length();
            // NOTE(review): the body of this limit check is missing from the excerpt.
            if (destLeng >= len) {
            TextNode sourceText = (TextNode) sourceChild;
            int txtLeng = sourceText.getWholeText().length();
            if ((destLeng + txtLeng) > len) {
                // Copying the whole text would exceed len: keep only the first
                // (len - destLeng) characters.
                int tmp = len - destLeng;
                String txt = sourceText.getWholeText().substring(0, tmp);
                TextNode destText = new TextNode(txt, sourceChild.baseUri());
            } else {
                // The whole text fits within len, so it is copied unchanged.
                TextNode destText = new TextNode(sourceText.getWholeText(), sourceChild.baseUri());
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode)

Example 2 with TextNode

use of org.jsoup.nodes.TextNode in project flow by vaadin.

the class TemplateParser method collectIncludeNodes.

/**
 * Collects text nodes found below the given element and returns them as a list.
 * A node is inspected in the visitor's tail callback: it must be a TextNode whose
 * whole text contains INCLUDE_PREFIX (a constant declared outside this excerpt).
 * NOTE(review): this excerpt has lost lines -- the closing braces, the body of the
 * INCLUDE_PREFIX check (presumably adding the node to includeNodes -- TODO confirm)
 * and the call that starts the traversal on element are not visible here.
 *
 * @param element the element handed to this method; its use is not visible in the excerpt
 * @return the list of collected text nodes
 */
private static List<TextNode> collectIncludeNodes(Element element) {
    List<TextNode> includeNodes = new ArrayList<>();
    new NodeTraversor(new NodeVisitor() {

        // Intentionally does nothing when a node is entered.
        public void head(Node node, int depth) {
        // nop

        // Inspects each node when the traversal leaves it.
        public void tail(Node node, int depth) {
            if (node instanceof TextNode) {
                TextNode textNode = (TextNode) node;
                // getWholeText() is used for the prefix check rather than text().
                String text = textNode.getWholeText();
                if (text.contains(INCLUDE_PREFIX)) {
    return includeNodes;
Also used : TextNode(org.jsoup.nodes.TextNode) TemplateNode(com.vaadin.flow.template.angular.TemplateNode) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList) TextNode(org.jsoup.nodes.TextNode) NodeTraversor(org.jsoup.select.NodeTraversor) NodeVisitor(org.jsoup.select.NodeVisitor)

Example 3 with TextNode

use of org.jsoup.nodes.TextNode in project ocreader by schaal.

the class ArticleWebView method prepareDocument.

/**
 * Rewrites parts of the given document in two passes: img elements whose alt text is
 * an emoji are replaced by a text node holding that emoji, and iframe elements with a
 * src attribute are turned into an HTML snippet built from the videoLink /
 * videoThumbLink format strings (declared outside this excerpt).
 * NOTE(review): this excerpt has lost text during extraction -- several right-hand
 * sides are empty, closing braces are missing, and the code that uses the generated
 * html string is not visible. Restore from the original project before compiling.
 *
 * @param document the document to modify
 */
private void prepareDocument(Document document) {
    // Some blog engines replace emojis with an image and place the emoji in the image tag.
    // Find images with the tag being a single character and check if they are emoji. Then
    // replace the img with the actual emoji in unicode.
    // NOTE(review): the call in front of the selector string is missing; presumably a
    // select on the document -- TODO confirm.
    Elements imgs ="img[alt~=^.$]");
    for (Element img : imgs) {
        final String possibleEmoji = img.attr("alt");
        if (EmojiManager.isEmoji(possibleEmoji))
            img.replaceWith(new TextNode(possibleEmoji));
    Elements iframes = document.getElementsByTag("iframe");
    for (Element iframe : iframes) {
        if (iframe.hasAttr("src")) {
            String href = iframe.attr("src");
            // Default snippet: the iframe source is used for both format arguments.
            String html = String.format(Locale.US, videoLink, href, href);
            // Check if url matches any known patterns
            for (IframePattern iframePattern : IframePattern.values()) {
                Matcher matcher = iframePattern.pattern.matcher(href);
                if (matcher.matches()) {
                    // NOTE(review): both initializers below are missing; presumably
                    // values taken from the matcher -- TODO confirm.
                    final String videoId =;
                    String urlPrefix =;
                    // Rebuild the link from the pattern's base URL and the video id.
                    href = urlPrefix + iframePattern.baseUrl + videoId;
                    // use thumbnail if available
                    if (iframePattern.thumbUrl != null) {
                        String thumbUrl = String.format(iframePattern.thumbUrl, urlPrefix, videoId);
                        html = String.format(Locale.US, videoThumbLink, href, thumbUrl);
        // NOTE(review): the body of this else branch is not part of the excerpt.
        } else {
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Elements(org.jsoup.select.Elements)

Example 4 with TextNode

use of org.jsoup.nodes.TextNode in project mylyn.docs by eclipse.

the class HtmlCleanerTest method testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2.

// Builds a shell document whose body holds four text nodes ("\n", "text", "\n", "\n")
// and checks that cleanToBody (a helper defined outside this excerpt) yields
// "<body>text</body>" for it.
// NOTE(review): the method's closing brace is missing from this excerpt.
public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() {
    // bug 406943
    Document document = Document.createShell("");
    // One whitespace-only node before the text and two after it.
    document.body().appendChild(new TextNode("\n", ""));
    document.body().appendChild(new TextNode("text", ""));
    document.body().appendChild(new TextNode("\n", ""));
    document.body().appendChild(new TextNode("\n", ""));
    String result = cleanToBody(document);
    // Only the non-whitespace text is expected to remain inside the body.
    assertEquals("<body>text</body>", result);
Also used : TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Test(org.junit.Test)

Example 5 with TextNode

use of org.jsoup.nodes.TextNode in project mylyn.docs by eclipse.

the class DocumentProcessor method normalizeTextNodes.

/**
 * Normalizes the text node children of the given parent element. Ensures that adjacent text nodes are combined into a
 * single text node, and whitespace is normalized.
 *
 * @param parentElement
 *            the parent element whose children should be normalized
 * @see StringUtil#normaliseWhitespace(String)
 */
protected static void normalizeTextNodes(Element parentElement) {
    // NOTE(review): this excerpt has lost lines -- closing braces are missing, nothing
    // visible removes the merged previous sibling, and the normalized wholeText is not
    // written back in the lines shown. Restore from the original project.
    List<Node> children = parentElement.childNodes();
    if (!children.isEmpty()) {
        // Iterate over a copy of the child list; presumably because the loop modifies
        // the parent's children -- TODO confirm.
        children = new ArrayList<Node>(children);
        for (Node child : children) {
            if (child instanceof TextNode) {
                TextNode childTextNode = (TextNode) child;
                Node previousSibling = child.previousSibling();
                if (previousSibling instanceof TextNode) {
                    // Fold the preceding text node's text into this one.
                    TextNode previousSiblingTextNode = (TextNode) previousSibling;
                    childTextNode.text(previousSiblingTextNode.text() + childTextNode.text());
                String wholeText = childTextNode.getWholeText();
                // Whitespace is normalized only when Html.isWhitespacePreserve returns
                // false for the parent element.
                if (!Html.isWhitespacePreserve(parentElement)) {
                    wholeText = StringUtil.normaliseWhitespace(wholeText);
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) TextNode(org.jsoup.nodes.TextNode)


TextNode (org.jsoup.nodes.TextNode)52 Element (org.jsoup.nodes.Element)41 Node (org.jsoup.nodes.Node)37 Document (org.jsoup.nodes.Document)19 ArrayList (java.util.ArrayList)16 Elements (org.jsoup.select.Elements) IOException (java.io.IOException) DateTimeFormatter (org.joda.time.format.DateTimeFormatter)6 JSONException (org.json.JSONException)6 Copy (de.geeksfactory.opacclient.objects.Copy)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)5 HashMap (java.util.HashMap)5 NameValuePair (org.apache.http.NameValuePair)5 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)5 Test (org.junit.jupiter.api.Test)5 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)4 Detail (de.geeksfactory.opacclient.objects.Detail)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 URI (java.net.URI)4 Matcher (java.util.regex.Matcher)4