Search in sources :

Example 1 with TextNode

use of org.jsoup.nodes.TextNode in project sppanblog4springboot by whoismy8023.

the class HtmlFilter method truncateHTML.

/**
 * Builds a truncated preview of an HTML fragment with Jsoup.
 * <p>
 * Walks the child nodes of {@code source} in order and copies them into {@code dest} until
 * {@code dest} holds {@code len} characters of text, as measured by {@code dest.text().length()}.
 * Element children are re-created through {@code createSafeElement} and filled recursively;
 * text children are copied, and the one that crosses the limit is cut short. Nodes that are
 * neither elements nor text nodes are not copied.
 * <p>
 * Example:
 * <pre>{@code
 * Document dirtyDocument = Jsoup.parse(sb.toString());
 * Element source = dirtyDocument.body();
 * Document clean = Document.createShell(dirtyDocument.baseUri());
 * Element dest = clean.body();
 * int len = 6;
 * truncateHTML(source, dest, len);
 * System.out.println(dest.html());
 * }</pre>
 *
 * @param source element whose children are to be filtered and copied
 * @param dest   element that receives the filtered, truncated copy
 * @param len    maximum number of text characters {@code dest} may end up with
 */
private static void truncateHTML(Element source, Element dest, int len) {
    for (Node sourceChild : source.childNodes()) {
        // Recompute the budget from dest's current text on every iteration. len itself must
        // stay untouched: dest.text() is cumulative, so subtracting it from len more than
        // once would count already-copied text twice and truncate too early.
        int remaining = len - dest.text().length();
        if (remaining <= 0) {
            break;
        }
        if (sourceChild instanceof Element) {
            Element sourceEl = (Element) sourceChild;
            Element destChild = createSafeElement(sourceEl);
            dest.appendChild(destChild);
            // NOTE(review): assumes createSafeElement returns an element without text of its
            // own, so the nested call may use everything that is left - confirm.
            truncateHTML(sourceEl, destChild, remaining);
        } else if (sourceChild instanceof TextNode) {
            String wholeText = ((TextNode) sourceChild).getWholeText();
            if (wholeText.length() <= remaining) {
                dest.appendChild(new TextNode(wholeText, sourceChild.baseUri()));
                continue;
            }
            // This text node crosses the limit: keep only the part that still fits.
            int cut = remaining;
            if (Character.isHighSurrogate(wholeText.charAt(cut - 1))) {
                // Do not split a surrogate pair; drop the dangling high surrogate instead.
                cut--;
            }
            if (cut > 0) {
                dest.appendChild(new TextNode(wholeText.substring(0, cut), sourceChild.baseUri()));
            }
            break;
        }
    }
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element)

Example 2 with TextNode

use of org.jsoup.nodes.TextNode in project flow by vaadin.

the class TemplateParser method collectIncludeNodes.

/**
 * Gathers every text node below (and including) the given element whose raw text contains
 * {@code INCLUDE_PREFIX}.
 *
 * @param element root of the subtree to traverse
 * @return the matching text nodes, in the order the traversal finishes them
 */
private static List<TextNode> collectIncludeNodes(Element element) {
    final List<TextNode> matches = new ArrayList<>();
    NodeVisitor collector = new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            // nothing to do when entering a node
        }

        @Override
        public void tail(Node node, int depth) {
            if (!(node instanceof TextNode)) {
                return;
            }
            TextNode candidate = (TextNode) node;
            if (candidate.getWholeText().contains(INCLUDE_PREFIX)) {
                matches.add(candidate);
            }
        }
    };
    new NodeTraversor(collector).traverse(element);
    return matches;
}
Also used : TextNode(org.jsoup.nodes.TextNode) TemplateNode(com.vaadin.flow.template.angular.TemplateNode) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList) NodeTraversor(org.jsoup.select.NodeTraversor) NodeVisitor(org.jsoup.select.NodeVisitor)

Example 3 with TextNode

use of org.jsoup.nodes.TextNode in project ocreader by schaal.

the class ArticleWebView method prepareDocument.

/**
 * Rewrites parts of the document in place before it is shown:
 * single-character {@code alt} images that are emoji become plain text, and iframes are
 * replaced by link markup (or removed when they have no {@code src}).
 *
 * @param document the parsed document to modify
 */
private void prepareDocument(Document document) {
    // Some blog engines swap emoji for an image and keep the emoji itself in the alt text.
    // Look for images whose alt text is a single character and, if that character is an
    // emoji, put the unicode emoji back in place of the image.
    for (Element image : document.select("img[alt~=^.$]")) {
        final String altText = image.attr("alt");
        if (EmojiManager.isEmoji(altText)) {
            image.replaceWith(new TextNode(altText));
        }
    }

    for (Element frame : document.getElementsByTag("iframe")) {
        if (!frame.hasAttr("src")) {
            frame.remove();
            continue;
        }

        String target = frame.attr("src");
        // Default markup: a plain link built from the iframe's own src.
        String markup = String.format(Locale.US, videoLink, target, target);

        // The first known pattern that matches the url wins.
        for (IframePattern candidate : IframePattern.values()) {
            Matcher m = candidate.pattern.matcher(target);
            if (!m.matches()) {
                continue;
            }
            final String prefix = m.group(1);
            final String id = m.group(2);
            target = prefix + candidate.baseUrl + id;
            // Switch to the thumbnail markup when the pattern provides a thumbnail url.
            if (candidate.thumbUrl != null) {
                String thumbnail = String.format(candidate.thumbUrl, prefix, id);
                markup = String.format(Locale.US, videoThumbLink, target, thumbnail);
            }
            break;
        }

        frame.replaceWith(Jsoup.parse(markup).body().child(0));
    }
}
Also used : Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Elements(org.jsoup.select.Elements)

Example 4 with TextNode

use of org.jsoup.nodes.TextNode in project mylyn.docs by eclipse.

the class HtmlCleanerTest method testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2.

@Test
public void testTrailingWhitespaceBodyNoBlock_WhitespaceOutsideBody2() {
    // bug 406943
    // Body made of separate text nodes: a leading newline, the text, two trailing newlines.
    Document document = Document.createShell("");
    for (String chunk : new String[] { "\n", "text", "\n", "\n" }) {
        document.body().appendChild(new TextNode(chunk, ""));
    }
    assertEquals("<body>text</body>", cleanToBody(document));
}
Also used : TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Test(org.junit.Test)

Example 5 with TextNode

use of org.jsoup.nodes.TextNode in project mylyn.docs by eclipse.

the class DocumentProcessor method normalizeTextNodes.

/**
 * Normalizes the text node children of the given parent element. Adjacent text nodes are combined into a
 * single text node, and whitespace is normalized unless {@code Html.isWhitespacePreserve(parentElement)}
 * reports that the parent preserves whitespace.
 * <p>
 * Adjacent nodes are merged from their raw text ({@link TextNode#getWholeText()}), so merging never
 * collapses whitespace on its own; normalization happens only in the explicit step afterwards.
 *
 * @param parentElement
 *            the parent element whose children should be normalized
 * @see StringUtil#normaliseWhitespace(String)
 */
protected static void normalizeTextNodes(Element parentElement) {
    List<Node> children = parentElement.childNodes();
    if (children.isEmpty()) {
        return;
    }
    // Iterate over a snapshot: merged siblings are removed from the parent while looping.
    for (Node child : new ArrayList<Node>(children)) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode childTextNode = (TextNode) child;
        String wholeText = childTextNode.getWholeText();
        Node previousSibling = child.previousSibling();
        if (previousSibling instanceof TextNode) {
            // Fold the preceding text node into this one using its raw text. TextNode.text()
            // would return whitespace-normalised text and lose whitespace that a
            // whitespace-preserving parent must keep.
            wholeText = ((TextNode) previousSibling).getWholeText() + wholeText;
            previousSibling.remove();
        }
        if (!Html.isWhitespacePreserve(parentElement)) {
            wholeText = StringUtil.normaliseWhitespace(wholeText);
        }
        childTextNode.text(wholeText);
    }
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode)

Aggregations

TextNode (org.jsoup.nodes.TextNode) 52, Element (org.jsoup.nodes.Element) 41, Node (org.jsoup.nodes.Node) 37, Document (org.jsoup.nodes.Document) 19, ArrayList (java.util.ArrayList) 16, Elements (org.jsoup.select.Elements) 14, IOException (java.io.IOException) 6, DateTimeFormatter (org.joda.time.format.DateTimeFormatter) 6, JSONException (org.json.JSONException) 6, Copy (de.geeksfactory.opacclient.objects.Copy) 5, DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem) 5, HashMap (java.util.HashMap) 5, NameValuePair (org.apache.http.NameValuePair) 5, BasicNameValuePair (org.apache.http.message.BasicNameValuePair) 5, Test (org.junit.jupiter.api.Test) 5, NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException) 4, Detail (de.geeksfactory.opacclient.objects.Detail) 4, UnsupportedEncodingException (java.io.UnsupportedEncodingException) 4, URI (java.net.URI) 4, Matcher (java.util.regex.Matcher) 4