Search in sources :

Example 66 with Node

use of org.jsoup.nodes.Node in project flow by vaadin.

the class JsoupUtils method removeCommentsRecursively.

/**
 * Strips every {@link Comment} found in the subtree below {@code node}.
 * <p>
 * Comment children are detached from {@code node} directly; every other
 * child is descended into so that nested comments are removed as well.
 *
 * @param node
 *            the Jsoup node whose subtree is cleaned in place
 */
static void removeCommentsRecursively(Node node) {
    for (int index = 0; index < node.childNodeSize();) {
        Node current = node.childNode(index);
        if (current instanceof Comment) {
            // The index is deliberately not advanced after a removal: the
            // next child to inspect is read from the same position.
            current.remove();
            continue;
        }
        removeCommentsRecursively(current);
        index++;
    }
}
Also used : Comment(org.jsoup.nodes.Comment) Node(org.jsoup.nodes.Node)

Example 67 with Node

use of org.jsoup.nodes.Node in project zrlog by 94fzb.

the class ParseUtil method autoDigest.

/**
 * Builds a short HTML digest of the given markup.
 * <p>
 * The markup is parsed as a body fragment and the nodes gathered by
 * {@code getAllTextNode} are walked in order. For each {@link TextNode} the
 * outer HTML of its parent is appended and the text length is added to a
 * running total; once that total exceeds {@code size}, {@code " ..."} is
 * appended and the walk stops. If the markup contains a {@code video}
 * element, the first one followed by {@code <br/>} is placed in front of the
 * digest.
 *
 * @param str  the HTML to summarise
 * @param size the text length after which the digest is cut off
 * @return the trimmed digest HTML
 */
public static String autoDigest(String str, int size) {
    List<Node> collected = new ArrayList<>();
    getAllTextNode(Jsoup.parseBodyFragment(str).childNodes(), collected);

    StringBuilder html = new StringBuilder();
    int textSoFar = 0;
    for (Node candidate : collected) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        TextNode text = (TextNode) candidate;
        html.append(text.parent().outerHtml());
        textSoFar += text.text().length();
        if (textSoFar > size) {
            // Limit crossed: mark the truncation and stop collecting.
            html.append(" ...");
            break;
        }
    }

    // The video lookup runs on a separate full-document parse of the input.
    Elements videos = Jsoup.parse(str).body().select("video");
    boolean hasVideo = videos != null && !videos.isEmpty();
    String digest = hasVideo
            ? videos.get(0).toString() + "<br/>" + html.toString()
            : html.toString();
    return digest.trim();
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 68 with Node

use of org.jsoup.nodes.Node in project pratilipi by Pratilipi.

the class PratilipiDocUtil method _setPage.

/**
 * Replaces the pagelets of {@code page} with pagelets derived from
 * {@code html}.
 * <p>
 * Existing pagelets are always deleted first. When {@code html} is
 * {@code null} or blank nothing else happens. Otherwise the parsed body is
 * checked with {@code _validateContent}; a non-null result is reported as an
 * {@link InvalidArgumentException} whose message lists the node names on the
 * path from the body down to the offending node. Each direct child of the
 * body is then mapped to a pagelet: {@code img} (or a {@code p} whose only
 * child is an {@code img}) becomes an IMAGE pagelet if image data could be
 * created, any other {@code p} becomes an HTML pagelet, and
 * {@code blockquote}, {@code ol}, {@code ul} become BLOCK_QUOTE,
 * LIST_ORDERED and LIST_UNORDERED pagelets. Other children are ignored.
 *
 * @param pratilipiId id passed through to {@code _createImageData}
 * @param page        the page whose pagelets are rebuilt
 * @param html        source markup; may be {@code null} or blank
 * @throws InvalidArgumentException  if content validation flags a node
 * @throws UnexpectedServerException presumably raised by one of the helpers
 *                                   called here (not visible in this method)
 */
private static void _setPage(Long pratilipiId, PratilipiContentDoc.Page page, String html) throws InvalidArgumentException, UnexpectedServerException {
    // Start from a clean page regardless of whether new content follows.
    page.deleteAllPagelets();
    if (html == null || html.trim().isEmpty()) {
        return;
    }

    Node body = Jsoup.parse(html).body();
    Node offender = _validateContent(body);
    if (offender != null) {
        // Walk up from the offending node to the body, prepending each name
        // so the path reads outermost-first.
        StringBuilder path = new StringBuilder();
        for (Node cursor = offender; cursor != body; cursor = cursor.parent()) {
            path.insert(0, " > " + cursor.nodeName());
        }
        throw new InvalidArgumentException("Invalid node " + path);
    }

    for (Node child : body.childNodes()) {
        String tag = child.nodeName();

        // An image is either a bare <img> or a <p> wrapping exactly one <img>.
        Node imageNode = null;
        if (tag.equals("img")) {
            imageNode = child;
        } else if (tag.equals("p") && child.childNodeSize() == 1 && child.childNode(0).nodeName().equals("img")) {
            imageNode = child.childNode(0);
        }

        if (imageNode != null) {
            JsonObject imgData = _createImageData(pratilipiId, imageNode);
            if (imgData != null) {
                page.addPagelet(PageletType.IMAGE, imgData);
            }
        } else if (tag.equals("p")) {
            page.addPagelet(PageletType.HTML, ((Element) child).html(), _getAlignment(child));
        } else if (tag.equals("blockquote")) {
            page.addPagelet(PageletType.BLOCK_QUOTE, ((Element) child).html());
        } else if (tag.equals("ol")) {
            page.addPagelet(PageletType.LIST_ORDERED, ((Element) child).html());
        } else if (tag.equals("ul")) {
            page.addPagelet(PageletType.LIST_UNORDERED, ((Element) child).html());
        }
    }
}
Also used : InvalidArgumentException(com.pratilipi.common.exception.InvalidArgumentException) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element) JsonElement(com.google.gson.JsonElement) JsonObject(com.google.gson.JsonObject)

Example 69 with Node

use of org.jsoup.nodes.Node in project mylyn.docs by eclipse.

the class RemoveExcessiveStylesProcessor method removeElementPreserveChildren.

/**
 * Unwraps {@code element}: each of its child nodes is detached and
 * re-inserted directly in front of it, then the element itself is removed.
 * If the element had a parent, that parent's text nodes are normalized
 * afterwards via {@code normalizeTextNodes}.
 *
 * @param element the element to remove while keeping its children
 */
private void removeElementPreserveChildren(Element element) {
    final Element container = element.parent();
    // Iterate over a copy of the child list, since each child is detached
    // from the element inside the loop.
    final ArrayList<Node> snapshot = new ArrayList<Node>(element.childNodes());
    for (int i = 0; i < snapshot.size(); i++) {
        final Node moved = snapshot.get(i);
        moved.remove();
        element.before(moved);
    }
    element.remove();
    if (container == null) {
        return;
    }
    normalizeTextNodes(container);
}
Also used : Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList)

Example 70 with Node

use of org.jsoup.nodes.Node in project AozoraEpub3 by hmdev.

the class WebAozoraConverter method _printNode.

/**
 * Writes the children of {@code parent} to {@code bw}; called recursively
 * for nested elements.
 * <p>
 * While the {@code startElement} field is non-null nothing is written: the
 * tree is only searched for that node, and the field is cleared once it is
 * found. When a child equal to the non-null {@code endElement} field is
 * reached, this invocation returns. Text nodes go through
 * {@code printText}; elements are handled per tag name (line breaks for
 * {@code br}/{@code div}/{@code p}/{@code tr}, annotation markers for
 * {@code hr}/{@code b}/{@code sup}/{@code sub}/{@code strike}/{@code s},
 * dedicated helpers for {@code ruby} and {@code img}); any other element
 * simply has its children printed. Nodes that are neither text nor element
 * have their class name written to standard output.
 *
 * @param bw     destination writer
 * @param parent node whose children are printed
 * @throws IOException if writing to {@code bw} fails
 */
private void _printNode(BufferedWriter bw, Node parent) throws IOException {
    for (Node node : parent.childNodes()) {
        if (startElement != null) {
            // Still looking for the start marker: descend, but emit nothing.
            if (node.equals(startElement)) {
                startElement = null;
            } else if (node instanceof Element) {
                _printNode(bw, node);
            }
            continue;
        }
        if (endElement != null && node.equals(endElement)) {
            return;
        }
        if (node instanceof TextNode) {
            printText(bw, ((TextNode) node).getWholeText());
            continue;
        }
        if (!(node instanceof Element)) {
            System.out.println(node.getClass().getName());
            continue;
        }

        final Element elem = (Element) node;
        final String tag = elem.tagName();
        if ("br".equals(tag)) {
            if (elem.nextSibling() != null) {
                bw.append('\n');
            }
        } else if ("div".equals(tag) || "p".equals(tag)) {
            final Node before = elem.previousSibling();
            if (before != null && !isBlockNode(before)) {
                bw.append('\n');
            }
            // Print the children.
            _printNode(bw, node);
            if (elem.nextSibling() != null) {
                bw.append('\n');
            }
        } else if ("ruby".equals(tag)) {
            // Emit the ruby annotation.
            printRuby(bw, elem);
        } else if ("img".equals(tag)) {
            // Cache the image and emit its annotation.
            printImage(bw, elem);
        } else if ("hr".equals(tag) && !this.noHr) {
            bw.append("[#区切り線]\n");
        } else if ("b".equals(tag)) {
            bw.append("[#ここから太字]");
            _printNode(bw, node);
            bw.append("[#ここで太字終わり]");
        } else if ("sup".equals(tag)) {
            bw.append("[#上付き小文字]");
            _printNode(bw, node);
            bw.append("[#上付き小文字終わり]");
        } else if ("sub".equals(tag)) {
            bw.append("[#下付き小文字]");
            _printNode(bw, node);
            bw.append("[#下付き小文字終わり]");
        } else if ("strike".equals(tag) || "s".equals(tag)) {
            bw.append("[#取消線]");
            _printNode(bw, node);
            bw.append("[#取消線終わり]");
        } else if ("tr".equals(tag)) {
            _printNode(bw, node);
            bw.append('\n');
        } else {
            // Any other element (including <hr> when noHr is set): just
            // print its children.
            _printNode(bw, node);
        }
    }
}
Also used : TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) Element(org.jsoup.nodes.Element)

Aggregations

Node (org.jsoup.nodes.Node)75 TextNode (org.jsoup.nodes.TextNode)52 Element (org.jsoup.nodes.Element)48 Document (org.jsoup.nodes.Document)29 ArrayList (java.util.ArrayList)19 Elements (org.jsoup.select.Elements)13 Test (org.junit.jupiter.api.Test)8 IOException (java.io.IOException)7 Copy (de.geeksfactory.opacclient.objects.Copy)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)5 HashMap (java.util.HashMap)5 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)5 JSONException (org.json.JSONException)5 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)4 Detail (de.geeksfactory.opacclient.objects.Detail)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 URI (java.net.URI)4 Matcher (java.util.regex.Matcher)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4