Search in sources :

Example 46 with TextNode

use of org.jsoup.nodes.TextNode in project zrlog by 94fzb.

the class ParseUtil method autoDigest.

/**
 * Builds an HTML digest (summary) of the given HTML fragment.
 *
 * <p>The fragment is parsed, its nodes are gathered through {@code getAllTextNode}
 * (defined elsewhere), and for every gathered {@link TextNode} the outer HTML of its
 * parent is appended to the digest. Collection stops, and {@code " ..."} is appended,
 * once the accumulated text length exceeds {@code size}. If the input contains a
 * {@code <video>} element, the first one is placed in front of the digest followed by
 * {@code <br/>}.
 *
 * @param str  the HTML to summarise
 * @param size the text-length threshold after which the digest is cut off
 * @return the trimmed digest HTML
 */
public static String autoDigest(String str, int size) {
    Document fragment = Jsoup.parseBodyFragment(str);
    List<Node> collected = new ArrayList<>();
    getAllTextNode(fragment.childNodes(), collected);

    StringBuilder html = new StringBuilder();
    int seenChars = 0;
    for (Node candidate : collected) {
        if (!(candidate instanceof TextNode)) {
            continue;
        }
        // NOTE(review): the parent's HTML is appended once per text node, so a parent
        // holding several collected text nodes would be emitted repeatedly - confirm
        // against what getAllTextNode actually collects.
        html.append(candidate.parent().outerHtml());
        seenChars += ((TextNode) candidate).text().length();
        if (seenChars > size) {
            html.append(" ...");
            break;
        }
    }

    // The input is parsed a second time, as a full document, to look for a video.
    Elements videos = Jsoup.parse(str).body().select("video");
    String summary = html.toString();
    if (videos != null && !videos.isEmpty()) {
        summary = videos.get(0).toString() + "<br/>" + summary;
    }
    return summary.trim();
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) ArrayList(java.util.ArrayList) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements)

Example 47 with TextNode

use of org.jsoup.nodes.TextNode in project mylyn.docs by eclipse.

the class DocumentProcessorTest method testNormalizeTextNodes.

/**
 * Checks {@code normalizeTextNodes} on a root element holding three adjacent text
 * nodes, a {@code break} element and one more text node: the five children are
 * expected to become three, with the leading text reading "first second, third"
 * and the trailing text reading "fourth".
 */
@Test
public void testNormalizeTextNodes() {
    // Arrange: text, text, text, <break>, text
    Document doc = new Document("");
    Element root = doc.appendElement("root");
    root.appendText("first ").appendText("second,").appendText(" third");
    root.appendElement("break");
    root.appendText("fourth");
    assertEquals(5, root.childNodes().size());

    // Act
    TestDocumentProcessor.normalizeTextNodes(root);

    // Assert: text, <break>, text
    assertEquals(3, root.childNodes().size());

    assertTrue(root.childNode(0) instanceof TextNode);
    TextNode leadingText = (TextNode) root.childNode(0);
    assertEquals("first second, third", leadingText.text());

    assertTrue(root.childNode(2) instanceof TextNode);
    TextNode trailingText = (TextNode) root.childNode(2);
    assertEquals("fourth", trailingText.text());
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Test(org.junit.Test)

Example 48 with TextNode

use of org.jsoup.nodes.TextNode in project AozoraEpub3 by hmdev.

the class WebAozoraConverter method _printNode.

/**
 * Writes the children of {@code parent} to {@code bw}; calls itself recursively
 * for nested elements.
 *
 * <p>While {@code startElement} is set, nothing is written: the tree is only
 * descended until a node equal to {@code startElement} is met, at which point the
 * field is cleared. When a node equal to {@code endElement} is met, the current
 * invocation returns.
 *
 * @param bw     writer receiving the converted text
 * @param parent node whose child nodes are processed
 * @throws IOException if writing to {@code bw} fails
 */
private void _printNode(BufferedWriter bw, Node parent) throws IOException {
    for (Node child : parent.childNodes()) {
        // Still searching for the start marker: descend without producing output.
        if (startElement != null) {
            if (child.equals(startElement)) {
                startElement = null;
            } else if (child instanceof Element) {
                _printNode(bw, child);
            }
            continue;
        }
        if (endElement != null && child.equals(endElement)) {
            return;
        }
        if (child instanceof TextNode) {
            printText(bw, ((TextNode) child).getWholeText());
            continue;
        }
        if (!(child instanceof Element)) {
            // Neither text nor element: only the class name is printed to stdout.
            System.out.println(child.getClass().getName());
            continue;
        }

        Element elem = (Element) child;
        switch (elem.tagName()) {
            case "br":
                if (elem.nextSibling() != null) {
                    bw.append('\n');
                }
                break;
            case "div":
            case "p": {
                // Line break before the block unless it is first or follows a block node.
                Node before = elem.previousSibling();
                if (before != null && !isBlockNode(before)) {
                    bw.append('\n');
                }
                // output children
                _printNode(bw, child);
                if (elem.nextSibling() != null) {
                    bw.append('\n');
                }
                break;
            }
            case "ruby":
                // output ruby annotation
                printRuby(bw, elem);
                break;
            case "img":
                // cache the image and output its annotation (per the original comment)
                printImage(bw, elem);
                break;
            case "hr":
                if (!this.noHr) {
                    bw.append("[#区切り線]\n");
                } else {
                    // With noHr set, the element is handled like any other tag.
                    _printNode(bw, child);
                }
                break;
            case "b":
                bw.append("[#ここから太字]");
                _printNode(bw, child);
                bw.append("[#ここで太字終わり]");
                break;
            case "sup":
                bw.append("[#上付き小文字]");
                _printNode(bw, child);
                bw.append("[#上付き小文字終わり]");
                break;
            case "sub":
                bw.append("[#下付き小文字]");
                _printNode(bw, child);
                bw.append("[#下付き小文字終わり]");
                break;
            case "strike":
            case "s":
                bw.append("[#取消線]");
                _printNode(bw, child);
                bw.append("[#取消線終わり]");
                break;
            case "tr":
                _printNode(bw, child);
                bw.append('\n');
                break;
            default:
                // Any other tag: just output its children.
                _printNode(bw, child);
                break;
        }
    }
}
Also used : TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode)

Example 49 with TextNode

use of org.jsoup.nodes.TextNode in project solr-cmd-utils by tblsoft.

the class HtmlJsoupFilter method mapAllElements.

/**
 * For every element matched by {@code selector} in {@code jsoupDocument}, joins the
 * text of all text nodes found in that element and its descendant elements (each
 * followed by a single space), trims the result, and adds it to {@code document}
 * under {@code fieldName}. One field value is added per matched element.
 *
 * @param selector  selector passed to {@code jsoupDocument.select}
 * @param fieldName name of the field the collected text is added to
 */
public void mapAllElements(String selector, String fieldName) {
    for (Element matched : jsoupDocument.select(selector)) {
        StringBuilder collectedText = new StringBuilder();
        // getAllElements() is iterated for the matched element's whole subtree.
        for (Element inner : matched.getAllElements()) {
            for (TextNode piece : inner.textNodes()) {
                collectedText.append(piece.text()).append(" ");
            }
        }
        String fieldValue = collectedText.toString().trim();
        document.addField(fieldName, fieldValue);
    }
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Elements(org.jsoup.select.Elements)

Example 50 with TextNode

use of org.jsoup.nodes.TextNode in project Lightning-Browser by anthonycr.

the class OutputFormatter method appendTextSkipHidden.

/**
 * Appends the text beneath {@code e} to {@code accum}, recursing into child
 * elements and skipping every child for which {@code unlikely(child)} is true.
 *
 * <p>Before descending into a child element a single space is appended when either
 * the buffer is non-empty, the element is a block and the buffer does not already
 * end in whitespace, or the element is a {@code br} tag.
 *
 * @param e      element whose child nodes are visited
 * @param accum  buffer receiving the text
 * @param indent current recursion depth; only passed on (incremented) to nested calls
 */
private void appendTextSkipHidden(@NonNull Element e, @NonNull StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element nested = (Element) child;
        // Short-circuit order matches the original if / else-if pair.
        boolean separatorNeeded =
                (accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum))
                || nested.tagName().equals("br");
        if (separatorNeeded) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode)

Aggregations

TextNode (org.jsoup.nodes.TextNode): 52; Element (org.jsoup.nodes.Element): 41; Node (org.jsoup.nodes.Node): 37; Document (org.jsoup.nodes.Document): 19; ArrayList (java.util.ArrayList): 16; Elements (org.jsoup.select.Elements): 14; IOException (java.io.IOException): 6; DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 6; JSONException (org.json.JSONException): 6; Copy (de.geeksfactory.opacclient.objects.Copy): 5; DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem): 5; HashMap (java.util.HashMap): 5; NameValuePair (org.apache.http.NameValuePair): 5; BasicNameValuePair (org.apache.http.message.BasicNameValuePair): 5; Test (org.junit.jupiter.api.Test): 5; NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException): 4; Detail (de.geeksfactory.opacclient.objects.Detail): 4; UnsupportedEncodingException (java.io.UnsupportedEncodingException): 4; URI (java.net.URI): 4; Matcher (java.util.regex.Matcher): 4