Search in sources :

Example 11 with TextNode

use of org.jsoup.nodes.TextNode in project McWiki by skylerdev.

the class CommandWiki method buildPages.

/**
 * Converts a parsed wiki article into a list of serialized book pages.
 *
 * <p>Resulting layout: index 0 is the title page, index 1 is the table of
 * contents (inserted as a placeholder first and overwritten once all sections
 * are known), followed by the content pages, followed by an end page.
 *
 * <p>Content pages are filled from the selected {@code p}, {@code h2} and
 * {@code h3} elements. A running character estimate ({@code currentPageSize})
 * decides when the current page is flushed and a new one is started.
 *
 * @param doc   the parsed article document
 * @param title the article title, passed on to the title and end pages
 * @param url   the article URL, passed on to the title and end pages
 * @return the pages in reading order, each one a serialized string
 */
private List<String> buildPages(Document doc, String title, String url) {
    // NOTE(review): the commas separate selector groups, so "h2" and "h3" match
    // anywhere in the document, not only as direct children of the content div —
    // confirm this is intended.
    Elements main = doc.select("div[id=mw-content-text] > p, h2, h3");
    ArrayList<String> pages = new ArrayList<String>();
    pages.add(titlePage(title, url));
    // Reserves index 1 for the contents page; replaced by pages.set(1, ...) below.
    pages.add("Table of contents placeholder");
    JSONArray contentsPage = newPage();
    MCJson contentsHead = new MCJson("Contents\n\n", "dark_gray");
    contentsHead.setBold(true);
    contentsPage.add(contentsHead);
    JSONArray currentPage = newPage();
    // Running estimate of how many characters are on the current page.
    int currentPageSize = 0;
    // Page budget; the current page is flushed once the estimate exceeds it.
    int maxChars = 230;
    // Shared separator component, added after headings.
    MCJson space = new MCJson(" ");
    for (Element mainchild : main) {
        // Flush a page that the previous element pushed over the budget.
        if (currentPageSize > maxChars) {
            pages.add(currentPage.toString());
            currentPageSize = 0;
            currentPage = newPage();
        }
        if (mainchild.is("h2")) {
            if (isOmitted(mainchild)) {
                continue;
            }
            // Every h2 section starts on a fresh page. The current page is
            // flushed unconditionally, i.e. without checking whether anything
            // was added to it.
            pages.add(currentPage.toString());
            currentPage = newPage();
            // Fixed size charged for the heading line (not derived from its length).
            currentPageSize = 20;
            String h = mainchild.text().replaceAll("\\[edit\\]", "");
            currentPage.add(new MCJson(h, header2));
            currentPage.add(space);
            // Back link: page "2" is the contents page (list index 1).
            MCJson backButton = new MCJson("«", link);
            backButton.setHover("show_text", "Back to contents");
            backButton.setClick("change_page", "2");
            currentPage.add(backButton);
            currentPage.add(new MCJson("\n"));
            // Contents entry: the section page has not been appended yet, so its
            // list index will be pages.size(); "+ 1" turns that into a 1-based
            // page number.
            MCJson contentslink = new MCJson(h, link);
            contentslink.setHover("show_text", "Jump to this section");
            contentslink.setClick("change_page", "" + (pages.size() + 1));
            contentsPage.add(contentslink);
            contentsPage.add(new MCJson("\n"));
        } else if (mainchild.is("h3")) {
            // Sub-headings stay on the current page and get no contents entry.
            String h = mainchild.text().replaceAll("\\[edit\\]", "");
            currentPage.add(new MCJson(h, header3));
            currentPage.add(space);
            currentPageSize += h.length() + 2;
        } else if (mainchild.is("p")) {
            List<Node> inner = mainchild.childNodes();
            for (Node n : inner) {
                // Same overflow check as the outer loop, applied per child node.
                if (currentPageSize > maxChars) {
                    pages.add(currentPage.toString());
                    currentPageSize = 0;
                    currentPage = newPage();
                }
                if (n instanceof Element) {
                    Element e = (Element) n;
                    if (e.is("a")) {
                        String linkto = e.attr("href");
                        MCJson a = new MCJson(e.text(), link);
                        if (linkto.startsWith("/")) {
                            // Site-relative link: re-run the wiki command with the
                            // path minus its leading slash.
                            a.setClick("run_command", "/wiki " + linkto.substring(1));
                            a.setHover("show_text", "Click to show this article.");
                        } else {
                            a.setClick("open_url", linkto);
                            a.setHover("show_text", "External Link");
                        }
                        currentPage.add(a);
                    } else if (e.is("b")) {
                        currentPage.add(new MCJson(e.text(), bold));
                    } else if (e.is("i")) {
                        currentPage.add(new MCJson(e.text(), italic));
                    }
                    // The text length is counted for every element, including
                    // ones that are not a/b/i and therefore were not added above.
                    currentPageSize += e.text().length();
                }
                if (n instanceof TextNode) {
                    TextNode t = (TextNode) n;
                    String s = t.text();
                    // Text that would come within 10 characters of the budget is
                    // split across two pages.
                    if (currentPageSize + s.length() > maxChars - 10) {
                        // Look for the last space at or before the remaining
                        // budget. If currentPageSize already exceeds maxChars the
                        // start index is negative and lastIndexOf returns -1.
                        int splitAt = s.lastIndexOf(" ", maxChars - currentPageSize);
                        if (splitAt < 0) {
                            // No usable break point: the whole text moves to a new page.
                            pages.add(currentPage.toString());
                            currentPage = newPage();
                            currentPage.add(new MCJson(s));
                            currentPageSize = s.length();
                        } else {
                            // Split at the space: head stays on this page, tail
                            // opens the next one; the space itself is dropped.
                            String firstString = s.substring(0, splitAt);
                            String nextString = s.substring(splitAt + 1);
                            currentPage.add(new MCJson(firstString));
                            pages.add(currentPage.toString());
                            currentPageSize = nextString.length();
                            currentPage = newPage();
                            currentPage.add(new MCJson(nextString));
                        }
                    } else {
                        currentPage.add(new MCJson(s));
                        currentPageSize += s.length();
                    }
                }
            }
            // NOTE(review): this adds a raw String, whereas every other entry on
            // the page is an MCJson — confirm the inconsistency is intended.
            currentPage.add("\n");
            // Fixed size charged for the paragraph break.
            currentPageSize += 20;
        }
    }
    // Replace the placeholder with the finished contents page.
    pages.set(1, contentsPage.toString());
    pages.add(endPage(title, url));
    return pages;
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) ArrayList(java.util.ArrayList) JSONArray(org.json.simple.JSONArray) TextNode(org.jsoup.nodes.TextNode) Elements(org.jsoup.select.Elements)

Example 12 with TextNode

use of org.jsoup.nodes.TextNode in project ultimate-java by pantinor.

the class HtmlUtils method buildStringFromNode.

/**
 * Recursively flattens a node and its descendants into plain text.
 *
 * <p>Text nodes contribute their trimmed text. After an element's children
 * have been appended, a single line break follows {@code p} and {@code br},
 * and a blank line follows {@code h1}-{@code h4}, {@code dt} and {@code dl}.
 *
 * @param node the node to flatten
 * @return a new buffer holding the flattened text of {@code node}
 */
public static StringBuffer buildStringFromNode(Node node) {
    StringBuffer out = new StringBuffer();
    if (node instanceof TextNode) {
        out.append(((TextNode) node).text().trim());
    }
    // Depth-first: children are emitted before this node's own trailing breaks.
    for (Node child : node.childNodes()) {
        out.append(buildStringFromNode(child));
    }
    if (!(node instanceof Element)) {
        return out;
    }
    switch (((Element) node).tagName()) {
        case "p":
        case "br":
            out.append("\n");
            break;
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "dt":
        case "dl":
            out.append("\n\n");
            break;
        default:
            break;
    }
    return out;
}
Also used : TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode)

Example 13 with TextNode

use of org.jsoup.nodes.TextNode in project NetDiscovery by fengzhizi715.

the class CssSelector method getText.

/**
 * Concatenates the text of the element's direct text-node children.
 *
 * <p>Text inside nested child elements is not included, because only direct
 * children that are {@link TextNode}s are visited.
 *
 * @param element the element whose own text is collected
 * @return the concatenated text, or an empty string if there are no text nodes
 */
protected String getText(Element element) {
    StringBuilder ownText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        ownText.append(((TextNode) child).text());
    }
    return ownText.toString();
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) TextNode(org.jsoup.nodes.TextNode)

Example 14 with TextNode

use of org.jsoup.nodes.TextNode in project substitution-schedule-parser by vertretungsplanme.

the class DaVinciParser method parsePage.

/**
 * Parses a single DaVinci page and adds its contents to the schedule.
 *
 * <p>The page heading is either a date (single-day page) or a class name. In
 * the latter case the date is looked up in the element following the heading;
 * if none is found there, the page is treated as covering multiple days and no
 * day object is created ({@code day} is {@code null}).
 *
 * <p>Messages are only collected when a day object exists. The "last change"
 * timestamp is stored on the day if there is one, otherwise on the schedule.
 *
 * @param doc           root element of the page to parse
 * @param schedule      schedule that receives the parsed day, table contents
 *                      and (for multi-day pages) the last-change timestamp
 * @param colorProvider passed through to the table parser
 * @throws IOException declared by the signature; not thrown directly here
 */
@NotNull
static void parsePage(Element doc, SubstitutionSchedule schedule, ColorProvider colorProvider) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();
    Element titleElem = doc.select("h1.list-table-caption").first();
    if (titleElem == null) {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();
    String klasse = null;
    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        String dateString = dateMatcher.group();
        day.setDateString(dateString);
        day.setDate(ParserUtils.parseDate(dateString));
    } else {
        klasse = title;
        // The heading may be the last element of its parent; treat a missing
        // sibling like a sibling without a date instead of failing.
        Element nextElem = titleElem.nextElementSibling();
        String nextText = nextElem != null ? nextElem.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    // Messages can only be attached to a day. On multi-day pages (day == null)
    // they are skipped; previously the ".callout" loop dereferenced the null
    // day and threw a NullPointerException.
    if (day != null) {
        for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
            for (TextNode node : p.textNodes()) {
                String message = node.text().trim();
                if (!message.isEmpty()) {
                    day.addMessage(message);
                }
            }
        }
        for (Element callout : doc.select(".callout")) {
            for (TextNode node : callout.textNodes()) {
                String message = node.text().trim();
                if (!message.isEmpty()) {
                    day.addMessage(message);
                }
            }
        }
    }

    // Determine the "last change" timestamp; stays null if no known format matches.
    LocalDateTime lastChangeTime = null;
    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }
    if (lastChangeElem != null) {
        String lastChange = lastChangeElem.ownText();
        Matcher matcher = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|").matcher(lastChange);
        if (matcher.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm").parseLocalDateTime(matcher.group(1));
        } else {
            Matcher matcher2 = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})").matcher(lastChange);
            if (matcher2.find()) {
                lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm").parseLocalDateTime(matcher2.group(1));
            }
        }
    } else {
        // No visible element carries the timestamp; fall back to the generator
        // comment embedded in the HTML.
        Pattern pattern = Pattern.compile("<!-- Created by daVinci 5 \\| (\\d+\\.\\d+\\.\\d+ \\| \\d+:\\d+) \\| " + "www.stueber.de -->");
        Matcher matcher = pattern.matcher(doc.html());
        if (matcher.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm").parseLocalDateTime(matcher.group(1));
        }
    }
    if (lastChangeTime != null) {
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    }

    // Parse the table unless the page only carries the "Es liegen keine ..." notice.
    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }
    if (day != null) {
        schedule.addDay(day);
    }
}
Also used : LocalDateTime(org.joda.time.LocalDateTime) Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Element(org.jsoup.nodes.Element) SubstitutionScheduleDay(me.vertretungsplan.objects.SubstitutionScheduleDay) TextNode(org.jsoup.nodes.TextNode) NotNull(org.jetbrains.annotations.NotNull)

Example 15 with TextNode

use of org.jsoup.nodes.TextNode in project opacclient by opacapp.

the class Bibliotheca method parseSearchFields.

/**
 * Loads the search form and converts its input elements into search fields.
 *
 * <p>Each {@code .suchfeldinhalt} container is handled according to the inputs
 * it holds: a single input (optionally paired with keyword/title radio
 * buttons), two text inputs, or a {@code feld1} select combined with a text
 * input, which is expanded into one text field per option. If at least one
 * field was found, an ordering dropdown is appended.
 *
 * @return the search fields in the order they were discovered
 * @throws IOException   if loading the search form fails
 * @throws JSONException if building a field's additional data fails
 */
@Override
public List<SearchField> parseSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }
    List<SearchField> result = new ArrayList<>();

    // Read branches and media types
    FormBody.Builder form = new FormBody.Builder(Charset.forName(getDefaultEncoding()));
    form.add("link_profis.x", "0");
    form.add("link_profis.y", "1");
    String response = httpPost(opac_url + "/index.asp", form.build(), getDefaultEncoding());
    Document page = Jsoup.parse(response);

    for (Element container : page.select(".suchfeldinhalt")) {
        String name = container.select(".suchfeld_inhalt_titel label").text();

        // The hint is the first non-empty text node next to the input.
        String hint = "";
        Elements inputWrappers = container.select(".suchfeld_inhalt_input");
        if (!inputWrappers.isEmpty()) {
            for (TextNode node : inputWrappers.first().textNodes()) {
                String text = node.getWholeText().replace("\n", "");
                if (!text.isEmpty()) {
                    hint = text;
                    break;
                }
            }
        }

        Elements inputs = container.select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select");
        int inputCount = inputs.size();
        if (inputCount == 1) {
            SearchField field = createSearchField(name, hint, inputs.get(0));
            Elements radios = container.select("input[type=radio]");
            if (field instanceof TextSearchField && !radios.isEmpty()) {
                TextSearchField keywordField = (TextSearchField) field;
                if ("stich".equals(radios.get(0).attr("value"))) {
                    keywordField.setFreeSearch(true);
                    Elements keywordLabel = container.select("label[for=stichtit_sich]");
                    if (!keywordLabel.isEmpty()) {
                        keywordField.setHint(keywordLabel.text().trim());
                    }
                    JSONObject keywordParams = new JSONObject();
                    keywordParams.put("stichtit", "stich");
                    JSONObject keywordData = new JSONObject();
                    keywordData.put("additional_params", keywordParams);
                    keywordField.setData(keywordData);
                }
                if (radios.size() == 2 && "titel".equals(radios.get(1).attr("value"))) {
                    // Second radio option becomes its own field sharing the same id.
                    TextSearchField titleField = new TextSearchField();
                    titleField.setId(keywordField.getId());
                    Elements titleLabel = container.select("label[for=stichtit_titel]");
                    if (!titleLabel.isEmpty()) {
                        titleField.setDisplayName(titleLabel.text().trim());
                    }
                    JSONObject titleParams = new JSONObject();
                    titleParams.put("stichtit", "titel");
                    JSONObject titleData = new JSONObject();
                    titleData.put("additional_params", titleParams);
                    titleField.setData(titleData);
                    result.add(titleField);
                }
            }
            result.add(field);
        } else if (inputCount == 2 && inputs.select("input[type=text]").size() == 2) {
            // Two text fields, e.g. year from/to or two keywords
            result.add(createSearchField(name, hint, inputs.get(0)));
            TextSearchField second = (TextSearchField) createSearchField(name, hint, inputs.get(1));
            second.setHalfWidth(true);
            result.add(second);
        } else if (inputCount == 2 && inputs.get(0).tagName().equals("select") && inputs.get(1).tagName().equals("input") && inputs.get(0).attr("name").equals("feld1")) {
            // Break it down into single text fields.
            Element selector = inputs.get(0);
            String selectorName = selector.attr("name");
            String textInputName = inputs.get(1).attr("name");
            for (Element option : selector.select("option")) {
                String optionValue = option.attr("value");
                TextSearchField optionField = new TextSearchField();
                optionField.setHint(hint);
                optionField.setDisplayName(option.text());
                optionField.setId(textInputName + "$" + optionValue);
                JSONObject optionParams = new JSONObject();
                optionParams.put(selectorName, optionValue);
                JSONObject optionData = new JSONObject();
                optionData.put("additional_params", optionParams);
                optionField.setData(optionData);
                result.add(optionField);
            }
        }
    }

    if (!result.isEmpty()) {
        DropdownSearchField order = new DropdownSearchField("orderselect", stringProvider.getString(StringProvider.ORDER), false, null);
        order.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT));
        order.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC));
        order.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC));
        order.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC));
        order.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC));
        order.setMeaning(Meaning.ORDER);
        result.add(order);
    }
    return result;
}
Also used : Element(org.jsoup.nodes.Element) ArrayList(java.util.ArrayList) FormBody(okhttp3.FormBody) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) TextSearchField(de.geeksfactory.opacclient.searchfields.TextSearchField) DropdownSearchField(de.geeksfactory.opacclient.searchfields.DropdownSearchField) SearchField(de.geeksfactory.opacclient.searchfields.SearchField) TextSearchField(de.geeksfactory.opacclient.searchfields.TextSearchField) DropdownSearchField(de.geeksfactory.opacclient.searchfields.DropdownSearchField) JSONObject(org.json.JSONObject)

Aggregations

TextNode (org.jsoup.nodes.TextNode): 52, Element (org.jsoup.nodes.Element): 41, Node (org.jsoup.nodes.Node): 37, Document (org.jsoup.nodes.Document): 19, ArrayList (java.util.ArrayList): 16, Elements (org.jsoup.select.Elements): 14, IOException (java.io.IOException): 6, DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 6, JSONException (org.json.JSONException): 6, Copy (de.geeksfactory.opacclient.objects.Copy): 5, DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem): 5, HashMap (java.util.HashMap): 5, NameValuePair (org.apache.http.NameValuePair): 5, BasicNameValuePair (org.apache.http.message.BasicNameValuePair): 5, Test (org.junit.jupiter.api.Test): 5, NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException): 4, Detail (de.geeksfactory.opacclient.objects.Detail): 4, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 4, URI (java.net.URI): 4, Matcher (java.util.regex.Matcher): 4