use of org.jsoup.nodes.TextNode in project McWiki by skylerdev.
the class CommandWiki method buildPages.
/**
 * Converts a parsed wiki article into the ordered list of serialized pages.
 *
 * <p>Resulting layout: the title page, a contents page holding one link per included
 * {@code h2} section, the article body, and finally the end page. Body text is paginated
 * against an approximate per-page character budget ({@code maxChars}).
 *
 * @param doc   parsed article document
 * @param title article title, forwarded to {@code titlePage} and {@code endPage}
 * @param url   article URL, forwarded to {@code titlePage} and {@code endPage}
 * @return the serialized pages in display order
 */
private List<String> buildPages(Document doc, String title, String url) {
    // NOTE(review): the comma separates three selectors, so only <p> is restricted to direct
    // children of the content div; h2/h3 are matched anywhere in the document. Confirm intended.
    Elements main = doc.select("div[id=mw-content-text] > p, h2, h3");
    ArrayList<String> pages = new ArrayList<String>();
    pages.add(titlePage(title, url));
    // Reserve index 1 for the contents page; it is replaced after the loop, once every
    // section link has been collected.
    pages.add("Table of contents placeholder");
    JSONArray contentsPage = newPage();
    MCJson contentsHead = new MCJson("Contents\n\n", "dark_gray");
    contentsHead.setBold(true);
    contentsPage.add(contentsHead);
    JSONArray currentPage = newPage();
    // Rough size of the page under construction. h2 headings and paragraph ends are charged
    // a flat 20 rather than their real length.
    int currentPageSize = 0;
    int maxChars = 230;
    MCJson space = new MCJson(" ");
    for (Element mainchild : main) {
        // Start a fresh page once the budget has been exceeded.
        if (currentPageSize > maxChars) {
            pages.add(currentPage.toString());
            currentPageSize = 0;
            currentPage = newPage();
        }
        if (mainchild.is("h2")) {
            if (isOmitted(mainchild)) {
                // Only the heading is skipped; following paragraphs are still processed.
                continue;
            }
            // Every included section starts on its own page.
            pages.add(currentPage.toString());
            currentPage = newPage();
            currentPageSize = 20;
            String h = mainchild.text().replaceAll("\\[edit\\]", "");
            currentPage.add(new MCJson(h, header2));
            currentPage.add(space);
            // The contents page is the second entry in `pages`; "2" is presumably a 1-based
            // page number — confirm against the client's change_page semantics.
            MCJson backButton = new MCJson("«", link);
            backButton.setHover("show_text", "Back to contents");
            backButton.setClick("change_page", "2");
            currentPage.add(backButton);
            currentPage.add(new MCJson("\n"));
            // currentPage has not been appended yet, so its 1-based position will be
            // pages.size() + 1.
            MCJson contentslink = new MCJson(h, link);
            contentslink.setHover("show_text", "Jump to this section");
            contentslink.setClick("change_page", "" + (pages.size() + 1));
            contentsPage.add(contentslink);
            contentsPage.add(new MCJson("\n"));
        } else if (mainchild.is("h3")) {
            String h = mainchild.text().replaceAll("\\[edit\\]", "");
            currentPage.add(new MCJson(h, header3));
            currentPage.add(space);
            currentPageSize += h.length() + 2;
        } else if (mainchild.is("p")) {
            List<Node> inner = mainchild.childNodes();
            for (Node n : inner) {
                if (currentPageSize > maxChars) {
                    pages.add(currentPage.toString());
                    currentPageSize = 0;
                    currentPage = newPage();
                }
                if (n instanceof Element) {
                    Element e = (Element) n;
                    if (e.is("a")) {
                        String linkto = e.attr("href");
                        MCJson a = new MCJson(e.text(), link);
                        if (linkto.startsWith("/")) {
                            // Site-relative link: re-run the wiki command for that article.
                            a.setClick("run_command", "/wiki " + linkto.substring(1));
                            a.setHover("show_text", "Click to show this article.");
                        } else {
                            a.setClick("open_url", linkto);
                            a.setHover("show_text", "External Link");
                        }
                        currentPage.add(a);
                    } else if (e.is("b")) {
                        currentPage.add(new MCJson(e.text(), bold));
                    } else if (e.is("i")) {
                        currentPage.add(new MCJson(e.text(), italic));
                    }
                    // NOTE(review): elements other than a/b/i are not rendered, yet their text
                    // length is still charged to the page budget here.
                    currentPageSize += e.text().length();
                }
                if (n instanceof TextNode) {
                    TextNode t = (TextNode) n;
                    String s = t.text();
                    if (currentPageSize + s.length() > maxChars - 10) {
                        // karate chop: split the text at the last space that still fits on
                        // the current page and carry the remainder over to a new page.
                        int splitAt = s.lastIndexOf(" ", maxChars - currentPageSize);
                        if (splitAt < 0) {
                            // No usable split point: move the whole text to a new page.
                            pages.add(currentPage.toString());
                            currentPage = newPage();
                            currentPage.add(new MCJson(s));
                            currentPageSize = s.length();
                        } else {
                            String firstString = s.substring(0, splitAt);
                            String nextString = s.substring(splitAt + 1);
                            currentPage.add(new MCJson(firstString));
                            pages.add(currentPage.toString());
                            // NOTE(review): the remainder is placed on one page unsplit, so a
                            // very long text node can still exceed maxChars.
                            currentPageSize = nextString.length();
                            currentPage = newPage();
                            currentPage.add(new MCJson(nextString));
                        }
                    } else {
                        currentPage.add(new MCJson(s));
                        currentPageSize += s.length();
                    }
                }
            }
            currentPage.add("\n");
            currentPageSize += 20;
        }
    }
    // Flush the page still under construction. Without this, everything accumulated since
    // the last page break (for short articles: the whole body) would be lost. A size of 0
    // means the page was just reset and holds no content, so nothing is appended.
    if (currentPageSize > 0) {
        pages.add(currentPage.toString());
    }
    pages.set(1, contentsPage.toString());
    pages.add(endPage(title, url));
    return pages;
}
use of org.jsoup.nodes.TextNode in project ultimate-java by pantinor.
the class HtmlUtils method buildStringFromNode.
/**
 * Recursively flattens a node and its descendants into plain text.
 *
 * <p>Text nodes contribute their trimmed text. After an element's children have been
 * appended, {@code p} and {@code br} add one newline, while {@code h1}-{@code h4},
 * {@code dt} and {@code dl} add two.
 *
 * @param node root of the subtree to flatten
 * @return a new buffer holding the flattened text
 */
public static StringBuffer buildStringFromNode(Node node) {
    StringBuffer out = new StringBuffer();

    if (node instanceof TextNode) {
        out.append(((TextNode) node).text().trim());
    }

    for (Node child : node.childNodes()) {
        out.append(buildStringFromNode(child));
    }

    // Only elements carry a tag name that can trigger a trailing line break.
    if (!(node instanceof Element)) {
        return out;
    }

    String tag = ((Element) node).tagName();
    boolean singleBreak = "p".equals(tag) || "br".equals(tag);
    boolean doubleBreak = "h1".equals(tag) || "h2".equals(tag) || "h3".equals(tag)
            || "h4".equals(tag) || "dt".equals(tag) || "dl".equals(tag);

    if (singleBreak) {
        out.append("\n");
    }
    if (doubleBreak) {
        out.append("\n\n");
    }
    return out;
}
use of org.jsoup.nodes.TextNode in project NetDiscovery by fengzhizi715.
the class CssSelector method getText.
/**
 * Concatenates the text of the element's direct text-node children, in document order.
 * Text inside nested child elements is not included.
 *
 * @param element element whose direct text is collected
 * @return the concatenated text, or an empty string if there are no text-node children
 */
protected String getText(Element element) {
    StringBuilder directText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        directText.append(((TextNode) child).text());
    }
    return directText.toString();
}
use of org.jsoup.nodes.TextNode in project substitution-schedule-parser by vertretungsplanme.
the class DaVinciParser method parsePage.
/**
 * Parses a single DaVinci HTML page and stores its contents in {@code schedule}.
 *
 * <p>The page title is either a date (single-day page) or a class name. For a class page the
 * date is looked up in the element following the title; if none is found there, the page is
 * treated as spanning multiple days and no {@link SubstitutionScheduleDay} is created
 * ({@code day} is {@code null} from then on). Messages and the last-change timestamp are
 * attached to the day when there is one; without a day, messages are skipped and the
 * timestamp is set on the schedule instead.
 *
 * @param doc           root element of the page to parse
 * @param schedule      schedule that receives the parsed day, table rows and last-change time
 * @param colorProvider forwarded to {@code parseDaVinciTable}
 * @throws IOException declared for callers; presumably raised by the table parser — confirm
 */
@NotNull
static void parsePage(Element doc, SubstitutionSchedule schedule, ColorProvider colorProvider) throws IOException {
    SubstitutionScheduleDay day = new SubstitutionScheduleDay();

    // Elements.first() yields null for an empty selection, so one lookup is enough.
    Element titleElem = doc.select("h1.list-table-caption").first();
    if (titleElem == null) {
        // DaVinci 5
        titleElem = doc.select("h2").first();
    }
    String title = titleElem.text();
    String klasse = null;

    // title can either be date or class
    Pattern datePattern = Pattern.compile("\\d+\\.\\d+.\\d{4}");
    Matcher dateMatcher = datePattern.matcher(title);
    if (dateMatcher.find()) {
        day.setDateString(dateMatcher.group());
        day.setDate(ParserUtils.parseDate(dateMatcher.group()));
    } else {
        klasse = title;
        // The title may be the last element in its parent; treat that like "no date found"
        // instead of failing with a NullPointerException.
        Element afterTitle = titleElem.nextElementSibling();
        String nextText = afterTitle != null ? afterTitle.text() : "";
        if (nextText.matches("\\w+ \\d+\\.\\d+.\\d{4}")) {
            day.setDateString(nextText);
            day.setDate(ParserUtils.parseDate(nextText));
        } else {
            // could not find date, must be multiple days
            day = null;
        }
    }

    for (Element p : doc.select(".row:has(h1.list-table-caption) p")) {
        for (TextNode node : p.textNodes()) {
            String text = node.text().trim();
            if (!text.isEmpty() && day != null) {
                day.addMessage(text);
            }
        }
    }
    for (Element message : doc.select(".callout")) {
        for (TextNode node : message.textNodes()) {
            String text = node.text().trim();
            // day is null on multi-day pages; skip the message there, as the loop above does,
            // rather than dereferencing null.
            if (!text.isEmpty() && day != null) {
                day.addMessage(text);
            }
        }
    }

    Element lastChangeElem = doc.select(".row.copyright div").first();
    if (lastChangeElem == null) {
        // DaVinci 5
        lastChangeElem = doc.select("h1").first();
    }

    // Stays null when no known timestamp format is present on the page.
    LocalDateTime lastChangeTime = null;
    if (lastChangeElem != null) {
        String lastChange = lastChangeElem.ownText();
        Matcher dashed = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|").matcher(lastChange);
        if (dashed.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd-MM-yyyy HH:mm").parseLocalDateTime(dashed.group(1));
        } else {
            Matcher dotted = Pattern.compile("(\\d{2}.\\d{2}.\\d{4} \\| \\d+:\\d{2})").matcher(lastChange);
            if (dotted.find()) {
                lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm").parseLocalDateTime(dotted.group(1));
            }
        }
    } else {
        // Fall back to the generator comment embedded in the raw HTML.
        Pattern pattern = Pattern.compile("<!-- Created by daVinci 5 \\| (\\d+\\.\\d+\\.\\d+ \\| \\d+:\\d+) \\| " + "www.stueber.de -->");
        Matcher matcher = pattern.matcher(doc.html());
        if (matcher.find()) {
            lastChangeTime = DateTimeFormat.forPattern("dd.MM.yyyy | HH:mm").parseLocalDateTime(matcher.group(1));
        }
    }
    if (lastChangeTime != null) {
        // Without a single day to attach it to, the timestamp applies to the whole schedule.
        if (day != null) {
            day.setLastChange(lastChangeTime);
        } else {
            schedule.setLastChange(lastChangeTime);
        }
    }

    if (doc.select(".list-table").size() > 0 || !doc.select(".callout").text().contains("Es liegen keine")) {
        Element table = doc.select(".list-table, table").first();
        parseDaVinciTable(table, schedule, klasse, day, colorProvider);
    }
    if (day != null) {
        schedule.addDay(day);
    }
}
use of org.jsoup.nodes.TextNode in project opacclient by opacapp.
the class Bibliotheca method parseSearchFields.
/**
 * Loads the search form from the OPAC and converts it into a list of search fields.
 *
 * <p>Each {@code .suchfeldinhalt} container is inspected: a single input yields one field
 * (plus an extra title variant when a second "titel" radio button is present), two text
 * inputs yield a pair whose second field is half-width, and a {@code feld1} select combined
 * with a text input is expanded into one text field per option. When at least one field was
 * found, an ordering dropdown is appended.
 *
 * @return the search fields in page order, followed by the ordering dropdown if non-empty
 * @throws IOException   as declared; presumably from the HTTP request or {@code start()} — confirm
 * @throws JSONException if building a field's additional parameters fails
 */
@Override
public List<SearchField> parseSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    // Read branches and media types
    FormBody.Builder form = new FormBody.Builder(Charset.forName(getDefaultEncoding()));
    form.add("link_profis.x", "0");
    form.add("link_profis.y", "1");
    String response = httpPost(opac_url + "/index.asp", form.build(), getDefaultEncoding());
    Document page = Jsoup.parse(response);

    List<SearchField> result = new ArrayList<>();
    for (Element container : page.select(".suchfeldinhalt")) {
        String label = container.select(".suchfeld_inhalt_titel label").text();

        // The hint is the first non-empty bare text inside the input wrapper, newlines removed.
        String hint = "";
        Elements inputWrappers = container.select(".suchfeld_inhalt_input");
        if (!inputWrappers.isEmpty()) {
            for (TextNode textNode : inputWrappers.first().textNodes()) {
                String candidate = textNode.getWholeText().replace("\n", "");
                if (!candidate.isEmpty()) {
                    hint = candidate;
                    break;
                }
            }
        }

        Elements inputs = container.select(".suchfeld_inhalt_input input[type=text], " + ".suchfeld_inhalt_input select");
        int inputCount = inputs.size();

        if (inputCount == 1) {
            SearchField field = createSearchField(label, hint, inputs.get(0));
            Elements radios = container.select("input[type=radio]");
            if (field instanceof TextSearchField && !radios.isEmpty()) {
                TextSearchField keywordField = (TextSearchField) field;
                if ("stich".equals(radios.get(0).attr("value"))) {
                    keywordField.setFreeSearch(true);
                    Elements keywordLabel = container.select("label[for=stichtit_sich]");
                    if (!keywordLabel.isEmpty()) {
                        keywordField.setHint(keywordLabel.text().trim());
                    }
                    JSONObject keywordParams = new JSONObject();
                    keywordParams.put("stichtit", "stich");
                    JSONObject keywordData = new JSONObject();
                    keywordData.put("additional_params", keywordParams);
                    keywordField.setData(keywordData);
                }
                if (radios.size() == 2 && "titel".equals(radios.get(1).attr("value"))) {
                    // Second radio option: a title search sharing the same field id.
                    TextSearchField titleField = new TextSearchField();
                    titleField.setId(keywordField.getId());
                    Elements titleLabel = container.select("label[for=stichtit_titel]");
                    if (!titleLabel.isEmpty()) {
                        titleField.setDisplayName(titleLabel.text().trim());
                    }
                    JSONObject titleParams = new JSONObject();
                    titleParams.put("stichtit", "titel");
                    JSONObject titleData = new JSONObject();
                    titleData.put("additional_params", titleParams);
                    titleField.setData(titleData);
                    result.add(titleField);
                }
            }
            result.add(field);
        } else if (inputCount == 2) {
            Element first = inputs.get(0);
            Element second = inputs.get(1);
            if (inputs.select("input[type=text]").size() == 2) {
                // Two text fields, e.g. year from/to or two keywords
                result.add(createSearchField(label, hint, first));
                TextSearchField secondField = (TextSearchField) createSearchField(label, hint, second);
                secondField.setHalfWidth(true);
                result.add(secondField);
            } else if ("select".equals(first.tagName()) && "input".equals(second.tagName())
                    && "feld1".equals(first.attr("name"))) {
                // Break it down into single text fields.
                String selectName = first.attr("name");
                String inputName = second.attr("name");
                for (Element option : first.select("option")) {
                    String optionValue = option.attr("value");
                    TextSearchField optionField = new TextSearchField();
                    optionField.setHint(hint);
                    optionField.setDisplayName(option.text());
                    optionField.setId(inputName + "$" + optionValue);
                    JSONObject optionParams = new JSONObject();
                    optionParams.put(selectName, optionValue);
                    JSONObject optionData = new JSONObject();
                    optionData.put("additional_params", optionParams);
                    optionField.setData(optionData);
                    result.add(optionField);
                }
            }
        }
    }

    if (!result.isEmpty()) {
        DropdownSearchField orderField = new DropdownSearchField("orderselect", stringProvider.getString(StringProvider.ORDER), false, null);
        orderField.addDropdownValue("1", stringProvider.getString(StringProvider.ORDER_DEFAULT));
        orderField.addDropdownValue("2:desc", stringProvider.getString(StringProvider.ORDER_YEAR_DESC));
        orderField.addDropdownValue("2:asc", stringProvider.getString(StringProvider.ORDER_YEAR_ASC));
        orderField.addDropdownValue("3:desc", stringProvider.getString(StringProvider.ORDER_CATEGORY_DESC));
        orderField.addDropdownValue("3:asc", stringProvider.getString(StringProvider.ORDER_CATEGORY_ASC));
        orderField.setMeaning(Meaning.ORDER);
        result.add(orderField);
    }
    return result;
}
Aggregations