use of org.jsoup.nodes.TextNode in project AozoraEpub3 by hmdev.
the class WebAozoraConverter method _printNode.
/**
 * Writes the children of {@code parent} to {@code bw}; calls itself for nested elements.
 *
 * While the {@code startElement} field is non-null nothing is written: the tree is only
 * searched for that node. When a child equal to the {@code endElement} field is reached,
 * this invocation returns without writing it or any later sibling.
 *
 * @param bw     writer receiving the converted text
 * @param parent node whose child nodes are processed
 * @throws IOException if writing to {@code bw} fails
 */
private void _printNode(BufferedWriter bw, Node parent) throws IOException {
    for (Node child : parent.childNodes()) {
        // Start marker not passed yet: descend into elements to look for it, emit nothing.
        if (startElement != null) {
            if (child.equals(startElement)) {
                startElement = null;
            } else if (child instanceof Element) {
                _printNode(bw, child);
            }
            continue;
        }
        // End marker reached: stop processing this level.
        if (endElement != null && child.equals(endElement)) {
            return;
        }
        if (child instanceof TextNode) {
            printText(bw, ((TextNode) child).getWholeText());
            continue;
        }
        if (!(child instanceof Element)) {
            // Neither text nor element: just report the node class on stdout.
            System.out.println(child.getClass().getName());
            continue;
        }

        Element elem = (Element) child;
        String tag = elem.tagName();
        if ("br".equals(tag)) {
            // A trailing <br> produces no line break.
            if (elem.nextSibling() != null) {
                bw.append('\n');
            }
        } else if ("div".equals(tag) || "p".equals(tag)) {
            // Break before the block unless it is first or follows another block node.
            Node before = elem.previousSibling();
            if (before != null && !isBlockNode(before)) {
                bw.append('\n');
            }
            // Output the children.
            _printNode(bw, child);
            if (elem.nextSibling() != null) {
                bw.append('\n');
            }
        } else if ("ruby".equals(tag)) {
            // Output the ruby annotation.
            printRuby(bw, elem);
        } else if ("img".equals(tag)) {
            // Cache the image and output its annotation.
            printImage(bw, elem);
        } else if ("hr".equals(tag) && !this.noHr) {
            bw.append("[#区切り線]\n");
        } else if ("b".equals(tag)) {
            bw.append("[#ここから太字]");
            _printNode(bw, child);
            bw.append("[#ここで太字終わり]");
        } else if ("sup".equals(tag)) {
            bw.append("[#上付き小文字]");
            _printNode(bw, child);
            bw.append("[#上付き小文字終わり]");
        } else if ("sub".equals(tag)) {
            bw.append("[#下付き小文字]");
            _printNode(bw, child);
            bw.append("[#下付き小文字終わり]");
        } else if ("strike".equals(tag) || "s".equals(tag)) {
            bw.append("[#取消線]");
            _printNode(bw, child);
            bw.append("[#取消線終わり]");
        } else if ("tr".equals(tag)) {
            // Each table row ends with a line break.
            _printNode(bw, child);
            bw.append('\n');
        } else {
            // Any other element: output only its children.
            _printNode(bw, child);
        }
    }
}
use of org.jsoup.nodes.TextNode in project solr-cmd-utils by tblsoft.
the class HtmlJsoupFilter method mapAllElements.
/**
 * For every element matching {@code selector}, joins the text of all text nodes found
 * directly under that element and each of its descendant elements (space separated,
 * trimmed) and adds the result to {@code document} under {@code fieldName}.
 *
 * @param selector  selector evaluated against {@code jsoupDocument}
 * @param fieldName name of the field the collected text is added to
 */
public void mapAllElements(String selector, String fieldName) {
    Elements matches = jsoupDocument.select(selector);
    for (Element match : matches) {
        StringBuilder collected = new StringBuilder();
        // getAllElements() yields the matched element together with its descendants.
        for (Element current : match.getAllElements()) {
            for (TextNode piece : current.textNodes()) {
                collected.append(piece.text()).append(" ");
            }
        }
        // One field value per matched element, even if it is empty.
        document.addField(fieldName, collected.toString().trim());
    }
}
use of org.jsoup.nodes.TextNode in project Lightning-Browser by anthonycr.
the class OutputFormatter method appendTextSkipHidden.
/**
 * Appends the text below {@code e} to {@code accum}, skipping every child for which
 * {@code unlikely(child)} is true. A single space is inserted before a block element
 * (when the buffer is non-empty and does not already end in whitespace) and for
 * {@code br} elements.
 *
 * @param e      element whose children are traversed
 * @param accum  buffer receiving the text
 * @param indent current recursion depth; only passed on, incremented by one per level
 */
private void appendTextSkipHidden(@NonNull Element e, @NonNull StringBuilder accum, int indent) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }
        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
            continue;
        }
        if (!(child instanceof Element)) {
            continue;
        }
        Element element = (Element) child;
        boolean blockNeedsSeparator = accum.length() > 0 && element.isBlock() && !lastCharIsWhitespace(accum);
        // The br check only runs when the block rule did not already add a space.
        if (blockNeedsSeparator || element.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(element, accum, indent + 1);
    }
}
use of org.jsoup.nodes.TextNode in project onebusaway-application-modules by camsys.
the class GtfsFullValidationTaskJob method checkOutputForErrors.
/**
 * Parses the validation output HTML file and looks for the list that follows the
 * "Errors:" issue header. For every {@code <li>} in that list one entry of the form
 * {@code message,detail} is passed to {@code result.addError}.
 *
 * @param agencyId - the agency id of the HTML file being checked (not read by this method)
 * @param outputFile - the name of the HTML file to be checked.
 * @throws IOException if the HTML file cannot be read or parsed
 */
private void checkOutputForErrors(String agencyId, String outputFile) throws IOException {
    Document doc = Jsoup.parse(new File(outputFile), "UTF-8");
    Elements errorLists = doc.select(".issueHeader:containsOwn(Errors:) ~ ul");
    if (errorLists == null) {
        return;
    }
    Element errorList = errorLists.first();
    if (errorList == null) {
        return;
    }
    Elements validationErrors = errorList.select("li");
    if (validationErrors == null || !validationErrors.hasText()) {
        return;
    }
    // One reported error per <li>.
    for (Node listItem : validationErrors) {
        StringBuilder message = new StringBuilder();
        String detail = "";
        for (Node part : listItem.childNodes()) {
            if (part instanceof TextNode) {
                message.append(((TextNode) part).text());
                continue;
            }
            if (!(part instanceof Element)) {
                continue;
            }
            Element partElement = (Element) part;
            String tagName = partElement.tagName();
            if (tagName.equals("br")) {
                // Line breaks become a single space in the message.
                message.append(" ");
            } else if (tagName.equals("div")) {
                message.append(parseDivData(part));
            } else if (tagName.equals("table")) {
                // A later table replaces the detail text of an earlier one.
                detail = parseTableData(part);
            } else {
                message.append(partElement.text());
            }
        }
        result.addError(message.toString() + "," + detail);
    }
}
use of org.jsoup.nodes.TextNode in project structr by structr.
the class Importer method createChildNodes.
/**
 * Recursively converts the child nodes of a parsed (jsoup) node into DOM nodes.
 *
 * For every child a new node is created depending on the child's node name: comment and
 * text/data content nodes, {@code structr:template} and {@code structr:component}
 * references, or a regular element. Attributes are copied to the new node as properties,
 * {@code script} and {@code style} elements get additional handling, the node is appended
 * to {@code parent}, and the method then descends into the child's own children.
 *
 * @param startNode           parsed node whose children are imported
 * @param parent              node the new nodes are appended to; may be {@code null}, in which
 *                            case no parent/child relationship is created
 * @param page                page used to create the new nodes and set as their owner; may be
 *                            {@code null} for some node kinds (checked before use)
 * @param removeHashAttribute if {@code true}, the {@code data-structr-hash} attribute is removed
 *                            from source elements before their attributes are copied
 * @param depth               current recursion depth; only passed on, incremented per level
 * @return the first non-comment node created on this level, or {@code null} if none was created
 * @throws FrameworkException declared by the node creation and property calls used in the body
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page, final boolean removeHashAttribute, final int depth) throws FrameworkException {
    DOMNode rootElement = null;
    Linkable res = null;
    String instructions = null;
    final List<Node> children = startNode.childNodes();
    for (Node node : children) {
        String tag = node.nodeName();
        // clean tag: keep only letters, digits and the characters # : . _ -
        // The hyphen must be the last character of the class; written as ".-_" it forms a
        // range from '.' to '_' which drops '-' and lets characters like '/', '<', '>' through.
        if (tag != null) {
            tag = tag.replaceAll("[^a-zA-Z0-9#:._-]+", "");
        }
        final StringBuilder classString = new StringBuilder();
        final String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        boolean isNewTemplateOrComponent = false;
        if (ignoreElementNames.contains(type)) {
            continue;
        }
        if (node instanceof Element) {
            final Element el = ((Element) node);
            final Set<String> classes = el.classNames();
            for (String cls : classes) {
                classString.append(cls).append(" ");
            }
            id = el.id();
            // do not download files when called from DeployCommand!
            if (!isDeployment) {
                String downloadAddressAttr = srcElements.contains(tag) ? "src" : hrefElements.contains(tag) ? "href" : null;
                if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {
                    String downloadAddress = node.attr(downloadAddressAttr);
                    res = downloadFile(downloadAddress, originalUrl);
                } else {
                    res = null;
                }
            }
            if (removeHashAttribute) {
                // Remove data-structr-hash attribute
                node.removeAttr("data-structr-hash");
            }
        }
        // Data and comment nodes: put the text into the "comment" / "content" field without changes
        if (type.equals("#comment")) {
            comment = ((Comment) node).getData();
            tag = "";
            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {
                continue;
            }
            // store for later use
            commentSource.append(comment).append("\n");
            // check if comment contains instructions
            if (commentHandler != null && commentHandler.containsInstructions(comment)) {
                if (instructions != null) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                }
                // remember the instructions; they are applied to the next created node
                instructions = comment;
                continue;
            }
        } else if (type.equals("#data")) {
            tag = "";
            content = ((DataNode) node).getWholeData();
            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {
                continue;
            }
        } else if (type.equals("#text")) {
            // Text-only nodes: strip the trailing newline and put the text into the "content" field
            tag = "";
            if (isDeployment) {
                // deployment keeps the raw text (including whitespace-only text)
                content = trimTrailingNewline(((TextNode) node).getWholeText());
                if (content == null || content.length() == 0) {
                    continue;
                }
            } else {
                content = trimTrailingNewline(((TextNode) node).text());
                if (StringUtils.isBlank(content)) {
                    continue;
                }
            }
        }
        org.structr.web.entity.dom.DOMNode newNode = null;
        // create node
        if (StringUtils.isBlank(tag)) {
            if (page != null) {
                // create comment or content node
                if (!StringUtils.isBlank(comment)) {
                    final PropertyKey<String> contentTypeKey = StructrApp.key(Content.class, "contentType");
                    newNode = (DOMNode) page.createComment(comment);
                    newNode.setProperty(contentTypeKey, "text/html");
                } else {
                    newNode = (Content) page.createTextNode(content);
                }
            }
        } else if ("structr:template".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode template = null;
                if (DeployCommand.isUuid(src)) {
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class).and(GraphObject.id, src).getFirst();
                    if (template == null) {
                        System.out.println("##################################### template with UUID " + src + " not found, this is a known bug");
                    }
                } else if (DeployCommand.endsWithUuid(src)) {
                    // the last 32 characters of src are used as the UUID
                    final String uuid = src.substring(src.length() - 32);
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class).and(GraphObject.id, uuid).getFirst();
                    if (template == null) {
                        System.out.println("##################################### template with UUID " + uuid + " not found, this is a known bug");
                    }
                } else {
                    // lookup by name: shared component first, then template, else create a new template
                    template = Importer.findSharedComponentByName(src);
                    if (template == null) {
                        template = Importer.findTemplateByName(src);
                        if (template == null) {
                            template = createNewTemplateNode(parent, node.childNodes());
                            isNewTemplateOrComponent = true;
                        }
                    }
                }
                if (template != null) {
                    newNode = template;
                    if (template.isSharedComponent()) {
                        newNode = (DOMNode) template.cloneNode(false);
                        newNode.setSharedComponent(template);
                        newNode.setOwnerDocument(page);
                    } else if (page != null) {
                        newNode.setOwnerDocument(page);
                    }
                } else {
                    logger.warn("Unable to find template or shared component {}, template ignored!", src);
                }
            } else {
                logger.warn("Invalid template definition, missing src attribute!");
            }
        } else if ("structr:component".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode component = null;
                if (DeployCommand.isUuid(src)) {
                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst();
                } else {
                    component = Importer.findSharedComponentByName(src);
                }
                if (component == null) {
                    component = createSharedComponent(node);
                }
                // children of a component reference are never imported below
                isNewTemplateOrComponent = true;
                if (component != null) {
                    newNode = (DOMNode) component.cloneNode(false);
                    newNode.setSharedComponent(component);
                    newNode.setOwnerDocument(page);
                } else {
                    logger.warn("Unable to find shared component {} - ignored!", src);
                }
            } else {
                logger.warn("Invalid component definition, missing src attribute!");
            }
        } else {
            if (page != null) {
                newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true);
            }
            if (newNode == null) {
                final PropertyKey<Boolean> hideOnDetailKey = StructrApp.key(DOMNode.class, "hideOnDetail");
                final PropertyKey<Boolean> hideOnIndexKey = StructrApp.key(DOMNode.class, "hideOnIndex");
                final PropertyKey<String> tagKey = StructrApp.key(DOMElement.class, "tag");
                // experimental: create DOM element with literal tag
                newNode = (DOMElement) app.create(DOMElement.class, new NodeAttribute(tagKey, node.nodeName()), new NodeAttribute(hideOnDetailKey, false), new NodeAttribute(hideOnIndexKey, false));
                if (newNode != null && page != null) {
                    newNode.doAdopt(page);
                }
                /* disabled / replaced by implementation above
                newNode = createNewHTMLTemplateNodeForUnsupportedTag(parent, node);
                isNewTemplateOrComponent = true;
                */
            }
        }
        if (newNode != null) {
            // save root element for later use
            if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) {
                rootElement = newNode;
            }
            // set linkable
            if (res != null && newNode instanceof LinkSource) {
                ((LinkSource) newNode).setLinkable(res);
            }
            // container for bulk setProperties()
            final PropertyMap newNodeProperties = new PropertyMap();
            final Class newNodeType = newNode.getClass();
            newNodeProperties.put(AbstractNode.visibleToPublicUsers, publicVisible);
            newNodeProperties.put(AbstractNode.visibleToAuthenticatedUsers, authVisible);
            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {
                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_id"), id);
            }
            if (StringUtils.isNotBlank(classString.toString())) {
                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_class"), StringUtils.trim(classString.toString()));
            }
            for (Attribute nodeAttr : node.attributes()) {
                final String key = nodeAttr.getKey();
                if (!key.equals("text")) {
                    // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute
                    final String value = nodeAttr.getValue();
                    if (key.startsWith("data-")) {
                        if (key.startsWith(DATA_META_PREFIX)) {
                            // convert data-structr-meta-* attributes to local camel case properties on the node,
                            int l = DATA_META_PREFIX.length();
                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' }).replaceAll("-", "");
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));
                            if (value != null) {
                                // store value using actual input converter
                                final PropertyKey actualKey = StructrApp.getConfiguration().getPropertyKeyForJSONName(newNodeType, camelCaseKey, false);
                                if (actualKey != null) {
                                    final PropertyConverter converter = actualKey.inputConverter(securityContext);
                                    if (converter != null) {
                                        final Object convertedValue = converter.convert(value);
                                        newNodeProperties.put(actualKey, convertedValue);
                                    } else {
                                        newNodeProperties.put(actualKey, value);
                                    }
                                } else {
                                    logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey);
                                }
                            }
                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) {
                            // don't convert data-structr-* attributes as they are internal
                            final PropertyKey propertyKey = StructrApp.getConfiguration().getPropertyKeyForJSONName(newNodeType, key);
                            if (propertyKey != null) {
                                final PropertyConverter inputConverter = propertyKey.inputConverter(securityContext);
                                if (value != null && inputConverter != null) {
                                    // reuse the converter fetched above instead of looking it up again
                                    newNodeProperties.put(propertyKey, inputConverter.convert(value));
                                } else {
                                    newNodeProperties.put(propertyKey, value);
                                }
                            }
                        } else {
                            // store data-* attributes in node
                            final PropertyKey propertyKey = new StringProperty(key);
                            if (value != null) {
                                newNodeProperties.put(propertyKey, value);
                            }
                        }
                    } else {
                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");
                        // outside of deployment, local static link targets are replaced by link expressions
                        if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive && !isDeployment) {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), "${link.path}?${link.version}");
                        } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive && !isAnchor && !isStructrLib && !isDeployment) {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), "${link.path}");
                        } else {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), value);
                        }
                    }
                }
            }
            // bulk set properties on new node
            newNode.setProperties(securityContext, newNodeProperties);
            if ("script".equals(tag)) {
                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);
                if (contentType == null) {
                    // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                    newNode.setProperty(typeKey, "text/javascript");
                } else if (contentType.equals("application/schema+json")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        // Import schema JSON
                        SchemaJsonImporter.importSchemaJson(source);
                    }
                } else if (contentType.equals("application/x-structr-script")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        try {
                            Actions.execute(securityContext, null, source, null);
                        } catch (UnlicensedException ex) {
                            ex.log(logger);
                        }
                    }
                    // executed scripts are not attached to the parent and their children are not imported
                    continue;
                } else if (contentType.equals("application/x-structr-javascript")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        try {
                            Actions.execute(securityContext, null, source, null);
                        } catch (UnlicensedException ex) {
                            ex.log(logger);
                        }
                    }
                    // executed scripts are not attached to the parent and their children are not imported
                    continue;
                }
            } else if ("style".equals(tag)) {
                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);
                if ("text/css".equals(contentType)) {
                    // parse content of style elements and add referenced files to list of resources to be downloaded
                    for (final Node styleContentNode : node.childNodes()) {
                        final String source = styleContentNode.toString();
                        try {
                            // Import referenced resources
                            processCss(source, originalUrl);
                        } catch (IOException ex) {
                            logger.warn("Couldn't process CSS source", ex);
                        }
                    }
                }
            }
            if (instructions != null) {
                if (instructions.contains("@structr:content") && !(newNode instanceof Content)) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                } else {
                    // apply instructions to new DOM element
                    if (commentHandler != null) {
                        commentHandler.handleComment(page, newNode, instructions, true);
                    }
                }
                instructions = null;
            }
            // allow parent to be null to prevent direct child relationship
            if (parent != null) {
                // special handling for <head> elements
                if (newNode instanceof Head && parent instanceof Body) {
                    final org.w3c.dom.Node html = parent.getParentNode();
                    html.insertBefore(newNode, parent);
                } else {
                    parent.appendChild(newNode);
                }
            }
            // Step down and process child nodes except for newly created templates
            if (!isNewTemplateOrComponent) {
                createChildNodes(node, newNode, page, removeHashAttribute, depth + 1);
            }
        }
    }
    // reset instructions when leaving a level
    if (instructions != null) {
        createEmptyContentNode(page, parent, commentHandler, instructions);
        instructions = null;
    }
    return rootElement;
}
Aggregations