Search in sources :

Example 71 with Node

use of org.jsoup.nodes.Node in project Lightning-Browser by anthonycr.

the class OutputFormatter method appendTextSkipHidden.

/**
 * Recursively appends the text found beneath {@code e} to {@code accum}.
 * <p>
 * Children for which {@code unlikely(child)} returns true are skipped together with
 * their whole subtree. Text nodes contribute their text; before descending into an
 * element a single space is appended when it is a block element and the non-empty
 * buffer does not already end in whitespace, or otherwise when it is a {@code br} tag.
 *
 * @param e the element whose child nodes are walked
 * @param accum the buffer receiving the collected text
 * @param indent the nesting level; only passed on (incremented) to recursive calls
 */
private void appendTextSkipHidden(@NonNull Element e, @NonNull StringBuilder accum, int indent) {
    for (Node current : e.childNodes()) {
        if (unlikely(current)) {
            continue;
        }
        if (current instanceof TextNode) {
            accum.append(((TextNode) current).text());
            continue;
        }
        if (!(current instanceof Element)) {
            // neither text nor element: contributes nothing
            continue;
        }
        final Element nested = (Element) current;
        final boolean blockNeedsSeparator = accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
        // the tag name is only consulted when the block rule did not already apply
        if (blockNeedsSeparator || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
Also used : Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode)

Example 72 with Node

use of org.jsoup.nodes.Node in project onebusaway-application-modules by camsys.

the class GtfsFullValidationTaskJob method checkOutputForErrors.

/**
 * Parses the validation output HTML file and reports every error listed in it.
 * <p>
 * The first {@code ul} that follows an issue header whose own text contains
 * "Errors:" is looked up, and each {@code li} selected beneath it is flattened into
 * one line of the form {@code message,detail} that is passed to
 * {@code result.addError}. Nothing is reported when no such list exists or the
 * selected items carry no text.
 *
 * @param agencyId - the agency id of the HTML file being checked (not read by this method's body)
 * @param outputFile - the name of the HTML file to be checked.
 * @throws IOException propagated from reading and parsing the file
 */
private void checkOutputForErrors(String agencyId, String outputFile) throws IOException {
    final Document report = Jsoup.parse(new File(outputFile), "UTF-8");
    final Elements errorLists = report.select(".issueHeader:containsOwn(Errors:) ~ ul");
    final Element firstList = (errorLists == null) ? null : errorLists.first();
    if (firstList == null) {
        return;
    }
    final Elements items = firstList.select("li");
    if (items == null || !items.hasText()) {
        return;
    }
    // one reported line per <li>
    for (Node item : items) {
        final StringBuilder message = new StringBuilder();
        String detail = "";
        for (Node part : item.childNodes()) {
            if (part instanceof TextNode) {
                message.append(((TextNode) part).text());
                continue;
            }
            if (!(part instanceof Element)) {
                continue;
            }
            switch (((Element) part).tagName()) {
                case "br":
                    // line breaks become a single space in the message
                    message.append(" ");
                    break;
                case "div":
                    message.append(parseDivData(part));
                    break;
                case "table":
                    // a later table replaces the detail text of an earlier one
                    detail = parseTableData(part);
                    break;
                default:
                    message.append(((Element) part).text());
                    break;
            }
        }
        result.addError(message.append(",").append(detail).toString());
    }
}
Also used : Element(org.jsoup.nodes.Element) TextNode(org.jsoup.nodes.TextNode) Node(org.jsoup.nodes.Node) TextNode(org.jsoup.nodes.TextNode) Document(org.jsoup.nodes.Document) Elements(org.jsoup.select.Elements) File(java.io.File)

Example 73 with Node

use of org.jsoup.nodes.Node in project structr by structr.

the class MicroformatParser method unwrap.

/**
 * Unwraps every block element in the subtree of {@code element} that has no class
 * names left after {@code removeEmpty} has been applied to its class-name set.
 * <p>
 * Candidates are collected during the traversal and unwrapped afterwards, in the
 * order in which they were encountered.
 *
 * @param element the root of the subtree to traverse
 */
private void unwrap(final Element element) {
    final Set<Element> classlessBlocks = new LinkedHashSet<>();
    element.traverse(new NodeVisitor() {

        @Override
        public void head(Node node, int depth) {
            if (!(node instanceof Element)) {
                return;
            }
            final Element candidate = (Element) node;
            if (!candidate.isBlock()) {
                return;
            }
            final Set<String> classNames = candidate.classNames();
            removeEmpty(classNames);
            if (classNames.isEmpty()) {
                classlessBlocks.add(candidate);
            }
        }

        @Override
        public void tail(Node node, int depth) {
            // nothing to do when leaving a node
        }
    });
    // unwrap only after the traversal has finished
    for (final Element block : classlessBlocks) {
        block.unwrap();
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) Element(org.jsoup.nodes.Element) Node(org.jsoup.nodes.Node) NodeVisitor(org.jsoup.select.NodeVisitor)

Example 74 with Node

use of org.jsoup.nodes.Node in project structr by structr.

the class Importer method createChildNodes.

/**
 * Recursively converts the children of a jsoup node into Structr DOM nodes.
 * <p>
 * For every child of {@code startNode} a comment, content, template, shared-component
 * or element node is created, the child's HTML attributes are copied into node
 * properties, the new node is attached to {@code parent} (when given), and the child's
 * own children are processed recursively unless the node was flagged as a new template
 * or component. Comments that the comment handler recognises as instructions are not
 * imported as nodes; they are applied to the next node created on the same level, or
 * turned into an empty content node if no such node follows.
 *
 * @param startNode the jsoup node whose children are processed
 * @param parent the DOM node new nodes are attached to; if {@code null}, no parent/child relationship is created
 * @param page the page used to create nodes and as their owner document; checked for {@code null} in most branches
 * @param removeHashAttribute whether the {@code data-structr-hash} attribute is removed from elements before their attributes are imported
 * @param depth the current recursion depth; only passed on (incremented) to recursive calls
 * @return the first non-comment node created on this level, or {@code null} if there is none
 * @throws FrameworkException propagated from the node creation and property operations called here
 */
private DOMNode createChildNodes(final Node startNode, final DOMNode parent, final Page page, final boolean removeHashAttribute, final int depth) throws FrameworkException {
    DOMNode rootElement = null;
    Linkable res = null;
    String instructions = null;
    final List<Node> children = startNode.childNodes();
    for (Node node : children) {
        String tag = node.nodeName();
        // clean tag: keep only letters, digits and the characters # : . _ -
        if (tag != null) {
            // The hyphen must be last in the character class. Written as ".-_" it forms a
            // range from '.' to '_' that keeps characters such as '/', '<' and '=' and
            // strips the hyphen of tags like "my-element".
            tag = tag.replaceAll("[^a-zA-Z0-9#:._-]+", "");
        }
        final StringBuilder classString = new StringBuilder();
        final String type = CaseHelper.toUpperCamelCase(tag);
        String comment = null;
        String content = null;
        String id = null;
        boolean isNewTemplateOrComponent = false;
        if (ignoreElementNames.contains(type)) {
            continue;
        }
        if (node instanceof Element) {
            final Element el = ((Element) node);
            final Set<String> classes = el.classNames();
            for (String cls : classes) {
                classString.append(cls).append(" ");
            }
            id = el.id();
            // do not download files when called from DeployCommand!
            if (!isDeployment) {
                String downloadAddressAttr = srcElements.contains(tag) ? "src" : hrefElements.contains(tag) ? "href" : null;
                if (downloadAddressAttr != null && StringUtils.isNotBlank(node.attr(downloadAddressAttr))) {
                    String downloadAddress = node.attr(downloadAddressAttr);
                    res = downloadFile(downloadAddress, originalUrl);
                } else {
                    res = null;
                }
            }
            if (removeHashAttribute) {
                // Remove data-structr-hash attribute
                node.removeAttr("data-structr-hash");
            }
        }
        // Data and comment nodes: put the text into the "comment"/"content" field, skipping blank ones
        if (type.equals("#comment")) {
            comment = ((Comment) node).getData();
            tag = "";
            // Don't add content node for whitespace
            if (StringUtils.isBlank(comment)) {
                continue;
            }
            // store for later use
            commentSource.append(comment).append("\n");
            // check if comment contains instructions
            if (commentHandler != null && commentHandler.containsInstructions(comment)) {
                if (instructions != null) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                }
                instructions = comment;
                continue;
            }
        } else if (type.equals("#data")) {
            tag = "";
            content = ((DataNode) node).getWholeData();
            // Don't add content node for whitespace
            if (StringUtils.isBlank(content)) {
                continue;
            }
        } else if (type.equals("#text")) {
            // Text-only nodes: Trim the trailing newline and put the text into the "content" field
            tag = "";
            if (isDeployment) {
                // deployment keeps the whole text and only drops empty content
                content = trimTrailingNewline(((TextNode) node).getWholeText());
                if (content == null || content.length() == 0) {
                    continue;
                }
            } else {
                content = trimTrailingNewline(((TextNode) node).text());
                if (StringUtils.isBlank(content)) {
                    continue;
                }
            }
        }
        org.structr.web.entity.dom.DOMNode newNode = null;
        // create node
        if (StringUtils.isBlank(tag)) {
            if (page != null) {
                // create comment or content node
                if (!StringUtils.isBlank(comment)) {
                    final PropertyKey<String> contentTypeKey = StructrApp.key(Content.class, "contentType");
                    newNode = (DOMNode) page.createComment(comment);
                    newNode.setProperty(contentTypeKey, "text/html");
                } else {
                    newNode = (Content) page.createTextNode(content);
                }
            }
        } else if ("structr:template".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode template = null;
                if (DeployCommand.isUuid(src)) {
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class).and(GraphObject.id, src).getFirst();
                    if (template == null) {
                        System.out.println("##################################### template with UUID " + src + " not found, this is a known bug");
                    }
                } else if (DeployCommand.endsWithUuid(src)) {
                    // the last 32 characters of src are used as the UUID
                    final String uuid = src.substring(src.length() - 32);
                    template = (DOMNode) StructrApp.getInstance().nodeQuery(NodeInterface.class).and(GraphObject.id, uuid).getFirst();
                    if (template == null) {
                        System.out.println("##################################### template with UUID " + uuid + " not found, this is a known bug");
                    }
                } else {
                    // lookup by name: shared component first, then template, else create a new template
                    template = Importer.findSharedComponentByName(src);
                    if (template == null) {
                        template = Importer.findTemplateByName(src);
                        if (template == null) {
                            template = createNewTemplateNode(parent, node.childNodes());
                            isNewTemplateOrComponent = true;
                        }
                    }
                }
                if (template != null) {
                    newNode = template;
                    if (template.isSharedComponent()) {
                        newNode = (DOMNode) template.cloneNode(false);
                        newNode.setSharedComponent(template);
                        newNode.setOwnerDocument(page);
                    } else if (page != null) {
                        newNode.setOwnerDocument(page);
                    }
                } else {
                    logger.warn("Unable to find template or shared component {}, template ignored!", src);
                }
            } else {
                logger.warn("Invalid template definition, missing src attribute!");
            }
        } else if ("structr:component".equals(tag)) {
            final String src = node.attr("src");
            if (src != null) {
                DOMNode component = null;
                if (DeployCommand.isUuid(src)) {
                    component = app.nodeQuery(DOMNode.class).and(GraphObject.id, src).getFirst();
                } else {
                    component = Importer.findSharedComponentByName(src);
                }
                if (component == null) {
                    component = createSharedComponent(node);
                }
                // children of a component are never processed by this method
                isNewTemplateOrComponent = true;
                if (component != null) {
                    newNode = (DOMNode) component.cloneNode(false);
                    newNode.setSharedComponent(component);
                    newNode.setOwnerDocument(page);
                } else {
                    logger.warn("Unable to find shared component {} - ignored!", src);
                }
            } else {
                logger.warn("Invalid component definition, missing src attribute!");
            }
        } else {
            if (page != null) {
                newNode = (org.structr.web.entity.dom.DOMElement) page.createElement(tag, true);
            }
            if (newNode == null) {
                final PropertyKey<Boolean> hideOnDetailKey = StructrApp.key(DOMNode.class, "hideOnDetail");
                final PropertyKey<Boolean> hideOnIndexKey = StructrApp.key(DOMNode.class, "hideOnIndex");
                final PropertyKey<String> tagKey = StructrApp.key(DOMElement.class, "tag");
                // experimental: create DOM element with literal tag
                newNode = (DOMElement) app.create(DOMElement.class, new NodeAttribute(tagKey, node.nodeName()), new NodeAttribute(hideOnDetailKey, false), new NodeAttribute(hideOnIndexKey, false));
                if (newNode != null && page != null) {
                    newNode.doAdopt(page);
                }
                /* disabled / replaced by implementation above
                newNode = createNewHTMLTemplateNodeForUnsupportedTag(parent, node);
                isNewTemplateOrComponent = true;
                */
            }
        }
        if (newNode != null) {
            // save root element for later use
            if (rootElement == null && !(newNode instanceof org.structr.web.entity.dom.Comment)) {
                rootElement = newNode;
            }
            // set linkable
            if (res != null && newNode instanceof LinkSource) {
                ((LinkSource) newNode).setLinkable(res);
            }
            // container for bulk setProperties()
            final PropertyMap newNodeProperties = new PropertyMap();
            final Class newNodeType = newNode.getClass();
            newNodeProperties.put(AbstractNode.visibleToPublicUsers, publicVisible);
            newNodeProperties.put(AbstractNode.visibleToAuthenticatedUsers, authVisible);
            // "id" attribute: Put it into the "_html_id" field
            if (StringUtils.isNotBlank(id)) {
                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_id"), id);
            }
            if (StringUtils.isNotBlank(classString.toString())) {
                newNodeProperties.put(StructrApp.key(DOMElement.class, "_html_class"), StringUtils.trim(classString.toString()));
            }
            for (Attribute nodeAttr : node.attributes()) {
                final String key = nodeAttr.getKey();
                if (!key.equals("text")) {
                    // Don't add text attribute as _html_text because the text is already contained in the 'content' attribute
                    final String value = nodeAttr.getValue();
                    if (key.startsWith("data-")) {
                        if (key.startsWith(DATA_META_PREFIX)) {
                            // convert data-structr-meta-* attributes to local camel case properties on the node,
                            int l = DATA_META_PREFIX.length();
                            String upperCaseKey = WordUtils.capitalize(key.substring(l), new char[] { '-' }).replaceAll("-", "");
                            String camelCaseKey = key.substring(l, l + 1).concat(upperCaseKey.substring(1));
                            if (value != null) {
                                // store value using actual input converter
                                final PropertyKey actualKey = StructrApp.getConfiguration().getPropertyKeyForJSONName(newNodeType, camelCaseKey, false);
                                if (actualKey != null) {
                                    final PropertyConverter converter = actualKey.inputConverter(securityContext);
                                    if (converter != null) {
                                        final Object convertedValue = converter.convert(value);
                                        newNodeProperties.put(actualKey, convertedValue);
                                    } else {
                                        newNodeProperties.put(actualKey, value);
                                    }
                                } else {
                                    logger.warn("Unknown meta property key {}, ignoring.", camelCaseKey);
                                }
                            }
                        } else if (key.startsWith(DATA_STRUCTR_PREFIX)) {
                            // don't convert data-structr-* attributes as they are internal
                            final PropertyKey propertyKey = StructrApp.getConfiguration().getPropertyKeyForJSONName(newNodeType, key);
                            if (propertyKey != null) {
                                final PropertyConverter inputConverter = propertyKey.inputConverter(securityContext);
                                if (value != null && inputConverter != null) {
                                    // reuse the converter obtained above instead of looking it up a second time
                                    newNodeProperties.put(propertyKey, inputConverter.convert(value));
                                } else {
                                    newNodeProperties.put(propertyKey, value);
                                }
                            }
                        } else {
                            // store data-* attributes in node
                            final PropertyKey propertyKey = new StringProperty(key);
                            if (value != null) {
                                newNodeProperties.put(propertyKey, value);
                            }
                        }
                    } else {
                        boolean notBlank = StringUtils.isNotBlank(value);
                        boolean isAnchor = notBlank && value.startsWith("#");
                        boolean isLocal = notBlank && !value.startsWith("http");
                        boolean isActive = notBlank && value.contains("${");
                        boolean isStructrLib = notBlank && value.startsWith("/structr/js/");
                        // outside of deployment, local static href/src values are replaced by link expressions
                        if ("link".equals(tag) && "href".equals(key) && isLocal && !isActive && !isDeployment) {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), "${link.path}?${link.version}");
                        } else if (("href".equals(key) || "src".equals(key)) && isLocal && !isActive && !isAnchor && !isStructrLib && !isDeployment) {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), "${link.path}");
                        } else {
                            newNodeProperties.put(new StringProperty(PropertyView.Html.concat(key)), value);
                        }
                    }
                }
            }
            // bulk set properties on new node
            newNode.setProperties(securityContext, newNodeProperties);
            if ("script".equals(tag)) {
                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);
                if (contentType == null) {
                    // Set default type of script tag to "text/javascript" to ensure inline JS gets imported properly
                    newNode.setProperty(typeKey, "text/javascript");
                } else if (contentType.equals("application/schema+json")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        // Import schema JSON
                        SchemaJsonImporter.importSchemaJson(source);
                    }
                } else if (contentType.equals("application/x-structr-script")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        try {
                            Actions.execute(securityContext, null, source, null);
                        } catch (UnlicensedException ex) {
                            ex.log(logger);
                        }
                    }
                    // executed scripts are neither attached to the parent nor descended into
                    continue;
                } else if (contentType.equals("application/x-structr-javascript")) {
                    for (final Node scriptContentNode : node.childNodes()) {
                        final String source = scriptContentNode.toString();
                        try {
                            Actions.execute(securityContext, null, source, null);
                        } catch (UnlicensedException ex) {
                            ex.log(logger);
                        }
                    }
                    // executed scripts are neither attached to the parent nor descended into
                    continue;
                }
            } else if ("style".equals(tag)) {
                final PropertyKey<String> typeKey = StructrApp.key(Input.class, "_html_type");
                final String contentType = newNode.getProperty(typeKey);
                if ("text/css".equals(contentType)) {
                    // parse content of style elements and add referenced files to list of resources to be downloaded
                    for (final Node styleContentNode : node.childNodes()) {
                        final String source = styleContentNode.toString();
                        try {
                            // Import referenced resources
                            processCss(source, originalUrl);
                        } catch (IOException ex) {
                            logger.warn("Couldn't process CSS source", ex);
                        }
                    }
                }
            }
            if (instructions != null) {
                if (instructions.contains("@structr:content") && !(newNode instanceof Content)) {
                    // unhandled instructions from previous iteration => empty content element
                    createEmptyContentNode(page, parent, commentHandler, instructions);
                } else {
                    // apply instructions to new DOM element
                    if (commentHandler != null) {
                        commentHandler.handleComment(page, newNode, instructions, true);
                    }
                }
                instructions = null;
            }
            // allow parent to be null to prevent direct child relationship
            if (parent != null) {
                // special handling for <head> elements
                if (newNode instanceof Head && parent instanceof Body) {
                    final org.w3c.dom.Node html = parent.getParentNode();
                    html.insertBefore(newNode, parent);
                } else {
                    parent.appendChild(newNode);
                }
            }
            // Step down and process child nodes except for newly created templates
            if (!isNewTemplateOrComponent) {
                createChildNodes(node, newNode, page, removeHashAttribute, depth + 1);
            }
        }
    }
    // reset instructions when leaving a level
    if (instructions != null) {
        createEmptyContentNode(page, parent, commentHandler, instructions);
        instructions = null;
    }
    return rootElement;
}
Also used : LinkSource(org.structr.web.entity.LinkSource) NodeAttribute(org.structr.core.graph.NodeAttribute) Attribute(org.jsoup.nodes.Attribute) Node(org.jsoup.nodes.Node) DOMNode(org.structr.web.entity.dom.DOMNode) DataNode(org.jsoup.nodes.DataNode) TextNode(org.jsoup.nodes.TextNode) AbstractNode(org.structr.core.entity.AbstractNode) DOMElement(org.structr.web.entity.dom.DOMElement) Element(org.jsoup.nodes.Element) StringProperty(org.structr.core.property.StringProperty) DOMElement(org.structr.web.entity.dom.DOMElement) Input(org.structr.web.entity.html.Input) UnlicensedException(org.structr.common.error.UnlicensedException) DataNode(org.jsoup.nodes.DataNode) PropertyConverter(org.structr.core.converter.PropertyConverter) DOMNode(org.structr.web.entity.dom.DOMNode) Body(org.structr.web.entity.html.Body) NodeInterface(org.structr.core.graph.NodeInterface) NodeAttribute(org.structr.core.graph.NodeAttribute) Head(org.structr.web.entity.html.Head) IOException(java.io.IOException) PropertyMap(org.structr.core.property.PropertyMap) Content(org.structr.web.entity.dom.Content) DOMNode(org.structr.web.entity.dom.DOMNode) Linkable(org.structr.web.entity.Linkable) GraphObject(org.structr.core.GraphObject) PropertyKey(org.structr.core.property.PropertyKey)

Example 75 with Node

use of org.jsoup.nodes.Node in project structr by structr.

the class Importer method parse.

/**
 * Parses the page source into {@code parsedDocument}.
 * <p>
 * If {@code code} is not blank (e.g. previously read by {@link Importer#readPage()}) it
 * is parsed directly; otherwise the source is first fetched from {@code address} and
 * parsed as a complete HTML document. In deployment mode void tags are given a trailing
 * slash and the XML parser is used; otherwise the regular HTML parser is used.
 *
 * @param fragment whether non-blank {@code code} is parsed as a fragment placed into the body of a shell document instead of as a complete document
 * @return always {@code true}
 * @throws FrameworkException declared by the signature; presumably raised by {@code init()} or the HTTP fetch, which are not visible here
 */
public boolean parse(final boolean fragment) throws FrameworkException {
    init();
    if (StringUtils.isNotBlank(code)) {
        if (!isDeployment) {
            logger.info("##### Start parsing code for page {} #####", new Object[] { name });
        } else {
            // Add a trailing slash to all void/self-closing tags so the XML parser can parse it correctly.
            // The lookahead requires the tag name to end right after the listed word, so that
            // tags merely starting with one of the names (e.g. <colgroup> vs. <col>) are left alone.
            code = code.replaceAll("<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(?=[\\s/>])([^>]*)>", "<$1$2/>");
        }
        if (fragment) {
            if (isDeployment) {
                final List<Node> nodeList = Parser.parseXmlFragment(code, "");
                parsedDocument = Document.createShell("");
                final Element body = parsedDocument.body();
                // work on an array copy of the parsed node list while the nodes are moved into the body
                final Node[] nodes = nodeList.toArray(new Node[nodeList.size()]);
                // detach all nodes but the first; every node is then appended to the body below
                for (int i = nodes.length - 1; i > 0; i--) {
                    nodes[i].remove();
                }
                for (Node node : nodes) {
                    body.appendChild(node);
                }
            } else {
                parsedDocument = Jsoup.parseBodyFragment(code);
            }
        } else {
            if (isDeployment) {
                parsedDocument = Jsoup.parse(code, "", Parser.xmlParser());
            } else {
                parsedDocument = Jsoup.parse(code);
            }
        }
    } else {
        // no code available: fetch it from the configured address
        if (!isDeployment) {
            logger.info("##### Start fetching {} for page {} #####", new Object[] { address, name });
        }
        code = HttpHelper.get(address);
        parsedDocument = Jsoup.parse(code);
    }
    return true;
}
Also used : Node(org.jsoup.nodes.Node) DOMNode(org.structr.web.entity.dom.DOMNode) DataNode(org.jsoup.nodes.DataNode) TextNode(org.jsoup.nodes.TextNode) AbstractNode(org.structr.core.entity.AbstractNode) DOMElement(org.structr.web.entity.dom.DOMElement) Element(org.jsoup.nodes.Element)

Aggregations

Node (org.jsoup.nodes.Node)75 TextNode (org.jsoup.nodes.TextNode)52 Element (org.jsoup.nodes.Element)48 Document (org.jsoup.nodes.Document)29 ArrayList (java.util.ArrayList)19 Elements (org.jsoup.select.Elements)13 Test (org.junit.jupiter.api.Test)8 IOException (java.io.IOException)7 Copy (de.geeksfactory.opacclient.objects.Copy)5 DetailedItem (de.geeksfactory.opacclient.objects.DetailedItem)5 HashMap (java.util.HashMap)5 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)5 JSONException (org.json.JSONException)5 NotReachableException (de.geeksfactory.opacclient.networking.NotReachableException)4 Detail (de.geeksfactory.opacclient.objects.Detail)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 URI (java.net.URI)4 Matcher (java.util.regex.Matcher)4 NameValuePair (org.apache.http.NameValuePair)4 BasicNameValuePair (org.apache.http.message.BasicNameValuePair)4