Search in sources :

Example 1 with DOMParser

use of org.apache.xerces.parsers.DOMParser in project gocd by gocd.

the class NokogiriDomParser method initialize.

/**
 * Prepares the parser configuration and the companion DTD parser.
 *
 * <p>When no configuration has been assigned yet, one is chosen based on the
 * {@code xInclude} flag. A fresh {@link DTDConfiguration} is then created, used to
 * build the {@code dtd} parser, and registered on {@code config} as both the DTD
 * handler and the DTD content model handler.
 */
protected void initialize() {
    if (config == null) {
        // Only pick a configuration when none was assigned beforehand.
        config = xInclude ? new XIncludeParserConfiguration() : getXMLParserConfiguration();
    }
    // The same DTDConfiguration instance backs the dtd parser and receives DTD events from config.
    final DTDConfiguration dtdHandlerConfig = new DTDConfiguration();
    dtd = new DOMParser(dtdHandlerConfig);
    config.setDTDHandler(dtdHandlerConfig);
    config.setDTDContentModelHandler(dtdHandlerConfig);
}
Also used : XIncludeParserConfiguration(org.apache.xerces.parsers.XIncludeParserConfiguration) DTDConfiguration(org.cyberneko.dtd.DTDConfiguration) DOMParser(org.apache.xerces.parsers.DOMParser)

Example 2 with DOMParser

use of org.apache.xerces.parsers.DOMParser in project ofbiz-framework by apache.

the class UtilXml method readXmlDocument.

/**
 * Reads an XML document from a stream, optionally recording the source position of each node.
 *
 * <p>When {@code withPosition} is {@code false} this simply delegates to
 * {@code readXmlDocument(is, validate, docDescription)}. Otherwise the stream is parsed with a
 * {@link DOMParser} subclass that stores three user-data entries on the nodes it visits:
 * {@code "systemId"}, {@code "startLine"} and {@code "startColumn"}, taken from the parser's
 * {@link XMLLocator} at the time the corresponding parse event is reported. A node that already
 * carries {@code "startLine"} is left untouched.
 *
 * @param is the stream to parse; when {@code null} a warning is logged and {@code null} is returned
 * @param validate whether to turn on the validation and schema-validation features and install
 *        the local entity resolver / error handler
 * @param docDescription description of the document; used as the input source system id and in
 *        log output
 * @param withPosition whether position user data should be attached to the nodes
 * @return the parsed document, or {@code null} when {@code is} is {@code null}
 * @throws SAXException if the parser reports an error or rejects a feature setting
 * @throws ParserConfigurationException declared for compatibility with the delegated overload
 * @throws java.io.IOException if reading the stream fails
 */
public static Document readXmlDocument(InputStream is, boolean validate, String docDescription, boolean withPosition) throws SAXException, ParserConfigurationException, java.io.IOException {
    if (!withPosition) {
        return readXmlDocument(is, validate, docDescription);
    }
    if (is == null) {
        Debug.logWarning("[UtilXml.readXmlDocument] InputStream was null, doing nothing", module);
        return null;
    }
    long startTime = System.currentTimeMillis();
    DOMParser parser = new DOMParser() {

        private XMLLocator locator = null;

        /**
         * Returns the element the parser is currently building, or null when the property
         * yields no node or cannot be read (the failure is logged as a warning).
         */
        private Node currentElementNode() {
            try {
                return (Node) getProperty("http://apache.org/xml/properties/dom/current-element-node");
            } catch (SAXException ex) {
                Debug.logWarning(ex, module);
                return null;
            }
        }

        /** Stores the locator's current system id, line and column on the node, once per node. */
        private void setLineColumn(Node node) {
            if (locator == null) {
                throw new java.lang.IllegalStateException("XMLLocator is null");
            }
            if (node.getUserData("startLine") != null) {
                // Position already recorded by an earlier event; keep the first one.
                return;
            }
            node.setUserData("systemId", locator.getLiteralSystemId(), null);
            node.setUserData("startLine", locator.getLineNumber(), null);
            node.setUserData("startColumn", locator.getColumnNumber(), null);
        }

        /** Records the position on the element currently being built, if there is one. */
        private void setLineColumn() {
            Node node = currentElementNode();
            if (node != null) {
                setLineColumn(node);
            }
        }

        /** Records the position on the last child of the element currently being built. */
        private void setLastChildLineColumn() {
            Node node = currentElementNode();
            if (node == null) {
                return;
            }
            // getLastChild() returns null when the element has no children, which happens if
            // the parent parser did not append a node for this event. Skip instead of failing.
            Node lastChild = node.getLastChild();
            if (lastChild != null) {
                setLineColumn(lastChild);
            }
        }

        @Override
        public void startGeneralEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs) throws XNIException {
            super.startGeneralEntity(name, identifier, encoding, augs);
            setLineColumn();
        }

        @Override
        public void comment(XMLString text, Augmentations augs) throws XNIException {
            super.comment(text, augs);
            setLastChildLineColumn();
        }

        @Override
        public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException {
            super.processingInstruction(target, data, augs);
            setLastChildLineColumn();
        }

        @Override
        public void startDocument(XMLLocator locator, String encoding, NamespaceContext namespaceContext, Augmentations augs) throws XNIException {
            super.startDocument(locator, encoding, namespaceContext, augs);
            // Keep the locator so later events can query the current position.
            this.locator = locator;
            setLineColumn();
        }

        @Override
        public void doctypeDecl(String rootElement, String publicId, String systemId, Augmentations augs) throws XNIException {
            // No position data is recorded for the doctype declaration.
            super.doctypeDecl(rootElement, publicId, systemId, augs);
        }

        @Override
        public void startElement(QName elementQName, XMLAttributes attrList, Augmentations augs) throws XNIException {
            super.startElement(elementQName, attrList, augs);
            setLineColumn();
        }

        @Override
        public void characters(XMLString text, Augmentations augs) throws XNIException {
            super.characters(text, augs);
            setLastChildLineColumn();
        }

        @Override
        public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException {
            super.ignorableWhitespace(text, augs);
            setLastChildLineColumn();
        }
    };
    parser.setFeature("http://xml.org/sax/features/namespaces", true);
    parser.setFeature("http://xml.org/sax/features/validation", validate);
    parser.setFeature("http://apache.org/xml/features/validation/schema", validate);
    // Nodes must be created eagerly so they exist when the event callbacks above tag them.
    parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
    if (validate) {
        // Install the local entity resolver and an error handler labelled with docDescription.
        LocalResolver lr = new LocalResolver(new DefaultHandler());
        ErrorHandler eh = new LocalErrorHandler(docDescription, lr);
        parser.setEntityResolver(lr);
        parser.setErrorHandler(eh);
    }
    InputSource inputSource = new InputSource(is);
    inputSource.setSystemId(docDescription);
    parser.parse(inputSource);
    Document document = parser.getDocument();
    if (Debug.verboseOn()) {
        double totalSeconds = (System.currentTimeMillis() - startTime) / 1000.0;
        Debug.logVerbose("XML Read " + totalSeconds + "s: " + docDescription, module);
    }
    return document;
}
Also used : ErrorHandler(org.xml.sax.ErrorHandler) InputSource(org.xml.sax.InputSource) Augmentations(org.apache.xerces.xni.Augmentations) QName(org.apache.xerces.xni.QName) Node(org.w3c.dom.Node) XMLString(org.apache.xerces.xni.XMLString) Document(org.w3c.dom.Document) XMLLocator(org.apache.xerces.xni.XMLLocator) SAXException(org.xml.sax.SAXException) DefaultHandler(org.xml.sax.helpers.DefaultHandler) XMLResourceIdentifier(org.apache.xerces.xni.XMLResourceIdentifier) XMLAttributes(org.apache.xerces.xni.XMLAttributes) NamespaceContext(org.apache.xerces.xni.NamespaceContext) DOMParser(org.apache.xerces.parsers.DOMParser)

Example 3 with DOMParser

use of org.apache.xerces.parsers.DOMParser in project muikku by otavanopisto.

the class WorkspaceMaterialController method createContentNode.

/**
 * Builds the {@link ContentNode} tree for a workspace node.
 *
 * <p>For a folder, a folder content node is created and one child content node is added per
 * child workspace node (hidden children are included only when {@code includeHidden} is set).
 * From {@code FLATTENING_LEVEL} onwards the children are obtained through
 * {@code flattenWorkspaceNodes}; entries flagged as empty folders become plain folder nodes,
 * all others are built recursively. For a material, a single content node is created whose
 * HTML is either the processed material HTML (when {@code processHtml} is set), {@code null},
 * or a "view restricted" message when the user is not logged in and the material requires it.
 *
 * @param rootMaterialNode the workspace node to convert
 * @param level the depth stored on the created node
 * @param processHtml whether material HTML should be produced
 * @param includeHidden whether hidden children of a folder are listed
 * @return the content node, or {@code null} for node types other than folder and material
 * @throws WorkspaceMaterialException if the HTML parser or transformer cannot be configured
 */
private ContentNode createContentNode(WorkspaceNode rootMaterialNode, int level, boolean processHtml, boolean includeHidden) throws WorkspaceMaterialException {
    try {
        switch(rootMaterialNode.getType()) {
            case FOLDER: {
                WorkspaceFolder folder = (WorkspaceFolder) rootMaterialNode;
                boolean folderRestricted = !sessionController.isLoggedIn() && folder.getViewRestrict() == MaterialViewRestrict.LOGGED_IN;
                ContentNode folderNode = new ContentNode(folder.getTitle(), "folder", rootMaterialNode.getId(), null, level, null, null, rootMaterialNode.getParent().getId(), rootMaterialNode.getHidden(), null, 0L, 0L, folder.getPath(), null, null, folder.getViewRestrict(), folderRestricted);

                List<WorkspaceNode> childNodes;
                if (includeHidden) {
                    childNodes = workspaceNodeDAO.listByParentSortByOrderNumber(folder);
                } else {
                    childNodes = workspaceNodeDAO.listByParentAndHiddenSortByOrderNumber(folder, Boolean.FALSE);
                }

                List<FlattenedWorkspaceNode> entries;
                if (level < FLATTENING_LEVEL) {
                    // Above the flattening level each child is wrapped as-is, keeping its own parent.
                    entries = new ArrayList<>();
                    for (WorkspaceNode childNode : childNodes) {
                        entries.add(new FlattenedWorkspaceNode(false, null, childNode, level, childNode.getParent().getId(), childNode.getHidden()));
                    }
                } else {
                    entries = flattenWorkspaceNodes(childNodes, level, includeHidden);
                }

                for (FlattenedWorkspaceNode entry : entries) {
                    ContentNode childContent = entry.isEmptyFolder
                        ? new ContentNode(entry.emptyFolderTitle, "folder", rootMaterialNode.getId(), null, entry.level, null, null, entry.parentId, entry.hidden, null, 0L, 0L, entry.node.getPath(), null, null, MaterialViewRestrict.NONE, false)
                        : createContentNode(entry.node, entry.level, processHtml, includeHidden);
                    folderNode.addChild(childContent);
                }
                return folderNode;
            }
            case MATERIAL: {
                // Parser and transformer are only needed when HTML is going to be produced.
                DOMParser htmlParser = null;
                Transformer htmlTransformer = null;
                if (processHtml) {
                    htmlParser = new DOMParser(new HTMLConfiguration());
                    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
                    htmlTransformer = TransformerFactory.newInstance().newTransformer();
                    htmlTransformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                    htmlTransformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
                    htmlTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
                    htmlTransformer.setOutputProperty(OutputKeys.INDENT, "no");
                }

                WorkspaceMaterial wsMaterial = (WorkspaceMaterial) rootMaterialNode;
                Material material = materialController.findMaterialById(wsMaterial.getMaterialId());
                Long latestRevision = material instanceof HtmlMaterial ? htmlMaterialController.lastHtmlMaterialRevision((HtmlMaterial) material) : 0L;
                Long liveRevision = material instanceof HtmlMaterial ? ((HtmlMaterial) material).getRevisionNumber() : 0L;

                // Producer names stay null (not an empty list) when the material has no producers.
                List<String> producerNames = null;
                List<MaterialProducer> materialProducers = materialController.listMaterialProducers(material);
                if (materialProducers != null && !materialProducers.isEmpty()) {
                    producerNames = new ArrayList<>();
                    for (MaterialProducer materialProducer : materialProducers) {
                        // Commas inside a name are escaped because the names are joined with ',' below.
                        String escapedName = StringEscapeUtils.escapeHtml4(materialProducer.getName());
                        producerNames.add(StringUtils.replace(escapedName, ",", "&#44;"));
                    }
                }

                boolean materialRestricted = !sessionController.isLoggedIn() && material.getViewRestrict() == MaterialViewRestrict.LOGGED_IN;
                String html;
                if (materialRestricted) {
                    html = String.format("<p class=\"content-view-restricted-message\">%s</p>", localeController.getText(sessionController.getLocale(), "plugin.workspace.materialViewRestricted"));
                } else {
                    html = processHtml ? getMaterialHtml(material, htmlParser, htmlTransformer) : null;
                }
                return new ContentNode(wsMaterial.getTitle(), material.getType(), rootMaterialNode.getId(), material.getId(), level, wsMaterial.getAssignmentType(), wsMaterial.getCorrectAnswers(), wsMaterial.getParent().getId(), wsMaterial.getHidden(), html, latestRevision, liveRevision, wsMaterial.getPath(), material.getLicense(), StringUtils.join(producerNames, ','), material.getViewRestrict(), materialRestricted);
            }
            default:
                return null;
        }
    } catch (SAXNotRecognizedException | SAXNotSupportedException | TransformerConfigurationException e) {
        throw new WorkspaceMaterialException(e);
    }
}
Also used : Transformer(javax.xml.transform.Transformer) TransformerConfigurationException(javax.xml.transform.TransformerConfigurationException) HTMLConfiguration(org.cyberneko.html.HTMLConfiguration) Material(fi.otavanopisto.muikku.plugins.material.model.Material) HtmlMaterial(fi.otavanopisto.muikku.plugins.material.model.HtmlMaterial) WorkspaceMaterial(fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceMaterial) SAXNotRecognizedException(org.xml.sax.SAXNotRecognizedException) MaterialProducer(fi.otavanopisto.muikku.plugins.material.model.MaterialProducer) WorkspaceNode(fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceNode) WorkspaceFolder(fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceFolder) SAXNotSupportedException(org.xml.sax.SAXNotSupportedException) DOMParser(org.apache.xerces.parsers.DOMParser)

Example 4 with DOMParser

use of org.apache.xerces.parsers.DOMParser in project muikku by otavanopisto.

the class DeusNexMachinaController method postProcessHtml.

/**
 * Post-processes the HTML of an imported material and stores the result if anything changed.
 *
 * <p>Two passes are made over the parsed document: {@code iframe} sources starting with
 * {@code http://www.youtube.com/} lose their first five characters (the {@code http:} prefix),
 * and iframes marked {@code data-type="embedded-document"} receive material id, material type
 * and workspace material id data attributes. When such an iframe is preceded by a header with
 * non-blank text, that text becomes the embedded material's title and the header is removed.
 *
 * @param material the material whose HTML is processed
 */
private void postProcessHtml(HtmlMaterial material) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException, TransformerException {
    StringReader source = new StringReader(material.getHtml());
    DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
    htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    htmlParser.parse(new InputSource(source));
    org.w3c.dom.Document dom = htmlParser.getDocument();
    boolean changed = false;

    // Embedded YouTube clips; strip protocol
    List<Element> iframes = DeusNexXmlUtils.getElementsByXPath(dom.getDocumentElement(), "//iframe");
    for (Element iframe : iframes) {
        String src = iframe.getAttribute("src");
        if (src == null || !src.startsWith("http://www.youtube.com/")) {
            continue;
        }
        iframe.setAttribute("src", src.substring(5));
        changed = true;
    }

    // Embedded documents; add data attributes and determine correct material title
    List<Element> embeddedDocuments = DeusNexXmlUtils.getElementsByXPath(dom.getDocumentElement(), "//iframe[@data-type=\"embedded-document\"]");
    // Any embedded document counts as a modification, whether or not a header is found.
    changed |= !embeddedDocuments.isEmpty();
    for (Element embedded : embeddedDocuments) {
        Integer resourceNo = Integer.valueOf(embedded.getAttribute("data-resource-no"));
        WorkspaceMaterial embeddedWorkspaceMaterial = workspaceMaterialController.findWorkspaceMaterialById(getResourceWorkspaceNodeId(resourceNo));
        HtmlMaterial embeddedMaterial = htmlMaterialController.findHtmlMaterialById(embeddedWorkspaceMaterial.getMaterialId());

        // If a header precedes an embedded document, use its text as the embedded
        // document's title and remove it from the parent document altogether
        Node precedingNode = getPreviousSiblingElement(embedded);
        if (isHeader(precedingNode)) {
            String title = StringUtils.trim(precedingNode.getTextContent());
            if (!StringUtils.isBlank(title)) {
                htmlMaterialController.updateHtmlMaterialTitle(embeddedMaterial, title);
                precedingNode.getParentNode().removeChild(precedingNode);
            }
        }

        embedded.setAttribute("data-material-id", String.valueOf(embeddedMaterial.getId()));
        embedded.setAttribute("data-material-type", embeddedMaterial.getType());
        embedded.setAttribute("data-workspace-material-id", String.valueOf(embeddedWorkspaceMaterial.getId()));
    }

    if (!changed) {
        return;
    }

    // Update to post-processed version
    StringWriter output = new StringWriter();
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "xml");
    serializer.setOutputProperty(OutputKeys.INDENT, "no");
    serializer.transform(new DOMSource(dom), new StreamResult(output));
    htmlMaterialController.updateHtmlMaterialHtml(material, output.toString());
}
Also used : InputSource(org.xml.sax.InputSource) DOMSource(javax.xml.transform.dom.DOMSource) TransformerFactory(javax.xml.transform.TransformerFactory) Transformer(javax.xml.transform.Transformer) StreamResult(javax.xml.transform.stream.StreamResult) HTMLConfiguration(org.cyberneko.html.HTMLConfiguration) Element(org.w3c.dom.Element) WorkspaceNode(fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceNode) Node(org.w3c.dom.Node) WorkspaceMaterial(fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceMaterial) StringWriter(java.io.StringWriter) StringReader(java.io.StringReader) DOMParser(org.apache.xerces.parsers.DOMParser) HtmlMaterial(fi.otavanopisto.muikku.plugins.material.model.HtmlMaterial)

Example 5 with DOMParser

use of org.apache.xerces.parsers.DOMParser in project muikku by otavanopisto.

the class HtmlMaterialCleaner method cleanMaterial.

/**
 * Runs every cleaner task against the latest revision of a material's HTML.
 *
 * <p>The revision HTML is parsed into a DOM, the tasks from {@code analyzerTasks} are ordered
 * by ascending priority, and each one is applied to the shared document. Whenever a task
 * reports a change, the document is serialized and handed to {@code patch}. Any exception is
 * logged at SEVERE level and not propagated.
 *
 * @param htmlMaterial the material to clean
 * @param ownerMaterial the workspace material passed on to each task
 */
public void cleanMaterial(HtmlMaterial htmlMaterial, WorkspaceMaterial ownerMaterial) {
    Long maxRevision = getMaterialRevision(htmlMaterial);
    try {
        // Parse the revision's HTML into a DOM with lower-cased element names.
        String revisionHtml = htmlMaterialController.getRevisionHtml(htmlMaterial, maxRevision);
        DOMParser htmlParser = new DOMParser(new HTMLConfiguration());
        htmlParser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        htmlParser.parse(new InputSource(new StringReader(revisionHtml)));
        Document dom = htmlParser.getDocument();

        // Collect the available tasks into a list so they can be sorted.
        List<HtmlMaterialCleanerTask> orderedTasks = new ArrayList<HtmlMaterialCleanerTask>();
        for (Iterator<HtmlMaterialCleanerTask> pending = analyzerTasks.iterator(); pending.hasNext(); ) {
            orderedTasks.add(pending.next());
        }
        Comparator<HtmlMaterialCleanerTask> byPriority = new Comparator<HtmlMaterialCleanerTask>() {

            @Override
            public int compare(HtmlMaterialCleanerTask left, HtmlMaterialCleanerTask right) {
                return left.getPriority().compareTo(right.getPriority());
            }
        };
        Collections.sort(orderedTasks, byPriority);

        // Each task that modified the document triggers its own patch of the serialized result.
        for (HtmlMaterialCleanerTask task : orderedTasks) {
            if (!task.process(dom, ownerMaterial)) {
                continue;
            }
            String serialized = DeusNexXmlUtils.serializeElement(dom.getDocumentElement(), true, false, "html");
            patch(htmlMaterial, serialized);
        }
    } catch (Exception e) {
        logger.log(Level.SEVERE, "Failed to clean material " + htmlMaterial.getId(), e);
    }
}
Also used : InputSource(org.xml.sax.InputSource) HTMLConfiguration(org.cyberneko.html.HTMLConfiguration) ArrayList(java.util.ArrayList) Document(org.w3c.dom.Document) CoOpsUsageException(fi.foyt.coops.CoOpsUsageException) CoOpsInternalErrorException(fi.foyt.coops.CoOpsInternalErrorException) StringReader(java.io.StringReader) DOMParser(org.apache.xerces.parsers.DOMParser)

Aggregations

DOMParser (org.apache.xerces.parsers.DOMParser)24 InputSource (org.xml.sax.InputSource)13 Node (org.w3c.dom.Node)9 Document (org.w3c.dom.Document)8 StringReader (java.io.StringReader)5 Element (org.w3c.dom.Element)5 SAXException (org.xml.sax.SAXException)5 IOException (java.io.IOException)4 HTMLConfiguration (org.cyberneko.html.HTMLConfiguration)4 NodeList (org.w3c.dom.NodeList)4 ArrayList (java.util.ArrayList)3 HtmlMaterial (fi.otavanopisto.muikku.plugins.material.model.HtmlMaterial)2 WorkspaceMaterial (fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceMaterial)2 WorkspaceNode (fi.otavanopisto.muikku.plugins.workspace.model.WorkspaceNode)2 FileNotFoundException (java.io.FileNotFoundException)2 Transformer (javax.xml.transform.Transformer)2 XIncludeParserConfiguration (org.apache.xerces.parsers.XIncludeParserConfiguration)2 DTDConfiguration (org.cyberneko.dtd.DTDConfiguration)2 AcsFileFinder (alma.acs.makesupport.AcsFileFinder)1 ConfigurationException (com.axway.ats.config.exceptions.ConfigurationException)1