Search in sources :

Example 1 with XMLValue

use of org.knime.core.data.xml.XMLValue in project knime-core by knime.

the class XMLXpathCellReader method readXML.

/**
 * {@inheritDoc}
 *
 * <p>Pulls events from the stream parser and mirrors them into every DOM document that is
 * currently being built. A new document is started whenever the XPath matcher reports a match
 * on a start element; a document is finished, wrapped into a cell and returned as soon as the
 * end tag of its root element is reached.
 *
 * <p>The method is re-entrant: when a cell is returned the parser is <em>not</em> advanced, so
 * the next call sees the same {@code END_ELEMENT} event again. The {@code m_reentrent} flag
 * makes sure the per-element bookkeeping (matcher and xml:base/space/lang stacks) is only done
 * once for that event.
 *
 * @return the next matching XML fragment, or {@code null} if the matcher reports that no node
 *         matches or the parser has no further events
 * @throws IOException if the underlying parser raises an {@link XMLStreamException}
 */
@Override
public XMLValue<Document> readXML() throws IOException {
    if (!m_xpathMatcher.nodeMatches()) {
        return null;
    }
    try {
        while (m_parser.hasNext()) {
            switch (m_parser.getEventType()) {
                case XMLStreamConstants.START_ELEMENT:
                    updateBasePath();
                    updateXmlSpaceDefinition();
                    updateXmlLangDefinition();
                    pushNamespaceContext();
                    // mirror the new element into every document that is currently open
                    for (int i = 0; i < m_docs.size(); i++) {
                        Element element = createElement(m_docs.get(i));
                        m_currNodes.get(i).appendChild(element);
                        m_currNodes.set(i, element);
                    }
                    boolean match = m_xpathMatcher.startElement(m_parser.getName());
                    if (match) {
                        // this element becomes the root of a new result document
                        Document doc = m_builder.newDocument();
                        Element element = createElement(doc);
                        // redeclare every namespace binding on the stack at the new root
                        m_namespaceStack.stream().flatMap(e -> e.stream()).forEach(p -> element.setAttributeNS(XMLConstants.XMLNS_ATTRIBUTE_NS_URI, p.getFirst(), p.getSecond()));
                        // carry over the xml:base / xml:space / xml:lang values in effect
                        // (see: http://www.w3.org/XML/1998/namespace)
                        if (!m_base.isEmpty() && null != m_base.get(0)) {
                            element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:base", m_base.get(0));
                        }
                        if (!m_space.isEmpty() && null != m_space.get(0)) {
                            element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:space", m_space.get(0));
                        }
                        if (!m_lang.isEmpty() && null != m_lang.get(0)) {
                            element.setAttributeNS(XMLConstants.XML_NS_URI, "xml:lang", m_lang.get(0));
                        }
                        doc.appendChild(element);
                        m_docs.add(doc);
                        m_currNodes.add(element);
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    // skip the bookkeeping if a previous call already did it for this event
                    if (!m_reentrent) {
                        m_xpathMatcher.endElement();
                        if (!m_base.isEmpty()) {
                            m_base.remove(0);
                        }
                        if (!m_space.isEmpty()) {
                            m_space.remove(0);
                        }
                        if (!m_lang.isEmpty()) {
                            m_lang.remove(0);
                        }
                    }
                    for (int i = m_docs.size() - 1; i >= 0; i--) {
                        Node curr = m_currNodes.get(i);
                        Node first = m_docs.get(i).getFirstChild();
                        if (curr.isSameNode(first)) {
                            // the root of document i was just closed: hand it out and come
                            // back to this same END_ELEMENT event on the next call
                            XMLValue<Document> cell = createDataCell(m_docs.get(i));
                            m_docs.remove(i);
                            m_currNodes.remove(i);
                            m_reentrent = true;
                            return cell;
                        } else {
                            m_currNodes.set(i, curr.getParentNode());
                        }
                    }
                    popNamespaceContext();
                    m_reentrent = false;
                    break;
                case XMLStreamConstants.CHARACTERS:
                    // whitespace-only text is kept only under xml:space="preserve"
                    if (!m_parser.isWhiteSpace() || isXmlSpacePreserve()) {
                        appendTextToOpenDocuments();
                    }
                    break;
                case XMLStreamConstants.SPACE:
                    if (isXmlSpacePreserve()) {
                        appendTextToOpenDocuments();
                    }
                    break;
                case XMLStreamConstants.COMMENT:
                    for (int i = 0; i < m_docs.size(); i++) {
                        String str = m_parser.getText();
                        Comment comment = m_docs.get(i).createComment(str);
                        m_currNodes.get(i).appendChild(comment);
                    }
                    break;
                case XMLStreamConstants.ENTITY_REFERENCE:
                    for (int i = 0; i < m_docs.size(); i++) {
                        String str = m_parser.getText();
                        EntityReference ref = m_docs.get(i).createEntityReference(str);
                        m_currNodes.get(i).appendChild(ref);
                    }
                    break;
                case XMLStreamConstants.PROCESSING_INSTRUCTION:
                    for (int i = 0; i < m_docs.size(); i++) {
                        String piTarget = m_parser.getPITarget();
                        String piData = m_parser.getPIData();
                        ProcessingInstruction pi = m_docs.get(i).createProcessingInstruction(piTarget, piData);
                        m_currNodes.get(i).appendChild(pi);
                    }
                    break;
                case XMLStreamConstants.DTD:
                    // not needed use DOM-Reader for full documents
                    break;
                case XMLStreamConstants.END_DOCUMENT:
                    // never called
                    m_parser.close();
                    break;
                default:
                    break;
            }
            m_parser.next();
        }
        return null;
    } catch (XMLStreamException e) {
        throw new IOException(e);
    }
}

/**
 * Tells whether the innermost xml:space value currently in effect is {@code "preserve"}.
 *
 * @return {@code true} if the head of {@code m_space} equals {@code "preserve"}
 */
private boolean isXmlSpacePreserve() {
    return !m_space.isEmpty() && "preserve".equals(m_space.get(0));
}

/**
 * Appends the parser's current text as a text node to the current node of every open document.
 * The text is read from the parser once, and not at all if no document is open.
 */
private void appendTextToOpenDocuments() {
    if (m_docs.isEmpty()) {
        return;
    }
    final String str = m_parser.getText();
    for (int i = 0; i < m_docs.size(); i++) {
        Text text = m_docs.get(i).createTextNode(str);
        m_currNodes.get(i).appendChild(text);
    }
}
Also used : Text(org.w3c.dom.Text) ProcessingInstruction(org.w3c.dom.ProcessingInstruction) XMLInputFactory(javax.xml.stream.XMLInputFactory) Deque(java.util.Deque) ArrayList(java.util.ArrayList) Pair(org.knime.core.util.Pair) XMLValue(org.knime.core.data.xml.XMLValue) EntityReference(org.w3c.dom.EntityReference) XMLStreamConstants(javax.xml.stream.XMLStreamConstants) XMLStreamReader(javax.xml.stream.XMLStreamReader) Document(org.w3c.dom.Document) XMLStreamException(javax.xml.stream.XMLStreamException) Node(org.w3c.dom.Node) LinkedList(java.util.LinkedList) XMLConstants(javax.xml.XMLConstants) XMLCellFactory(org.knime.core.data.xml.XMLCellFactory) KNIMEConstants(org.knime.core.node.KNIMEConstants) IOException(java.io.IOException) Comment(org.w3c.dom.Comment) List(java.util.List) Element(org.w3c.dom.Element) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) ArrayDeque(java.util.ArrayDeque) Collections(java.util.Collections) InputStream(java.io.InputStream) Comment(org.w3c.dom.Comment) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) Text(org.w3c.dom.Text) IOException(java.io.IOException) Document(org.w3c.dom.Document) XMLStreamException(javax.xml.stream.XMLStreamException) EntityReference(org.w3c.dom.EntityReference) ProcessingInstruction(org.w3c.dom.ProcessingInstruction)

Example 2 with XMLValue

use of org.knime.core.data.xml.XMLValue in project knime-core by knime.

the class XML2PMMLNodeModel method createColRearranger.

/**
 * Builds the rearranger that converts the configured XML column into a PMML column.
 *
 * <p>If no XML column is configured yet, {@code guessDefaultXMLColumn} is asked to pick one.
 * The output column either replaces the input column or is appended under a unique name
 * derived from the configured new column name.
 *
 * <p>Per row: missing cells stay missing; documents that are not valid PMML increment
 * {@code m_failCounter} and either abort with a {@link RuntimeException} (when
 * {@code m_failOnInvalid} is set) or yield a missing cell carrying the failure message.
 *
 * @param spec the spec of the input table
 * @return the rearranger producing the PMML column
 * @throws InvalidSettingsException if the configured column is not in {@code spec} or is not
 *             string-compatible
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // reuse the input column's spec, but drop everything tied to its old type
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            Throwable cause = null;
            // NOTE(review): the column is only checked for StringValue compatibility above,
            // yet the cell is cast to XMLValue here - confirm a non-XML cell cannot arrive.
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                // when failing hard the exception below reports the problem; only log here
                // if processing continues with a missing cell
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                // getMessage() may be null; without a fallback the failure would go
                // unrecorded and the row would be treated as successfully parsed
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
                cause = e;
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // keep the parser exception (if any) as cause for diagnostics
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, cause);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MissingCell(org.knime.core.data.MissingCell) XmlException(org.apache.xmlbeans.XmlException) DataCell(org.knime.core.data.DataCell) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) XMLValue(org.knime.core.data.xml.XMLValue) StringValue(org.knime.core.data.StringValue) PMMLCellFactory(org.knime.core.data.xml.PMMLCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 3 with XMLValue

use of org.knime.core.data.xml.XMLValue in project knime-core by knime.

the class XMLDOMCellReader method readXML.

/**
 * {@inheritDoc}
 *
 * <p>The first call parses the complete input stream into a single DOM document and returns
 * it as a cell; every later call returns {@code null}.
 */
@SuppressWarnings("unchecked")
@Override
public XMLValue<Document> readXML() throws IOException {
    // the stream holds exactly one document, so there is nothing left after the first call
    if (!m_first) {
        return null;
    }
    m_first = false;
    final Document parsed;
    try {
        parsed = m_builder.parse(m_in);
    } catch (SAXException saxFailure) {
        // callers only deal with IOException; keep the parser error as cause
        throw new IOException(saxFailure);
    }
    removeEmptyTextRecursive(parsed, new LinkedList<Boolean>());
    return (XMLValue<Document>) XMLCellFactory.create(parsed);
}
Also used : IOException(java.io.IOException) XMLValue(org.knime.core.data.xml.XMLValue) Document(org.w3c.dom.Document) SAXException(org.xml.sax.SAXException)

Aggregations

IOException (java.io.IOException)3 XMLValue (org.knime.core.data.xml.XMLValue)3 Document (org.w3c.dom.Document)3 InputStream (java.io.InputStream)1 ArrayDeque (java.util.ArrayDeque)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 Deque (java.util.Deque)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 XMLConstants (javax.xml.XMLConstants)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 XMLInputFactory (javax.xml.stream.XMLInputFactory)1 XMLStreamConstants (javax.xml.stream.XMLStreamConstants)1 XMLStreamException (javax.xml.stream.XMLStreamException)1 XMLStreamReader (javax.xml.stream.XMLStreamReader)1 XmlException (org.apache.xmlbeans.XmlException)1 XmlObject (org.apache.xmlbeans.XmlObject)1