Search in sources :

Example 41 with XmlObject

use of org.apache.xmlbeans.XmlObject in project knime-core by knime.

the class PMMLPortObject method loadFrom.

/**
 * Initializes the pmml port object based on the xml input stream.
 * The given stream is closed by this method, also when parsing fails.
 *
 * @param spec the referring spec of this object
 * @param is the pmml input stream; it is closed exactly once by this method
 * @throws IOException if reading from or closing the stream fails
 * @throws XmlException if something goes wrong during reading
 */
public void loadFrom(final PMMLPortObjectSpec spec, final InputStream is) throws IOException, XmlException {
    // disallow close in the factory -- we had indeterministic behavior
    // where close was called more than once (which should be OK) but as
    // the argument input stream is a NonClosableZipInput, which delegates
    // close to closeEntry(), we have to make sure that close is only
    // called once.
    // TODO: The document is read twice here. Could we "probe" into the file to check the version?
    XmlObject xmlDoc;
    // try-with-resources closes the caller's stream once, even if the
    // parse below throws; the factory only sees the non-closable wrapper.
    try (InputStream in = is) {
        xmlDoc = XmlObject.Factory.parse(new NonClosableInputStream(in));
    }
    if (xmlDoc instanceof PMMLDocument) {
        m_pmmlDoc = (PMMLDocument) xmlDoc;
    } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
        /* Try to recover when reading a PMML 3.x/4.0 document that
         * was produced by KNIME by just replacing the PMML version and
         * namespace. */
        try {
            String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
            /* Parse the modified document and assign it to a
             * PMMLDocument. */
            m_pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
        } catch (Exception e) {
            throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.", e);
        }
        LOGGER.info("KNIME produced PMML 3.x/4.0  converted to PMML 4.1.");
    } else {
        // Neither a PMMLDocument nor a document the recovery path accepts.
        throw new RuntimeException("Parsing of PMML v 3.x/4.0 document failed.");
    }
    m_spec = spec;
}
Also used : NonClosableInputStream(org.knime.core.data.util.NonClosableInputStream) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) SAXException(org.xml.sax.SAXException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException)

Example 42 with XmlObject

use of org.apache.xmlbeans.XmlObject in project knime-core by knime.

the class PMMLPortObject method addModelTranslater.

/**
 * Adds the model of the content translater to the PMML document.
 * Afterwards the model's mining schema is cleaned up: mining fields that
 * are also derived fields are dropped, and fields referenced by the local
 * transformations are added if they are missing. If no mining schema is
 * found, the method logs that and returns without touching the schema.
 *
 * @param modelTranslator the model translator containing the model to be
 *      added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);
    /* Remove mining fields from mining schema that where created as a
     * derived field. In KNIME the origin of columns is not distinguished
     * and all columns are added to the mining schema. But in PMML this
     * results in duplicate entries. Those columns should only appear once
     * as derived field in the transformation dictionary or local
     * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    // Collect the fields to keep in a single pass instead of copying the
    // whole array and removing entries one by one.
    List<MiningField> miningFields = new ArrayList<MiningField>(miningFieldArray.length);
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
        } else {
            miningFields.add(miningField);
            miningFieldNames.add(miningFieldName);
        }
    }
    /* According to the PMML Spec DerivedFields must ultimately refer back
     * to active MiningFields of the model's MiningSchema. Therefore we
     * have to add all referred DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field" + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);
    // LinkedHashSet: de-duplicates while keeping the order of first occurrence
    Set<String> referencedFields = new LinkedHashSet<String>();
    // collect all referred field names
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        try {
            referencedFields.add(xmlCursor.getTextValue());
        } finally {
            // release the cursor even if reading the value fails
            xmlCursor.dispose();
        }
    }
    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
             * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in " + "LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) ArrayList(java.util.ArrayList) SchemaType(org.apache.xmlbeans.SchemaType) XmlCursor(org.apache.xmlbeans.XmlCursor) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) XmlObject(org.apache.xmlbeans.XmlObject) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 43 with XmlObject

use of org.apache.xmlbeans.XmlObject in project knime-core by knime.

the class PMMLConditionTranslator method parseCompoundPredicate.

/**
 * Create a KNIME compound predicate from a PMML compound predicate. Note that the "order" of the sub-predicates is
 * important (because of surrogate predicate). Therefore, we need to use xmlCursor to retrieve the order of the
 * predicates.
 *
 * The method first parses the sub-predicates grouped by kind, then walks the child elements with a cursor to
 * record their document order, and finally re-assembles the parsed predicates in that order.
 *
 * @param xmlCompoundPredicate the PMML Compound Predicate element
 * @return the KNIME Compound Predicate
 */
protected PMMLCompoundPredicate parseCompoundPredicate(final CompoundPredicate xmlCompoundPredicate) {
    // Step 1: parse all sub-predicates, grouped by type (document order is lost here).
    List<PMMLPredicate> tempPredicateList = new ArrayList<PMMLPredicate>();
    if (xmlCompoundPredicate.sizeOfSimplePredicateArray() != 0) {
        for (SimplePredicate xmlSubSimplePredicate : xmlCompoundPredicate.getSimplePredicateList()) {
            tempPredicateList.add(parseSimplePredicate(xmlSubSimplePredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfCompoundPredicateArray() != 0) {
        for (CompoundPredicate xmlSubCompoundPredicate : xmlCompoundPredicate.getCompoundPredicateList()) {
            tempPredicateList.add(parseCompoundPredicate(xmlSubCompoundPredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfSimpleSetPredicateArray() != 0) {
        for (SimpleSetPredicate xmlSubSimpleSetPredicate : xmlCompoundPredicate.getSimpleSetPredicateList()) {
            tempPredicateList.add(parseSimpleSetPredicate(xmlSubSimpleSetPredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfTrueArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfTrueArray(); i++) {
            tempPredicateList.add(new PMMLTruePredicate());
        }
    }
    if (xmlCompoundPredicate.sizeOfFalseArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfFalseArray(); i++) {
            tempPredicateList.add(new PMMLFalsePredicate());
        }
    }
    // Step 2: record, in document order, a marker (COMPOUND/TRUE/FALSE) or the mapped column name of the
    // "field" attribute for every child element.
    List<String> predicateNames = new ArrayList<String>();
    XmlCursor xmlCursor = xmlCompoundPredicate.newCursor();
    try {
        if (xmlCursor.toFirstChild()) {
            do {
                XmlObject xmlElement = xmlCursor.getObject();
                if (xmlElement instanceof CompoundPredicateDocument.CompoundPredicate) {
                    predicateNames.add(COMPOUND);
                } else if (xmlElement instanceof TrueDocument.True) {
                    predicateNames.add(TRUE);
                } else if (xmlElement instanceof FalseDocument.False) {
                    predicateNames.add(FALSE);
                } else {
                    // Only this branch inspects attributes, so the element cursor is created (and released) here.
                    XmlCursor elementCursor = xmlElement.newCursor();
                    try {
                        elementCursor.toFirstAttribute();
                        do {
                            if ("field".equals(elementCursor.getName().getLocalPart())) {
                                predicateNames.add(m_nameMapper.getColumnName(elementCursor.getTextValue()));
                                break;
                            }
                        } while (elementCursor.toNextAttribute());
                    } finally {
                        elementCursor.dispose();
                    }
                }
            } while (xmlCursor.toNextSibling());
        }
    } finally {
        xmlCursor.dispose();
    }
    // ------------------------------------------------------
    // sort the predicate list
    List<PMMLPredicate> predicateList = new ArrayList<PMMLPredicate>();
    List<PMMLPredicate> compoundList = new ArrayList<PMMLPredicate>();
    for (PMMLPredicate tempPredicate : tempPredicateList) {
        if (tempPredicate instanceof PMMLCompoundPredicate) {
            compoundList.add(tempPredicate);
        }
    }
    for (String name : predicateNames) {
        if (name.equals(COMPOUND)) {
            // compound predicates are consumed in the order they were parsed
            predicateList.add(compoundList.get(0));
            compoundList.remove(0);
        } else if (name.equals(TRUE)) {
            predicateList.add(new PMMLTruePredicate());
        } else if (name.equals(FALSE)) {
            predicateList.add(new PMMLFalsePredicate());
        } else {
            // find the first not yet consumed simple/simple-set predicate splitting on this column
            int foundIndex = -1, i = 0;
            for (PMMLPredicate tempPredicate : tempPredicateList) {
                if (tempPredicate instanceof PMMLSimplePredicate) {
                    if (name.equals(((PMMLSimplePredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                } else if (tempPredicate instanceof PMMLSimpleSetPredicate) {
                    if (name.equals(((PMMLSimpleSetPredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                }
                ++i;
            }
            assert foundIndex >= 0 : tempPredicateList + "\n" + name;
            // remove the match so a second predicate on the same column picks the next candidate
            tempPredicateList.remove(foundIndex);
        }
    }
    LinkedList<PMMLPredicate> subPredicates = new LinkedList<PMMLPredicate>(predicateList);
    String operator = xmlCompoundPredicate.getBooleanOperator().toString();
    PMMLCompoundPredicate compoundPredicate = newCompoundPredicate(operator);
    compoundPredicate.setPredicates(subPredicates);
    return compoundPredicate;
}
Also used : TrueDocument(org.dmg.pmml.TrueDocument) ArrayList(java.util.ArrayList) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) LinkedList(java.util.LinkedList) SimpleSetPredicate(org.dmg.pmml.SimpleSetPredicateDocument.SimpleSetPredicate) XmlCursor(org.apache.xmlbeans.XmlCursor) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) XmlObject(org.apache.xmlbeans.XmlObject)

Example 44 with XmlObject

use of org.apache.xmlbeans.XmlObject in project knime-core by knime.

the class XML2PMMLNodeModel method createColRearranger.

/**
 * Builds the column rearranger that converts the configured XML column into PMML cells. Depending on the
 * "replace column" setting the result either replaces the source column or is appended as a new column.
 *
 * @param spec the input table spec
 * @return the rearranger producing the PMML column
 * @throws InvalidSettingsException declared for the setting checks on the selected column (existence and
 *             string compatibility)
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // reuse the source column's spec but reset everything tied to its old type
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            // kept so the exception thrown below can carry the original cause
            XmlException parseError = null;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument. */
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                // A null message must still count as a failure; otherwise the code below would
                // dereference the null pmmlDoc.
                failure = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
                parseError = e;
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, parseError);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MissingCell(org.knime.core.data.MissingCell) XmlException(org.apache.xmlbeans.XmlException) DataCell(org.knime.core.data.DataCell) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) XMLValue(org.knime.core.data.xml.XMLValue) StringValue(org.knime.core.data.StringValue) PMMLCellFactory(org.knime.core.data.xml.PMMLCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 45 with XmlObject

use of org.apache.xmlbeans.XmlObject in project hackpad by dropbox.

the class XML method insertChild.

/**
 * Inserts the given value at the position of the destination cursor.
 * {@code null} and {@code Undefined} are ignored, cursors and XML values are
 * moved via {@code moveSrcToDest}, XML lists are inserted item by item, and
 * any other value is converted to a string and inserted as text.
 *
 * @param curs the destination cursor
 * @param xmlToInsert the value to insert; may be {@code null}
 */
private void insertChild(XmlCursor curs, Object xmlToInsert) {
    if (xmlToInsert == null || xmlToInsert instanceof Undefined) {
        // Do nothing
    } else if (xmlToInsert instanceof XmlCursor) {
        // The caller owns this cursor, so it is not disposed here.
        moveSrcToDest((XmlCursor) xmlToInsert, curs, true);
    } else if (xmlToInsert instanceof XML) {
        XML xmlValue = (XML) xmlToInsert;
        // If it's an attribute, then change to text node
        if (xmlValue.tokenType() == XmlCursor.TokenType.ATTR) {
            insertChild(curs, xmlValue.toString());
        } else {
            XmlCursor cursToInsert = xmlValue.newCursor();
            try {
                moveSrcToDest(cursToInsert, curs, true);
            } finally {
                cursToInsert.dispose();
            }
        }
    } else if (xmlToInsert instanceof XMLList) {
        XMLList list = (XMLList) xmlToInsert;
        for (int i = 0; i < list.length(); i++) {
            insertChild(curs, list.item(i));
        }
    } else {
        // Convert to string and make XML out of it
        String xmlStr = ScriptRuntime.toString(xmlToInsert);
        // Create an empty document.
        XmlObject xo = XmlObject.Factory.newInstance();
        XmlCursor sourceCurs = xo.newCursor();
        try {
            sourceCurs.toNextToken();
            // To hold the text.
            sourceCurs.insertChars(xmlStr);
            sourceCurs.toPrevToken();
            // Call us again with the cursor.
            moveSrcToDest(sourceCurs, curs, true);
        } finally {
            // this cursor was created here, so release it like cursToInsert above
            sourceCurs.dispose();
        }
    }
}
Also used : XmlObject(org.apache.xmlbeans.XmlObject) XmlCursor(org.apache.xmlbeans.XmlCursor)

Aggregations

XmlObject (org.apache.xmlbeans.XmlObject)102 XmlCursor (org.apache.xmlbeans.XmlCursor)49 XmlException (org.apache.xmlbeans.XmlException)17 Test (org.junit.Test)14 CTTbl (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl)13 CTAxDataSource (org.openxmlformats.schemas.drawingml.x2006.chart.CTAxDataSource)12 CTNumDataSource (org.openxmlformats.schemas.drawingml.x2006.chart.CTNumDataSource)12 DefaultExchange (org.apache.camel.impl.DefaultExchange)10 ArrayList (java.util.ArrayList)9 DefaultCamelContext (org.apache.camel.impl.DefaultCamelContext)9 CTP (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP)9 CTPicture (org.openxmlformats.schemas.presentationml.x2006.main.CTPicture)7 IOException (java.io.IOException)6 QName (javax.xml.namespace.QName)6 POIXMLException (org.apache.poi.POIXMLException)6 CTShapeProperties (org.openxmlformats.schemas.drawingml.x2006.main.CTShapeProperties)6 CTGroupShape (org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape)6 CTGraphicalObjectFrame (org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame)5 Node (org.w3c.dom.Node)5 CTConnector (org.openxmlformats.schemas.presentationml.x2006.main.CTConnector)4