Search in sources :

Example 56 with XmlCursor

use of org.apache.xmlbeans.XmlCursor in project knime-core by knime.

In the class PMMLClusterTranslator, the method exportTo.

/**
 * {@inheritDoc}
 *
 * <p>Adds a new clustering model to the PMML of {@code pmmlDoc} and fills it with the mining schema
 * derived from {@code spec}, the comparison measure, one clustering field per used column and one
 * cluster element (name, optional size, center coordinates) per prototype.
 *
 * @param pmmlDoc the PMML document the clustering model is appended to
 * @param spec the port object spec used to write the mining schema
 * @return the schema type of the clustering model
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
    DerivedFieldMapper mapper = new DerivedFieldMapper(pmmlDoc);
    PMML pmml = pmmlDoc.getPMML();
    ClusteringModelDocument.ClusteringModel clusteringModel = pmml.addNewClusteringModel();
    PMMLMiningSchemaTranslator.writeMiningSchema(spec, clusteringModel);
    // ---------------------------------------------------
    // set clustering model attributes
    clusteringModel.setModelName("k-means");
    clusteringModel.setFunctionName(MININGFUNCTION.CLUSTERING);
    clusteringModel.setModelClass(ModelClass.CENTER_BASED);
    clusteringModel.setNumberOfClusters(BigInteger.valueOf(m_nrOfClusters));
    // ---------------------------------------------------
    // set comparison measure
    ComparisonMeasureDocument.ComparisonMeasure pmmlComparisonMeasure = clusteringModel.addNewComparisonMeasure();
    pmmlComparisonMeasure.setKind(Kind.DISTANCE);
    if (ComparisonMeasure.squaredEuclidean.equals(m_measure)) {
        pmmlComparisonMeasure.addNewSquaredEuclidean();
    } else {
        // every measure other than squared Euclidean is exported as plain Euclidean
        pmmlComparisonMeasure.addNewEuclidean();
    }
    // set clustering fields
    for (String colName : m_usedColumns) {
        ClusteringFieldDocument.ClusteringField pmmlClusteringField = clusteringModel.addNewClusteringField();
        pmmlClusteringField.setField(mapper.getDerivedFieldName(colName));
        pmmlClusteringField.setCompareFunction(COMPAREFUNCTION.ABS_DIFF);
    }
    // ----------------------------------------------------
    // set clusters
    // The coverage is only written when there is exactly one entry per prototype.
    final boolean hasCoverage = m_clusterCoverage != null && m_clusterCoverage.length == m_prototypes.length;
    int i = 0;
    for (double[] prototype : m_prototypes) {
        ClusterDocument.Cluster pmmlCluster = clusteringModel.addNewCluster();
        pmmlCluster.setName(CLUSTER_NAME_PREFIX + i);
        if (hasCoverage) {
            pmmlCluster.setSize(BigInteger.valueOf(m_clusterCoverage[i]));
        }
        i++;
        ArrayType pmmlArray = pmmlCluster.addNewArray();
        pmmlArray.setN(BigInteger.valueOf(prototype.length));
        pmmlArray.setType(Type.REAL);
        // The array content is the space separated list of coordinates (each value is followed by a blank).
        StringBuilder buff = new StringBuilder();
        for (double d : prototype) {
            buff.append(d).append(' ');
        }
        XmlCursor xmlCursor = pmmlArray.newCursor();
        try {
            xmlCursor.setTextValue(buff.toString());
        } finally {
            // release the cursor even if setting the text fails
            xmlCursor.dispose();
        }
    }
    return ClusteringModel.type;
}
Also used : ClusteringModel(org.dmg.pmml.ClusteringModelDocument.ClusteringModel) ClusteringModelDocument(org.dmg.pmml.ClusteringModelDocument) ComparisonMeasureDocument(org.dmg.pmml.ComparisonMeasureDocument) ClusterDocument(org.dmg.pmml.ClusterDocument) XmlCursor(org.apache.xmlbeans.XmlCursor) ArrayType(org.dmg.pmml.ArrayType) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) ClusteringFieldDocument(org.dmg.pmml.ClusteringFieldDocument) PMML(org.dmg.pmml.PMMLDocument.PMML) ClusteringField(org.dmg.pmml.ClusteringFieldDocument.ClusteringField)

Example 57 with XmlCursor

use of org.apache.xmlbeans.XmlCursor in project knime-core by knime.

In the class PMMLConditionTranslator, the method parseCompoundPredicate.

/**
 * Create a KNIME compound predicate from a PMML compound predicate. Note that the "order" of the sub-predicates is
 * important (because of surrogate predicate). Therefore, we need to use xmlCursor to retrieve the order of the
 * predicates.
 *
 * <p>The method first parses all sub-predicates grouped by type, then walks the child elements with a cursor to
 * record their document order, and finally rebuilds the predicate list in that order.
 *
 * @param xmlCompoundPredicate the PMML Compound Predicate element
 * @return the KNIME Compound Predicate
 */
protected PMMLCompoundPredicate parseCompoundPredicate(final CompoundPredicate xmlCompoundPredicate) {
    // Step 1: parse the sub-predicates grouped by type (document order is lost here).
    List<PMMLPredicate> tempPredicateList = new ArrayList<PMMLPredicate>();
    if (xmlCompoundPredicate.sizeOfSimplePredicateArray() != 0) {
        for (SimplePredicate xmlSubSimplePredicate : xmlCompoundPredicate.getSimplePredicateList()) {
            tempPredicateList.add(parseSimplePredicate(xmlSubSimplePredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfCompoundPredicateArray() != 0) {
        for (CompoundPredicate xmlSubCompoundPredicate : xmlCompoundPredicate.getCompoundPredicateList()) {
            tempPredicateList.add(parseCompoundPredicate(xmlSubCompoundPredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfSimpleSetPredicateArray() != 0) {
        for (SimpleSetPredicate xmlSubSimpleSetPredicate : xmlCompoundPredicate.getSimpleSetPredicateList()) {
            tempPredicateList.add(parseSimpleSetPredicate(xmlSubSimpleSetPredicate));
        }
    }
    if (xmlCompoundPredicate.sizeOfTrueArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfTrueArray(); i++) {
            tempPredicateList.add(new PMMLTruePredicate());
        }
    }
    if (xmlCompoundPredicate.sizeOfFalseArray() != 0) {
        for (int i = 0; i < xmlCompoundPredicate.sizeOfFalseArray(); i++) {
            tempPredicateList.add(new PMMLFalsePredicate());
        }
    }
    // Step 2: walk the children in document order and record a marker per child
    // (COMPOUND / TRUE / FALSE or the column name taken from the "field" attribute).
    List<String> predicateNames = new ArrayList<String>();
    XmlCursor xmlCursor = xmlCompoundPredicate.newCursor();
    try {
        if (xmlCursor.toFirstChild()) {
            do {
                XmlObject xmlElement = xmlCursor.getObject();
                if (xmlElement instanceof CompoundPredicateDocument.CompoundPredicate) {
                    predicateNames.add(COMPOUND);
                } else if (xmlElement instanceof TrueDocument.True) {
                    predicateNames.add(TRUE);
                } else if (xmlElement instanceof FalseDocument.False) {
                    predicateNames.add(FALSE);
                } else {
                    // Only this branch needs a cursor on the child element itself.
                    XmlCursor elementCursor = xmlElement.newCursor();
                    try {
                        elementCursor.toFirstAttribute();
                        do {
                            if ("field".equals(elementCursor.getName().getLocalPart())) {
                                predicateNames.add(m_nameMapper.getColumnName(elementCursor.getTextValue()));
                                break;
                            }
                        } while (elementCursor.toNextAttribute());
                    } finally {
                        elementCursor.dispose();
                    }
                }
            } while (xmlCursor.toNextSibling());
        }
    } finally {
        xmlCursor.dispose();
    }
    // ------------------------------------------------------
    // sort the predicate list
    List<PMMLPredicate> predicateList = new ArrayList<PMMLPredicate>();
    List<PMMLPredicate> compoundList = new ArrayList<PMMLPredicate>();
    for (PMMLPredicate tempPredicate : tempPredicateList) {
        if (tempPredicate instanceof PMMLCompoundPredicate) {
            compoundList.add(tempPredicate);
        }
    }
    for (String name : predicateNames) {
        if (name.equals(COMPOUND)) {
            // compound predicates were parsed in document order, so consume them front to back
            predicateList.add(compoundList.remove(0));
        } else if (name.equals(TRUE)) {
            predicateList.add(new PMMLTruePredicate());
        } else if (name.equals(FALSE)) {
            predicateList.add(new PMMLFalsePredicate());
        } else {
            // Find the first not yet consumed simple / simple set predicate on that column.
            int foundIndex = -1;
            int i = 0;
            for (PMMLPredicate tempPredicate : tempPredicateList) {
                if (tempPredicate instanceof PMMLSimplePredicate) {
                    if (name.equals(((PMMLSimplePredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                } else if (tempPredicate instanceof PMMLSimpleSetPredicate) {
                    if (name.equals(((PMMLSimpleSetPredicate) tempPredicate).getSplitAttribute())) {
                        predicateList.add(tempPredicate);
                        foundIndex = i;
                        break;
                    }
                }
                ++i;
            }
            assert foundIndex >= 0 : tempPredicateList + "\n" + name;
            // remove the match so that a second predicate on the same column is matched next time
            tempPredicateList.remove(foundIndex);
        }
    }
    LinkedList<PMMLPredicate> subPredicates = new LinkedList<PMMLPredicate>(predicateList);
    String operator = xmlCompoundPredicate.getBooleanOperator().toString();
    PMMLCompoundPredicate compoundPredicate = newCompoundPredicate(operator);
    compoundPredicate.setPredicates(subPredicates);
    return compoundPredicate;
}
Also used : TrueDocument(org.dmg.pmml.TrueDocument) ArrayList(java.util.ArrayList) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) LinkedList(java.util.LinkedList) SimpleSetPredicate(org.dmg.pmml.SimpleSetPredicateDocument.SimpleSetPredicate) XmlCursor(org.apache.xmlbeans.XmlCursor) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) XmlObject(org.apache.xmlbeans.XmlObject)

Example 58 with XmlCursor

use of org.apache.xmlbeans.XmlCursor in project knime-core by knime.

In the class PMMLDecisionTreeTranslator, the method addTreeNode.

/**
 * A recursive function which converts each KNIME Tree node to a
 * corresponding PMML element.
 *
 * <p>For every node the id, score, optional record count and default child are written, followed by
 * the predicate that leads from the parent to this node, the score distribution and finally - for
 * non-leaf nodes - all children.
 *
 * @param pmmlNode the desired PMML element
 * @param node A KNIME DecisionTree node
 * @param mapper used to translate split attribute names into derived field names
 * @throws IllegalArgumentException if the parent node type is not supported or the node belongs to
 *             neither partition of a binary nominal split
 */
private static void addTreeNode(final NodeDocument.Node pmmlNode, final DecisionTreeNode node, final DerivedFieldMapper mapper) {
    pmmlNode.setId(String.valueOf(node.getOwnIndex()));
    pmmlNode.setScore(node.getMajorityClass().toString());
    // NOTE(review): the original comment here was truncated ("read in and then exported again");
    // the record count is only written when the node reports a positive one.
    if (node.getEntireClassCount() > 0) {
        pmmlNode.setRecordCount(node.getEntireClassCount());
    }
    if (node instanceof DecisionTreeNodeSplitPMML) {
        int defaultChild = ((DecisionTreeNodeSplitPMML) node).getDefaultChildIndex();
        if (defaultChild > -1) {
            pmmlNode.setDefaultChild(String.valueOf(defaultChild));
        }
    }
    // adding score and stuff from parent
    DecisionTreeNode parent = node.getParent();
    if (parent == null) {
        // When the parent is null, it is the root Node.
        // For root node, the predicate is always True.
        pmmlNode.addNewTrue();
    } else if (parent instanceof DecisionTreeNodeSplitContinuous) {
        // SimplePredicate case
        DecisionTreeNodeSplitContinuous splitNode = (DecisionTreeNodeSplitContinuous) parent;
        int childIndex = splitNode.getIndex(node);
        if (childIndex == 0) {
            SimplePredicate pmmlSimplePredicate = pmmlNode.addNewSimplePredicate();
            pmmlSimplePredicate.setField(mapper.getDerivedFieldName(splitNode.getSplitAttr()));
            pmmlSimplePredicate.setOperator(Operator.LESS_OR_EQUAL);
            pmmlSimplePredicate.setValue(String.valueOf(splitNode.getThreshold()));
        } else if (childIndex == 1) {
            // the second child catches everything the first child's predicate rejected
            pmmlNode.addNewTrue();
        }
    } else if (parent instanceof DecisionTreeNodeSplitNominalBinary) {
        // SimpleSetPredicate case
        DecisionTreeNodeSplitNominalBinary splitNode = (DecisionTreeNodeSplitNominalBinary) parent;
        SimpleSetPredicate pmmlSimpleSetPredicate = pmmlNode.addNewSimpleSetPredicate();
        pmmlSimpleSetPredicate.setField(mapper.getDerivedFieldName(splitNode.getSplitAttr()));
        pmmlSimpleSetPredicate.setBooleanOperator(SimpleSetPredicate.BooleanOperator.IS_IN);
        ArrayType pmmlArray = pmmlSimpleSetPredicate.addNewArray();
        pmmlArray.setType(ArrayType.Type.STRING);
        DataCell[] splitValues = splitNode.getSplitValues();
        int partition = splitNode.getIndex(node);
        List<Integer> indices = null;
        if (partition == SplitNominalBinary.LEFT_PARTITION) {
            indices = splitNode.getLeftChildIndices();
        } else if (partition == SplitNominalBinary.RIGHT_PARTITION) {
            indices = splitNode.getRightChildIndices();
        } else {
            throw new IllegalArgumentException("Split node is neither " + "contained in the right nor in the left partition.");
        }
        // NOTE(review): values are joined with blanks and not quoted, so a value that itself
        // contains a blank would presumably be read back as several entries - confirm.
        StringBuilder classSet = new StringBuilder();
        for (Integer i : indices) {
            if (classSet.length() > 0) {
                classSet.append(" ");
            }
            classSet.append(splitValues[i].toString());
        }
        pmmlArray.setN(BigInteger.valueOf(indices.size()));
        XmlCursor xmlCursor = pmmlArray.newCursor();
        try {
            xmlCursor.setTextValue(classSet.toString());
        } finally {
            // release the cursor even if setting the text fails
            xmlCursor.dispose();
        }
    } else if (parent instanceof DecisionTreeNodeSplitNominal) {
        // one child per nominal value: the predicate tests equality with this child's value
        DecisionTreeNodeSplitNominal splitNode = (DecisionTreeNodeSplitNominal) parent;
        SimplePredicate pmmlSimplePredicate = pmmlNode.addNewSimplePredicate();
        pmmlSimplePredicate.setField(mapper.getDerivedFieldName(splitNode.getSplitAttr()));
        pmmlSimplePredicate.setOperator(Operator.EQUAL);
        int nodeIndex = parent.getIndex(node);
        pmmlSimplePredicate.setValue(String.valueOf(splitNode.getSplitValues()[nodeIndex].toString()));
    } else if (parent instanceof DecisionTreeNodeSplitPMML) {
        DecisionTreeNodeSplitPMML splitNode = (DecisionTreeNodeSplitPMML) parent;
        int nodeIndex = parent.getIndex(node);
        // get the PMML predicate of the current node from its parent
        PMMLPredicate predicate = splitNode.getSplitPred()[nodeIndex];
        if (predicate instanceof PMMLCompoundPredicate) {
            // surrogates as used in GBT
            exportCompoundPredicate(pmmlNode, (PMMLCompoundPredicate) predicate, mapper);
        } else {
            // note: this rewrites the split attribute of the predicate object itself
            predicate.setSplitAttribute(mapper.getDerivedFieldName(predicate.getSplitAttribute()));
            // delegate the writing to the predicate translator
            PMMLPredicateTranslator.exportTo(predicate, pmmlNode);
        }
    } else {
        throw new IllegalArgumentException("Node Type " + parent.getClass() + " is not supported!");
    }
    // adding score distribution (class counts)
    for (Entry<DataCell, Double> entry : node.getClassCounts().entrySet()) {
        DataCell cell = entry.getKey();
        Double freq = entry.getValue();
        ScoreDistribution pmmlScoreDist = pmmlNode.addNewScoreDistribution();
        pmmlScoreDist.setValue(cell.toString());
        pmmlScoreDist.setRecordCount(freq);
    }
    // adding children
    if (!(node instanceof DecisionTreeNodeLeaf)) {
        for (int i = 0; i < node.getChildCount(); i++) {
            addTreeNode(pmmlNode.addNewNode(), node.getChildAt(i), mapper);
        }
    }
}
Also used : DecisionTreeNodeSplitNominal(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitNominal) ArrayType(org.dmg.pmml.ArrayType) Entry(java.util.Map.Entry) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) SimpleSetPredicate(org.dmg.pmml.SimpleSetPredicateDocument.SimpleSetPredicate) XmlCursor(org.apache.xmlbeans.XmlCursor) BigInteger(java.math.BigInteger) ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) DecisionTreeNodeSplitNominalBinary(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitNominalBinary) DataCell(org.knime.core.data.DataCell) DecisionTreeNodeSplitContinuous(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitContinuous) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 59 with XmlCursor

use of org.apache.xmlbeans.XmlCursor in project knime-core by knime.

In the class PMMLPredicateTranslator, the method initSimpleSetPred.

/**
 * Converts a {@link PMMLSimpleSetPredicate} ({@code sp}) to a {@link SimpleSetPredicate} ({@code setPred}).
 *
 * <p>Field, boolean operator and a newly added array (size, type and content) are written to
 * {@code setPred}. String values are wrapped in double quotes with embedded double quotes escaped
 * by a backslash; values of other array types are written as they are. Every value is followed by
 * a single blank.
 *
 * @param sp A {@link PMMLSimpleSetPredicate}.
 * @param setPred The {@link SimpleSetPredicate} to initialize.
 * @since 2.9
 */
public static void initSimpleSetPred(final PMMLSimpleSetPredicate sp, final SimpleSetPredicate setPred) {
    setPred.setField(sp.getSplitAttribute());
    setPred.setBooleanOperator(getOperator(sp.getSetOperator()));
    ArrayType array = setPred.addNewArray();
    array.setN(BigInteger.valueOf(sp.getValues().size()));
    array.setType(getType(sp.getArrayType()));
    // The array content has no typed setter, so it is assembled as text and written with a cursor.
    final boolean quoteValues = sp.getArrayType() == PMMLArrayType.STRING;
    StringBuilder sb = new StringBuilder();
    for (String value : sp.getValues()) {
        if (quoteValues) {
            sb.append('"').append(value.replace("\"", "\\\"")).append('"');
        } else {
            sb.append(value);
        }
        sb.append(' ');
    }
    XmlCursor xmlCursor = array.newCursor();
    try {
        xmlCursor.setTextValue(sb.toString());
    } finally {
        // release the cursor even if setting the text fails
        xmlCursor.dispose();
    }
}
Also used : ArrayType(org.dmg.pmml.ArrayType) XmlCursor(org.apache.xmlbeans.XmlCursor)

Example 60 with XmlCursor

use of org.apache.xmlbeans.XmlCursor in project knime-core by knime.

In the class PMMLMapValuesTranslator, the method createDerivedFields.

/**
 * Creates the derived field describing the value mapping of the configured input column.
 *
 * <p>The field contains a MapValues element with one FieldColumnPair and an InlineTable that holds
 * one row ("in" / "out" element pair) per configured map entry.
 *
 * @return an array with the single derived field
 */
private DerivedField[] createDerivedFields() {
    DerivedField df = DerivedField.Factory.newInstance();
    df.setExtensionArray(createSummaryExtension());
    /* The field name must be retrieved before creating a new derived
     * name for this derived field as the map only contains the
     * current mapping. */
    String fieldName = m_mapper.getDerivedFieldName(m_config.getInColumn());
    if (m_config.getInColumn().equals(m_config.getOutColumn())) {
        // the column is replaced: keep its name for display and create a new derived name
        String name = m_config.getInColumn();
        df.setDisplayName(name);
        df.setName(m_mapper.createDerivedFieldName(name));
    } else {
        df.setName(m_config.getOutColumn());
    }
    df.setOptype(m_config.getOpType());
    df.setDataType(m_config.getOutDataType());
    MapValues mapValues = df.addNewMapValues();
    // the element in the InlineTable representing the output column
    // Use dummy name instead of m_config.getOutColumn() since the
    // input column could contain characters that are not allowed in XML
    final QName xmlOut = new QName("http://www.dmg.org/PMML-4_0", "out");
    mapValues.setOutputColumn(xmlOut.getLocalPart());
    mapValues.setDataType(m_config.getOutDataType());
    if (!m_config.getDefaultValue().isMissing()) {
        mapValues.setDefaultValue(m_config.getDefaultValue().toString());
    }
    if (!m_config.getMapMissingTo().isMissing()) {
        mapValues.setMapMissingTo(m_config.getMapMissingTo().toString());
    }
    // the mapping of input field <-> element in the InlineTable
    FieldColumnPair fieldColPair = mapValues.addNewFieldColumnPair();
    fieldColPair.setField(fieldName);
    // Use dummy name instead of m_config.getInColumn() since the
    // input column could contain characters that are not allowed in XML
    final QName xmlIn = new QName("http://www.dmg.org/PMML-4_0", "in");
    fieldColPair.setColumn(xmlIn.getLocalPart());
    InlineTable table = mapValues.addNewInlineTable();
    for (Entry<DataCell, ? extends DataCell> entry : m_config.getEntries().entrySet()) {
        Row row = table.addNewRow();
        XmlCursor cursor = row.newCursor();
        try {
            // step from the row's start tag into its content before inserting the cells
            cursor.toNextToken();
            cursor.insertElementWithText(xmlIn, entry.getKey().toString());
            cursor.insertElementWithText(xmlOut, entry.getValue().toString());
        } finally {
            // release the cursor even if an insert fails
            cursor.dispose();
        }
    }
    return new DerivedField[] { df };
}
Also used : InlineTable(org.dmg.pmml.InlineTableDocument.InlineTable) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) QName(javax.xml.namespace.QName) DataCell(org.knime.core.data.DataCell) FieldColumnPair(org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair) Row(org.dmg.pmml.RowDocument.Row) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) XmlCursor(org.apache.xmlbeans.XmlCursor)

Aggregations

XmlCursor (org.apache.xmlbeans.XmlCursor)160 XmlObject (org.apache.xmlbeans.XmlObject)68 QName (javax.xml.namespace.QName)21 XmlException (org.apache.xmlbeans.XmlException)16 TokenType (org.apache.xmlbeans.XmlCursor.TokenType)14 CTTbl (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl)14 CTP (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP)10 ArrayList (java.util.ArrayList)9 POSIXApplicationType (org.ggf.schemas.jsdl.x2005.x11.jsdlPosix.POSIXApplicationType)8 HPCProfileApplicationType (org.ggf.schemas.jsdl.x2006.x07.jsdlHpcpa.HPCProfileApplicationType)8 SPMDApplicationType (org.ogf.schemas.jsdl.x2007.x02.jsdlSpmd.SPMDApplicationType)8 IOException (java.io.IOException)5 POIXMLException (org.apache.poi.POIXMLException)5 InputStream (java.io.InputStream)4 DrawPaint (org.apache.poi.sl.draw.DrawPaint)3 ArrayType (org.dmg.pmml.ArrayType)3 ApplicationType (org.ggf.schemas.jsdl.x2005.x11.jsdl.ApplicationType)3 LineString (org.locationtech.jts.geom.LineString)3 CTRow (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow)3 CTSdtBlock (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock)3