Search in sources :

Example 1 with MiningField

use of org.dmg.pmml.MiningFieldDocument.MiningField in project knime-core by knime.

the class PMMLMiningSchemaTranslator method initializeFrom.

/**
 * Initializes the mining schema translator based on a PMML document.
 * See {@link PMMLTranslator#initializeFrom(PMMLDocument)}
 * <p>
 * Only the mining schema of the first model type reported by
 * {@link PMMLUtils#getNumberOfModels(PMMLDocument)} is read. Fields with
 * usage type ACTIVE are collected as learning fields, fields with usage
 * type PREDICTED or TARGET as target fields; fields with any other usage
 * type are not collected. If the document contains no model or no mining
 * schema can be retrieved, a warning is logged and nothing is collected.
 *
 * @param pmmlDoc the PMML document
 */
public void initializeFrom(final PMMLDocument pmmlDoc) {
    Map<PMMLModelType, Integer> models = PMMLUtils.getNumberOfModels(pmmlDoc);
    if (models.isEmpty()) {
        LOGGER.warn("The PMML document contains no model. Hence no " + "mining schema could be found.");
        return;
    }
    // retrieve the first model's mining schema
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(pmmlDoc, models.keySet().iterator().next().getXmlBeansType());
    if (miningSchema == null) {
        // guard against a missing schema instead of failing with a
        // NullPointerException in the loop below
        LOGGER.warn("No mining schema found.");
        return;
    }
    for (MiningField miningField : miningSchema.getMiningFieldArray()) {
        // attributes that are not supported are reported and otherwise ignored
        if (miningField.isSetMissingValueReplacement()) {
            LOGGER.warn("\"missingValueReplacement\" is not supported and " + "will be ignored. Skipping it");
        }
        if (miningField.isSetMissingValueTreatment()) {
            LOGGER.warn("\"missingValueTreatment\" is not supported and " + "will be ignored. Skipping it");
        }
        if (miningField.isSetOutliers()) {
            LOGGER.warn("\"outliers\" is not supported and " + "will be ignored. Skipping it");
        }
        checkInvalidValueTreatment(pmmlDoc, miningField);
        String name = miningField.getName();
        FIELDUSAGETYPE.Enum usageType = miningField.getUsageType();
        if (FIELDUSAGETYPE.ACTIVE == usageType) {
            m_learningFields.add(name);
        } else if (FIELDUSAGETYPE.PREDICTED == usageType || FIELDUSAGETYPE.TARGET == usageType) {
            m_targetFields.add(name);
        }
    }
}
Also used : MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) PMMLModelType(org.knime.core.pmml.PMMLModelType) FIELDUSAGETYPE(org.dmg.pmml.FIELDUSAGETYPE)

Example 2 with MiningField

use of org.dmg.pmml.MiningFieldDocument.MiningField in project knime-core by knime.

the class PMMLPortObject method addModelTranslater.

/**
 * Adds the model of the content translater to the PMML document.
 * <p>
 * After the model has been exported, the mining schema of the first model
 * of the exported type is cleaned up: mining fields that are also derived
 * fields are removed, and every field referenced from the local
 * transformations that is neither a derived field nor already a mining
 * field is added. If no mining schema is found, the method only logs that
 * fact and returns.
 *
 * @param modelTranslator the model translator containing the model to be
 *      added
 */
public void addModelTranslater(final PMMLTranslator modelTranslator) {
    SchemaType type = modelTranslator.exportTo(m_pmmlDoc, m_spec);
    LocalTransformations localTransformations = moveDerivedFields(type);
    /* Remove mining fields from mining schema that were created as a
         * derived field. In KNIME the origin of columns is not distinguished
         * and all columns are added to the mining schema. But in PMML this
         * results in duplicate entries. Those columns should only appear once
         * as derived field in the transformation dictionary or local
         * transformations. */
    Set<String> derivedFields = new HashSet<String>();
    for (DerivedField derivedField : getDerivedFields()) {
        derivedFields.add(derivedField.getName());
    }
    MiningSchema miningSchema = PMMLUtils.getFirstMiningSchema(m_pmmlDoc, type);
    if (miningSchema == null) {
        LOGGER.info("No mining schema found.");
        return;
    }
    MiningField[] miningFieldArray = miningSchema.getMiningFieldArray();
    // Collect the retained fields directly instead of copying the whole
    // array and removing entries one by one (each removal is a linear scan).
    List<MiningField> miningFields = new ArrayList<MiningField>(miningFieldArray.length);
    Set<String> miningFieldNames = new HashSet<String>();
    for (MiningField miningField : miningFieldArray) {
        String miningFieldName = miningField.getName();
        if (derivedFields.contains(miningFieldName)) {
            LOGGER.debug("Removing field \"" + miningFieldName + "\" from MiningFields as it is a DerivedField.");
        } else {
            miningFields.add(miningField);
            miningFieldNames.add(miningFieldName);
        }
    }
    /* According to the PMML Spec DerivedFields must ultimately refer back
         * to active MiningFields of the model's MiningSchema. Therefore we
         * have to add all referred DataFields to the MiningSchema. */
    String fullPath = NAMESPACE_DECLARATION + "$this/pmml:DerivedField/*/@field" + "| $this/pmml:DerivedField//pmml:FieldColumnPair/@field";
    XmlObject[] xmlDescendants = localTransformations.selectPath(fullPath);
    // insertion-ordered so that new mining fields are appended in the order
    // in which the references were selected
    Set<String> referencedFields = new LinkedHashSet<String>();
    // collect all referred field names
    for (XmlObject xo : xmlDescendants) {
        XmlCursor xmlCursor = xo.newCursor();
        try {
            referencedFields.add(xmlCursor.getTextValue());
        } finally {
            // always release the cursor, even if reading the value fails
            xmlCursor.dispose();
        }
    }
    for (String referencedField : referencedFields) {
        if (!derivedFields.contains(referencedField) && !miningFieldNames.contains(referencedField)) {
            /* Add them to the mining schema if they are not already
                 * contained there and if they don't refer to derived fields. */
            MiningField miningField = MiningField.Factory.newInstance();
            miningField.setName(referencedField);
            miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
            LOGGER.debug("Adding field \"" + referencedField + "\" to MiningSchema because it is referenced in " + "LocalTransformations.");
            miningFields.add(miningField);
        }
    }
    miningSchema.setMiningFieldArray(miningFields.toArray(new MiningField[0]));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) ArrayList(java.util.ArrayList) SchemaType(org.apache.xmlbeans.SchemaType) XmlCursor(org.apache.xmlbeans.XmlCursor) LocalTransformations(org.dmg.pmml.LocalTransformationsDocument.LocalTransformations) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) XmlObject(org.apache.xmlbeans.XmlObject) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField) HashSet(java.util.HashSet)

Example 3 with MiningField

use of org.dmg.pmml.MiningFieldDocument.MiningField in project knime-core by knime.

the class PMMLDecisionTreeTranslator method parseDecTreeFromModel.

/**
 * Converts a PMML TreeModel into a KNIME decision tree.
 * <p>
 * Classification and regression models are accepted; which of the two was
 * encountered is remembered in {@code m_isClassification}. The name of the
 * first mining field with usage type PREDICTED or TARGET is used as the
 * predicted field; if there is none, the name "predictedField" is used.
 *
 * @param treeModel the tree model parsed from the PMML
 * @return the decision tree built from the given model
 * @throws IllegalArgumentException if the mining function of the model is
 *             neither classification nor regression
 */
public DecisionTree parseDecTreeFromModel(final TreeModel treeModel) {
    // Mining function: classification and regression are the only ones handled.
    final Enum miningFunction = CheckUtils.checkArgumentNotNull(treeModel.getFunctionName(), "Function name must not be null");
    final boolean classification = MININGFUNCTION.CLASSIFICATION.equals(miningFunction);
    if (!classification && !MININGFUNCTION.REGRESSION.equals(miningFunction)) {
        throw new IllegalArgumentException("Unsupported function name \"" + miningFunction + "\"");
    }
    m_isClassification = classification;

    // Predicted field: first PREDICTED/TARGET entry of the mining schema,
    // falling back to a fixed default name.
    String targetName = "predictedField";
    for (MiningField field : treeModel.getMiningSchema().getMiningFieldArray()) {
        final FIELDUSAGETYPE.Enum usage = field.getUsageType();
        if (FIELDUSAGETYPE.PREDICTED == usage || FIELDUSAGETYPE.TARGET == usage) {
            targetName = field.getName();
            break;
        }
    }

    // Translate the PMML node hierarchy into KNIME tree nodes.
    final DecisionTreeNode root = addKnimeTreeNode(treeModel.getNode());

    // No-true-child strategy: a null prediction unless the model asks for
    // the last prediction.
    final PMMLNoTrueChildStrategy noTrueChild =
        NOTRUECHILDSTRATEGY.RETURN_LAST_PREDICTION.equals(treeModel.getNoTrueChildStrategy())
            ? PMMLNoTrueChildStrategy.RETURN_LAST_PREDICTION
            : PMMLNoTrueChildStrategy.RETURN_NULL_PREDICTION;

    return new DecisionTree(root, targetName, MV_STRATEGY_TO_KNIME_MAP.get(treeModel.getMissingValueStrategy()), noTrueChild);
}
Also used : Enum(org.dmg.pmml.MININGFUNCTION.Enum) MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) Node(org.dmg.pmml.NodeDocument.Node) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 4 with MiningField

use of org.dmg.pmml.MiningFieldDocument.MiningField in project knime-core by knime.

the class PMMLMiningSchemaTranslator method writeMiningSchema.

/**
 * Writes the MiningSchema based upon the fields of the passed
 * {@link PMMLPortObjectSpec}.
 * <p>
 * Mining fields are written in this order: learning fields that are not
 * also target fields, preprocessing fields that are neither learning nor
 * target fields, and finally the target fields (with usage type TARGET).
 * All fields get the invalid value treatment "asIs". If the type of the
 * passed model is not one of the handled model types, the schema cannot be
 * attached and a warning is logged.
 *
 * @param portSpec based upon this port object spec the mining schema is
 *            written
 * @param model the PMML model element to write the mining schema to
 */
public static void writeMiningSchema(final PMMLPortObjectSpec portSpec, final XmlObject model) {
    MiningSchema miningSchema = MiningSchema.Factory.newInstance();
    // avoid duplicate entries
    Set<String> learningNames = new HashSet<String>(portSpec.getLearningFields());
    Set<String> targetNames = new HashSet<String>(portSpec.getTargetFields());
    for (String colName : portSpec.getLearningFields()) {
        if (!targetNames.contains(colName)) {
            // don't write usageType = active (is default)
            appendMiningField(miningSchema, colName, null);
        }
    }
    // add all fields referenced in local transformations
    for (String colName : portSpec.getPreprocessingFields()) {
        if (!learningNames.contains(colName) && !targetNames.contains(colName)) {
            // don't write usageType = active (is default)
            appendMiningField(miningSchema, colName, null);
        }
    }
    // target columns are written with usageType = target
    for (String colName : portSpec.getTargetFields()) {
        appendMiningField(miningSchema, colName, FIELDUSAGETYPE.TARGET);
    }
    /* Unfortunately the PMML models have no common base class. Therefore
         * a cast to the specific type is necessary for being able to add the
         * mining schema. */
    SchemaType type = model.schemaType();
    if (AssociationModel.type.equals(type)) {
        ((AssociationModel) model).setMiningSchema(miningSchema);
    } else if (ClusteringModel.type.equals(type)) {
        ((ClusteringModel) model).setMiningSchema(miningSchema);
    } else if (GeneralRegressionModel.type.equals(type)) {
        ((GeneralRegressionModel) model).setMiningSchema(miningSchema);
    } else if (MiningModel.type.equals(type)) {
        ((MiningModel) model).setMiningSchema(miningSchema);
    } else if (NaiveBayesModel.type.equals(type)) {
        ((NaiveBayesModel) model).setMiningSchema(miningSchema);
    } else if (NeuralNetwork.type.equals(type)) {
        ((NeuralNetwork) model).setMiningSchema(miningSchema);
    } else if (RegressionModel.type.equals(type)) {
        ((RegressionModel) model).setMiningSchema(miningSchema);
    } else if (RuleSetModel.type.equals(type)) {
        ((RuleSetModel) model).setMiningSchema(miningSchema);
    } else if (SequenceModel.type.equals(type)) {
        ((SequenceModel) model).setMiningSchema(miningSchema);
    } else if (SupportVectorMachineModel.type.equals(type)) {
        ((SupportVectorMachineModel) model).setMiningSchema(miningSchema);
    } else if (TextModel.type.equals(type)) {
        ((TextModel) model).setMiningSchema(miningSchema);
    } else if (TimeSeriesModel.type.equals(type)) {
        ((TimeSeriesModel) model).setMiningSchema(miningSchema);
    } else if (TreeModel.type.equals(type)) {
        ((TreeModel) model).setMiningSchema(miningSchema);
    } else if (NearestNeighborModel.type.equals(type)) {
        ((NearestNeighborModel) model).setMiningSchema(miningSchema);
    } else {
        // Previously the schema was dropped without any hint; make the
        // situation visible without changing the outcome for callers.
        LOGGER.warn("The mining schema could not be written as the model type \"" + type + "\" is not supported.");
    }
}

/**
 * Appends a mining field with invalid value treatment "asIs" to the schema.
 *
 * @param miningSchema the schema to append the field to
 * @param name the name of the mining field
 * @param usageType the usage type to write, or {@code null} to write none
 */
private static void appendMiningField(final MiningSchema miningSchema, final String name, final FIELDUSAGETYPE.Enum usageType) {
    MiningField miningField = miningSchema.addNewMiningField();
    miningField.setName(name);
    miningField.setInvalidValueTreatment(INVALIDVALUETREATMENTMETHOD.AS_IS);
    if (usageType != null) {
        miningField.setUsageType(usageType);
    }
}
Also used : SequenceModel(org.dmg.pmml.SequenceModelDocument.SequenceModel) MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) TextModel(org.dmg.pmml.TextModelDocument.TextModel) NaiveBayesModel(org.dmg.pmml.NaiveBayesModelDocument.NaiveBayesModel) SchemaType(org.apache.xmlbeans.SchemaType) RegressionModel(org.dmg.pmml.RegressionModelDocument.RegressionModel) GeneralRegressionModel(org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel) TreeModel(org.dmg.pmml.TreeModelDocument.TreeModel) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) HashSet(java.util.HashSet) AssociationModel(org.dmg.pmml.AssociationModelDocument.AssociationModel)

Example 5 with MiningField

use of org.dmg.pmml.MiningFieldDocument.MiningField in project knime-core by knime.

the class DataColumnSpecFilterPMMLNodeModel method createPMMLOut.

/**
 * Creates the outgoing PMML port object with the excluded columns removed
 * from the data dictionary.
 * <p>
 * Models and transformation dictionary entries that reference an excluded
 * column are left untouched; each such reference is reported through the
 * node's warning message instead.
 *
 * @param pmmlIn the incoming PMML port object, may be {@code null}
 * @param outSpec the table spec passed on to the PMML spec creation
 * @param res the filter result used to decide which columns are excluded
 * @return the new PMML port object
 * @throws XmlException declared for parsing the incoming PMML document
 */
private PMMLPortObject createPMMLOut(final PMMLPortObject pmmlIn, final DataTableSpec outSpec, final FilterResult res) throws XmlException {
    // No incoming PMML: only a spec-based port object can be created.
    if (pmmlIn == null) {
        return new PMMLPortObject(createPMMLSpec(null, outSpec, res));
    }

    final PMMLDocument document;
    try (LockedSupplier<Document> docSupplier = pmmlIn.getPMMLValue().getDocumentSupplier()) {
        document = PMMLDocument.Factory.parse(docSupplier.get());
    }

    // One warning per line; lines are separated by a newline.
    final StringBuilder warnings = new StringBuilder();

    // Models: report every mining field that refers to an excluded column.
    for (PMMLModelWrapper wrapper : PMMLModelWrapper.getModelListFromPMMLDocument(document)) {
        final MiningSchema schema = wrapper.getMiningSchema();
        for (MiningField field : schema.getMiningFieldList()) {
            if (!isExcluded(field.getName(), res)) {
                continue;
            }
            if (warnings.length() != 0) {
                warnings.append("\n");
            }
            warnings.append(wrapper.getModelType().name()).append(" uses excluded column ").append(field.getName());
        }
    }

    // Transformation dictionary (if present): collect the excluded columns
    // referenced by the expression of each derived field.
    final List<String> excludedReferences = new ArrayList<String>();
    final PMML pmml = document.getPMML();
    if (pmml.getTransformationDictionary() != null) {
        for (DerivedField derived : pmml.getTransformationDictionary().getDerivedFieldList()) {
            final FieldRef directRef = derived.getFieldRef();
            if (directRef != null && isExcluded(directRef.getField(), res)) {
                excludedReferences.add(directRef.getField());
            }

            final Aggregate aggregate = derived.getAggregate();
            if (aggregate != null && isExcluded(aggregate.getField(), res)) {
                excludedReferences.add(aggregate.getField());
            }

            final Apply apply = derived.getApply();
            if (apply != null) {
                // only the first excluded reference of an Apply is recorded
                for (FieldRef argument : apply.getFieldRefList()) {
                    if (isExcluded(argument.getField(), res)) {
                        excludedReferences.add(argument.getField());
                        break;
                    }
                }
            }

            final Discretize discretize = derived.getDiscretize();
            if (discretize != null && isExcluded(discretize.getField(), res)) {
                excludedReferences.add(discretize.getField());
            }

            final MapValues mapValues = derived.getMapValues();
            if (mapValues != null) {
                // every excluded column pair of a MapValues is recorded
                for (FieldColumnPair pair : mapValues.getFieldColumnPairList()) {
                    if (isExcluded(pair.getField(), res)) {
                        excludedReferences.add(pair.getField());
                    }
                }
            }

            final NormContinuous normContinuous = derived.getNormContinuous();
            if (normContinuous != null && isExcluded(normContinuous.getField(), res)) {
                excludedReferences.add(normContinuous.getField());
            }

            final NormDiscrete normDiscrete = derived.getNormDiscrete();
            if (normDiscrete != null && isExcluded(normDiscrete.getField(), res)) {
                excludedReferences.add(normDiscrete.getField());
            }
        }
    }

    // Data dictionary: drop excluded fields. Iterating backwards keeps the
    // remaining indices valid while entries are removed.
    final DataDictionary dictionary = pmml.getDataDictionary();
    final List<DataField> dataFields = dictionary.getDataFieldList();
    int keptFields = 0;
    for (int idx = dataFields.size() - 1; idx >= 0; idx--) {
        if (!isExcluded(dataFields.get(idx).getName(), res)) {
            keptFields++;
            continue;
        }
        dictionary.removeDataField(idx);
    }
    dictionary.setNumberOfFields(BigInteger.valueOf(keptFields));
    pmml.setDataDictionary(dictionary);
    document.setPMML(pmml);

    // Append the transformation warnings after the model warnings.
    for (String column : excludedReferences) {
        if (warnings.length() != 0) {
            warnings.append("\n");
        }
        warnings.append("Transformation dictionary uses excluded column ").append(column);
    }
    if (warnings.length() > 0) {
        setWarningMessage(warnings.toString().trim());
    }

    try {
        return new PMMLPortObject(createPMMLSpec(pmmlIn.getSpec(), outSpec, res), document);
    } catch (IllegalArgumentException e) {
        if (res.getIncludes().length != 0) {
            throw e;
        }
        // give a more helpful message when nothing is included at all
        throw new IllegalArgumentException("Excluding all columns produces invalid PMML", e);
    }
}
Also used : MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) NormContinuous(org.dmg.pmml.NormContinuousDocument.NormContinuous) Apply(org.dmg.pmml.ApplyDocument.Apply) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) PMMLModelWrapper(org.knime.core.node.port.pmml.PMMLModelWrapper) NormDiscrete(org.dmg.pmml.NormDiscreteDocument.NormDiscrete) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) DataField(org.dmg.pmml.DataFieldDocument.DataField) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) BigInteger(java.math.BigInteger) Aggregate(org.dmg.pmml.AggregateDocument.Aggregate) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Aggregations

MiningField (org.dmg.pmml.MiningFieldDocument.MiningField): 6, MiningSchema (org.dmg.pmml.MiningSchemaDocument.MiningSchema): 5, ArrayList (java.util.ArrayList): 2, HashSet (java.util.HashSet): 2, SchemaType (org.apache.xmlbeans.SchemaType): 2, DerivedField (org.dmg.pmml.DerivedFieldDocument.DerivedField): 2, BigInteger (java.math.BigInteger): 1, LinkedHashSet (java.util.LinkedHashSet): 1, XmlCursor (org.apache.xmlbeans.XmlCursor): 1, XmlObject (org.apache.xmlbeans.XmlObject): 1, Aggregate (org.dmg.pmml.AggregateDocument.Aggregate): 1, Apply (org.dmg.pmml.ApplyDocument.Apply): 1, AssociationModel (org.dmg.pmml.AssociationModelDocument.AssociationModel): 1, DataDictionary (org.dmg.pmml.DataDictionaryDocument.DataDictionary): 1, DataField (org.dmg.pmml.DataFieldDocument.DataField): 1, Discretize (org.dmg.pmml.DiscretizeDocument.Discretize): 1, FIELDUSAGETYPE (org.dmg.pmml.FIELDUSAGETYPE): 1, FieldColumnPair (org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair): 1, FieldRef (org.dmg.pmml.FieldRefDocument.FieldRef): 1, GeneralRegressionModel (org.dmg.pmml.GeneralRegressionModelDocument.GeneralRegressionModel): 1