Search in sources :

Example 16 with PMMLDocument

use of org.dmg.pmml.PMMLDocument in project knime-core by knime.

the class PMMLRegressionModelWrapper method createPMMLDocument.

/**
 * {@inheritDoc}
 */
@Override
public PMMLDocument createPMMLDocument(final DataDictionary dataDict) {
    PMMLDocument doc = createEmptyDocument(dataDict);
    doc.getPMML().setRegressionModelArray(new RegressionModel[] { m_model });
    return doc;
}
Also used : PMMLDocument(org.dmg.pmml.PMMLDocument)

Example 17 with PMMLDocument

use of org.dmg.pmml.PMMLDocument in project knime-core by knime.

the class DataColumnSpecFilterPMMLNodeModel method createPMMLOut.

private PMMLPortObject createPMMLOut(final PMMLPortObject pmmlIn, final DataTableSpec outSpec, final FilterResult res) throws XmlException {
    StringBuffer warningBuffer = new StringBuffer();
    if (pmmlIn == null) {
        return new PMMLPortObject(createPMMLSpec(null, outSpec, res));
    } else {
        PMMLDocument pmmldoc;
        try (LockedSupplier<Document> supplier = pmmlIn.getPMMLValue().getDocumentSupplier()) {
            pmmldoc = PMMLDocument.Factory.parse(supplier.get());
        }
        // Inspect models to check if they use any excluded columns
        List<PMMLModelWrapper> models = PMMLModelWrapper.getModelListFromPMMLDocument(pmmldoc);
        for (PMMLModelWrapper model : models) {
            MiningSchema ms = model.getMiningSchema();
            for (MiningField mf : ms.getMiningFieldList()) {
                if (isExcluded(mf.getName(), res)) {
                    if (warningBuffer.length() != 0) {
                        warningBuffer.append("\n");
                    }
                    warningBuffer.append(model.getModelType().name() + " uses excluded column " + mf.getName());
                }
            }
        }
        ArrayList<String> warningFields = new ArrayList<String>();
        PMML pmml = pmmldoc.getPMML();
        // Now check the transformations if they exist
        if (pmml.getTransformationDictionary() != null) {
            for (DerivedField df : pmml.getTransformationDictionary().getDerivedFieldList()) {
                FieldRef fr = df.getFieldRef();
                if (fr != null && isExcluded(fr.getField(), res)) {
                    warningFields.add(fr.getField());
                }
                Aggregate a = df.getAggregate();
                if (a != null && isExcluded(a.getField(), res)) {
                    warningFields.add(a.getField());
                }
                Apply ap = df.getApply();
                if (ap != null) {
                    for (FieldRef fieldRef : ap.getFieldRefList()) {
                        if (isExcluded(fieldRef.getField(), res)) {
                            warningFields.add(fieldRef.getField());
                            break;
                        }
                    }
                }
                Discretize d = df.getDiscretize();
                if (d != null && isExcluded(d.getField(), res)) {
                    warningFields.add(d.getField());
                }
                MapValues mv = df.getMapValues();
                if (mv != null) {
                    for (FieldColumnPair fcp : mv.getFieldColumnPairList()) {
                        if (isExcluded(fcp.getField(), res)) {
                            warningFields.add(fcp.getField());
                        }
                    }
                }
                NormContinuous nc = df.getNormContinuous();
                if (nc != null && isExcluded(nc.getField(), res)) {
                    warningFields.add(nc.getField());
                }
                NormDiscrete nd = df.getNormDiscrete();
                if (nd != null && isExcluded(nd.getField(), res)) {
                    warningFields.add(nd.getField());
                }
            }
        }
        DataDictionary dict = pmml.getDataDictionary();
        List<DataField> fields = dict.getDataFieldList();
        // Apply filter to spec
        int numFields = 0;
        for (int i = fields.size() - 1; i >= 0; i--) {
            if (isExcluded(fields.get(i).getName(), res)) {
                dict.removeDataField(i);
            } else {
                numFields++;
            }
        }
        dict.setNumberOfFields(new BigInteger(Integer.toString(numFields)));
        pmml.setDataDictionary(dict);
        pmmldoc.setPMML(pmml);
        // generate warnings and set as warning message
        for (String s : warningFields) {
            if (warningBuffer.length() != 0) {
                warningBuffer.append("\n");
            }
            warningBuffer.append("Transformation dictionary uses excluded column " + s);
        }
        if (warningBuffer.length() > 0) {
            setWarningMessage(warningBuffer.toString().trim());
        }
        PMMLPortObject outport = null;
        try {
            outport = new PMMLPortObject(createPMMLSpec(pmmlIn.getSpec(), outSpec, res), pmmldoc);
        } catch (IllegalArgumentException e) {
            if (res.getIncludes().length == 0) {
                throw new IllegalArgumentException("Excluding all columns produces invalid PMML", e);
            } else {
                throw e;
            }
        }
        return outport;
    }
}
Also used : MiningField(org.dmg.pmml.MiningFieldDocument.MiningField) NormContinuous(org.dmg.pmml.NormContinuousDocument.NormContinuous) Apply(org.dmg.pmml.ApplyDocument.Apply) ArrayList(java.util.ArrayList) FieldColumnPair(org.dmg.pmml.FieldColumnPairDocument.FieldColumnPair) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) MapValues(org.dmg.pmml.MapValuesDocument.MapValues) Discretize(org.dmg.pmml.DiscretizeDocument.Discretize) FieldRef(org.dmg.pmml.FieldRefDocument.FieldRef) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) PMMLModelWrapper(org.knime.core.node.port.pmml.PMMLModelWrapper) NormDiscrete(org.dmg.pmml.NormDiscreteDocument.NormDiscrete) MiningSchema(org.dmg.pmml.MiningSchemaDocument.MiningSchema) DataField(org.dmg.pmml.DataFieldDocument.DataField) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) BigInteger(java.math.BigInteger) PMMLDocument(org.dmg.pmml.PMMLDocument) Aggregate(org.dmg.pmml.AggregateDocument.Aggregate) DerivedField(org.dmg.pmml.DerivedFieldDocument.DerivedField)

Example 18 with PMMLDocument

use of org.dmg.pmml.PMMLDocument in project knime-core by knime.

the class RuleEngine2PortsNodeModel method computeRearrangerWithPMML.

/**
 * @param spec
 * @param rules
 * @param flowVars
 * @param ruleIdx
 * @param outcomeIdx
 * @param confidenceIdx
 * @param weightIdx
 * @param validationIdx
 * @param outputColumnName
 * @return
 * @throws InterruptedException
 * @throws InvalidSettingsException
 */
private Pair<ColumnRearranger, PortObject> computeRearrangerWithPMML(final DataTableSpec spec, final RowInput rules, final Map<String, FlowVariable> flowVars, final int ruleIdx, final int outcomeIdx, final int confidenceIdx, final int weightIdx, final int validationIdx, final String outputColumnName) throws InterruptedException, InvalidSettingsException {
    PortObject po;
    ColumnRearranger ret;
    PMMLDocument doc = PMMLDocument.Factory.newInstance();
    final PMML pmmlObj = doc.addNewPMML();
    RuleSetModel ruleSetModel = pmmlObj.addNewRuleSetModel();
    RuleSet ruleSet = ruleSetModel.addNewRuleSet();
    List<DataType> outcomeTypes = new ArrayList<>();
    PMMLRuleParser parser = new PMMLRuleParser(spec, flowVars);
    int lineNo = 0;
    DataRow ruleRow;
    while ((ruleRow = rules.poll()) != null) {
        ++lineNo;
        DataCell rule = ruleRow.getCell(ruleIdx);
        CheckUtils.checkSetting(!rule.isMissing(), "Missing rule in row: " + ruleRow.getKey());
        if (rule instanceof StringValue) {
            StringValue ruleText = (StringValue) rule;
            String r = ruleText.getStringValue().replaceAll("[\r\n]+", " ");
            if (RuleSupport.isComment(r)) {
                continue;
            }
            if (outcomeIdx >= 0) {
                r += " => " + m_settings.asStringFailForMissing(ruleRow.getCell(outcomeIdx));
            }
            ParseState state = new ParseState(r);
            try {
                PMMLPredicate condition = parser.parseBooleanExpression(state);
                SimpleRule simpleRule = ruleSet.addNewSimpleRule();
                setCondition(simpleRule, condition);
                state.skipWS();
                state.consumeText("=>");
                state.skipWS();
                Expression outcome = parser.parseOutcomeOperand(state, null);
                simpleRule.setScore(outcome.toString());
                if (confidenceIdx >= 0) {
                    DataCell confidenceCell = ruleRow.getCell(confidenceIdx);
                    if (!confidenceCell.isMissing()) {
                        if (confidenceCell instanceof DoubleValue) {
                            DoubleValue dv = (DoubleValue) confidenceCell;
                            double confidence = dv.getDoubleValue();
                            simpleRule.setConfidence(confidence);
                        }
                    }
                }
                if (weightIdx >= 0) {
                    DataCell weightCell = ruleRow.getCell(weightIdx);
                    boolean missing = true;
                    if (!weightCell.isMissing()) {
                        if (weightCell instanceof DoubleValue) {
                            DoubleValue dv = (DoubleValue) weightCell;
                            double weight = dv.getDoubleValue();
                            simpleRule.setWeight(weight);
                            missing = false;
                        }
                    }
                    if (missing && m_settings.isHasDefaultWeight()) {
                        simpleRule.setWeight(m_settings.getDefaultWeight());
                    }
                }
                CheckUtils.checkSetting(outcome.isConstant(), "Outcome is not constant in line " + lineNo + " (" + ruleRow.getKey() + ") for rule: " + rule);
                outcomeTypes.add(outcome.getOutputType());
            } catch (ParseException e) {
                ParseException error = Util.addContext(e, r, lineNo);
                throw new InvalidSettingsException("Wrong rule in line: " + ruleRow.getKey() + "\n" + error.getMessage(), error);
            }
        } else {
            CheckUtils.checkSetting(false, "Wrong type (" + rule.getType() + ") of rule: " + rule + "\nin row: " + ruleRow.getKey());
        }
    }
    ColumnRearranger dummy = new ColumnRearranger(spec);
    if (!m_settings.isReplaceColumn()) {
        dummy.append(new SingleCellFactory(new DataColumnSpecCreator(outputColumnName, RuleEngineNodeModel.computeOutputType(outcomeTypes, computeOutcomeType(rules.getDataTableSpec()), true, m_settings.isDisallowLongOutputForCompatibility())).createSpec()) {

            @Override
            public DataCell getCell(final DataRow row) {
                return null;
            }
        });
    }
    PMMLPortObject pmml = createPMMLPortObject(doc, ruleSetModel, ruleSet, parser, dummy.createSpec());
    po = pmml;
    m_copy = copy(pmml);
    String predictionConfidenceColumn = m_settings.getPredictionConfidenceColumn();
    if (predictionConfidenceColumn == null || predictionConfidenceColumn.isEmpty()) {
        predictionConfidenceColumn = RuleEngine2PortsSettings.DEFAULT_PREDICTION_CONFIDENCE_COLUMN;
    }
    ret = PMMLRuleSetPredictorNodeModel.createRearranger(pmml, spec, m_settings.isReplaceColumn(), outputColumnName, m_settings.isComputeConfidence(), DataTableSpec.getUniqueColumnName(dummy.createSpec(), predictionConfidenceColumn), validationIdx);
    return Pair.create(ret, po);
}
Also used : RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) PMMLRuleParser(org.knime.base.node.rules.engine.pmml.PMMLRuleParser) ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) DataRow(org.knime.core.data.DataRow) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataType(org.knime.core.data.DataType) StringValue(org.knime.core.data.StringValue) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) Expression(org.knime.base.node.rules.engine.Expression) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) DataCell(org.knime.core.data.DataCell) PMMLDocument(org.dmg.pmml.PMMLDocument) ParseException(java.text.ParseException)

Example 19 with PMMLDocument

use of org.dmg.pmml.PMMLDocument in project knime-core by knime.

the class PMMLRuleEditorNodeModel method createRearrangerAndPMMLModel.

private RearrangerAndPMMLModel createRearrangerAndPMMLModel(final DataTableSpec spec) throws ParseException, InvalidSettingsException {
    final PMMLDocument doc = PMMLDocument.Factory.newInstance();
    final PMML pmml = doc.addNewPMML();
    RuleSetModel ruleSetModel = pmml.addNewRuleSetModel();
    RuleSet ruleSet = ruleSetModel.addNewRuleSet();
    PMMLRuleParser parser = new PMMLRuleParser(spec, getAvailableInputFlowVariables());
    ColumnRearranger rearranger = createRearranger(spec, ruleSet, parser);
    PMMLPortObject ret = new PMMLPortObject(createPMMLPortObjectSpec(rearranger.createSpec(), parser.getUsedColumns()));
    // if (inData[1] != null) {
    // PMMLPortObject po = (PMMLPortObject)inData[1];
    // TransformationDictionary dict = TransformationDictionary.Factory.newInstance();
    // dict.setDerivedFieldArray(po.getDerivedFields());
    // ret.addGlobalTransformations(dict);
    // }
    PMMLRuleTranslator modelTranslator = new PMMLRuleTranslator();
    ruleSetModel.setFunctionName(MININGFUNCTION.CLASSIFICATION);
    ruleSet.setDefaultConfidence(defaultConfidenceValue());
    PMMLMiningSchemaTranslator.writeMiningSchema(ret.getSpec(), ruleSetModel);
    PMMLDataDictionaryTranslator ddTranslator = new PMMLDataDictionaryTranslator();
    ddTranslator.exportTo(doc, ret.getSpec());
    modelTranslator.initializeFrom(doc);
    ret.addModelTranslater(modelTranslator);
    ret.validate();
    return new RearrangerAndPMMLModel(rearranger, ret);
}
Also used : RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) PMMLDocument(org.dmg.pmml.PMMLDocument) PMMLDataDictionaryTranslator(org.knime.core.node.port.pmml.PMMLDataDictionaryTranslator)

Aggregations

PMMLDocument (org.dmg.pmml.PMMLDocument)19 PMML (org.dmg.pmml.PMMLDocument.PMML)7 PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject)6 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)5 RuleSet (org.dmg.pmml.RuleSetDocument.RuleSet)4 RuleSetModel (org.dmg.pmml.RuleSetModelDocument.RuleSetModel)4 DataRow (org.knime.core.data.DataRow)3 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)3 PortObject (org.knime.core.node.port.PortObject)3 Document (org.w3c.dom.Document)3 IOException (java.io.IOException)2 BigInteger (java.math.BigInteger)2 ParseException (java.text.ParseException)2 ArrayList (java.util.ArrayList)2 XmlException (org.apache.xmlbeans.XmlException)2 XmlObject (org.apache.xmlbeans.XmlObject)2 DataCell (org.knime.core.data.DataCell)2 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)2 DataTableSpec (org.knime.core.data.DataTableSpec)2 StringValue (org.knime.core.data.StringValue)2