Search in sources:

Example 6 with SimpleRule

Use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.

In the class PMMLRuleTranslator, method createRule.

/**
 * Translates an xml {@link SimpleRule} into the internal {@link Rule} representation.
 * <p>
 * The condition is taken from the first predicate kind that is present on the rule, checked in this order: True,
 * False, CompoundPredicate, SimplePredicate, SimpleSetPredicate. The score distributions are collected into a map
 * keyed by their value. Weight, confidence, nbCorrect and recordCount are only transferred when they are set on
 * the xml rule.
 *
 * @param r An xml {@link SimpleRule}.
 * @return The corresponding {@link Rule} object.
 * @throws UnsupportedOperationException if none of the handled predicate kinds is present on {@code r}.
 */
private Rule createRule(final SimpleRule r) {
    // Exactly one branch assigns the condition; the order of the checks decides which predicate wins.
    final PMMLPredicate condition;
    if (r.getTrue() != null) {
        condition = new PMMLTruePredicate();
    } else if (r.getFalse() != null) {
        condition = new PMMLFalsePredicate();
    } else if (r.getCompoundPredicate() != null) {
        condition = parseCompoundPredicate(r.getCompoundPredicate());
    } else if (r.getSimplePredicate() != null) {
        condition = parseSimplePredicate(r.getSimplePredicate());
    } else if (r.getSimpleSetPredicate() != null) {
        condition = parseSimpleSetPredicate(r.getSimpleSetPredicate());
    } else {
        throw new UnsupportedOperationException(r.toString());
    }

    // Score distribution value -> (optional probability, record count).
    final Map<String, ScoreProbabilityAndRecordCount> distribution = r.getScoreDistributionList().stream()
        .collect(Collectors.toMap(
            sd -> sd.getValue(),
            sd -> new ScoreProbabilityAndRecordCount(sd.isSetProbability() ? sd.getProbability() : null, sd.getRecordCount())));

    // Optional attributes are represented by null when they are not set in the xml.
    final Double weight = r.isSetWeight() ? r.getWeight() : null;
    final Double confidence = r.isSetConfidence() ? r.getConfidence() : null;

    final Rule rule = new Rule(condition, r.getScore(), weight, confidence, distribution);
    if (r.isSetNbCorrect()) {
        rule.setNbCorrect(r.getNbCorrect());
    }
    if (r.isSetRecordCount()) {
        rule.setRecordCount(r.getRecordCount());
    }
    return rule;
}
Also used : PMMLTruePredicate(org.knime.base.node.mine.decisiontree2.PMMLTruePredicate) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) PMMLDocument(org.dmg.pmml.PMMLDocument) CompoundRule(org.dmg.pmml.CompoundRuleDocument.CompoundRule) PMMLMiningSchemaTranslator(org.knime.core.node.port.pmml.PMMLMiningSchemaTranslator) SimpleRuleDocument(org.dmg.pmml.SimpleRuleDocument) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) BigDecimal(java.math.BigDecimal) PMML(org.dmg.pmml.PMMLDocument.PMML) PMMLFalsePredicate(org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate) PMMLBooleanOperator(org.knime.base.node.mine.decisiontree2.PMMLBooleanOperator) Map(java.util.Map) SchemaType(org.apache.xmlbeans.SchemaType) PMMLSimpleSetPredicate(org.knime.base.node.mine.decisiontree2.PMMLSimpleSetPredicate) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) SimpleSetPredicate(org.dmg.pmml.SimpleSetPredicateDocument.SimpleSetPredicate) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) Collectors(java.util.stream.Collectors) Value(org.dmg.pmml.ValueDocument.Value) List(java.util.List) DerivedFieldMapper(org.knime.core.node.port.pmml.preproc.DerivedFieldMapper) PMMLTruePredicate(org.knime.base.node.mine.decisiontree2.PMMLTruePredicate) RuleSelectionMethod(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod) Entry(java.util.Map.Entry) MININGFUNCTION(org.dmg.pmml.MININGFUNCTION) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) PMMLTranslator(org.knime.core.node.port.pmml.PMMLTranslator) PMMLOperator(org.knime.base.node.mine.decisiontree2.PMMLOperator) XmlCursor(org.apache.xmlbeans.XmlCursor) Criterion(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod.Criterion) CompoundRuleDocument(org.dmg.pmml.CompoundRuleDocument) 
RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) HashMap(java.util.HashMap) Enum(org.dmg.pmml.SimplePredicateDocument.SimplePredicate.Operator.Enum) ArrayList(java.util.ArrayList) Pair(org.knime.core.util.Pair) LinkedHashMap(java.util.LinkedHashMap) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) XmlObject(org.apache.xmlbeans.XmlObject) DataField(org.dmg.pmml.DataFieldDocument.DataField) LinkedList(java.util.LinkedList) PMMLPredicateTranslator(org.knime.base.node.mine.decisiontree2.PMMLPredicateTranslator) PMMLConditionTranslator(org.knime.base.node.mine.decisiontree2.PMMLConditionTranslator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) BitSet(java.util.BitSet) Collections(java.util.Collections) ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) CompoundRule(org.dmg.pmml.CompoundRuleDocument.CompoundRule) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) PMMLFalsePredicate(org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate)

Example 7 with SimpleRule

Use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.

In the class RuleEngine2PortsNodeModel, method computeRearrangerWithPMML.

/**
 * Builds a PMML rule set from the rows of the rules input and creates the {@link ColumnRearranger} applying it.
 * <p>
 * Every row must hold a non-missing {@link StringValue} in column {@code ruleIdx}. Line breaks in the rule text are
 * replaced by a single space and comment rules are skipped. When an outcome column is given, its value is appended
 * to the rule text after {@code " => "} before parsing. Each parsed rule is added to the rule set as a
 * {@link SimpleRule}, optionally with confidence and weight taken from the respective columns. The created PMML
 * port object is also stored (as a copy) in {@code m_copy}.
 *
 * @param spec the table spec handed to the rule parser and used as the base for the rearranger
 * @param rules the input delivering the rule rows
 * @param flowVars the flow variables handed to the rule parser
 * @param ruleIdx index of the column holding the rule text
 * @param outcomeIdx index of the column holding the outcome; a negative value means the outcome is not read from
 *            a separate column
 * @param confidenceIdx index of the column holding the rule confidence; a negative value disables it
 * @param weightIdx index of the column holding the rule weight; a negative value disables it
 * @param validationIdx passed on unchanged to {@code PMMLRuleSetPredictorNodeModel.createRearranger}
 * @param outputColumnName name of the output column
 * @return the rearranger together with the created PMML port object
 * @throws InterruptedException declared by the signature; presumably raised by {@code rules.poll()}
 * @throws InvalidSettingsException if a rule cell is missing or not a string, a rule cannot be parsed, or an
 *             outcome is not constant
 */
private Pair<ColumnRearranger, PortObject> computeRearrangerWithPMML(final DataTableSpec spec, final RowInput rules, final Map<String, FlowVariable> flowVars, final int ruleIdx, final int outcomeIdx, final int confidenceIdx, final int weightIdx, final int validationIdx, final String outputColumnName) throws InterruptedException, InvalidSettingsException {
    final PMMLDocument doc = PMMLDocument.Factory.newInstance();
    final PMML pmmlObj = doc.addNewPMML();
    final RuleSetModel ruleSetModel = pmmlObj.addNewRuleSetModel();
    final RuleSet ruleSet = ruleSetModel.addNewRuleSet();
    final List<DataType> outcomeTypes = new ArrayList<>();
    final PMMLRuleParser parser = new PMMLRuleParser(spec, flowVars);

    int lineNo = 0;
    for (DataRow ruleRow = rules.poll(); ruleRow != null; ruleRow = rules.poll()) {
        // Comment rows are counted as well, so the line number matches the row position in the input.
        ++lineNo;
        final DataCell rule = ruleRow.getCell(ruleIdx);
        CheckUtils.checkSetting(!rule.isMissing(), "Missing rule in row: " + ruleRow.getKey());
        if (!(rule instanceof StringValue)) {
            CheckUtils.checkSetting(false, "Wrong type (" + rule.getType() + ") of rule: " + rule + "\nin row: " + ruleRow.getKey());
            continue;
        }

        String text = ((StringValue) rule).getStringValue().replaceAll("[\r\n]+", " ");
        if (RuleSupport.isComment(text)) {
            continue;
        }
        if (outcomeIdx >= 0) {
            text += " => " + m_settings.asStringFailForMissing(ruleRow.getCell(outcomeIdx));
        }

        final ParseState state = new ParseState(text);
        try {
            // Condition first, then the "=>" separator, then the outcome; the parse state advances along the text.
            final PMMLPredicate condition = parser.parseBooleanExpression(state);
            final SimpleRule simpleRule = ruleSet.addNewSimpleRule();
            setCondition(simpleRule, condition);
            state.skipWS();
            state.consumeText("=>");
            state.skipWS();
            final Expression outcome = parser.parseOutcomeOperand(state, null);
            simpleRule.setScore(outcome.toString());

            if (confidenceIdx >= 0) {
                final DataCell confidenceCell = ruleRow.getCell(confidenceIdx);
                // Missing or non-numeric confidence cells are ignored.
                if (!confidenceCell.isMissing() && confidenceCell instanceof DoubleValue) {
                    simpleRule.setConfidence(((DoubleValue) confidenceCell).getDoubleValue());
                }
            }

            if (weightIdx >= 0) {
                final DataCell weightCell = ruleRow.getCell(weightIdx);
                if (!weightCell.isMissing() && weightCell instanceof DoubleValue) {
                    simpleRule.setWeight(((DoubleValue) weightCell).getDoubleValue());
                } else if (m_settings.isHasDefaultWeight()) {
                    // No usable weight in the row: fall back to the configured default.
                    simpleRule.setWeight(m_settings.getDefaultWeight());
                }
            }

            CheckUtils.checkSetting(outcome.isConstant(), "Outcome is not constant in line " + lineNo + " (" + ruleRow.getKey() + ") for rule: " + rule);
            outcomeTypes.add(outcome.getOutputType());
        } catch (ParseException e) {
            final ParseException error = Util.addContext(e, text, lineNo);
            throw new InvalidSettingsException("Wrong rule in line: " + ruleRow.getKey() + "\n" + error.getMessage(), error);
        }
    }

    // The dummy rearranger is only used to obtain the output spec; its cell factory never produces real cells.
    final ColumnRearranger dummy = new ColumnRearranger(spec);
    if (!m_settings.isReplaceColumn()) {
        dummy.append(new SingleCellFactory(
            new DataColumnSpecCreator(
                outputColumnName,
                RuleEngineNodeModel.computeOutputType(outcomeTypes, computeOutcomeType(rules.getDataTableSpec()), true, m_settings.isDisallowLongOutputForCompatibility()))
                    .createSpec()) {

            @Override
            public DataCell getCell(final DataRow row) {
                return null;
            }
        });
    }

    final PMMLPortObject pmml = createPMMLPortObject(doc, ruleSetModel, ruleSet, parser, dummy.createSpec());
    final PortObject portObject = pmml;
    m_copy = copy(pmml);

    String confidenceColumn = m_settings.getPredictionConfidenceColumn();
    if (confidenceColumn == null || confidenceColumn.isEmpty()) {
        confidenceColumn = RuleEngine2PortsSettings.DEFAULT_PREDICTION_CONFIDENCE_COLUMN;
    }
    final ColumnRearranger rearranger = PMMLRuleSetPredictorNodeModel.createRearranger(pmml, spec, m_settings.isReplaceColumn(), outputColumnName, m_settings.isComputeConfidence(), DataTableSpec.getUniqueColumnName(dummy.createSpec(), confidenceColumn), validationIdx);
    return Pair.create(rearranger, portObject);
}
Also used : RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) PMMLRuleParser(org.knime.base.node.rules.engine.pmml.PMMLRuleParser) ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) DataRow(org.knime.core.data.DataRow) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataType(org.knime.core.data.DataType) StringValue(org.knime.core.data.StringValue) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) Expression(org.knime.base.node.rules.engine.Expression) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) DataCell(org.knime.core.data.DataCell) PMMLDocument(org.dmg.pmml.PMMLDocument) ParseException(java.text.ParseException)

Example 8 with SimpleRule

Use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.

In the class PMMLRuleTranslator, method collectPredicates.

/**
 * The predicates of a {@link CompoundRule} in the order they appear.
 * <p>
 * The direct children of the rule are visited with an {@link XmlCursor}: nested compound rules are flattened
 * recursively, a {@link SimpleRule} contributes the condition of the rule created from it, and
 * {@link SimplePredicate}s and {@link CompoundPredicate}s are parsed directly. The cursor is disposed when the
 * traversal ends, also when parsing a child fails.
 *
 * @param compoundRule An xml {@link CompoundRule}.
 * @return The flat list of {@link PMMLPredicate}s.
 */
private List<PMMLPredicate> collectPredicates(final CompoundRule compoundRule) {
    final List<PMMLPredicate> ret = new ArrayList<>();
    final XmlCursor cursor = compoundRule.newCursor();
    try {
        if (cursor.toFirstChild()) {
            do {
                final XmlObject object = cursor.getObject();
                if (object instanceof CompoundRuleDocument.CompoundRule) {
                    final CompoundRuleDocument.CompoundRule cr = (CompoundRuleDocument.CompoundRule) object;
                    ret.addAll(collectPredicates(cr));
                } else if (object instanceof SimpleRule) {
                    final SimpleRule sr = (SimpleRule) object;
                    ret.add(createRule(sr).getCondition());
                } else if (object instanceof SimplePredicate) {
                    final SimplePredicate sp = (SimplePredicate) object;
                    ret.add(parseSimplePredicate(sp));
                } else if (object instanceof CompoundPredicate) {
                    final CompoundPredicate cp = (CompoundPredicate) object;
                    ret.add(parseCompoundPredicate(cp));
                }
                // NOTE(review): any other child kind (e.g. a SimpleSetPredicate) is skipped silently here -
                // confirm that this is intended.
            } while (cursor.toNextSibling());
        }
    } finally {
        // Cursors hold resources of the underlying xml store and have to be released explicitly.
        cursor.dispose();
    }
    return ret;
}
Also used : CompoundRule(org.dmg.pmml.CompoundRuleDocument.CompoundRule) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) CompoundRule(org.dmg.pmml.CompoundRuleDocument.CompoundRule) ArrayList(java.util.ArrayList) CompoundRuleDocument(org.dmg.pmml.CompoundRuleDocument) XmlObject(org.apache.xmlbeans.XmlObject) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) XmlCursor(org.apache.xmlbeans.XmlCursor)

Example 9 with SimpleRule

Use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.

In the class PMMLRuleTranslator, method createRule.

/**
 * Flattens an xml {@link CompoundRule} into a single simple {@link Rule}.
 * <p>
 * All predicates found in the compound rule are combined, in order, into one AND predicate. The score, weight and
 * confidence are taken from the first {@link SimpleRule} found inside the compound rule.
 *
 * @param compoundRule An xml {@link CompoundRule}.
 * @return The corresponding {@link Rule}.
 * @throws IllegalStateException if the compound rule contains no {@link SimpleRule}.
 */
private Rule createRule(final CompoundRule compoundRule) {
    final LinkedList<PMMLPredicate> parts = new LinkedList<>(collectPredicates(compoundRule));
    final PMMLCompoundPredicate conjunction = newCompoundPredicate(PMMLBooleanOperator.AND.toString());
    conjunction.setPredicates(parts);

    // Only the first simple rule provides the outcome; the outcomes of later rules are discarded on purpose,
    // which is what the spec 4.1 (http://www.dmg.org/v4-1/RuleSet.html) asks for.
    final SimpleRule scoreSource = findFirst(compoundRule);
    if (scoreSource == null) {
        throw new IllegalStateException("No SimpleRule was found in " + compoundRule);
    }

    // Optional attributes are represented by null when they are not set in the xml.
    final Double weight = scoreSource.isSetWeight() ? scoreSource.getWeight() : null;
    final Double confidence = scoreSource.isSetConfidence() ? scoreSource.getConfidence() : null;
    return new Rule(conjunction, scoreSource.getScore(), weight, confidence);
}
Also used : SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) CompoundRule(org.dmg.pmml.CompoundRuleDocument.CompoundRule) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) LinkedList(java.util.LinkedList) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate)

Aggregations

SimpleRule (org.dmg.pmml.SimpleRuleDocument.SimpleRule): 9, PMMLPredicate (org.knime.base.node.mine.decisiontree2.PMMLPredicate): 6, ArrayList (java.util.ArrayList): 5, CompoundRule (org.dmg.pmml.CompoundRuleDocument.CompoundRule): 5, RuleSetModel (org.dmg.pmml.RuleSetModelDocument.RuleSetModel): 4, PMMLCompoundPredicate (org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate): 4, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 4, Entry (java.util.Map.Entry): 3, XmlCursor (org.apache.xmlbeans.XmlCursor): 3, XmlObject (org.apache.xmlbeans.XmlObject): 3, CompoundPredicate (org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate): 3, PMMLDocument (org.dmg.pmml.PMMLDocument): 3, PMML (org.dmg.pmml.PMMLDocument.PMML): 3, RuleSet (org.dmg.pmml.RuleSetDocument.RuleSet): 3, ScoreDistribution (org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution): 3, SimplePredicate (org.dmg.pmml.SimplePredicateDocument.SimplePredicate): 3, PMMLSimplePredicate (org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate): 3, DataCell (org.knime.core.data.DataCell): 3, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 3, DataRow (org.knime.core.data.DataRow): 3