use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.
the class PMMLRuleTranslator method createRule.
/**
 * Builds a {@link Rule} from an xml {@link SimpleRule}.
 * <p>
 * The condition is taken from the first predicate element that is present on the rule (True, False,
 * CompoundPredicate, SimplePredicate, SimpleSetPredicate - probed in that order). Score, optional weight,
 * optional confidence and the score distribution are copied over; the number of correct records and the
 * record count are only transferred when they are set on the xml rule.
 *
 * @param r An xml {@link SimpleRule}.
 * @return The corresponding {@link Rule} object.
 * @throws UnsupportedOperationException if none of the predicate elements probed here is present on {@code r}
 */
private Rule createRule(final SimpleRule r) {
    // Pick the condition: the branches are mutually exclusive and checked in a fixed order.
    final PMMLPredicate condition;
    if (r.getTrue() != null) {
        condition = new PMMLTruePredicate();
    } else if (r.getFalse() != null) {
        condition = new PMMLFalsePredicate();
    } else if (r.getCompoundPredicate() != null) {
        condition = parseCompoundPredicate(r.getCompoundPredicate());
    } else if (r.getSimplePredicate() != null) {
        condition = parseSimplePredicate(r.getSimplePredicate());
    } else if (r.getSimpleSetPredicate() != null) {
        condition = parseSimpleSetPredicate(r.getSimpleSetPredicate());
    } else {
        throw new UnsupportedOperationException(r.toString());
    }

    // Score distribution keyed by its value; a missing probability is represented by null.
    final Map<String, ScoreProbabilityAndRecordCount> distribution =
        r.getScoreDistributionList().stream().collect(Collectors.toMap(
            sd -> sd.getValue(),
            sd -> new ScoreProbabilityAndRecordCount(sd.isSetProbability() ? sd.getProbability() : null,
                sd.getRecordCount())));

    final Rule rule = new Rule(
        condition,
        r.getScore(),
        r.isSetWeight() ? r.getWeight() : null,
        r.isSetConfidence() ? r.getConfidence() : null,
        distribution);

    // Optional statistics are only copied when the xml rule carries them.
    if (r.isSetNbCorrect()) {
        rule.setNbCorrect(r.getNbCorrect());
    }
    if (r.isSetRecordCount()) {
        rule.setRecordCount(r.getRecordCount());
    }
    return rule;
}
use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.
the class RuleEngine2PortsNodeModel method computeRearrangerWithPMML.
/**
 * Parses the rules coming from the {@code rules} input into a PMML RuleSet model and creates the
 * {@link ColumnRearranger} that applies this model.
 * <p>
 * Every row whose rule text is not a comment becomes one {@link SimpleRule} with a condition, a score and,
 * when available, a confidence and a weight. A copy of the created PMML port object is stored in
 * {@code m_copy}.
 *
 * @param spec the table spec handed to the rule parser and used as the basis of the rearranger
 * @param rules the input providing one rule per row
 * @param flowVars the flow variables handed to the rule parser
 * @param ruleIdx index of the column holding the rule text
 * @param outcomeIdx index of the column holding the outcome; when negative, the outcome is expected after
 *            {@code =>} inside the rule text itself
 * @param confidenceIdx index of the confidence column, negative when there is none
 * @param weightIdx index of the weight column, negative when there is none
 * @param validationIdx passed on unchanged to {@code PMMLRuleSetPredictorNodeModel.createRearranger}
 * @param outputColumnName name of the output column
 * @return the rearranger together with the PMML port object it was created from
 * @throws InterruptedException propagated from the calls made here (presumably from reading {@code rules})
 * @throws InvalidSettingsException if a rule cell is missing or not a string, a rule cannot be parsed or its
 *             outcome is not constant
 */
private Pair<ColumnRearranger, PortObject> computeRearrangerWithPMML(final DataTableSpec spec, final RowInput rules, final Map<String, FlowVariable> flowVars, final int ruleIdx, final int outcomeIdx, final int confidenceIdx, final int weightIdx, final int validationIdx, final String outputColumnName) throws InterruptedException, InvalidSettingsException {
    final PMMLDocument doc = PMMLDocument.Factory.newInstance();
    final RuleSetModel ruleSetModel = doc.addNewPMML().addNewRuleSetModel();
    final RuleSet ruleSet = ruleSetModel.addNewRuleSet();
    final List<DataType> outcomeTypes = new ArrayList<>();
    final PMMLRuleParser parser = new PMMLRuleParser(spec, flowVars);

    // lineNo counts every polled row, including comment rows, so error messages match the input position.
    int lineNo = 0;
    for (DataRow ruleRow = rules.poll(); ruleRow != null; ruleRow = rules.poll()) {
        ++lineNo;
        final DataCell rule = ruleRow.getCell(ruleIdx);
        CheckUtils.checkSetting(!rule.isMissing(), "Missing rule in row: " + ruleRow.getKey());
        if (!(rule instanceof StringValue)) {
            CheckUtils.checkSetting(false, "Wrong type (" + rule.getType() + ") of rule: " + rule + "\nin row: " + ruleRow.getKey());
            continue;
        }

        // Line breaks inside the rule text are collapsed to single spaces before parsing.
        String expression = ((StringValue) rule).getStringValue().replaceAll("[\r\n]+", " ");
        if (RuleSupport.isComment(expression)) {
            continue;
        }
        if (outcomeIdx >= 0) {
            // The outcome lives in its own column: append it so the text has the "condition => outcome" form.
            expression += " => " + m_settings.asStringFailForMissing(ruleRow.getCell(outcomeIdx));
        }

        final ParseState state = new ParseState(expression);
        try {
            final PMMLPredicate condition = parser.parseBooleanExpression(state);
            final SimpleRule simpleRule = ruleSet.addNewSimpleRule();
            setCondition(simpleRule, condition);
            state.skipWS();
            state.consumeText("=>");
            state.skipWS();
            final Expression outcome = parser.parseOutcomeOperand(state, null);
            simpleRule.setScore(outcome.toString());

            // Confidence is only set for non-missing numeric cells; anything else is ignored.
            if (confidenceIdx >= 0) {
                final DataCell confidenceCell = ruleRow.getCell(confidenceIdx);
                if (!confidenceCell.isMissing() && confidenceCell instanceof DoubleValue) {
                    simpleRule.setConfidence(((DoubleValue) confidenceCell).getDoubleValue());
                }
            }

            // Weight comes from the cell when usable, otherwise from the configured default (if any).
            if (weightIdx >= 0) {
                final DataCell weightCell = ruleRow.getCell(weightIdx);
                if (!weightCell.isMissing() && weightCell instanceof DoubleValue) {
                    simpleRule.setWeight(((DoubleValue) weightCell).getDoubleValue());
                } else if (m_settings.isHasDefaultWeight()) {
                    simpleRule.setWeight(m_settings.getDefaultWeight());
                }
            }

            CheckUtils.checkSetting(outcome.isConstant(), "Outcome is not constant in line " + lineNo + " (" + ruleRow.getKey() + ") for rule: " + rule);
            outcomeTypes.add(outcome.getOutputType());
        } catch (ParseException e) {
            final ParseException withContext = Util.addContext(e, expression, lineNo);
            throw new InvalidSettingsException("Wrong rule in line: " + ruleRow.getKey() + "\n" + withContext.getMessage(), withContext);
        }
    }

    // The dummy rearranger is only used to derive table specs; its cell factory never produces real cells.
    final ColumnRearranger dummy = new ColumnRearranger(spec);
    if (!m_settings.isReplaceColumn()) {
        final DataType outputType = RuleEngineNodeModel.computeOutputType(outcomeTypes, computeOutcomeType(rules.getDataTableSpec()), true, m_settings.isDisallowLongOutputForCompatibility());
        dummy.append(new SingleCellFactory(new DataColumnSpecCreator(outputColumnName, outputType).createSpec()) {
            @Override
            public DataCell getCell(final DataRow row) {
                return null;
            }
        });
    }

    final PMMLPortObject pmml = createPMMLPortObject(doc, ruleSetModel, ruleSet, parser, dummy.createSpec());
    m_copy = copy(pmml);

    // Fall back to the default name when no prediction confidence column name is configured.
    String confidenceColumn = m_settings.getPredictionConfidenceColumn();
    if (confidenceColumn == null || confidenceColumn.isEmpty()) {
        confidenceColumn = RuleEngine2PortsSettings.DEFAULT_PREDICTION_CONFIDENCE_COLUMN;
    }

    final ColumnRearranger rearranger = PMMLRuleSetPredictorNodeModel.createRearranger(pmml, spec, m_settings.isReplaceColumn(), outputColumnName, m_settings.isComputeConfidence(), DataTableSpec.getUniqueColumnName(dummy.createSpec(), confidenceColumn), validationIdx);
    final PortObject po = pmml;
    return Pair.create(rearranger, po);
}
use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.
the class PMMLRuleTranslator method collectPredicates.
/**
 * The predicates of a {@link CompoundRule} in the order they appear.
 * <p>
 * The direct children of the compound rule are visited in document order: nested compound rules are
 * flattened recursively, a nested {@link SimpleRule} contributes its condition, and {@link SimplePredicate}
 * and {@link CompoundPredicate} children are parsed directly.
 *
 * @param compoundRule An xml {@link CompoundRule}.
 * @return The flat list of {@link PMMLPredicate}s.
 */
private List<PMMLPredicate> collectPredicates(final CompoundRule compoundRule) {
    final List<PMMLPredicate> ret = new ArrayList<>();
    final XmlCursor cursor = compoundRule.newCursor();
    try {
        if (cursor.toFirstChild()) {
            do {
                final XmlObject object = cursor.getObject();
                if (object instanceof CompoundRuleDocument.CompoundRule) {
                    final CompoundRuleDocument.CompoundRule cr = (CompoundRuleDocument.CompoundRule) object;
                    ret.addAll(collectPredicates(cr));
                } else if (object instanceof SimpleRule) {
                    final SimpleRule sr = (SimpleRule) object;
                    ret.add(createRule(sr).getCondition());
                } else if (object instanceof SimplePredicate) {
                    final SimplePredicate sp = (SimplePredicate) object;
                    ret.add(parseSimplePredicate(sp));
                } else if (object instanceof CompoundPredicate) {
                    final CompoundPredicate cp = (CompoundPredicate) object;
                    ret.add(parseCompoundPredicate(cp));
                }
                // NOTE(review): children of any other type (e.g. a set predicate or a True/False element)
                // are skipped silently here - confirm that this is intended.
            } while (cursor.toNextSibling());
        }
    } finally {
        // Release the cursor explicitly, also when parsing a child fails; the objects already
        // obtained through getObject() are not affected by disposing the cursor.
        cursor.dispose();
    }
    return ret;
}
use of org.dmg.pmml.SimpleRuleDocument.SimpleRule in project knime-core by knime.
the class PMMLRuleTranslator method createRule.
/**
 * Flattens an xml {@link CompoundRule} into a single {@link Rule}.
 * <p>
 * All predicates collected from the compound rule (in order) are combined with AND to form the condition.
 * Score, weight and confidence are taken from the first {@link SimpleRule} found in the compound rule.
 *
 * @param compoundRule An xml {@link CompoundRule}.
 * @return The corresponding {@link Rule}.
 * @throws IllegalStateException if the compound rule contains no {@link SimpleRule}
 */
private Rule createRule(final CompoundRule compoundRule) {
    // Condition: the conjunction of every predicate found inside the compound rule.
    final LinkedList<PMMLPredicate> conjuncts = new LinkedList<PMMLPredicate>(collectPredicates(compoundRule));
    final PMMLCompoundPredicate conjunction = newCompoundPredicate(PMMLBooleanOperator.AND.toString());
    conjunction.setPredicates(conjuncts);

    // Only the first simple rule determines the outcome; the outcomes of later rules are discarded.
    // This looks suspicious, but it is what the spec 4.1 asks for (http://www.dmg.org/v4-1/RuleSet.html).
    final SimpleRule outcomeRule = findFirst(compoundRule);
    if (outcomeRule != null) {
        final String score = outcomeRule.getScore();
        final Double weight = outcomeRule.isSetWeight() ? outcomeRule.getWeight() : null;
        final Double confidence = outcomeRule.isSetConfidence() ? outcomeRule.getConfidence() : null;
        return new Rule(conjunction, score, weight, confidence);
    }
    throw new IllegalStateException("No SimpleRule was found in " + compoundRule);
}
Aggregations