use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.
the class PMMLRuleParser method parseAndWarn.
/**
* {@inheritDoc}
*/
@Override
protected boolean parseAndWarn(final RSyntaxDocument doc, final DefaultParseResult res, final boolean wasCatchAllRule, final int line, final String lineText) throws ParseException {
if (RuleSupport.isComment(lineText)) {
return false;
}
ParseState state = new ParseState(lineText);
org.knime.base.node.rules.engine.pmml.PMMLRuleParser mainParser = new org.knime.base.node.rules.engine.pmml.PMMLRuleParser(getSpec(), getFlowVariables());
PMMLPredicate condition = mainParser.parseBooleanExpression(state);
state.skipWS();
state.consumeText("=>");
mainParser.parseOutcomeOperand(state, null);
if (wasCatchAllRule || (condition instanceof PMMLFalsePredicate)) {
addWarningNotice(doc, res, wasCatchAllRule, line, lineText);
}
return condition instanceof PMMLTruePredicate;
}
use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.
the class PMMLRuleEditorNodeModel method createRearranger.
/**
* Creates the {@link ColumnRearranger} that can compute the new column.
*
* @param tableSpec The spec of the input table.
* @param ruleSet The {@link RuleSet} xml object where the rules should be added.
* @param parser The parser for the rules.
* @return The {@link ColumnRearranger}.
* @throws ParseException Problem during parsing.
* @throws InvalidSettingsException if settings are invalid
*/
private ColumnRearranger createRearranger(final DataTableSpec tableSpec, final RuleSet ruleSet, final PMMLRuleParser parser) throws ParseException, InvalidSettingsException {
if (m_settings.isAppendColumn() && m_settings.getNewColName().isEmpty()) {
throw new InvalidSettingsException("No name for prediction column provided");
}
Set<String> outcomes = new LinkedHashSet<String>();
List<DataType> outcomeTypes = new ArrayList<DataType>();
int line = 0;
final List<Pair<PMMLPredicate, Expression>> rules = new ArrayList<Pair<PMMLPredicate, Expression>>();
for (String ruleText : m_settings.rules()) {
++line;
if (RuleSupport.isComment(ruleText)) {
continue;
}
try {
ParseState state = new ParseState(ruleText);
PMMLPredicate expression = parser.parseBooleanExpression(state);
SimpleRule simpleRule = ruleSet.addNewSimpleRule();
setCondition(simpleRule, expression);
state.skipWS();
state.consumeText("=>");
state.skipWS();
Expression outcome = parser.parseOutcomeOperand(state, null);
// Only constants are allowed in the outcomes.
assert outcome.isConstant() : outcome;
rules.add(new Pair<PMMLPredicate, Expression>(expression, outcome));
outcomeTypes.add(outcome.getOutputType());
simpleRule.setScore(outcome.toString());
// simpleRule.setConfidence(confidenceForRule(simpleRule, line, ruleText));
simpleRule.setWeight(weightForRule(simpleRule, line, ruleText));
outcomes.add(simpleRule.getScore());
} catch (ParseException e) {
throw Util.addContext(e, ruleText, line);
}
}
DataType outcomeType = RuleEngineNodeModel.computeOutputType(outcomeTypes, true);
ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
DataColumnSpecCreator specProto = new DataColumnSpecCreator(m_settings.isAppendColumn() ? DataTableSpec.getUniqueColumnName(tableSpec, m_settings.getNewColName()) : m_settings.getReplaceColumn(), outcomeType);
specProto.setDomain(new DataColumnDomainCreator(toCells(outcomes, outcomeType)).createDomain());
SingleCellFactory cellFactory = new SingleCellFactory(true, specProto.createSpec()) {
@Override
public DataCell getCell(final DataRow row) {
for (Pair<PMMLPredicate, Expression> pair : rules) {
if (pair.getFirst().evaluate(row, tableSpec) == Boolean.TRUE) {
return pair.getSecond().evaluate(row, null).getValue();
}
}
return DataType.getMissingCell();
}
};
if (m_settings.isAppendColumn()) {
rearranger.append(cellFactory);
} else {
rearranger.replace(cellFactory, m_settings.getReplaceColumn());
}
return rearranger;
}
use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.
the class RuleEngine2PortsNodeModel method computeRearrangerWithPMML.
/**
* @param spec
* @param rules
* @param flowVars
* @param ruleIdx
* @param outcomeIdx
* @param confidenceIdx
* @param weightIdx
* @param validationIdx
* @param outputColumnName
* @return
* @throws InterruptedException
* @throws InvalidSettingsException
*/
private Pair<ColumnRearranger, PortObject> computeRearrangerWithPMML(final DataTableSpec spec, final RowInput rules, final Map<String, FlowVariable> flowVars, final int ruleIdx, final int outcomeIdx, final int confidenceIdx, final int weightIdx, final int validationIdx, final String outputColumnName) throws InterruptedException, InvalidSettingsException {
PortObject po;
ColumnRearranger ret;
PMMLDocument doc = PMMLDocument.Factory.newInstance();
final PMML pmmlObj = doc.addNewPMML();
RuleSetModel ruleSetModel = pmmlObj.addNewRuleSetModel();
RuleSet ruleSet = ruleSetModel.addNewRuleSet();
List<DataType> outcomeTypes = new ArrayList<>();
PMMLRuleParser parser = new PMMLRuleParser(spec, flowVars);
int lineNo = 0;
DataRow ruleRow;
while ((ruleRow = rules.poll()) != null) {
++lineNo;
DataCell rule = ruleRow.getCell(ruleIdx);
CheckUtils.checkSetting(!rule.isMissing(), "Missing rule in row: " + ruleRow.getKey());
if (rule instanceof StringValue) {
StringValue ruleText = (StringValue) rule;
String r = ruleText.getStringValue().replaceAll("[\r\n]+", " ");
if (RuleSupport.isComment(r)) {
continue;
}
if (outcomeIdx >= 0) {
r += " => " + m_settings.asStringFailForMissing(ruleRow.getCell(outcomeIdx));
}
ParseState state = new ParseState(r);
try {
PMMLPredicate condition = parser.parseBooleanExpression(state);
SimpleRule simpleRule = ruleSet.addNewSimpleRule();
setCondition(simpleRule, condition);
state.skipWS();
state.consumeText("=>");
state.skipWS();
Expression outcome = parser.parseOutcomeOperand(state, null);
simpleRule.setScore(outcome.toString());
if (confidenceIdx >= 0) {
DataCell confidenceCell = ruleRow.getCell(confidenceIdx);
if (!confidenceCell.isMissing()) {
if (confidenceCell instanceof DoubleValue) {
DoubleValue dv = (DoubleValue) confidenceCell;
double confidence = dv.getDoubleValue();
simpleRule.setConfidence(confidence);
}
}
}
if (weightIdx >= 0) {
DataCell weightCell = ruleRow.getCell(weightIdx);
boolean missing = true;
if (!weightCell.isMissing()) {
if (weightCell instanceof DoubleValue) {
DoubleValue dv = (DoubleValue) weightCell;
double weight = dv.getDoubleValue();
simpleRule.setWeight(weight);
missing = false;
}
}
if (missing && m_settings.isHasDefaultWeight()) {
simpleRule.setWeight(m_settings.getDefaultWeight());
}
}
CheckUtils.checkSetting(outcome.isConstant(), "Outcome is not constant in line " + lineNo + " (" + ruleRow.getKey() + ") for rule: " + rule);
outcomeTypes.add(outcome.getOutputType());
} catch (ParseException e) {
ParseException error = Util.addContext(e, r, lineNo);
throw new InvalidSettingsException("Wrong rule in line: " + ruleRow.getKey() + "\n" + error.getMessage(), error);
}
} else {
CheckUtils.checkSetting(false, "Wrong type (" + rule.getType() + ") of rule: " + rule + "\nin row: " + ruleRow.getKey());
}
}
ColumnRearranger dummy = new ColumnRearranger(spec);
if (!m_settings.isReplaceColumn()) {
dummy.append(new SingleCellFactory(new DataColumnSpecCreator(outputColumnName, RuleEngineNodeModel.computeOutputType(outcomeTypes, computeOutcomeType(rules.getDataTableSpec()), true, m_settings.isDisallowLongOutputForCompatibility())).createSpec()) {
@Override
public DataCell getCell(final DataRow row) {
return null;
}
});
}
PMMLPortObject pmml = createPMMLPortObject(doc, ruleSetModel, ruleSet, parser, dummy.createSpec());
po = pmml;
m_copy = copy(pmml);
String predictionConfidenceColumn = m_settings.getPredictionConfidenceColumn();
if (predictionConfidenceColumn == null || predictionConfidenceColumn.isEmpty()) {
predictionConfidenceColumn = RuleEngine2PortsSettings.DEFAULT_PREDICTION_CONFIDENCE_COLUMN;
}
ret = PMMLRuleSetPredictorNodeModel.createRearranger(pmml, spec, m_settings.isReplaceColumn(), outputColumnName, m_settings.isComputeConfidence(), DataTableSpec.getUniqueColumnName(dummy.createSpec(), predictionConfidenceColumn), validationIdx);
return Pair.create(ret, po);
}
use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.
the class ParseStateTest method testParseNumber.
/**
* Tests {@link ParseState#parseNumber()}.
*
* @throws ParseException
* Should not happen.
*/
@Test
public void testParseNumber() throws ParseException {
for (int i = 0; i < m_d.length; i++) {
ParseState ps = m_d[i];
assertEquals(m_numbers[i], m_numbers[i], ps.parseNumber());
Double.parseDouble(m_numbers[i]);
}
for (int i = 0; i < m_d2.length; i++) {
ParseState ps = m_d2[i];
assertEquals(m_partialNumbers[i], m_partialNumberMatches[i], ps.parseNumber());
}
assertEquals("3", new ParseState("3 ").parseNumber());
}
use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.
the class ParseStateTest method setup.
/**
* Initialize the test constants.
*/
@org.junit.Before
public void setup() {
m_empty = new ParseState("");
m_hello = new ParseState("Hello");
m_hello1 = new ParseState(" Hello ");
m_h = new ParseState("H");
m_string = new ParseState("\"Hello\"");
m_stringWithQuote = new ParseState("\"Hello\\\" continue\"");
m_flowVar = new ParseState("$${S flowvar ok }$$");
m_flowVarError = new ParseState("$${D flowvar without end");
m_flowVarError1 = new ParseState("$${S flowvar without end $ ");
m_flowVarError2 = new ParseState("$${I flowvar without end $ $$");
m_rowIndex = new ParseState("$$ROWINDEX$$ Hello");
m_column = new ParseState("$col0$");
m_column1 = new ParseState("$col1 $");
m_columnError = new ParseState("$col0 ");
m_numbers = new String[] { "-4.6", "-Infinity", "Infinity", "3", ".4", ".3E43", ".3E-2" };
m_d = new ParseState[m_numbers.length];
for (int i = 0; i < m_numbers.length; i++) {
m_d[i] = new ParseState(m_numbers[i]);
}
m_partialNumbers = new String[] { "-.3E", "-.3E-", ".3E-", "3E", "3e.", "3.e" };
m_partialNumberMatches = new String[] { "-.3", "-.3", ".3", "3", "3", "3." };
m_d2 = new ParseState[m_partialNumbers.length];
for (int i = 0; i < m_partialNumbers.length; i++) {
m_d2[i] = new ParseState(m_partialNumbers[i]);
}
}
Aggregations