Search in sources :

Example 1 with ParseState

use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.

the class PMMLRuleParser method parseAndWarn.

/**
 * {@inheritDoc}
 * <p>
 * Comment lines are skipped. Every other line is parsed as a condition, the text {@code =>} and an outcome.
 * A warning notice is added when {@code wasCatchAllRule} is set or when the parsed condition is a
 * {@code PMMLFalsePredicate}.
 *
 * @return {@code true} if the parsed condition is a {@code PMMLTruePredicate}; {@code false} otherwise,
 *         including for comment lines
 */
@Override
protected boolean parseAndWarn(final RSyntaxDocument doc, final DefaultParseResult res, final boolean wasCatchAllRule, final int line, final String lineText) throws ParseException {
    // Comments carry no rule, so there is nothing to parse or to warn about.
    if (RuleSupport.isComment(lineText)) {
        return false;
    }
    final ParseState parseState = new ParseState(lineText);
    final org.knime.base.node.rules.engine.pmml.PMMLRuleParser ruleParser = new org.knime.base.node.rules.engine.pmml.PMMLRuleParser(getSpec(), getFlowVariables());

    final PMMLPredicate predicate = ruleParser.parseBooleanExpression(parseState);
    parseState.skipWS();
    parseState.consumeText("=>");
    // The outcome is parsed but its result is not used here.
    ruleParser.parseOutcomeOperand(parseState, null);

    final boolean isFalsePredicate = predicate instanceof PMMLFalsePredicate;
    if (wasCatchAllRule || isFalsePredicate) {
        addWarningNotice(doc, res, wasCatchAllRule, line, lineText);
    }
    return predicate instanceof PMMLTruePredicate;
}
Also used : ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) PMMLFalsePredicate(org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate) PMMLTruePredicate(org.knime.base.node.mine.decisiontree2.PMMLTruePredicate)

Example 2 with ParseState

use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.

the class PMMLRuleEditorNodeModel method createRearranger.

/**
 * Creates the {@link ColumnRearranger} that can compute the new column.
 * <p>
 * Every non-comment rule from the settings is parsed, added to {@code ruleSet} as a {@link SimpleRule}
 * (condition, score and weight) and remembered as a (condition, outcome) pair. The returned rearranger
 * appends or replaces a column, depending on the settings, whose cells are computed from the first rule
 * whose condition evaluates to {@code true} for the row; if no rule matches, a missing cell is produced.
 *
 * @param tableSpec The spec of the input table.
 * @param ruleSet The {@link RuleSet} xml object where the rules should be added.
 * @param parser The parser for the rules.
 * @return The {@link ColumnRearranger}.
 * @throws ParseException Problem during parsing; the exception is enriched with the rule text and line.
 * @throws InvalidSettingsException if a column should be appended but no name for it was provided
 */
private ColumnRearranger createRearranger(final DataTableSpec tableSpec, final RuleSet ruleSet, final PMMLRuleParser parser) throws ParseException, InvalidSettingsException {
    if (m_settings.isAppendColumn() && m_settings.getNewColName().isEmpty()) {
        throw new InvalidSettingsException("No name for prediction column provided");
    }
    // LinkedHashSet keeps the outcomes in the order the rules were declared.
    Set<String> outcomes = new LinkedHashSet<String>();
    List<DataType> outcomeTypes = new ArrayList<DataType>();
    // 1-based line counter; comment lines are counted too so that error positions match the editor.
    int line = 0;
    final List<Pair<PMMLPredicate, Expression>> rules = new ArrayList<Pair<PMMLPredicate, Expression>>();
    for (String ruleText : m_settings.rules()) {
        ++line;
        if (RuleSupport.isComment(ruleText)) {
            continue;
        }
        try {
            ParseState state = new ParseState(ruleText);
            PMMLPredicate expression = parser.parseBooleanExpression(state);
            SimpleRule simpleRule = ruleSet.addNewSimpleRule();
            setCondition(simpleRule, expression);
            state.skipWS();
            state.consumeText("=>");
            state.skipWS();
            Expression outcome = parser.parseOutcomeOperand(state, null);
            // Only constants are allowed in the outcomes.
            assert outcome.isConstant() : outcome;
            rules.add(new Pair<PMMLPredicate, Expression>(expression, outcome));
            outcomeTypes.add(outcome.getOutputType());
            simpleRule.setScore(outcome.toString());
            // simpleRule.setConfidence(confidenceForRule(simpleRule, line, ruleText));
            simpleRule.setWeight(weightForRule(simpleRule, line, ruleText));
            outcomes.add(simpleRule.getScore());
        } catch (ParseException e) {
            throw Util.addContext(e, ruleText, line);
        }
    }
    DataType outcomeType = RuleEngineNodeModel.computeOutputType(outcomeTypes, true);
    ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
    // When appending, make the new column name unique within the input spec; otherwise reuse the replaced column's name.
    DataColumnSpecCreator specProto = new DataColumnSpecCreator(m_settings.isAppendColumn() ? DataTableSpec.getUniqueColumnName(tableSpec, m_settings.getNewColName()) : m_settings.getReplaceColumn(), outcomeType);
    specProto.setDomain(new DataColumnDomainCreator(toCells(outcomes, outcomeType)).createDomain());
    SingleCellFactory cellFactory = new SingleCellFactory(true, specProto.createSpec()) {

        @Override
        public DataCell getCell(final DataRow row) {
            for (Pair<PMMLPredicate, Expression> pair : rules) {
                // Value comparison instead of identity (==): also correct for a non-canonical Boolean
                // instance and for a null result, both of which count as "not matched".
                if (Boolean.TRUE.equals(pair.getFirst().evaluate(row, tableSpec))) {
                    return pair.getSecond().evaluate(row, null).getValue();
                }
            }
            // No rule matched this row.
            return DataType.getMissingCell();
        }
    };
    if (m_settings.isAppendColumn()) {
        rearranger.append(cellFactory);
    } else {
        rearranger.replace(cellFactory, m_settings.getReplaceColumn());
    }
    return rearranger;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) DataRow(org.knime.core.data.DataRow) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Expression(org.knime.base.node.rules.engine.Expression) DataType(org.knime.core.data.DataType) ParseException(java.text.ParseException) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) Pair(org.knime.core.util.Pair)

Example 3 with ParseState

use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.

the class RuleEngine2PortsNodeModel method computeRearrangerWithPMML.

/**
 * Reads the rules row by row from {@code rules}, adds each of them as a {@link SimpleRule} to a freshly
 * created PMML rule set and builds the rearranger that applies the resulting PMML model.
 * <p>
 * As a side effect a copy of the created PMML port object is stored in {@code m_copy}.
 *
 * @param spec the table spec used to construct the rule parser and the rearranger
 * @param rules the input providing the rule rows; polled until it returns {@code null}
 * @param flowVars the flow variables handed to the rule parser
 * @param ruleIdx index of the cell holding the rule text; the cell must be a non-missing {@link StringValue}
 * @param outcomeIdx index of the cell holding the outcome; if non-negative, its string form is appended to the
 *            rule text after {@code " => "}
 * @param confidenceIdx index of the confidence cell; ignored if negative, or if the cell is missing or not a
 *            {@link DoubleValue}
 * @param weightIdx index of the weight cell; ignored if negative. If the cell is missing or not a
 *            {@link DoubleValue}, the default weight from the settings is used when one is configured
 * @param validationIdx passed through to {@code PMMLRuleSetPredictorNodeModel.createRearranger}
 * @param outputColumnName name of the output column
 * @return the rearranger together with the created PMML port object
 * @throws InterruptedException declared by this method; presumably raised while polling {@code rules} (confirm
 *             against {@code RowInput})
 * @throws InvalidSettingsException if a rule cell is missing or of the wrong type, a rule cannot be parsed, or an
 *             outcome is not constant
 */
private Pair<ColumnRearranger, PortObject> computeRearrangerWithPMML(final DataTableSpec spec, final RowInput rules, final Map<String, FlowVariable> flowVars, final int ruleIdx, final int outcomeIdx, final int confidenceIdx, final int weightIdx, final int validationIdx, final String outputColumnName) throws InterruptedException, InvalidSettingsException {
    final PMMLDocument doc = PMMLDocument.Factory.newInstance();
    final PMML pmmlObj = doc.addNewPMML();
    final RuleSetModel ruleSetModel = pmmlObj.addNewRuleSetModel();
    final RuleSet ruleSet = ruleSetModel.addNewRuleSet();
    final List<DataType> outcomeTypes = new ArrayList<>();
    final PMMLRuleParser parser = new PMMLRuleParser(spec, flowVars);

    // 1-based counter over all polled rows, including comment rows.
    int lineNo = 0;
    for (DataRow ruleRow = rules.poll(); ruleRow != null; ruleRow = rules.poll()) {
        lineNo++;
        final DataCell ruleCell = ruleRow.getCell(ruleIdx);
        CheckUtils.checkSetting(!ruleCell.isMissing(), "Missing rule in row: " + ruleRow.getKey());
        if (!(ruleCell instanceof StringValue)) {
            CheckUtils.checkSetting(false, "Wrong type (" + ruleCell.getType() + ") of rule: " + ruleCell + "\nin row: " + ruleRow.getKey());
            continue;
        }

        // Collapse line breaks so that a rule is always handled as a single line.
        String ruleText = ((StringValue) ruleCell).getStringValue().replaceAll("[\r\n]+", " ");
        if (RuleSupport.isComment(ruleText)) {
            continue;
        }
        if (outcomeIdx >= 0) {
            // The outcome lives in its own column; append it to form a complete rule.
            ruleText = ruleText + " => " + m_settings.asStringFailForMissing(ruleRow.getCell(outcomeIdx));
        }

        final ParseState state = new ParseState(ruleText);
        try {
            final PMMLPredicate condition = parser.parseBooleanExpression(state);
            final SimpleRule simpleRule = ruleSet.addNewSimpleRule();
            setCondition(simpleRule, condition);
            state.skipWS();
            state.consumeText("=>");
            state.skipWS();
            final Expression outcome = parser.parseOutcomeOperand(state, null);
            simpleRule.setScore(outcome.toString());

            if (confidenceIdx >= 0) {
                final DataCell confidenceCell = ruleRow.getCell(confidenceIdx);
                if (!confidenceCell.isMissing() && confidenceCell instanceof DoubleValue) {
                    simpleRule.setConfidence(((DoubleValue) confidenceCell).getDoubleValue());
                }
            }

            if (weightIdx >= 0) {
                final DataCell weightCell = ruleRow.getCell(weightIdx);
                if (!weightCell.isMissing() && weightCell instanceof DoubleValue) {
                    simpleRule.setWeight(((DoubleValue) weightCell).getDoubleValue());
                } else if (m_settings.isHasDefaultWeight()) {
                    // No usable weight in the row: fall back to the configured default.
                    simpleRule.setWeight(m_settings.getDefaultWeight());
                }
            }

            CheckUtils.checkSetting(outcome.isConstant(), "Outcome is not constant in line " + lineNo + " (" + ruleRow.getKey() + ") for rule: " + ruleCell);
            outcomeTypes.add(outcome.getOutputType());
        } catch (ParseException e) {
            final ParseException withContext = Util.addContext(e, ruleText, lineNo);
            throw new InvalidSettingsException("Wrong rule in line: " + ruleRow.getKey() + "\n" + withContext.getMessage(), withContext);
        }
    }

    // This rearranger is only used below to derive specs via createSpec(); its cell factory returns null.
    final ColumnRearranger dummy = new ColumnRearranger(spec);
    if (!m_settings.isReplaceColumn()) {
        final DataType outputType = RuleEngineNodeModel.computeOutputType(outcomeTypes, computeOutcomeType(rules.getDataTableSpec()), true, m_settings.isDisallowLongOutputForCompatibility());
        dummy.append(new SingleCellFactory(new DataColumnSpecCreator(outputColumnName, outputType).createSpec()) {

            @Override
            public DataCell getCell(final DataRow row) {
                return null;
            }
        });
    }

    final PMMLPortObject pmml = createPMMLPortObject(doc, ruleSetModel, ruleSet, parser, dummy.createSpec());
    final PortObject portObject = pmml;
    m_copy = copy(pmml);

    String confidenceColumn = m_settings.getPredictionConfidenceColumn();
    if (confidenceColumn == null || confidenceColumn.isEmpty()) {
        confidenceColumn = RuleEngine2PortsSettings.DEFAULT_PREDICTION_CONFIDENCE_COLUMN;
    }
    final ColumnRearranger rearranger = PMMLRuleSetPredictorNodeModel.createRearranger(pmml, spec, m_settings.isReplaceColumn(), outputColumnName, m_settings.isComputeConfidence(), DataTableSpec.getUniqueColumnName(dummy.createSpec(), confidenceColumn), validationIdx);
    return Pair.create(rearranger, portObject);
}
Also used : RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) PMMLRuleParser(org.knime.base.node.rules.engine.pmml.PMMLRuleParser) ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) DataRow(org.knime.core.data.DataRow) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataType(org.knime.core.data.DataType) StringValue(org.knime.core.data.StringValue) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) Expression(org.knime.base.node.rules.engine.Expression) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMML(org.dmg.pmml.PMMLDocument.PMML) DataCell(org.knime.core.data.DataCell) PMMLDocument(org.dmg.pmml.PMMLDocument) ParseException(java.text.ParseException)

Example 4 with ParseState

use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.

the class ParseStateTest method testParseNumber.

/**
 * Tests {@link ParseState#parseNumber()}.
 * <p>
 * Each input in {@code m_numbers} must be returned unchanged, each input in {@code m_partialNumbers} must yield
 * the entry of {@code m_partialNumberMatches} at the same index, and a number followed by a space must be
 * returned without the space.
 *
 * @throws ParseException
 *             Should not happen.
 */
@Test
public void testParseNumber() throws ParseException {
    int idx = 0;
    for (ParseState state : m_d) {
        final String number = m_numbers[idx];
        assertEquals(number, number, state.parseNumber());
        // Throws NumberFormatException if the test input itself is not a number Java can parse.
        Double.parseDouble(number);
        idx++;
    }

    idx = 0;
    for (ParseState state : m_d2) {
        final String input = m_partialNumbers[idx];
        final String expectedMatch = m_partialNumberMatches[idx];
        assertEquals(input, expectedMatch, state.parseNumber());
        idx++;
    }

    final ParseState withTrailingSpace = new ParseState("3 ");
    assertEquals("3", withTrailingSpace.parseNumber());
}
Also used : ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) Test(org.junit.Test)

Example 5 with ParseState

use of org.knime.base.node.rules.engine.BaseRuleParser.ParseState in project knime-core by knime.

the class ParseStateTest method setup.

/**
 * Initialize the test constants: one fresh {@link ParseState} per input text, created before every test.
 */
@org.junit.Before
public void setup() {
    // Empty text and plain words, with and without surrounding whitespace.
    m_empty = new ParseState("");
    m_hello = new ParseState("Hello");
    m_hello1 = new ParseState("   Hello   ");
    m_h = new ParseState("H");

    // Double-quoted texts; the second one contains a backslash-escaped quote.
    m_string = new ParseState("\"Hello\"");
    m_stringWithQuote = new ParseState("\"Hello\\\" continue\"");

    // Texts starting with "$${": the first ends with "}$$", the other three do not.
    m_flowVar = new ParseState("$${S flowvar ok }$$");
    m_flowVarError = new ParseState("$${D flowvar without end");
    m_flowVarError1 = new ParseState("$${S flowvar without end $       ");
    m_flowVarError2 = new ParseState("$${I flowvar without end $   $$");

    // "$$ROWINDEX$$" followed by further text.
    m_rowIndex = new ParseState("$$ROWINDEX$$ Hello");

    // Texts starting with "$": two with a closing "$", one without.
    m_column = new ParseState("$col0$");
    m_column1 = new ParseState("$col1  $");
    m_columnError = new ParseState("$col0 ");

    // Complete numbers, each wrapped in its own parse state.
    final String[] numbers = { "-4.6", "-Infinity", "Infinity", "3", ".4", ".3E43", ".3E-2" };
    m_numbers = numbers;
    m_d = new ParseState[numbers.length];
    int idx = 0;
    for (String number : numbers) {
        m_d[idx++] = new ParseState(number);
    }

    // Inputs paired by index with the text in m_partialNumberMatches.
    final String[] partialNumbers = { "-.3E", "-.3E-", ".3E-", "3E", "3e.", "3.e" };
    m_partialNumbers = partialNumbers;
    m_partialNumberMatches = new String[] { "-.3", "-.3", ".3", "3", "3", "3." };
    m_d2 = new ParseState[partialNumbers.length];
    idx = 0;
    for (String partial : partialNumbers) {
        m_d2[idx++] = new ParseState(partial);
    }
}
Also used : ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState)

Aggregations

ParseState (org.knime.base.node.rules.engine.BaseRuleParser.ParseState): 5, PMMLPredicate (org.knime.base.node.mine.decisiontree2.PMMLPredicate): 3, ParseException (java.text.ParseException): 2, ArrayList (java.util.ArrayList): 2, SimpleRule (org.dmg.pmml.SimpleRuleDocument.SimpleRule): 2, Expression (org.knime.base.node.rules.engine.Expression): 2, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 2, DataRow (org.knime.core.data.DataRow): 2, DataType (org.knime.core.data.DataType): 2, ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 2, SingleCellFactory (org.knime.core.data.container.SingleCellFactory): 2, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 2, LinkedHashSet (java.util.LinkedHashSet): 1, PMMLDocument (org.dmg.pmml.PMMLDocument): 1, PMML (org.dmg.pmml.PMMLDocument.PMML): 1, RuleSet (org.dmg.pmml.RuleSetDocument.RuleSet): 1, RuleSetModel (org.dmg.pmml.RuleSetModelDocument.RuleSetModel): 1, Test (org.junit.Test): 1, PMMLFalsePredicate (org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate): 1, PMMLTruePredicate (org.knime.base.node.mine.decisiontree2.PMMLTruePredicate): 1