Search in sources :

Example 1 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RuleSetToTable method execute.

/**
 * Converts the single RuleSet model of the PMML input to a table containing one row per rule.
 *
 * @param exec An {@link ExecutionContext}; used for cancellation checks, progress and container creation.
 * @param pmmlPo The input {@link PMMLPortObject}.
 * @return The created {@link BufferedDataTable}.
 * @throws CanceledExecutionException Execution was cancelled.
 * @throws InvalidSettingsException No or more than one RuleSet model is in the PMML input.
 */
public BufferedDataTable execute(final ExecutionContext exec, final PMMLPortObject pmmlPo) throws CanceledExecutionException, InvalidSettingsException {
    // TODO should the rule selection method be an output flow variable?
    final int ruleSetModelCount = pmmlPo.getPMMLValue().getModels(PMMLModelType.RuleSetModel).size();
    if (ruleSetModelCount != 1) {
        throw new InvalidSettingsException("Only a single RuleSet model is supported.");
    }

    // Let the translator read the rules out of the PMML document.
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    pmmlPo.initializeModelTranslator(translator);
    final List<Rule> rules = translator.getRules();

    // Prefer the spec computed at configure time; only fall back to deriving it from the rules.
    final List<String> scoreValues = new ArrayList<>();
    DataTableSpec tableSpec = configure(pmmlPo.getSpec());
    if (tableSpec == null) {
        tableSpec = properSpec(rules, scoreValues);
    }
    final BufferedDataContainer container = exec.createDataContainer(tableSpec);

    // The type of the first target column is handed to createRow as the outcome type.
    final DataType outcomeType = pmmlPo.getSpec().getTargetCols().get(0).getType();

    // Name -> type lookup of the learning columns, in spec order.
    final Map<String, DataType> types = new LinkedHashMap<>();
    for (final DataColumnSpec learningCol : pmmlPo.getSpec().getLearningCols()) {
        types.put(learningCol.getName(), learningCol.getType());
    }

    final int total = rules.size();
    long processed = 0L;
    for (final Rule rule : rules) {
        exec.checkCanceled();
        exec.setProgress(1.0 * processed / total);
        // The counter is advanced before the key is created, so the first row key is built from 1.
        processed++;
        container.addRowToTable(new DefaultRow(RowKey.createRowKey(processed), createRow(rule, outcomeType, types, scoreValues)));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ArrayList(java.util.ArrayList) PMMLRuleTranslator(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 2 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RuleEngine2PortsNodeDialog method loadSettingsFrom.

/**
 * {@inheritDoc}
 * <p>
 * After delegating to the super implementation, copies the values of the current settings object into the
 * dialog controls. Controls that depend on the data table are updated with the first input spec, the rule
 * confidence and rule weight column controls with the second input spec.
 */
@Override
protected void loadSettingsFrom(final NodeSettingsRO settings, final PortObjectSpec[] specs) throws NotConfigurableException {
    super.loadSettingsFrom(settings, specs);
    // specs[0] is used for the data related controls, specs[1] for the rule confidence/weight columns
    final DataTableSpec inSpec = (DataTableSpec) specs[0], secondSpec = (DataTableSpec) specs[1];
    m_dataSpec = inSpec;
    // output column: append vs. replace
    m_appendColumn.setText(getSettings().getAppendColumn());
    m_replaceColumn.update(inSpec, getSettings().getReplaceColumn());
    m_replace.setSelected(getSettings().isReplaceColumn());
    // PMML RuleSet related options
    m_pmml.setSelected(getSettings().isPMMLRuleSet());
    m_ruleSelectionMethod.setSelectedItem(getSettings().getRuleSelectionMethod());
    m_hasDefaultScore.setSelected(getSettings().isHasDefaultScore());
    m_defaultScore.setText(getSettings().getDefaultScore());
    // confidence: default value and column taken from the second spec
    m_hasDefaultConfidence.setSelected(getSettings().isHasDefaultConfidence());
    m_defaultConfidence.setValue(getSettings().getDefaultConfidence());
    m_ruleConfidenceColumn.update(secondSpec, getSettings().getRuleConfidenceColumn());
    // weight: default value and column taken from the second spec
    m_hasDefaultWeight.setSelected(getSettings().isHasDefaultWeight());
    m_defaultWeight.setValue(getSettings().getDefaultWeight());
    m_ruleWeightColumn.update(secondSpec, getSettings().getRuleWeightColumn());
    // prediction confidence, statistics and validation options
    m_computeConfidence.setSelected(getSettings().isComputeConfidence());
    m_predictionConfidenceColumn.setText(getSettings().getPredictionConfidenceColumn());
    m_provideStatistics.setSelected(getSettings().isProvideStatistics());
    m_validationColumn.update(inSpec, getSettings().getValidateColumn());
    // called last, after all controls received their values
    // NOTE(review): presumably refreshes the enabled state of dependent controls - confirm in setEnabled()
    setEnabled();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec)

Example 3 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RuleEngine2PortsNodeModel method autoGuessRuleColumnName.

/**
 * Validates the configured rule column or, when none is configured, selects one automatically.
 * <p>
 * If a rule column is configured, it has to exist in the rule table spec and be compatible with
 * {@link StringValue}. If none is configured, the columns of the rule table are scanned from the last to the
 * first and the first {@link StringValue}-compatible one is stored in {@code settings}.
 *
 * @param inSpecs The input specs; the entry at {@code RULE_PORT} is the rule table spec.
 * @param settings The {@link RuleEngine2PortsSimpleSettings} for the node model; updated when a column is guessed.
 * @return A warning message naming the guessed column, or {@code null} if the configured column is valid.
 * @throws InvalidSettingsException The rule table spec is missing, the configured column is not found or
 *             incompatible, or the rule table has no String-compatible column.
 */
static String autoGuessRuleColumnName(final PortObjectSpec[] inSpecs, final RuleEngine2PortsSimpleSettings settings) throws InvalidSettingsException {
    final String configuredColumn = settings.getRuleColumn();
    final DataTableSpec ruleSpec = (DataTableSpec) inSpecs[RULE_PORT];
    if (ruleSpec == null) {
        throw new InvalidSettingsException("Rule table specification is not available.");
    }

    // check spec with selected column
    final DataColumnSpec configuredSpec = ruleSpec.getColumnSpec(configuredColumn);
    final boolean isValid = configuredSpec != null && configuredSpec.getType().isCompatible(StringValue.class);
    CheckUtils.checkSetting(configuredColumn == null || isValid, "Rule column \"" + configuredColumn + "\" not found or incompatible");
    if (configuredColumn != null) {
        // an explicitly configured column passed the check above, nothing to guess
        return null;
    }

    // auto-guessing
    assert !isValid : "No class column set but valid configuration";
    // walk the columns backwards and take the first String-compatible one
    for (int colIndex = ruleSpec.getNumColumns() - 1; colIndex >= 0; colIndex--) {
        final DataColumnSpec candidate = ruleSpec.getColumnSpec(colIndex);
        if (candidate.getType().isCompatible(StringValue.class)) {
            settings.setRuleColumn(candidate.getName());
            return "Guessing target column: \"" + settings.getRuleColumn() + "\".";
        }
    }
    // no candidate found: the check is given false and is relied on to signal the error
    CheckUtils.checkSetting(false, "Rules table contains no String column for rules.");
    return null;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringValue(org.knime.core.data.StringValue)

Example 4 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RuleEngine2PortsNodeModel method createStreamableOperator.

/**
 * {@inheritDoc}
 * <p>
 * The returned operator counts the data rows in an intermediate run and, in the final run, applies the column
 * rearranger built from the data spec and the rule input.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        // state handed over between the intermediate and the final run
        private StreamInternalWithPortObject m_internals;

        /**
         * {@inheritDoc}
         */
        @Override
        public void loadInternals(final StreamableOperatorInternals internals) {
            m_internals = (StreamInternalWithPortObject) internals;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Consumes the whole data input to count its rows. If the rule port is a {@link RowInput}, the output
         * table spec and the resulting PMML object (or {@code null}) are stored in the internals as well.
         */
        @Override
        public void runIntermediate(final PortInput[] inputs, final ExecutionContext exec) throws Exception {
            // count number of rows
            long count = 0;
            final RowInput rowInput = (RowInput) inputs[DATA_PORT];
            while (rowInput.poll() != null) {
                count++;
            }
            if (inputs[RULE_PORT] instanceof RowInput) {
                final RowInput ruleInput = (RowInput) inputs[RULE_PORT];
                final Pair<ColumnRearranger, PortObject> pair = createColumnRearranger(rowInput.getDataTableSpec(), ruleInput);
                final ColumnRearranger rearranger = pair.getFirst();
                final DataTableSpec spec = rearranger.createSpec();
                m_internals.setTableSpec(spec);
                // only a PMML port object is kept; any other second element is stored as null
                if (pair.getSecond() instanceof PMMLPortObject) {
                    PMMLPortObject po = (PMMLPortObject) pair.getSecond();
                    m_internals.setObject(po);
                } else {
                    m_internals.setObject(null);
                }
            }
            m_internals.setRowCount(count);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public StreamableOperatorInternals saveInternals() {
            return m_internals;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Builds the column rearranger from the data input spec and the rule input, runs its streamable function
         * on the inputs/outputs and, if present, publishes the second element of the pair on output port 1.
         */
        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            // the row count is only taken over when the internals carry a table spec
            if (m_internals.getTableSpec() != null) {
                m_rowCount = m_internals.getRowCount();
            }
            // NOTE(review): the rule input is cast to RowInput unconditionally here, while runIntermediate
            // guards the same cast with instanceof - confirm the rule port is always a RowInput in the final run
            final Pair<ColumnRearranger, PortObject> pair = createColumnRearranger((DataTableSpec) inSpecs[DATA_PORT], (RowInput) inputs[RULE_PORT]);
            pair.getFirst().createStreamableFunction(0, 0).runFinal(inputs, outputs, exec);
            if (pair.getSecond() != null) {
                ((PortObjectOutput) outputs[1]).setPortObject(pair.getSecond());
            }
        }
    };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) RowInput(org.knime.core.node.streamable.RowInput) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) InactiveBranchPortObject(org.knime.core.node.port.inactive.InactiveBranchPortObject) PortObjectOutput(org.knime.core.node.streamable.PortObjectOutput)

Example 5 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RuleEngineFilter2PortsNodeModel method createStreamableOperator.

/**
 * {@inheritDoc}
 * <p>
 * The returned operator counts the data rows in an intermediate run and, in the final run, routes every data
 * row to the "match" or the "other" output depending on the first rule that matches it.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {

        // state handed over between the intermediate and the final run
        private SimpleStreamableOperatorInternals m_internals;

        /**
         * {@inheritDoc}
         */
        @Override
        public void loadInternals(final StreamableOperatorInternals internals) {
            m_internals = (SimpleStreamableOperatorInternals) internals;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Consumes the whole data input and stores the number of rows under {@code CFG_ROW_COUNT}.
         */
        @Override
        public void runIntermediate(final PortInput[] inputs, final ExecutionContext exec) throws Exception {
            // count number of rows
            long count = 0;
            RowInput rowInput = (RowInput) inputs[DATA_PORT];
            while (rowInput.poll() != null) {
                count++;
            }
            m_internals.getConfig().addLong(CFG_ROW_COUNT, count);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public StreamableOperatorInternals saveInternals() {
            return m_internals;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Reads the rules from the rule port, parses them against the data spec and pushes each data row to one
         * of the two outputs. Both outputs are closed even if processing fails.
         */
        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            // -1 marks an unknown total (no intermediate run stored a count)
            long rowCount = -1L;
            if (m_internals.getConfig().containsKey(CFG_ROW_COUNT)) {
                rowCount = m_internals.getConfig().getLong(CFG_ROW_COUNT);
            }
            // refill the rule list from whatever kind of input the rule port provides
            m_rulesList.clear();
            final PortInput rulePort = inputs[RULE_PORT];
            if (rulePort instanceof PortObjectInput) {
                PortObjectInput poRule = (PortObjectInput) rulePort;
                m_rulesList.addAll(RuleEngineVariable2PortsNodeModel.rules((BufferedDataTable) poRule.getPortObject(), m_settings, RuleNodeSettings.RuleFilter));
            } else if (rulePort instanceof RowInput) {
                RowInput riRule = (RowInput) rulePort;
                m_rulesList.addAll(RuleEngineVariable2PortsNodeModel.rules(riRule, m_settings, RuleNodeSettings.RuleFilter));
            }
            final DataTableSpec spec = (DataTableSpec) inSpecs[DATA_PORT];
            // first parse acts as a check only: its result is discarded and a parse failure is reported
            // as invalid settings
            // NOTE(review): this check uses RuleSplitter while the rules applied below are parsed with
            // RuleFilter - confirm this is intended
            try {
                parseRules(spec, RuleNodeSettings.RuleSplitter);
            } catch (final ParseException e) {
                throw new InvalidSettingsException(e);
            }
            final RowInput inputPartitions = (RowInput) inputs[DATA_PORT];
            final List<Rule> rules = parseRules(inputPartitions.getDataTableSpec(), RuleNodeSettings.RuleFilter);
            final RowOutput first = (RowOutput) outputs[0];
            final int nrOutPorts = getNrOutPorts();
            // with a single output port, rows for the second output are silently dropped
            final RowOutput second = nrOutPorts > 1 ? (RowOutput) outputs[1] : new RowOutput() {

                @Override
                public void push(final DataRow row) throws InterruptedException {
                    // do nothing
                }

                @Override
                public void close() throws InterruptedException {
                    // do nothing
                }
            };
            final RowOutput[] containers = new RowOutput[] { first, second };
            final int matchIndex = m_includeOnMatch ? 0 : 1;
            final int otherIndex = 1 - matchIndex;
            try {
                // mutable holder so the anonymous provider and the lambdas can see the current row index
                final MutableLong rowIdx = new MutableLong(0L);
                final long rows = rowCount;
                final VariableProvider provider = new VariableProvider() {

                    @Override
                    public Object readVariable(final String name, final Class<?> type) {
                        return RuleEngineFilter2PortsNodeModel.this.readVariable(name, type);
                    }

                    @Override
                    @Deprecated
                    public int getRowCount() {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public long getRowCountLong() {
                        return rows;
                    }

                    @Override
                    @Deprecated
                    public int getRowIndex() {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public long getRowIndexLong() {
                        return rowIdx.longValue();
                    }
                };
                DataRow row;
                while ((row = inputPartitions.poll()) != null) {
                    rowIdx.increment();
                    if (rows > 0) {
                        exec.setProgress(rowIdx.longValue() / (double) rows, () -> "Adding row " + rowIdx.longValue() + " of " + rows);
                    } else {
                        // total is unknown here, so do not report a meaningless "of -1"
                        exec.setMessage(() -> "Adding row " + rowIdx.longValue());
                    }
                    exec.checkCanceled();
                    boolean wasMatch = false;
                    // only the first rule that matches decides where the row goes
                    for (Rule r : rules) {
                        if (r.getCondition().matches(row, provider).getOutcome() == MatchState.matchedAndStop) {
                            DataValue value = r.getOutcome().getComputedResult(row, provider);
                            if (value instanceof BooleanValue) {
                                // a boolean outcome selects the output: true -> match side, false -> other side
                                final BooleanValue bv = (BooleanValue) value;
                                containers[bv.getBooleanValue() ? matchIndex : otherIndex].push(row);
                            } else {
                                containers[matchIndex].push(row);
                            }
                            wasMatch = true;
                            break;
                        }
                    }
                    if (!wasMatch) {
                        containers[otherIndex].push(row);
                    }
                }
            } finally {
                // nested finally: the first output is closed even if closing the second one throws
                try {
                    second.close();
                } finally {
                    first.close();
                }
            }
        }
    };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataValue(org.knime.core.data.DataValue) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) StreamableOperatorInternals(org.knime.core.node.streamable.StreamableOperatorInternals) SimpleStreamableOperatorInternals(org.knime.core.node.streamable.simple.SimpleStreamableOperatorInternals) DataTableRowInput(org.knime.core.node.streamable.DataTableRowInput) RowInput(org.knime.core.node.streamable.RowInput) DataRow(org.knime.core.data.DataRow) PortObjectInput(org.knime.core.node.streamable.PortObjectInput) RowAppenderRowOutput(org.knime.base.node.rules.engine.RowAppenderRowOutput) BufferedDataTableRowOutput(org.knime.core.node.streamable.BufferedDataTableRowOutput) RowOutput(org.knime.core.node.streamable.RowOutput) VariableProvider(org.knime.base.node.rules.engine.VariableProvider) BooleanValue(org.knime.core.data.BooleanValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortInput(org.knime.core.node.streamable.PortInput) SimpleStreamableOperatorInternals(org.knime.core.node.streamable.simple.SimpleStreamableOperatorInternals) MutableLong(org.apache.commons.lang3.mutable.MutableLong) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ParseException(java.text.ParseException) Rule(org.knime.base.node.rules.engine.Rule)

Aggregations

DataTableSpec (org.knime.core.data.DataTableSpec)986 DataColumnSpec (org.knime.core.data.DataColumnSpec)351 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)317 BufferedDataTable (org.knime.core.node.BufferedDataTable)237 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)230 DataCell (org.knime.core.data.DataCell)196 DataRow (org.knime.core.data.DataRow)183 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)146 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)129 ArrayList (java.util.ArrayList)111 DataType (org.knime.core.data.DataType)111 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)100 DefaultRow (org.knime.core.data.def.DefaultRow)97 DoubleValue (org.knime.core.data.DoubleValue)96 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)95 ExecutionContext (org.knime.core.node.ExecutionContext)72 PortObject (org.knime.core.node.port.PortObject)68 RowKey (org.knime.core.data.RowKey)67 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)65 IOException (java.io.IOException)62