use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RuleSetToTable method execute.
/**
 * Converts the single RuleSet model of a PMML port object into a table with one row per rule.
 *
 * @param exec The {@link ExecutionContext} used to create the output container, report progress and check
 *            for cancellation.
 * @param pmmlPo The input {@link PMMLPortObject}; it has to contain exactly one RuleSet model.
 * @return The created {@link BufferedDataTable}.
 * @throws CanceledExecutionException Execution was cancelled.
 * @throws InvalidSettingsException No or more than one RuleSet model is in the PMML input.
 */
public BufferedDataTable execute(final ExecutionContext exec, final PMMLPortObject pmmlPo) throws CanceledExecutionException, InvalidSettingsException {
    // TODO should the rule selection method be an output flow variable?
    final int ruleSetModelCount = pmmlPo.getPMMLValue().getModels(PMMLModelType.RuleSetModel).size();
    if (ruleSetModelCount != 1) {
        throw new InvalidSettingsException("Only a single RuleSet model is supported.");
    }

    // Let the port object fill the translator, then read the rules from it.
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    pmmlPo.initializeModelTranslator(translator);
    final List<Rule> rules = translator.getRules();

    // Use the spec from configure when it is available, otherwise derive it from the rules.
    final DataTableSpec configuredSpec = configure(pmmlPo.getSpec());
    final List<String> scoreValues = new ArrayList<>();
    final DataTableSpec outputSpec;
    if (configuredSpec == null) {
        outputSpec = properSpec(rules, scoreValues);
    } else {
        outputSpec = configuredSpec;
    }
    final BufferedDataContainer container = exec.createDataContainer(outputSpec);

    // The type of the first target column is passed on as the outcome type.
    final DataType outcomeType = pmmlPo.getSpec().getTargetCols().get(0).getType();

    // Learning column name -> type, in the order the PMML spec lists them.
    final Map<String, DataType> types = new LinkedHashMap<>();
    for (final DataColumnSpec learningCol : pmmlPo.getSpec().getLearningCols()) {
        types.put(learningCol.getName(), learningCol.getType());
    }

    final int totalRules = rules.size();
    long processed = 0L;
    for (final Rule rule : rules) {
        exec.checkCanceled();
        // Progress is reported before the counter is advanced; the row key uses the advanced (1-based) value.
        exec.setProgress(1.0 * processed / totalRules);
        processed++;
        final RowKey key = RowKey.createRowKey(processed);
        container.addRowToTable(new DefaultRow(key, createRow(rule, outcomeType, types, scoreValues)));
    }
    container.close();
    return container.getTable();
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RuleEngine2PortsNodeDialog method loadSettingsFrom.
/**
 * {@inheritDoc}
 * <p>
 * After the superclass has loaded the settings, the dialog controls are populated from {@code getSettings()}
 * and the two input table specs.
 */
@Override
protected void loadSettingsFrom(final NodeSettingsRO settings, final PortObjectSpec[] specs) throws NotConfigurableException {
    super.loadSettingsFrom(settings, specs);

    // First input spec: kept as the data spec and used for the replace/validation column selectors.
    final DataTableSpec dataTableSpec = (DataTableSpec) specs[0];
    // Second input spec: used for the rule confidence and rule weight column selectors.
    final DataTableSpec secondTableSpec = (DataTableSpec) specs[1];
    m_dataSpec = dataTableSpec;

    // Output column: append a new one or replace an existing one.
    m_appendColumn.setText(getSettings().getAppendColumn());
    m_replaceColumn.update(dataTableSpec, getSettings().getReplaceColumn());
    m_replace.setSelected(getSettings().isReplaceColumn());

    // PMML RuleSet options.
    m_pmml.setSelected(getSettings().isPMMLRuleSet());
    m_ruleSelectionMethod.setSelectedItem(getSettings().getRuleSelectionMethod());

    // Default score.
    m_hasDefaultScore.setSelected(getSettings().isHasDefaultScore());
    m_defaultScore.setText(getSettings().getDefaultScore());

    // Default confidence and the column providing per-rule confidences.
    m_hasDefaultConfidence.setSelected(getSettings().isHasDefaultConfidence());
    m_defaultConfidence.setValue(getSettings().getDefaultConfidence());
    m_ruleConfidenceColumn.update(secondTableSpec, getSettings().getRuleConfidenceColumn());

    // Default weight and the column providing per-rule weights.
    m_hasDefaultWeight.setSelected(getSettings().isHasDefaultWeight());
    m_defaultWeight.setValue(getSettings().getDefaultWeight());
    m_ruleWeightColumn.update(secondTableSpec, getSettings().getRuleWeightColumn());

    // Prediction confidence, statistics and validation column.
    m_computeConfidence.setSelected(getSettings().isComputeConfidence());
    m_predictionConfidenceColumn.setText(getSettings().getPredictionConfidenceColumn());
    m_provideStatistics.setSelected(getSettings().isProvideStatistics());
    m_validationColumn.update(dataTableSpec, getSettings().getValidateColumn());

    setEnabled();
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RuleEngine2PortsNodeModel method autoGuessRuleColumnName.
/**
 * Validates the configured rule column against the rule table spec, or, when no column is configured, picks
 * the last String-compatible column of the rule table and stores it in the settings.
 *
 * @param inSpecs The input specs; the entry at {@code RULE_PORT} has to be the rule table's {@link DataTableSpec}.
 * @param settings The {@link RuleEngine2PortsSimpleSettings} for the node model; updated when a column is guessed.
 * @return A warning message when the column was guessed, or {@code null} when the configured column is valid.
 * @throws InvalidSettingsException The rule table spec is missing, the configured column is not found or not
 *             String-compatible, or no String-compatible column is available for guessing.
 */
static String autoGuessRuleColumnName(final PortObjectSpec[] inSpecs, final RuleEngine2PortsSimpleSettings settings) throws InvalidSettingsException {
    final String configuredColumn = settings.getRuleColumn();
    final DataTableSpec rulesTableSpec = (DataTableSpec) inSpecs[RULE_PORT];
    if (rulesTableSpec == null) {
        throw new InvalidSettingsException("Rule table specification is not available.");
    }

    // check spec with selected column
    final DataColumnSpec configuredSpec = rulesTableSpec.getColumnSpec(configuredColumn);
    final boolean configuredIsUsable = configuredSpec != null && configuredSpec.getType().isCompatible(StringValue.class);
    CheckUtils.checkSetting(configuredColumn == null || configuredIsUsable, "Rule column \"" + configuredColumn + "\" not found or incompatible");
    if (configuredColumn != null) {
        // A column was configured and passed the check above; nothing to guess.
        return null;
    }

    // auto-guessing
    assert !configuredIsUsable : "No class column set but valid configuration";
    // walk the columns from last to first and take the first String-compatible one
    int candidate = rulesTableSpec.getNumColumns() - 1;
    while (candidate >= 0) {
        final DataColumnSpec candidateSpec = rulesTableSpec.getColumnSpec(candidate);
        if (candidateSpec.getType().isCompatible(StringValue.class)) {
            settings.setRuleColumn(candidateSpec.getName());
            return "Guessing target column: \"" + settings.getRuleColumn() + "\".";
        }
        candidate--;
    }
    CheckUtils.checkSetting(false, "Rules table contains no String column for rules.");
    return null;
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RuleEngine2PortsNodeModel method createStreamableOperator.
/**
 * {@inheritDoc}
 * <p>
 * The returned operator counts the data rows in its intermediate run and applies the column rearranger
 * built from the rule input in its final run.
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {
        private StreamInternalWithPortObject m_internals;

        /**
         * {@inheritDoc}
         */
        @Override
        public void loadInternals(final StreamableOperatorInternals internals) {
            m_internals = (StreamInternalWithPortObject) internals;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void runIntermediate(final PortInput[] inputs, final ExecutionContext exec) throws Exception {
            // count number of rows by draining the data input
            final RowInput dataInput = (RowInput) inputs[DATA_PORT];
            long rowsSeen = 0;
            while (dataInput.poll() != null) {
                rowsSeen++;
            }

            // Only when the rules are streamed in as rows: remember the resulting spec and PMML object (if any).
            final PortInput rulePortInput = inputs[RULE_PORT];
            if (rulePortInput instanceof RowInput) {
                final Pair<ColumnRearranger, PortObject> rearrangerAndObject = createColumnRearranger(dataInput.getDataTableSpec(), (RowInput) rulePortInput);
                final DataTableSpec resultSpec = rearrangerAndObject.getFirst().createSpec();
                m_internals.setTableSpec(resultSpec);
                final PortObject producedObject = rearrangerAndObject.getSecond();
                if (!(producedObject instanceof PMMLPortObject)) {
                    m_internals.setObject(null);
                } else {
                    m_internals.setObject((PMMLPortObject) producedObject);
                }
            }
            m_internals.setRowCount(rowsSeen);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public StreamableOperatorInternals saveInternals() {
            return m_internals;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            // Take over the row count from the internals only when a table spec was stored there.
            if (m_internals.getTableSpec() != null) {
                m_rowCount = m_internals.getRowCount();
            }
            final DataTableSpec dataSpec = (DataTableSpec) inSpecs[DATA_PORT];
            final RowInput ruleInput = (RowInput) inputs[RULE_PORT];
            final Pair<ColumnRearranger, PortObject> rearrangerAndObject = createColumnRearranger(dataSpec, ruleInput);
            rearrangerAndObject.getFirst().createStreamableFunction(0, 0).runFinal(inputs, outputs, exec);
            // Publish the second element of the pair on output 1 when there is one.
            final PortObject producedObject = rearrangerAndObject.getSecond();
            if (producedObject != null) {
                ((PortObjectOutput) outputs[1]).setPortObject(producedObject);
            }
        }
    };
}
use of org.knime.core.data.DataTableSpec in project knime-core by knime.
the class RuleEngineFilter2PortsNodeModel method createStreamableOperator.
/**
* {@inheritDoc}
* <p>
* The returned operator counts the data rows in its intermediate run and, in its final run, reads the rules
* from the rule port and pushes every data row to one of (up to) two outputs depending on the first rule
* that matches it.
*/
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
return new StreamableOperator() {
// Internals handed in via loadInternals; their config carries the row count between the two runs.
private SimpleStreamableOperatorInternals m_internals;
/**
* {@inheritDoc}
*/
@Override
public void loadInternals(final StreamableOperatorInternals internals) {
m_internals = (SimpleStreamableOperatorInternals) internals;
}
/**
* {@inheritDoc}
* <p>
* Drains the data input to count its rows and stores the count under {@code CFG_ROW_COUNT}.
*/
@Override
public void runIntermediate(final PortInput[] inputs, final ExecutionContext exec) throws Exception {
// count number of rows
long count = 0;
RowInput rowInput = (RowInput) inputs[DATA_PORT];
while (rowInput.poll() != null) {
count++;
}
m_internals.getConfig().addLong(CFG_ROW_COUNT, count);
}
/**
* {@inheritDoc}
*/
@Override
public StreamableOperatorInternals saveInternals() {
return m_internals;
}
/**
* {@inheritDoc}
* <p>
* Loads the rules from the rule port, parses them against the data spec and routes each data row to the
* "match" or the "other" output. Both outputs are closed when the method finishes, also on failure.
*/
@Override
public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
// -1 signals that no row count was stored in the internals.
long rowCount = -1L;
if (m_internals.getConfig().containsKey(CFG_ROW_COUNT)) {
rowCount = m_internals.getConfig().getLong(CFG_ROW_COUNT);
}
// Rebuild the rule list from the rule port, which is either a port object holding a table or a row input.
m_rulesList.clear();
final PortInput rulePort = inputs[RULE_PORT];
if (rulePort instanceof PortObjectInput) {
PortObjectInput poRule = (PortObjectInput) rulePort;
m_rulesList.addAll(RuleEngineVariable2PortsNodeModel.rules((BufferedDataTable) poRule.getPortObject(), m_settings, RuleNodeSettings.RuleFilter));
} else if (rulePort instanceof RowInput) {
RowInput riRule = (RowInput) rulePort;
m_rulesList.addAll(RuleEngineVariable2PortsNodeModel.rules(riRule, m_settings, RuleNodeSettings.RuleFilter));
}
final DataTableSpec spec = (DataTableSpec) inSpecs[DATA_PORT];
// First parse: the result is discarded; a ParseException is reported as InvalidSettingsException.
// NOTE(review): this call uses RuleSplitter while the parse below uses RuleFilter - confirm this is intended.
try {
parseRules(spec, RuleNodeSettings.RuleSplitter);
} catch (final ParseException e) {
throw new InvalidSettingsException(e);
}
final RowInput inputPartitions = (RowInput) inputs[DATA_PORT];
// Second parse: these are the rules actually evaluated per row.
final List<Rule> rules = parseRules(inputPartitions.getDataTableSpec(), RuleNodeSettings.RuleFilter);
final RowOutput first = (RowOutput) outputs[0];
final int nrOutPorts = getNrOutPorts();
// With a single output port the second output is a sink that drops every row.
final RowOutput second = nrOutPorts > 1 ? (RowOutput) outputs[1] : new RowOutput() {
@Override
public void push(final DataRow row) throws InterruptedException {
// do nothing
}
@Override
public void close() throws InterruptedException {
// do nothing
}
};
final RowOutput[] containers = new RowOutput[] { first, second };
// m_includeOnMatch decides which of the two outputs receives matching rows; the other gets the rest.
final int matchIndex = m_includeOnMatch ? 0 : 1;
final int otherIndex = 1 - matchIndex;
try {
// Mutable holder so the provider and the progress lambdas below see the current row index.
final MutableLong rowIdx = new MutableLong(0L);
final long rows = rowCount;
// Exposes flow variables, the row count and the current row index to the rule evaluation;
// the deprecated int-based accessors are not supported.
final VariableProvider provider = new VariableProvider() {
@Override
public Object readVariable(final String name, final Class<?> type) {
return RuleEngineFilter2PortsNodeModel.this.readVariable(name, type);
}
@Override
@Deprecated
public int getRowCount() {
throw new UnsupportedOperationException();
}
@Override
public long getRowCountLong() {
return rows;
}
@Override
@Deprecated
public int getRowIndex() {
throw new UnsupportedOperationException();
}
@Override
public long getRowIndexLong() {
return rowIdx.longValue();
}
};
DataRow row;
while ((row = inputPartitions.poll()) != null) {
// The index is incremented before the row is evaluated, so the first row sees index 1.
rowIdx.increment();
if (rows > 0) {
exec.setProgress(rowIdx.longValue() / (double) rows, () -> "Adding row " + rowIdx.longValue() + " of " + rows);
} else {
// No positive row count available: only a message is set, no progress fraction.
exec.setMessage(() -> "Adding row " + rowIdx.longValue() + " of " + rows);
}
exec.checkCanceled();
boolean wasMatch = false;
// Only the first rule whose condition yields matchedAndStop decides where the row goes.
for (Rule r : rules) {
if (r.getCondition().matches(row, provider).getOutcome() == MatchState.matchedAndStop) {
// r.getSideEffect().perform(row, provider);
DataValue value = r.getOutcome().getComputedResult(row, provider);
if (value instanceof BooleanValue) {
// Boolean outcome: true sends the row to the match output, false to the other one.
final BooleanValue bv = (BooleanValue) value;
containers[bv.getBooleanValue() ? matchIndex : otherIndex].push(row);
} else {
// Any non-boolean outcome is treated like a match.
containers[matchIndex].push(row);
}
wasMatch = true;
break;
}
}
// Rows for which no rule matched go to the other output.
if (!wasMatch) {
containers[otherIndex].push(row);
}
}
} finally {
// Close both outputs; the nested finally closes the first even if closing the second throws.
try {
second.close();
} finally {
first.close();
}
}
}
};
}
Aggregations