Search in sources :

Example 1 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class RuleSetToTable method execute.

/**
 * Converts the single RuleSet model of the PMML input into a table with one row per rule.
 *
 * @param exec The {@link ExecutionContext} used to create the container, report progress and check for
 *            cancellation.
 * @param pmmlPo The input {@link PMMLPortObject}.
 * @return The {@link BufferedDataTable} filled with the converted rules.
 * @throws CanceledExecutionException Execution was cancelled.
 * @throws InvalidSettingsException No or more than one RuleSet model is in the PMML input.
 */
public BufferedDataTable execute(final ExecutionContext exec, final PMMLPortObject pmmlPo) throws CanceledExecutionException, InvalidSettingsException {
    // TODO should the rule selection method be an output flow variable?
    final int ruleSetModelCount = pmmlPo.getPMMLValue().getModels(PMMLModelType.RuleSetModel).size();
    if (ruleSetModelCount != 1) {
        throw new InvalidSettingsException("Only a single RuleSet model is supported.");
    }
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    pmmlPo.initializeModelTranslator(translator);
    final List<Rule> ruleList = translator.getRules();

    // Prefer the spec computed at configure time; only derive one from the rules when none is available.
    final DataTableSpec configuredSpec = configure(pmmlPo.getSpec());
    final List<String> scoreValues = new ArrayList<>();
    DataTableSpec tableSpec = configuredSpec;
    if (tableSpec == null) {
        tableSpec = properSpec(ruleList, scoreValues);
    }
    final BufferedDataContainer rows = exec.createDataContainer(tableSpec);

    // The type of the first target column is handed to createRow as the outcome type.
    final DataType outcomeType = pmmlPo.getSpec().getTargetCols().get(0).getType();
    final int total = ruleList.size();

    // Learning column name -> type, in the iteration order of the learning columns.
    final Map<String, DataType> typeByColumn = new LinkedHashMap<>();
    for (final DataColumnSpec learningCol : pmmlPo.getSpec().getLearningCols()) {
        typeByColumn.put(learningCol.getName(), learningCol.getType());
    }

    long processed = 0L;
    for (final Rule rule : ruleList) {
        exec.checkCanceled();
        // Progress is reported from the count of rules finished before this one ...
        exec.setProgress((double) processed / total);
        // ... while the row key is created from the already incremented counter (first value is 1).
        processed++;
        final RowKey key = RowKey.createRowKey(processed);
        rows.addRowToTable(new DefaultRow(key, createRow(rule, outcomeType, typeByColumn, scoreValues)));
    }
    rows.close();
    return rows.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ArrayList(java.util.ArrayList) PMMLRuleTranslator(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 2 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class RuleEngine2PortsNodeModel method autoGuessRuleColumnName.

/**
 * Validates the configured rule column or, when none is configured, picks one automatically.
 * <p>
 * A configured column must exist in the rule table spec and be compatible with {@link StringValue}. When no
 * column is configured, the columns are scanned from the last to the first and the first
 * {@link StringValue}-compatible one is stored in {@code settings}.
 *
 * @param inSpecs The input specs; the entry at {@code RULE_PORT} is read as the rule table spec.
 * @param settings The {@link RuleEngine2PortsSimpleSettings} for the node model; updated when a column is
 *            guessed.
 * @return The warning message naming the guessed column (or {@code null} if a valid column was already
 *         configured).
 * @throws InvalidSettingsException The rule table spec is missing, the configured column is not found or
 *             incompatible, or there is no String-valued column for rules.
 */
static String autoGuessRuleColumnName(final PortObjectSpec[] inSpecs, final RuleEngine2PortsSimpleSettings settings) throws InvalidSettingsException {
    final String configuredColumn = settings.getRuleColumn();
    final DataTableSpec rulesTableSpec = (DataTableSpec) inSpecs[RULE_PORT];
    if (rulesTableSpec == null) {
        throw new InvalidSettingsException("Rule table specification is not available.");
    }

    // check spec with selected column
    final DataColumnSpec configuredSpec = rulesTableSpec.getColumnSpec(configuredColumn);
    final boolean usable = configuredSpec != null && configuredSpec.getType().isCompatible(StringValue.class);
    CheckUtils.checkSetting(configuredColumn == null || usable, "Rule column \"" + configuredColumn + "\" not found or incompatible");
    if (configuredColumn != null) {
        // a column was configured and passed the check above: nothing to guess
        return null;
    }

    // auto-guessing
    assert !usable : "No class column set but valid configuration";
    // walk backwards so that the last String-compatible column wins
    for (int pos = rulesTableSpec.getNumColumns() - 1; pos >= 0; pos--) {
        final DataColumnSpec candidate = rulesTableSpec.getColumnSpec(pos);
        if (candidate.getType().isCompatible(StringValue.class)) {
            settings.setRuleColumn(candidate.getName());
            return "Guessing target column: \"" + settings.getRuleColumn() + "\".";
        }
    }
    CheckUtils.checkSetting(false, "Rules table contains no String column for rules.");
    return null;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringValue(org.knime.core.data.StringValue)

Example 3 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class RuleEngine2PortsSimpleNodeDialog method updateErrorsAndWarnings.

/**
 * Updates the errors table, the warning text area and the computed outcome type.
 * <p>
 * The errors table, the warning text and the outcome type icon are reset first. If a rules table is present,
 * {@code isSpecAvailable()} holds and the selected rule column is found, every row of the rules table is
 * inspected: missing cells, cells that are not {@link StringValue}s and rules that fail to parse are added to
 * the errors table (which is then made visible), while catch-all and constant-false rules produce warnings.
 * Finally the outcome type icon is set from the type computed for the successfully parsed rules.
 */
protected void updateErrorsAndWarnings() {
    // reset everything that is recomputed below
    m_errorsModel.setRowCount(0);
    hideErrors();
    m_warnings.setText("");
    m_outcomeType.setIcon(DataType.getMissingCell().getType().getIcon());
    // Checking data from second input port
    final int ruleIdx = getRules() == null ? -1 : getRules().getSpec().findColumnIndex(m_ruleColumn.getSelectedColumn());
    final int outcomeIdx = getRules() == null ? -1 : getRules().getSpec().findColumnIndex(m_outcomeColumn.getSelectedColumn());
    if (getRules() != null && isSpecAvailable() && ruleIdx >= 0) {
        RuleFactory factory = ruleFactory();
        long lineNo = 0;
        // becomes true once an enabled catch-all rule was seen in first-hit mode
        boolean wasCatchAll = false;
        final boolean firstHit = isFirstHit();
        List<Rule> rules = new ArrayList<>();
        for (DataRow dataRow : getRules()) {
            ++lineNo;
            DataCell ruleCell = dataRow.getCell(ruleIdx);
            if (ruleCell.isMissing()) {
                m_errorsModel.addRow(new Object[] { dataRow.getKey(), ruleCell, "Missing cell" });
                showErrors();
            }
            if (ruleCell instanceof StringValue) {
                StringValue ruleSV = (StringValue) ruleCell;
                // collapse line breaks so the rule is handled as a single line
                String ruleText = ruleSV.getStringValue().replaceAll("[\r\n]+", " ");
                if (outcomeIdx >= 0) {
                    // an outcome column is selected: append its value as the rule's outcome
                    DataCell outcome = dataRow.getCell(outcomeIdx);
                    String outcomeString;
                    try {
                        outcomeString = m_settings.asStringFailForMissing(outcome);
                    } catch (InvalidSettingsException e) {
                        // no usable outcome text for this cell: fall back to the "?" placeholder
                        outcomeString = "?";
                    }
                    if (m_ruleType.onlyBooleanOutcome()) {
                        // strip the quotes from quoted boolean literals
                        if ("\"TRUE\"".equalsIgnoreCase(outcomeString)) {
                            outcomeString = "TRUE";
                        } else if ("\"FALSE\"".equalsIgnoreCase(outcomeString)) {
                            outcomeString = "FALSE";
                        }
                    }
                    ruleText += " => " + outcomeString;
                }
                try {
                    Rule rule = factory.parse(ruleText, getDataSpec(), getAvailableFlowVariables());
                    rules.add(rule);
                    // existing warnings are kept; a new one is appended on its own line
                    String origWarning = !m_warnings.getText().isEmpty() ? m_warnings.getText() + "\n" : "";
                    Condition cond = rule.getCondition();
                    if (cond.isEnabled()) {
                        // not comment
                        if (cond.isCatchAll() && !wasCatchAll && firstHit && lineNo < getRules().size()) {
                            m_warnings.setText(origWarning + "No rules will match after line " + lineNo + " (" + dataRow.getKey() + "). Because of rule: " + ruleText);
                        }
                        wasCatchAll |= cond.isCatchAll() && firstHit;
                        if (!wasCatchAll && cond.isConstantFalse()) {
                            m_warnings.setText(origWarning + "The rule in line " + lineNo + " (" + dataRow.getKey() + ") will never match: " + ruleText);
                        }
                    }
                } catch (ParseException e) {
                    m_errorsModel.addRow(new Object[] { dataRow.getKey(), ruleText, e.getMessage() });
                    showErrors();
                }
            } else if (!ruleCell.isMissing()) {
                // Missings were handled previously; this is a non-missing cell of the wrong type
                m_errorsModel.addRow(new Object[] { dataRow.getKey(), ruleCell.toString(), "Wrong type: " + ruleCell.getType() });
                // make the errors visible, as the other error branches do after adding a row
                showErrors();
            }
        }
        final DataColumnSpec outcomeSpec = m_outcomeColumn.getSelectedColumnAsSpec();
        // without a selected outcome column the String cell type is passed as the outcome type
        DataType dataType = RuleEngineNodeModel.computeOutputType(rules, outcomeSpec == null ? StringCell.TYPE : outcomeSpec.getType(), m_ruleType, getSettings().isDisallowLongOutputForCompatibility());
        if (dataType != null) {
            m_outcomeType.setIcon(dataType.getIcon());
        }
    }
}
Also used : Condition(org.knime.base.node.rules.engine.Condition) RuleFactory(org.knime.base.node.rules.engine.RuleFactory) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) PortObject(org.knime.core.node.port.PortObject) Rule(org.knime.base.node.rules.engine.Rule) ParseException(java.text.ParseException) StringValue(org.knime.core.data.StringValue)

Example 4 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class MovingAverageNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Selects all double-compatible columns when neither an include nor an exclude list is set, checks that the
 * included columns exist, validates the window length and the weight function, creates one moving-average
 * object per input column and returns the spec produced by the column rearranger.
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = inSpecs[0];

    // nothing configured yet: auto-configure with every double-compatible column
    if (m_columnNames.getIncludeList().isEmpty() && m_columnNames.getExcludeList().isEmpty()) {
        final List<String> doubleColumns = new ArrayList<String>();
        for (int pos = 0; pos < tableSpec.getNumColumns(); pos++) {
            final DataColumnSpec candidate = tableSpec.getColumnSpec(pos);
            if (candidate.getType().isCompatible(DoubleValue.class)) {
                doubleColumns.add(candidate.getName());
            }
        }
        m_columnNames.setIncludeList(doubleColumns);
        setWarningMessage("Auto-configure: selected all double columns!");
    }
    if (m_columnNames.getIncludeList().isEmpty()) {
        setWarningMessage("No double columns selected: input will be same as output!");
    }

    // every selected column has to be present in the input
    for (final String selected : m_columnNames.getIncludeList()) {
        if (!tableSpec.containsName(selected)) {
            throw new InvalidSettingsException("Column \"" + selected + "\" not found in input data!");
        }
    }

    // moving average window length; -1 is rejected as "not selected"
    final int windowLength = m_winLength.getIntValue();
    if (windowLength == -1) {
        throw new InvalidSettingsException("Window length is not selected.");
    }

    // weight function
    String weightFunctionLabel;
    try {
        weightFunctionLabel = m_kindOfMAModel.getStringValue();
    } catch (IllegalArgumentException iae) {
        throw new InvalidSettingsException(iae.getMessage(), iae);
    }
    if (weightFunctionLabel == null) {
        throw new InvalidSettingsException("No weight function selected.");
    }
    final MA_METHODS maMethod = MA_METHODS.getPolicy4Label(weightFunctionLabel);
    if (MA_METHODS.getCenteredMethods().contains(maMethod) && windowLength % 2 == 0) {
        throw new InvalidSettingsException("For centered methods, the window size has to be uneven");
    }

    // one MA-compute engine per input column (overkill, but much easier
    // to reference later on in the DataCellFactory)
    final int columnCount = tableSpec.getNumColumns();
    m_mas = new MovingAverage[columnCount];
    for (int col = 0; col < columnCount; col++) {
        m_mas[col] = maMethod.getMAObject(windowLength);
    }

    final ColumnRearranger rearranger = createColRearranger(tableSpec);
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString)

Example 5 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class DateFieldExtractorNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Requires at least one {@link DateAndTimeValue}-compatible column in the input. A configured column name
 * must exist in the input spec; when no name is configured, the first compatible column is selected and a
 * warning is set. The output spec is taken from the column rearranger.
 */
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = inSpecs[0];
    // contains timestamp?
    if (!tableSpec.containsCompatibleType(DateAndTimeValue.class)) {
        throw new InvalidSettingsException("No timestamp found in input table!");
    }

    final String configuredColumn = m_selectedColumn.getStringValue();
    final boolean nothingSelected = configuredColumn == null || configuredColumn.isEmpty();
    if (nothingSelected) {
        // no value set: auto-configure -> choose first timeseries
        for (final DataColumnSpec candidate : tableSpec) {
            if (!candidate.getType().isCompatible(DateAndTimeValue.class)) {
                continue;
            }
            final String autoName = candidate.getName();
            m_selectedColumn.setStringValue(autoName);
            setWarningMessage("Auto-configure: selected " + autoName);
            break;
        }
    } else if (!tableSpec.containsName(configuredColumn)) {
        // currently selected column is no longer there
        throw new InvalidSettingsException("Column " + configuredColumn + " not found in input spec!");
    }

    // create outputspec
    final ColumnRearranger rearranger = createColumnRearranger(tableSpec).getColumnRearranger();
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DateAndTimeValue(org.knime.core.data.date.DateAndTimeValue) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString)

Aggregations

DataColumnSpec (org.knime.core.data.DataColumnSpec): 800; DataTableSpec (org.knime.core.data.DataTableSpec): 351; InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 239; DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 223; DataCell (org.knime.core.data.DataCell): 187; ArrayList (java.util.ArrayList): 167; DataType (org.knime.core.data.DataType): 149; DataRow (org.knime.core.data.DataRow): 124; ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 123; SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 119; DoubleValue (org.knime.core.data.DoubleValue): 104; HashSet (java.util.HashSet): 92; BufferedDataTable (org.knime.core.node.BufferedDataTable): 77; LinkedHashSet (java.util.LinkedHashSet): 65; LinkedHashMap (java.util.LinkedHashMap): 56; LinkedList (java.util.LinkedList): 47; SingleCellFactory (org.knime.core.data.container.SingleCellFactory): 46; DoubleCell (org.knime.core.data.def.DoubleCell): 46; StringCell (org.knime.core.data.def.StringCell): 45; DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator): 43