
Example 46 with FilterResult

Use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

From the class ColumnAggregatorNodeModel, method createRearranger:

private ColumnRearranger createRearranger(final DataTableSpec oSpec, final CellFactory cellFactory) {
    final ColumnRearranger cr = new ColumnRearranger(oSpec);
    cr.append(cellFactory);
    final FilterResult filterResult = m_aggregationCols.applyTo(oSpec);
    if (m_removeAggregationCols.getBooleanValue()) {
        cr.remove(filterResult.getIncludes());
    }
    if (m_removeRetainedCols.getBooleanValue()) {
        cr.remove(filterResult.getExcludes());
    }
    return cr;
}
Also used: ColumnRearranger (org.knime.core.data.container.ColumnRearranger), FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)
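For context, a minimal sketch (not taken from ColumnAggregatorNodeModel) of how such a column filter model is typically declared and resolved against a spec; the class name, config key, and allowed value type below are assumptions.

import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DoubleValue;
import org.knime.core.data.container.ColumnRearranger;
import org.knime.core.node.defaultnodesettings.SettingsModelColumnFilter2;
import org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult;

final class AggregationFilterSketch {

    // Hypothetical config key and allowed type; the real node model may differ.
    private final SettingsModelColumnFilter2 m_aggregationCols =
        new SettingsModelColumnFilter2("aggregation-columns", DoubleValue.class);

    ColumnRearranger dropAggregatedColumns(final DataTableSpec spec) {
        // applyTo() resolves the stored include/exclude lists against the current spec.
        final FilterResult filter = m_aggregationCols.applyTo(spec);
        final ColumnRearranger cr = new ColumnRearranger(spec);
        // getIncludes() are the columns matched by the filter, getExcludes() the remaining ones;
        // the node above removes either set depending on its "remove columns" settings.
        cr.remove(filter.getIncludes());
        return cr;
    }
}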

Example 47 with FilterResult

Use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

From the class CorrelationComputeNodeModel, method configure:

/**
 * {@inheritDoc}
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec in = (DataTableSpec) inSpecs[0];
    if (!in.containsCompatibleType(DoubleValue.class) && !in.containsCompatibleType(NominalValue.class)) {
        throw new InvalidSettingsException("No double or nominal compatible columns in input");
    }
    final String[] includes;
    if (m_columnFilterModel == null) {
        m_columnFilterModel = createColumnFilterModel();
        // auto-configure, no previous configuration
        m_columnFilterModel.loadDefaults(in);
        includes = m_columnFilterModel.applyTo(in).getIncludes();
        setWarningMessage("Auto configuration: Using all suitable " + "columns (in total " + includes.length + ")");
    } else {
        FilterResult applyTo = m_columnFilterModel.applyTo(in);
        includes = applyTo.getIncludes();
    }
    if (includes.length == 0) {
        throw new InvalidSettingsException("No columns selected");
    }
    return new PortObjectSpec[] { PMCCPortObjectAndSpec.createOutSpec(includes), new PMCCPortObjectAndSpec(includes) };
}
Also used: DataTableSpec (org.knime.core.data.DataTableSpec), PMCCPortObjectAndSpec (org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec), InvalidSettingsException (org.knime.core.node.InvalidSettingsException), PortObjectSpec (org.knime.core.node.port.PortObjectSpec), FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)
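The createColumnFilterModel() factory used above is not part of the snippet; a plausible sketch, assuming the filter is restricted to double- and nominal-compatible columns (the config key and allowed types are assumptions):

import org.knime.core.data.DoubleValue;
import org.knime.core.data.NominalValue;
import org.knime.core.node.defaultnodesettings.SettingsModelColumnFilter2;

final class CorrelationFilterSketch {

    // Sketch only: the actual key and allowed types live in CorrelationComputeNodeModel.
    static SettingsModelColumnFilter2 createColumnFilterModel() {
        return new SettingsModelColumnFilter2("include-list",
            DoubleValue.class, NominalValue.class);
    }
}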

Example 48 with FilterResult

Use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

From the class RoundDoubleNodeModel, method createColumnRearranger:

/**
 * {@inheritDoc}
 */
@Override
public ColumnRearranger createColumnRearranger(final DataTableSpec dataSpec) throws InvalidSettingsException {
    // SPEC CHECKS
    FilterResult filteredCols = m_filterDoubleColModel.applyTo(dataSpec);
    // check for at least one double column in input data table spec
    if (filteredCols.getIncludes().length == 0) {
        throw new InvalidSettingsException("There are no columns containing double values in the input table!");
    }
    // check if all included columns are available in the spec
    String[] unknownCols = filteredCols.getRemovedFromIncludes();
    if (unknownCols.length == 1) {
        setWarningMessage("Column \"" + unknownCols[0] + "\" is not available.");
    } else if (unknownCols.length > 1) {
        setWarningMessage("" + unknownCols.length + " selected columns are not available anymore.");
    }
    // CREATE COLUMN REARRANGER
    // parameters
    int precision = m_numberPrecisionModel.getIntValue();
    boolean append = m_appendColumnsModel.getBooleanValue();
    RoundingMode roundingMode = RoundingMode.valueOf(m_roundingModeModel.getStringValue());
    NumberMode numberMode = NumberMode.valueByDescription(m_numberModeModel.getStringValue());
    final RoundOutputType outputType = RoundOutputType.valueByTextLabel(m_outputTypeModel.getStringValue());
    String colSuffix = m_columnSuffixModel.getStringValue();
    // get array of indices of included columns
    int[] includedColIndices = getIncludedColIndices(dataSpec, filteredCols.getIncludes());
    ColumnRearranger cR = new ColumnRearranger(dataSpec);
    // create spec of new output columns
    DataColumnSpec[] newColsSpecs = getNewColSpecs(append, colSuffix, outputType, filteredCols.getIncludes(), dataSpec);
    // Pass all necessary parameters to the cell factory, which rounds
    // the values and creates new cells to replace or append.
    RoundDoubleCellFactory cellFac = new RoundDoubleCellFactory(precision, numberMode, roundingMode, outputType, includedColIndices, newColsSpecs);
    // replace or append columns
    if (append) {
        cR.append(cellFac);
    } else {
        cR.replace(cellFac, includedColIndices);
    }
    return cR;
}
Also used: RoundingMode (java.math.RoundingMode), RoundOutputType (org.knime.base.node.preproc.rounddouble.RoundDoubleConfigKeys.RoundOutputType), SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString), ColumnRearranger (org.knime.core.data.container.ColumnRearranger), DataColumnSpec (org.knime.core.data.DataColumnSpec), InvalidSettingsException (org.knime.core.node.InvalidSettingsException), FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)
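getIncludedColIndices() is referenced above but not shown; a minimal sketch of what such a helper typically does, resolving the included column names to their positions in the spec (the method body is an assumption):

import java.util.Arrays;

import org.knime.core.data.DataTableSpec;

final class IncludedIndicesSketch {

    // Map included column names to spec indices, skipping names that are
    // no longer present (findColumnIndex returns -1 for unknown names).
    static int[] getIncludedColIndices(final DataTableSpec spec, final String[] includes) {
        final int[] indices = new int[includes.length];
        int n = 0;
        for (final String name : includes) {
            final int idx = spec.findColumnIndex(name);
            if (idx >= 0) {
                indices[n++] = idx;
            }
        }
        return Arrays.copyOf(indices, n);
    }
}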

Example 49 with FilterResult

Use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

From the class TreeEnsembleLearnerConfiguration, method loadInDialog:

/**
 * Loads the settings. Intended for use in the NodeDialog.
 *
 * @param settings the settings to load
 * @param inSpec the spec of the input table
 * @throws NotConfigurableException if the input contains no possible target or learning columns
 */
public void loadInDialog(final NodeSettingsRO settings, final DataTableSpec inSpec) throws NotConfigurableException {
    String defTargetColumn = null;
    String defFingerprintColumn = null;
    boolean hasAttributeColumns = false;
    // guess defaults:
    // traverse columns backwards; assign last (i.e. first-seen) appropriate
    // column as target, use any subsequent as valid learning attribute
    Class<? extends DataValue> targetClass = getRequiredTargetClass();
    for (int i = inSpec.getNumColumns() - 1; i >= 0; i--) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        DataType colType = colSpec.getType();
        String colName = colSpec.getName();
        if (colType.isCompatible(BitVectorValue.class) || colType.isCompatible(ByteVectorValue.class) || colType.isCompatible(DoubleVectorValue.class)) {
            defFingerprintColumn = colName;
        } else if (colType.isCompatible(NominalValue.class) || colType.isCompatible(DoubleValue.class)) {
            if (colType.isCompatible(targetClass)) {
                if (defTargetColumn == null) {
                    // first categorical column
                    defTargetColumn = colName;
                } else {
                    hasAttributeColumns = true;
                }
            } else {
                hasAttributeColumns = true;
            }
        }
    }
    if (defTargetColumn == null) {
        throw new NotConfigurableException("No possible target in input (node not connected?) -- unable to configure.");
    }
    if (!hasAttributeColumns && defFingerprintColumn == null) {
        throw new NotConfigurableException("No appropriate learning column " + "in input (need to have at least one additional " + "numeric/categorical column, fingerprint data or byte or double vector data)");
    }
    // assign fields:
    m_targetColumn = settings.getString(KEY_TARGET_COLUMN, defTargetColumn);
    DataColumnSpec targetColSpec = inSpec.getColumnSpec(m_targetColumn);
    if (targetColSpec == null || !targetColSpec.getType().isCompatible(targetClass)) {
        m_targetColumn = defTargetColumn;
    }
    String hardCodedRootColumn = settings.getString(KEY_ROOT_COLUMN, null);
    if (inSpec.getColumnSpec(hardCodedRootColumn) == null) {
        m_hardCodedRootColumn = null;
    } else {
        m_hardCodedRootColumn = hardCodedRootColumn;
    }
    m_fingerprintColumn = settings.getString(KEY_FINGERPRINT_COLUMN, defFingerprintColumn);
    if (m_fingerprintColumn == null) {
    // null in node settings - leave it
    } else {
        DataColumnSpec fpColSpec = inSpec.getColumnSpec(m_fingerprintColumn);
        if (fpColSpec == null || !fpColSpec.getType().isCompatible(BitVectorValue.class)) {
            m_fingerprintColumn = defFingerprintColumn;
        }
    }
    // m_includeColumns = settings.getStringArray(KEY_INCLUDE_COLUMNS, (String[])null);
    // m_includeAllColumns = settings.getBoolean(KEY_INCLUDE_ALL_COLUMNS, true);
    m_columnFilterConfig.loadConfigurationInDialog(settings, inSpec);
    Long defSeed = System.currentTimeMillis();
    String seedS = settings.getString(KEY_SEED, Long.toString(defSeed));
    Long seed;
    if (seedS == null) {
        seed = null;
    } else {
        try {
            seed = Long.parseLong(seedS);
        } catch (NumberFormatException nfe) {
            seed = m_seed;
        }
    }
    m_seed = seed;
    m_maxLevels = settings.getInt(KEY_MAX_LEVELS, DEF_MAX_LEVEL);
    if (m_maxLevels != MAX_LEVEL_INFINITE && m_maxLevels <= 0) {
        m_maxLevels = DEF_MAX_LEVEL;
    }
    int minNodeSize = settings.getInt(KEY_MIN_NODE_SIZE, MIN_NODE_SIZE_UNDEFINED);
    int minChildSize = settings.getInt(KEY_MIN_CHILD_SIZE, MIN_CHILD_SIZE_UNDEFINED);
    try {
        setMinSizes(minNodeSize, minChildSize);
    } catch (InvalidSettingsException e) {
        m_minNodeSize = MIN_NODE_SIZE_UNDEFINED;
        m_minChildSize = MIN_CHILD_SIZE_UNDEFINED;
    }
    m_dataFractionPerTree = settings.getDouble(KEY_DATA_FRACTION, DEF_DATA_FRACTION);
    if (m_dataFractionPerTree <= 0.0 || m_dataFractionPerTree > 1.0) {
        m_dataFractionPerTree = DEF_DATA_FRACTION;
    }
    m_columnAbsoluteValue = settings.getInt(KEY_COLUMN_ABSOLUTE, DEF_COLUMN_ABSOLUTE);
    if (m_columnAbsoluteValue <= 0) {
        m_columnAbsoluteValue = DEF_COLUMN_ABSOLUTE;
    }
    m_isDataSelectionWithReplacement = settings.getBoolean(KEY_IS_DATA_SELECTION_WITH_REPLACEMENT, true);
    ColumnSamplingMode defColSamplingMode = DEF_COLUMN_SAMPLING_MODE;
    ColumnSamplingMode colSamplingMode = defColSamplingMode;
    String colSamplingModeS = settings.getString(KEY_COLUMN_SAMPLING_MODE, null);
    if (colSamplingModeS == null) {
        colSamplingMode = defColSamplingMode;
    } else {
        try {
            colSamplingMode = ColumnSamplingMode.valueOf(colSamplingModeS);
        } catch (Exception e) {
            colSamplingMode = defColSamplingMode;
        }
    }
    double colFracLinValue;
    switch(colSamplingMode) {
        case Linear:
            colFracLinValue = settings.getDouble(KEY_COLUMN_FRACTION_LINEAR, DEF_COLUMN_FRACTION);
            if (colFracLinValue <= 0.0 || colFracLinValue > 1.0) {
                colFracLinValue = DEF_COLUMN_FRACTION;
            }
            break;
        default:
            colFracLinValue = DEF_COLUMN_FRACTION;
    }
    m_columnSamplingMode = colSamplingMode;
    m_columnFractionLinearValue = colFracLinValue;
    m_isUseDifferentAttributesAtEachNode = settings.getBoolean(KEY_IS_USE_DIFFERENT_ATTRIBUTES_AT_EACH_NODE, true);
    m_nrModels = settings.getInt(KEY_NR_MODELS, DEF_NR_MODELS);
    if (m_nrModels <= 0) {
        m_nrModels = DEF_NR_MODELS;
    }
    SplitCriterion defSplitCriterion = SplitCriterion.InformationGainRatio;
    String splitCriterionS = settings.getString(KEY_SPLIT_CRITERION, defSplitCriterion.name());
    SplitCriterion splitCriterion;
    if (splitCriterionS == null) {
        splitCriterion = defSplitCriterion;
    } else {
        try {
            splitCriterion = SplitCriterion.valueOf(splitCriterionS);
        } catch (Exception e) {
            splitCriterion = defSplitCriterion;
        }
    }
    m_splitCriterion = splitCriterion;
    m_useAverageSplitPoints = settings.getBoolean(KEY_USE_AVERAGE_SPLIT_POINTS, DEF_AVERAGE_SPLIT_POINTS);
    m_useBinaryNominalSplits = settings.getBoolean(KEY_USE_BINARY_NOMINAL_SPLITS, DEF_BINARY_NOMINAL_SPLITS);
    String missingValueHandlingS = settings.getString(KEY_MISSING_VALUE_HANDLING, DEF_MISSING_VALUE_HANDLING.name());
    MissingValueHandling missingValueHandling;
    if (missingValueHandlingS == null) {
        missingValueHandling = DEF_MISSING_VALUE_HANDLING;
    } else {
        try {
            missingValueHandling = MissingValueHandling.valueOf(missingValueHandlingS);
        } catch (Exception e) {
            missingValueHandling = DEF_MISSING_VALUE_HANDLING;
        }
    }
    m_missingValueHandling = missingValueHandling;
    FilterResult filterResult = m_columnFilterConfig.applyTo(inSpec);
    if (m_fingerprintColumn != null) {
    // use fingerprint data, OK
    } else if (filterResult.getIncludes().length > 0) {
    // some attributes set, OK
    // } else if (m_includeAllColumns) {
    // use all appropriate columns, OK
    } else if (defFingerprintColumn != null) {
        // no valid columns but fingerprint column found - use it
        m_fingerprintColumn = defFingerprintColumn;
    // } else {
    // m_includeAllColumns = true;
    }
    m_ignoreColumnsWithoutDomain = settings.getBoolean(KEY_IGNORE_COLUMNS_WITHOUT_DOMAIN, true);
    m_nrHilitePatterns = settings.getInt(KEY_NR_HILITE_PATTERNS, -1);
    m_saveTargetDistributionInNodes = settings.getBoolean(KEY_SAVE_TARGET_DISTRIBUTION_IN_NODES, DEF_SAVE_TARGET_DISTRIBUTION_IN_NODES);
    setRowSamplingMode(RowSamplingMode.valueOf(settings.getString(KEY_ROW_SAMPLING_MODE, DEF_ROW_SAMPLING_MODE.name())));
}
Also used: NotConfigurableException (org.knime.core.node.NotConfigurableException), DoubleVectorValue (org.knime.core.data.vector.doublevector.DoubleVectorValue), InvalidSettingsException (org.knime.core.node.InvalidSettingsException), DataColumnSpec (org.knime.core.data.DataColumnSpec), DataType (org.knime.core.data.DataType), FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)
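The m_columnFilterConfig field is not declared in the snippet; in KNIME nodes it is commonly a DataColumnSpecFilterConfiguration, roughly as sketched below (class name and config key are assumptions, not taken from TreeEnsembleLearnerConfiguration):

import org.knime.core.data.DataTableSpec;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult;
import org.knime.core.node.util.filter.column.DataColumnSpecFilterConfiguration;

final class ColumnFilterConfigSketch {

    // Sketch only: the real configuration key is defined in TreeEnsembleLearnerConfiguration.
    private final DataColumnSpecFilterConfiguration m_columnFilterConfig =
        new DataColumnSpecFilterConfiguration("columnFilter");

    void loadInDialog(final NodeSettingsRO settings, final DataTableSpec inSpec) {
        // In a dialog, missing or invalid settings silently fall back to defaults.
        m_columnFilterConfig.loadConfigurationInDialog(settings, inSpec);
        // applyTo() resolves the stored configuration against the current spec.
        final FilterResult result = m_columnFilterConfig.applyTo(inSpec);
        System.out.println(result.getIncludes().length + " learning column(s) selected");
    }
}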

Example 50 with FilterResult

Use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

From the class LogRegLearner, method init:

/**
 * Initialize instance and check if settings are consistent.
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    List<String> inputCols = new ArrayList<String>();
    FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    for (String column : includedColumns.getIncludes()) {
        inputCols.add(column);
    }
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must " + "be included.");
    }
    DataColumnSpec targetColSpec = null;
    List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && m_settings.getIncludedColumns().applyTo(inSpec).getExcludes().length == 0) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in " + "spec compatible to \"NominalValue\".");
        }
    }
    // remove all columns that should not be used
    inputCols.removeAll(exclude);
    m_specialColumns = new LinkedList<>();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (m_settings.getTargetColumn().equals(colName)) {
            if (type.isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class) || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nominal.");
            }
        }
    }
    if (null != targetColSpec) {
        // Check if target has at least two categories.
        final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
        if (targetValues != null && targetValues.size() < 2) {
            throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
        }
        String[] learnerCols = new String[regressorColSpecs.size() + 1];
        for (int i = 0; i < regressorColSpecs.size(); i++) {
            learnerCols[i] = regressorColSpecs.get(i).getName();
        }
        learnerCols[learnerCols.length - 1] = targetColSpec.getName();
        final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
        for (int i = updatedSpecs.length; i-- > 0; ) {
            final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
            final DataType type = columnSpec.getType();
            if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)) {
                final DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
                colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType", type.isCompatible(BitVectorValue.class) ? "BitVector" : "ByteVector")));
                updatedSpecs[i] = colSpecCreator.createSpec();
            } else {
                updatedSpecs[i] = columnSpec;
            }
        }
        DataTableSpec updated = new DataTableSpec(updatedSpecs);
        PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
        creator.setTargetCols(Arrays.asList(targetColSpec));
        creator.setLearningCols(regressorColSpecs);
        // creator.addPreprocColNames(m_specialColumns.stream().flatMap(spec -> ));
        m_pmmlOutSpec = creator.createSpec();
        m_learner = new Learner(m_pmmlOutSpec, m_specialColumns, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());
    } else {
        throw new InvalidSettingsException("The target is " + "not in the input.");
    }
}
Also used: DataTableSpec (org.knime.core.data.DataTableSpec), DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator), ArrayList (java.util.ArrayList), ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue), DataColumnSpec (org.knime.core.data.DataColumnSpec), InvalidSettingsException (org.knime.core.node.InvalidSettingsException), DoubleValue (org.knime.core.data.DoubleValue), DataType (org.knime.core.data.DataType), DataCell (org.knime.core.data.DataCell), FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult), BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue), DataColumnProperties (org.knime.core.data.DataColumnProperties), PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)
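Taken together, examples 46 to 50 share the same configure-time pattern: resolve the filter against the incoming spec, fail when nothing is included, and warn about previously selected columns that have disappeared. A condensed sketch of that pattern (class and method names are placeholders, not knime-core API):

import org.knime.core.data.DataTableSpec;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.defaultnodesettings.SettingsModelColumnFilter2;
import org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult;

final class FilterResultPatternSketch {

    static String[] resolveIncludes(final SettingsModelColumnFilter2 filterModel,
        final DataTableSpec spec) throws InvalidSettingsException {
        final FilterResult result = filterModel.applyTo(spec);
        final String[] includes = result.getIncludes();
        if (includes.length == 0) {
            throw new InvalidSettingsException("No columns selected");
        }
        // Columns that were selected earlier but are missing from the current spec.
        final String[] removed = result.getRemovedFromIncludes();
        if (removed.length > 0) {
            System.err.println(removed.length + " previously selected column(s) are no longer available");
        }
        return includes;
    }
}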

Aggregations

FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult): 54
DataTableSpec (org.knime.core.data.DataTableSpec): 29
InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 29
DataColumnSpec (org.knime.core.data.DataColumnSpec): 19
ArrayList (java.util.ArrayList): 14
ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 13
DataType (org.knime.core.data.DataType): 10
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 10
BufferedDataTable (org.knime.core.node.BufferedDataTable): 9
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 6
PortObjectSpec (org.knime.core.node.port.PortObjectSpec): 6
HashSet (java.util.HashSet): 5
DataCell (org.knime.core.data.DataCell): 5
DoubleValue (org.knime.core.data.DoubleValue): 5
HashMap (java.util.HashMap): 3
DataColumnProperties (org.knime.core.data.DataColumnProperties): 3
DataRow (org.knime.core.data.DataRow): 3
PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator): 3
LinkedHashMap (java.util.LinkedHashMap): 2
LinkedHashSet (java.util.LinkedHashSet): 2