Search in sources :

Example 31 with FilterResult

use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

the class CollectionCreate2NodeModel method createColumnRearranger.

/**
 * Builds the rearranger that appends one collection column (set or list, depending on the
 * "create set" setting) assembled from the columns selected by the include filter, and that
 * optionally removes those source columns from the output.
 *
 * @param in the input table spec the include filter is applied to
 * @return the rearranger with the collection column appended
 * @throws InvalidSettingsException if an included column cannot be found in {@code in}
 */
@Override
protected ColumnRearranger createColumnRearranger(final DataTableSpec in) throws InvalidSettingsException {
    // Work on a private copy of the included column names.
    final String[] selectedNames = m_includeModel.applyTo(in).getIncludes().clone();

    // Translate every selected name into its position within the input spec.
    final int[] sourceIndices = new int[selectedNames.length];
    int slot = 0;
    for (String columnName : selectedNames) {
        final int position = in.findColumnIndex(columnName);
        if (position < 0) {
            throw new InvalidSettingsException("No column \"" + columnName + "\" in input table");
        }
        sourceIndices[slot++] = position;
    }

    // Without any source column the element type falls back to the type of DataCell.class.
    final DataType elementType = selectedNames.length == 0
        ? DataType.getType(DataCell.class)
        : CollectionCellFactory.getElementType(in, sourceIndices);

    final String appendedName = m_newColName.getStringValue();
    final DataType collectionType = m_createSet.getBooleanValue()
        ? SetCell.getCollectionType(elementType)
        : ListCell.getCollectionType(elementType);

    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(appendedName, collectionType);
    specCreator.setElementNames(selectedNames);
    final DataColumnSpec appendedSpec = specCreator.createSpec();

    final CellFactory collectionFactory = new SingleCellFactory(appendedSpec) {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell getCell(final DataRow row) {
            final int[] usedIndices;
            if (m_ignoreMissing.getBooleanValue()) {
                // Keep only those source columns whose cell in this row is not missing.
                final List<Integer> present = new ArrayList<Integer>();
                for (int column : sourceIndices) {
                    if (!row.getCell(column).isMissing()) {
                        present.add(column);
                    }
                }
                usedIndices = buildIntArray(present);
            } else {
                usedIndices = sourceIndices;
            }
            if (m_createSet.getBooleanValue()) {
                return CollectionCellFactory.createSetCell(row, usedIndices);
            }
            return CollectionCellFactory.createListCell(row, usedIndices);
        }
    };

    final ColumnRearranger result = new ColumnRearranger(in);
    if (m_removeCols.getBooleanValue()) {
        result.remove(sourceIndices);
    }
    result.append(collectionFactory);
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) CollectionCellFactory(org.knime.core.data.collection.CollectionCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 32 with FilterResult

use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

the class LogRegCoordinator method init.

/**
 * Initialize instance and check if settings are consistent.
 *
 * <p>The regressor candidates are the columns included by the configured column filter, minus the
 * target column and minus everything in {@code exclude}. If no target column is configured and the
 * filter excludes nothing, the last column of {@code inSpec} that is compatible with
 * {@code NominalValue} is stored as target. On success {@code m_specialColumns} and
 * {@code m_pmmlOutSpec} are (re)assigned.
 *
 * @param inSpec the spec of the input table
 * @param exclude names of columns that must not be used as regressors
 * @throws InvalidSettingsException if no column is included, no nominal target can be determined,
 *             the target is missing from {@code inSpec} or not nominal, a regressor column has an
 *             unsupported type, or the target's domain lists fewer than two values
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    List<String> inputCols = new ArrayList<String>();
    FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    for (String column : includedColumns.getIncludes()) {
        inputCols.add(column);
    }
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must " + "be included.");
    }
    DataColumnSpec targetColSpec = null;
    List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    // Auto configuration when target is not set
    if (null == m_settings.getTargetColumn() && m_settings.getIncludedColumns().applyTo(inSpec).getExcludes().length == 0) {
        // NOTE(review): this loop removes every column of inSpec from inputCols, so no regressor
        // candidates remain after auto configuration - confirm that this is intended.
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            String colName = colSpec.getName();
            inputCols.remove(colName);
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                // later matches overwrite earlier ones: the last nominal column wins
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in " + "spec compatible to \"NominalValue\".");
        }
    }
    // remove all columns that should not be used
    inputCols.removeAll(exclude);
    m_specialColumns = new LinkedList<>();
    // The target may still be null here (no target configured and auto configuration skipped because
    // the filter has excludes). Compare null-safely so that this case ends in the
    // InvalidSettingsException at the bottom instead of a NullPointerException.
    final String targetColumn = m_settings.getTargetColumn();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (colName.equals(targetColumn)) {
            if (type.isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class) || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not one of the allowed types, " + "which are numeric or nomial.");
            }
        }
    }
    if (null != targetColSpec) {
        // Check if target has at least two categories.
        final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
        if (targetValues != null && targetValues.size() < 2) {
            throw new InvalidSettingsException("The target column \"" + targetColSpec.getName() + "\" has one value, only. " + "At least two target categories are expected.");
        }
        // regressor names followed by the target name in the last slot
        String[] learnerCols = new String[regressorColSpecs.size() + 1];
        for (int i = 0; i < regressorColSpecs.size(); i++) {
            learnerCols[i] = regressorColSpecs.get(i).getName();
        }
        learnerCols[learnerCols.length - 1] = targetColSpec.getName();
        // Copy of the input spec in which bit/byte vector columns are replaced by string columns that
        // carry the original kind in the "realType" property.
        final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
        for (int i = updatedSpecs.length; i-- > 0; ) {
            final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
            final DataType type = columnSpec.getType();
            if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)) {
                final DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
                colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType", type.isCompatible(BitVectorValue.class) ? "BitVector" : "ByteVector")));
                updatedSpecs[i] = colSpecCreator.createSpec();
            } else {
                updatedSpecs[i] = columnSpec;
            }
        }
        DataTableSpec updated = new DataTableSpec(updatedSpecs);
        PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
        creator.setTargetCols(Arrays.asList(targetColSpec));
        creator.setLearningCols(regressorColSpecs);
        // creator.addPreprocColNames(m_specialColumns.stream().flatMap(spec -> ));
        m_pmmlOutSpec = creator.createSpec();
    } else {
        throw new InvalidSettingsException("The target is " + "not in the input.");
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DataColumnProperties(org.knime.core.data.DataColumnProperties) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)

Example 33 with FilterResult

use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

the class Unpivot2NodeModel method createOutSpec.

/**
 * Derives the output spec of the unpivot operation: three generated columns (row id, value column
 * name, value) followed by the retained columns.
 *
 * @param spec the input table spec
 * @return the output table spec
 * @throws InvalidSettingsException if the value column filter includes no column
 */
private DataTableSpec createOutSpec(final DataTableSpec spec) throws InvalidSettingsException {
    final FilterResult valueFilter = m_valueColumns.applyTo(spec);
    final String[] valueColumns = valueFilter.getIncludes();
    if (valueColumns.length == 0) {
        throw new InvalidSettingsException("No column 'value' defined for unpivoting operation.");
    }
    final String[] noLongerAvailable = valueFilter.getRemovedFromIncludes();
    if (noLongerAvailable.length > 0) {
        setWarningMessage("Some selected value column(s) are no longer available: " + ConvenienceMethods.getShortStringFrom(Arrays.asList(noLongerAvailable), 3));
    }

    // Slots 0..2 are reserved for the generated columns; retained columns follow from slot 3 on.
    final String[] retained = m_retainedColumns.applyTo(spec).getIncludes();
    final DataColumnSpec[] columns = new DataColumnSpec[retained.length + 3];
    int slot = 3;
    for (String retainedName : retained) {
        columns[slot++] = spec.getColumnSpec(retainedName);
    }

    // The value column gets the common super type of all selected value columns.
    DataType valueType = null;
    for (String valueColumn : valueColumns) {
        final DataType current = spec.getColumnSpec(valueColumn).getType();
        valueType = (valueType == null) ? current : DataType.getCommonSuperType(valueType, current);
    }

    // Each generated name gets a "(n)" suffix, n counting from 0, until it is absent from the input.
    String rowIdName = ROWID_COLUMN;
    for (int suffix = 0; spec.containsName(rowIdName); suffix++) {
        rowIdName = ROWID_COLUMN + "(" + suffix + ")";
    }
    columns[0] = new DataColumnSpecCreator(rowIdName, StringCell.TYPE).createSpec();

    String namesColumn = VALUE_COLUMN_NAMES;
    for (int suffix = 0; spec.containsName(namesColumn); suffix++) {
        namesColumn = VALUE_COLUMN_NAMES + "(" + suffix + ")";
    }
    columns[1] = new DataColumnSpecCreator(namesColumn, StringCell.TYPE).createSpec();

    String valuesColumn = VALUE_COLUMN_VALUES;
    for (int suffix = 0; spec.containsName(valuesColumn); suffix++) {
        valuesColumn = VALUE_COLUMN_VALUES + "(" + suffix + ")";
    }
    columns[2] = new DataColumnSpecCreator(valuesColumn, valueType).createSpec();

    return new DataTableSpec(columns);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)

Example 34 with FilterResult

use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

the class SourceColumnsAsProperties method toProperties.

/**
 * Builds {@link DataColumnProperties} containing a single entry: the key
 * {@link #PROPKEY_SOURCE_COLUMN_INDICES} mapped to the string produced by {@code indicesAsString}
 * for the result of applying {@code selection} to {@code input}.
 *
 * @param selection The model for the selected columns.
 * @param input The input {@link DataTableSpec}.
 * @return The properties holding the encoded column indices under the key above.
 */
public static DataColumnProperties toProperties(final SettingsModelColumnFilter2 selection, final DataTableSpec input) {
    final Map<String, String> properties = new HashMap<>();
    final String encodedIndices = indicesAsString(selection.applyTo(input), input);
    properties.put(PROPKEY_SOURCE_COLUMN_INDICES, encodedIndices);
    return new DataColumnProperties(properties);
}
Also used : HashMap(java.util.HashMap) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) DataColumnProperties(org.knime.core.data.DataColumnProperties)

Example 35 with FilterResult

use of org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult in project knime-core by knime.

the class PolyRegLearnerNodeModel method computeSelectedColumns.

/**
 * Determines the learning (independent) columns: the columns included by the filter configuration
 * in {@code m_settings}, without the target column. If no target column is configured yet, the last
 * column of {@code spec} that is compatible with {@code DoubleValue} is chosen and stored in
 * {@code m_settings}.
 *
 * @param spec to get column names from.
 * @return The list of learning columns.
 * @throws InvalidSettingsException If no target is set and none can be chosen, or if no learning
 *             column remains.
 */
private String[] computeSelectedColumns(final DataTableSpec spec) throws InvalidSettingsException {
    String target = m_settings.getTargetColumn();
    String[] includes = m_settings.getFilterConfiguration().applyTo(spec).getIncludes();

    if (target == null) {
        if (!spec.containsCompatibleType(DoubleValue.class)) {
            throw new InvalidSettingsException("No target column selected");
        }
        // No early exit: the last double-compatible column becomes the target.
        for (DataColumnSpec candidate : spec) {
            if (candidate.getType().isCompatible(DoubleValue.class)) {
                target = candidate.getName();
            }
        }
        m_settings.setTargetColumn(target);
    }

    // The target must not be among the learning columns; drop it silently if it was included.
    final List<String> learningColumns = new ArrayList<>(Arrays.asList(includes));
    if (learningColumns.remove(target)) {
        includes = learningColumns.toArray(new String[learningColumns.size()]);
    }

    if (includes.length == 0) {
        throw new InvalidSettingsException("No double-compatible variables (learning columns) in input table");
    }
    return includes;
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)

Aggregations

FilterResult (org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)54 DataTableSpec (org.knime.core.data.DataTableSpec)29 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)29 DataColumnSpec (org.knime.core.data.DataColumnSpec)19 ArrayList (java.util.ArrayList)14 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)13 DataType (org.knime.core.data.DataType)10 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)10 BufferedDataTable (org.knime.core.node.BufferedDataTable)9 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)6 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)6 HashSet (java.util.HashSet)5 DataCell (org.knime.core.data.DataCell)5 DoubleValue (org.knime.core.data.DoubleValue)5 HashMap (java.util.HashMap)3 DataColumnProperties (org.knime.core.data.DataColumnProperties)3 DataRow (org.knime.core.data.DataRow)3 PMMLPortObjectSpecCreator (org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator)3 LinkedHashMap (java.util.LinkedHashMap)2 LinkedHashSet (java.util.LinkedHashSet)2