Search in sources :

Example 81 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class RegressionTreePredictorCellFactory method createFactory.

/**
 * Builds a cell factory for the given predictor that appends exactly one column: the prediction.
 *
 * <p>The appended column has type {@code DoubleCell.TYPE}. Its spec is obtained from a
 * {@code UniqueNameGenerator} created on the predictor's data spec, using the prediction column name
 * from the predictor's configuration.</p>
 *
 * @param predictor supplies the data spec, the model spec and the configuration
 * @return factory based on RegressionTreePredictor <b>predictor</b>
 * @throws InvalidSettingsException declared by this method; presumably raised while the model spec
 *             computes the filter indices for the data spec -- TODO confirm
 */
public static RegressionTreePredictorCellFactory createFactory(final RegressionTreePredictor predictor) throws InvalidSettingsException {
    final DataTableSpec inputSpec = predictor.getDataSpec();
    final RegressionTreeModelPortObjectSpec treeModelSpec = predictor.getModelSpec();
    final RegressionTreePredictorConfiguration config = predictor.getConfiguration();

    // The prediction is the only appended column, so the array is built directly.
    final DataColumnSpec predictionCol =
        new UniqueNameGenerator(inputSpec).newColumn(config.getPredictionColumnName(), DoubleCell.TYPE);
    final DataColumnSpec[] appendedCols = new DataColumnSpec[] {predictionCol};

    // Indices of the model's learn columns within the table that is being predicted.
    final int[] learnColumnIndices = treeModelSpec.calculateFilterIndices(inputSpec);
    return new RegressionTreePredictorCellFactory(predictor, appendedCols, learnColumnIndices);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) ArrayList(java.util.ArrayList) RegressionTreeModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.RegressionTreeModelPortObjectSpec) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator)

Example 82 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class GradientBoostingPMMLPredictorNodeModel method translateSpec.

/**
 * Derives a tree ensemble model spec from a PMML spec by moving the single PMML target column to
 * the index that equals the number of columns of the PMML data table spec.
 *
 * <p>Both preconditions (at least one target, exactly one target) are enforced through
 * {@code CheckUtils.checkArgument}.</p>
 *
 * @param pmmlSpec the PMML spec to translate; must declare exactly one target column
 * @return the model spec created from the rearranged PMML data table spec
 */
private TreeEnsembleModelPortObjectSpec translateSpec(final PMMLPortObjectSpec pmmlSpec) {
    final DataTableSpec tableSpec = pmmlSpec.getDataTableSpec();
    final ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
    final List<DataColumnSpec> targetCols = pmmlSpec.getTargetCols();

    CheckUtils.checkArgument(!targetCols.isEmpty(), "The provided PMML does not declare a target field.");
    CheckUtils.checkArgument(targetCols.size() == 1,
        "The provided PMML declares multiple target. This behavior is currently not supported.");

    final String targetName = targetCols.get(0).getName();
    final int behindAllColumns = tableSpec.getNumColumns();
    rearranger.move(targetName, behindAllColumns);

    return new TreeEnsembleModelPortObjectSpec(rearranger.createSpec());
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec)

Example 83 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class TreeEnsembleLearnerConfiguration method checkColumnSelection.

/**
 * Sanity check of the column selection, to be used in the configure of the learner nodes.
 *
 * <p>Two cases are distinguished: <br>
 * A fingerprint column is specified: it must exist in the incoming table and be compatible with a bit,
 * byte or double vector value. <br>
 * No fingerprint column is specified: the column filter must include at least one attribute.</p>
 *
 * @param inSpec Spec of the incoming table
 * @throws InvalidSettingsException thrown if the column selection makes no sense
 */
public void checkColumnSelection(final DataTableSpec inSpec) throws InvalidSettingsException {
    // The filter is applied up front, whichever of the two cases below is taken.
    final FilterResult includedAttributes = m_columnFilterConfig.applyTo(inSpec);

    if (m_fingerprintColumn == null) {
        // Learning on ordinary columns: at least one feature has to be selected.
        if (includedAttributes.getIncludes().length == 0) {
            throw new InvalidSettingsException("No attributes are selected.");
        }
        return;
    }

    // Learning on a fingerprint column: it must exist ...
    final DataColumnSpec fingerprintSpec = inSpec.getColumnSpec(m_fingerprintColumn);
    if (fingerprintSpec == null) {
        throw new InvalidSettingsException("The fingerprint column is not contained in the incoming table.");
    }

    // ... and be one of the supported vector types.
    final DataType fingerprintType = fingerprintSpec.getType();
    final boolean isSupportedVector = fingerprintType.isCompatible(BitVectorValue.class)
        || fingerprintType.isCompatible(ByteVectorValue.class)
        || fingerprintType.isCompatible(DoubleVectorValue.class);
    if (!isSupportedVector) {
        throw new InvalidSettingsException("The specified fingerprint column is not of a compatible vector type.");
    }
}
Also used : DoubleVectorValue(org.knime.core.data.vector.doublevector.DoubleVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)

Example 84 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class TreeEnsembleLearnerConfiguration method filterLearnColumns.

/**
 * Creates a rearranger that reduces the given table spec to the learning columns and moves the target
 * column behind all remaining columns.
 *
 * <p>Without a fingerprint column, every non-target column that is not in the include list of the
 * column filter is removed, as is every included column for which {@code shouldIgnoreLearnColumn}
 * returns {@code true}. The latter are reported through the warning set on the returned rearranger.
 * With a fingerprint column, only the target and the fingerprint column are kept.</p>
 *
 * @param spec spec of the input table
 * @return ColumnRearranger that filters out all columns not part of the learning columns.
 * @throws InvalidSettingsException if the target column is unset, missing or of the wrong type; if an
 *             included attribute is neither numeric nor nominal; if columns were ignored and no
 *             learning column remains; if included attributes are missing from {@code spec}; or if
 *             the fingerprint column is missing or not a bit/byte/double vector
 */
public FilterLearnColumnRearranger filterLearnColumns(final DataTableSpec spec) throws InvalidSettingsException {
    // (ColumnRearranger is a final class in v2.5)
    if (m_targetColumn == null) {
        throw new InvalidSettingsException("Target column not set");
    }
    final DataColumnSpec targetSpec = spec.getColumnSpec(m_targetColumn);
    final boolean targetIsUsable =
        targetSpec != null && targetSpec.getType().isCompatible(getRequiredTargetClass());
    if (!targetIsUsable) {
        throw new InvalidSettingsException(
            "Target column \"" + m_targetColumn + "\" does not exist or is not of the correct type");
    }

    final FilterResult filtered = m_columnFilterConfig.applyTo(spec);
    final List<String> ignoredForMissingDomain = new ArrayList<String>();
    final FilterLearnColumnRearranger result = new FilterLearnColumnRearranger(spec);

    if (m_fingerprintColumn != null) {
        // use fingerprint data
        final DataColumnSpec fingerprintSpec = spec.getColumnSpec(m_fingerprintColumn);
        boolean isSupportedVector = false;
        if (fingerprintSpec != null) {
            final DataType fingerprintType = fingerprintSpec.getType();
            isSupportedVector = fingerprintType.isCompatible(BitVectorValue.class)
                || fingerprintType.isCompatible(ByteVectorValue.class)
                || fingerprintType.isCompatible(DoubleVectorValue.class);
        }
        if (!isSupportedVector) {
            throw new InvalidSettingsException(
                "Fingerprint columnn \"" + m_fingerprintColumn + "\" does not exist or is not of correct type.");
        }
        result.keepOnly(m_targetColumn, m_fingerprintColumn);
    } else {
        // use ordinary data
        // Names are removed from this set as they are found in the table; leftovers are missing columns.
        final Set<String> pendingIncludes = new HashSet<String>(Arrays.asList(filtered.getIncludes()));
        // the target column may be part of the include list but is never a learning attribute
        pendingIncludes.remove(targetSpec.getName());

        for (final DataColumnSpec column : spec) {
            final String name = column.getName();
            if (name.equals(m_targetColumn)) {
                continue;
            }
            if (!pendingIncludes.remove(name)) {
                // not selected by the column filter
                result.remove(name);
                continue;
            }
            // selected: accept unless the type does not fit
            final DataType type = column.getType();
            if (!type.isCompatible(DoubleValue.class) && !type.isCompatible(NominalValue.class)) {
                throw new InvalidSettingsException("Attribute column \"" + name
                    + "\" is not of the expected type (must be numeric or nominal).");
            }
            if (shouldIgnoreLearnColumn(column)) {
                ignoredForMissingDomain.add(name);
                result.remove(name);
            }
        }

        // NOTE(review): if only the target is left but no column was ignored, nothing is thrown here;
        // missing includes are reported by the check below -- confirm that this is intended.
        if (result.getColumnCount() <= 1 && !ignoredForMissingDomain.isEmpty()) {
            throw new InvalidSettingsException(
                "Input table has no valid learning columns (need one additional numeric or nominal column)."
                    + " " + ignoredForMissingDomain.size()
                    + " column(s) were ignored due to missing domain information -- execute predecessor and/or "
                    + " use Domain Calculator node.");
        }

        if (!pendingIncludes.isEmpty()) {
            // list at most four of the missing attributes by name and summarize the rest as a count
            final StringBuilder missing = new StringBuilder();
            int listed = 0;
            for (final String name : pendingIncludes) {
                if (listed == 4) {
                    break;
                }
                if (listed > 0) {
                    missing.append(", ");
                }
                missing.append(name);
                listed++;
            }
            final int unlisted = pendingIncludes.size() - listed;
            if (unlisted > 0) {
                missing.append(",...").append(unlisted).append(" more");
            }
            throw new InvalidSettingsException(
                "Some selected attributes are not present in the input table: " + missing);
        }
    }

    result.move(m_targetColumn, result.getColumnCount());

    String warning = null;
    final int nrIgnored = ignoredForMissingDomain.size();
    if (nrIgnored > 0) {
        final StringBuilder text = new StringBuilder();
        text.append(nrIgnored).append(" column(s) were ignored due to missing domain information: [");
        // quote at most four names, then indicate that there are more
        final int nrShown = Math.min(nrIgnored, 4);
        for (int i = 0; i < nrShown; i++) {
            if (i > 0) {
                text.append(", ");
            }
            text.append('"').append(ignoredForMissingDomain.get(i)).append('"');
        }
        if (nrIgnored > nrShown) {
            text.append(", ...");
        }
        text.append("] -- change the node configuration or use a Domain Calculator node to fix it");
        warning = text.toString();
    }
    result.setWarning(warning);
    return result;
}
Also used : NominalValue(org.knime.core.data.NominalValue) ArrayList(java.util.ArrayList) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) HashSet(java.util.HashSet)

Example 85 with DataColumnSpec

use of org.knime.core.data.DataColumnSpec in project knime-core by knime.

the class AttributeSelectionPanel method loadSettingsFrom.

/**
 * Load settings from config <b>cfg</b> and bring the panel's controls in line with the columns
 * available in <b>inSpec</b>.
 *
 * <p>The target column box, the attribute filter panel, the fingerprint column box, the two
 * "ordinary columns" / "fingerprint column" radio buttons, and the domain, hilite and
 * target-distribution controls are updated. The statement order matters: the radio buttons are
 * toggled via {@code doClick()} several times before the final mode is chosen.</p>
 *
 * @param inSpec spec of the input table offered for column selection
 * @param cfg configuration to read the stored settings from
 * @throws NotConfigurableException declared by this method; presumably propagated from the column
 *             boxes' {@code update} calls -- TODO confirm
 */
public void loadSettingsFrom(final DataTableSpec inSpec, final TreeEnsembleLearnerConfiguration cfg) throws NotConfigurableException {
    // disabled automatic propagation of table specs
    // (the field is reset here and only assigned again on the last line of this method)
    m_lastTableSpec = null;
    // Count nominal and numeric columns; a column compatible with both is counted as nominal only.
    int nrNominalCols = 0;
    int nrNumericCols = 0;
    for (DataColumnSpec col : inSpec) {
        DataType type = col.getType();
        if (type.isCompatible(NominalValue.class)) {
            nrNominalCols += 1;
        } else if (type.isCompatible(DoubleValue.class)) {
            nrNumericCols += 1;
        }
    }
    // NOTE(review): more than one nominal column is required, presumably because one nominal column
    // is expected to serve as the target -- confirm.
    boolean hasOrdinaryColumnsInInput = nrNominalCols > 1 || nrNumericCols > 0;
    boolean hasFPColumnInInput = inSpec.containsCompatibleType(BitVectorValue.class) || inSpec.containsCompatibleType(ByteVectorValue.class) || inSpec.containsCompatibleType(DoubleVectorValue.class);
    m_targetColumnBox.update(inSpec, cfg.getTargetColumn());
    // The attribute spec is the input spec with the currently selected target column taken out.
    DataTableSpec attSpec = removeColumn(inSpec, m_targetColumnBox.getSelectedColumn());
    String fpColumn = cfg.getFingerprintColumn();
    // Start with both modes enabled; modes that the input cannot serve are disabled below.
    m_useOrdinaryColumnsRadio.setEnabled(true);
    m_useFingerprintColumnRadio.setEnabled(true);
    // default, fix later
    m_useOrdinaryColumnsRadio.doClick();
    if (hasOrdinaryColumnsInInput) {
        m_includeColumnsFilterPanel2.loadConfiguration(cfg.getColumnFilterConfig(), attSpec);
    } else {
        // No usable ordinary columns: disable that mode and load the filter against the placeholder spec.
        m_useOrdinaryColumnsRadio.setEnabled(false);
        m_useFingerprintColumnRadio.doClick();
        m_includeColumnsFilterPanel2.loadConfiguration(cfg.getColumnFilterConfig(), NO_VALID_INPUT_SPEC);
    }
    if (hasFPColumnInInput) {
        m_fingerprintColumnBox.update(inSpec, fpColumn);
    } else {
        // No vector column in the input: disable fingerprint mode and drop the configured fingerprint column.
        m_useOrdinaryColumnsRadio.doClick();
        m_fingerprintColumnBox.update(NO_VALID_INPUT_SPEC, "");
        m_useFingerprintColumnRadio.setEnabled(false);
        fpColumn = null;
    }
    // Final mode: fingerprint if one is (still) configured or if there are no ordinary columns.
    if (fpColumn != null || !hasOrdinaryColumnsInInput) {
        m_useFingerprintColumnRadio.doClick();
    } else {
        m_useOrdinaryColumnsRadio.doClick();
    }
    boolean ignoreColsNoDomain = cfg.isIgnoreColumnsWithoutDomain();
    m_ignoreColumnsWithoutDomainChecker.setSelected(ignoreColsNoDomain);
    // A positive hilite pattern count enables hiliting; otherwise it is disabled and the spinner shows 2000.
    int hiliteCount = cfg.getNrHilitePatterns();
    if (hiliteCount > 0) {
        m_enableHiliteChecker.setSelected(true);
        m_hiliteCountSpinner.setValue(hiliteCount);
    } else {
        m_enableHiliteChecker.setSelected(false);
        m_hiliteCountSpinner.setValue(2000);
    }
    m_saveTargetDistributionInNodesChecker.setSelected(cfg.isSaveTargetDistributionInNodes());
    // Remember the spec the controls were loaded with.
    m_lastTableSpec = inSpec;
}
Also used : DoubleVectorValue(org.knime.core.data.vector.doublevector.DoubleVectorValue) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType)

Aggregations

DataColumnSpec (org.knime.core.data.DataColumnSpec)800 DataTableSpec (org.knime.core.data.DataTableSpec)351 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)239 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)223 DataCell (org.knime.core.data.DataCell)187 ArrayList (java.util.ArrayList)167 DataType (org.knime.core.data.DataType)149 DataRow (org.knime.core.data.DataRow)124 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)123 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)119 DoubleValue (org.knime.core.data.DoubleValue)104 HashSet (java.util.HashSet)92 BufferedDataTable (org.knime.core.node.BufferedDataTable)77 LinkedHashSet (java.util.LinkedHashSet)65 LinkedHashMap (java.util.LinkedHashMap)56 LinkedList (java.util.LinkedList)47 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)46 DoubleCell (org.knime.core.data.def.DoubleCell)46 StringCell (org.knime.core.data.def.StringCell)45 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)43