Search in sources :

Example 86 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class NaiveBayesCellFactory method createPredictedClassColSpec.

/**
 * Builds the column spec of the predicted class column.
 *
 * @param classColumnName the desired name of the prediction column; it is passed through
 * {@link DataTableSpec#getUniqueColumnName(DataTableSpec, String)} against <code>inSpec</code>
 * @param classType the KNIME type of the class column
 * @param inSpec the <code>DataTableSpec</code> of the input data
 * @return the <code>DataColumnSpec</code> of the prediction column
 */
private static DataColumnSpec createPredictedClassColSpec(final String classColumnName, final DataType classType, final DataTableSpec inSpec) {
    final String uniqueName = DataTableSpec.getUniqueColumnName(inSpec, classColumnName);
    // Long data cells are converted into double by PMML. To end up with the type that is
    // actually used, translate the KNIME type to its PMML type and then translate that PMML
    // type back, which lets us infer the right KNIME type.
    final DataType roundTrippedType = PMMLDataDictionaryTranslator.getKNIMEDataType(
        PMMLDataDictionaryTranslator.getPMMLDataType(classType));
    return new DataColumnSpecCreator(uniqueName, roundTrippedType).createSpec();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataType(org.knime.core.data.DataType)

Example 87 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class NaiveBayesCellFactory method createResultColSpecs.

/**
 * Assembles the specs of all result columns in the order in which they are
 * appended to the original table specification: one probability column per
 * class value (only if requested), followed by the prediction column.
 *
 * @param model the {@link NaiveBayesModel} to use
 * @param predictionColName the name of the prediction column
 * @param inSpec the <code>DataTableSpec</code> of the input data to check
 * if the winner column name already exists
 * @param inclClassProbVals if the probability values should be displayed
 * @param suffix the suffix for the probability columns
 * @return <code>DataColumnSpec[]</code> with the column specifications
 * of the result columns
 */
private static DataColumnSpec[] createResultColSpecs(final NaiveBayesModel model, final String predictionColName, final DataTableSpec inSpec, final boolean inclClassProbVals, final String suffix) {
    final DataColumnSpec predictionSpec = createPredictedClassColSpec(predictionColName, model.getClassColumnDataType(), inSpec);
    if (inclClassProbVals) {
        final List<String> sortedClasses = model.getSortedClassValues();
        // one slot per class probability plus a trailing slot for the prediction column
        final DataColumnSpec[] result = new DataColumnSpec[sortedClasses.size() + 1];
        // a single creator is reused; only its name changes per class value
        final DataColumnSpecCreator probCreator = new DataColumnSpecCreator("dummy", DoubleCell.TYPE);
        final PredictorHelper helper = PredictorHelper.getInstance();
        int pos = 0;
        for (final String classValue : sortedClasses) {
            probCreator.setName(helper.probabilityColumnName(model.getClassColumnName(), classValue, suffix));
            result[pos++] = probCreator.createSpec();
        }
        result[pos] = predictionSpec;
        return result;
    }
    // no probabilities requested: the prediction column is the only result column
    return new DataColumnSpec[] { predictionSpec };
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) PredictorHelper(org.knime.base.node.mine.util.PredictorHelper) ArrayList(java.util.ArrayList)

Example 88 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class BasisFunctionLearnerNodeModel method execute.

/**
 * Starts the learning algorithm in the learner.
 *
 * @param inData the input training data at index 0
 * @param exec the execution monitor
 * @return the learned rules as data table at index 0 and the rule model
 * port object at index 1
 * @throws CanceledExecutionException if the training was canceled
 */
@Override
public PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws CanceledExecutionException {
    BufferedDataTable data = (BufferedDataTable) inData[0];
    // find all double cell columns in the data
    DataTableSpec tSpec = data.getDataTableSpec();
    // insertion-ordered and duplicate-free, so columns added twice below keep their first position
    LinkedHashSet<String> columns = new LinkedHashSet<String>(tSpec.getNumColumns());
    List<String> targetHash = Arrays.asList(m_targetColumns);
    for (int c = 0; c < tSpec.getNumColumns(); c++) {
        DataColumnSpec cSpec = tSpec.getColumnSpec(c);
        // TODO only numeric columns allowed
        if (!targetHash.contains(cSpec.getName()) && cSpec.getType().isCompatible(DoubleValue.class)) {
            columns.add(cSpec.getName());
        }
    }
    // get all data columns without target columns
    String[] dataCols = BasisFunctionFactory.findDataColumns(tSpec, targetHash);
    columns.addAll(Arrays.asList(dataCols));
    // add target columns at the end
    columns.addAll(Arrays.asList(m_targetColumns));
    // filter selected columns from input data
    String[] cols = columns.toArray(new String[0]);
    ColumnRearranger colRe = new ColumnRearranger(tSpec);
    colRe.keepOnly(cols);
    BufferedDataTable trainData = exec.createColumnRearrangeTable(data, colRe, exec);
    // print settings info
    LOGGER.debug("distance      : " + getDistance());
    LOGGER.debug("missing       : " + getMissingFct());
    LOGGER.debug("target columns: " + Arrays.toString(m_targetColumns));
    LOGGER.debug("shrink commit : " + isShrinkAfterCommit());
    LOGGER.debug("max coverage  : " + isMaxClassCoverage());
    LOGGER.debug("max no. epochs: " + m_maxEpochs);
    // create factory
    BasisFunctionFactory factory = getFactory(trainData.getDataTableSpec());
    // start training
    BasisFunctionLearnerTable table = new BasisFunctionLearnerTable(trainData, dataCols, m_targetColumns, factory, BasisFunctionLearnerTable.MISSINGS[m_missing], m_shrinkAfterCommit, m_maxCoverage, m_maxEpochs, exec);
    // NOTE: a handler-stripped copy of the model's column specs used to be built here but was
    // never read afterwards; that dead computation has been removed.
    // set translator mapping
    m_translator.setMapper(table.getHiLiteMapper());
    // keep the learner's info so it is available on the node model after execution
    ModelContent modelInfo = new ModelContent(MODEL_INFO);
    table.saveInfos(modelInfo);
    m_modelInfo = modelInfo;
    // return rules[0] and rule_model[1]
    return new PortObject[] { exec.createBufferedDataTable(table, exec), createPortObject(new BasisFunctionModelContent(table.getDataTableSpec(), table.getBasisFunctions())) };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ModelContent(org.knime.core.node.ModelContent) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject)

Example 89 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class OneMissingValueReplacementFunction method getDataTableSpec.

/**
 * {@inheritDoc}
 */
@Override
public DataTableSpec getDataTableSpec() {
    final DataTableSpec modelSpec = getFactory().getModelSpec();
    // the column whose domain is rebuilt sits five positions before the end of the model spec
    final int idx = modelSpec.getNumColumns() - 5;
    final DataColumnSpec originalColSpec = modelSpec.getColumnSpec(idx);
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(originalColSpec);
    // possible values = keys of m_bfs plus any values already listed in the column domain,
    // ordered by the comparator of the column type
    final TreeSet<DataCell> possibleValues = new TreeSet<DataCell>(originalColSpec.getType().getComparator());
    possibleValues.addAll(m_bfs.keySet());
    if (originalColSpec.getDomain().hasValues()) {
        possibleValues.addAll(originalColSpec.getDomain().getValues());
    }
    specCreator.setDomain(new DataColumnDomainCreator(possibleValues).createDomain());
    final ColumnRearranger rearranger = new ColumnRearranger(modelSpec);
    // the cell content is passed through unchanged; only the column spec is replaced
    final SingleCellFactory passThrough = new SingleCellFactory(specCreator.createSpec()) {

        @Override
        public DataCell getCell(final DataRow row) {
            return row.getCell(idx);
        }
    };
    rearranger.replace(passThrough, idx);
    return rearranger.createSpec();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) TreeSet(java.util.TreeSet) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) DataRow(org.knime.core.data.DataRow)

Example 90 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class RegressionPredictorCellFactory method createColumnSpec.

/**
 * Creates the specs of the columns appended by the predictor, if possible.
 * The result holds one probability column per target category (only when
 * probabilities are requested and the target is nominal), followed by the
 * prediction column.
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return The spec of the output or null if probabilities are requested for
 * a nominal target whose domain lists no values
 * @throws InvalidSettingsException when tableSpec and portSpec do not match
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    // The model has to name a target column
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model" + " does not specify a target column.");
    }
    // Every learning column must exist in the table with a compatible type
    for (DataColumnSpec modelColSpec : portSpec.getLearningCols()) {
        final String modelColName = modelColSpec.getName();
        if (!tableSpec.containsName(modelColName)) {
            throw new InvalidSettingsException("The table for prediction " + "does not contain the column \"" + modelColName + "\".");
        }
        final DataType tableColType = tableSpec.getColumnSpec(modelColName).getType();
        if (modelColSpec.getType().isCompatible(NominalValue.class)) {
            // bit vector and byte vector columns are accepted in place of nominal ones
            final boolean acceptable = tableColType.isCompatible(BitVectorValue.class) || tableColType.isCompatible(ByteVectorValue.class) || tableColType.isCompatible(NominalValue.class);
            if (!acceptable) {
                throw new InvalidSettingsException("The column \"" + modelColName + "\" in the table of prediction " + "is expected to be  compatible with " + "\"NominalValue\".");
            }
        } else if (modelColSpec.getType().isCompatible(DoubleValue.class) && !tableColType.isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("The column \"" + modelColName + "\" in the table of prediction " + "is expected to be numeric.");
        }
    }
    // The list of added columns
    final List<DataColumnSpec> appendedSpecs = new ArrayList<DataColumnSpec>();
    final String targetCol = portSpec.getTargetFields().get(0);
    final DataColumnSpec targetColSpec = portSpec.getDataTableSpec().getColumnSpec(targetCol);
    if (settings.getIncludeProbabilities() && targetColSpec.getType().isCompatible(NominalValue.class)) {
        if (!targetColSpec.getDomain().hasValues()) {
            // without the category values the probability columns cannot be named
            return null;
        }
        final List<DataCell> categories = new ArrayList<DataCell>(targetColSpec.getDomain().getValues());
        for (DataCell category : categories) {
            final String probColName = DataTableSpec.getUniqueColumnName(tableSpec, "P (" + targetCol + "=" + category.toString() + ")" + settings.getPropColumnSuffix());
            final DataColumnSpecCreator probCreator = new DataColumnSpecCreator(probColName, DoubleCell.TYPE);
            // probability columns get the domain bounds 0.0 and 1.0
            probCreator.setDomain(new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
            appendedSpecs.add(probCreator.createSpec());
        }
    }
    final String predictionName;
    if (settings.getHasCustomPredictionName()) {
        predictionName = settings.getCustomPredictionName();
    } else {
        predictionName = "Prediction (" + targetCol + ")";
    }
    final String uniquePredictionName = DataTableSpec.getUniqueColumnName(tableSpec, predictionName);
    // nominal targets keep their own type and domain, everything else is predicted as double
    final boolean nominalTarget = targetColSpec.getType().isCompatible(NominalValue.class);
    final DataColumnSpecCreator predictionCreator = new DataColumnSpecCreator(uniquePredictionName, nominalTarget ? targetColSpec.getType() : DoubleCell.TYPE);
    if (nominalTarget) {
        predictionCreator.setDomain(new DataColumnDomainCreator(targetColSpec.getDomain()).createDomain());
    }
    appendedSpecs.add(predictionCreator.createSpec());
    return appendedSpecs.toArray(new DataColumnSpec[0]);
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) NominalValue(org.knime.core.data.NominalValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Aggregations

DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)267 DataColumnSpec (org.knime.core.data.DataColumnSpec)210 DataTableSpec (org.knime.core.data.DataTableSpec)132 DataCell (org.knime.core.data.DataCell)92 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)77 DataType (org.knime.core.data.DataType)74 DataRow (org.knime.core.data.DataRow)73 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)57 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)51 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)48 ArrayList (java.util.ArrayList)46 DoubleCell (org.knime.core.data.def.DoubleCell)45 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)44 StringCell (org.knime.core.data.def.StringCell)29 BufferedDataTable (org.knime.core.node.BufferedDataTable)23 DoubleValue (org.knime.core.data.DoubleValue)22 HashSet (java.util.HashSet)19 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)17 DataColumnDomain (org.knime.core.data.DataColumnDomain)16 DefaultRow (org.knime.core.data.def.DefaultRow)16