Search in sources :

Example 21 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Checks that every active column of the PMML model spec is present in the data table spec with an identical
 * type, then derives the output spec: an optional confidence column followed by the prediction column, which is
 * either appended or replaces an existing column depending on the node settings.
 *
 * @throws InvalidSettingsException if at least one active model column is missing from the table or has a
 *             different type
 */
@Override
protected DataTableSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec original = (DataTableSpec) inSpecs[DATA_INDEX];
    final ColumnRearranger rearranger = new ColumnRearranger(original);
    final PMMLPortObjectSpec portObjectSpec = (PMMLPortObjectSpec) inSpecs[MODEL_INDEX];

    // gather all model columns that the table cannot serve (absent, or present with another type)
    final List<DataColumnSpec> mismatches = new ArrayList<DataColumnSpec>();
    for (final DataColumnSpec required : portObjectSpec.getActiveColumnList()) {
        final String requiredName = required.getName();
        final boolean matches = original.containsName(requiredName)
            && original.getColumnSpec(requiredName).getType().equals(required.getType());
        if (!matches) {
            mismatches.add(required);
        }
    }
    if (!mismatches.isEmpty()) {
        final StringBuilder message = new StringBuilder("Incompatible to the table, the following columns are not present, or have a wrong type:");
        for (final DataColumnSpec mismatch : mismatches) {
            message.append("\n   ").append(mismatch);
        }
        throw new InvalidSettingsException(message.toString());
    }

    // the prediction column takes the type of the first target column, falling back to string without targets
    final List<DataColumnSpec> targetCols = portObjectSpec.getTargetCols();
    final DataType predictionType;
    if (targetCols.isEmpty()) {
        predictionType = StringCell.TYPE;
    } else {
        predictionType = targetCols.get(0).getType();
    }

    final boolean replaceExisting = m_doReplaceColumn.getBooleanValue();
    final String predictionName;
    if (replaceExisting) {
        predictionName = m_replaceColumn.getStringValue();
    } else {
        predictionName = DataTableSpec.getUniqueColumnName(original, m_outputColumn.getStringValue());
    }
    final DataColumnSpec predictionSpec = new DataColumnSpecCreator(predictionName, predictionType).createSpec();
    // only the resulting spec is needed here; the factory refuses to produce cells
    final SingleCellFactory predictionFactory = new SingleCellFactory(predictionSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            throw new IllegalStateException();
        }
    };

    if (m_addConfidence.getBooleanValue()) {
        // the confidence column is added before the prediction column; its name is made unique against the
        // rearranger's spec at this point
        final String confidenceName =
            DataTableSpec.getUniqueColumnName(rearranger.createSpec(), m_confidenceColumn.getStringValue());
        final DataColumnSpec confidenceSpec = new DataColumnSpecCreator(confidenceName, DoubleCell.TYPE).createSpec();
        rearranger.append(new SingleCellFactory(confidenceSpec) {

            @Override
            public DataCell getCell(final DataRow row) {
                throw new IllegalStateException();
            }
        });
    }

    if (replaceExisting) {
        rearranger.replace(predictionFactory, m_replaceColumn.getStringValue());
    } else {
        rearranger.append(predictionFactory);
    }
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 22 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class NumericOutliersReviser method replaceOutliers.

/**
 * Streams the row input to the row output, replacing the cells of the outlier columns by their treated
 * counterparts. While doing so the member counts, the outlier replacement counts, the counts for groups without
 * stored intervals and (if enabled) the new domains are updated.
 *
 * @param exec the execution context
 * @param in the row input whose outliers have to be treated
 * @param out the row output whose outliers have been treated
 * @param outlierModel the model storing the permitted intervals
 * @param memberCounter the member counter
 * @param outlierRepCounter the outlier replacement counter
 * @param missingGroupsCounter the missing groups counter
 * @throws Exception any exception to indicate an error, cancellation
 */
private void replaceOutliers(final ExecutionContext exec, final RowInput in, final RowOutput out, final NumericOutliersModel outlierModel, final MemberCounter memberCounter, final MemberCounter outlierRepCounter, final MemberCounter missingGroupsCounter) throws Exception {
    // number of columns that are checked for outliers
    final int noOutliers = m_outlierColNames.length;
    final DataTableSpec inSpec = in.getDataTableSpec();
    // the re-arranger overwrites the cells of the outlier columns
    final ColumnRearranger colRearranger = new ColumnRearranger(inSpec);
    // positions of the outlier columns within the input table
    final int[] outlierIndices = calculateOutlierIndicies(inSpec);
    final DataColumnSpec[] outlierSpecs = new DataColumnSpec[noOutliers];
    for (int col = 0; col < noOutliers; col++) {
        outlierSpecs[col] = inSpec.getColumnSpec(outlierIndices[col]);
    }
    // a single buffer is reused for all rows since the re-arranger copies the values anyway
    // NOTE(review): the buffer is shared between getCells invocations while the factory is constructed with
    // 'true' as first argument - confirm that this flag cannot lead to concurrent invocations here
    final DataCell[] rowBuffer = new DataCell[noOutliers];
    final AbstractCellFactory treatingFactory = new AbstractCellFactory(true, outlierSpecs) {

        @Override
        public DataCell[] getCells(final DataRow row) {
            final GroupKey groupKey = outlierModel.getKey(row, inSpec);
            // null if no intervals are stored for the group of this row
            final Map<String, double[]> intervals = outlierModel.getGroupIntervals(groupKey);
            for (int col = 0; col < noOutliers; col++) {
                final DataCell input = row.getCell(outlierIndices[col]);
                final String colName = m_outlierColNames[col];
                // missing cells and cells of unknown groups pass through untouched
                DataCell output = input;
                if (!input.isMissing()) {
                    if (intervals == null) {
                        // unknown group, only record its occurrence
                        missingGroupsCounter.incrementMemberCount(colName, groupKey);
                    } else {
                        memberCounter.incrementMemberCount(colName, groupKey);
                        // treat the value of the cell if it is an outlier
                        output = treatCellValue(intervals.get(colName), input);
                    }
                }
                // a changed value means the cell was an outlier
                if (!output.equals(input)) {
                    outlierRepCounter.incrementMemberCount(colName, groupKey);
                }
                // update the domain if necessary
                if (m_updateDomain && !output.isMissing()) {
                    m_domainUpdater.updateDomain(colName, ((DoubleValue) output).getDoubleValue());
                }
                rowBuffer[col] = output;
            }
            return rowBuffer;
        }
    };
    // replace the outlier columns by their updated versions and stream the data
    colRearranger.replace(treatingFactory, outlierIndices);
    colRearranger.createStreamableFunction().runFinal(new PortInput[] { in }, new PortOutput[] { out }, exec);
    exec.setProgress(1);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) GroupKey(org.knime.base.node.preproc.groupby.GroupKey) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell)

Example 23 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class HistogramColumn method constructFromDataArray.

/**
 * Constructs the helper data structures from the numeric histogram models and the data as {@link DataArray}.
 * <p>
 * The first map of the result leads from column index over bin index to the keys of the rows falling into that
 * bin, the second one from column index over cell value to the keys of the rows holding that value.
 *
 * @param histograms The numeric histograms.
 * @param data The input data.
 * @param nominalColumnNames The nominal column names.
 * @return The helper data structures.
 * @see #construct(Map, DataTable, Set)
 */
protected static Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> constructFromDataArray(final Map<Integer, HistogramNumericModel> histograms, final DataTable data, final Set<String> nominalColumnNames) {
    final Map<Integer, Map<Integer, Set<RowKey>>> numericMapping = new HashMap<Integer, Map<Integer, Set<RowKey>>>();
    final Map<Integer, Map<DataValue, Set<RowKey>>> nominalMapping = new HashMap<Integer, Map<DataValue, Set<RowKey>>>();
    final DataTableSpec tableSpec = data.getDataTableSpec();

    // first pass over the spec: register an empty entry for every column that gets tracked
    for (final DataColumnSpec colSpec : tableSpec) {
        final String colName = colSpec.getName();
        final Integer colIndex = Integer.valueOf(tableSpec.findColumnIndex(colName));
        // numeric columns are tracked only if a (non-null) histogram model exists for them
        final boolean numeric = colSpec.getType().isCompatible(DoubleValue.class);
        if (numeric && histograms.get(colIndex) != null) {
            numericMapping.put(colIndex, new HashMap<Integer, Set<RowKey>>());
        }
        // nominal columns are those with domain values or those explicitly named by the caller
        if (colSpec.getDomain().hasValues() || nominalColumnNames.contains(colName)) {
            nominalMapping.put(colIndex, new HashMap<DataValue, Set<RowKey>>());
        }
    }

    // second pass over the data: assign each row key to its bin / value per tracked column
    for (final DataRow dataRow : data) {
        for (final Entry<Integer, Map<Integer, Set<RowKey>>> numericEntry : numericMapping.entrySet()) {
            final Integer colIndex = numericEntry.getKey();
            final DataCell cell = dataRow.getCell(colIndex);
            if (!(cell instanceof DoubleValue)) {
                continue;
            }
            final Integer bin = Integer.valueOf(histograms.get(colIndex).findBin((DoubleValue) cell));
            final Map<Integer, Set<RowKey>> rowsPerBin = numericEntry.getValue();
            Set<RowKey> rowKeys = rowsPerBin.get(bin);
            if (rowKeys == null) {
                rowKeys = new HashSet<RowKey>();
                rowsPerBin.put(bin, rowKeys);
            }
            rowKeys.add(dataRow.getKey());
        }
        for (final Entry<Integer, Map<DataValue, Set<RowKey>>> nominalEntry : nominalMapping.entrySet()) {
            final DataCell cell = dataRow.getCell(nominalEntry.getKey().intValue());
            if (cell.isMissing()) {
                continue;
            }
            final Map<DataValue, Set<RowKey>> rowsPerValue = nominalEntry.getValue();
            Set<RowKey> rowKeys = rowsPerValue.get(cell);
            if (rowKeys == null) {
                rowKeys = new HashSet<RowKey>();
                rowsPerValue.put(cell, rowKeys);
            }
            rowKeys.add(dataRow.getKey());
        }
    }
    return Pair.create(numericMapping, nominalMapping);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) Set(java.util.Set) HashSet(java.util.HashSet) RowKey(org.knime.core.data.RowKey) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataValue(org.knime.core.data.DataValue) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 24 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class RankCorrelationComputeNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Determines the included columns from the column filter (creating and defaulting the filter on first use) and
 * derives the output specs from them. The third output spec is {@code null}.
 *
 * @throws InvalidSettingsException if the column filter includes no column
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    final DataTableSpec tableSpec = (DataTableSpec) inSpecs[0];
    // no previous configuration: create the filter and let it pick its defaults
    final boolean autoConfigure = m_columnFilterModel == null;
    if (autoConfigure) {
        m_columnFilterModel = createColumnFilterModel();
        m_columnFilterModel.loadDefaults(tableSpec);
    }
    final FilterResult filterResult = m_columnFilterModel.applyTo(tableSpec);
    final String[] includes = filterResult.getIncludes();
    if (autoConfigure) {
        setWarningMessage("Auto configuration: Using all suitable columns (in total " + includes.length + ")");
    }
    if (includes.length == 0) {
        throw new InvalidSettingsException("No columns selected");
    }
    final PortObjectSpec tableOutSpec = PMCCPortObjectAndSpec.createOutSpec(includes);
    final PortObjectSpec modelOutSpec = new PMCCPortObjectAndSpec(includes);
    return new PortObjectSpec[] { tableOutSpec, modelOutSpec, null };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMCCPortObjectAndSpec(org.knime.base.node.preproc.correlation.pmcc.PMCCPortObjectAndSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)

Example 25 with DataTableSpec

use of org.knime.core.data.DataTableSpec in project knime-core by knime.

the class CronbachNodeModel method configure.

/**
 * {@inheritDoc}
 * <p>
 * Requires at least one double compatible column in the input and at least two columns included by the column
 * filter (the filter is created and defaulted on first use).
 *
 * @throws InvalidSettingsException if the input has no double compatible column or fewer than two columns are
 *             included
 */
@Override
protected PortObjectSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec in = (DataTableSpec) inSpecs[0];
    if (!in.containsCompatibleType(DoubleValue.class)) {
        throw new InvalidSettingsException("No double compatible columns in input");
    }
    final String[] includes;
    if (m_columnFilterModel == null) {
        m_columnFilterModel = createColumnFilterModel();
        // auto-configure, no previous configuration
        m_columnFilterModel.loadDefaults(in);
        includes = m_columnFilterModel.applyTo(in).getIncludes();
        setWarningMessage("Auto configuration: Using all suitable columns (in total " + includes.length + ")");
    } else {
        FilterResult applyTo = m_columnFilterModel.applyTo(in);
        includes = applyTo.getIncludes();
    }
    // the message asks for at least two columns, so a single included column must be rejected as well
    // (previously only an empty selection was refused)
    if (includes.length < 2) {
        throw new InvalidSettingsException("Please include at least two numerical columns!");
    }
    return new PortObjectSpec[] { getDataTableSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) FilterResult(org.knime.core.node.util.filter.NameFilterConfiguration.FilterResult)

Aggregations

DataTableSpec (org.knime.core.data.DataTableSpec)938 DataColumnSpec (org.knime.core.data.DataColumnSpec)340 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)306 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)228 BufferedDataTable (org.knime.core.node.BufferedDataTable)226 DataCell (org.knime.core.data.DataCell)186 DataRow (org.knime.core.data.DataRow)170 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)136 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)129 DataType (org.knime.core.data.DataType)109 ArrayList (java.util.ArrayList)106 PortObjectSpec (org.knime.core.node.port.PortObjectSpec)98 DoubleValue (org.knime.core.data.DoubleValue)94 DefaultRow (org.knime.core.data.def.DefaultRow)92 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)90 ExecutionContext (org.knime.core.node.ExecutionContext)68 PortObject (org.knime.core.node.port.PortObject)66 PMMLPortObjectSpec (org.knime.core.node.port.pmml.PMMLPortObjectSpec)62 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)61 RowKey (org.knime.core.data.RowKey)59