Search in sources :

Example 31 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already
 * present in the domain.
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns
 * @return The data with extended domain information
 * @throws InvalidSettingsException
 * @throws CanceledExecutionException
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
            valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        min.put(col, Double.MAX_VALUE);
        max.put(col, Double.MIN_VALUE);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                min.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
        if (recalcValuesFor.contains(columnSpec.getName())) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            DataColumnSpec newColSpec = specCreator.createSpec();
            newColSpecList.add(newColSpec);
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    return newDataTable;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 32 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class ColCombineNodeModel method createColumnRearranger.

private ColumnRearranger createColumnRearranger(final DataTableSpec spec) {
    ColumnRearranger result = new ColumnRearranger(spec);
    DataColumnSpec append = new DataColumnSpecCreator(m_newColName, StringCell.TYPE).createSpec();
    final int[] indices = new int[m_columns.length];
    List<String> colNames = Arrays.asList(m_columns);
    int j = 0;
    for (int k = 0; k < spec.getNumColumns(); k++) {
        DataColumnSpec cs = spec.getColumnSpec(k);
        if (colNames.contains(cs.getName())) {
            indices[j] = k;
            j++;
        }
    }
    // ", " -> ","
    // "  " -> "  " (do not let the resulting string be empty)
    // " bla bla " -> "bla bla"
    final String delimTrim = trimDelimString(m_delimString);
    result.append(new SingleCellFactory(append) {

        @Override
        public DataCell getCell(final DataRow row) {
            String[] cellContents = new String[indices.length];
            for (int i = 0; i < indices.length; i++) {
                DataCell c = row.getCell(indices[i]);
                String s = c instanceof StringValue ? ((StringValue) c).getStringValue() : c.toString();
                cellContents[i] = s;
            }
            return new StringCell(handleContent(cellContents, delimTrim));
        }
    });
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) StringValue(org.knime.core.data.StringValue) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 33 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns
 * @return The data with extended domain information
 * @throws InvalidSettingsException ...
 * @throws CanceledExecutionException ...
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
            valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        min.put(col, Double.MAX_VALUE);
        max.put(col, Double.MIN_VALUE);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                min.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
        if (recalcValuesFor.contains(columnSpec.getName())) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            DataColumnSpec newColSpec = specCreator.createSpec();
            newColSpecList.add(newColSpec);
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    return newDataTable;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 34 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class ProximityMatrix method createTable.

public BufferedDataTable createTable(final ExecutionContext exec) throws CanceledExecutionException {
    int numCols = getNumCols();
    int numRows = getNumRows();
    DataColumnSpec[] colSpecs = new DataColumnSpec[numCols];
    for (int i = 0; i < colSpecs.length; i++) {
        colSpecs[i] = new DataColumnSpecCreator(getRowKeyForTable(1, i).getString(), DoubleCell.TYPE).createSpec();
    }
    DataTableSpec tableSpec = new DataTableSpec(colSpecs);
    BufferedDataContainer container = exec.createDataContainer(tableSpec);
    for (int i = 0; i < numRows; i++) {
        exec.checkCanceled();
        exec.setProgress(((double) i) / numRows, "Row " + i + "/" + numRows);
        DataCell[] cells = new DataCell[numCols];
        for (int j = 0; j < numCols; j++) {
            cells[j] = new DoubleCell(getEntryAt(i, j));
        }
        container.addRowToTable(new DefaultRow(getRowKeyForTable(0, i), cells));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 35 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class NormalizerNodeModel method calculate.

/**
 * New normalized {@link org.knime.core.data.DataTable} is created depending
 * on the mode.
 */
/**
 * @param inData The input data.
 * @param exec For BufferedDataTable creation and progress.
 * @return the result of the calculation
 * @throws Exception If the node calculation fails for any reason.
 */
protected CalculationResult calculate(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getSpec();
    // extract selected numeric columns
    updateNumericColumnSelection(inSpec);
    Normalizer ntable = new Normalizer(inTable, m_columns);
    long rowcount = inTable.size();
    ExecutionMonitor prepareExec = exec.createSubProgress(0.3);
    AffineTransTable outTable;
    boolean fixDomainBounds = false;
    switch(m_mode) {
        case NONORM_MODE:
            return new CalculationResult(inTable, new DataTableSpec(), new AffineTransConfiguration());
        case MINMAX_MODE:
            fixDomainBounds = true;
            outTable = ntable.doMinMaxNorm(m_max, m_min, prepareExec);
            break;
        case ZSCORE_MODE:
            outTable = ntable.doZScoreNorm(prepareExec);
            break;
        case DECIMALSCALING_MODE:
            outTable = ntable.doDecimalScaling(prepareExec);
            break;
        default:
            throw new Exception("No mode set");
    }
    if (outTable.getErrorMessage() != null) {
        // something went wrong, report and throw an exception
        throw new Exception(outTable.getErrorMessage());
    }
    if (ntable.getErrorMessage() != null) {
        // something went wrong during initialization, report.
        setWarningMessage(ntable.getErrorMessage());
    }
    DataTableSpec modelSpec = FilterColumnTable.createFilterTableSpec(inSpec, m_columns);
    AffineTransConfiguration configuration = outTable.getConfiguration();
    DataTableSpec spec = outTable.getDataTableSpec();
    // the same transformation, which is not guaranteed to snap to min/max)
    if (fixDomainBounds) {
        DataColumnSpec[] newColSpecs = new DataColumnSpec[spec.getNumColumns()];
        for (int i = 0; i < newColSpecs.length; i++) {
            newColSpecs[i] = spec.getColumnSpec(i);
        }
        for (int i = 0; i < m_columns.length; i++) {
            int index = spec.findColumnIndex(m_columns[i]);
            DataColumnSpecCreator creator = new DataColumnSpecCreator(newColSpecs[index]);
            DataColumnDomainCreator domCreator = new DataColumnDomainCreator(newColSpecs[index].getDomain());
            domCreator.setLowerBound(new DoubleCell(m_min));
            domCreator.setUpperBound(new DoubleCell(m_max));
            creator.setDomain(domCreator.createDomain());
            newColSpecs[index] = creator.createSpec();
        }
        spec = new DataTableSpec(spec.getName(), newColSpecs);
    }
    ExecutionMonitor normExec = exec.createSubProgress(.7);
    BufferedDataContainer container = exec.createDataContainer(spec);
    long count = 1;
    for (DataRow row : outTable) {
        normExec.checkCanceled();
        normExec.setProgress(count / (double) rowcount, "Normalizing row no. " + count + " of " + rowcount + " (\"" + row.getKey() + "\")");
        container.addRowToTable(row);
        count++;
    }
    container.close();
    return new CalculationResult(container.getTable(), modelSpec, configuration);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) Normalizer(org.knime.base.data.normalize.Normalizer) DoubleCell(org.knime.core.data.def.DoubleCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) AffineTransConfiguration(org.knime.base.data.normalize.AffineTransConfiguration) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Aggregations

DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)267 DataColumnSpec (org.knime.core.data.DataColumnSpec)210 DataTableSpec (org.knime.core.data.DataTableSpec)132 DataCell (org.knime.core.data.DataCell)92 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)77 DataType (org.knime.core.data.DataType)74 DataRow (org.knime.core.data.DataRow)73 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)57 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)51 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)48 ArrayList (java.util.ArrayList)46 DoubleCell (org.knime.core.data.def.DoubleCell)45 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)44 StringCell (org.knime.core.data.def.StringCell)29 BufferedDataTable (org.knime.core.node.BufferedDataTable)23 DoubleValue (org.knime.core.data.DoubleValue)22 HashSet (java.util.HashSet)19 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)17 DataColumnDomain (org.knime.core.data.DataColumnDomain)16 DefaultRow (org.knime.core.data.def.DefaultRow)16