Search in sources :

Example 21 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class JoinedTable method createSpec.

/**
 * Creates a new DataTableSpec as the result of merging a <code>left</code>
 * and a <code>right</code> table. Duplicate column names are treated
 * as given by the <code>duplicateMethod</code> argument, i.e.
 * <ul>
 * <li> {@link #METHOD_FAIL} throw exception</li>
 * <li> {@link #METHOD_FILTER} ignore duplicates in the right table</li>
 * <li> {@link #METHOD_APPEND_SUFFIX} append a suffix given by the
 * <code>suffix</code> argument to occurring duplicates</li>
 * </ul>
 *
 * <p>After duplicate handling, a color, size or shape handler is removed
 * from a right column whenever at least one left column already carries a
 * handler of the same kind.
 *
 * @param left the left part of this table
 * @param right and the corresponding right part
 * @param duplicateMethod the method on how to treat duplicates
 * @param suffix the suffix that is used when the method is
 *            {@link #METHOD_APPEND_SUFFIX}. In case of any other
 *            method this argument is ignored.
 * @return the spec as result of merging both table specs
 * @throws IllegalArgumentException in case of duplicate column names and no
 *             special treatment is requested, if the duplicate method is
 *             unknown, or if a duplicate needs to be renamed but the
 *             suffix is empty
 * @throws NullPointerException if either table is <code>null</code>
 */
public static final DataTableSpec createSpec(final DataTableSpec left, final DataTableSpec right, final String duplicateMethod, final String suffix) {
    DataColumnSpec[] leftCols;
    DataColumnSpec[] rightCols;
    if (METHOD_FAIL.equals(duplicateMethod)) {
        leftCols = copyColumnSpecs(left);
        rightCols = copyColumnSpecs(right);
        Set<String> leftNames = new HashSet<String>();
        for (DataColumnSpec leftCol : leftCols) {
            leftNames.add(leftCol.getName());
        }
        for (DataColumnSpec rightCol : rightCols) {
            if (leftNames.contains(rightCol.getName())) {
                throw new IllegalArgumentException("Duplicate column: " + rightCol.getName());
            }
        }
    } else if (METHOD_FILTER.equals(duplicateMethod)) {
        String[] survivers = getSurvivers(left, right);
        DataTableSpec newRight = FilterColumnTable.createFilterTableSpec(right, survivers);
        leftCols = copyColumnSpecs(left);
        rightCols = copyColumnSpecs(newRight);
    } else if (METHOD_APPEND_SUFFIX.equals(duplicateMethod)) {
        final int rightColCount = right.getNumColumns();
        // names created by renaming in this call; they must not be handed out twice
        Set<String> newInvented = new HashSet<String>();
        DataColumnSpec[] newCols = new DataColumnSpec[rightColCount];
        for (int i = 0; i < rightColCount; i++) {
            DataColumnSpec col = right.getColumnSpec(i);
            String name = col.getName();
            boolean invented = false;
            while (left.containsName(name) || newInvented.contains(name)) {
                if (suffix != null && suffix.isEmpty()) {
                    // appending an empty suffix never changes the name, so
                    // the renaming loop below could not terminate
                    throw new IllegalArgumentException("Cannot rename duplicate column \"" + name + "\": suffix must not be empty");
                }
                invented = true;
                do {
                    name = name + suffix;
                // we also need to keep track that we don't "invent" a
                // name that is used in the right table already
                } while (right.containsName(name));
            }
            if (invented) {
                newInvented.add(name);
                DataColumnSpecCreator creator = new DataColumnSpecCreator(col);
                creator.setName(name);
                newCols[i] = creator.createSpec();
            } else {
                newCols[i] = col;
            }
        }
        DataTableSpec newRight = new DataTableSpec(newCols);
        leftCols = copyColumnSpecs(left);
        rightCols = copyColumnSpecs(newRight);
    } else {
        throw new IllegalArgumentException("Unknown method: " + duplicateMethod);
    }
    // find out which kinds of property handlers are present on the left side
    boolean isLeftContainColorHandler = false;
    boolean isLeftContainSizeHandler = false;
    boolean isLeftContainShapeHandler = false;
    for (DataColumnSpec s : leftCols) {
        isLeftContainColorHandler |= s.getColorHandler() != null;
        isLeftContainSizeHandler |= s.getSizeHandler() != null;
        isLeftContainShapeHandler |= s.getShapeHandler() != null;
    }
    // strip handlers from right columns whose kind already exists on the left
    for (int i = 0; i < rightCols.length; i++) {
        DataColumnSpec s = rightCols[i];
        boolean removeColorHandler = s.getColorHandler() != null && isLeftContainColorHandler;
        boolean removeSizeHandler = s.getSizeHandler() != null && isLeftContainSizeHandler;
        boolean removeShapeHandler = s.getShapeHandler() != null && isLeftContainShapeHandler;
        if (removeColorHandler || removeSizeHandler || removeShapeHandler) {
            DataColumnSpecCreator c = new DataColumnSpecCreator(s);
            if (removeColorHandler) {
                c.setColorHandler(null);
            }
            if (removeSizeHandler) {
                c.setSizeHandler(null);
            }
            if (removeShapeHandler) {
                c.setShapeHandler(null);
            }
            rightCols[i] = c.createSpec();
        }
    }
    DataColumnSpec[] sp = new DataColumnSpec[leftCols.length + rightCols.length];
    System.arraycopy(leftCols, 0, sp, 0, leftCols.length);
    System.arraycopy(rightCols, 0, sp, leftCols.length, rightCols.length);
    return new DataTableSpec(sp);
}

/**
 * Copies all column specs of the given table spec into a new array,
 * keeping the column order.
 *
 * @param spec the table spec to read the columns from
 * @return a new array holding the column specs of <code>spec</code>
 * @throws NullPointerException if <code>spec</code> is <code>null</code>
 */
private static DataColumnSpec[] copyColumnSpecs(final DataTableSpec spec) {
    final int count = spec.getNumColumns();
    DataColumnSpec[] cols = new DataColumnSpec[count];
    for (int i = 0; i < count; i++) {
        cols[i] = spec.getColumnSpec(i);
    }
    return cols;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashSet(java.util.HashSet)

Example 22 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class StatisticsTable method calculateAllMoments.

/**
 * Calculates <b>all the statistical moments in one pass</b>. After the
 * call of this operation, the statistical moments can be obtained very fast
 * from all the other methods.
 *
 * <p>In a single iteration over the table this method updates the
 * per-column minimum, maximum, sum and missing value count, then derives
 * mean and variance and finally builds a table spec whose column domains
 * carry the determined bounds.
 *
 * @param rowCount Row count of table for progress, may be NaN if unknown.
 * @param exec object to check with if user canceled the operation; if
 *            <code>null</code>, no progress is reported and cancellation
 *            is not checked
 * @throws CanceledExecutionException if user canceled
 * @throws IllegalArgumentException if rowCount argument &lt; 0
 */
protected void calculateAllMoments(final double rowCount, final ExecutionMonitor exec) throws CanceledExecutionException {
    if (rowCount < 0.0) {
        throw new IllegalArgumentException("rowCount argument must not < 0: " + rowCount);
    }
    final DataTableSpec origSpec = m_table.getDataTableSpec();
    final int numOfCols = origSpec.getNumColumns();
    // the number of non-missing cells in each double-compatible column
    // (Java zero-initializes both arrays)
    final int[] validCount = new int[numOfCols];
    final double[] sumsquare = new double[numOfCols];
    final DataValueComparator[] comp = new DataValueComparator[numOfCols];
    // the column types do not change while iterating, so the compatibility
    // check is done once per column instead of once per cell
    final boolean[] isDoubleCol = new boolean[numOfCols];
    for (int i = 0; i < numOfCols; i++) {
        final DataType type = origSpec.getColumnSpec(i).getType();
        comp[i] = type.getComparator();
        assert comp[i] != null;
        isDoubleCol[i] = type.isCompatible(DoubleValue.class);
    }
    int nrRows = 0;
    for (RowIterator rowIt = m_table.iterator(); rowIt.hasNext(); nrRows++) {
        DataRow row = rowIt.next();
        if (exec != null) {
            double prog = Double.isNaN(rowCount) ? 0.0 : nrRows / rowCount;
            exec.setProgress(prog, "Calculating statistics, processing row " + (nrRows + 1) + " (\"" + row.getKey() + "\")");
            // throws exception if user canceled
            exec.checkCanceled();
        }
        for (int c = 0; c < numOfCols; c++) {
            final DataCell cell = row.getCell(c);
            if (cell.isMissing()) {
                m_missingValueCnt[c]++;
                continue;
            }
            // keep the min and max for each column
            if ((m_minValues[c] == null) || (comp[c].compare(cell, m_minValues[c]) < 0)) {
                m_minValues[c] = cell;
            }
            if ((m_maxValues[c] == null) || (comp[c].compare(m_maxValues[c], cell) < 0)) {
                m_maxValues[c] = cell;
            }
            // for double columns we calc the sum (for the mean calc)
            if (isDoubleCol[c]) {
                double d = ((DoubleValue) cell).getDoubleValue();
                // a NaN sum is treated as "no value added yet"
                if (Double.isNaN(m_sum[c])) {
                    m_sum[c] = d;
                } else {
                    m_sum[c] += d;
                }
                sumsquare[c] += d * d;
                validCount[c]++;
            }
        }
        calculateMomentInSubClass(row);
    }
    m_nrRows = nrRows;
    for (int j = 0; j < numOfCols; j++) {
        // missing values only (or column is not double-compatible, in which
        // case validCount stays 0)
        if (validCount[j] == 0 || m_minValues[j] == null) {
            DataCell mc = DataType.getMissingCell();
            m_minValues[j] = mc;
            m_maxValues[j] = mc;
            m_meanValues[j] = Double.NaN;
            m_varianceValues[j] = Double.NaN;
        } else {
            m_meanValues[j] = m_sum[j] / validCount[j];
            if (validCount[j] > 1) {
                m_varianceValues[j] = (sumsquare[j] - ((m_sum[j] * m_sum[j]) / validCount[j])) / (validCount[j] - 1);
            } else {
                m_varianceValues[j] = 0.0;
            }
            // round-off errors resulting in negative variance values
            // NOTE(review): the lower bound -1.0E8 looks like it may have
            // been intended as -1.0E-8; kept unchanged because tightening it
            // would leave larger negative round-off results unclamped.
            if (m_varianceValues[j] < 0.0 && m_varianceValues[j] > -1.0E8) {
                m_varianceValues[j] = 0.0;
            }
            assert m_varianceValues[j] >= 0.0 : "Variance cannot be negative (column \"" + origSpec.getColumnSpec(j).getName() + "\": " + m_varianceValues[j] + ")";
        }
    }
    // compute resulting table spec
    DataColumnSpec[] cSpec = new DataColumnSpec[numOfCols];
    for (int c = 0; c < numOfCols; c++) {
        DataColumnSpec s = origSpec.getColumnSpec(c);
        // we create domains with our bounds; missing bounds become null
        Set<DataCell> values = (s.getDomain() == null ? null : s.getDomain().getValues());
        DataCell lowerBound = (m_minValues[c] == null || m_minValues[c].isMissing()) ? null : m_minValues[c];
        DataCell upperBound = (m_maxValues[c] == null || m_maxValues[c].isMissing()) ? null : m_maxValues[c];
        DataColumnDomain newDomain = new DataColumnDomainCreator(values, lowerBound, upperBound).createDomain();
        DataColumnSpecCreator creator = new DataColumnSpecCreator(s);
        creator.setDomain(newDomain);
        cSpec[c] = creator.createSpec();
    }
    m_tSpec = new DataTableSpec(cSpec);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 23 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class ReadPNGFromURLNodeModel method createColumnRearranger.

/**
 * Builds the column rearranger that converts the configured URL column
 * into a column of type {@code PNGImageContent.TYPE}. If no new column name
 * is configured the URL column is replaced, otherwise the result column is
 * appended under a name made unique within the input spec.
 *
 * @param in the input table spec
 * @param failCounter incremented for every row whose content could not be
 *            read while "fail on invalid" is disabled
 * @return the configured rearranger
 * @throws InvalidSettingsException if the URL column does not exist in the
 *             input or is not string-compatible
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec in, final AtomicLong failCounter) throws InvalidSettingsException {
    String urlColumn = m_config.getUrlColName();
    if (urlColumn == null) {
        // nothing configured yet: let the config guess a column
        // (original note: this call throws ISE)
        m_config.guessDefaults(in);
        urlColumn = m_config.getUrlColName();
        setWarningMessage("Auto-configuration: Guessing column \"" + urlColumn + "\" to contain locations");
    }
    final int urlColumnIndex = in.findColumnIndex(urlColumn);
    if (urlColumnIndex < 0) {
        throw new InvalidSettingsException("No such column in input: " + urlColumn);
    }
    final DataColumnSpec urlColumnSpec = in.getColumnSpec(urlColumnIndex);
    if (!urlColumnSpec.getType().isCompatible(StringValue.class)) {
        throw new InvalidSettingsException("Selected column \"" + urlColumn + "\" is not string-compatible");
    }
    final String newColName = m_config.getNewColumnName();
    final boolean replaceInput = newColName == null;
    final DataColumnSpecCreator specCreator;
    if (replaceInput) {
        // reuse the input column's spec but reset everything type-specific
        specCreator = new DataColumnSpecCreator(urlColumnSpec);
        specCreator.setType(PNGImageContent.TYPE);
        specCreator.removeAllHandlers();
        specCreator.setDomain(null);
    } else {
        final String uniqueName = DataTableSpec.getUniqueColumnName(in, newColName);
        specCreator = new DataColumnSpecCreator(uniqueName, PNGImageContent.TYPE);
    }
    final DataColumnSpec outColumnSpec = specCreator.createSpec();
    final CellFactory pngFactory = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            final DataCell input = row.getCell(urlColumnIndex);
            if (input.isMissing()) {
                return DataType.getMissingCell();
            }
            final String url = ((StringValue) input).getStringValue();
            try {
                return toPNGCell(url);
            } catch (Exception e) {
                if (!m_config.isFailOnInvalid()) {
                    // lenient mode: log, count the failure, emit a missing cell
                    final String message = "Failed to read png content from " + "\"" + url + "\": " + e.getMessage();
                    LOGGER.warn(message, e);
                    failCounter.incrementAndGet();
                    return DataType.getMissingCell();
                }
                // strict mode: propagate, wrapping checked exceptions
                if (e instanceof RuntimeException) {
                    throw (RuntimeException) e;
                }
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
    final ColumnRearranger rearranger = new ColumnRearranger(in);
    if (replaceInput) {
        rearranger.replace(pngFactory, urlColumnIndex);
    } else {
        rearranger.append(pngFactory);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataCell(org.knime.core.data.DataCell) StringValue(org.knime.core.data.StringValue) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 24 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class AppendVariableToTableNodeModel method createColumnRearranger.

/**
 * Builds a column rearranger that appends one constant column per selected
 * flow variable to the given table spec. The variable values are read once
 * while the rearranger is created; every row receives the same cells.
 *
 * <p>Column names are made unique: if a variable name collides with an
 * existing column and does not already end in "(variable)" (compared
 * case-insensitively), " (variable)" is appended; remaining collisions are
 * resolved with a " (#n)" counter suffix.
 *
 * @param spec the input table spec
 * @return the rearranger appending the variable columns
 * @throws InvalidSettingsException if no variables are selected, a selected
 *             variable does not exist, or its type is not supported
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    ColumnRearranger arranger = new ColumnRearranger(spec);
    // all column names in use so far (input columns plus appended ones)
    Set<String> nameHash = new HashSet<String>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        DataType type;
        switch(c.getSecond()) {
            case DOUBLE:
                type = DoubleCell.TYPE;
                try {
                    double dValue = peekFlowVariableDouble(name);
                    values[i] = new DoubleCell(dValue);
                } catch (NoSuchElementException e) {
                    // keep the original exception as cause for diagnostics
                    throw new InvalidSettingsException("No such flow variable (of type double): " + name, e);
                }
                break;
            case INTEGER:
                type = IntCell.TYPE;
                try {
                    int iValue = peekFlowVariableInt(name);
                    values[i] = new IntCell(iValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type int): " + name, e);
                }
                break;
            case STRING:
                type = StringCell.TYPE;
                try {
                    String sValue = peekFlowVariableString(name);
                    // a null string value is represented as an empty string cell
                    sValue = sValue == null ? "" : sValue;
                    values[i] = new StringCell(sValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type String): " + name, e);
                }
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        // Locale.ROOT keeps the case-insensitive check independent of the
        // platform's default locale (e.g. Turkish dotless i); fully
        // qualified so that no additional import is required
        if (nameHash.contains(name) && !name.toLowerCase(java.util.Locale.ROOT).endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        String newName = name;
        int uniquifier = 1;
        // Set.add returns false while the candidate name is already taken
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // the same array of constant cells is returned for every row
            return values;
        }
    });
    return arranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataType(org.knime.core.data.DataType) HashSet(java.util.HashSet) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) NoSuchElementException(java.util.NoSuchElementException) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 25 with DataColumnSpecCreator

use of org.knime.core.data.DataColumnSpecCreator in project knime-core by knime.

the class VariableToTableNodeModel method createOutSpec.

/**
 * Creates the output table spec with one column per selected flow
 * variable. Each column is named after its variable and typed according to
 * the variable type (double, int or string).
 *
 * @return the table spec describing the variable columns
 * @throws InvalidSettingsException if no variables are selected or a
 *             variable has an unsupported type
 */
private DataTableSpec createOutSpec() throws InvalidSettingsException {
    final List<Pair<String, FlowVariable.Type>> selection;
    if (!m_settings.getIncludeAll()) {
        selection = m_settings.getVariablesOfInterest();
    } else {
        selection = getAllVariables();
    }
    if (selection.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    final DataColumnSpec[] columns = new DataColumnSpec[selection.size()];
    int index = 0;
    for (Pair<String, FlowVariable.Type> variable : selection) {
        final FlowVariable.Type variableType = variable.getSecond();
        final DataType columnType;
        // map the flow variable type onto the matching cell type
        switch (variableType) {
            case DOUBLE:
                columnType = DoubleCell.TYPE;
                break;
            case INTEGER:
                columnType = IntCell.TYPE;
                break;
            case STRING:
                columnType = StringCell.TYPE;
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + variableType);
        }
        final DataColumnSpecCreator creator = new DataColumnSpecCreator(variable.getFirst(), columnType);
        columns[index] = creator.createSpec();
        index++;
    }
    return new DataTableSpec(columns);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) Pair(org.knime.core.util.Pair) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Aggregations

DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)267 DataColumnSpec (org.knime.core.data.DataColumnSpec)210 DataTableSpec (org.knime.core.data.DataTableSpec)132 DataCell (org.knime.core.data.DataCell)92 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)77 DataType (org.knime.core.data.DataType)74 DataRow (org.knime.core.data.DataRow)73 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)57 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)51 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)48 ArrayList (java.util.ArrayList)46 DoubleCell (org.knime.core.data.def.DoubleCell)45 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)44 StringCell (org.knime.core.data.def.StringCell)29 BufferedDataTable (org.knime.core.node.BufferedDataTable)23 DoubleValue (org.knime.core.data.DoubleValue)22 HashSet (java.util.HashSet)19 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)17 DataColumnDomain (org.knime.core.data.DataColumnDomain)16 DefaultRow (org.knime.core.data.def.DefaultRow)16