
Example 61 with BufferedDataContainer

Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

The class BigGroupByTable, method createGroupByTable.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table so that the input can be processed chunk-wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are NOT equal, we have to maintain a map of all
    // rows whose group-column cells compare as equal within a chunk.
    // A chunk consists of rows whose group values compare to 0 pairwise.
    // Usually only equal data cells compare to 0, but in rare cases
    // cells that are not equal do so as well (such as cells that
    // contain chemical structures). In that case this map holds a
    // separate entry for each group of pairwise-equal cells in the chunk.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    boolean logUnusualCells = true;
    String groupLabel = "";
    // this initialization cannot go into the constructor, as the super() constructor calls this method directly
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether this row still belongs to the current chunk by comparing the group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // log the chunk members and the cell classes that cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used: DataTableSpec (org.knime.core.data.DataTableSpec), HashSet (java.util.HashSet), Set (java.util.Set), RowKey (org.knime.core.data.RowKey), DataValueComparator (org.knime.core.data.DataValueComparator), DataRow (org.knime.core.data.DataRow), LinkedHashMap (java.util.LinkedHashMap), DataColumnSpec (org.knime.core.data.DataColumnSpec), BufferedDataTable (org.knime.core.node.BufferedDataTable), Pair (org.knime.core.util.Pair), BufferedDataContainer (org.knime.core.node.BufferedDataContainer), MutableInteger (org.knime.core.util.MutableInteger), ExecutionContext (org.knime.core.node.ExecutionContext), ColumnAggregator (org.knime.base.data.aggregation.ColumnAggregator), DataCell (org.knime.core.data.DataCell)
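
Examples 61 to 65 all follow the same container life cycle: create a BufferedDataContainer from a DataTableSpec via ExecutionContext.createDataContainer(), add rows, close the container, and only then fetch the table. Below is a minimal sketch of that pattern in isolation; the one-column spec and the method name are illustrative, not taken from the examples.

import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.RowKey;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.StringCell;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.ExecutionContext;

static BufferedDataTable fillExampleTable(final ExecutionContext exec) {
    // a single string column; any spec works here
    final DataTableSpec spec = new DataTableSpec(
            new DataColumnSpecCreator("Value", StringCell.TYPE).createSpec());
    final BufferedDataContainer dc = exec.createDataContainer(spec);
    for (long i = 0; i < 3; i++) {
        // RowKey.createRowKey(long) yields keys like "Row0", "Row1", ...
        dc.addRowToTable(new DefaultRow(RowKey.createRowKey(i), new StringCell("value " + i)));
    }
    // the container must be closed before getTable() may be called
    dc.close();
    return dc.getTable();
}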

Example 62 with BufferedDataContainer

Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

The class UngroupOperation, method compute.

/**
 * @param exec the execution context
 * @return the table with the ungrouped collections
 * @throws Exception if the ungrouping fails or execution is canceled
 */
public BufferedDataTable compute(final ExecutionContext exec) throws Exception {
    final BufferedDataContainer dc = exec.createDataContainer(m_newSpec);
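    // shortcut: an empty input yields an empty output table with the new spec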
    if (m_table.size() == 0) {
        dc.close();
        return dc.getTable();
    }
    DataTableRowInput in = new DataTableRowInput(m_table);
    BufferedDataTableRowOutput out = new BufferedDataTableRowOutput(dc);
    compute(in, out, exec, m_table.size());
    in.close();
    out.close();
    return out.getDataTable();
}
Also used: BufferedDataContainer (org.knime.core.node.BufferedDataContainer), DataTableRowInput (org.knime.core.node.streamable.DataTableRowInput), BufferedDataTableRowOutput (org.knime.core.node.streamable.BufferedDataTableRowOutput)
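
What makes Example 62 worth noting is the bridge between the batch and streaming APIs: the in-memory table is wrapped in a DataTableRowInput and the container in a BufferedDataTableRowOutput, so a single row-by-row compute method can serve both execution modes. A hedged sketch of that bridge, with an identity copy standing in for the actual ungrouping logic (the method name is illustrative):

import org.knime.core.data.DataRow;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.streamable.BufferedDataTableRowOutput;
import org.knime.core.node.streamable.DataTableRowInput;

static BufferedDataTable copyViaRowStreams(final BufferedDataTable table,
        final ExecutionContext exec) throws Exception {
    final DataTableRowInput in = new DataTableRowInput(table);
    final BufferedDataContainer dc = exec.createDataContainer(table.getDataTableSpec());
    final BufferedDataTableRowOutput out = new BufferedDataTableRowOutput(dc);
    DataRow row;
    // poll() returns null once the input is exhausted
    while ((row = in.poll()) != null) {
        out.push(row); // identity copy; real logic would transform the row here
    }
    in.close();
    out.close(); // closes the underlying container
    return out.getDataTable();
}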

Example 63 with BufferedDataContainer

Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

The class Unpivot2NodeModel, method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    DataTableSpec inSpec = inData[0].getSpec();
    String[] retainedColumns = m_retainedColumns.applyTo(inSpec).getIncludes();
    String[] valueColumns = m_valueColumns.applyTo(inSpec).getIncludes();
    int[] valueColumnIndices = new int[valueColumns.length];
    for (int i = 0; i < valueColumnIndices.length; i++) {
        valueColumnIndices[i] = inSpec.findColumnIndex(valueColumns[i]);
    }
    int[] orderColumnIdx = new int[retainedColumns.length];
    for (int i = 0; i < orderColumnIdx.length; i++) {
        orderColumnIdx[i] = inSpec.findColumnIndex(retainedColumns[i]);
    }
    final double newRowCnt = inData[0].size() * valueColumns.length;
    final boolean enableHilite = m_enableHilite.getBooleanValue();
    LinkedHashMap<RowKey, Set<RowKey>> map = new LinkedHashMap<RowKey, Set<RowKey>>();
    DataTableSpec outSpec = createOutSpec(inSpec);
    BufferedDataContainer buf = exec.createDataContainer(outSpec);
    final boolean skipMissings = m_missingValues.getBooleanValue();
    for (DataRow row : inData[0]) {
        LinkedHashSet<RowKey> set = new LinkedHashSet<RowKey>();
        FilterColumnRow crow = new FilterColumnRow(row, orderColumnIdx);
        for (int i = 0; i < valueColumns.length; i++) {
            String colName = valueColumns[i];
            DataCell acell = row.getCell(valueColumnIndices[i]);
            if (acell.isMissing() && skipMissings) {
                // skip rows containing missing cells (in Value column(s))
                continue;
            }
            RowKey rowKey = RowKey.createRowKey(buf.size());
            if (enableHilite) {
                set.add(rowKey);
            }
            DefaultRow drow = new DefaultRow(rowKey, new StringCell(row.getKey().getString()), new StringCell(colName), acell);
            buf.addRowToTable(new AppendedColumnRow(rowKey, drow, crow));
            exec.checkCanceled();
            exec.setProgress(buf.size() / newRowCnt);
        }
        if (enableHilite) {
            map.put(crow.getKey(), set);
        }
    }
    buf.close();
    if (enableHilite) {
        m_trans.setMapper(new DefaultHiLiteMapper(map));
    } else {
        m_trans.setMapper(null);
    }
    return new BufferedDataTable[] { buf.getTable() };
}
Also used: LinkedHashSet (java.util.LinkedHashSet), DataTableSpec (org.knime.core.data.DataTableSpec), Set (java.util.Set), BufferedDataContainer (org.knime.core.node.BufferedDataContainer), RowKey (org.knime.core.data.RowKey), DataRow (org.knime.core.data.DataRow), LinkedHashMap (java.util.LinkedHashMap), StringCell (org.knime.core.data.def.StringCell), BufferedDataTable (org.knime.core.node.BufferedDataTable), DataCell (org.knime.core.data.DataCell), DefaultRow (org.knime.core.data.def.DefaultRow), DefaultHiLiteMapper (org.knime.core.node.property.hilite.DefaultHiLiteMapper), FilterColumnRow (org.knime.base.data.filter.column.FilterColumnRow), AppendedColumnRow (org.knime.base.data.append.column.AppendedColumnRow)
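
One idiom in Example 63 deserves a callout: because each input row fans out into up to valueColumns.length output rows, fresh row keys are minted with RowKey.createRowKey(buf.size()), using the container's current size as a running counter. The same idiom in isolation (the helper name is made up for illustration):

import org.knime.core.data.DataCell;
import org.knime.core.data.RowKey;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.node.BufferedDataContainer;

static void appendWithFreshKey(final BufferedDataContainer buf, final DataCell... cells) {
    // size() grows with every added row, so each generated key ("Row0", "Row1", ...) is unique
    final RowKey key = RowKey.createRowKey(buf.size());
    buf.addRowToTable(new DefaultRow(key, cells));
}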

Example 64 with BufferedDataContainer

Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

The class EndcaseNodeModel, method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    Vector<BufferedDataTable> tables = new Vector<BufferedDataTable>();
    for (int i = 0; i < getNrInPorts(); i++) {
        if (inData[i] != null) {
            // if connected...
            if (!(inData[i] instanceof InactiveBranchPortObject)) {
                // ...and active, add it:
                tables.add((BufferedDataTable) inData[i]);
            }
        }
    }
    if (tables.size() == 0) {
        // all inputs are inactive or not connected: forward the inactive
        // branch object (the first port is guaranteed to be connected!)
        assert inData[0] instanceof InactiveBranchPortObject;
        if (m_enableHiliting) {
            // create empty hilite translation map (so we correctly
            // handle the internals).
            Map<RowKey, Set<RowKey>> map = new HashMap<RowKey, Set<RowKey>>();
            m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(map));
        }
        return new PortObject[] { inData[0] };
    }
    assert tables.size() > 0;
    // check compatibility of specs against first spec in list
    for (int i = 1; i < tables.size(); i++) {
        if (!(tables.get(0).getSpec().equalStructure(tables.get(i).getSpec()))) {
            // incompatible - refuse to execute
            throw new Exception("The data table structures of the active " + "ports are not compatible.");
        }
    }
    int totalRowCount = 0;
    DataTable[] dtables = new DataTable[tables.size()];
    int i = 0;
    for (BufferedDataTable t : tables) {
        totalRowCount += t.getRowCount();
        dtables[i] = t;
        i++;
    }
    AppendedRowsTable out = new AppendedRowsTable((m_isAppendSuffix ? m_suffix : null), dtables);
    // note, this iterator throws runtime exceptions when canceled.
    AppendedRowsIterator it = out.iterator(exec, totalRowCount);
    BufferedDataContainer c = exec.createDataContainer(out.getDataTableSpec());
    try {
        while (it.hasNext()) {
            // may throw exception, also sets progress
            c.addRowToTable(it.next());
        }
    } catch (RuntimeCanceledExecutionException rcee) {
        throw rcee.getCause();
    } finally {
        c.close();
    }
    if (it.getNrRowsSkipped() > 0) {
        setWarningMessage("Filtered out " + it.getNrRowsSkipped() + " duplicate row id(s).");
    }
    if (m_enableHiliting) {
        // create hilite translation map
        Map<RowKey, Set<RowKey>> map = new HashMap<RowKey, Set<RowKey>>();
        // map of all RowKeys and duplicate RowKeys in the resulting table
        Map<RowKey, RowKey> dupMap = it.getDuplicateNameMap();
        for (Map.Entry<RowKey, RowKey> e : dupMap.entrySet()) {
            // if a duplicate key
            if (!e.getKey().equals(e.getValue())) {
                Set<RowKey> set = Collections.singleton(e.getValue());
                // put duplicate key and original key into map
                map.put(e.getKey(), set);
            } else {
                // unchanged key: map it to itself unless a suffixed duplicate of it exists
                if (!dupMap.containsKey(new RowKey(e.getKey().getString() + m_suffix))) {
                    Set<RowKey> set = Collections.singleton(e.getValue());
                    map.put(e.getKey(), set);
                }
            }
        }
        m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(map));
    }
    return new BufferedDataTable[] { c.getTable() };
}
Also used: DataTable (org.knime.core.data.DataTable), BufferedDataTable (org.knime.core.node.BufferedDataTable), InactiveBranchPortObject (org.knime.core.node.port.inactive.InactiveBranchPortObject), Set (java.util.Set), BufferedDataContainer (org.knime.core.node.BufferedDataContainer), RowKey (org.knime.core.data.RowKey), HashMap (java.util.HashMap), Map (java.util.Map), Vector (java.util.Vector), RuntimeCanceledExecutionException (org.knime.base.data.append.row.AppendedRowsIterator.RuntimeCanceledExecutionException), InvalidSettingsException (org.knime.core.node.InvalidSettingsException), CanceledExecutionException (org.knime.core.node.CanceledExecutionException), IOException (java.io.IOException), AppendedRowsIterator (org.knime.core.data.append.AppendedRowsIterator), AppendedRowsTable (org.knime.core.data.append.AppendedRowsTable), DefaultHiLiteMapper (org.knime.core.node.property.hilite.DefaultHiLiteMapper), PortObject (org.knime.core.node.port.PortObject)
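
The core move of Example 64, draining an assembled DataTable into a BufferedDataContainer inside try/finally so the container is closed even when execution is canceled, works for any table source. A minimal sketch, with an explicit checkCanceled() call standing in for the cancellation handling that AppendedRowsIterator performs internally (the method name is illustrative):

import org.knime.core.data.DataRow;
import org.knime.core.data.DataTable;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;

static BufferedDataTable materialize(final DataTable table, final ExecutionContext exec)
        throws CanceledExecutionException {
    final BufferedDataContainer c = exec.createDataContainer(table.getDataTableSpec());
    try {
        for (final DataRow row : table) {
            exec.checkCanceled(); // throws if the user canceled the node
            c.addRowToTable(row);
        }
    } finally {
        c.close(); // always close, mirroring the try/finally in Example 64
    }
    return c.getTable();
}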

Example 65 with BufferedDataContainer

Use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.

The class GlobalTimerinfoNodeModel, method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable result0 = NodeTimer.GLOBAL_TIMER.getGlobalStatsTable(exec);
    BufferedDataContainer result1 = exec.createDataContainer(createSpecOut1());
    int rowcount = 0;
    for (IBundleGroupProvider provider : Platform.getBundleGroupProviders()) {
        for (IBundleGroup feature : provider.getBundleGroups()) {
            DataRow row = new DefaultRow(new RowKey("Row " + rowcount++), new StringCell(feature.getIdentifier()), new StringCell(feature.getVersion()));
            result1.addRowToTable(row);
        }
    }
    result1.close();
    return new PortObject[] { result0, result1.getTable() };
}
Also used: IBundleGroup (org.eclipse.core.runtime.IBundleGroup), IBundleGroupProvider (org.eclipse.core.runtime.IBundleGroupProvider), BufferedDataContainer (org.knime.core.node.BufferedDataContainer), RowKey (org.knime.core.data.RowKey), StringCell (org.knime.core.data.def.StringCell), BufferedDataTable (org.knime.core.node.BufferedDataTable), DefaultRow (org.knime.core.data.def.DefaultRow), DataRow (org.knime.core.data.DataRow), PortObject (org.knime.core.node.port.PortObject)
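
Example 65 fills a two-column table of feature identifiers and versions. The createSpecOut1() method is not shown; a spec matching the two StringCells added per row would presumably look like this sketch (the column names are guesses, not the actual KNIME implementation):

import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.def.StringCell;

static DataTableSpec createFeatureSpec() {
    // two string columns matching the two StringCells added per row
    return new DataTableSpec(
            new DataColumnSpecCreator("Feature", StringCell.TYPE).createSpec(),
            new DataColumnSpecCreator("Version", StringCell.TYPE).createSpec());
}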

Aggregations

BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 157 uses
BufferedDataTable (org.knime.core.node.BufferedDataTable): 96 uses
DefaultRow (org.knime.core.data.def.DefaultRow): 93 uses
DataCell (org.knime.core.data.DataCell): 88 uses
DataTableSpec (org.knime.core.data.DataTableSpec): 88 uses
DataRow (org.knime.core.data.DataRow): 80 uses
RowKey (org.knime.core.data.RowKey): 38 uses
DoubleCell (org.knime.core.data.def.DoubleCell): 37 uses
StringCell (org.knime.core.data.def.StringCell): 26 uses
InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 24 uses
ArrayList (java.util.ArrayList): 23 uses
DataColumnSpec (org.knime.core.data.DataColumnSpec): 21 uses
CanceledExecutionException (org.knime.core.node.CanceledExecutionException): 21 uses
ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 17 uses
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 16 uses
IOException (java.io.IOException): 15 uses
ExecutionContext (org.knime.core.node.ExecutionContext): 15 uses
LinkedHashMap (java.util.LinkedHashMap): 14 uses
HashSet (java.util.HashSet): 13 uses
IntCell (org.knime.core.data.def.IntCell): 13 uses