use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class BigGroupByTable method createGroupByTable.
/**
* {@inheritDoc}
*
* <p>
* Outline of this implementation, as far as visible in this method:
* <ol>
* <li>If at least one group column index is given, the input table is sorted
* via {@code sortTable} and one comparator per group column is fetched from
* the column types of the input spec. Otherwise the input table is used as
* is and the comparator array is empty.</li>
* <li>The (sorted) table is iterated once. Consecutive rows for which
* {@code sameChunk} does not report a change form a "chunk". Within a chunk,
* rows are bucketed by {@code GroupKey} and fed into a cloned set of column
* aggregators per bucket.</li>
* <li>Whenever the chunk changes, and once more after the last row,
* {@code createTableRows} is called with the result container, the current
* chunk members and the group counter.</li>
* </ol>
*
* @param exec the execution context used for progress, cancellation and
* for creating the result container
* @param table the input table to group
* @param resultSpec the spec of the result container
* @param groupColIdx the indices of the group columns in the input table;
* may be empty
* @return the table obtained from the result container after closing it
* @throws CanceledExecutionException propagated from the cancellation check
* performed once per input row (and presumably from sorting - confirm
* against {@code sortTable})
*/
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
final DataTableSpec origSpec = table.getDataTableSpec();
// sort the data table in order to process the input table chunk wise
final BufferedDataTable sortedTable;
final ExecutionContext groupExec;
final DataValueComparator[] comparators;
if (groupColIdx.length < 1) {
// no group columns: nothing to sort by, so the input is used unsorted and
// the whole progress range is available for the grouping step
sortedTable = table;
groupExec = exec;
comparators = new DataValueComparator[0];
} else {
// sorting gets a sub context with fraction 0.6, grouping one with 0.4
final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
exec.setMessage("Sorting input table...");
// NOTE(review): sorting uses getGroupCols() while the comparators below use
// groupColIdx - presumably both describe the same columns; confirm
sortedTable = sortTable(sortExec, table, getGroupCols());
sortExec.setProgress(1.0);
groupExec = exec.createSubExecutionContext(0.4);
// one comparator per group column, taken from the column's data type
comparators = new DataValueComparator[groupColIdx.length];
for (int i = 0, length = groupColIdx.length; i < length; i++) {
final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
comparators[i] = colSpec.getType().getComparator();
}
}
final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
exec.setMessage("Creating groups");
// group column values of the chunk currently being collected and of the
// row currently being processed; currentGroup is overwritten for every row
final DataCell[] previousGroup = new DataCell[groupColIdx.length];
final DataCell[] currentGroup = new DataCell[groupColIdx.length];
final MutableInteger groupCounter = new MutableInteger(0);
boolean firstRow = true;
// kept as double so that the progress division below is not an integer division
final double numOfRows = sortedTable.size();
long rowCounter = 0;
// In the rare case that the DataCell comparator return 0 for two
// data cells that are not equal we have to maintain a map with all
// rows with equal cells in the group columns per chunk.
// This variable stores for each chunk these members. A chunk consists
// of rows which return 0 for the pairwise group value comparison.
// Usually only equal data cells return 0 when compared with each other
// but in rare occasions also data cells that are NOT equal return 0 when
// compared to each other
// (such as cells that contain chemical structures).
// In this rare case this map will contain for each group of data cells
// that are pairwise equal in the chunk a separate entry.
final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
// the info message about chunks with more than one member is logged at most once
boolean logUnusualCells = true;
String groupLabel = "";
// cannot put init to the constructor, as the super() constructor directly calls the current function
initMissingValuesMap();
for (final DataRow row : sortedTable) {
// fetch the current group column values
for (int i = 0, length = groupColIdx.length; i < length; i++) {
currentGroup[i] = row.getCell(groupColIdx[i]);
}
if (firstRow) {
// seed previousGroup with the first row so that the chunk comparison
// below compares the first row against its own values
groupLabel = createGroupLabelForProgress(currentGroup);
System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
firstRow = false;
}
// a new chunk starts when sameChunk(...) reports a difference between the
// previous and the current group column data cells
if (!sameChunk(comparators, previousGroup, currentGroup)) {
groupLabel = createGroupLabelForProgress(currentGroup);
// flush the finished chunk before its members are cleared below
createTableRows(dc, chunkMembers, groupCounter);
// set the current group as previous group
System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
if (logUnusualCells && chunkMembers.size() > 1) {
// the finished chunk had more than one member: log, only the first time
// this happens, the classes of the cells that cause the problem
if (LOGGER.isEnabledFor(LEVEL.INFO)) {
final StringBuilder buf = new StringBuilder();
buf.append("Data chunk with ");
buf.append(chunkMembers.size());
buf.append(" members occured in groupby node. " + "Involved classes are: ");
// only the group values of the first member are inspected
final GroupKey key = chunkMembers.keySet().iterator().next();
for (final DataCell cell : key.getGroupVals()) {
buf.append(cell.getClass().getCanonicalName());
buf.append(", ");
}
LOGGER.info(buf.toString());
}
logUnusualCells = false;
}
// reset the chunk members map
chunkMembers.clear();
}
// process the row as one of the members of the current chunk
Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
if (member == null) {
// first row of this member: row keys are only collected when hiliting is
// enabled, otherwise the shared immutable empty set is stored
Set<RowKey> rowKeys;
if (isEnableHilite()) {
rowKeys = new HashSet<>();
} else {
rowKeys = Collections.emptySet();
}
member = new Pair<>(cloneColumnAggregators(), rowKeys);
// the map key gets its own copy of the group values because currentGroup
// is overwritten for every row
final DataCell[] groupKeys = new DataCell[currentGroup.length];
System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
chunkMembers.put(new GroupKey(groupKeys), member);
}
// compute the current row values
for (final ColumnAggregator colAggr : member.getFirst()) {
final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
}
// guarded by the same flag as above, so the immutable empty set is never modified
if (isEnableHilite()) {
member.getSecond().add(row.getKey());
}
groupExec.checkCanceled();
groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
}
// create the final row for the last chunk after processing the last
// table row
createTableRows(dc, chunkMembers, groupCounter);
dc.close();
return dc.getTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class UngroupOperation method compute.
/**
 * Runs the ungroup operation on the table held by this operation and returns
 * the result as a buffered data table.
 *
 * <p>
 * An empty input table yields an empty table with the new spec. Otherwise the
 * work is delegated to the row input/output based {@code compute} overload.
 * The row input is closed even if that call fails (for instance because the
 * execution was canceled), so the input is not left open on the error path.
 *
 * @param exec the execution context
 * @return the table with the ungrouped collections
 * @throws Exception the thrown exception
 */
public BufferedDataTable compute(final ExecutionContext exec) throws Exception {
    final BufferedDataContainer dc = exec.createDataContainer(m_newSpec);
    if (m_table.size() == 0) {
        // nothing to ungroup: return an empty table with the new spec
        dc.close();
        return dc.getTable();
    }
    final DataTableRowInput in = new DataTableRowInput(m_table);
    final BufferedDataTableRowOutput out = new BufferedDataTableRowOutput(dc);
    try {
        compute(in, out, exec, m_table.size());
    } finally {
        // always release the input, also when compute(...) throws
        in.close();
    }
    // the output is only finalized on success, same order as before:
    // input closed first, then the output
    out.close();
    return out.getDataTable();
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class Unpivot2NodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * For every input row, one output row is written per selected value column.
 * Each output row consists of the original row id, the value column name and
 * the value cell, followed by the retained columns of the input row. Output
 * rows whose value cell is missing are left out when the corresponding
 * setting is enabled. When hiliting is enabled, a mapping from each input row
 * key to the keys of the output rows created from it is installed.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable input = inData[0];
    final DataTableSpec inSpec = input.getSpec();
    final String[] retainedColumns = m_retainedColumns.applyTo(inSpec).getIncludes();
    final String[] valueColumns = m_valueColumns.applyTo(inSpec).getIncludes();

    // resolve the column names to indices once, up front
    final int[] valueColumnIndices = new int[valueColumns.length];
    for (int v = 0; v < valueColumns.length; v++) {
        valueColumnIndices[v] = inSpec.findColumnIndex(valueColumns[v]);
    }
    final int[] retainedIndices = new int[retainedColumns.length];
    for (int r = 0; r < retainedColumns.length; r++) {
        retainedIndices[r] = inSpec.findColumnIndex(retainedColumns[r]);
    }

    // upper bound of the number of output rows, used as progress denominator
    final double expectedRowCount = input.size() * valueColumns.length;
    final boolean hiliteEnabled = m_enableHilite.getBooleanValue();
    final LinkedHashMap<RowKey, Set<RowKey>> hiliteMapping = new LinkedHashMap<RowKey, Set<RowKey>>();
    final DataTableSpec outSpec = createOutSpec(inSpec);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final boolean skipMissings = m_missingValues.getBooleanValue();

    for (final DataRow inputRow : input) {
        final LinkedHashSet<RowKey> createdKeys = new LinkedHashSet<RowKey>();
        final FilterColumnRow retainedPart = new FilterColumnRow(inputRow, retainedIndices);
        for (int v = 0; v < valueColumns.length; v++) {
            final DataCell valueCell = inputRow.getCell(valueColumnIndices[v]);
            // rows with a missing value cell are only written if skipping is off
            if (!valueCell.isMissing() || !skipMissings) {
                final RowKey outKey = RowKey.createRowKey(container.size());
                if (hiliteEnabled) {
                    createdKeys.add(outKey);
                }
                final DefaultRow unpivoted = new DefaultRow(outKey,
                    new StringCell(inputRow.getKey().getString()),
                    new StringCell(valueColumns[v]),
                    valueCell);
                container.addRowToTable(new AppendedColumnRow(outKey, unpivoted, retainedPart));
                exec.checkCanceled();
                exec.setProgress(container.size() / expectedRowCount);
            }
        }
        if (hiliteEnabled) {
            hiliteMapping.put(retainedPart.getKey(), createdKeys);
        }
    }
    container.close();

    m_trans.setMapper(hiliteEnabled ? new DefaultHiLiteMapper(hiliteMapping) : null);
    return new BufferedDataTable[] { container.getTable() };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class EndcaseNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Concatenates the tables of all connected, active input ports. If no such
 * port exists, the object at the first port is passed through unchanged. The
 * specs of all active tables must have the same structure as the first active
 * table. Duplicate row ids are handled by the appended-rows iterator; skipped
 * rows are reported as a warning. When hiliting is enabled, a translation map
 * derived from the iterator's duplicate-name map is installed.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    // collect the tables of all ports that are connected and not inactive
    final Vector<BufferedDataTable> tables = new Vector<BufferedDataTable>();
    for (int port = 0; port < getNrInPorts(); port++) {
        final PortObject candidate = inData[port];
        if (candidate != null && !(candidate instanceof InactiveBranchPortObject)) {
            tables.add((BufferedDataTable) candidate);
        }
    }

    if (tables.isEmpty()) {
        // no active table at all: the first port is expected to carry the
        // inactive-branch object, which is handed on as the result
        assert inData[0] instanceof InactiveBranchPortObject;
        if (m_enableHiliting) {
            // install an empty hilite translation map so that the internals
            // are handled correctly
            m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(new HashMap<RowKey, Set<RowKey>>()));
        }
        return new PortObject[] { inData[0] };
    }
    assert tables.size() > 0;

    // every further spec has to match the structure of the first one
    for (int t = 1; t < tables.size(); t++) {
        final boolean compatible = tables.get(0).getSpec().equalStructure(tables.get(t).getSpec());
        if (!compatible) {
            // incompatible - refuse to execute
            throw new Exception("The data table structures of the active " + "ports are not compatible.");
        }
    }

    // copy the tables into an array and sum up their row counts
    final DataTable[] dtables = new DataTable[tables.size()];
    int totalRowCount = 0;
    for (int t = 0; t < dtables.length; t++) {
        final BufferedDataTable current = tables.get(t);
        totalRowCount += current.getRowCount();
        dtables[t] = current;
    }

    final String suffix = m_isAppendSuffix ? m_suffix : null;
    final AppendedRowsTable out = new AppendedRowsTable(suffix, dtables);
    // note, this iterator throws runtime exceptions when canceled.
    final AppendedRowsIterator rows = out.iterator(exec, totalRowCount);
    final BufferedDataContainer container = exec.createDataContainer(out.getDataTableSpec());
    try {
        // next() may throw, it also reports the progress
        while (rows.hasNext()) {
            container.addRowToTable(rows.next());
        }
    } catch (RuntimeCanceledExecutionException rcee) {
        // unwrap the cancellation so that callers see the original cause
        throw rcee.getCause();
    } finally {
        container.close();
    }

    final int skipped = rows.getNrRowsSkipped();
    if (skipped > 0) {
        setWarningMessage("Filtered out " + rows.getNrRowsSkipped() + " duplicate row id(s).");
    }

    if (m_enableHiliting) {
        // hilite translation map: output key -> set holding the original key
        final Map<RowKey, Set<RowKey>> translation = new HashMap<RowKey, Set<RowKey>>();
        // all row keys and duplicate row keys of the resulting table
        final Map<RowKey, RowKey> dupMap = rows.getDuplicateNameMap();
        for (final Map.Entry<RowKey, RowKey> entry : dupMap.entrySet()) {
            final boolean renamed = !entry.getKey().equals(entry.getValue());
            // renamed (duplicate) keys are always mapped; unchanged keys only
            // if no entry exists for the same key with the suffix appended
            if (renamed || !dupMap.containsKey(new RowKey(entry.getKey().getString() + m_suffix))) {
                translation.put(entry.getKey(), Collections.singleton(entry.getValue()));
            }
        }
        m_hiliteTranslator.setMapper(new DefaultHiLiteMapper(translation));
    }
    return new BufferedDataTable[] { container.getTable() };
}
use of org.knime.core.node.BufferedDataContainer in project knime-core by knime.
the class GlobalTimerinfoNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Produces two outputs: the global statistics table obtained from the global
 * node timer, and a table with one row per bundle group of every bundle group
 * provider of the platform, holding the group's identifier and version.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable globalStats = NodeTimer.GLOBAL_TIMER.getGlobalStatsTable(exec);

    final BufferedDataContainer featureContainer = exec.createDataContainer(createSpecOut1());
    // running index used to build the row ids "Row 0", "Row 1", ...
    int nextRowIndex = 0;
    for (final IBundleGroupProvider provider : Platform.getBundleGroupProviders()) {
        for (final IBundleGroup feature : provider.getBundleGroups()) {
            final RowKey key = new RowKey("Row " + nextRowIndex);
            nextRowIndex++;
            final DataRow featureRow = new DefaultRow(key,
                new StringCell(feature.getIdentifier()),
                new StringCell(feature.getVersion()));
            featureContainer.addRowToTable(featureRow);
        }
    }
    featureContainer.close();

    return new PortObject[] { globalStats, featureContainer.getTable() };
}
Aggregations