Use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
Class MemoryGroupByTable, method addRow.
/**
 * Feeds one input row into the aggregators of the group it belongs to.
 * When the group key is seen for the first time, a fresh set of aggregators
 * is created by cloning the configured column aggregators and registered
 * for that key.
 *
 * @param spec the table spec used to look up the index of each aggregated column
 * @param groupKey the key of the group the row belongs to
 * @param row the row to aggregate
 */
private void addRow(final DataTableSpec spec, final GroupKey groupKey, final DataRow row) {
    ColumnAggregator[] groupAggregators = m_vals.get(groupKey);
    if (groupAggregators == null) {
        // unknown group: clone the configured aggregators so the group gets its own set
        final ColumnAggregator[] templates = getColAggregators();
        final int count = templates.length;
        groupAggregators = new ColumnAggregator[count];
        int pos = 0;
        while (pos < count) {
            groupAggregators[pos] = templates[pos].clone();
            pos++;
        }
        m_vals.put(groupKey, groupAggregators);
    }
    for (int i = 0; i < groupAggregators.length; i++) {
        final ColumnAggregator current = groupAggregators[i];
        final String columnName = current.getOriginalColName();
        final int columnIndex = spec.findColumnIndex(columnName);
        current.getOperator(getGlobalSettings()).compute(row, columnIndex);
    }
}
Use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
Class Pivot2NodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>Builds three tables from the table at input port 0 and returns them in
 * this order: the pivot table, the group totals table (grouping on the
 * group-by columns only) and the pivot totals table (grouping on the pivot
 * columns only). If total aggregation is enabled, the pivot totals table is
 * replaced by a table whose only row, keyed "Totals", concatenates the first
 * pivot totals row with the first row of an ungrouped aggregation.</p>
 *
 * <p>Progress is reported through three sub contexts created with the values
 * 0.5 (group and pivot), 0.25 (group totals) and 0.25 (pivot totals). The
 * steps inside the first and the third one carry relative weights
 * ({@code progMain*} and {@code progPivot*}) that are divided by their
 * respective sum when the step's sub context is created.</p>
 *
 * @param inData the input port objects; only index 0 is read and it is cast
 *            to {@link BufferedDataTable}
 * @param exec the execution context used for progress, messages and table
 *            creation
 * @return an array holding the pivot table, the group totals table and the
 *         pivot totals table, in that order
 * @throws Exception propagated from any of the grouping, sorting or table
 *             creation steps
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
final BufferedDataTable table = (BufferedDataTable) inData[0];
final List<String> groupAndPivotCols = createAllColumns();
final BufferedDataTable groupTable;
final String orderPivotColumnName;
// one sub context per output: pivot table (0.5), group totals (0.25), pivot totals (0.25)
ExecutionContext groupAndPivotExec = exec.createSubExecutionContext(0.5);
ExecutionContext groupExec = exec.createSubExecutionContext(0.25);
ExecutionContext pivotExec = exec.createSubExecutionContext(0.25);
// relative weights of the steps that build the pivot table; a step that is
// not executed for the current settings gets weight 0.0. Every weight is
// added to progMainTotal and later used as "weight / progMainTotal".
double progMainTotal = 0.0;
double progMainTableAppendIndexForSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
progMainTotal += progMainTableAppendIndexForSort;
double progMainTableGroup = 5.0;
progMainTotal += progMainTableGroup;
double progMainTableInMemSort = isProcessInMemory() ? 3.0 : 0.0;
progMainTotal += progMainTableInMemSort;
double progMainTableGetPivots = 1.0;
progMainTotal += progMainTableGetPivots;
double progMainTableFillPivots = 1.0;
progMainTotal += progMainTableFillPivots;
double progMainTableRestoreSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
progMainTotal += progMainTableRestoreSort;
double progMainTableReplaceRowKey = isProcessInMemory() ? 1.0 : 0.0;
progMainTotal += progMainTableReplaceRowKey;
// Branch 1: the original row order has to be tracked (in-memory processing
// or retain-order). A helper order column is appended and aggregated along
// with the configured aggregators.
if (isProcessInMemory() || isRetainOrder()) {
exec.setMessage("Keeping row order");
final String retainOrderCol = DataTableSpec.getUniqueColumnName(table.getDataTableSpec(), "#pivot_order#");
// append temp. id column with minimum-aggregation method
final ColumnAggregator[] colAggregators = getColumnAggregators().toArray(new ColumnAggregator[0]);
// columns handed to appendOrderColumn: group and pivot columns, every
// aggregated column and the new order column (insertion order is kept)
final Set<String> workingCols = new LinkedHashSet<String>();
workingCols.addAll(groupAndPivotCols);
for (final ColumnAggregator ca : colAggregators) {
workingCols.add(ca.getOriginalColName());
}
workingCols.add(retainOrderCol);
final BufferedDataTable appTable = GroupByTable.appendOrderColumn(groupAndPivotExec.createSubExecutionContext(progMainTableAppendIndexForSort / progMainTotal), table, workingCols, retainOrderCol);
final DataColumnSpec retainOrderColSpec = appTable.getSpec().getColumnSpec(retainOrderCol);
// copy of the configured aggregators plus one extra aggregator, placed
// last, that applies the row order method to the helper column
final ColumnAggregator[] aggrs = new ColumnAggregator[colAggregators.length + 1];
System.arraycopy(colAggregators, 0, aggrs, 0, colAggregators.length);
aggrs[colAggregators.length] = new ColumnAggregator(retainOrderColSpec, AggregationMethods.getRowOrderMethod(), true);
// name under which the aggregated order column appears in the group table
orderPivotColumnName = getColumnNamePolicy().createColumName(aggrs[colAggregators.length]);
exec.setMessage("Grouping main table");
final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), appTable, groupAndPivotCols, isProcessInMemory(), false, /* retain order always false; handled by pivoting */
Arrays.asList(aggrs));
// true then sort table by group&pivot columns
if (isProcessInMemory()) {
exec.setMessage("Sorting group table");
final boolean[] sortDirection = new boolean[groupAndPivotCols.size()];
// ensure that missing values are at the end by sorting in ascending order
Arrays.fill(sortDirection, true);
final SortedTable sortedGroupByTable = new SortedTable(groupByTable.getBufferedTable(), groupAndPivotCols, sortDirection, groupAndPivotExec.createSubExecutionContext(progMainTableInMemSort / progMainTotal));
groupTable = sortedGroupByTable.getBufferedDataTable();
} else {
groupTable = groupByTable.getBufferedTable();
}
} else {
// Branch 2: no order tracking; group the input table directly with the
// configured aggregators and remember that there is no order column
exec.setMessage("Grouping main table");
final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), table, groupAndPivotCols, isProcessInMemory(), false, getColumnAggregators());
groupTable = groupByTable.getBufferedTable();
orderPivotColumnName = null;
}
// resolve the position of every pivot column inside the group table
final List<String> pivotCols = m_pivotCols.getIncludeList();
final int[] pivotIdx = new int[pivotCols.size()];
final DataTableSpec groupSpec = groupTable.getSpec();
// NOTE(review): entries of combPivots may be null (see the null check in the
// loop below); what createCombinedPivots pre-fills is not visible here
final Set<String>[] combPivots = createCombinedPivots(groupSpec, pivotCols);
for (int i = 0; i < pivotIdx.length; i++) {
pivotIdx[i] = groupSpec.findColumnIndex(pivotCols.get(i));
}
exec.setProgress("Determining pivots...");
ExecutionContext fillExec = groupAndPivotExec.createSubExecutionContext(progMainTableGetPivots / progMainTotal);
final long groupTableSize = groupTable.size();
long groupIndex = 0;
// scan the group table once and collect, per pivot column, the string
// representation of every value that occurs
for (final DataRow row : groupTable) {
for (int i = 0; i < pivotIdx.length; i++) {
if (combPivots[i] == null) {
combPivots[i] = new LinkedHashSet<String>();
}
final DataCell cell = row.getCell(pivotIdx[i]);
// missing cells only contribute a pivot value when missing values are
// not ignored
if (cell.isMissing()) {
if (!m_ignoreMissValues.getBooleanValue()) {
combPivots[i].add(cell.toString());
}
} else {
combPivots[i].add(cell.toString());
}
}
// arguments are evaluated left to right: the progress fraction uses the
// index before the increment, the message shows the incremented value
fillExec.setProgress(groupIndex++ / (double) groupTableSize, String.format("Group \"%s\" (%d/%d)", row.getKey(), groupIndex, groupTableSize));
fillExec.checkCanceled();
}
// pivotStarts is passed empty to createOutSpec and afterwards to
// fillPivotTable; presumably createOutSpec fills it - verify in createOutSpec
final Map<String, Integer> pivotStarts = new LinkedHashMap<String, Integer>();
final DataTableSpec outSpec = createOutSpec(groupSpec, combPivots, pivotStarts, orderPivotColumnName);
exec.setProgress("Filling pivot table");
BufferedDataTable pivotTable = fillPivotTable(groupTable, outSpec, pivotStarts, groupAndPivotExec.createSubExecutionContext(progMainTableFillPivots / progMainTotal), orderPivotColumnName);
// if an order column was carried along: sort ascending by it and then drop
// it from the pivot table
if (orderPivotColumnName != null) {
exec.setMessage("Restoring row order");
final SortedTable sortedPivotTable = new SortedTable(pivotTable, Arrays.asList(new String[] { orderPivotColumnName }), new boolean[] { true }, groupAndPivotExec.createSubExecutionContext(progMainTableRestoreSort / progMainTotal));
pivotTable = sortedPivotTable.getBufferedDataTable();
final ColumnRearranger colre = new ColumnRearranger(pivotTable.getSpec());
colre.remove(orderPivotColumnName);
pivotTable = exec.createColumnRearrangeTable(pivotTable, colre, exec.createSilentSubProgress(0.0));
}
// temp fix for bug 3286
if (isProcessInMemory()) {
// if process in memory is true, RowKey's needs to be re-computed
final BufferedDataContainer rowkeyBuf = groupAndPivotExec.createSubExecutionContext(progMainTableReplaceRowKey / progMainTotal).createDataContainer(pivotTable.getSpec());
// copy every row under a new key generated from a running index
long rowIndex = 0;
for (DataRow row : pivotTable) {
rowkeyBuf.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), row));
}
rowkeyBuf.close();
pivotTable = rowkeyBuf.getTable();
}
groupAndPivotExec.setProgress(1.0);
/* Fill the 3rd port */
exec.setMessage("Determining pivot totals");
// relative weights of the steps that build the pivot totals table; same
// scheme as progMain* above, normalised by progPivotTotal
double progPivotTotal = 0.0;
double progPivotGroup = 5.0;
progPivotTotal += progPivotGroup;
double progPivotFillMissing = 1.0;
progPivotTotal += progPivotFillMissing;
double progPivotFillPivots = 1.0;
progPivotTotal += progPivotFillPivots;
double progPivotOverallTotals = m_totalAggregation.getBooleanValue() ? 5.0 : 0.0;
progPivotTotal += progPivotOverallTotals;
// create pivot table only on pivot columns (for grouping)
// perform pivoting: result in single line
final GroupByTable rowGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotGroup / progPivotTotal), table, m_pivotCols.getIncludeList(), isProcessInMemory(), isRetainOrder(), getColumnAggregators());
final BufferedDataTable rowGroupTable = rowGroup.getBufferedTable();
// fill group columns with missing cells
final ColumnRearranger colre = new ColumnRearranger(rowGroupTable.getDataTableSpec());
// one missing-cell column is inserted per group-by column, using the spec
// found at the same index in outSpec
// NOTE(review): assumes the leading columns of outSpec are the group-by
// columns - confirm in createOutSpec
for (int i = 0; i < getGroupByColumns().size(); i++) {
final DataColumnSpec cspec = outSpec.getColumnSpec(i);
final CellFactory factory = new SingleCellFactory(cspec) {
/**
* {@inheritDoc}
*/
@Override
public DataCell getCell(final DataRow row) {
return DataType.getMissingCell();
}
};
colre.insertAt(i, factory);
}
final BufferedDataTable groupedRowTable = exec.createColumnRearrangeTable(rowGroupTable, colre, pivotExec.createSubExecutionContext(progPivotFillMissing / progPivotTotal));
// same outSpec and pivotStarts as for the pivot table, but no order column
// name is passed (null)
BufferedDataTable pivotRowsTable = fillPivotTable(groupedRowTable, outSpec, pivotStarts, pivotExec.createSubExecutionContext(progPivotFillPivots / progPivotTotal), null);
// drop the order column from the pivot totals as well; presumably it is
// present because outSpec was created with orderPivotColumnName - verify
if (orderPivotColumnName != null) {
final ColumnRearranger colre2 = new ColumnRearranger(pivotRowsTable.getSpec());
colre2.remove(orderPivotColumnName);
pivotRowsTable = exec.createColumnRearrangeTable(pivotRowsTable, colre2, exec.createSilentSubProgress(0.0));
}
// total aggregation without grouping
if (m_totalAggregation.getBooleanValue()) {
// an empty group column list is passed, i.e. the aggregation runs without
// any grouping column
@SuppressWarnings("unchecked") final GroupByTable totalGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotOverallTotals / progPivotTotal), table, Collections.EMPTY_LIST, isProcessInMemory(), isRetainOrder(), getColumnAggregators());
final BufferedDataTable totalGroupTable = totalGroup.getBufferedTable();
final DataTableSpec pivotsRowsSpec = pivotRowsTable.getSpec();
final DataTableSpec totalGroupSpec = totalGroupTable.getSpec();
// result spec: columns of the pivot totals followed by the columns of the
// ungrouped aggregation
final DataTableSpec overallTotalSpec = new DataTableSpec(pivotsRowsSpec, totalGroupSpec);
final BufferedDataContainer buf = exec.createDataContainer(overallTotalSpec);
// only the first row of each table is read; if the pivot totals table is
// empty the container is closed without any row
if (pivotRowsTable.size() > 0) {
final List<DataCell> pivotTotalsCells = new ArrayList<DataCell>();
final DataRow pivotsRow = pivotRowsTable.iterator().next();
for (final DataCell cell : pivotsRow) {
pivotTotalsCells.add(cell);
}
// NOTE(review): no emptiness check on totalGroupTable before next() -
// confirm that it always holds a row when pivotRowsTable does
final DataRow totalGroupRow = totalGroupTable.iterator().next();
for (final DataCell cell : totalGroupRow) {
pivotTotalsCells.add(cell);
}
buf.addRowToTable(new DefaultRow(new RowKey("Totals"), pivotTotalsCells));
}
buf.close();
pivotRowsTable = buf.getTable();
}
pivotExec.setProgress(1.0);
/* Fill the 2nd port: important to create this last since it will create
* the final hilite handler (mapping) for port #1 AND #2 (bug 3270) */
exec.setMessage("Creating group totals");
// create group table only on group columns; no pivoting
final BufferedDataTable columnGroupTable = createGroupByTable(groupExec, table, getGroupByColumns()).getBufferedTable();
return new PortObject[] { // pivot table
pivotTable, // group totals
columnGroupTable, // pivot and overall totals
pivotRowsTable };
}
Use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
Class BigGroupByTable, method createTableRows.
/**
 * Writes one result row per group of the given data chunk into the data
 * container. Each row consists of the group values followed by the result
 * of every column aggregator. Skipped groups and missing value counts are
 * recorded along the way, and the mapping from the new row key to the
 * original row keys is registered if hiliting is enabled.
 *
 * @param dc the {@link DataContainer} the result rows are added to
 * @param chunkMembers the groups of the current data chunk together with
 *            their aggregators and original row keys; nothing happens if
 *            this is <code>null</code> or empty
 * @param groupCounter the number of groups that have been created so far;
 *            used to generate the row key and incremented once per group
 */
private void createTableRows(final BufferedDataContainer dc, final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers, final MutableInteger groupCounter) {
    final boolean nothingToDo = chunkMembers == null || chunkMembers.isEmpty();
    if (nothingToDo) {
        return;
    }
    for (final Entry<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> member : chunkMembers.entrySet()) {
        final DataCell[] keyCells = member.getKey().getGroupVals();
        final Pair<ColumnAggregator[], Set<RowKey>> payload = member.getValue();
        final ColumnAggregator[] aggregators = payload.getFirst();
        final RowKey newKey = RowKey.createRowKey(groupCounter.intValue());
        groupCounter.inc();

        // layout of the result row: group values first, aggregation results behind them
        final int offset = keyCells.length;
        final DataCell[] cells = new DataCell[offset + aggregators.length];
        System.arraycopy(keyCells, 0, cells, 0, offset);

        for (int a = 0; a < aggregators.length; a++) {
            final ColumnAggregator aggregator = aggregators[a];
            final AggregationOperator op = aggregator.getOperator(getGlobalSettings());
            cells[offset + a] = op.getResult();
            if (op.isSkipped()) {
                // remember the group and the column that caused the skipping
                addSkippedGroup(aggregator.getOriginalColName(), op.getSkipMessage(), keyCells);
            }
            m_missingValuesMap.get(aggregator.getOriginalColName()).add(op.getMissingValuesCount());
        }

        dc.addRowToTable(new DefaultRow(newKey, cells));
        if (isEnableHilite()) {
            addHiliteMapping(newKey, payload.getSecond());
        }
    }
}
Use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
Class BigGroupByTable, method initMissingValuesMap.
/**
 * Replaces the missing values map with a new one that holds a counter
 * starting at zero for the original column name of every column aggregator.
 */
private void initMissingValuesMap() {
    m_missingValuesMap = new HashMap<>();
    final ColumnAggregator[] aggregators = getColAggregators();
    for (int i = 0; i < aggregators.length; i++) {
        final String columnName = aggregators[i].getOriginalColName();
        m_missingValuesMap.put(columnName, new MutableLong(0L));
    }
}
Use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
Class GroupByNodeModel, method compCreateColumnAggregators.
/**
 * Compatibility helper for settings written by versions prior to Knime 2.0,
 * which knew only one aggregation method for numerical and one for nominal
 * columns. Creates one {@link ColumnAggregator} for every column of the
 * spec that is not excluded, in the column order of the spec.
 *
 * @param spec
 *            the {@link DataTableSpec} whose columns are inspected
 * @param excludeCols
 *            the names of all columns that get no aggregator
 * @param numeric
 *            the id of the numerical aggregation method
 * @param nominal
 *            the id of the nominal aggregation method
 * @return {@link List} of the created {@link ColumnAggregator}s
 */
private static List<ColumnAggregator> compCreateColumnAggregators(final DataTableSpec spec, final List<String> excludeCols, final String numeric, final String nominal) {
    final AggregationMethod methodForNumeric = AggregationMethods.getMethod4Id(numeric);
    final AggregationMethod methodForNominal = AggregationMethods.getMethod4Id(nominal);
    final Set<String> excluded = new HashSet<>(excludeCols);
    final List<ColumnAggregator> result = new LinkedList<>();
    final int columnCount = spec.getNumColumns();
    for (int pos = 0; pos < columnCount; pos++) {
        final DataColumnSpec column = spec.getColumnSpec(pos);
        if (excluded.contains(column.getName())) {
            // excluded columns get no aggregator
            continue;
        }
        final AggregationMethod chosen = AggregationMethods.getAggregationMethod(column, methodForNumeric, methodForNominal);
        result.add(new ColumnAggregator(column, chosen, chosen.inclMissingCells()));
    }
    return result;
}
Aggregations