Use of org.knime.base.node.preproc.groupby.BigGroupByTable in project knime-core by knime.
From the class CrosstabNodeModel, method createGroupByTable.
/**
 * Groups the input table by the given columns and aggregates each group
 * with a single column aggregator.
 * <p>
 * When the settings name a weight column, that column is aggregated with a
 * {@link NonNegativeSumOperator} and missing values are excluded. Otherwise
 * the first group-by column is aggregated with a {@link CountOperator} and
 * missing values are included.
 * <p>
 * If hiliting is enabled in the settings, the hilite mapping of the result
 * is registered on this node. If the result reports skipped groups, the
 * corresponding message is set as the node warning.
 *
 * @param exec execution context
 * @param table input table to group
 * @param groupByCols names of the columns to group by; the first entry is
 *            used as the aggregated column when no weight column is set
 * @return table with group and aggregation columns
 * @throws CanceledExecutionException if the group-by table generation was
 *             canceled externally
 */
private final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final List<String> groupByCols) throws CanceledExecutionException {
    final boolean hiliteEnabled = m_settings.getEnableHiliting();

    // no limit on the number of unique values per group
    final GlobalSettings globalSettings = GlobalSettings.builder()
        .setFileStoreFactory(FileStoreFactory.createWorkflowFileStoreFactory(exec))
        .setGroupColNames(groupByCols)
        .setMaxUniqueValues(Integer.MAX_VALUE)
        .setValueDelimiter(GlobalSettings.STANDARD_DELIMITER)
        .setDataTableSpec(table.getDataTableSpec())
        .setNoOfRows(table.size())
        .setAggregationContext(AggregationContext.ROW_AGGREGATION)
        .build();

    final String weightColumn = m_settings.getWeightColumn();
    final ColumnAggregator aggregator;
    if (weightColumn == null) {
        // no weighting: count rows on an arbitrary existing column; missing
        // values have to be included for the count
        final DataColumnSpec countedSpec = table.getDataTableSpec().getColumnSpec(groupByCols.get(0));
        aggregator = new ColumnAggregator(countedSpec,
            new CountOperator(globalSettings, new OperatorColumnSettings(true, countedSpec)), true);
    } else {
        // weighting: sum up the weight column, ignoring missing values
        final DataColumnSpec weightSpec = table.getDataTableSpec().getColumnSpec(weightColumn);
        aggregator = new ColumnAggregator(weightSpec,
            new NonNegativeSumOperator(globalSettings, new OperatorColumnSettings(false, weightSpec)), false);
    }

    // last argument: the original row order is not retained
    final GroupByTable grouped = new BigGroupByTable(exec, table, groupByCols,
        new ColumnAggregator[] { aggregator }, globalSettings, hiliteEnabled,
        ColumnNamePolicy.AGGREGATION_METHOD_COLUMN_NAME, false);

    if (hiliteEnabled) {
        setHiliteMapping(new DefaultHiLiteMapper(grouped.getHiliteMapping()));
    }

    // surface skipped groups, if any, as a node warning
    final String skippedGroupsMsg = grouped.getSkippedGroupsMessage(3, 3);
    if (skippedGroupsMsg != null) {
        setWarningMessage(skippedGroupsMsg);
    }
    return grouped;
}
Use of org.knime.base.node.preproc.groupby.BigGroupByTable in project knime-core by knime.
From the class NumericOutliersIntervalsCalculator, method getGroupByTable.
/**
 * Builds the group-by table for the given input according to the current
 * settings: an in-memory implementation when {@code m_inMemory} is set,
 * otherwise a {@link BigGroupByTable}.
 *
 * @param inTable the input data table
 * @param exec the execution context
 * @return the group by table w.r.t. the selected settings
 * @throws CanceledExecutionException if the user has canceled the execution
 */
private GroupByTable getGroupByTable(final BufferedDataTable inTable, final ExecutionContext exec) throws CanceledExecutionException {
    // global settings first, since the aggregators are created from them
    final GlobalSettings globalSettings = getGlobalSettings(inTable);
    final ColumnAggregator[] aggregators = getAggretators(inTable.getDataTableSpec(), globalSettings);

    // pick the implementation matching the chosen memory setting; both are
    // created with the two boolean flags set to false
    if (m_inMemory) {
        return new MemoryGroupByTable(exec, inTable,
            Arrays.stream(m_groupColNames).collect(Collectors.toList()),
            aggregators, globalSettings, false, COLUMN_NAME_POLICY, false);
    }
    return new BigGroupByTable(exec, inTable,
        Arrays.stream(m_groupColNames).collect(Collectors.toList()),
        aggregators, globalSettings, false, COLUMN_NAME_POLICY, false);
}
Aggregations