Search in sources :

Example 1 with BigGroupByTable

use of org.knime.base.node.preproc.groupby.BigGroupByTable in project knime-core by knime.

the class CrosstabNodeModel method createGroupByTable.

/**
 * Groups the input table by the given columns and aggregates a single
 * column per group.
 * <p>
 * If a weight column is configured in the node settings, that column is
 * aggregated with a {@code NonNegativeSumOperator} and missing values are
 * excluded. Otherwise the first group-by column is aggregated with a
 * {@code CountOperator} and missing values are included.
 * <p>
 * When hiliting is enabled the hilite mapping of the result is registered,
 * and a non-null skipped-groups message is set as the warning message.
 *
 * @param exec execution context
 * @param table input table to group
 * @param groupByCols names of the columns to group by
 * @return table with group and aggregation columns
 * @throws CanceledExecutionException if the group-by table generation was
 *         canceled externally
 */
private final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final List<String> groupByCols) throws CanceledExecutionException {
    final boolean hiliteEnabled = m_settings.getEnableHiliting();
    final GlobalSettings settings = GlobalSettings.builder()
        .setFileStoreFactory(FileStoreFactory.createWorkflowFileStoreFactory(exec))
        .setGroupColNames(groupByCols)
        .setMaxUniqueValues(Integer.MAX_VALUE)
        .setValueDelimiter(GlobalSettings.STANDARD_DELIMITER)
        .setDataTableSpec(table.getDataTableSpec())
        .setNoOfRows(table.size())
        .setAggregationContext(AggregationContext.ROW_AGGREGATION)
        .build();

    // A configured weight column switches from counting rows to summing weights.
    final String weightColumn = m_settings.getWeightColumn();
    final boolean weighted = weightColumn != null;
    // Missing cells are only included when counting; for counting any existing
    // column works, so the first group-by column is used.
    final boolean includeMissing = !weighted;
    final String aggregatedColName = weighted ? weightColumn : groupByCols.get(0);
    final DataColumnSpec aggregatedColSpec = table.getDataTableSpec().getColumnSpec(aggregatedColName);
    final OperatorColumnSettings operatorSettings = new OperatorColumnSettings(includeMissing, aggregatedColSpec);
    final ColumnAggregator aggregator = weighted
        ? new ColumnAggregator(aggregatedColSpec, new NonNegativeSumOperator(settings, operatorSettings), includeMissing)
        : new ColumnAggregator(aggregatedColSpec, new CountOperator(settings, operatorSettings), includeMissing);

    // Row order is not retained (last argument).
    final GroupByTable grouped = new BigGroupByTable(exec, table, groupByCols, new ColumnAggregator[] { aggregator }, settings, hiliteEnabled, ColumnNamePolicy.AGGREGATION_METHOD_COLUMN_NAME, false);
    if (hiliteEnabled) {
        setHiliteMapping(new DefaultHiLiteMapper(grouped.getHiliteMapping()));
    }

    // Surface any skipped groups to the user as a node warning.
    final String skippedGroupsMsg = grouped.getSkippedGroupsMessage(3, 3);
    if (skippedGroupsMsg != null) {
        setWarningMessage(skippedGroupsMsg);
    }
    return grouped;
}
Also used : OperatorColumnSettings(org.knime.base.data.aggregation.OperatorColumnSettings) GlobalSettings(org.knime.base.data.aggregation.GlobalSettings) ColumnNamePolicy(org.knime.base.node.preproc.groupby.ColumnNamePolicy) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) BigGroupByTable(org.knime.base.node.preproc.groupby.BigGroupByTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) CountOperator(org.knime.base.data.aggregation.general.CountOperator)

Example 2 with BigGroupByTable

use of org.knime.base.node.preproc.groupby.BigGroupByTable in project knime-core by knime.

the class NumericOutliersIntervalsCalculator method getGroupByTable.

/**
 * Builds the group-by table for the given input using the global settings
 * and column aggregators derived from it.
 * <p>
 * A {@code MemoryGroupByTable} is created when the in-memory flag is set,
 * otherwise a {@code BigGroupByTable}. Both are created with the same
 * arguments.
 *
 * @param inTable the input data table
 * @param exec the execution context
 * @return the group by table w.r.t. the selected settings
 * @throws CanceledExecutionException if the user has canceled the execution
 */
private GroupByTable getGroupByTable(final BufferedDataTable inTable, final ExecutionContext exec) throws CanceledExecutionException {
    final GlobalSettings settings = getGlobalSettings(inTable);
    final ColumnAggregator[] aggregators = getAggretators(inTable.getDataTableSpec(), settings);
    // Pick the implementation according to the chosen memory setting.
    if (m_inMemory) {
        return new MemoryGroupByTable(exec, inTable, Arrays.stream(m_groupColNames).collect(Collectors.toList()), aggregators, settings, false, COLUMN_NAME_POLICY, false);
    }
    return new BigGroupByTable(exec, inTable, Arrays.stream(m_groupColNames).collect(Collectors.toList()), aggregators, settings, false, COLUMN_NAME_POLICY, false);
}
Also used : ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) BigGroupByTable(org.knime.base.node.preproc.groupby.BigGroupByTable) MemoryGroupByTable(org.knime.base.node.preproc.groupby.MemoryGroupByTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) GlobalSettings(org.knime.base.data.aggregation.GlobalSettings)

Aggregations

ColumnAggregator (org.knime.base.data.aggregation.ColumnAggregator)2 GlobalSettings (org.knime.base.data.aggregation.GlobalSettings)2 BigGroupByTable (org.knime.base.node.preproc.groupby.BigGroupByTable)2 GroupByTable (org.knime.base.node.preproc.groupby.GroupByTable)2 OperatorColumnSettings (org.knime.base.data.aggregation.OperatorColumnSettings)1 CountOperator (org.knime.base.data.aggregation.general.CountOperator)1 ColumnNamePolicy (org.knime.base.node.preproc.groupby.ColumnNamePolicy)1 MemoryGroupByTable (org.knime.base.node.preproc.groupby.MemoryGroupByTable)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DefaultHiLiteMapper (org.knime.core.node.property.hilite.DefaultHiLiteMapper)1