Search in sources :

Example 6 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByNodeModel method getAggregators.

/**
 * Builds the list of {@link ColumnAggregator}s to apply to the given input table.
 * <p>
 * Aggregators are collected in three passes: first the manually added column aggregators, then the pattern
 * based aggregators and finally the data type based aggregators. A column that is a group column or that was
 * claimed by an earlier pass is skipped by every later pass. Within the pattern pass and the data type pass a
 * single column may match several aggregators; one {@link ColumnAggregator} is created per match.
 *
 * @param inputSpec the {@link DataTableSpec} of the input table
 * @param groupColumns the columns to group by; these are never aggregated by the pattern or data type pass
 * @param columnAggregators the manually added {@link ColumnAggregator}s
 * @param patternAggregators the {@link PatternAggregator}s
 * @param dataTypeAggregators the {@link DataTypeAggregator}s
 * @param invalidColAggrs {@link List} that receives every manually added aggregator whose column is missing
 * from the input spec or whose original data type is not a super type of the column's current type; may be
 * <code>null</code> if the caller is not interested in them
 * @return the list of all {@link ColumnAggregator}s to use
 * @since 2.11
 */
public static List<ColumnAggregator> getAggregators(final DataTableSpec inputSpec, final Collection<String> groupColumns, final List<ColumnAggregator> columnAggregators, final Collection<PatternAggregator> patternAggregators, final Collection<DataTypeAggregator> dataTypeAggregators, final List<ColumnAggregator> invalidColAggrs) {
    final List<ColumnAggregator> result = new ArrayList<>(columnAggregators.size());
    final Set<String> claimedColumns = new HashSet<>(inputSpec.getNumColumns());
    claimedColumns.addAll(groupColumns);

    // pass 1: manually selected aggregators, kept only if their column still exists with a compatible type
    for (final ColumnAggregator manualAggr : columnAggregators) {
        final String columnName = manualAggr.getOriginalColName();
        final DataColumnSpec currentSpec = inputSpec.getColumnSpec(columnName);
        final boolean stillValid = currentSpec != null && manualAggr.getOriginalDataType().isASuperTypeOf(currentSpec.getType());
        if (stillValid) {
            claimedColumns.add(columnName);
            result.add(manualAggr);
        } else if (invalidColAggrs != null) {
            invalidColAggrs.add(manualAggr);
        }
    }

    // pass 2: pattern based aggregators for all columns that are not claimed yet
    final boolean runPatternPass = inputSpec.getNumColumns() > claimedColumns.size() && !patternAggregators.isEmpty();
    if (runPatternPass) {
        for (final DataColumnSpec candidate : inputSpec) {
            final String candidateName = candidate.getName();
            if (claimedColumns.contains(candidateName)) {
                continue;
            }
            // the column is only marked as claimed inside the loop, so it can match several patterns
            for (final PatternAggregator patternAggr : patternAggregators) {
                final Pattern regex = patternAggr.getRegexPattern();
                if (regex == null || !regex.matcher(candidateName).matches() || !patternAggr.isCompatible(candidate)) {
                    continue;
                }
                result.add(new ColumnAggregator(candidate, patternAggr.getMethodTemplate(), patternAggr.inclMissingCells()));
                claimedColumns.add(candidateName);
            }
        }
    }

    // pass 3: data type based aggregators for whatever is still left
    final boolean runTypePass = inputSpec.getNumColumns() > claimedColumns.size() && !dataTypeAggregators.isEmpty();
    if (runTypePass) {
        for (final DataColumnSpec candidate : inputSpec) {
            final String candidateName = candidate.getName();
            if (claimedColumns.contains(candidateName)) {
                continue;
            }
            final DataType candidateType = candidate.getType();
            for (final DataTypeAggregator typeAggr : dataTypeAggregators) {
                if (!typeAggr.isCompatibleType(candidateType)) {
                    continue;
                }
                result.add(new ColumnAggregator(candidate, typeAggr.getMethodTemplate(), typeAggr.inclMissingCells()));
                claimedColumns.add(candidateName);
            }
        }
    }
    return result;
}
Also used : PatternAggregator(org.knime.base.data.aggregation.dialogutil.pattern.PatternAggregator) Pattern(java.util.regex.Pattern) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) ArrayList(java.util.ArrayList) DataType(org.knime.core.data.DataType) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataTypeAggregator(org.knime.base.data.aggregation.dialogutil.type.DataTypeAggregator) HashSet(java.util.HashSet)

Example 7 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByNodeModel method validateSettings.

/**
 * {@inheritDoc}
 * <p>
 * Validates the group column, maximum unique value and hiliting settings. If the settings contain column
 * aggregators it additionally checks that at least one group column or aggregation option is selected and
 * that the aggregators do not clash under the configured column name policy.
 */
@Override
protected void validateSettings(final NodeSettingsRO settings) throws InvalidSettingsException {
    m_groupByCols.validateSettings(settings);
    // FIX bug 5040: potential problem with clone settings method when in-/exclude list contain same elements
    final SettingsModelFilterString tmpSett = new SettingsModelFilterString(CFG_GROUP_BY_COLUMNS);
    tmpSett.loadSettingsFrom(settings);
    final List<String> groupByCols = tmpSett.getIncludeList();
    m_maxUniqueValues.validateSettings(settings);
    m_enableHilite.validateSettings(settings);
    // the option to use a column multiple times was introduced
    // with Knime 2.0 as well as the naming policy
    try {
        final List<ColumnAggregator> aggregators = ColumnAggregator.loadColumnAggregators(settings);
        final List<DataTypeAggregator> typeAggregators = new LinkedList<>();
        final List<PatternAggregator> patternAggregators = new LinkedList<>();
        try {
            patternAggregators.addAll(PatternAggregator.loadAggregators(settings, CFG_PATTERN_AGGREGATORS));
            typeAggregators.addAll(DataTypeAggregator.loadAggregators(settings, CFG_DATA_TYPE_AGGREGATORS));
        } catch (final InvalidSettingsException ignored) {
            // pattern and data type aggregators were introduced in 2.11;
            // settings without them are still valid, so continue with what was loaded
        }
        if (groupByCols.isEmpty() && aggregators.isEmpty() && patternAggregators.isEmpty() && typeAggregators.isEmpty()) {
            throw new IllegalArgumentException("Please select at least one group column or aggregation option");
        }
        ColumnNamePolicy namePolicy;
        try {
            final String policyLabel = ((SettingsModelString) m_columnNamePolicy.createCloneWithValidatedValue(settings)).getStringValue();
            namePolicy = ColumnNamePolicy.getPolicy4Label(policyLabel);
        } catch (final InvalidSettingsException ignored) {
            // no valid policy setting available: fall back to the compatibility lookup
            namePolicy = compGetColumnNamePolicy(settings);
        }
        checkDuplicateAggregators(namePolicy, aggregators);
    } catch (final InvalidSettingsException ignored) {
        // these settings are prior Knime 2.0 and can't contain
        // a column several times
    } catch (final IllegalArgumentException e) {
        // report as invalid settings but keep the original exception as cause for diagnostics
        throw new InvalidSettingsException(e.getMessage(), e);
    }
}
Also used : PatternAggregator(org.knime.base.data.aggregation.dialogutil.pattern.PatternAggregator) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataTypeAggregator(org.knime.base.data.aggregation.dialogutil.type.DataTypeAggregator) LinkedList(java.util.LinkedList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException)

Example 8 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByNodeModel method createGroupByTable.

/**
 * Groups the given input table and aggregates the remaining columns.
 * <p>
 * All aggregators are reset before the table is built. The grouping is done in memory if requested or if
 * there are no group columns. Skipped groups are reported as node warning (shortened) and written to the
 * log (complete).
 *
 * @param exec execution context
 * @param table input table to group
 * @param groupByCols column selected for group-by operation
 * @param inMemory keep data in memory
 * @param sortInMemory not used by this method
 * @param retainOrder reconstructs original data order
 * @param aggregators column aggregation to use
 * @return table with group and aggregation columns
 * @throws CanceledExecutionException if the group-by table generation was
 *         canceled externally
 * @deprecated sortInMemory is no longer required
 * @see #createGroupByTable(ExecutionContext, BufferedDataTable, List,
 * boolean, boolean, List)
 */
@Deprecated
protected final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final List<String> groupByCols, final boolean inMemory, final boolean sortInMemory, final boolean retainOrder, final List<ColumnAggregator> aggregators) throws CanceledExecutionException {
    final int uniqueValueLimit = m_maxUniqueValues.getIntValue();
    final boolean hiliteEnabled = m_enableHilite.getBooleanValue();
    final ColumnNamePolicy namePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    final GlobalSettings settings = createGlobalSettings(exec, table, groupByCols, uniqueValueLimit);

    // reset all aggregators in order to enforce operator creation
    for (final ColumnAggregator aggregator : aggregators) {
        aggregator.reset();
    }

    final ColumnAggregator[] aggregatorArray = aggregators.toArray(new ColumnAggregator[0]);
    final boolean groupInMemory = inMemory || groupByCols.isEmpty();
    final GroupByTable groupedTable = groupInMemory
        ? new MemoryGroupByTable(exec, table, groupByCols, aggregatorArray, settings, hiliteEnabled, namePolicy, retainOrder)
        : new BigGroupByTable(exec, table, groupByCols, aggregatorArray, settings, hiliteEnabled, namePolicy, retainOrder);

    if (m_enableHilite.getBooleanValue()) {
        setHiliteMapping(new DefaultHiLiteMapper(groupedTable.getHiliteMapping()));
    }

    // check for skipped columns
    final String shortWarning = groupedTable.getSkippedGroupsMessage(3, 3);
    if (shortWarning == null) {
        return groupedTable;
    }
    setWarningMessage(shortWarning);
    // the node warning is shortened, the log gets the complete message
    LOGGER.info(groupedTable.getSkippedGroupsMessage(Integer.MAX_VALUE, Integer.MAX_VALUE));
    return groupedTable;
}
Also used : ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) GlobalSettings(org.knime.base.data.aggregation.GlobalSettings) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper)

Example 9 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByTable method getWorkingCols.

/**
 * Collects the names of all columns that are needed for the grouping: the group columns, the column of
 * each aggregator and any additional columns the aggregator's operator asks for. The returned set keeps
 * the order in which the names were added.
 *
 * @param globalSettings the {@link GlobalSettings}
 * @param groupByCols the group by column names
 * @param colAggregators the aggregation columns
 * @return {@link Set} with the name of all columns to work with
 */
private Set<String> getWorkingCols(final GlobalSettings globalSettings, final List<String> groupByCols, final ColumnAggregator[] colAggregators) {
    final Set<String> workingCols = new LinkedHashSet<>(groupByCols);
    for (final ColumnAggregator aggregator : colAggregators) {
        workingCols.add(aggregator.getOriginalColName());
        final Collection<String> extraCols = aggregator.getOperator(globalSettings).getAdditionalColumnNames();
        if (extraCols == null || extraCols.isEmpty()) {
            // the operator needs no columns besides the aggregated one
            continue;
        }
        workingCols.addAll(extraCols);
    }
    return workingCols;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator)

Example 10 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class MemoryGroupByTable method createResultTable.

/**
 * Writes one result row per collected group into a new table.
 * <p>
 * Each row consists of the group values followed by the result of every column aggregator of that group.
 * The row key is created from the running group index. Operators are reset after their result has been
 * read, and skipped operators are recorded together with the affected group values.
 *
 * @param exec the {@link ExecutionContext} used to create the container, report progress and check for
 * cancellation
 * @param resultSpec the {@link DataTableSpec} of the result table
 * @return the table with one row per group
 * @throws CanceledExecutionException if the execution was canceled
 */
private BufferedDataTable createResultTable(final ExecutionContext exec, final DataTableSpec resultSpec) throws CanceledExecutionException {
    final BufferedDataContainer container = exec.createDataContainer(resultSpec);
    final int groupCount = m_vals.size();
    int groupIdx = 0;
    for (final Entry<GroupKey, ColumnAggregator[]> group : m_vals.entrySet()) {
        exec.checkCanceled();
        exec.setProgress(groupIdx / (double) groupCount, "Writing group " + groupIdx + " of " + groupCount);
        final GroupKey groupKey = group.getKey();
        final ColumnAggregator[] aggregators = group.getValue();
        final RowKey rowKey = RowKey.createRowKey(groupIdx);
        groupIdx++;

        final DataCell[] cells = new DataCell[groupKey.size() + aggregators.length];
        int cellIdx = 0;
        // the group values come first ...
        for (final DataCell groupCell : groupKey.getGroupVals()) {
            cells[cellIdx] = groupCell;
            cellIdx++;
        }
        // ... followed by the aggregation results
        for (final ColumnAggregator aggregator : aggregators) {
            final AggregationOperator operator = aggregator.getOperator(getGlobalSettings());
            cells[cellIdx] = operator.getResult();
            cellIdx++;
            if (operator.isSkipped()) {
                // remember the skipped group and the column that caused the skipping
                addSkippedGroup(aggregator.getOriginalColName(), operator.getSkipMessage(), groupKey.getGroupVals());
            }
            // the operator has to be reset before it is used for the next group
            operator.reset();
        }

        final DataRow resultRow = new DefaultRow(rowKey, cells);
        container.addRowToTable(resultRow);
        // map the new row key to the keys of the input rows if hiliting is enabled
        if (isEnableHilite()) {
            final Set<RowKey> inputKeys = m_rowKeys.get(groupKey);
            addHiliteMapping(rowKey, inputKeys);
        }
    }
    container.close();
    return container.getTable();
}
Also used : AggregationOperator(org.knime.base.data.aggregation.AggregationOperator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

ColumnAggregator (org.knime.base.data.aggregation.ColumnAggregator)33 DataColumnSpec (org.knime.core.data.DataColumnSpec)14 HashSet (java.util.HashSet)9 LinkedList (java.util.LinkedList)9 ArrayList (java.util.ArrayList)8 AggregationMethod (org.knime.base.data.aggregation.AggregationMethod)8 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)5 DataCell (org.knime.core.data.DataCell)4 DataRow (org.knime.core.data.DataRow)4 DataTableSpec (org.knime.core.data.DataTableSpec)4 RowKey (org.knime.core.data.RowKey)4 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)4 GlobalSettings (org.knime.base.data.aggregation.GlobalSettings)3 PatternAggregator (org.knime.base.data.aggregation.dialogutil.pattern.PatternAggregator)3 DataTypeAggregator (org.knime.base.data.aggregation.dialogutil.type.DataTypeAggregator)3 GroupByTable (org.knime.base.node.preproc.groupby.GroupByTable)3 DataType (org.knime.core.data.DataType)3 DefaultRow (org.knime.core.data.def.DefaultRow)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3