use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class GroupByNodeModel method getAggregators.
/**
 * Assembles the {@link ColumnAggregator}s that apply to the given input table specification.
 * <p>
 * The three aggregator sources are consulted in a fixed order: first the manually added column aggregators,
 * then the pattern based aggregators and finally the data type based aggregators. The group columns and
 * every column that was already claimed by an earlier source are skipped by the later sources. Within the
 * pattern based and within the data type based source a column receives one aggregator for each entry
 * that matches it.
 * </p>
 * <p>
 * A manually added aggregator is accepted if the input specification contains a column with its original
 * column name and its original data type is a super type of that column's type. All other manually added
 * aggregators are reported via <code>invalidColAggrs</code>.
 * </p>
 *
 * @param inputSpec the {@link DataTableSpec} of the input table
 * @param groupColumns the names of the columns to group by
 * @param columnAggregators the manually added {@link ColumnAggregator}s
 * @param patternAggregators the {@link PatternAggregator}s
 * @param dataTypeAggregators the {@link DataTypeAggregator}s
 * @param invalidColAggrs {@link List} that receives the rejected manually added aggregators, can be
 * <code>null</code> in which case they are dropped silently
 * @return the list of all {@link ColumnAggregator}s to use
 * @since 2.11
 */
public static List<ColumnAggregator> getAggregators(final DataTableSpec inputSpec, final Collection<String> groupColumns, final List<ColumnAggregator> columnAggregators, final Collection<PatternAggregator> patternAggregators, final Collection<DataTypeAggregator> dataTypeAggregators, final List<ColumnAggregator> invalidColAggrs) {
    final List<ColumnAggregator> selected = new ArrayList<>(columnAggregators.size());
    final Set<String> claimedNames = new HashSet<>(inputSpec.getNumColumns());
    claimedNames.addAll(groupColumns);

    // step 1: the manually added aggregators
    for (final ColumnAggregator manual : columnAggregators) {
        final String manualColName = manual.getOriginalColName();
        final DataColumnSpec manualSpec = inputSpec.getColumnSpec(manualColName);
        final boolean applicable =
            manualSpec != null && manual.getOriginalDataType().isASuperTypeOf(manualSpec.getType());
        if (applicable) {
            claimedNames.add(manualColName);
            selected.add(manual);
        } else if (invalidColAggrs != null) {
            invalidColAggrs.add(manual);
        }
    }

    // step 2: the pattern based aggregators, only if unclaimed columns remain
    final boolean tryPatterns = inputSpec.getNumColumns() > claimedNames.size() && !patternAggregators.isEmpty();
    if (tryPatterns) {
        for (final DataColumnSpec candidate : inputSpec) {
            final String candidateName = candidate.getName();
            if (claimedNames.contains(candidateName)) {
                continue;
            }
            // no early exit: every matching pattern contributes its own aggregator
            for (final PatternAggregator byPattern : patternAggregators) {
                final Pattern regex = byPattern.getRegexPattern();
                if (regex == null || !regex.matcher(candidateName).matches()
                    || !byPattern.isCompatible(candidate)) {
                    continue;
                }
                selected.add(
                    new ColumnAggregator(candidate, byPattern.getMethodTemplate(), byPattern.inclMissingCells()));
                claimedNames.add(candidateName);
            }
        }
    }

    // step 3: the data type based aggregators, only if there are still unclaimed columns
    final boolean tryTypes = inputSpec.getNumColumns() > claimedNames.size() && !dataTypeAggregators.isEmpty();
    if (tryTypes) {
        for (final DataColumnSpec candidate : inputSpec) {
            final String candidateName = candidate.getName();
            if (claimedNames.contains(candidateName)) {
                continue;
            }
            final DataType candidateType = candidate.getType();
            // no early exit: every compatible type aggregator contributes its own aggregator
            for (final DataTypeAggregator byType : dataTypeAggregators) {
                if (!byType.isCompatibleType(candidateType)) {
                    continue;
                }
                selected.add(new ColumnAggregator(candidate, byType.getMethodTemplate(), byType.inclMissingCells()));
                claimedNames.add(candidateName);
            }
        }
    }
    return selected;
}
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class GroupByNodeModel method validateSettings.
/**
 * {@inheritDoc}
 * <p>
 * Validates the group column, maximum unique values and hilite settings. If the column aggregators can
 * be loaded from the settings it additionally requires at least one group column or aggregation option
 * and runs the duplicate aggregator check with the configured column name policy.
 * </p>
 *
 * @throws InvalidSettingsException if one of the settings models rejects the settings or if the
 * aggregator checks fail; in the latter case the causing {@link IllegalArgumentException} is attached
 * as cause
 */
@Override
protected void validateSettings(final NodeSettingsRO settings) throws InvalidSettingsException {
    m_groupByCols.validateSettings(settings);
    // FIX bug 5040: potential problem with clone settings method when in-/exclude list contain same elements
    final SettingsModelFilterString tmpSett = new SettingsModelFilterString(CFG_GROUP_BY_COLUMNS);
    tmpSett.loadSettingsFrom(settings);
    final List<String> groupByCols = tmpSett.getIncludeList();
    m_maxUniqueValues.validateSettings(settings);
    m_enableHilite.validateSettings(settings);
    // aggregating a column several times became possible with Knime 2.0 as well as the naming policy
    try {
        final List<ColumnAggregator> aggregators = ColumnAggregator.loadColumnAggregators(settings);
        final List<DataTypeAggregator> typeAggregators = new LinkedList<>();
        final List<PatternAggregator> patternAggregators = new LinkedList<>();
        try {
            patternAggregators.addAll(PatternAggregator.loadAggregators(settings, CFG_PATTERN_AGGREGATORS));
            typeAggregators.addAll(DataTypeAggregator.loadAggregators(settings, CFG_DATA_TYPE_AGGREGATORS));
        } catch (final InvalidSettingsException ignored) {
            // deliberately ignored: pattern and data type aggregators were introduced in 2.11,
            // the lists stay as far as they have been filled
        }
        if (groupByCols.isEmpty() && aggregators.isEmpty() && patternAggregators.isEmpty() && typeAggregators.isEmpty()) {
            throw new IllegalArgumentException("Please select at least one group column or aggregation option");
        }
        ColumnNamePolicy namePolicy;
        try {
            final String policyLabel = ((SettingsModelString) m_columnNamePolicy.createCloneWithValidatedValue(settings)).getStringValue();
            namePolicy = ColumnNamePolicy.getPolicy4Label(policyLabel);
        } catch (final InvalidSettingsException e) {
            // fall back to the compatibility lookup of the column name policy
            namePolicy = compGetColumnNamePolicy(settings);
        }
        checkDuplicateAggregators(namePolicy, aggregators);
    } catch (final InvalidSettingsException ignored) {
        // deliberately ignored: these settings are prior Knime 2.0 and can't contain
        // a column several times
    } catch (final IllegalArgumentException e) {
        // keep the original exception as cause so that its stack trace is not lost
        throw new InvalidSettingsException(e.getMessage(), e);
    }
}
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class GroupByNodeModel method createGroupByTable.
/**
 * Groups the given table and returns the resulting group-by table.
 * <p>
 * All aggregators are reset before the table is created. The in-memory implementation is used if
 * <code>inMemory</code> is set or if no group column is given, otherwise the implementation for big
 * tables is used. If hiliting is enabled the hilite mapping of the result is registered, and a message
 * about skipped groups is set as node warning and written to the log.
 * </p>
 *
 * @param exec execution context
 * @param table input table to group
 * @param groupByCols column selected for group-by operation
 * @param inMemory keep data in memory
 * @param sortInMemory not read by this method
 * @param retainOrder reconstructs original data order
 * @param aggregators column aggregation to use
 * @return table with group and aggregation columns
 * @throws CanceledExecutionException if the group-by table generation was
 * canceled externally
 * @deprecated sortInMemory is no longer required
 * @see #createGroupByTable(ExecutionContext, BufferedDataTable, List,
 * boolean, boolean, List)
 */
@Deprecated
protected final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final List<String> groupByCols, final boolean inMemory, final boolean sortInMemory, final boolean retainOrder, final List<ColumnAggregator> aggregators) throws CanceledExecutionException {
    final int uniqueValueLimit = m_maxUniqueValues.getIntValue();
    final boolean hiliteEnabled = m_enableHilite.getBooleanValue();
    final ColumnNamePolicy namePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    final GlobalSettings settings = createGlobalSettings(exec, table, groupByCols, uniqueValueLimit);

    // a reset forces every aggregator to create a fresh operator
    for (final ColumnAggregator aggregator : aggregators) {
        aggregator.reset();
    }
    final ColumnAggregator[] aggregatorArray = aggregators.toArray(new ColumnAggregator[0]);

    final GroupByTable grouped;
    if (!inMemory && !groupByCols.isEmpty()) {
        grouped = new BigGroupByTable(exec, table, groupByCols, aggregatorArray, settings, hiliteEnabled, namePolicy, retainOrder);
    } else {
        grouped = new MemoryGroupByTable(exec, table, groupByCols, aggregatorArray, settings, hiliteEnabled, namePolicy, retainOrder);
    }

    if (m_enableHilite.getBooleanValue()) {
        setHiliteMapping(new DefaultHiLiteMapper(grouped.getHiliteMapping()));
    }

    // short version for the node warning, complete version for the log
    final String shortWarning = grouped.getSkippedGroupsMessage(3, 3);
    if (shortWarning != null) {
        setWarningMessage(shortWarning);
        LOGGER.info(grouped.getSkippedGroupsMessage(Integer.MAX_VALUE, Integer.MAX_VALUE));
    }
    return grouped;
}
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class GroupByTable method getWorkingCols.
/**
 * Collects the names of all columns that are needed for the grouping: the group by columns, the
 * original column of each aggregator and the additional columns reported by each aggregator's operator.
 * The names are returned in the order in which they were first encountered.
 *
 * @param globalSettings the {@link GlobalSettings} used to obtain the operator of each aggregator
 * @param groupByCols the group by column names
 * @param colAggregators the aggregation columns
 * @return {@link Set} with the name of all columns to work with
 */
private Set<String> getWorkingCols(final GlobalSettings globalSettings, final List<String> groupByCols, final ColumnAggregator[] colAggregators) {
    final Set<String> workingCols = new LinkedHashSet<>(groupByCols);
    for (int i = 0; i < colAggregators.length; i++) {
        final ColumnAggregator aggregator = colAggregators[i];
        workingCols.add(aggregator.getOriginalColName());
        final Collection<String> extraCols = aggregator.getOperator(globalSettings).getAdditionalColumnNames();
        if (extraCols == null || extraCols.isEmpty()) {
            // this operator needs no columns besides the aggregated one
            continue;
        }
        workingCols.addAll(extraCols);
    }
    return workingCols;
}
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class MemoryGroupByTable method createResultTable.
/**
 * Writes one row per entry of <code>m_vals</code> into a new data container and returns the resulting
 * table. Each row consists of the group values followed by the result of every aggregator of that
 * group. The row key is created from the running group index. Operators that report to be skipped are
 * recorded via <code>addSkippedGroup</code>, and every operator is reset after its result was read.
 *
 * @param exec the {@link ExecutionContext} used to create the container, to report progress and to
 * check for cancellation once per group
 * @param resultSpec the {@link DataTableSpec} of the table to create
 * @return the table with all result rows
 * @throws CanceledExecutionException if the execution was canceled
 */
private BufferedDataTable createResultTable(final ExecutionContext exec, final DataTableSpec resultSpec) throws CanceledExecutionException {
    final BufferedDataContainer container = exec.createDataContainer(resultSpec);
    final int groupCount = m_vals.size();
    int groupIdx = 0;
    for (final Entry<GroupKey, ColumnAggregator[]> group : m_vals.entrySet()) {
        exec.checkCanceled();
        exec.setProgress(groupIdx / (double) groupCount, "Writing group " + groupIdx + " of " + groupCount);
        final GroupKey key = group.getKey();
        final ColumnAggregator[] aggregators = group.getValue();
        final RowKey rowKey = RowKey.createRowKey(groupIdx);
        groupIdx++;

        final DataCell[] cells = new DataCell[key.size() + aggregators.length];
        int pos = 0;
        // the group values come first ...
        for (final DataCell groupCell : key.getGroupVals()) {
            cells[pos] = groupCell;
            pos++;
        }
        // ... followed by the aggregation results
        for (int i = 0; i < aggregators.length; i++) {
            final ColumnAggregator aggregator = aggregators[i];
            final AggregationOperator op = aggregator.getOperator(getGlobalSettings());
            cells[pos] = op.getResult();
            pos++;
            if (op.isSkipped()) {
                // remember the group and the column that caused the skipping
                addSkippedGroup(aggregator.getOriginalColName(), op.getSkipMessage(), key.getGroupVals());
            }
            // make the operator ready for the next group
            op.reset();
        }
        container.addRowToTable(new DefaultRow(rowKey, cells));

        // map the new row key to the keys of the original rows if hiliting is enabled
        if (isEnableHilite()) {
            addHiliteMapping(rowKey, m_rowKeys.get(key));
        }
    }
    container.close();
    return container.getTable();
}
Aggregations