use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class NumericOutliersIntervalsCalculator method getAggretators.
/**
* Creates column aggregators for each of the outlier columns.
*
* @param inSpec the input data table spec
* @param gSettings the global settings
* @return an array of column aggregators
*/
private ColumnAggregator[] getAggretators(final DataTableSpec inSpec, final GlobalSettings gSettings) {
    final ColumnAggregator[] aggregators = new ColumnAggregator[m_outlierColNames.length * 2];
    int pos = 0;
    // for each outlier column name create the aggregators
    for (final String outlierColName : m_outlierColNames) {
        // the operator column settings
        final OperatorColumnSettings cSettings =
            new OperatorColumnSettings(INCL_MISSING_CELLS, inSpec.getColumnSpec(outlierColName));
        // create one aggregator per percentile
        for (final double percentile : PERCENTILES) {
            final AggregationMethod method;
            if (m_useHeuristic) {
                method = new PSquarePercentileOperator(gSettings, cSettings, 100 * percentile);
            } else {
                method = new QuantileOperator(
                    new OperatorData("Quantile", true, false, DoubleValue.class, INCL_MISSING_CELLS),
                    gSettings, cSettings, percentile, m_estimationType.name());
            }
            aggregators[pos++] = new ColumnAggregator(cSettings.getOriginalColSpec(), method);
        }
    }
    // return the aggregators
    return aggregators;
}
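The method above shows the core ColumnAggregator idiom: pair a column's DataColumnSpec with an AggregationMethod and then feed rows to the operator it wraps. A minimal sketch of that life cycle, reusing only calls that appear in the snippets on this page; gSettings, cSettings, inSpec and the table variable are assumed to be in scope and are illustrative, not knime-core code:

// pair the original column spec with an aggregation method (here the 50th percentile via P-square)
final AggregationMethod median = new PSquarePercentileOperator(gSettings, cSettings, 50.0);
final ColumnAggregator aggregator = new ColumnAggregator(cSettings.getOriginalColSpec(), median);

// feed every row to the operator; the column index must match the aggregator's original column
final int colIdx = inSpec.findColumnIndex(aggregator.getOriginalColName());
for (final DataRow row : table) {
    aggregator.getOperator(gSettings).compute(row, colIdx);
}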
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class MovingAggregationNodeModel method configure.
/**
* {@inheritDoc}
*/
@SuppressWarnings("unchecked")
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    if (inSpecs == null || inSpecs.length != 1) {
        throw new InvalidSettingsException("No input table specification available");
    }
    final DataTableSpec inputSpec = inSpecs[0];
    m_columnAggregators2Use.clear();
    final ArrayList<ColumnAggregator> invalidColAggrs = new ArrayList<>(1);
    m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(inputSpec, Collections.EMPTY_LIST,
        m_columnAggregators, m_patternAggregators, m_dataTypeAggregators, invalidColAggrs));
    if (m_columnAggregators2Use.isEmpty()) {
        setWarningMessage("No aggregation column defined");
    }
    if (!invalidColAggrs.isEmpty()) {
        setWarningMessage(invalidColAggrs.size() + " invalid aggregation column(s) found.");
    }
    LOGGER.debug(m_columnAggregators2Use);
    final MovingAggregationTableFactory tableFactory =
        createTableFactory(FileStoreFactory.createNotInWorkflowFileStoreFactory(), inputSpec);
    return new DataTableSpec[] { tableFactory.createResultSpec() };
}
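One small observation on the call above: the raw Collections.EMPTY_LIST is presumably what forces the @SuppressWarnings("unchecked") on configure. A hedged alternative sketch (not how knime-core currently writes it) passes a typed empty list of group columns instead:

// the typed empty list removes the unchecked warning; the behaviour is the same
m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(inputSpec, Collections.<String>emptyList(),
    m_columnAggregators, m_patternAggregators, m_dataTypeAggregators, invalidColAggrs));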
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class CrosstabNodeModel method createGroupByTable.
/**
* Create group-by table.
* @param exec execution context
* @param table input table to group
* @param groupByCols column selected for group-by operation
* @return table with group and aggregation columns
* @throws CanceledExecutionException if the group-by table generation was
* canceled externally
*/
private final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table,
    final List<String> groupByCols) throws CanceledExecutionException {
    final int maxUniqueVals = Integer.MAX_VALUE;
    final boolean enableHilite = m_settings.getEnableHiliting();
    final boolean retainOrder = false;
    final ColumnNamePolicy colNamePolicy = ColumnNamePolicy.AGGREGATION_METHOD_COLUMN_NAME;
    final GlobalSettings globalSettings = GlobalSettings.builder()
        .setFileStoreFactory(FileStoreFactory.createWorkflowFileStoreFactory(exec))
        .setGroupColNames(groupByCols)
        .setMaxUniqueValues(maxUniqueVals)
        .setValueDelimiter(GlobalSettings.STANDARD_DELIMITER)
        .setDataTableSpec(table.getDataTableSpec())
        .setNoOfRows(table.size())
        .setAggregationContext(AggregationContext.ROW_AGGREGATION)
        .build();
    ColumnAggregator collAggregator = null;
    if (null != m_settings.getWeightColumn()) {
        final String weightColumn = m_settings.getWeightColumn();
        // the column aggregator for the weighting column
        final boolean inclMissing = false;
        final DataColumnSpec originalColSpec = table.getDataTableSpec().getColumnSpec(weightColumn);
        final OperatorColumnSettings opColSettings = new OperatorColumnSettings(inclMissing, originalColSpec);
        collAggregator = new ColumnAggregator(originalColSpec,
            new NonNegativeSumOperator(globalSettings, opColSettings), inclMissing);
    } else {
        // use any existing column; it does not matter which one as long as include missing is true
        final boolean inclMissing = true;
        final DataColumnSpec originalColSpec = table.getDataTableSpec().getColumnSpec(groupByCols.get(0));
        final OperatorColumnSettings opColSettings = new OperatorColumnSettings(inclMissing, originalColSpec);
        collAggregator = new ColumnAggregator(originalColSpec,
            new CountOperator(globalSettings, opColSettings), inclMissing);
    }
    final GroupByTable resultTable = new BigGroupByTable(exec, table, groupByCols,
        new ColumnAggregator[] { collAggregator }, globalSettings, enableHilite, colNamePolicy, retainOrder);
    if (enableHilite) {
        setHiliteMapping(new DefaultHiLiteMapper(resultTable.getHiliteMapping()));
    }
    // check for skipped groups
    final String warningMsg = resultTable.getSkippedGroupsMessage(3, 3);
    if (warningMsg != null) {
        setWarningMessage(warningMsg);
    }
    return resultTable;
}
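The if/else above encodes the crosstab's frequency definition: without a weight column every row counts as 1 (CountOperator over an arbitrary existing column, missing cells included), while with a weight column the frequencies are the per-group sums of that column (NonNegativeSumOperator, missing cells skipped). A hedged sketch that pulls this choice into a small factory method; the method name createFrequencyAggregator is illustrative, not knime-core API:

// illustrative helper, not part of knime-core: chooses the aggregator that defines the crosstab frequencies
private static ColumnAggregator createFrequencyAggregator(final DataTableSpec spec, final String weightColumn,
    final String fallbackColumn, final GlobalSettings globalSettings) {
    if (weightColumn != null) {
        // weighted crosstab: sum the (non-negative) weights per group, skipping missing cells
        final DataColumnSpec colSpec = spec.getColumnSpec(weightColumn);
        final OperatorColumnSettings opColSettings = new OperatorColumnSettings(false, colSpec);
        return new ColumnAggregator(colSpec, new NonNegativeSumOperator(globalSettings, opColSettings), false);
    }
    // unweighted crosstab: count every row; any existing column works as long as missing cells are included
    final DataColumnSpec colSpec = spec.getColumnSpec(fallbackColumn);
    final OperatorColumnSettings opColSettings = new OperatorColumnSettings(true, colSpec);
    return new ColumnAggregator(colSpec, new CountOperator(globalSettings, opColSettings), true);
}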
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class BigGroupByTable method createGroupByTable.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table,
    final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are not equal, we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores these members for each chunk. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other,
    // but on rare occasions data cells that are NOT equal also return 0 when
    // compared to each other (such as cells that contain chemical structures).
    // In this rare case the map contains a separate entry for each group of
    // data cells that are pairwise equal in the chunk.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put init to the constructor, as the super() constructor directly calls the current function
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether the row still belongs to the current chunk of group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
                // log the classes of the group cells that cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
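The chunking above hinges on sameChunk(comparators, previousGroup, currentGroup), which is not part of this snippet; conceptually it only has to report whether every group cell of the current row compares as 0 against the previous row. A hedged sketch of that check, an assumption about its shape rather than the actual knime-core implementation (hence the name sameChunkSketch):

// sketch: two rows belong to the same chunk iff every group column comparator reports 0
private static boolean sameChunkSketch(final DataValueComparator[] comparators,
    final DataCell[] previousGroup, final DataCell[] currentGroup) {
    for (int i = 0; i < comparators.length; i++) {
        if (comparators[i].compare(previousGroup[i], currentGroup[i]) != 0) {
            return false;
        }
    }
    return true;
}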
use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.
the class GroupByNodeModel method createGroupBySpec.
/**
* Generate table spec based on the input spec and the selected columns
* for grouping.
* @param origSpec original input spec
* @param groupByCols group-by columns
* @return a new table spec containing the group-by and aggregation columns
* @throws InvalidSettingsException if the group-by spec can't be generated due
* to invalid settings
*/
protected final DataTableSpec createGroupBySpec(final DataTableSpec origSpec, final List<String> groupByCols)
    throws InvalidSettingsException {
    m_columnAggregators2Use.clear();
    final ArrayList<ColumnAggregator> invalidColAggrs = new ArrayList<>(1);
    m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(origSpec, groupByCols, m_columnAggregators,
        m_patternAggregators, m_dataTypeAggregators, invalidColAggrs));
    if (m_columnAggregators2Use.isEmpty()) {
        setWarningMessage("No aggregation column defined");
    }
    LOGGER.debug(m_columnAggregators2Use);
    if (!invalidColAggrs.isEmpty()) {
        setWarningMessage(invalidColAggrs.size() + " invalid aggregation column(s) found.");
    }
    // check for invalid group columns
    try {
        GroupByTable.checkGroupCols(origSpec, groupByCols);
    } catch (final IllegalArgumentException e) {
        throw new InvalidSettingsException(e.getMessage());
    }
    if (origSpec.getNumColumns() > 1 && groupByCols.size() == origSpec.getNumColumns()) {
        setWarningMessage("All columns selected as group by column");
    }
    final ColumnNamePolicy colNamePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    // fail if neither a group column nor an aggregation column is selected
    if (groupByCols.isEmpty() && m_columnAggregators2Use.isEmpty()) {
        throw new InvalidSettingsException("Please select at least one group or aggregation column");
    }
    return GroupByTable.createGroupByTableSpec(origSpec, groupByCols,
        m_columnAggregators2Use.toArray(new ColumnAggregator[0]), colNamePolicy);
}
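In a NodeModel this method is typically driven from configure(), mirroring the MovingAggregationNodeModel snippet further above. A minimal hedged sketch of such a caller; the field m_groupByCols and its getIncludeList() accessor are assumptions for illustration, not the actual GroupByNodeModel fields:

@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    if (inSpecs == null || inSpecs.length != 1) {
        throw new InvalidSettingsException("No input table specification available");
    }
    // m_groupByCols is assumed to hold the configured group column names
    final List<String> groupByCols = m_groupByCols.getIncludeList();
    return new DataTableSpec[] { createGroupBySpec(inSpecs[0], groupByCols) };
}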