Search in sources:

Example 1 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class NumericOutliersIntervalsCalculator method getAggretators.

/**
 * Creates column aggregators for each of the outlier columns.
 *
 * @param inSpec the input data table spec
 * @param gSettings the global settings
 * @return an array of column aggregators
 */
private ColumnAggregator[] getAggretators(final DataTableSpec inSpec, final GlobalSettings gSettings) {
    final ColumnAggregator[] aggregators = new ColumnAggregator[m_outlierColNames.length * 2];
    int pos = 0;
    // for each outlier column name create the aggregators
    for (final String outlierColName : m_outlierColNames) {
        // the operator column settings
        final OperatorColumnSettings cSettings = new OperatorColumnSettings(INCL_MISSING_CELLS, inSpec.getColumnSpec(outlierColName));
        // create one aggregator per percentile for this column
        for (final double percentile : PERCENTILES) {
            final AggregationMethod method;
            if (m_useHeuristic) {
                method = new PSquarePercentileOperator(gSettings, cSettings, 100 * percentile);
            } else {
                method = new QuantileOperator(new OperatorData("Quantile", true, false, DoubleValue.class, INCL_MISSING_CELLS), gSettings, cSettings, percentile, m_estimationType.name());
            }
            aggregators[pos++] = new ColumnAggregator(cSettings.getOriginalColSpec(), method);
        }
    }
    // return the aggregators
    return aggregators;
}
Also used : AggregationMethod(org.knime.base.data.aggregation.AggregationMethod) PSquarePercentileOperator(org.knime.base.data.aggregation.numerical.PSquarePercentileOperator) QuantileOperator(org.knime.base.data.aggregation.numerical.QuantileOperator) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) OperatorColumnSettings(org.knime.base.data.aggregation.OperatorColumnSettings) OperatorData(org.knime.base.data.aggregation.OperatorData)
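For orientation, here is a minimal sketch of how aggregators built this way are typically consumed (the compute loop mirrors the BigGroupByTable example further down on this page; the table variable and the getResult() accessor are assumptions for illustration, not taken from this class):

// Hypothetical consumption sketch, not part of NumericOutliersIntervalsCalculator.
// 'table' is an assumed BufferedDataTable containing the outlier columns.
for (final ColumnAggregator colAggr : getAggretators(inSpec, gSettings)) {
    final AggregationOperator op = colAggr.getOperator(gSettings);
    final int colIdx = inSpec.findColumnIndex(colAggr.getOriginalColName());
    for (final DataRow row : table) {
        op.compute(row, colIdx); // feed every row into the percentile operator
    }
    final DataCell percentile = op.getResult(); // assumed accessor for the aggregated cell
}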

Example 2 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class MovingAggregationNodeModel method configure.

/**
 * {@inheritDoc}
 */
@SuppressWarnings("unchecked")
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException {
    if (inSpecs == null || inSpecs.length != 1) {
        throw new InvalidSettingsException("No input table specification available");
    }
    final DataTableSpec inputSpec = inSpecs[0];
    m_columnAggregators2Use.clear();
    final ArrayList<ColumnAggregator> invalidColAggrs = new ArrayList<>(1);
    m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(inputSpec, Collections.EMPTY_LIST, m_columnAggregators, m_patternAggregators, m_dataTypeAggregators, invalidColAggrs));
    if (m_columnAggregators2Use.isEmpty()) {
        setWarningMessage("No aggregation column defined");
    }
    if (!invalidColAggrs.isEmpty()) {
        setWarningMessage(invalidColAggrs.size() + " invalid aggregation column(s) found.");
    }
    LOGGER.debug(m_columnAggregators2Use);
    final MovingAggregationTableFactory tableFactory = createTableFactory(FileStoreFactory.createNotInWorkflowFileStoreFactory(), inputSpec);
    return new DataTableSpec[] { tableFactory.createResultSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList)
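As a side note, the raw Collections.EMPTY_LIST in the call above appears to be what forces the @SuppressWarnings("unchecked") on this method. A minimal variant sketch of the same call with a typed empty list (all other arguments unchanged from the code above) would avoid the warning:

m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(inputSpec,
    Collections.<String>emptyList(), m_columnAggregators, m_patternAggregators,
    m_dataTypeAggregators, invalidColAggrs));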

Example 3 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class CrosstabNodeModel method createGroupByTable.

/**
 * Create group-by table.
 * @param exec execution context
 * @param table input table to group
 * @param groupByCols column selected for group-by operation
 * @return table with group and aggregation columns
 * @throws CanceledExecutionException if the group-by table generation was
 *         canceled externally
 */
private final GroupByTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final List<String> groupByCols) throws CanceledExecutionException {
    final int maxUniqueVals = Integer.MAX_VALUE;
    final boolean enableHilite = m_settings.getEnableHiliting();
    final boolean retainOrder = false;
    final ColumnNamePolicy colNamePolicy = ColumnNamePolicy.AGGREGATION_METHOD_COLUMN_NAME;
    final GlobalSettings globalSettings = GlobalSettings.builder()
        .setFileStoreFactory(FileStoreFactory.createWorkflowFileStoreFactory(exec))
        .setGroupColNames(groupByCols)
        .setMaxUniqueValues(maxUniqueVals)
        .setValueDelimiter(GlobalSettings.STANDARD_DELIMITER)
        .setDataTableSpec(table.getDataTableSpec())
        .setNoOfRows(table.size())
        .setAggregationContext(AggregationContext.ROW_AGGREGATION)
        .build();
    ColumnAggregator collAggregator = null;
    if (null != m_settings.getWeightColumn()) {
        final String weightColumn = m_settings.getWeightColumn();
        // the column aggregator for the weighting column
        final boolean inclMissing = false;
        final DataColumnSpec originalColSpec = table.getDataTableSpec().getColumnSpec(weightColumn);
        final OperatorColumnSettings opColSettings = new OperatorColumnSettings(inclMissing, originalColSpec);
        collAggregator = new ColumnAggregator(originalColSpec, new NonNegativeSumOperator(globalSettings, opColSettings), inclMissing);
    } else {
        // use any column; it does not matter which one as long as it exists
        // and include missing is true
        final boolean inclMissing = true;
        final DataColumnSpec originalColSpec = table.getDataTableSpec().getColumnSpec(groupByCols.get(0));
        final OperatorColumnSettings opColSettings = new OperatorColumnSettings(inclMissing, originalColSpec);
        collAggregator = new ColumnAggregator(originalColSpec, new CountOperator(globalSettings, opColSettings), inclMissing);
    }
    final GroupByTable resultTable = new BigGroupByTable(exec, table, groupByCols, new ColumnAggregator[] { collAggregator }, globalSettings, enableHilite, colNamePolicy, retainOrder);
    if (enableHilite) {
        setHiliteMapping(new DefaultHiLiteMapper(resultTable.getHiliteMapping()));
    }
    // check for skipped columns
    final String warningMsg = resultTable.getSkippedGroupsMessage(3, 3);
    if (warningMsg != null) {
        setWarningMessage(warningMsg);
    }
    return resultTable;
}
Also used : OperatorColumnSettings(org.knime.base.data.aggregation.OperatorColumnSettings) GlobalSettings(org.knime.base.data.aggregation.GlobalSettings) ColumnNamePolicy(org.knime.base.node.preproc.groupby.ColumnNamePolicy) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) BigGroupByTable(org.knime.base.node.preproc.groupby.BigGroupByTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) BigGroupByTable(org.knime.base.node.preproc.groupby.BigGroupByTable) DefaultHiLiteMapper(org.knime.core.node.property.hilite.DefaultHiLiteMapper) CountOperator(org.knime.base.data.aggregation.general.CountOperator)
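Downstream, the crosstab counts are read back out of the returned GroupByTable. A minimal sketch, assuming the aggregated rows are exposed via a getBufferedTable() accessor (an assumption; only getHiliteMapping() and getSkippedGroupsMessage(...) appear on this page):

// Hypothetical read-back sketch for the aggregated (weighted) counts.
final GroupByTable groupTable = createGroupByTable(exec, table, groupByCols);
final BufferedDataTable aggregated = groupTable.getBufferedTable(); // assumed accessor
for (final DataRow row : aggregated) {
    // one row per group; the single aggregation column holds the count or weighted sum
    LOGGER.debug(row.getKey() + ": " + row.getCell(row.getNumCells() - 1));
}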

Example 4 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class BigGroupByTable method createGroupByTable.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable createGroupByTable(final ExecutionContext exec, final BufferedDataTable table, final DataTableSpec resultSpec, final int[] groupColIdx) throws CanceledExecutionException {
    LOGGER.debug("Entering createGroupByTable(exec, table) " + "of class BigGroupByTable.");
    final DataTableSpec origSpec = table.getDataTableSpec();
    // sort the data table in order to process the input table chunk wise
    final BufferedDataTable sortedTable;
    final ExecutionContext groupExec;
    final DataValueComparator[] comparators;
    if (groupColIdx.length < 1) {
        sortedTable = table;
        groupExec = exec;
        comparators = new DataValueComparator[0];
    } else {
        final ExecutionContext sortExec = exec.createSubExecutionContext(0.6);
        exec.setMessage("Sorting input table...");
        sortedTable = sortTable(sortExec, table, getGroupCols());
        sortExec.setProgress(1.0);
        groupExec = exec.createSubExecutionContext(0.4);
        comparators = new DataValueComparator[groupColIdx.length];
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            final DataColumnSpec colSpec = origSpec.getColumnSpec(groupColIdx[i]);
            comparators[i] = colSpec.getType().getComparator();
        }
    }
    final BufferedDataContainer dc = exec.createDataContainer(resultSpec);
    exec.setMessage("Creating groups");
    final DataCell[] previousGroup = new DataCell[groupColIdx.length];
    final DataCell[] currentGroup = new DataCell[groupColIdx.length];
    final MutableInteger groupCounter = new MutableInteger(0);
    boolean firstRow = true;
    final double numOfRows = sortedTable.size();
    long rowCounter = 0;
    // In the rare case that the DataCell comparator returns 0 for two
    // data cells that are not equal, we have to maintain a map with all
    // rows with equal cells in the group columns per chunk.
    // This variable stores these members for each chunk. A chunk consists
    // of rows which return 0 for the pairwise group value comparison.
    // Usually only equal data cells return 0 when compared with each other,
    // but in rare cases data cells that are NOT equal also return 0 when
    // compared to each other
    // (such as cells that contain chemical structures).
    // In this rare case the map will contain a separate entry for each group
    // of data cells that are pairwise equal in the chunk.
    final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers = new LinkedHashMap<>(3);
    boolean logUnusualCells = true;
    String groupLabel = "";
    // cannot put this initialization into the constructor, as the super() constructor directly calls this method
    initMissingValuesMap();
    for (final DataRow row : sortedTable) {
        // fetch the current group column values
        for (int i = 0, length = groupColIdx.length; i < length; i++) {
            currentGroup[i] = row.getCell(groupColIdx[i]);
        }
        if (firstRow) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            firstRow = false;
        }
        // check whether this row still belongs to the same chunk with respect to the group column data cells
        if (!sameChunk(comparators, previousGroup, currentGroup)) {
            groupLabel = createGroupLabelForProgress(currentGroup);
            createTableRows(dc, chunkMembers, groupCounter);
            // set the current group as previous group
            System.arraycopy(currentGroup, 0, previousGroup, 0, currentGroup.length);
            if (logUnusualCells && chunkMembers.size() > 1) {
            // log the unusual number of chunk members together with the cell classes that cause the problem
                if (LOGGER.isEnabledFor(LEVEL.INFO)) {
                    final StringBuilder buf = new StringBuilder();
                    buf.append("Data chunk with ");
                    buf.append(chunkMembers.size());
                    buf.append(" members occured in groupby node. " + "Involved classes are: ");
                    final GroupKey key = chunkMembers.keySet().iterator().next();
                    for (final DataCell cell : key.getGroupVals()) {
                        buf.append(cell.getClass().getCanonicalName());
                        buf.append(", ");
                    }
                    LOGGER.info(buf.toString());
                }
                logUnusualCells = false;
            }
            // reset the chunk members map
            chunkMembers.clear();
        }
        // process the row as one of the members of the current chunk
        Pair<ColumnAggregator[], Set<RowKey>> member = chunkMembers.get(new GroupKey(currentGroup));
        if (member == null) {
            Set<RowKey> rowKeys;
            if (isEnableHilite()) {
                rowKeys = new HashSet<>();
            } else {
                rowKeys = Collections.emptySet();
            }
            member = new Pair<>(cloneColumnAggregators(), rowKeys);
            final DataCell[] groupKeys = new DataCell[currentGroup.length];
            System.arraycopy(currentGroup, 0, groupKeys, 0, currentGroup.length);
            chunkMembers.put(new GroupKey(groupKeys), member);
        }
        // compute the current row values
        for (final ColumnAggregator colAggr : member.getFirst()) {
            final int colIdx = origSpec.findColumnIndex(colAggr.getOriginalColName());
            colAggr.getOperator(getGlobalSettings()).compute(row, colIdx);
        }
        if (isEnableHilite()) {
            member.getSecond().add(row.getKey());
        }
        groupExec.checkCanceled();
        groupExec.setProgress(++rowCounter / numOfRows, groupLabel);
    }
    // create the final row for the last chunk after processing the last
    // table row
    createTableRows(dc, chunkMembers, groupCounter);
    dc.close();
    return dc.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) Pair(org.knime.core.util.Pair) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) MutableInteger(org.knime.core.util.MutableInteger) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell)
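The chunking above hinges on the sameChunk(...) check. The following is a minimal sketch of the contract described in the comments (two rows belong to the same chunk if every group column comparator returns 0 for the pairwise comparison); it is an illustration, not the actual implementation:

// Sketch: rows belong to the same chunk iff all group column comparators return 0.
// With no group columns the comparator array is empty and all rows form one chunk.
private static boolean sameChunkSketch(final DataValueComparator[] comparators,
        final DataCell[] previousGroup, final DataCell[] currentGroup) {
    for (int i = 0; i < comparators.length; i++) {
        if (comparators[i].compare(previousGroup[i], currentGroup[i]) != 0) {
            return false;
        }
    }
    return true;
}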

Example 5 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByNodeModel method createGroupBySpec.

/**
 * Generate table spec based on the input spec and the selected columns
 * for grouping.
 * @param origSpec original input spec
 * @param groupByCols group-by columns
 * @return a new table spec containing the group-by and aggregation columns
 * @throws InvalidSettingsException if the group-by can't by generated due
 *         to invalid settings
 */
protected final DataTableSpec createGroupBySpec(final DataTableSpec origSpec, final List<String> groupByCols) throws InvalidSettingsException {
    m_columnAggregators2Use.clear();
    final ArrayList<ColumnAggregator> invalidColAggrs = new ArrayList<>(1);
    m_columnAggregators2Use.addAll(GroupByNodeModel.getAggregators(origSpec, groupByCols, m_columnAggregators, m_patternAggregators, m_dataTypeAggregators, invalidColAggrs));
    if (m_columnAggregators2Use.isEmpty()) {
        setWarningMessage("No aggregation column defined");
    }
    LOGGER.debug(m_columnAggregators2Use);
    if (!invalidColAggrs.isEmpty()) {
        setWarningMessage(invalidColAggrs.size() + " invalid aggregation column(s) found.");
    }
    // check for invalid group columns
    try {
        GroupByTable.checkGroupCols(origSpec, groupByCols);
    } catch (final IllegalArgumentException e) {
        throw new InvalidSettingsException(e.getMessage());
    }
    if (origSpec.getNumColumns() > 1 && groupByCols.size() == origSpec.getNumColumns()) {
        setWarningMessage("All columns selected as group by column");
    }
    final ColumnNamePolicy colNamePolicy = ColumnNamePolicy.getPolicy4Label(m_columnNamePolicy.getStringValue());
    // this check has to be done after the check of the group columns above!!!
    if (groupByCols.isEmpty() && m_columnAggregators2Use.isEmpty()) {
        throw new InvalidSettingsException("Please select at least one group or aggregation column");
    }
    return GroupByTable.createGroupByTableSpec(origSpec, groupByCols, m_columnAggregators2Use.toArray(new ColumnAggregator[0]), colNamePolicy);
}
Also used : ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) ArrayList(java.util.ArrayList)

Aggregations

ColumnAggregator (org.knime.base.data.aggregation.ColumnAggregator) 33
DataColumnSpec (org.knime.core.data.DataColumnSpec) 14
HashSet (java.util.HashSet) 9
LinkedList (java.util.LinkedList) 9
ArrayList (java.util.ArrayList) 8
AggregationMethod (org.knime.base.data.aggregation.AggregationMethod) 8
SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString) 5
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString) 5
DataCell (org.knime.core.data.DataCell) 4
DataRow (org.knime.core.data.DataRow) 4
DataTableSpec (org.knime.core.data.DataTableSpec) 4
RowKey (org.knime.core.data.RowKey) 4
InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 4
GlobalSettings (org.knime.base.data.aggregation.GlobalSettings) 3
PatternAggregator (org.knime.base.data.aggregation.dialogutil.pattern.PatternAggregator) 3
DataTypeAggregator (org.knime.base.data.aggregation.dialogutil.type.DataTypeAggregator) 3
GroupByTable (org.knime.base.node.preproc.groupby.GroupByTable) 3
DataType (org.knime.core.data.DataType) 3
DefaultRow (org.knime.core.data.def.DefaultRow) 3
BufferedDataContainer (org.knime.core.node.BufferedDataContainer) 3