Search in sources :

Example 26 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class MemoryGroupByTable method addRow.

/**
 * Feeds one input row into the aggregators of the group it belongs to.
 * <p>
 * If no aggregators are registered for {@code groupKey} yet, every aggregator
 * returned by {@code getColAggregators()} is cloned and the clones are stored
 * in {@code m_vals} under that key. Afterwards each aggregator of the group
 * computes the row, using the index of its original column in {@code spec}.
 *
 * @param spec the table spec used to resolve the column index of each aggregator
 * @param groupKey the key of the group the row belongs to
 * @param row the row to aggregate
 */
private void addRow(final DataTableSpec spec, final GroupKey groupKey, final DataRow row) {
    ColumnAggregator[] groupAggregators = m_vals.get(groupKey);
    if (groupAggregators == null) {
        // first row seen for this group: give it its own copies of the aggregators
        final ColumnAggregator[] templates = getColAggregators();
        groupAggregators = new ColumnAggregator[templates.length];
        int pos = 0;
        for (final ColumnAggregator template : templates) {
            groupAggregators[pos++] = template.clone();
        }
        m_vals.put(groupKey, groupAggregators);
    }
    for (int i = 0; i < groupAggregators.length; i++) {
        final ColumnAggregator current = groupAggregators[i];
        final String originalColName = current.getOriginalColName();
        final int colIdx = spec.findColumnIndex(originalColName);
        current.getOperator(getGlobalSettings()).compute(row, colIdx);
    }
}
Also used : ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator)

Example 27 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class Pivot2NodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Builds three tables from the table at input index 0 and returns them in this
 * order: the pivot table, the group totals, and the pivot (plus optional
 * overall) totals. Progress is split over three sub contexts of {@code exec}:
 * 0.5 for the pivot table, 0.25 for the group totals and 0.25 for the pivot
 * totals.
 * </p>
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = (BufferedDataTable) inData[0];
    final List<String> groupAndPivotCols = createAllColumns();
    final BufferedDataTable groupTable;
    // name of the temporary order column in the grouped table; null if no order column is used
    final String orderPivotColumnName;
    ExecutionContext groupAndPivotExec = exec.createSubExecutionContext(0.5);
    ExecutionContext groupExec = exec.createSubExecutionContext(0.25);
    ExecutionContext pivotExec = exec.createSubExecutionContext(0.25);
    // relative weights of the steps that build the pivot table; each step later gets the
    // fraction (weight / progMainTotal) of groupAndPivotExec, a weight of 0.0 means the step
    // is not accounted for in the progress
    double progMainTotal = 0.0;
    double progMainTableAppendIndexForSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableAppendIndexForSort;
    double progMainTableGroup = 5.0;
    progMainTotal += progMainTableGroup;
    double progMainTableInMemSort = isProcessInMemory() ? 3.0 : 0.0;
    progMainTotal += progMainTableInMemSort;
    double progMainTableGetPivots = 1.0;
    progMainTotal += progMainTableGetPivots;
    double progMainTableFillPivots = 1.0;
    progMainTotal += progMainTableFillPivots;
    double progMainTableRestoreSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableRestoreSort;
    double progMainTableReplaceRowKey = isProcessInMemory() ? 1.0 : 0.0;
    progMainTotal += progMainTableReplaceRowKey;
    if (isProcessInMemory() || isRetainOrder()) {
        exec.setMessage("Keeping row order");
        final String retainOrderCol = DataTableSpec.getUniqueColumnName(table.getDataTableSpec(), "#pivot_order#");
        // append temp. id column with minimum-aggregation method
        // (the aggregator added below uses AggregationMethods.getRowOrderMethod())
        final ColumnAggregator[] colAggregators = getColumnAggregators().toArray(new ColumnAggregator[0]);
        // columns handed to appendOrderColumn: group and pivot columns, all aggregated columns
        // and the order column itself
        final Set<String> workingCols = new LinkedHashSet<String>();
        workingCols.addAll(groupAndPivotCols);
        for (final ColumnAggregator ca : colAggregators) {
            workingCols.add(ca.getOriginalColName());
        }
        workingCols.add(retainOrderCol);
        final BufferedDataTable appTable = GroupByTable.appendOrderColumn(groupAndPivotExec.createSubExecutionContext(progMainTableAppendIndexForSort / progMainTotal), table, workingCols, retainOrderCol);
        final DataColumnSpec retainOrderColSpec = appTable.getSpec().getColumnSpec(retainOrderCol);
        // configured aggregators plus one extra aggregator (last position) for the order column
        final ColumnAggregator[] aggrs = new ColumnAggregator[colAggregators.length + 1];
        System.arraycopy(colAggregators, 0, aggrs, 0, colAggregators.length);
        aggrs[colAggregators.length] = new ColumnAggregator(retainOrderColSpec, AggregationMethods.getRowOrderMethod(), true);
        // the order column is referred to by the name the column name policy creates for its aggregator
        orderPivotColumnName = getColumnNamePolicy().createColumName(aggrs[colAggregators.length]);
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), appTable, groupAndPivotCols, isProcessInMemory(), false, /* retain order always false; handled by pivoting */
        Arrays.asList(aggrs));
        // if processing in memory, sort the group table by the group and pivot columns
        if (isProcessInMemory()) {
            exec.setMessage("Sorting group table");
            final boolean[] sortDirection = new boolean[groupAndPivotCols.size()];
            // ensure that missing values are at the end by sorting in ascending order
            Arrays.fill(sortDirection, true);
            final SortedTable sortedGroupByTable = new SortedTable(groupByTable.getBufferedTable(), groupAndPivotCols, sortDirection, groupAndPivotExec.createSubExecutionContext(progMainTableInMemSort / progMainTotal));
            groupTable = sortedGroupByTable.getBufferedDataTable();
        } else {
            groupTable = groupByTable.getBufferedTable();
        }
    } else {
        // no order column needed: group the input table directly with the configured aggregators
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), table, groupAndPivotCols, isProcessInMemory(), false, getColumnAggregators());
        groupTable = groupByTable.getBufferedTable();
        orderPivotColumnName = null;
    }
    // resolve the index of every pivot column in the grouped table
    final List<String> pivotCols = m_pivotCols.getIncludeList();
    final int[] pivotIdx = new int[pivotCols.size()];
    final DataTableSpec groupSpec = groupTable.getSpec();
    final Set<String>[] combPivots = createCombinedPivots(groupSpec, pivotCols);
    for (int i = 0; i < pivotIdx.length; i++) {
        pivotIdx[i] = groupSpec.findColumnIndex(pivotCols.get(i));
    }
    exec.setProgress("Determining pivots...");
    ExecutionContext fillExec = groupAndPivotExec.createSubExecutionContext(progMainTableGetPivots / progMainTotal);
    final long groupTableSize = groupTable.size();
    long groupIndex = 0;
    // collect, per pivot column, the string representation of every value found in the grouped table
    for (final DataRow row : groupTable) {
        for (int i = 0; i < pivotIdx.length; i++) {
            // entries that createCombinedPivots left as null are created on first use
            if (combPivots[i] == null) {
                combPivots[i] = new LinkedHashSet<String>();
            }
            final DataCell cell = row.getCell(pivotIdx[i]);
            if (cell.isMissing()) {
                // missing cells are only added as pivot value if they are not ignored
                if (!m_ignoreMissValues.getBooleanValue()) {
                    combPivots[i].add(cell.toString());
                }
            } else {
                combPivots[i].add(cell.toString());
            }
        }
        // the fraction uses the index before the increment, the message the index after it
        // (arguments are evaluated left to right), i.e. the message counts from 1
        fillExec.setProgress(groupIndex++ / (double) groupTableSize, String.format("Group \"%s\" (%d/%d)", row.getKey(), groupIndex, groupTableSize));
        fillExec.checkCanceled();
    }
    // pivotStarts is passed to createOutSpec and afterwards reused by both fillPivotTable calls
    final Map<String, Integer> pivotStarts = new LinkedHashMap<String, Integer>();
    final DataTableSpec outSpec = createOutSpec(groupSpec, combPivots, pivotStarts, orderPivotColumnName);
    exec.setProgress("Filling pivot table");
    BufferedDataTable pivotTable = fillPivotTable(groupTable, outSpec, pivotStarts, groupAndPivotExec.createSubExecutionContext(progMainTableFillPivots / progMainTotal), orderPivotColumnName);
    if (orderPivotColumnName != null) {
        // sort ascending by the temporary order column, then drop that column from the result
        exec.setMessage("Restoring row order");
        final SortedTable sortedPivotTable = new SortedTable(pivotTable, Arrays.asList(new String[] { orderPivotColumnName }), new boolean[] { true }, groupAndPivotExec.createSubExecutionContext(progMainTableRestoreSort / progMainTotal));
        pivotTable = sortedPivotTable.getBufferedDataTable();
        final ColumnRearranger colre = new ColumnRearranger(pivotTable.getSpec());
        colre.remove(orderPivotColumnName);
        pivotTable = exec.createColumnRearrangeTable(pivotTable, colre, exec.createSilentSubProgress(0.0));
    }
    // temp fix for bug 3286
    if (isProcessInMemory()) {
        // if process in memory is true, RowKey's needs to be re-computed
        // every row is copied into a new container with a key created from its running index
        final BufferedDataContainer rowkeyBuf = groupAndPivotExec.createSubExecutionContext(progMainTableReplaceRowKey / progMainTotal).createDataContainer(pivotTable.getSpec());
        long rowIndex = 0;
        for (DataRow row : pivotTable) {
            rowkeyBuf.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), row));
        }
        rowkeyBuf.close();
        pivotTable = rowkeyBuf.getTable();
    }
    groupAndPivotExec.setProgress(1.0);
    /* Fill the 3rd port */
    exec.setMessage("Determining pivot totals");
    // relative weights of the steps that build the pivot totals; used as fractions of pivotExec
    double progPivotTotal = 0.0;
    double progPivotGroup = 5.0;
    progPivotTotal += progPivotGroup;
    double progPivotFillMissing = 1.0;
    progPivotTotal += progPivotFillMissing;
    double progPivotFillPivots = 1.0;
    progPivotTotal += progPivotFillPivots;
    double progPivotOverallTotals = m_totalAggregation.getBooleanValue() ? 5.0 : 0.0;
    progPivotTotal += progPivotOverallTotals;
    // create pivot table only on pivot columns (for grouping)
    // perform pivoting: result in single line
    final GroupByTable rowGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotGroup / progPivotTotal), table, m_pivotCols.getIncludeList(), isProcessInMemory(), isRetainOrder(), getColumnAggregators());
    final BufferedDataTable rowGroupTable = rowGroup.getBufferedTable();
    // fill group columns with missing cells
    // one column is inserted at position i for every group-by column, using the i-th column spec
    // of outSpec (assumes outSpec starts with the group-by columns - TODO confirm against createOutSpec)
    final ColumnRearranger colre = new ColumnRearranger(rowGroupTable.getDataTableSpec());
    for (int i = 0; i < getGroupByColumns().size(); i++) {
        final DataColumnSpec cspec = outSpec.getColumnSpec(i);
        final CellFactory factory = new SingleCellFactory(cspec) {

            /**
             * {@inheritDoc}
             */
            @Override
            public DataCell getCell(final DataRow row) {
                // the inserted column contains a missing cell in every row
                return DataType.getMissingCell();
            }
        };
        colre.insertAt(i, factory);
    }
    final BufferedDataTable groupedRowTable = exec.createColumnRearrangeTable(rowGroupTable, colre, pivotExec.createSubExecutionContext(progPivotFillMissing / progPivotTotal));
    // same outSpec and pivotStarts as for the pivot table, but no order column name is passed
    BufferedDataTable pivotRowsTable = fillPivotTable(groupedRowTable, outSpec, pivotStarts, pivotExec.createSubExecutionContext(progPivotFillPivots / progPivotTotal), null);
    if (orderPivotColumnName != null) {
        // drop the temporary order column from the pivot totals as well
        final ColumnRearranger colre2 = new ColumnRearranger(pivotRowsTable.getSpec());
        colre2.remove(orderPivotColumnName);
        pivotRowsTable = exec.createColumnRearrangeTable(pivotRowsTable, colre2, exec.createSilentSubProgress(0.0));
    }
    // total aggregation without grouping
    if (m_totalAggregation.getBooleanValue()) {
        // grouping with an empty list of group columns
        @SuppressWarnings("unchecked") final GroupByTable totalGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotOverallTotals / progPivotTotal), table, Collections.EMPTY_LIST, isProcessInMemory(), isRetainOrder(), getColumnAggregators());
        final BufferedDataTable totalGroupTable = totalGroup.getBufferedTable();
        final DataTableSpec pivotsRowsSpec = pivotRowsTable.getSpec();
        final DataTableSpec totalGroupSpec = totalGroupTable.getSpec();
        // result spec: columns of the pivot totals followed by the columns of the overall totals
        final DataTableSpec overallTotalSpec = new DataTableSpec(pivotsRowsSpec, totalGroupSpec);
        final BufferedDataContainer buf = exec.createDataContainer(overallTotalSpec);
        // a single "Totals" row is built from the first row of each table; if the pivot totals
        // are empty the resulting table stays empty
        // NOTE(review): assumes totalGroupTable has at least one row whenever pivotRowsTable has - confirm
        if (pivotRowsTable.size() > 0) {
            final List<DataCell> pivotTotalsCells = new ArrayList<DataCell>();
            final DataRow pivotsRow = pivotRowsTable.iterator().next();
            for (final DataCell cell : pivotsRow) {
                pivotTotalsCells.add(cell);
            }
            final DataRow totalGroupRow = totalGroupTable.iterator().next();
            for (final DataCell cell : totalGroupRow) {
                pivotTotalsCells.add(cell);
            }
            buf.addRowToTable(new DefaultRow(new RowKey("Totals"), pivotTotalsCells));
        }
        buf.close();
        pivotRowsTable = buf.getTable();
    }
    pivotExec.setProgress(1.0);
    /* Fill the 2nd port: important to create this last since it will create
         * the final hilite handler (mapping) for port #1 AND #2 (bug 3270) */
    exec.setMessage("Creating group totals");
    // create group table only on group columns; no pivoting
    final BufferedDataTable columnGroupTable = createGroupByTable(groupExec, table, getGroupByColumns()).getBufferedTable();
    return new PortObject[] { // result order: pivot table, group totals, pivot and overall totals
    pivotTable, // 1st port: pivot table
    columnGroupTable, // 2nd port: group totals
    pivotRowsTable }; // 3rd port: pivot and overall totals
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) PortObject(org.knime.core.node.port.PortObject) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) SortedTable(org.knime.base.data.sort.SortedTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory)

Example 28 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class BigGroupByTable method createTableRows.

/**
 * Creates the result rows for the members of a data chunk and adds them to
 * the given data container. Each row consists of the group values followed
 * by the results of the group's aggregators. Skipped groups are recorded,
 * the missing value count of each operator is added to the counter of its
 * column, and the row key mapping is registered if hilite translation is
 * enabled.
 *
 * @param dc the {@link DataContainer} to use
 * @param chunkMembers the members of the current data chunk; nothing
 * happens if this is <code>null</code> or empty
 * @param groupCounter the number of groups that have been created
 * so far; used to create the row key and incremented once per group
 */
private void createTableRows(final BufferedDataContainer dc, final Map<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> chunkMembers, final MutableInteger groupCounter) {
    if (chunkMembers == null || chunkMembers.isEmpty()) {
        return;
    }
    for (final Entry<GroupKey, Pair<ColumnAggregator[], Set<RowKey>>> member : chunkMembers.entrySet()) {
        final DataCell[] groupCells = member.getKey().getGroupVals();
        final ColumnAggregator[] aggregators = member.getValue().getFirst();
        final RowKey newKey = RowKey.createRowKey(groupCounter.intValue());
        groupCounter.inc();
        final int offset = groupCells.length;
        final DataCell[] cells = new DataCell[offset + aggregators.length];
        // the group values occupy the leading positions of the row
        System.arraycopy(groupCells, 0, cells, 0, offset);
        // the aggregation results follow in aggregator order
        for (int i = 0; i < aggregators.length; i++) {
            final ColumnAggregator aggregator = aggregators[i];
            final AggregationOperator op = aggregator.getOperator(getGlobalSettings());
            cells[offset + i] = op.getResult();
            if (op.isSkipped()) {
                // remember the skipped group together with the column
                // that caused the skipping
                addSkippedGroup(aggregator.getOriginalColName(), op.getSkipMessage(), groupCells);
            }
            m_missingValuesMap.get(aggregator.getOriginalColName()).add(op.getMissingValuesCount());
        }
        dc.addRowToTable(new DefaultRow(newKey, cells));
        if (isEnableHilite()) {
            addHiliteMapping(newKey, member.getValue().getSecond());
        }
    }
}
Also used : AggregationOperator(org.knime.base.data.aggregation.AggregationOperator) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Pair(org.knime.core.util.Pair)

Example 29 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class BigGroupByTable method initMissingValuesMap.

/**
 * Replaces {@code m_missingValuesMap} with a new map that holds a counter
 * starting at zero for the original column name of every aggregator returned
 * by {@code getColAggregators()}.
 */
private void initMissingValuesMap() {
    m_missingValuesMap = new HashMap<>();
    for (final ColumnAggregator aggregator : getColAggregators()) {
        final String originalColName = aggregator.getOriginalColName();
        m_missingValuesMap.put(originalColName, new MutableLong(0L));
    }
}
Also used : MutableLong(org.apache.commons.lang.mutable.MutableLong) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator)

Example 30 with ColumnAggregator

use of org.knime.base.data.aggregation.ColumnAggregator in project knime-core by knime.

the class GroupByNodeModel method compCreateColumnAggregators.

/**
 * Compatibility method for settings written by versions prior to KNIME 2.0,
 * which only knew one aggregation method for numerical and one for nominal
 * columns. Creates one {@link ColumnAggregator} for every column of the spec
 * that is not excluded; the method for a column is the one
 * {@code AggregationMethods.getAggregationMethod} returns for the column spec
 * and the two given methods.
 *
 * @param spec
 *            the {@link DataTableSpec}
 * @param excludeCols
 *            the names of all columns to be excluded
 * @param numeric
 *            the id of the numerical aggregation method
 * @param nominal
 *            the id of the nominal aggregation method
 * @return {@link List} of the {@link ColumnAggregator}s in column order
 */
private static List<ColumnAggregator> compCreateColumnAggregators(final DataTableSpec spec, final List<String> excludeCols, final String numeric, final String nominal) {
    final AggregationMethod numericMethod = AggregationMethods.getMethod4Id(numeric);
    final AggregationMethod nominalMethod = AggregationMethods.getMethod4Id(nominal);
    // set for fast name lookups
    final Set<String> excluded = new HashSet<>(excludeCols);
    final List<ColumnAggregator> result = new LinkedList<>();
    final int numCols = spec.getNumColumns();
    for (int idx = 0; idx < numCols; idx++) {
        final DataColumnSpec column = spec.getColumnSpec(idx);
        if (excluded.contains(column.getName())) {
            continue;
        }
        final AggregationMethod chosen = AggregationMethods.getAggregationMethod(column, numericMethod, nominalMethod);
        // missing cell handling follows the default of the chosen method
        final boolean inclMissing = chosen.inclMissingCells();
        result.add(new ColumnAggregator(column, chosen, inclMissing));
    }
    return result;
}
Also used : AggregationMethod(org.knime.base.data.aggregation.AggregationMethod) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) LinkedList(java.util.LinkedList) HashSet(java.util.HashSet)

Aggregations

ColumnAggregator (org.knime.base.data.aggregation.ColumnAggregator)33 DataColumnSpec (org.knime.core.data.DataColumnSpec)14 HashSet (java.util.HashSet)9 LinkedList (java.util.LinkedList)9 ArrayList (java.util.ArrayList)8 AggregationMethod (org.knime.base.data.aggregation.AggregationMethod)8 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)5 DataCell (org.knime.core.data.DataCell)4 DataRow (org.knime.core.data.DataRow)4 DataTableSpec (org.knime.core.data.DataTableSpec)4 RowKey (org.knime.core.data.RowKey)4 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)4 GlobalSettings (org.knime.base.data.aggregation.GlobalSettings)3 PatternAggregator (org.knime.base.data.aggregation.dialogutil.pattern.PatternAggregator)3 DataTypeAggregator (org.knime.base.data.aggregation.dialogutil.type.DataTypeAggregator)3 GroupByTable (org.knime.base.node.preproc.groupby.GroupByTable)3 DataType (org.knime.core.data.DataType)3 DefaultRow (org.knime.core.data.def.DefaultRow)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3