Search in sources :

Example 11 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class BoxPlotNodeModel method execute.

/**
 * {@inheritDoc}
 */
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    if (inData[0] == null) {
        return new BufferedDataTable[] {};
    BufferedDataTable table = inData[0];
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    int colIdx = 0;
    List<DataColumnSpec> outputColSpecs = new ArrayList<DataColumnSpec>();
    double subProgress = 1.0 / getNumNumericColumns(table.getDataTableSpec());
    for (DataColumnSpec colSpec : table.getDataTableSpec()) {
        ExecutionContext colExec = exec.createSubExecutionContext(subProgress);
        if (colSpec.getType().isCompatible(DoubleValue.class)) {
            double[] statistic = new double[SIZE];
            List<String> col = new ArrayList<String>();
            ExecutionContext sortExec = colExec.createSubExecutionContext(0.75);
            ExecutionContext findExec = colExec.createSubExecutionContext(0.25);
            SortedTable sorted = new SortedTable(table, col, new boolean[] { true }, sortExec);
            long currRowAbsolute = 0;
            int currCountingRow = 1;
            double lastValue = 1;
            long nrOfRows = table.size();
            boolean first = true;
            for (DataRow row : sorted) {
                double rowProgress = currRowAbsolute / (double) table.size();
                findExec.setProgress(rowProgress, "determining statistics for: " + table.getDataTableSpec().getColumnSpec(colIdx).getName());
                if (row.getCell(colIdx).isMissing()) {
                    // asserts that the missing values are sorted at
                    // the top of the table
                // get the first value = actually observed minimum
                if (first) {
                    statistic[MIN] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    // initialize the statistics with first value
                    // if the table is large enough it will be overridden
                    // this is just for the case of tables with < 5 rows
                    statistic[MEDIAN] = statistic[MIN];
                    statistic[LOWER_QUARTILE] = statistic[MIN];
                    statistic[UPPER_QUARTILE] = statistic[MIN];
                    first = false;
                // get the last value = actually observed maximum
                if (currRowAbsolute == table.size() - 1) {
                    statistic[MAX] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                float medianPos = nrOfRows * 0.5f;
                float lowerQuartilePos = nrOfRows * 0.25f;
                float upperQuartilePos = nrOfRows * 0.75f;
                if (currCountingRow == (int) Math.floor(lowerQuartilePos) + 1) {
                    if (lowerQuartilePos % 1 != 0) {
                        // get the row's value
                        statistic[LOWER_QUARTILE] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[LOWER_QUARTILE] = (value + lastValue) / 2;
                if (currCountingRow == (int) Math.floor(medianPos) + 1) {
                    if (medianPos % 1 != 0) {
                        // get the row's value
                        statistic[MEDIAN] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[MEDIAN] = (value + lastValue) / 2;
                if (currCountingRow == (int) Math.floor(upperQuartilePos) + 1) {
                    if (upperQuartilePos % 1 != 0) {
                        // get the row's value
                        statistic[UPPER_QUARTILE] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[UPPER_QUARTILE] = (value + lastValue) / 2;
                lastValue = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
            double iqr = statistic[UPPER_QUARTILE] - statistic[LOWER_QUARTILE];
            Map<Double, Set<RowKey>> mild = new LinkedHashMap<Double, Set<RowKey>>();
            Map<Double, Set<RowKey>> extreme = new LinkedHashMap<Double, Set<RowKey>>();
            // per default the whiskers are at min and max
            double[] whiskers = new double[] { statistic[MIN], statistic[MAX] };
            if (statistic[MIN] < (statistic[LOWER_QUARTILE] - (1.5 * iqr)) || statistic[MAX] > statistic[UPPER_QUARTILE] + (1.5 * iqr)) {
                detectOutliers(sorted, iqr, new double[] { statistic[LOWER_QUARTILE], statistic[UPPER_QUARTILE] }, mild, extreme, whiskers, colIdx);
            statistic[LOWER_WHISKER] = whiskers[0];
            statistic[UPPER_WHISKER] = whiskers[1];
            m_mildOutliers.put(colSpec.getName(), mild);
            m_extremeOutliers.put(colSpec.getName(), extreme);
            m_statistics.put(colSpec, statistic);
    DataContainer container = createOutputTable(exec, outputColSpecs);
    // return a data array with just one row but with the data table spec
    // for the column selection panel
    m_array = new DefaultDataArray(table, 1, 2);
    return new BufferedDataTable[] { exec.createBufferedDataTable(container.getTable(), exec) };
Also used : HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 12 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class BoxplotCalculator method calculateMultipleConditional.

/**
 * Calculates statistics for a conditional box plot.
 *
 * @param table the data table
 * @param catCol the column with the category values
 * @param numCol the names of the numeric columns
 * @param exec an execution context
 * @return A linked hash map that holds, for each numeric column, a linked hash map with BoxplotStatistics for each category
 * @throws CanceledExecutionException when the user cancels the execution
 * @throws InvalidSettingsException when the category column has no domain values
 */
public LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> calculateMultipleConditional(final BufferedDataTable table, final String catCol, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    DataTableSpec spec = table.getSpec();
    int catColIdx = spec.findColumnIndex(catCol);
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    Set<DataCell> valuesSet = spec.getColumnSpec(catColIdx).getDomain().getValues();
    if (valuesSet == null) {
        throw new InvalidSettingsException("Selected category column has no domain values");
    ArrayList<DataCell> vals = new ArrayList<>(valuesSet);
    Collections.sort(vals, new Comparator<DataCell>() {

        public int compare(final DataCell o1, final DataCell o2) {
            return o1.toString().compareTo(o2.toString());
    // add Missing values class as it is never in specification
    vals.add(new MissingCell(null));
    // we need to have clear names, otherwise Missing values class will be taken as "?"
    ArrayList<String> catNames = new ArrayList<>(vals.size());
    for (DataCell cell : vals) {
        catNames.add(cell.isMissing() ? MISSING_VALUES_CLASS : cell.toString());
    LinkedHashMap<String, LinkedHashMap<String, DataContainer>> containers = new LinkedHashMap<>();
    m_ignoredMissVals = new LinkedHashMap<>();
    for (int i = 0; i < numCol.length; i++) {
        LinkedHashMap<String, DataContainer> map = new LinkedHashMap<>();
        LinkedHashMap<String, Long> missValMap = new LinkedHashMap<>();
        for (DataCell c : vals) {
            String name = c.isMissing() ? MISSING_VALUES_CLASS : c.toString();
            map.put(name, exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
            missValMap.put(name, 0L);
        containers.put(numCol[i], map);
        m_ignoredMissVals.put(numCol[i], missValMap);
    ExecutionContext subExec = exec.createSubExecutionContext(0.7);
    // long[][] ignoredMissVals = new long[numCol.length][vals.size()];  // count missing values per data col per class
    long count = 0;
    final long numOfRows = table.size();
    for (DataRow row : table) {
        subExec.setProgress(count++ / (double) numOfRows);
        DataCell catCell = row.getCell(catColIdx);
        String catName = catCell.isMissing() ? MISSING_VALUES_CLASS : catCell.toString();
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            if (!cell.isMissing()) {
                containers.get(numCol[i]).get(catName).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                // increment missing values
                LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(numCol[i]);
                missValMap.replace(catName, missValMap.get(catName) + 1);
    LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> statsMap = new LinkedHashMap<>();
    excludedClasses = new LinkedHashMap<>();
    List<String> colList = Arrays.asList(numCol);
    ExecutionContext subExec2 = exec.createSubExecutionContext(1.0);
    int count2 = 0;
    for (Entry<String, LinkedHashMap<String, DataContainer>> entry : containers.entrySet()) {
        subExec2.setProgress(count2++ / (double) containers.size());
        LinkedHashMap<String, DataContainer> containers2 = entry.getValue();
        LinkedHashMap<String, BoxplotStatistics> colStats = new LinkedHashMap<String, BoxplotStatistics>();
        String colName = entry.getKey();
        List<String> excludedColClassesList = new ArrayList<>();
        LinkedHashMap<String, Long> ignoredColMissVals = new LinkedHashMap<>();
        for (Entry<String, DataContainer> entry2 : containers2.entrySet()) {
            Set<Outlier> extremeOutliers = new HashSet<Outlier>();
            Set<Outlier> mildOutliers = new HashSet<Outlier>();
            String catName = entry2.getKey();
            BufferedDataTable catTable = (BufferedDataTable) entry2.getValue().getTable();
            LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(colName);
            if (catTable.size() == 0) {
                if (!(catName.equals(MISSING_VALUES_CLASS) && missValMap.get(catName) == 0)) {
                    // we should add missing values to this list, only if they were there
            } else {
                if (missValMap.get(catName) == 0) {
            SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

                public int compare(final DataRow o1, final DataRow o2) {
                    double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                    double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                    if (d1 == d2) {
                        return 0;
                    } else {
                        return d1 < d2 ? -1 : 1;
            }, false, exec);
            double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
            boolean dq1 = catTable.size() % 4 == 0;
            long q1Idx = catTable.size() / 4;
            boolean dq3 = 3 * catTable.size() % 4 == 0;
            long q3Idx = 3 * catTable.size() / 4;
            boolean dMedian = catTable.size() % 2 == 0;
            long medianIdx = catTable.size() / 2;
            int counter = 0;
            for (DataRow row : st) {
                double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
                if (counter == 0) {
                    min = val;
                if (counter == catTable.size() - 1) {
                    max = val;
                if (counter == q1Idx - 1 && dq1) {
                    q1 = val;
                if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                    if (dq1) {
                        q1 = (q1 + val) / 2.0;
                    } else {
                        q1 = val;
                if (counter == medianIdx - 1 && dMedian) {
                    median = val;
                if (counter == medianIdx) {
                    if (dMedian) {
                        median = (median + val) / 2;
                    } else {
                        median = val;
                if (counter == q3Idx - 1 && dq3) {
                    q3 = val;
                if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                    if (dq3) {
                        q3 = (q3 + val) / 2.0;
                    } else {
                        q3 = val;
            double iqr = q3 - q1;
            double lowerWhisker = min;
            double upperWhisker = max;
            double upperWhiskerFence = q3 + (1.5 * iqr);
            double lowerWhiskerFence = q1 - (1.5 * iqr);
            double lowerFence = q1 - (3 * iqr);
            double upperFence = q3 + (3 * iqr);
            for (DataRow row : st) {
                double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
                String rowKey = row.getKey().getString();
                if (value < lowerFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value < lowerWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                    lowerWhisker = value;
                } else if (value <= upperWhiskerFence) {
                    upperWhisker = value;
                } else if (value > upperFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value > upperWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
            colStats.put(catName, new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
        statsMap.put(colName, colStats);
        // missing values part
        String[] excludedColClasses = excludedColClassesList.toArray(new String[excludedColClassesList.size()]);
        excludedClasses.put(colName, excludedColClasses);
    return statsMap;
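Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) MissingCell(org.knime.core.data.MissingCell) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)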

Example 13 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class GroupByTable method sortTable.

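/**
 * Sorts the given table by the given columns, with every sort flag set to {@code true};
 * returns the input table unchanged if no sort columns are given.
 *
 * @param exec {@link ExecutionContext}
 * @param table2sort the {@link BufferedDataTable} to sort
 * @param sortCols the columns to sort by
 * @return the sorted {@link BufferedDataTable}
 * @throws CanceledExecutionException if the operation has been canceled
 * @since 2.7
 */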
public static BufferedDataTable sortTable(final ExecutionContext exec, final BufferedDataTable table2sort, final List<String> sortCols) throws CanceledExecutionException {
    if (sortCols.isEmpty()) {
        return table2sort;
    final boolean[] sortOrder = new boolean[sortCols.size()];
    for (int i = 0, length = sortOrder.length; i < length; i++) {
        sortOrder[i] = true;
    final SortedTable sortedTabel = new SortedTable(table2sort, sortCols, sortOrder, exec);
    return sortedTabel.getBufferedDataTable();
Also used : SortedTable(org.knime.base.data.sort.SortedTable)

Example 14 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class Joiner method computeJoinTable.

/**
 * Joins the <code>leftTable</code> and the <code>rightTable</code>.
 *
 * @param leftTable The left input table.
 * @param rightTable The right input table.
 * @param exec The Execution monitor for this execution.
 * @return The joined table.
 * @throws CanceledExecutionException when execution is canceled
 * @throws InvalidSettingsException when inconsistent settings are provided
 */
public BufferedDataTable computeJoinTable(final BufferedDataTable leftTable, final BufferedDataTable rightTable, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    // This does some input data checking, too
    DataTableSpec joinedTableSpec = createSpec(new DataTableSpec[] { leftTable.getDataTableSpec(), rightTable.getDataTableSpec() });
    if (m_settings.getDuplicateHandling().equals(DuplicateHandling.Filter)) {
        List<String> leftCols = getLeftIncluded(leftTable.getDataTableSpec());
        List<String> rightCols = getRightIncluded(rightTable.getDataTableSpec());
        List<String> duplicates = new ArrayList<String>();
        // Check if duplicated columns have identical data
        compareDuplicates(leftTable, rightTable, duplicates);
    BufferedDataTable outerTable = rightTable;
    BufferedDataTable innerTable = leftTable;
    m_retainRight = JoinMode.RightOuterJoin.equals(m_settings.getJoinMode()) || JoinMode.FullOuterJoin.equals(m_settings.getJoinMode());
    m_retainLeft = JoinMode.LeftOuterJoin.equals(m_settings.getJoinMode()) || JoinMode.FullOuterJoin.equals(m_settings.getJoinMode());
    // if multipleMatchCanOccur is true, two rows can be matched more than
    // once. This is in general met with the MatchAny option, but only if
    // there is more than one join column.
    m_matchAny = m_settings.getCompositionMode().equals(CompositionMode.MatchAny) && m_settings.getLeftJoinColumns().length > 1;
    if (m_retainLeft && m_matchAny) {
        m_globalLeftOuterJoins = new HashSet<Integer>();
        for (int i = 0; i < leftTable.getRowCount(); i++) {
    m_inputDataRowSettings = createInputDataRowSettings(leftTable, rightTable);
    int[] rightSurvivors = getIndicesOf(rightTable, m_rightSurvivors);
    m_outputDataRowSettings = new OutputRow.Settings(rightTable.getDataTableSpec(), rightSurvivors);
    /* numBits -> numPartitions
         * 0 -> 1
         * 1 -> 2
         * 2 -> 4
         * 3 -> 8
         * 4 -> 16
         * 5 -> 32
         * 6 -> 64
         * 7 -> 128
    m_numBits = m_numBitsInitial;
    int numPartitions = 0x0001 << m_numBits;
    m_bitMask = 0;
    for (int i = 0; i < m_numBits; i++) {
        m_bitMask += 0x0001 << i;
    Set<Integer> pendingParts = new TreeSet<Integer>();
    for (int i = 0; i < numPartitions; i++) {
    JoinContainer joinCont = new JoinContainer(m_outputDataRowSettings);
    double[] progressIntervals = new double[] { 0.6, 0.2, 0.2 };
    while (pendingParts.size() > 0) {
        Collection<Integer> processedParts = performJoin(innerTable, outerTable, joinCont, pendingParts, exec, progressIntervals[0]);
    if (m_retainLeft && m_matchAny) {
        // Add left outer joins
        int c = 0;
        for (Integer index : m_globalLeftOuterJoins) {
            DataRow outRow = OutputRow.createDataRow(c, index, -1, m_outputDataRowSettings);
            joinCont.addLeftOuter(outRow, exec);
    // numbers are needed to report progress more precisely
    long totalNumJoins = joinCont.getRowCount();
    long numMatches = null != joinCont.getMatches() ? joinCont.getMatches().size() : 0;
    long numLeftOuter = null != joinCont.getLeftOuter() ? joinCont.getLeftOuter().size() : 0;
    long numRightOuter = null != joinCont.getRightOuter() ? joinCont.getRightOuter().size() : 0;
    exec.setMessage("Sort Joined Partitions");
    Comparator<DataRow> joinComp = OutputRow.createRowComparator();
    SortedTable matches = null != joinCont.getMatches() ? new SortedTable(joinCont.getMatches(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numMatches / totalNumJoins)) : null;
    SortedTable leftOuter = null != joinCont.getLeftOuter() ? new SortedTable(joinCont.getLeftOuter(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numLeftOuter / totalNumJoins)) : null;
    SortedTable rightOuter = null != joinCont.getRightOuter() ? new SortedTable(joinCont.getRightOuter(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numRightOuter / totalNumJoins)) : null;
    exec.setMessage("Merge Joined Partitions");
    // Build sorted table
    int[] leftSurvivors = getIndicesOf(leftTable, m_leftSurvivors);
    DataHiliteOutputContainer oc = new DataHiliteOutputContainer(joinedTableSpec, m_settings.getEnableHiLite(), leftTable, leftSurvivors, rightSurvivors, createRowKeyFactory(leftTable, rightTable));
    oc.addTableAndFilterDuplicates(matches, exec.createSubExecutionContext(progressIntervals[2] * numMatches / totalNumJoins));
    oc.addTableAndFilterDuplicates(leftOuter, exec.createSubExecutionContext(progressIntervals[2] * numLeftOuter / totalNumJoins));
    oc.addTableAndFilterDuplicates(rightOuter, exec.createSubExecutionContext(progressIntervals[2] * numRightOuter / totalNumJoins));
    m_leftRowKeyMap = oc.getLeftRowKeyMap();
    m_rightRowKeyMap = oc.getRightRowKeyMap();
    return oc.getTable();
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) TreeSet(java.util.TreeSet) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 15 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class Pivot2NodeModel method execute.

/**
 * {@inheritDoc}
 */
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = (BufferedDataTable) inData[0];
    final List<String> groupAndPivotCols = createAllColumns();
    final BufferedDataTable groupTable;
    final String orderPivotColumnName;
    ExecutionContext groupAndPivotExec = exec.createSubExecutionContext(0.5);
    ExecutionContext groupExec = exec.createSubExecutionContext(0.25);
    ExecutionContext pivotExec = exec.createSubExecutionContext(0.25);
    double progMainTotal = 0.0;
    double progMainTableAppendIndexForSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableAppendIndexForSort;
    double progMainTableGroup = 5.0;
    progMainTotal += progMainTableGroup;
    double progMainTableInMemSort = isProcessInMemory() ? 3.0 : 0.0;
    progMainTotal += progMainTableInMemSort;
    double progMainTableGetPivots = 1.0;
    progMainTotal += progMainTableGetPivots;
    double progMainTableFillPivots = 1.0;
    progMainTotal += progMainTableFillPivots;
    double progMainTableRestoreSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableRestoreSort;
    double progMainTableReplaceRowKey = isProcessInMemory() ? 1.0 : 0.0;
    progMainTotal += progMainTableReplaceRowKey;
    if (isProcessInMemory() || isRetainOrder()) {
        exec.setMessage("Keeping row order");
        final String retainOrderCol = DataTableSpec.getUniqueColumnName(table.getDataTableSpec(), "#pivot_order#");
        // append temp. id column with minimum-aggregation method
        final ColumnAggregator[] colAggregators = getColumnAggregators().toArray(new ColumnAggregator[0]);
        final Set<String> workingCols = new LinkedHashSet<String>();
        for (final ColumnAggregator ca : colAggregators) {
        final BufferedDataTable appTable = GroupByTable.appendOrderColumn(groupAndPivotExec.createSubExecutionContext(progMainTableAppendIndexForSort / progMainTotal), table, workingCols, retainOrderCol);
        final DataColumnSpec retainOrderColSpec = appTable.getSpec().getColumnSpec(retainOrderCol);
        final ColumnAggregator[] aggrs = new ColumnAggregator[colAggregators.length + 1];
        System.arraycopy(colAggregators, 0, aggrs, 0, colAggregators.length);
        aggrs[colAggregators.length] = new ColumnAggregator(retainOrderColSpec, AggregationMethods.getRowOrderMethod(), true);
        orderPivotColumnName = getColumnNamePolicy().createColumName(aggrs[colAggregators.length]);
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), appTable, groupAndPivotCols, isProcessInMemory(), false, /* retain order always false; handled by pivoting */
        // true then sort table by group&pivot columns
        if (isProcessInMemory()) {
            exec.setMessage("Sorting group table");
            final boolean[] sortDirection = new boolean[groupAndPivotCols.size()];
            // ensure that missing values are at the end by sorting in ascending order
            Arrays.fill(sortDirection, true);
            final SortedTable sortedGroupByTable = new SortedTable(groupByTable.getBufferedTable(), groupAndPivotCols, sortDirection, groupAndPivotExec.createSubExecutionContext(progMainTableInMemSort / progMainTotal));
            groupTable = sortedGroupByTable.getBufferedDataTable();
        } else {
            groupTable = groupByTable.getBufferedTable();
    } else {
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), table, groupAndPivotCols, isProcessInMemory(), false, getColumnAggregators());
        groupTable = groupByTable.getBufferedTable();
        orderPivotColumnName = null;
    final List<String> pivotCols = m_pivotCols.getIncludeList();
    final int[] pivotIdx = new int[pivotCols.size()];
    final DataTableSpec groupSpec = groupTable.getSpec();
    final Set<String>[] combPivots = createCombinedPivots(groupSpec, pivotCols);
    for (int i = 0; i < pivotIdx.length; i++) {
        pivotIdx[i] = groupSpec.findColumnIndex(pivotCols.get(i));
    exec.setProgress("Determining pivots...");
    ExecutionContext fillExec = groupAndPivotExec.createSubExecutionContext(progMainTableGetPivots / progMainTotal);
    final long groupTableSize = groupTable.size();
    long groupIndex = 0;
    for (final DataRow row : groupTable) {
        for (int i = 0; i < pivotIdx.length; i++) {
            if (combPivots[i] == null) {
                combPivots[i] = new LinkedHashSet<String>();
            final DataCell cell = row.getCell(pivotIdx[i]);
            if (cell.isMissing()) {
                if (!m_ignoreMissValues.getBooleanValue()) {
            } else {
        fillExec.setProgress(groupIndex++ / (double) groupTableSize, String.format("Group \"%s\" (%d/%d)", row.getKey(), groupIndex, groupTableSize));
    final Map<String, Integer> pivotStarts = new LinkedHashMap<String, Integer>();
    final DataTableSpec outSpec = createOutSpec(groupSpec, combPivots, pivotStarts, orderPivotColumnName);
    exec.setProgress("Filling pivot table");
    BufferedDataTable pivotTable = fillPivotTable(groupTable, outSpec, pivotStarts, groupAndPivotExec.createSubExecutionContext(progMainTableFillPivots / progMainTotal), orderPivotColumnName);
    if (orderPivotColumnName != null) {
        exec.setMessage("Restoring row order");
        final SortedTable sortedPivotTable = new SortedTable(pivotTable, Arrays.asList(new String[] { orderPivotColumnName }), new boolean[] { true }, groupAndPivotExec.createSubExecutionContext(progMainTableRestoreSort / progMainTotal));
        pivotTable = sortedPivotTable.getBufferedDataTable();
        final ColumnRearranger colre = new ColumnRearranger(pivotTable.getSpec());
        pivotTable = exec.createColumnRearrangeTable(pivotTable, colre, exec.createSilentSubProgress(0.0));
    // temp fix for bug 3286
    if (isProcessInMemory()) {
        // if process in memory is true, RowKey's needs to be re-computed
        final BufferedDataContainer rowkeyBuf = groupAndPivotExec.createSubExecutionContext(progMainTableReplaceRowKey / progMainTotal).createDataContainer(pivotTable.getSpec());
        long rowIndex = 0;
        for (DataRow row : pivotTable) {
            rowkeyBuf.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), row));
        pivotTable = rowkeyBuf.getTable();
    /* Fill the 3rd port */
    exec.setMessage("Determining pivot totals");
    double progPivotTotal = 0.0;
    double progPivotGroup = 5.0;
    progPivotTotal += progPivotGroup;
    double progPivotFillMissing = 1.0;
    progPivotTotal += progPivotFillMissing;
    double progPivotFillPivots = 1.0;
    progPivotTotal += progPivotFillPivots;
    double progPivotOverallTotals = m_totalAggregation.getBooleanValue() ? 5.0 : 0.0;
    progPivotTotal += progPivotOverallTotals;
    // create pivot table only on pivot columns (for grouping)
    // perform pivoting: result in single line
    final GroupByTable rowGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotGroup / progPivotTotal), table, m_pivotCols.getIncludeList(), isProcessInMemory(), isRetainOrder(), getColumnAggregators());
    final BufferedDataTable rowGroupTable = rowGroup.getBufferedTable();
    // fill group columns with missing cells
    final ColumnRearranger colre = new ColumnRearranger(rowGroupTable.getDataTableSpec());
    for (int i = 0; i < getGroupByColumns().size(); i++) {
        final DataColumnSpec cspec = outSpec.getColumnSpec(i);
        final CellFactory factory = new SingleCellFactory(cspec) {

             * {@inheritDoc}
            public DataCell getCell(final DataRow row) {
                return DataType.getMissingCell();
        colre.insertAt(i, factory);
    final BufferedDataTable groupedRowTable = exec.createColumnRearrangeTable(rowGroupTable, colre, pivotExec.createSubExecutionContext(progPivotFillMissing / progPivotTotal));
    BufferedDataTable pivotRowsTable = fillPivotTable(groupedRowTable, outSpec, pivotStarts, pivotExec.createSubExecutionContext(progPivotFillPivots / progPivotTotal), null);
    if (orderPivotColumnName != null) {
        final ColumnRearranger colre2 = new ColumnRearranger(pivotRowsTable.getSpec());
        pivotRowsTable = exec.createColumnRearrangeTable(pivotRowsTable, colre2, exec.createSilentSubProgress(0.0));
    // total aggregation without grouping
    if (m_totalAggregation.getBooleanValue()) {
        @SuppressWarnings("unchecked") final GroupByTable totalGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotOverallTotals / progPivotTotal), table, Collections.EMPTY_LIST, isProcessInMemory(), isRetainOrder(), getColumnAggregators());
        final BufferedDataTable totalGroupTable = totalGroup.getBufferedTable();
        final DataTableSpec pivotsRowsSpec = pivotRowsTable.getSpec();
        final DataTableSpec totalGroupSpec = totalGroupTable.getSpec();
        final DataTableSpec overallTotalSpec = new DataTableSpec(pivotsRowsSpec, totalGroupSpec);
        final BufferedDataContainer buf = exec.createDataContainer(overallTotalSpec);
        if (pivotRowsTable.size() > 0) {
            final List<DataCell> pivotTotalsCells = new ArrayList<DataCell>();
            final DataRow pivotsRow = pivotRowsTable.iterator().next();
            for (final DataCell cell : pivotsRow) {
            final DataRow totalGroupRow = totalGroupTable.iterator().next();
            for (final DataCell cell : totalGroupRow) {
            buf.addRowToTable(new DefaultRow(new RowKey("Totals"), pivotTotalsCells));
        pivotRowsTable = buf.getTable();
    /* Fill the 2nd port: important to create this last since it will create
         * the final hilite handler (mapping) for port #1 AND #2 (bug 3270) */
    exec.setMessage("Creating group totals");
    // create group table only on group columns; no pivoting
    final BufferedDataTable columnGroupTable = createGroupByTable(groupExec, table, getGroupByColumns()).getBufferedTable();
    return new PortObject[] { // pivot table
    pivotTable, // group totals
    columnGroupTable, // pivot and overall totals
    pivotRowsTable };
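Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) PortObject(org.knime.core.node.port.PortObject) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) SortedTable(org.knime.base.data.sort.SortedTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory)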


SortedTable ( DataRow ( BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataTableSpec ( ArrayList (java.util.ArrayList)11 DataCell ( ExecutionContext (org.knime.core.node.ExecutionContext)10 DefaultRow ( DataColumnSpec ( DoubleValue ( RowKey ( LinkedHashMap (java.util.LinkedHashMap)7 ColumnRearranger ( DataContainer ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)5 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5 Map (java.util.Map)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 DataColumnSpecCreator (