Examples with SortedTable - org.knime.base.data.sort.SortedTable

Example 11 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class BoxPlotNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    if (inData[0] == null) {
        return new BufferedDataTable[] {};
    }
    BufferedDataTable table = inData[0];
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    int colIdx = 0;
    List<DataColumnSpec> outputColSpecs = new ArrayList<DataColumnSpec>();
    double subProgress = 1.0 / getNumNumericColumns(table.getDataTableSpec());
    for (DataColumnSpec colSpec : table.getDataTableSpec()) {
        ExecutionContext colExec = exec.createSubExecutionContext(subProgress);
        exec.checkCanceled();
        if (colSpec.getType().isCompatible(DoubleValue.class)) {
            double[] statistic = new double[SIZE];
            outputColSpecs.add(colSpec);
            List<String> col = new ArrayList<String>();
            col.add(colSpec.getName());
            ExecutionContext sortExec = colExec.createSubExecutionContext(0.75);
            ExecutionContext findExec = colExec.createSubExecutionContext(0.25);
            SortedTable sorted = new SortedTable(table, col, new boolean[] { true }, sortExec);
            long currRowAbsolute = 0;
            int currCountingRow = 1;
            double lastValue = 1;
            long nrOfRows = table.size();
            boolean first = true;
            for (DataRow row : sorted) {
                exec.checkCanceled();
                double rowProgress = currRowAbsolute / (double) table.size();
                findExec.setProgress(rowProgress, "determining statistics for: " + table.getDataTableSpec().getColumnSpec(colIdx).getName());
                if (row.getCell(colIdx).isMissing()) {
                    // asserts that the missing values are sorted at
                    // the top of the table
                    currRowAbsolute++;
                    nrOfRows--;
                    continue;
                }
                // get the first value = actually observed minimum
                if (first) {
                    statistic[MIN] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    // initialize the statistics with first value
                    // if the table is large enough it will be overriden
                    // this is just for the case of tables with < 5 rows
                    statistic[MEDIAN] = statistic[MIN];
                    statistic[LOWER_QUARTILE] = statistic[MIN];
                    statistic[UPPER_QUARTILE] = statistic[MIN];
                    first = false;
                }
                // get the last value = actually observed maximum
                if (currRowAbsolute == table.size() - 1) {
                    statistic[MAX] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                }
                float medianPos = nrOfRows * 0.5f;
                float lowerQuartilePos = nrOfRows * 0.25f;
                float upperQuartilePos = nrOfRows * 0.75f;
                if (currCountingRow == (int) Math.floor(lowerQuartilePos) + 1) {
                    if (lowerQuartilePos % 1 != 0) {
                        // get the row's value
                        statistic[LOWER_QUARTILE] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[LOWER_QUARTILE] = (value + lastValue) / 2;
                    }
                }
                if (currCountingRow == (int) Math.floor(medianPos) + 1) {
                    if (medianPos % 1 != 0) {
                        // get the row's value
                        statistic[MEDIAN] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[MEDIAN] = (value + lastValue) / 2;
                    }
                }
                if (currCountingRow == (int) Math.floor(upperQuartilePos) + 1) {
                    if (upperQuartilePos % 1 != 0) {
                        // get the row's value
                        statistic[UPPER_QUARTILE] = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                    } else {
                        // calculate the mean between row and last row
                        double value = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                        statistic[UPPER_QUARTILE] = (value + lastValue) / 2;
                    }
                }
                lastValue = ((DoubleValue) row.getCell(colIdx)).getDoubleValue();
                currRowAbsolute++;
                currCountingRow++;
            }
            double iqr = statistic[UPPER_QUARTILE] - statistic[LOWER_QUARTILE];
            Map<Double, Set<RowKey>> mild = new LinkedHashMap<Double, Set<RowKey>>();
            Map<Double, Set<RowKey>> extreme = new LinkedHashMap<Double, Set<RowKey>>();
            // per default the whiskers are at min and max
            double[] whiskers = new double[] { statistic[MIN], statistic[MAX] };
            if (statistic[MIN] < (statistic[LOWER_QUARTILE] - (1.5 * iqr)) || statistic[MAX] > statistic[UPPER_QUARTILE] + (1.5 * iqr)) {
                detectOutliers(sorted, iqr, new double[] { statistic[LOWER_QUARTILE], statistic[UPPER_QUARTILE] }, mild, extreme, whiskers, colIdx);
            }
            statistic[LOWER_WHISKER] = whiskers[0];
            statistic[UPPER_WHISKER] = whiskers[1];
            m_mildOutliers.put(colSpec.getName(), mild);
            m_extremeOutliers.put(colSpec.getName(), extreme);
            m_statistics.put(colSpec, statistic);
        }
        colIdx++;
    }
    DataContainer container = createOutputTable(exec, outputColSpecs);
    // return a data array with just one row but with the data table spec
    // for the column selection panel
    m_array = new DefaultDataArray(table, 1, 2);
    return new BufferedDataTable[] { exec.createBufferedDataTable(container.getTable(), exec) };
}

Also used : HashSet(java.util.HashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 12 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class BoxplotCalculator method calculateMultipleConditional.

/**
 * Calculates statistics for a conditional box plot.
 * @param table the data table
 * @param catCol the column with the category values
 * @param numCol the numeric column
 * @param exec an execution context
 * @return A linked hash map with BoxplotStatistics for each category
 * @throws CanceledExecutionException when the user cancels the execution
 * @throws InvalidSettingsException when the category column has no domain values
 */
public LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> calculateMultipleConditional(final BufferedDataTable table, final String catCol, final String[] numCol, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    DataTableSpec spec = table.getSpec();
    int catColIdx = spec.findColumnIndex(catCol);
    int[] numColIdxs = new int[numCol.length];
    for (int i = 0; i < numCol.length; i++) {
        numColIdxs[i] = spec.findColumnIndex(numCol[i]);
    }
    Set<DataCell> valuesSet = spec.getColumnSpec(catColIdx).getDomain().getValues();
    if (valuesSet == null) {
        throw new InvalidSettingsException("Selected category column has no domain values");
    }
    ArrayList<DataCell> vals = new ArrayList<>(valuesSet);
    Collections.sort(vals, new Comparator<DataCell>() {

        @Override
        public int compare(final DataCell o1, final DataCell o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    // add Missing values class as it is never in specification
    vals.add(new MissingCell(null));
    // we need to have clear names, otherwise Missing values class will be taken as "?"
    ArrayList<String> catNames = new ArrayList<>(vals.size());
    for (DataCell cell : vals) {
        catNames.add(cell.isMissing() ? MISSING_VALUES_CLASS : cell.toString());
    }
    LinkedHashMap<String, LinkedHashMap<String, DataContainer>> containers = new LinkedHashMap<>();
    m_ignoredMissVals = new LinkedHashMap<>();
    for (int i = 0; i < numCol.length; i++) {
        LinkedHashMap<String, DataContainer> map = new LinkedHashMap<>();
        LinkedHashMap<String, Long> missValMap = new LinkedHashMap<>();
        for (DataCell c : vals) {
            String name = c.isMissing() ? MISSING_VALUES_CLASS : c.toString();
            map.put(name, exec.createDataContainer(new DataTableSpec(new String[] { "col" }, new DataType[] { DoubleCell.TYPE })));
            missValMap.put(name, 0L);
        }
        containers.put(numCol[i], map);
        m_ignoredMissVals.put(numCol[i], missValMap);
    }
    ExecutionContext subExec = exec.createSubExecutionContext(0.7);
    // long[][] ignoredMissVals = new long[numCol.length][vals.size()];  // count missing values per data col per class
    long count = 0;
    final long numOfRows = table.size();
    for (DataRow row : table) {
        exec.checkCanceled();
        subExec.setProgress(count++ / (double) numOfRows);
        DataCell catCell = row.getCell(catColIdx);
        String catName = catCell.isMissing() ? MISSING_VALUES_CLASS : catCell.toString();
        for (int i = 0; i < numCol.length; i++) {
            DataCell cell = row.getCell(numColIdxs[i]);
            if (!cell.isMissing()) {
                containers.get(numCol[i]).get(catName).addRowToTable(new DefaultRow(row.getKey(), cell));
            } else {
                // increment missing values
                LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(numCol[i]);
                missValMap.replace(catName, missValMap.get(catName) + 1);
            }
        }
    }
    LinkedHashMap<String, LinkedHashMap<String, BoxplotStatistics>> statsMap = new LinkedHashMap<>();
    excludedClasses = new LinkedHashMap<>();
    List<String> colList = Arrays.asList(numCol);
    ExecutionContext subExec2 = exec.createSubExecutionContext(1.0);
    int count2 = 0;
    for (Entry<String, LinkedHashMap<String, DataContainer>> entry : containers.entrySet()) {
        exec.checkCanceled();
        subExec2.setProgress(count2++ / (double) containers.size());
        LinkedHashMap<String, DataContainer> containers2 = entry.getValue();
        LinkedHashMap<String, BoxplotStatistics> colStats = new LinkedHashMap<String, BoxplotStatistics>();
        String colName = entry.getKey();
        List<String> excludedColClassesList = new ArrayList<>();
        LinkedHashMap<String, Long> ignoredColMissVals = new LinkedHashMap<>();
        for (Entry<String, DataContainer> entry2 : containers2.entrySet()) {
            Set<Outlier> extremeOutliers = new HashSet<Outlier>();
            Set<Outlier> mildOutliers = new HashSet<Outlier>();
            entry2.getValue().close();
            String catName = entry2.getKey();
            BufferedDataTable catTable = (BufferedDataTable) entry2.getValue().getTable();
            LinkedHashMap<String, Long> missValMap = m_ignoredMissVals.get(colName);
            if (catTable.size() == 0) {
                if (!(catName.equals(MISSING_VALUES_CLASS) && missValMap.get(catName) == 0)) {
                    // we should add missing values to this list, only if they were there
                    excludedColClassesList.add(catName);
                }
                missValMap.remove(catName);
                continue;
            } else {
                if (missValMap.get(catName) == 0) {
                    missValMap.remove(catName);
                }
            }
            SortedTable st = new SortedTable(catTable, new Comparator<DataRow>() {

                @Override
                public int compare(final DataRow o1, final DataRow o2) {
                    double d1 = ((DoubleValue) o1.getCell(0)).getDoubleValue();
                    double d2 = ((DoubleValue) o2.getCell(0)).getDoubleValue();
                    if (d1 == d2) {
                        return 0;
                    } else {
                        return d1 < d2 ? -1 : 1;
                    }
                }
            }, false, exec);
            double min = 0, max = 0, q1 = 0, q3 = 0, median = 0;
            boolean dq1 = catTable.size() % 4 == 0;
            long q1Idx = catTable.size() / 4;
            boolean dq3 = 3 * catTable.size() % 4 == 0;
            long q3Idx = 3 * catTable.size() / 4;
            boolean dMedian = catTable.size() % 2 == 0;
            long medianIdx = catTable.size() / 2;
            int counter = 0;
            for (DataRow row : st) {
                double val = ((DoubleValue) row.getCell(0)).getDoubleValue();
                if (counter == 0) {
                    min = val;
                }
                if (counter == catTable.size() - 1) {
                    max = val;
                }
                if (counter == q1Idx - 1 && dq1) {
                    q1 = val;
                }
                if (counter == q1Idx || (counter == 0 && st.size() <= 3)) {
                    if (dq1) {
                        q1 = (q1 + val) / 2.0;
                    } else {
                        q1 = val;
                    }
                }
                if (counter == medianIdx - 1 && dMedian) {
                    median = val;
                }
                if (counter == medianIdx) {
                    if (dMedian) {
                        median = (median + val) / 2;
                    } else {
                        median = val;
                    }
                }
                if (counter == q3Idx - 1 && dq3) {
                    q3 = val;
                }
                if (counter == q3Idx || (counter == st.size() - 1 && st.size() <= 3)) {
                    if (dq3) {
                        q3 = (q3 + val) / 2.0;
                    } else {
                        q3 = val;
                    }
                }
                counter++;
            }
            double iqr = q3 - q1;
            double lowerWhisker = min;
            double upperWhisker = max;
            double upperWhiskerFence = q3 + (1.5 * iqr);
            double lowerWhiskerFence = q1 - (1.5 * iqr);
            double lowerFence = q1 - (3 * iqr);
            double upperFence = q3 + (3 * iqr);
            for (DataRow row : st) {
                double value = ((DoubleValue) row.getCell(0)).getDoubleValue();
                String rowKey = row.getKey().getString();
                if (value < lowerFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value < lowerWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                } else if (lowerWhisker < lowerWhiskerFence && value >= lowerWhiskerFence) {
                    lowerWhisker = value;
                } else if (value <= upperWhiskerFence) {
                    upperWhisker = value;
                } else if (value > upperFence) {
                    extremeOutliers.add(new Outlier(value, rowKey));
                } else if (value > upperWhiskerFence) {
                    mildOutliers.add(new Outlier(value, rowKey));
                }
            }
            colStats.put(catName, new BoxplotStatistics(mildOutliers, extremeOutliers, min, max, lowerWhisker, q1, median, q3, upperWhisker));
        }
        statsMap.put(colName, colStats);
        // missing values part
        String[] excludedColClasses = excludedColClassesList.toArray(new String[excludedColClassesList.size()]);
        excludedClasses.put(colName, excludedColClasses);
    }
    return statsMap;
}

Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) HashSet(java.util.HashSet) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) MissingCell(org.knime.core.data.MissingCell) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 13 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class GroupByTable method sortTable.

/**
 * @param exec {@link ExecutionContext}
 * @param table2sort the {@link BufferedDataTable} to sort
 * @param sortCols the columns to sort by
 * @return the sorted {@link BufferedDataTable}
 * @throws CanceledExecutionException if the operation has been canceled
 * @since 2.7
 */
public static BufferedDataTable sortTable(final ExecutionContext exec, final BufferedDataTable table2sort, final List<String> sortCols) throws CanceledExecutionException {
    if (sortCols.isEmpty()) {
        return table2sort;
    }
    final boolean[] sortOrder = new boolean[sortCols.size()];
    for (int i = 0, length = sortOrder.length; i < length; i++) {
        sortOrder[i] = true;
    }
    final SortedTable sortedTabel = new SortedTable(table2sort, sortCols, sortOrder, exec);
    return sortedTabel.getBufferedDataTable();
}

Also used : SortedTable(org.knime.base.data.sort.SortedTable)

Example 14 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class Joiner method computeJoinTable.

/**
 * Joins the <code>leftTable</code> and the <code>rightTable</code>.
 *
 * @param leftTable The left input table.
 * @param rightTable The right input table.
 * @param exec The Execution monitor for this execution.
 * @return The joined table.
 * @throws CanceledExecutionException when execution is canceled
 * @throws InvalidSettingsException when inconsistent settings are provided
 */
public BufferedDataTable computeJoinTable(final BufferedDataTable leftTable, final BufferedDataTable rightTable, final ExecutionContext exec) throws CanceledExecutionException, InvalidSettingsException {
    m_runtimeWarnings.clear();
    m_leftRowKeyMap.clear();
    m_rightRowKeyMap.clear();
    // This does some input data checking, too
    DataTableSpec joinedTableSpec = createSpec(new DataTableSpec[] { leftTable.getDataTableSpec(), rightTable.getDataTableSpec() });
    if (m_settings.getDuplicateHandling().equals(DuplicateHandling.Filter)) {
        List<String> leftCols = getLeftIncluded(leftTable.getDataTableSpec());
        List<String> rightCols = getRightIncluded(rightTable.getDataTableSpec());
        List<String> duplicates = new ArrayList<String>();
        duplicates.addAll(leftCols);
        duplicates.retainAll(rightCols);
        // Check if duplicated columns have identical data
        compareDuplicates(leftTable, rightTable, duplicates);
    }
    BufferedDataTable outerTable = rightTable;
    BufferedDataTable innerTable = leftTable;
    m_retainRight = JoinMode.RightOuterJoin.equals(m_settings.getJoinMode()) || JoinMode.FullOuterJoin.equals(m_settings.getJoinMode());
    m_retainLeft = JoinMode.LeftOuterJoin.equals(m_settings.getJoinMode()) || JoinMode.FullOuterJoin.equals(m_settings.getJoinMode());
    // if multipleMatchCanOccur is true, to rows can be match more than
    // once. This is in general met with the MatchAny Option but only if
    // there are more than one join column.
    m_matchAny = m_settings.getCompositionMode().equals(CompositionMode.MatchAny) && m_settings.getLeftJoinColumns().length > 1;
    if (m_retainLeft && m_matchAny) {
        m_globalLeftOuterJoins = new HashSet<Integer>();
        for (int i = 0; i < leftTable.getRowCount(); i++) {
            m_globalLeftOuterJoins.add(i);
        }
    }
    m_inputDataRowSettings = createInputDataRowSettings(leftTable, rightTable);
    int[] rightSurvivors = getIndicesOf(rightTable, m_rightSurvivors);
    m_outputDataRowSettings = new OutputRow.Settings(rightTable.getDataTableSpec(), rightSurvivors);
    /* numBits -> numPartitions
         * 0 -> 1
         * 1 -> 2
         * 2 -> 4
         * 3 -> 8
         * 4 -> 16
         * 5 -> 32
         * 6 -> 64
         * 7 -> 128
         */
    m_numBits = m_numBitsInitial;
    int numPartitions = 0x0001 << m_numBits;
    m_bitMask = 0;
    for (int i = 0; i < m_numBits; i++) {
        m_bitMask += 0x0001 << i;
    }
    Set<Integer> pendingParts = new TreeSet<Integer>();
    for (int i = 0; i < numPartitions; i++) {
        pendingParts.add(i);
    }
    JoinContainer joinCont = new JoinContainer(m_outputDataRowSettings);
    double[] progressIntervals = new double[] { 0.6, 0.2, 0.2 };
    exec.setProgress(0.0);
    while (pendingParts.size() > 0) {
        Collection<Integer> processedParts = performJoin(innerTable, outerTable, joinCont, pendingParts, exec, progressIntervals[0]);
        pendingParts.removeAll(processedParts);
    }
    if (m_retainLeft && m_matchAny) {
        // Add left outer joins
        int c = 0;
        for (Integer index : m_globalLeftOuterJoins) {
            DataRow outRow = OutputRow.createDataRow(c, index, -1, m_outputDataRowSettings);
            joinCont.addLeftOuter(outRow, exec);
            c++;
        }
    }
    joinCont.close();
    // numbers are needed to report progress more precisely
    long totalNumJoins = joinCont.getRowCount();
    long numMatches = null != joinCont.getMatches() ? joinCont.getMatches().size() : 0;
    long numLeftOuter = null != joinCont.getLeftOuter() ? joinCont.getLeftOuter().size() : 0;
    long numRightOuter = null != joinCont.getRightOuter() ? joinCont.getRightOuter().size() : 0;
    exec.setMessage("Sort Joined Partitions");
    Comparator<DataRow> joinComp = OutputRow.createRowComparator();
    SortedTable matches = null != joinCont.getMatches() ? new SortedTable(joinCont.getMatches(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numMatches / totalNumJoins)) : null;
    SortedTable leftOuter = null != joinCont.getLeftOuter() ? new SortedTable(joinCont.getLeftOuter(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numLeftOuter / totalNumJoins)) : null;
    SortedTable rightOuter = null != joinCont.getRightOuter() ? new SortedTable(joinCont.getRightOuter(), joinComp, false, exec.createSubExecutionContext(progressIntervals[1] * numRightOuter / totalNumJoins)) : null;
    exec.setMessage("Merge Joined Partitions");
    // Build sorted table
    int[] leftSurvivors = getIndicesOf(leftTable, m_leftSurvivors);
    DataHiliteOutputContainer oc = new DataHiliteOutputContainer(joinedTableSpec, m_settings.getEnableHiLite(), leftTable, leftSurvivors, rightSurvivors, createRowKeyFactory(leftTable, rightTable));
    oc.addTableAndFilterDuplicates(matches, exec.createSubExecutionContext(progressIntervals[2] * numMatches / totalNumJoins));
    oc.addTableAndFilterDuplicates(leftOuter, exec.createSubExecutionContext(progressIntervals[2] * numLeftOuter / totalNumJoins));
    oc.addTableAndFilterDuplicates(rightOuter, exec.createSubExecutionContext(progressIntervals[2] * numRightOuter / totalNumJoins));
    oc.close();
    m_leftRowKeyMap = oc.getLeftRowKeyMap();
    m_rightRowKeyMap = oc.getRightRowKeyMap();
    return oc.getTable();
}

Also used : DataTableSpec(org.knime.core.data.DataTableSpec) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) TreeSet(java.util.TreeSet) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 15 with SortedTable

use of org.knime.base.data.sort.SortedTable in project knime-core by knime.

the class Pivot2NodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = (BufferedDataTable) inData[0];
    final List<String> groupAndPivotCols = createAllColumns();
    final BufferedDataTable groupTable;
    final String orderPivotColumnName;
    ExecutionContext groupAndPivotExec = exec.createSubExecutionContext(0.5);
    ExecutionContext groupExec = exec.createSubExecutionContext(0.25);
    ExecutionContext pivotExec = exec.createSubExecutionContext(0.25);
    double progMainTotal = 0.0;
    double progMainTableAppendIndexForSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableAppendIndexForSort;
    double progMainTableGroup = 5.0;
    progMainTotal += progMainTableGroup;
    double progMainTableInMemSort = isProcessInMemory() ? 3.0 : 0.0;
    progMainTotal += progMainTableInMemSort;
    double progMainTableGetPivots = 1.0;
    progMainTotal += progMainTableGetPivots;
    double progMainTableFillPivots = 1.0;
    progMainTotal += progMainTableFillPivots;
    double progMainTableRestoreSort = isProcessInMemory() || isRetainOrder() ? 1.0 : 0.0;
    progMainTotal += progMainTableRestoreSort;
    double progMainTableReplaceRowKey = isProcessInMemory() ? 1.0 : 0.0;
    progMainTotal += progMainTableReplaceRowKey;
    if (isProcessInMemory() || isRetainOrder()) {
        exec.setMessage("Keeping row order");
        final String retainOrderCol = DataTableSpec.getUniqueColumnName(table.getDataTableSpec(), "#pivot_order#");
        // append temp. id column with minimum-aggregation method
        final ColumnAggregator[] colAggregators = getColumnAggregators().toArray(new ColumnAggregator[0]);
        final Set<String> workingCols = new LinkedHashSet<String>();
        workingCols.addAll(groupAndPivotCols);
        for (final ColumnAggregator ca : colAggregators) {
            workingCols.add(ca.getOriginalColName());
        }
        workingCols.add(retainOrderCol);
        final BufferedDataTable appTable = GroupByTable.appendOrderColumn(groupAndPivotExec.createSubExecutionContext(progMainTableAppendIndexForSort / progMainTotal), table, workingCols, retainOrderCol);
        final DataColumnSpec retainOrderColSpec = appTable.getSpec().getColumnSpec(retainOrderCol);
        final ColumnAggregator[] aggrs = new ColumnAggregator[colAggregators.length + 1];
        System.arraycopy(colAggregators, 0, aggrs, 0, colAggregators.length);
        aggrs[colAggregators.length] = new ColumnAggregator(retainOrderColSpec, AggregationMethods.getRowOrderMethod(), true);
        orderPivotColumnName = getColumnNamePolicy().createColumName(aggrs[colAggregators.length]);
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), appTable, groupAndPivotCols, isProcessInMemory(), false, /* retain order always false; handled by pivoting */
        Arrays.asList(aggrs));
        // true then sort table by group&pivot columns
        if (isProcessInMemory()) {
            exec.setMessage("Sorting group table");
            final boolean[] sortDirection = new boolean[groupAndPivotCols.size()];
            // ensure that missing values are at the end by sorting in ascending order
            Arrays.fill(sortDirection, true);
            final SortedTable sortedGroupByTable = new SortedTable(groupByTable.getBufferedTable(), groupAndPivotCols, sortDirection, groupAndPivotExec.createSubExecutionContext(progMainTableInMemSort / progMainTotal));
            groupTable = sortedGroupByTable.getBufferedDataTable();
        } else {
            groupTable = groupByTable.getBufferedTable();
        }
    } else {
        exec.setMessage("Grouping main table");
        final GroupByTable groupByTable = createGroupByTable(groupAndPivotExec.createSubExecutionContext(progMainTableGroup / progMainTotal), table, groupAndPivotCols, isProcessInMemory(), false, getColumnAggregators());
        groupTable = groupByTable.getBufferedTable();
        orderPivotColumnName = null;
    }
    final List<String> pivotCols = m_pivotCols.getIncludeList();
    final int[] pivotIdx = new int[pivotCols.size()];
    final DataTableSpec groupSpec = groupTable.getSpec();
    final Set<String>[] combPivots = createCombinedPivots(groupSpec, pivotCols);
    for (int i = 0; i < pivotIdx.length; i++) {
        pivotIdx[i] = groupSpec.findColumnIndex(pivotCols.get(i));
    }
    exec.setProgress("Determining pivots...");
    ExecutionContext fillExec = groupAndPivotExec.createSubExecutionContext(progMainTableGetPivots / progMainTotal);
    final long groupTableSize = groupTable.size();
    long groupIndex = 0;
    for (final DataRow row : groupTable) {
        for (int i = 0; i < pivotIdx.length; i++) {
            if (combPivots[i] == null) {
                combPivots[i] = new LinkedHashSet<String>();
            }
            final DataCell cell = row.getCell(pivotIdx[i]);
            if (cell.isMissing()) {
                if (!m_ignoreMissValues.getBooleanValue()) {
                    combPivots[i].add(cell.toString());
                }
            } else {
                combPivots[i].add(cell.toString());
            }
        }
        fillExec.setProgress(groupIndex++ / (double) groupTableSize, String.format("Group \"%s\" (%d/%d)", row.getKey(), groupIndex, groupTableSize));
        fillExec.checkCanceled();
    }
    final Map<String, Integer> pivotStarts = new LinkedHashMap<String, Integer>();
    final DataTableSpec outSpec = createOutSpec(groupSpec, combPivots, pivotStarts, orderPivotColumnName);
    exec.setProgress("Filling pivot table");
    BufferedDataTable pivotTable = fillPivotTable(groupTable, outSpec, pivotStarts, groupAndPivotExec.createSubExecutionContext(progMainTableFillPivots / progMainTotal), orderPivotColumnName);
    if (orderPivotColumnName != null) {
        exec.setMessage("Restoring row order");
        final SortedTable sortedPivotTable = new SortedTable(pivotTable, Arrays.asList(new String[] { orderPivotColumnName }), new boolean[] { true }, groupAndPivotExec.createSubExecutionContext(progMainTableRestoreSort / progMainTotal));
        pivotTable = sortedPivotTable.getBufferedDataTable();
        final ColumnRearranger colre = new ColumnRearranger(pivotTable.getSpec());
        colre.remove(orderPivotColumnName);
        pivotTable = exec.createColumnRearrangeTable(pivotTable, colre, exec.createSilentSubProgress(0.0));
    }
    // temp fix for bug 3286
    if (isProcessInMemory()) {
        // if process in memory is true, RowKey's needs to be re-computed
        final BufferedDataContainer rowkeyBuf = groupAndPivotExec.createSubExecutionContext(progMainTableReplaceRowKey / progMainTotal).createDataContainer(pivotTable.getSpec());
        long rowIndex = 0;
        for (DataRow row : pivotTable) {
            rowkeyBuf.addRowToTable(new DefaultRow(RowKey.createRowKey(rowIndex++), row));
        }
        rowkeyBuf.close();
        pivotTable = rowkeyBuf.getTable();
    }
    groupAndPivotExec.setProgress(1.0);
    /* Fill the 3rd port */
    exec.setMessage("Determining pivot totals");
    double progPivotTotal = 0.0;
    double progPivotGroup = 5.0;
    progPivotTotal += progPivotGroup;
    double progPivotFillMissing = 1.0;
    progPivotTotal += progPivotFillMissing;
    double progPivotFillPivots = 1.0;
    progPivotTotal += progPivotFillPivots;
    double progPivotOverallTotals = m_totalAggregation.getBooleanValue() ? 5.0 : 0.0;
    progPivotTotal += progPivotOverallTotals;
    // create pivot table only on pivot columns (for grouping)
    // perform pivoting: result in single line
    final GroupByTable rowGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotGroup / progPivotTotal), table, m_pivotCols.getIncludeList(), isProcessInMemory(), isRetainOrder(), getColumnAggregators());
    final BufferedDataTable rowGroupTable = rowGroup.getBufferedTable();
    // fill group columns with missing cells
    final ColumnRearranger colre = new ColumnRearranger(rowGroupTable.getDataTableSpec());
    for (int i = 0; i < getGroupByColumns().size(); i++) {
        final DataColumnSpec cspec = outSpec.getColumnSpec(i);
        final CellFactory factory = new SingleCellFactory(cspec) {

            /**
             * {@inheritDoc}
             */
            @Override
            public DataCell getCell(final DataRow row) {
                return DataType.getMissingCell();
            }
        };
        colre.insertAt(i, factory);
    }
    final BufferedDataTable groupedRowTable = exec.createColumnRearrangeTable(rowGroupTable, colre, pivotExec.createSubExecutionContext(progPivotFillMissing / progPivotTotal));
    BufferedDataTable pivotRowsTable = fillPivotTable(groupedRowTable, outSpec, pivotStarts, pivotExec.createSubExecutionContext(progPivotFillPivots / progPivotTotal), null);
    if (orderPivotColumnName != null) {
        final ColumnRearranger colre2 = new ColumnRearranger(pivotRowsTable.getSpec());
        colre2.remove(orderPivotColumnName);
        pivotRowsTable = exec.createColumnRearrangeTable(pivotRowsTable, colre2, exec.createSilentSubProgress(0.0));
    }
    // total aggregation without grouping
    if (m_totalAggregation.getBooleanValue()) {
        @SuppressWarnings("unchecked") final GroupByTable totalGroup = createGroupByTable(pivotExec.createSubExecutionContext(progPivotOverallTotals / progPivotTotal), table, Collections.EMPTY_LIST, isProcessInMemory(), isRetainOrder(), getColumnAggregators());
        final BufferedDataTable totalGroupTable = totalGroup.getBufferedTable();
        final DataTableSpec pivotsRowsSpec = pivotRowsTable.getSpec();
        final DataTableSpec totalGroupSpec = totalGroupTable.getSpec();
        final DataTableSpec overallTotalSpec = new DataTableSpec(pivotsRowsSpec, totalGroupSpec);
        final BufferedDataContainer buf = exec.createDataContainer(overallTotalSpec);
        if (pivotRowsTable.size() > 0) {
            final List<DataCell> pivotTotalsCells = new ArrayList<DataCell>();
            final DataRow pivotsRow = pivotRowsTable.iterator().next();
            for (final DataCell cell : pivotsRow) {
                pivotTotalsCells.add(cell);
            }
            final DataRow totalGroupRow = totalGroupTable.iterator().next();
            for (final DataCell cell : totalGroupRow) {
                pivotTotalsCells.add(cell);
            }
            buf.addRowToTable(new DefaultRow(new RowKey("Totals"), pivotTotalsCells));
        }
        buf.close();
        pivotRowsTable = buf.getTable();
    }
    pivotExec.setProgress(1.0);
    /* Fill the 2nd port: important to create this last since it will create
         * the final hilite handler (mapping) for port #1 AND #2 (bug 3270) */
    exec.setMessage("Creating group totals");
    // create group table only on group columns; no pivoting
    final BufferedDataTable columnGroupTable = createGroupByTable(groupExec, table, getGroupByColumns()).getBufferedTable();
    return new PortObject[] { // pivot table
    pivotTable, // group totals
    columnGroupTable, // pivot and overall totals
    pivotRowsTable };
}

Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) PortObject(org.knime.core.node.port.PortObject) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) ColumnAggregator(org.knime.base.data.aggregation.ColumnAggregator) SortedTable(org.knime.base.data.sort.SortedTable) GroupByTable(org.knime.base.node.preproc.groupby.GroupByTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory)

Aggregations

SortedTable (org.knime.base.data.sort.SortedTable)18 DataRow (org.knime.core.data.DataRow)16 BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataTableSpec (org.knime.core.data.DataTableSpec)12 ArrayList (java.util.ArrayList)11 DataCell (org.knime.core.data.DataCell)10 ExecutionContext (org.knime.core.node.ExecutionContext)10 DefaultRow (org.knime.core.data.def.DefaultRow)9 DataColumnSpec (org.knime.core.data.DataColumnSpec)8 DoubleValue (org.knime.core.data.DoubleValue)8 RowKey (org.knime.core.data.RowKey)8 LinkedHashMap (java.util.LinkedHashMap)7 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)7 DataContainer (org.knime.core.data.container.DataContainer)5 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)5 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5 Map (java.util.Map)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)3