Search in sources :

Example 1 with ColumnBufferedDataTableSorter

use of org.knime.core.data.sort.ColumnBufferedDataTableSorter in project knime-core by knime.

the class StatisticCalculator method evaluate.

/**
 * Feeds every row of the given table to all registered statistics and collects the warnings they
 * report when finishing.
 * <p>
 * When columns to sort on are configured, these columns are sorted with a
 * {@link ColumnBufferedDataTableSorter}; every sorted row is paired with the next row of the original
 * table in an {@code OverwritingRow} before it is handed to the statistics. Otherwise the rows of the
 * table are passed to the statistics unchanged.
 *
 * @param dataTable actual data table to compute the statistics on
 * @param exec execution context used for progress reporting and cancellation
 * @return a potential warnings message or <code>null</code>
 * @throws CanceledExecutionException if the user cancels the execution
 */
public String evaluate(final BufferedDataTable dataTable, final ExecutionContext exec) throws CanceledExecutionException {
    final long rowCount = dataTable.size();
    for (Statistic stat : m_statistics) {
        stat.beforeEvaluation(rowCount);
    }
    if (!m_colToSortOn.isEmpty()) {
        // Built once and shared by the sorter and the spec mapping.
        final String[] sortColumns = m_colToSortOn.toArray(new String[m_colToSortOn.size()]);
        ColumnBufferedDataTableSorter columnDataTableSorter;
        try {
            columnDataTableSorter = new ColumnBufferedDataTableSorter(dataTable.getDataTableSpec(), rowCount, sortColumns);
        } catch (InvalidSettingsException e) {
            throw new RuntimeException("Error on initialize the sorting", e);
        }
        exec.setMessage("Sorting Data.");
        final Iterator<DataRow> it = dataTable.iterator();
        final MutableLong count = new MutableLong();
        // Progress split: 30% for feeding the statistics, 70% for the sorting itself.
        final ExecutionContext evalProgress = exec.createSubExecutionContext(0.3);
        final int[] specMapping = createSpecMapping(dataTable.getSpec(), sortColumns);
        columnDataTableSorter.sort(dataTable, exec.createSubExecutionContext(0.7), new SortingConsumer() {

            @Override
            public void consume(final DataRow defaultRow) {
                // Advance the original table in lock-step with the sorted output.
                DataRow next = it.next();
                evalProgress.setProgress(count.longValue() / (double) rowCount, "Processing Row: " + next.getKey());
                count.increment();
                for (Statistic stat : m_statistics) {
                    stat.consumeRow(new OverwritingRow(next, defaultRow, specMapping));
                }
            }
        });
    } else {
        exec.setMessage("Evaluating statistics.");
        long count = 0;
        for (DataRow currRow : dataTable) {
            // Poll for cancellation on every row so a user cancel is honoured promptly;
            // reporting progress alone does not interrupt this loop.
            exec.checkCanceled();
            exec.setProgress(count++ / (double) rowCount, "Processing Row: " + currRow.getKey());
            for (Statistic stat : m_statistics) {
                stat.consumeRow(currRow);
            }
        }
    }
    // Concatenate the non-null warnings, one per line.
    StringBuilder warnings = new StringBuilder();
    for (Statistic stat : m_statistics) {
        String warningString = stat.finish();
        if (warningString != null) {
            warnings.append(warningString);
            warnings.append("\n");
        }
    }
    return warnings.length() > 0 ? warnings.toString() : null;
}
Also used : DataRow(org.knime.core.data.DataRow) ColumnBufferedDataTableSorter(org.knime.core.data.sort.ColumnBufferedDataTableSorter) MutableLong(org.apache.commons.lang.mutable.MutableLong) ExecutionContext(org.knime.core.node.ExecutionContext) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) SortingConsumer(org.knime.core.data.sort.SortingConsumer)

Example 2 with ColumnBufferedDataTableSorter

use of org.knime.core.data.sort.ColumnBufferedDataTableSorter in project knime-core by knime.

the class MedianTable method sortOnDisk.

/**
 * Sorts the selected columns on disk with missing values ordered last, picks the values found at
 * the requested positions and stores their mean per column in {@code m_medians}.
 *
 * @param context An {@link ExecutionContext}.
 * @param k The indices to read from the different columns
 *        (first dim: length 2 (above & below median indices), second dim: columns)
 * @throws CanceledExecutionException Execution was cancelled.
 */
private void sortOnDisk(final ExecutionContext context, final long[][] k) throws CanceledExecutionException {
    final int columnCount = m_indices.length;
    final DataTableSpec tableSpec = m_table.getSpec();
    final SortingDescription[] descriptions = new SortingDescription[columnCount];
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec colSpec = tableSpec.getColumnSpec(m_indices[col]);
        final DataValueComparator valueComparator = colSpec.getType().getComparator();
        descriptions[col] = new SortingDescription(colSpec.getName()) {

            @Override
            public int compare(final DataRow o1, final DataRow o2) {
                // The value to compare is read from cell 0 of both rows.
                final DataCell left = o1.getCell(0);
                final DataCell right = o2.getCell(0);
                if (left.isMissing()) {
                    // Missing sorts after everything except another missing value.
                    if (right.isMissing()) {
                        return 0;
                    }
                    return 1;
                }
                return right.isMissing() ? -1 : valueComparator.compare(left, right);
            }
        };
    }

    final ColumnBufferedDataTableSorter sorter;
    try {
        sorter = new ColumnBufferedDataTableSorter(tableSpec, m_table.size(), descriptions);
    } catch (InvalidSettingsException e) {
        throw new IllegalStateException(e);
    }

    // picked[0][c] / picked[1][c]: values of column c found at positions k[0][c] / k[1][c].
    final DoubleValue[][] picked = new DoubleValue[2][columnCount];
    final MutableLong rowCounter = new MutableLong();
    sorter.sort(m_table, context, new SortingConsumer() {

        @Override
        public void consume(final DataRow row) {
            final long position = rowCounter.longValue();
            for (int which = 0; which < 2; which++) {
                for (int col = 0; col < columnCount; col++) {
                    if (position != k[which][col]) {
                        continue;
                    }
                    final DataCell cell = row.getCell(col);
                    // Cells that are not DoubleValue are recorded as NaN.
                    picked[which][col] =
                        cell instanceof DoubleValue ? (DoubleValue) cell : new DoubleCell(Double.NaN);
                }
            }
            rowCounter.increment();
        }
    });

    for (int col = columnCount - 1; col >= 0; col--) {
        final DoubleValue first = picked[0][col];
        final DoubleValue second = picked[1][col];
        if (first != null && second != null) {
            m_medians[col] = (first.getDoubleValue() + second.getDoubleValue()) / 2;
        } else {
            // A requested position was never reached, so no value was picked.
            m_medians[col] = Double.NaN;
        }
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DoubleCell(org.knime.core.data.def.DoubleCell) SortingDescription(org.knime.core.data.sort.SortingDescription) DataValueComparator(org.knime.core.data.DataValueComparator) DataRow(org.knime.core.data.DataRow) ColumnBufferedDataTableSorter(org.knime.core.data.sort.ColumnBufferedDataTableSorter) MutableLong(org.apache.commons.lang.mutable.MutableLong) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) SortingConsumer(org.knime.core.data.sort.SortingConsumer) DataCell(org.knime.core.data.DataCell)

Aggregations

MutableLong (org.apache.commons.lang.mutable.MutableLong)2 DataRow (org.knime.core.data.DataRow)2 ColumnBufferedDataTableSorter (org.knime.core.data.sort.ColumnBufferedDataTableSorter)2 SortingConsumer (org.knime.core.data.sort.SortingConsumer)2 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)2 DataCell (org.knime.core.data.DataCell)1 DataColumnSpec (org.knime.core.data.DataColumnSpec)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 DataValueComparator (org.knime.core.data.DataValueComparator)1 DoubleValue (org.knime.core.data.DoubleValue)1 DoubleCell (org.knime.core.data.def.DoubleCell)1 SortingDescription (org.knime.core.data.sort.SortingDescription)1 ExecutionContext (org.knime.core.node.ExecutionContext)1