Use of org.knime.core.data.sort.ColumnBufferedDataTableSorter in project knime-core by knime.
The class StatisticCalculator, method evaluate.
/**
 * Feeds every row of the given table to all registered statistics and collects the warnings they report.
 * <p>
 * If columns to sort on are registered, a {@link ColumnBufferedDataTableSorter} is run over them and, for each
 * row it hands out, the statistics receive an {@code OverwritingRow} built from the next row of the original
 * table, the row delivered by the sorter and a column mapping. Otherwise the rows of the table are passed to the
 * statistics as they are.
 *
 * @param dataTable actual data table to compute the statistics on
 * @param exec execution context used for progress reporting and cancellation checks
 * @return a potential warnings message or <code>null</code>
 * @throws CanceledExecutionException if the user cancels the execution
 */
public String evaluate(final BufferedDataTable dataTable, final ExecutionContext exec) throws CanceledExecutionException {
    for (Statistic stat : m_statistics) {
        stat.beforeEvaluation(dataTable.size());
    }
    if (!m_colToSortOn.isEmpty()) {
        // Build the column name array once; it is needed by the sorter and by the spec mapping.
        final String[] sortColumns = m_colToSortOn.toArray(new String[m_colToSortOn.size()]);
        ColumnBufferedDataTableSorter columnDataTableSorter;
        try {
            columnDataTableSorter =
                new ColumnBufferedDataTableSorter(dataTable.getDataTableSpec(), dataTable.size(), sortColumns);
        } catch (InvalidSettingsException e) {
            throw new RuntimeException("Error on initialize the sorting", e);
        }
        exec.setMessage("Sorting Data.");
        // Original rows are read in parallel to the rows handed out by the sorter.
        final Iterator<DataRow> it = dataTable.iterator();
        final MutableLong count = new MutableLong();
        // Progress split: 70% for the sorting itself, 30% for feeding the statistics.
        final ExecutionContext evalProgress = exec.createSubExecutionContext(0.3);
        final int[] specMapping = createSpecMapping(dataTable.getSpec(), sortColumns);
        columnDataTableSorter.sort(dataTable, exec.createSubExecutionContext(0.7), new SortingConsumer() {
            @Override
            public void consume(final DataRow defaultRow) {
                DataRow next = it.next();
                evalProgress.setProgress(count.longValue() / (double) dataTable.size(),
                    "Processing Row: " + next.getKey());
                count.increment();
                for (Statistic stat : m_statistics) {
                    stat.consumeRow(new OverwritingRow(next, defaultRow, specMapping));
                }
            }
        });
    } else {
        exec.setMessage("Evaluating statistics.");
        long count = 0;
        for (DataRow currRow : dataTable) {
            // Without this check a cancel request would be ignored until the whole table has been processed.
            exec.checkCanceled();
            exec.setProgress(count++ / (double) dataTable.size(), "Processing Row: " + currRow.getKey());
            for (Statistic stat : m_statistics) {
                stat.consumeRow(currRow);
            }
        }
    }
    // Concatenate the warnings of all statistics, one per line.
    StringBuilder warnings = new StringBuilder();
    for (Statistic stat : m_statistics) {
        String warningString = stat.finish();
        if (warningString != null) {
            warnings.append(warningString);
            warnings.append("\n");
        }
    }
    return warnings.length() > 0 ? warnings.toString() : null;
}
Use of org.knime.core.data.sort.ColumnBufferedDataTableSorter in project knime-core by knime.
The class MedianTable, method sortOnDisk.
/**
 * Sorts the selected columns with a {@link ColumnBufferedDataTableSorter} (missing values are ordered last) and
 * fills {@code m_medians} from the cells found at the requested positions.
 * <p>
 * For every column the cells at the positions {@code k[0][column]} and {@code k[1][column]} of the sorted output
 * are picked and their mean is stored as the median. A picked cell that is not a {@link DoubleValue} counts as
 * {@code NaN}; if one of the two positions is never reached, the median is {@code NaN} as well.
 *
 * @param context An {@link ExecutionContext}.
 * @param k The indices to read from the different columns
 * (first dim: length 2 (above & below median indices), second dim: columns)
 * @throws CanceledExecutionException Execution was cancelled.
 */
private void sortOnDisk(final ExecutionContext context, final long[][] k) throws CanceledExecutionException {
    final int columnCount = m_indices.length;
    final DataTableSpec tableSpec = m_table.getSpec();

    // One sorting description per selected column.
    final SortingDescription[] descriptions = new SortingDescription[columnCount];
    for (int col = 0; col < columnCount; col++) {
        final DataColumnSpec colSpec = tableSpec.getColumnSpec(m_indices[col]);
        final DataValueComparator valueComparator = colSpec.getType().getComparator();
        descriptions[col] = new SortingDescription(colSpec.getName()) {
            @Override
            public int compare(final DataRow o1, final DataRow o2) {
                final DataCell left = o1.getCell(0);
                final DataCell right = o2.getCell(0);
                // Missing cells are ordered after all other cells.
                if (left.isMissing()) {
                    if (right.isMissing()) {
                        return 0;
                    }
                    return 1;
                }
                return right.isMissing() ? -1 : valueComparator.compare(left, right);
            }
        };
    }

    final ColumnBufferedDataTableSorter sorter;
    try {
        sorter = new ColumnBufferedDataTableSorter(tableSpec, m_table.size(), descriptions);
    } catch (InvalidSettingsException e) {
        throw new IllegalStateException(e);
    }

    // picked[0] / picked[1] hold, per column, the cells found at positions k[0] / k[1].
    final MutableLong rowIndex = new MutableLong();
    final DoubleValue[][] picked = new DoubleValue[2][columnCount];
    sorter.sort(m_table, context, new SortingConsumer() {
        @Override
        public void consume(final DataRow row) {
            for (int side = 0; side < 2; side++) {
                for (int col = 0; col < columnCount; col++) {
                    if (rowIndex.longValue() != k[side][col]) {
                        continue;
                    }
                    final DataCell cell = row.getCell(col);
                    final DoubleValue value =
                        cell instanceof DoubleValue ? (DoubleValue) cell : new DoubleCell(Double.NaN);
                    picked[side][col] = value;
                }
            }
            rowIndex.increment();
        }
    });

    for (int col = columnCount - 1; col >= 0; col--) {
        final DoubleValue first = picked[0][col];
        final DoubleValue second = picked[1][col];
        if (first != null && second != null) {
            m_medians[col] = (first.getDoubleValue() + second.getDoubleValue()) / 2;
        } else {
            // At least one requested position was never reached.
            m_medians[col] = Double.NaN;
        }
    }
}
Aggregations