Use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.
Class DataTableTrainingData, method shuffle.
/**
 * Brings the rows of {@code m_data} into a pseudo-random order.
 *
 * <p>The column at {@code m_shuffleColIdx} is replaced by the output of a
 * {@code RandomNumberAppendFactory} seeded from this object's random generator. The table is
 * then sorted on that column and the result is written back to {@code m_data}.
 *
 * @throws CanceledExecutionException declared by the rearrange/sort calls (presumably raised
 *         when the surrounding execution is canceled)
 */
private void shuffle() throws CanceledExecutionException {
    final int rowCount = (int) m_data.size();
    final Random rng = getRandomDataGenerator();

    // Step 1: overwrite the shuffle column with values from a freshly seeded factory.
    final ColumnRearranger rearranger = new ColumnRearranger(m_data.getDataTableSpec());
    final long seed = rng.nextLong();
    rearranger.replace(new RandomNumberAppendFactory(seed, rowCount, m_shuffleColSpec), m_shuffleColIdx);
    m_data = m_exec.createColumnRearrangeTable(m_data, rearranger, m_exec.createSubProgress(0.0));

    // Step 2: order the rows by the shuffle column.
    final String shuffleColName = m_shuffleColSpec.getName();
    final BufferedDataTableSorter shuffleSorter =
        new BufferedDataTableSorter(m_data, Collections.singleton(shuffleColName), SORT_ASCENDING);
    m_data = shuffleSorter.sort(m_exec.createSubExecutionContext(0.0));
}
Use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.
Class SorterNodeModel, method execute.
/**
 * Sorts the table at the input port by the configured columns.
 *
 * <p>When the include list is empty, a warning is set on the node and the input table is
 * passed through untouched. Otherwise the sorting is delegated to a
 * {@link BufferedDataTableSorter} that is configured with the include list, the per-column
 * sort order, the "missing values to end" flag and the "sort in memory" flag of this model.
 *
 * @param inData the data tables at the input ports; only {@code inData[INPORT]} is read
 * @param exec the execution context handed to the sorter
 * @return a one-element array holding the sorted table, or the unmodified input table if no
 *         sort columns are selected
 * @throws Exception declared by the overridden method; presumably whatever the sorter raises
 *
 * @see org.knime.core.node.NodeModel#execute(BufferedDataTable[],
 * ExecutionContext)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Nothing to sort by: hand the input back and tell the user why.
    if (m_inclList.isEmpty()) {
        setWarningMessage("No columns were selected - returning original table");
        return new BufferedDataTable[] { inData[INPORT] };
    }

    final BufferedDataTable input = inData[INPORT];
    final BufferedDataTableSorter tableSorter =
        new BufferedDataTableSorter(input, m_inclList, m_sortOrder, m_missingToEnd);
    tableSorter.setSortInMemory(m_sortInMemory);
    return new BufferedDataTable[] { tableSorter.sort(exec) };
}
Use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.
Class GroupLoopStartNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>Emits the rows of one group per call. On the first iteration the input table is
 * remembered and, unless the "input is already sorted" setting is enabled, sorted ascending by
 * the included group columns; an iterator over the resulting table is kept in a field and
 * consumed across iterations. Each call copies rows into a fresh container until
 * {@code getGroupingState(row)} reports a group end or the iterator is exhausted. The row that
 * triggered the group end is kept in {@code m_lastRow} and becomes the first row of the next
 * iteration's output.
 *
 * <p>Group identifiers are registered with a duplicate checker only when the "input is already
 * sorted" setting is enabled; a duplicate identifier then means the input was not actually
 * sorted and results in a {@code DuplicateKeyException}.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec the execution context used for sorting and for creating the output container
 * @return a one-element array holding the rows collected in this iteration
 * @throws Exception e.g. a {@code DuplicateKeyException} if a group identifier occurs twice
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
// /////////////////////////
//
// / DATA TABLES (SORTING)
//
// /////////////////////////
BufferedDataTable table = inData[0];
DataTableSpec spec = table.getDataTableSpec();
// an empty input has no groups: flag the loop as finished right away
if (table.size() <= 0) {
m_endLoop = true;
}
// parameters
m_includedColIndices = getIncludedColIndices(table.getDataTableSpec());
// duplicates are only checked when the user states that the input is already sorted
boolean checkDuplicates = m_sortedInputTableModel.getBooleanValue();
// remember table and sort table if necessary
if (m_iteration == 0) {
assert getLoopEndNode() == null : "1st iteration but end node set";
m_table = table;
m_spec = m_table.getDataTableSpec();
// sort if not already sorted
if (!m_sortedInputTableModel.getBooleanValue()) {
// sort all included group columns ascending; the last constructor argument is
// passed as false (NOTE(review): presumably "missing values to end" - confirm)
final String[] includes = m_filterGroupColModel.applyTo(spec).getIncludes();
boolean[] sortAsc = new boolean[includes.length];
Arrays.fill(sortAsc, true);
BufferedDataTableSorter tableSorter = new BufferedDataTableSorter(table, Arrays.asList(includes), sortAsc, false);
m_sortedTable = tableSorter.sort(exec);
} else {
// no sort necessary
m_sortedTable = table;
}
// the iterator lives in a field and is advanced across loop iterations
m_iterator = m_sortedTable.iterator();
} else {
assert getLoopEndNode() != null : "No end node set";
assert table == m_table : "Input tables differ between iterations";
}
// /////////////////////////
//
// / INIT
//
// /////////////////////////
BufferedDataContainer cont = exec.createDataContainer(table.getSpec());
// create new duplicate checker if null
if (m_duplicateChecker == null) {
m_duplicateChecker = new DuplicateChecker();
}
// initialize grouping states if null
if (m_currentGroupingState == null) {
m_currentGroupingState = new GroupingState("", false, null);
}
m_lastGroupingState = m_currentGroupingState;
// the row that ended the previous group was not emitted yet; it opens the new group
if (m_lastRow != null) {
cont.addRowToTable(m_lastRow);
}
// if the final row has been reached and added set end loop flag
if (m_isFinalRow) {
m_endLoop = true;
}
// walk through input table and group data
// as long as new row fits into the current group or there are no more
// rows left.
boolean groupEnd = false;
while (!groupEnd && m_iterator.hasNext()) {
DataRow row = m_iterator.next();
// get grouping state according to new row
m_currentGroupingState = getGroupingState(row);
groupEnd = m_currentGroupingState.isGroupEnd();
// no previous row has been stored yet: take this row's grouping state as the last state
// and, if requested, add its group identifier to the duplicate checker.
if (m_lastRow == null) {
m_lastGroupingState = m_currentGroupingState;
if (checkDuplicates) {
m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
}
}
m_lastRow = row;
// if group end has not been reached add row
if (!groupEnd) {
cont.addRowToTable(row);
m_lastGroupingState = m_currentGroupingState;
// if group end has been reached add identifier of new group to
// duplicate checker
} else {
if (checkDuplicates) {
try {
m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
} catch (DuplicateKeyException e) {
throw new DuplicateKeyException("Input table was " + "not sorted, found duplicate (group identifier:" + m_currentGroupingState.getGroupIdentifier() + ")");
}
}
}
// the iterator is exhausted: remember that the final row has been read. Whether the
// loop ends now depends on which group that row belongs to.
if (!m_iterator.hasNext() && !m_isFinalRow) {
m_isFinalRow = true;
// the final row was added to the current group (no group end), thus end loop;
// otherwise it starts a new group that is emitted by the next iteration, which
// then sets the end loop flag because m_isFinalRow is already true
if (!groupEnd) {
m_endLoop = true;
}
}
}
cont.close();
if (m_endLoop) {
// check for duplicates and throw exception if duplicate exist
try {
m_duplicateChecker.checkForDuplicates();
} catch (DuplicateKeyException e) {
throw new DuplicateKeyException("Input table was not sorted, found duplicate group identifier " + e.getKey());
} finally {
// drop the checker in any case; a new one is created when m_duplicateChecker is null
m_duplicateChecker.clear();
m_duplicateChecker = null;
}
}
// push variables describing the group that was emitted in this iteration
pushFlowVariableInt("currentIteration", m_iteration);
pushGroupColumnValuesAsFlowVariables(m_lastGroupingState);
pushFlowVariableString("groupIdentifier", m_lastGroupingState.getGroupIdentifier());
m_iteration++;
return new BufferedDataTable[] { cont.getTable() };
}
Use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.
Class RankNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>Sorts the input table by the configured ranking columns and appends a rank column that
 * is produced by a {@code RankCellFactory}. If the "retain row order" setting is enabled, a
 * temporary {@code "rowOrder"} column is appended before sorting, used afterwards to sort the
 * result back, and removed again before the table is returned.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec the execution context used for sorting and column rearranging
 * @return a one-element array holding the input rows plus the appended rank column
 * @throws IllegalArgumentException if {@code inData[0]} is {@code null}
 * @throws Exception declared by the overridden method; presumably whatever the sorter or the
 *         column rearranger raise
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable table = inData[0];
    if (table == null) {
        throw new IllegalArgumentException("No input table found");
    }
    if (table.size() < 1) {
        setWarningMessage("Empty input table found");
    }
    DataTableSpec inSpec = table.getDataTableSpec();

    // Resolve the configured grouping/ranking columns and the rank mode.
    final List<String> groupCols = Arrays.asList(m_groupColumns.getStringArrayValue());
    final List<String> rankCols = Arrays.asList(m_rankColumns.getStringArrayValue());
    final int[] groupColIndices = getIndicesFromColNameList(groupCols, inSpec);
    final int[] rankColIndices = getIndicesFromColNameList(rankCols, inSpec);
    final String rankMode = m_rankMode.getStringValue();

    // Two steps (sort, append rank) plus three more (append order column, sort back, remove
    // order column) when the original row order has to be restored.
    final boolean retainRowOrder = m_retainRowOrder.getBooleanValue();
    final double numSteps = retainRowOrder ? 5 : 2;
    final double stepFraction = 1 / numSteps;

    // Remember the original row order in a helper column.
    final String rowOrder = "rowOrder";
    if (retainRowOrder) {
        final ColumnRearranger orderAppender = new ColumnRearranger(inSpec);
        final DataColumnSpec rowOrderSpec = new DataColumnSpecCreator(rowOrder, LongCell.TYPE).createSpec();
        orderAppender.append(new OrderCellFactory(rowOrderSpec));
        table = exec.createColumnRearrangeTable(table, orderAppender, exec.createSubProgress(stepFraction));
        inSpec = table.getDataTableSpec();
    }

    // One flag per ranking column: true means ascending.
    final String[] orderRank = m_rankOrder.getStringArrayValue();
    final boolean[] ascRank = new boolean[orderRank.length];
    int pos = 0;
    for (final String order : orderRank) {
        ascRank[pos++] = order.equals("Ascending");
    }

    // Sort by the ranking columns.
    final BufferedDataTableSorter rankSorter = new BufferedDataTableSorter(table, rankCols, ascRank);
    final BufferedDataTable sortedTable = rankSorter.sort(exec.createSubExecutionContext(stepFraction));

    // Describe the rank column; its cell type depends on the "rank as long" setting.
    final ColumnRearranger rankAppender = new ColumnRearranger(sortedTable.getDataTableSpec());
    final boolean rankAsLong = m_rankAsLong.getBooleanValue();
    final DataColumnSpec rankColSpec;
    if (rankAsLong) {
        rankColSpec = new DataColumnSpecCreator(m_rankOutColName.getStringValue(), LongCell.TYPE).createSpec();
    } else {
        rankColSpec = new DataColumnSpecCreator(m_rankOutColName.getStringValue(), IntCell.TYPE).createSpec();
    }

    // Default capacity of 11 without grouping, otherwise the square root of the row count.
    final int initialHashtableCapacity = groupCols.isEmpty() ? 11 : (int) Math.sqrt(table.size());

    // Append the rank column.
    rankAppender.append(new RankCellFactory(rankColSpec, groupColIndices, rankColIndices, rankMode, rankAsLong,
        initialHashtableCapacity));
    BufferedDataTable out =
        exec.createColumnRearrangeTable(sortedTable, rankAppender, exec.createSubExecutionContext(stepFraction));

    if (retainRowOrder) {
        // Sort back into the original order ...
        final BufferedDataTableSorter orderSorter =
            new BufferedDataTableSorter(out, Arrays.asList(rowOrder), new boolean[] { true });
        out = orderSorter.sort(exec.createSubExecutionContext(stepFraction));
        // ... and drop the helper column again.
        final ColumnRearranger orderRemover = new ColumnRearranger(out.getDataTableSpec());
        orderRemover.remove(rowOrder);
        out = exec.createColumnRearrangeTable(out, orderRemover, exec.createSubExecutionContext(stepFraction));
    }
    return new BufferedDataTable[] { out };
}
Use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.
Class FileStoresInLoopCache, method close.
/**
 * Closes the container of created file stores and turns its content into the table kept in
 * {@code m_createdFileStoresTable}.
 *
 * <p>If the keys were added in sorted order, the container's table is used as is. Otherwise
 * it is sorted ascending by {@code COL_NAME}, rows whose file store key equals the key of the
 * preceding row are skipped, and the unsorted intermediate table is cleared.
 *
 * @return the table that is stored in {@code m_createdFileStoresTable}
 * @throws CanceledExecutionException declared by the sort call (presumably raised when the
 *         execution is canceled)
 */
BufferedDataTable close() throws CanceledExecutionException {
    m_createdFileStoresContainer.close();
    final BufferedDataTable rawTable = m_createdFileStoresContainer.getTable();
    m_createdFileStoresContainer = null;

    // Already sorted on insertion: nothing left to do.
    if (m_keysWereAddedSorted) {
        m_createdFileStoresTable = rawTable;
        return m_createdFileStoresTable;
    }

    final BufferedDataTableSorter keySorter =
        new BufferedDataTableSorter(rawTable, Collections.singletonList(COL_NAME), new boolean[] { true });
    final BufferedDataTable sortedTable = keySorter.sort(m_exec.createSilentSubExecutionContext(0.0));

    // After sorting, equal keys are adjacent, so comparing with the previous key is enough
    // to keep only the first row of each key.
    final BufferedDataContainer deduplicated = m_exec.createDataContainer(LOOP_FILE_STORE_SPEC);
    FileStoreKey previousKey = null;
    for (final DataRow row : sortedTable) {
        final FileStoreKey currentKey = getFileStoreKey(row);
        final boolean sameAsPrevious = ConvenienceMethods.areEqual(previousKey, currentKey);
        previousKey = currentKey;
        if (sameAsPrevious) {
            continue;
        }
        deduplicated.addRowToTable(row);
    }
    deduplicated.close();

    m_exec.clearTable(rawTable);
    m_createdFileStoresTable = deduplicated.getTable();
    return m_createdFileStoresTable;
}
Aggregations