Search in sources:

Example 1 with BufferedDataTableSorter

use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.

In the class DataTableTrainingData, method shuffle.

/**
 * Shuffles the rows of {@code m_data}: the column at {@code m_shuffleColIdx}
 * is replaced with values produced by a {@code RandomNumberAppendFactory}
 * (seeded from this object's random generator) and the table is then sorted
 * on that column.
 *
 * @throws CanceledExecutionException declared by the sorting step and
 *             propagated unchanged
 */
private void shuffle() throws CanceledExecutionException {
    final int rowCount = (int) m_data.size();
    final Random rng = getRandomDataGenerator();

    // Step 1: overwrite the shuffle column with freshly generated values.
    final ColumnRearranger rearranger = new ColumnRearranger(m_data.getDataTableSpec());
    final RandomNumberAppendFactory shuffleFactory =
        new RandomNumberAppendFactory(rng.nextLong(), rowCount, m_shuffleColSpec);
    rearranger.replace(shuffleFactory, m_shuffleColIdx);
    m_data = m_exec.createColumnRearrangeTable(m_data, rearranger, m_exec.createSubProgress(0.0));

    // Step 2: order the table by the shuffle column.
    final String shuffleColumnName = m_shuffleColSpec.getName();
    m_data = new BufferedDataTableSorter(m_data, Collections.singleton(shuffleColumnName), SORT_ASCENDING)
        .sort(m_exec.createSubExecutionContext(0.0));
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) Random(java.util.Random) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter)

Example 2 with BufferedDataTableSorter

use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.

In the class SorterNodeModel, method execute.

/**
 * Sorts the table at the input port with a {@link BufferedDataTableSorter}
 * that is configured from this model's {@code m_inclList},
 * {@code m_sortOrder}, {@code m_missingToEnd} and {@code m_sortInMemory}
 * fields. When {@code m_inclList} is empty, a warning message is set and
 * the input table is returned untouched.
 *
 * @param inData the input tables; only the entry at {@code INPORT} is read
 * @param exec the execution context handed to the sorter
 * @return a one-element array holding the sorted table, or the original
 *         input table when no columns are selected
 * @throws Exception declared by the overridden method; anything thrown
 *             while sorting propagates to the caller
 *
 * @see org.knime.core.node.NodeModel#execute(BufferedDataTable[],
 *      ExecutionContext)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // Nothing to sort by: pass the input through with a warning.
    if (m_inclList.isEmpty()) {
        setWarningMessage("No columns were selected - returning original table");
        return new BufferedDataTable[] { inData[INPORT] };
    }

    final BufferedDataTable input = inData[INPORT];
    final BufferedDataTableSorter tableSorter =
        new BufferedDataTableSorter(input, m_inclList, m_sortOrder, m_missingToEnd);
    tableSorter.setSortInMemory(m_sortInMemory);
    return new BufferedDataTable[] { tableSorter.sort(exec) };
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter)

Example 3 with BufferedDataTableSorter

use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.

In the class GroupLoopStartNodeModel, method execute.

/**
 * {@inheritDoc}
 *
 * <p>Returns one group of rows per call. On the first iteration
 * ({@code m_iteration == 0}) the input table is remembered and, unless the
 * "sorted input" setting is enabled, sorted ascending on the included group
 * columns. Every call then pulls rows from the shared {@code m_iterator}
 * until {@code getGroupingState} reports a group end or the iterator is
 * exhausted, and returns the rows collected in this call as a single table.
 * The iteration counter, the group column values and the group identifier
 * are pushed as flow variables before returning.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec the execution context used for sorting and for creating the
 *            output container
 * @return a one-element array holding the rows of the current group
 * @throws Exception a {@code DuplicateKeyException} is thrown when the
 *             duplicate checker encounters an already seen group identifier;
 *             other exceptions propagate from the called methods
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // /////////////////////////
    // 
    // / DATA TABLES (SORTING)
    // 
    // /////////////////////////
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    // an empty input table ends the loop immediately
    if (table.size() <= 0) {
        m_endLoop = true;
    }
    // parameters
    m_includedColIndices = getIncludedColIndices(table.getDataTableSpec());
    // group identifiers are only fed to the duplicate checker when the
    // "sorted input" setting is enabled, i.e. when this node does not sort
    // the table itself
    boolean checkDuplicates = m_sortedInputTableModel.getBooleanValue();
    // remember table and sort table if necessary
    if (m_iteration == 0) {
        assert getLoopEndNode() == null : "1st iteration but end node set";
        m_table = table;
        m_spec = m_table.getDataTableSpec();
        // sort if not already sorted
        if (!m_sortedInputTableModel.getBooleanValue()) {
            // sort ascending on every included group column
            final String[] includes = m_filterGroupColModel.applyTo(spec).getIncludes();
            boolean[] sortAsc = new boolean[includes.length];
            Arrays.fill(sortAsc, true);
            BufferedDataTableSorter tableSorter = new BufferedDataTableSorter(table, Arrays.asList(includes), sortAsc, false);
            m_sortedTable = tableSorter.sort(exec);
        } else {
            // no sort necessary
            m_sortedTable = table;
        }
        // the iterator is kept in a field so that later calls continue
        // where this one stops
        m_iterator = m_sortedTable.iterator();
    } else {
        assert getLoopEndNode() != null : "No end node set";
        assert table == m_table : "Input tables differ between iterations";
    }
    // /////////////////////////
    // 
    // / INIT
    // 
    // /////////////////////////
    BufferedDataContainer cont = exec.createDataContainer(table.getSpec());
    // create new duplicate checker if null
    if (m_duplicateChecker == null) {
        m_duplicateChecker = new DuplicateChecker();
    }
    // initialize grouping states if null
    if (m_currentGroupingState == null) {
        m_currentGroupingState = new GroupingState("", false, null);
    }
    m_lastGroupingState = m_currentGroupingState;
    // m_lastRow holds the last row read by the previous call (if any); it
    // is added here as the first row of this call's output table
    if (m_lastRow != null) {
        cont.addRowToTable(m_lastRow);
    }
    // if the final row has been reached and added set end loop flag
    if (m_isFinalRow) {
        m_endLoop = true;
    }
    // walk through input table and group data
    // as long as new row fits into the current group or there are no more
    // rows left.
    boolean groupEnd = false;
    while (!groupEnd && m_iterator.hasNext()) {
        DataRow row = m_iterator.next();
        // get grouping state according to new row
        m_currentGroupingState = getGroupingState(row);
        groupEnd = m_currentGroupingState.isGroupEnd();
        // no previous row exists, so this is the very first row read: take
        // its state as the last grouping state and, when checking for
        // duplicates, add its group identifier to the duplicate checker
        if (m_lastRow == null) {
            m_lastGroupingState = m_currentGroupingState;
            if (checkDuplicates) {
                m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
            }
        }
        // remember the row; when it ended the group it is not added below
        // and is instead added at the start of the next call
        m_lastRow = row;
        // if group end has not been reached add row
        if (!groupEnd) {
            cont.addRowToTable(row);
            m_lastGroupingState = m_currentGroupingState;
        // if group end has been reached add identifier of new group to
        // duplicate checker
        } else {
            if (checkDuplicates) {
                try {
                    m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
                } catch (DuplicateKeyException e) {
                    // rethrown with a message that names the offending
                    // group identifier
                    throw new DuplicateKeyException("Input table was " + "not sorted, found duplicate (group identifier:" + m_currentGroupingState.getGroupIdentifier() + ")");
                }
            }
        }
        // the iterator is exhausted for the first time: remember that the
        // final row has been read
        if (!m_iterator.hasNext() && !m_isFinalRow) {
            m_isFinalRow = true;
            // the final row did not end the group, so it was already added
            // to the container above and the loop can end with this call
            if (!groupEnd) {
                m_endLoop = true;
            }
        }
    }
    cont.close();
    if (m_endLoop) {
        // check for duplicates and throw exception if duplicate exist
        try {
            m_duplicateChecker.checkForDuplicates();
        } catch (DuplicateKeyException e) {
            throw new DuplicateKeyException("Input table was not sorted, found duplicate group identifier " + e.getKey());
        } finally {
            // the checker is cleared and dropped whether or not the check
            // succeeded; a later call creates a new one
            m_duplicateChecker.clear();
            m_duplicateChecker = null;
        }
    }
    // push variables
    pushFlowVariableInt("currentIteration", m_iteration);
    pushGroupColumnValuesAsFlowVariables(m_lastGroupingState);
    pushFlowVariableString("groupIdentifier", m_lastGroupingState.getGroupIdentifier());
    m_iteration++;
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter) DuplicateChecker(org.knime.core.util.DuplicateChecker) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Example 4 with BufferedDataTableSorter

use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.

In the class RankNodeModel, method execute.

/**
 * {@inheritDoc}
 *
 * <p>Sorts the input table by the configured ranking columns and appends a
 * rank column computed by a {@code RankCellFactory}. When the "retain row
 * order" setting is enabled, a temporary helper column recording the
 * original row order is appended first, used afterwards to sort the ranked
 * table back into input order, and finally removed again.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec the execution context used for sorting and column rearranging
 * @return a one-element array holding the table with the appended rank
 *         column
 * @throws IllegalArgumentException if {@code inData[0]} is {@code null}
 * @throws Exception anything thrown by the sorting or rearranging steps
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable table = inData[0];
    if (table == null) {
        throw new IllegalArgumentException("No input table found");
    }
    if (table.size() < 1) {
        setWarningMessage("Empty input table found");
    }
    // get table spec
    DataTableSpec inSpec = table.getDataTableSpec();
    // get grouping and ranking columns together with their indices; the
    // indices stay valid below because the helper column is appended at the
    // end of the table
    final List<String> groupCols = Arrays.asList(m_groupColumns.getStringArrayValue());
    final List<String> rankCols = Arrays.asList(m_rankColumns.getStringArrayValue());
    final int[] groupColIndices = getIndicesFromColNameList(groupCols, inSpec);
    final int[] rankColIndices = getIndicesFromColNameList(rankCols, inSpec);
    // get rank mode
    final String rankMode = m_rankMode.getStringValue();
    final boolean retainRowOrder = m_retainRowOrder.getBooleanValue();
    // sorting and ranking are always done (2 steps); retaining the row order
    // adds three more: append order column, sort back, remove order column
    final double numSteps = retainRowOrder ? 5 : 2;
    // Name of the temporary order column. It is made unique with respect to
    // the input spec so that an input column that already happens to be
    // called "rowOrder" neither clashes on append nor gets removed at the end.
    final String rowOrder = DataTableSpec.getUniqueColumnName(inSpec, "rowOrder");
    if (retainRowOrder) {
        // insert extra column containing the original order of the input table
        final ColumnRearranger cr = new ColumnRearranger(inSpec);
        final DataColumnSpec rowOrderSpec = new DataColumnSpecCreator(rowOrder, LongCell.TYPE).createSpec();
        cr.append(new OrderCellFactory(rowOrderSpec));
        table = exec.createColumnRearrangeTable(table, cr, exec.createSubProgress(1 / numSteps));
        inSpec = table.getDataTableSpec();
    }
    // set boolean array to indicate ascending ranking columns
    final String[] orderRank = m_rankOrder.getStringArrayValue();
    final boolean[] ascRank = new boolean[orderRank.length];
    for (int i = 0; i < ascRank.length; i++) {
        ascRank[i] = orderRank[i].equals("Ascending");
    }
    // sort by rank
    final BufferedDataTable sortedTable = new BufferedDataTableSorter(table, rankCols, ascRank).sort(exec.createSubExecutionContext(1 / numSteps));
    // prepare appending of rank column; its cell type follows the
    // "rank as long" setting
    final ColumnRearranger columnRearranger = new ColumnRearranger(sortedTable.getDataTableSpec());
    final boolean rankAsLong = m_rankAsLong.getBooleanValue();
    final DataColumnSpec newColSpec = new DataColumnSpecCreator(m_rankOutColName.getStringValue(), rankAsLong ? LongCell.TYPE : IntCell.TYPE).createSpec();
    // default capacity unless grouping columns are present, in which case
    // the square root of the row count is used
    int initialHashtableCapacity = 11;
    if (!groupCols.isEmpty()) {
        initialHashtableCapacity = (int) Math.sqrt(table.size());
    }
    // append rank column
    columnRearranger.append(new RankCellFactory(newColSpec, groupColIndices, rankColIndices, rankMode, rankAsLong, initialHashtableCapacity));
    BufferedDataTable out = exec.createColumnRearrangeTable(sortedTable, columnRearranger, exec.createSubExecutionContext(1 / numSteps));
    if (retainRowOrder) {
        // recover row order
        out = new BufferedDataTableSorter(out, Arrays.asList(rowOrder), new boolean[] { true }).sort(exec.createSubExecutionContext(1 / numSteps));
        // remove order column
        final ColumnRearranger cr = new ColumnRearranger(out.getDataTableSpec());
        cr.remove(rowOrder);
        out = exec.createColumnRearrangeTable(out, cr, exec.createSubExecutionContext(1 / numSteps));
    }
    return new BufferedDataTable[] { out };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter) LinkedList(java.util.LinkedList) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 5 with BufferedDataTableSorter

use of org.knime.core.data.sort.BufferedDataTableSorter in project knime-core by knime.

In the class FileStoresInLoopCache, method close.

/**
 * Closes the container of created file stores and stores the resulting
 * table in {@code m_createdFileStoresTable}. If the keys were not added in
 * sorted order, the table is first sorted ascending on {@code COL_NAME}
 * and every row whose key equals the key of the row before it is dropped;
 * the unsorted table is cleared afterwards.
 *
 * @return the table now held in {@code m_createdFileStoresTable}
 * @throws CanceledExecutionException declared by the sorting step and
 *             propagated unchanged
 */
BufferedDataTable close() throws CanceledExecutionException {
    m_createdFileStoresContainer.close();
    final BufferedDataTable rawTable = m_createdFileStoresContainer.getTable();
    m_createdFileStoresContainer = null;

    // Keys arrived in order: the container's table is used as is.
    if (m_keysWereAddedSorted) {
        m_createdFileStoresTable = rawTable;
        return m_createdFileStoresTable;
    }

    final boolean[] ascending = { true };
    final BufferedDataTable sortedTable =
        new BufferedDataTableSorter(rawTable, Collections.singletonList(COL_NAME), ascending)
            .sort(m_exec.createSilentSubExecutionContext(0.0));

    // After sorting, equal keys are adjacent, so comparing each key with
    // its predecessor is enough to keep one row per key.
    final BufferedDataContainer dedup = m_exec.createDataContainer(LOOP_FILE_STORE_SPEC);
    FileStoreKey previousKey = null;
    for (DataRow row : sortedTable) {
        final FileStoreKey currentKey = getFileStoreKey(row);
        final boolean sameAsPrevious = ConvenienceMethods.areEqual(previousKey, currentKey);
        if (!sameAsPrevious) {
            dedup.addRowToTable(row);
        }
        previousKey = currentKey;
    }
    dedup.close();
    m_exec.clearTable(rawTable);
    m_createdFileStoresTable = dedup.getTable();
    return m_createdFileStoresTable;
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) FileStoreKey(org.knime.core.data.filestore.FileStoreKey) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter) DataRow(org.knime.core.data.DataRow)

Aggregations

BufferedDataTableSorter (org.knime.core.data.sort.BufferedDataTableSorter): 5, BufferedDataTable (org.knime.core.node.BufferedDataTable): 4, DataRow (org.knime.core.data.DataRow): 2, DataTableSpec (org.knime.core.data.DataTableSpec): 2, ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 2, BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 2, LinkedList (java.util.LinkedList): 1, Random (java.util.Random): 1, DataColumnSpec (org.knime.core.data.DataColumnSpec): 1, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 1, FileStoreKey (org.knime.core.data.filestore.FileStoreKey): 1, SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 1, DuplicateChecker (org.knime.core.util.DuplicateChecker): 1, DuplicateKeyException (org.knime.core.util.DuplicateKeyException): 1