Search in sources :

Example 1 with DuplicateChecker

use of org.knime.core.util.DuplicateChecker in project knime-core by knime.

the class GroupLoopStartNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>
 * Emits one group of rows per invocation. On the first iteration
 * ({@code m_iteration == 0}) the input table is remembered and, unless the
 * "sorted input" setting is enabled, sorted ascending on the included group
 * columns; a row iterator over the resulting table is kept in
 * {@code m_iterator} and reused by later invocations.
 * </p>
 * <p>
 * Each invocation fills a fresh container with the row carried over from the
 * previous invocation ({@code m_lastRow}, if any) followed by all rows read
 * until {@code getGroupingState(row).isGroupEnd()} reports a group end or the
 * iterator is exhausted. The row that triggered the group end is not added; it
 * is kept in {@code m_lastRow} for the next invocation.
 * </p>
 * <p>
 * When the "sorted input" setting is enabled, each group identifier is
 * registered with a {@link DuplicateChecker}; a repeated identifier is
 * reported as an unsorted input table. Before returning, the current
 * iteration, the group column values and the group identifier are pushed as
 * flow variables.
 * </p>
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec context used for sorting and for creating the output container
 * @return a single-element array holding the rows of the current group
 * @throws Exception in particular a {@link DuplicateKeyException} if a group
 *             identifier is encountered more than once
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // ---------------------------------------------------------------
    // Data tables (sorting)
    // ---------------------------------------------------------------
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    // an empty input table has nothing to iterate over: flag the loop end right away
    if (table.size() <= 0) {
        m_endLoop = true;
    }
    // parameters
    m_includedColIndices = getIncludedColIndices(table.getDataTableSpec());
    // duplicate checking is only performed when the "sorted input" setting is
    // enabled, i.e. in exactly the case where the table is not sorted below
    boolean checkDuplicates = m_sortedInputTableModel.getBooleanValue();
    // remember table and sort table if necessary
    if (m_iteration == 0) {
        assert getLoopEndNode() == null : "1st iteration but end node set";
        m_table = table;
        m_spec = m_table.getDataTableSpec();
        // sort if not already sorted
        if (!m_sortedInputTableModel.getBooleanValue()) {
            // sort ascending on every included group column
            final String[] includes = m_filterGroupColModel.applyTo(spec).getIncludes();
            boolean[] sortAsc = new boolean[includes.length];
            Arrays.fill(sortAsc, true);
            // NOTE(review): meaning of the trailing 'false' argument is not visible here - confirm against BufferedDataTableSorter
            BufferedDataTableSorter tableSorter = new BufferedDataTableSorter(table, Arrays.asList(includes), sortAsc, false);
            m_sortedTable = tableSorter.sort(exec);
        } else {
            // no sort necessary
            m_sortedTable = table;
        }
        // the iterator is created once and shared by all following iterations
        m_iterator = m_sortedTable.iterator();
    } else {
        assert getLoopEndNode() != null : "No end node set";
        assert table == m_table : "Input tables differ between iterations";
    }
    // ---------------------------------------------------------------
    // Init
    // ---------------------------------------------------------------
    BufferedDataContainer cont = exec.createDataContainer(table.getSpec());
    // create new duplicate checker if null
    if (m_duplicateChecker == null) {
        m_duplicateChecker = new DuplicateChecker();
    }
    // initialize grouping states if null
    if (m_currentGroupingState == null) {
        m_currentGroupingState = new GroupingState("", false, null);
    }
    // the state computed for the carried-over row describes the group emitted now
    m_lastGroupingState = m_currentGroupingState;
    // the row that ended the previous group is the first row of this group
    if (m_lastRow != null) {
        cont.addRowToTable(m_lastRow);
    }
    // if the final row has been reached and added set end loop flag
    if (m_isFinalRow) {
        m_endLoop = true;
    }
    // walk through input table and group data
    // as long as new row fits into the current group or there are no more
    // rows left.
    boolean groupEnd = false;
    while (!groupEnd && m_iterator.hasNext()) {
        DataRow row = m_iterator.next();
        // get grouping state according to new row
        m_currentGroupingState = getGroupingState(row);
        groupEnd = m_currentGroupingState.isGroupEnd();
        // no row has been remembered yet, so this is the very first row read:
        // take its state as the state of the current group and, if requested,
        // add its group identifier to the duplicate checker.
        if (m_lastRow == null) {
            m_lastGroupingState = m_currentGroupingState;
            if (checkDuplicates) {
                m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
            }
        }
        // remember the row; if it ends the group it is added in the next iteration
        m_lastRow = row;
        // if group end has not been reached add row
        if (!groupEnd) {
            cont.addRowToTable(row);
            m_lastGroupingState = m_currentGroupingState;
        } else {
            // if group end has been reached add identifier of new group to
            // duplicate checker
            if (checkDuplicates) {
                try {
                    m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
                } catch (DuplicateKeyException e) {
                    // NOTE(review): the caught exception is not chained as cause - confirm whether that is intended
                    throw new DuplicateKeyException("Input table was " + "not sorted, found duplicate (group identifier:" + m_currentGroupingState.getGroupIdentifier() + ")");
                }
            }
        }
        // the row just read was the last one of the table: remember that the
        // final row has been reached
        if (!m_iterator.hasNext() && !m_isFinalRow) {
            m_isFinalRow = true;
            // the final row belongs to the current group and has been added,
            // thus end loop; otherwise it starts a new group that is emitted
            // by one more iteration
            if (!groupEnd) {
                m_endLoop = true;
            }
        }
    }
    cont.close();
    if (m_endLoop) {
        // check for duplicates and throw exception if duplicate exist
        try {
            m_duplicateChecker.checkForDuplicates();
        } catch (DuplicateKeyException e) {
            throw new DuplicateKeyException("Input table was not sorted, found duplicate group identifier " + e.getKey());
        } finally {
            // always release the checker once the loop is finished
            m_duplicateChecker.clear();
            m_duplicateChecker = null;
        }
    }
    // push variables
    pushFlowVariableInt("currentIteration", m_iteration);
    pushGroupColumnValuesAsFlowVariables(m_lastGroupingState);
    pushFlowVariableString("groupIdentifier", m_lastGroupingState.getGroupIdentifier());
    m_iteration++;
    return new BufferedDataTable[] { cont.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) BufferedDataTable(org.knime.core.node.BufferedDataTable) BufferedDataTableSorter(org.knime.core.data.sort.BufferedDataTableSorter) DuplicateChecker(org.knime.core.util.DuplicateChecker) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Example 2 with DuplicateChecker

use of org.knime.core.util.DuplicateChecker in project knime-core by knime.

the class ConcatenateTable method checkForDuplicates.

/**
 * Verifies that no row key occurs more than once across all given tables.
 *
 * <p>
 * Every row key is added to a {@link DuplicateChecker}; progress is reported
 * after each row and cancellation is checked after each table. The checker is
 * always cleared before this method returns, whether it completes normally or
 * with an exception.
 * </p>
 *
 * @param mon monitor used for progress messages and cancellation checks
 * @param tables the tables whose row keys are checked, in iteration order
 * @param rowCount total number of rows, used as denominator for the progress
 * @throws CanceledExecutionException if the monitor reports a cancellation
 * @throws IllegalArgumentException if a duplicate row key is detected or the
 *             checker fails with an {@link IOException}; the original
 *             exception is attached as cause
 */
private static void checkForDuplicates(final ExecutionMonitor mon, final BufferedDataTable[] tables, final long rowCount) throws CanceledExecutionException {
    DuplicateChecker check = new DuplicateChecker();
    try {
        // counted as long because rowCount is a long; an int counter would
        // overflow in the progress fraction and message for very large inputs
        long r = 0;
        for (int i = 0; i < tables.length; i++) {
            for (DataRow row : tables[i]) {
                RowKey key = row.getKey();
                try {
                    check.addKey(key.toString());
                } catch (DuplicateKeyException | IOException ex) {
                    // keep the cause: an IOException is reported with the same
                    // message and would otherwise be indistinguishable
                    throw new IllegalArgumentException("Duplicate row key \"" + key + "\" in table with index " + i, ex);
                }
                r++;
                mon.setProgress(r / (double) rowCount, "Checking tables, row " + r + "/" + rowCount + " (\"" + row.getKey() + "\")");
            }
            mon.checkCanceled();
        }
        try {
            // final pass over all keys collected so far
            check.checkForDuplicates();
        } catch (DuplicateKeyException | IOException ex) {
            throw new IllegalArgumentException("Duplicate row keys", ex);
        }
    } finally {
        // release whatever the checker holds, also on failure or cancellation
        check.clear();
    }
}
Also used : RowKey(org.knime.core.data.RowKey) IOException(java.io.IOException) DuplicateChecker(org.knime.core.util.DuplicateChecker) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Aggregations

DataRow (org.knime.core.data.DataRow)2 DuplicateChecker (org.knime.core.util.DuplicateChecker)2 DuplicateKeyException (org.knime.core.util.DuplicateKeyException)2 IOException (java.io.IOException)1 DataTableSpec (org.knime.core.data.DataTableSpec)1 RowKey (org.knime.core.data.RowKey)1 BufferedDataTableSorter (org.knime.core.data.sort.BufferedDataTableSorter)1 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)1 BufferedDataTable (org.knime.core.node.BufferedDataTable)1