Search in sources :

Example 6 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

the class ConcatenateTable method checkForDuplicates.

/**
 * Verifies that no row key occurs more than once across all given tables.
 *
 * Every row key is fed into a {@link DuplicateChecker}; progress is reported
 * on the monitor after each row and cancellation is checked once per table.
 *
 * @param mon monitor used for progress reporting and cancellation checks
 * @param tables the tables whose row keys are checked
 * @param rowCount the total row count used as denominator for the progress
 * @throws CanceledExecutionException if the monitor reports cancellation
 * @throws IllegalArgumentException if a duplicate row key is detected or the
 *             checker fails with an I/O error; the original exception is
 *             attached as cause
 */
private static void checkForDuplicates(final ExecutionMonitor mon, final BufferedDataTable[] tables, final long rowCount) throws CanceledExecutionException {
    DuplicateChecker check = new DuplicateChecker();
    // long, not int: rowCount is a long, so the counter must not overflow first
    long r = 0;
    for (int i = 0; i < tables.length; i++) {
        for (DataRow row : tables[i]) {
            RowKey key = row.getKey();
            try {
                check.addKey(key.toString());
            } catch (DuplicateKeyException ex) {
                throw new IllegalArgumentException("Duplicate row key \"" + key + "\" in table with index " + i, ex);
            } catch (IOException ex) {
                // an I/O failure of the checker is not a duplicate; report it as such
                throw new IllegalArgumentException("Unable to check row key \"" + key + "\" in table with index " + i + " for duplicates: " + ex.getMessage(), ex);
            }
            r++;
            mon.setProgress(r / (double) rowCount, "Checking tables, row " + r + "/" + rowCount + " (\"" + key + "\")");
        }
        mon.checkCanceled();
    }
    try {
        check.checkForDuplicates();
    } catch (DuplicateKeyException ex) {
        throw new IllegalArgumentException("Duplicate row keys", ex);
    } catch (IOException ex) {
        throw new IllegalArgumentException("Unable to check row keys for duplicates: " + ex.getMessage(), ex);
    }
}
Also used : RowKey(org.knime.core.data.RowKey) IOException(java.io.IOException) DuplicateChecker(org.knime.core.util.DuplicateChecker) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException)

Example 7 with DuplicateKeyException

use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.

the class NewJoinerNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * Joins the first input table with the second one. The second table is
 * sorted into the row-key order of the first table, then both are walked in
 * parallel. Depending on the configured join mode, rows without a partner
 * are dropped or padded with missing cells.
 *
 * @param inData the two input tables; index 0 is the left (first) table,
 *            index 1 the right (second) table
 * @param exec context used for container creation, progress and
 *            cancellation
 * @return a single-element array holding the joined table
 * @throws InvalidSettingsException if the configured join column is neither
 *             the row key identifier nor a column of the second table
 * @throws Exception any other failure, e.g. cancellation or a duplicate row
 *             key that could not be resolved
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable leftTable = inData[0];
    BufferedDataTable rightTable = inData[1];
    m_secondTableColIndex = rightTable.getDataTableSpec().findColumnIndex(m_settings.secondTableColumn());
    if (!NewJoinerSettings.ROW_KEY_IDENTIFIER.equals(m_settings.secondTableColumn()) && (m_secondTableColIndex == -1)) {
        throw new InvalidSettingsException("Join column '" + m_settings.secondTableColumn() + "' not found in second table");
    }
    BufferedDataContainer dc = exec.createDataContainer(createSpec(new DataTableSpec[] { leftTable.getDataTableSpec(), rightTable.getDataTableSpec() }));
    // create a row with missing values for left or full outer joins
    DataCell[] missingCells = new DataCell[rightTable.getDataTableSpec().getNumColumns()];
    for (int i = 0; i < missingCells.length; i++) {
        missingCells[i] = DataType.getMissingCell();
    }
    DataRow missingRow = new DefaultRow(new RowKey(""), missingCells);
    exec.setMessage("Reading first table");
    // build a map for sorting the second table which maps the row keys of
    // the first table to their row number
    final Map<String, Integer> orderMap = buildTableOrdering(leftTable, exec);
    Comparator<DataRow> rowComparator = new Comparator<DataRow>() {

        public int compare(final DataRow o1, final DataRow o2) {
            Integer k1 = orderMap.get(getRightJoinKey(o1));
            Integer k2 = orderMap.get(getRightJoinKey(o2));
            if ((k1 != null) && (k2 != null)) {
                // compareTo instead of subtraction: same ordering, no
                // arithmetic on the positions
                return k1.compareTo(k2);
            } else if (k1 != null) {
                return -1;
            } else if (k2 != null) {
                return 1;
            } else {
                return 0;
            }
        }
    };
    // sort the second table based on the key order from the first table
    // non-matching rows are placed at the end
    exec.setMessage("Sorting second table");
    SortedTable rightSortedTable = new SortedTable(rightTable, rowComparator, false, exec.createSubExecutionContext(0.7));
    Iterator<DataRow> lit = leftTable.iterator();
    Iterator<DataRow> rit = rightSortedTable.iterator();
    exec.setMessage("Joining tables");
    final double max;
    boolean lofj = false;
    boolean rofj = false;
    if (JoinMode.InnerJoin.equals(m_settings.joinMode())) {
        max = Math.min(leftTable.getRowCount(), rightTable.getRowCount());
    } else if (JoinMode.LeftOuterJoin.equals(m_settings.joinMode())) {
        max = leftTable.getRowCount();
        lofj = true;
    } else if (JoinMode.RightOuterJoin.equals(m_settings.joinMode())) {
        max = rightTable.getRowCount();
        rofj = true;
    } else {
        max = Math.max(leftTable.getRowCount(), rightTable.getRowCount());
        lofj = true;
        rofj = true;
    }
    // now join the two tables
    int p = 0;
    DataRow lrow = lit.hasNext() ? lit.next() : null;
    DataRow rrow = rit.hasNext() ? rit.next() : null;
    String lkey = (lrow != null) ? lrow.getKey().getString() : null;
    String rkey = (rrow != null) ? getRightJoinKey(rrow) : null;
    if ((lrow != null) && (rrow == null) && lofj) {
        // The right table is empty, so the merge loop below never runs. The
        // first left row has already been taken from the iterator and would
        // otherwise be lost, because the drain loop after the merge only
        // sees the rows still left in the iterator.
        dc.addRowToTable(createJoinedRow(lkey, lrow, missingRow));
        exec.setProgress(0.7 + 0.3 * p++ / max);
    }
    outer: while ((lrow != null) && (rrow != null)) {
        exec.checkCanceled();
        String key = lkey;
        if (lkey.equals(rkey)) {
            // loop over all matching rows in the second table; the first
            // match keeps the plain left key, later matches get the
            // configured suffix plus a running index
            for (int i = 0; lkey.equals(rkey); i++) {
                dc.addRowToTable(createJoinedRow(key, lrow, rrow));
                exec.setProgress(0.7 + 0.3 * p++ / max);
                if (!rit.hasNext()) {
                    // right side exhausted; null marks that no right rows
                    // remain for the right/full outer join handling below
                    rrow = null;
                    break outer;
                }
                rrow = rit.next();
                rkey = getRightJoinKey(rrow);
                key = lkey + m_settings.keySuffix() + i;
            }
        } else if (lofj) {
            // no matching row from right table => fill with missing values
            // if left or full outer join is required
            dc.addRowToTable(createJoinedRow(lkey, lrow, missingRow));
            exec.setProgress(0.7 + 0.3 * p++ / max);
        }
        if (!lit.hasNext()) {
            break outer;
        }
        lrow = lit.next();
        lkey = lrow.getKey().getString();
    }
    if (lit.hasNext() && lofj) {
        // outer join: pad all remaining left rows with missing values
        while (lit.hasNext()) {
            lrow = lit.next();
            dc.addRowToTable(createJoinedRow(lrow.getKey().toString(), lrow, missingRow));
            exec.setProgress(0.7 + 0.3 * p++ / max);
        }
    } else if ((rrow != null) && rofj) {
        // add remaining non-joined rows from the right table if right or
        // full outer join
        missingCells = new DataCell[leftTable.getDataTableSpec().getNumColumns()];
        for (int i = 0; i < missingCells.length; i++) {
            missingCells[i] = DataType.getMissingCell();
        }
        missingRow = new DefaultRow(new RowKey(""), missingCells);
        boolean warningSet = false;
        while (true) {
            String key = rrow.getKey().toString();
            int c = 0;
            // retry with "_r" appended to the key while the container
            // rejects it as duplicate; give up after more than 10 attempts
            while (true) {
                try {
                    dc.addRowToTable(createJoinedRow(key, missingRow, rrow));
                    exec.setProgress(0.7 + 0.3 * p++ / max);
                    break;
                } catch (DuplicateKeyException ex) {
                    if (++c > 10) {
                        throw ex;
                    }
                    key = key + "_r";
                    if (!warningSet) {
                        setWarningMessage("Encountered and fixed some " + "duplicate row keys at the end of the " + "table");
                        warningSet = true;
                    }
                }
            }
            if (!rit.hasNext()) {
                break;
            }
            rrow = rit.next();
        }
    }
    dc.close();
    return new BufferedDataTable[] { dc.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) DuplicateKeyException(org.knime.core.util.DuplicateKeyException) Comparator(java.util.Comparator) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DuplicateKeyException (org.knime.core.util.DuplicateKeyException)7 DataRow (org.knime.core.data.DataRow)6 DataTableSpec (org.knime.core.data.DataTableSpec)6 RowKey (org.knime.core.data.RowKey)3 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)3 BufferedDataTable (org.knime.core.node.BufferedDataTable)3 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)3 IOException (java.io.IOException)2 DataCell (org.knime.core.data.DataCell)2 DefaultRow (org.knime.core.data.def.DefaultRow)2 DuplicateChecker (org.knime.core.util.DuplicateChecker)2 SettingsStatus (org.knime.core.util.tokenizer.SettingsStatus)2 Comparator (java.util.Comparator)1 ExecutionException (java.util.concurrent.ExecutionException)1 SortedTable (org.knime.base.data.sort.SortedTable)1 DataType (org.knime.core.data.DataType)1 IntCell (org.knime.core.data.def.IntCell)1 StringCell (org.knime.core.data.def.StringCell)1 BufferedDataTableSorter (org.knime.core.data.sort.BufferedDataTableSorter)1 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)1