Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
From the class ConcatenateTable, method checkForDuplicates.
/**
 * Verifies that no row key occurs more than once across all given tables.
 *
 * <p>Every row key of every table is added to a {@link DuplicateChecker}; after all rows have
 * been seen, the checker's final {@code checkForDuplicates()} pass is run. Progress is reported
 * to the monitor after each row.
 *
 * @param mon monitor used for progress reporting and cancellation checks
 * @param tables the tables whose row keys are checked, visited in array order
 * @param rowCount the total number of rows; only used for the progress fraction and message
 * @throws CanceledExecutionException if the monitor reports that execution was canceled
 * @throws IllegalArgumentException if a duplicate row key is detected, or if the checker fails
 *             with an I/O error (the original exception is attached as cause in both cases)
 */
private static void checkForDuplicates(final ExecutionMonitor mon, final BufferedDataTable[] tables, final long rowCount) throws CanceledExecutionException {
    DuplicateChecker check = new DuplicateChecker();
    // long, like rowCount: an int counter would wrap around past Integer.MAX_VALUE rows and
    // produce a negative progress value and message.
    long r = 0;
    for (int i = 0; i < tables.length; i++) {
        for (DataRow row : tables[i]) {
            // check per row so that a single large table can be canceled while it is scanned
            mon.checkCanceled();
            RowKey key = row.getKey();
            try {
                check.addKey(key.toString());
            } catch (DuplicateKeyException ex) {
                throw new IllegalArgumentException("Duplicate row key \"" + key + "\" in table with index " + i, ex);
            } catch (IOException ex) {
                // an I/O failure of the checker is not a duplicate key; report it as what it is
                throw new IllegalArgumentException("Unable to check row key \"" + key + "\" for duplicates in table with index " + i, ex);
            }
            r++;
            mon.setProgress(r / (double) rowCount, "Checking tables, row " + r + "/" + rowCount + " (\"" + row.getKey() + "\")");
        }
        // also covers tables without any rows
        mon.checkCanceled();
    }
    try {
        check.checkForDuplicates();
    } catch (DuplicateKeyException ex) {
        throw new IllegalArgumentException("Duplicate row keys", ex);
    } catch (IOException ex) {
        throw new IllegalArgumentException("Unable to check row keys for duplicates", ex);
    }
}
Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
From the class NewJoinerNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * <p>Joins the first input table with the second one. The join key of a left row is its row
 * key; the join key of a right row is whatever {@code getRightJoinKey} returns for it. The second
 * table is first sorted into the row order of the first table (rows without a partner go to the
 * end) and both tables are then merged in a single pass.
 *
 * <p>Depending on the configured join mode, left rows without a partner are padded with missing
 * cells (left and full outer join) and right rows without a partner are appended at the end with
 * missing cells for the left columns (right and full outer join).
 *
 * @param inData the input tables; index 0 is the left (first) table, index 1 the right (second)
 * @param exec execution context used for creating the output container, progress and cancellation
 * @return an array holding the single joined table
 * @throws InvalidSettingsException if the configured join column does not exist in the second
 *             table and is not the row key identifier
 * @throws Exception if execution is canceled, or if a duplicate row key among the appended right
 *             rows cannot be resolved within ten renaming attempts
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable leftTable = inData[0];
    BufferedDataTable rightTable = inData[1];
    m_secondTableColIndex = rightTable.getDataTableSpec().findColumnIndex(m_settings.secondTableColumn());
    if (!NewJoinerSettings.ROW_KEY_IDENTIFIER.equals(m_settings.secondTableColumn()) && (m_secondTableColIndex == -1)) {
        throw new InvalidSettingsException("Join column '" + m_settings.secondTableColumn() + "' not found in second table");
    }
    BufferedDataContainer dc = exec.createDataContainer(createSpec(new DataTableSpec[] { leftTable.getDataTableSpec(), rightTable.getDataTableSpec() }));
    // create a row with missing values for left or full outer joins
    DataCell[] missingCells = new DataCell[rightTable.getDataTableSpec().getNumColumns()];
    for (int i = 0; i < missingCells.length; i++) {
        missingCells[i] = DataType.getMissingCell();
    }
    DataRow missingRow = new DefaultRow(new RowKey(""), missingCells);
    exec.setMessage("Reading first table");
    // build a map for sorting the second table which maps the row keys of
    // the first table to their row number
    final Map<String, Integer> orderMap = buildTableOrdering(leftTable, exec);
    Comparator<DataRow> rowComparator = new Comparator<DataRow>() {
        public int compare(final DataRow o1, final DataRow o2) {
            Integer k1 = orderMap.get(getRightJoinKey(o1));
            Integer k2 = orderMap.get(getRightJoinKey(o2));
            if ((k1 != null) && (k2 != null)) {
                // both rows have a partner: order by the partner's position in the first table
                return k1.compareTo(k2);
            } else if (k1 != null) {
                return -1;
            } else if (k2 != null) {
                return 1;
            } else {
                return 0;
            }
        }
    };
    // sort the second table based on the key order from the first table
    // non-matching rows are placed at the end
    exec.setMessage("Sorting second table");
    SortedTable rightSortedTable = new SortedTable(rightTable, rowComparator, false, exec.createSubExecutionContext(0.7));
    Iterator<DataRow> lit = leftTable.iterator();
    Iterator<DataRow> rit = rightSortedTable.iterator();
    exec.setMessage("Joining tables");
    // denominator for the progress of the join phase
    final double max;
    // lofj: keep left rows without partner; rofj: keep right rows without partner
    boolean lofj = false;
    boolean rofj = false;
    if (JoinMode.InnerJoin.equals(m_settings.joinMode())) {
        max = Math.min(leftTable.getRowCount(), rightTable.getRowCount());
    } else if (JoinMode.LeftOuterJoin.equals(m_settings.joinMode())) {
        max = leftTable.getRowCount();
        lofj = true;
    } else if (JoinMode.RightOuterJoin.equals(m_settings.joinMode())) {
        max = rightTable.getRowCount();
        rofj = true;
    } else {
        max = Math.max(leftTable.getRowCount(), rightTable.getRowCount());
        lofj = true;
        rofj = true;
    }
    // now join the two tables
    int p = 0;
    DataRow lrow = lit.hasNext() ? lit.next() : null;
    DataRow rrow = rit.hasNext() ? rit.next() : null;
    String lkey = (lrow != null) ? lrow.getKey().getString() : null;
    String rkey = (rrow != null) ? getRightJoinKey(rrow) : null;
    if (lofj && (lrow != null) && (rrow == null)) {
        // The second table is empty, so the merge loop below is never entered. The first left
        // row has already been taken from the iterator and the trailing loop only sees the rows
        // after it, so it has to be written here or it would be missing from the result.
        dc.addRowToTable(createJoinedRow(lkey, lrow, missingRow));
        exec.setProgress(0.7 + 0.3 * p++ / max);
    }
    outer: while ((lrow != null) && (rrow != null)) {
        exec.checkCanceled();
        String key = lkey;
        if (lkey.equals(rkey)) {
            // loop over all matching rows in the second table; the first match keeps the left
            // row key, every further match gets the key suffix plus a running index
            for (int i = 0; lkey.equals(rkey); i++) {
                dc.addRowToTable(createJoinedRow(key, lrow, rrow));
                exec.setProgress(0.7 + 0.3 * p++ / max);
                if (!rit.hasNext()) {
                    // right table exhausted; remaining left rows are handled after the loop
                    rrow = null;
                    break outer;
                }
                rrow = rit.next();
                rkey = getRightJoinKey(rrow);
                key = lkey + m_settings.keySuffix() + i;
            }
        } else if (lofj) {
            // no matching row from right table => fill with missing values
            // if left or full outer join is required
            dc.addRowToTable(createJoinedRow(lkey, lrow, missingRow));
            exec.setProgress(0.7 + 0.3 * p++ / max);
        }
        if (!lit.hasNext()) {
            // left table exhausted; remaining right rows are handled after the loop
            break outer;
        }
        lrow = lit.next();
        lkey = lrow.getKey().getString();
    }
    if (lit.hasNext() && lofj) {
        // outer join: remaining left rows have no partner
        while (lit.hasNext()) {
            exec.checkCanceled();
            lrow = lit.next();
            dc.addRowToTable(createJoinedRow(lrow.getKey().toString(), lrow, missingRow));
            exec.setProgress(0.7 + 0.3 * p++ / max);
        }
    } else if ((rrow != null) && rofj) {
        // add remaining non-joined rows from the right table if right or
        // full outer join
        missingCells = new DataCell[leftTable.getDataTableSpec().getNumColumns()];
        for (int i = 0; i < missingCells.length; i++) {
            missingCells[i] = DataType.getMissingCell();
        }
        missingRow = new DefaultRow(new RowKey(""), missingCells);
        boolean warningSet = false;
        while (true) {
            exec.checkCanceled();
            String key = rrow.getKey().toString();
            // number of renaming attempts for the current row
            int c = 0;
            while (true) {
                try {
                    dc.addRowToTable(createJoinedRow(key, missingRow, rrow));
                    exec.setProgress(0.7 + 0.3 * p++ / max);
                    break;
                } catch (DuplicateKeyException ex) {
                    // the key is already taken: retry with "_r" appended, give up after
                    // ten attempts
                    if (++c > 10) {
                        throw ex;
                    }
                    key = key + "_r";
                    if (!warningSet) {
                        setWarningMessage("Encountered and fixed some " + "duplicate row keys at the end of the " + "table");
                        warningSet = true;
                    }
                }
            }
            if (!rit.hasNext()) {
                break;
            }
            rrow = rit.next();
        }
    }
    dc.close();
    return new BufferedDataTable[] { dc.getTable() };
}
Aggregations