Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
The class VariableFileReaderNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    Map<String, FlowVariable> stack = createStack(m_frSettings.getVariableName());
    VariableFileReaderNodeSettings settings = m_frSettings.createSettingsFrom(stack);
    LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
    // check the settings again - especially file existence (under Linux
    // files could be deleted/renamed since the last config call...)
    SettingsStatus status = settings.getStatusOfSettings(true, null);
    if (status.getNumOfErrors() > 0) {
        throw new InvalidSettingsException(status.getAllErrorMessages(10));
    }
    DataTableSpec tSpec = settings.createDataTableSpec();
    FileTable fTable = new FileTable(tSpec, settings, settings.getSkippedColumns(), exec);
    // Create a DataContainer and fill it with the rows read. It is faster
    // than reading the file every time (for each row iterator), and it
    // collects the domain for each column for us. Also, if things fail,
    // the error message is printed during file reader execution (where it
    // belongs) and not some time later when a node uses the row
    // iterator from the file table.
    BufferedDataContainer c = exec.createDataContainer(fTable.getDataTableSpec(), /* initDomain= */ true);
    int row = 0;
    FileRowIterator it = fTable.iterator();
    try {
        if (it.getZipEntryName() != null) {
            // seems we are reading a ZIP archive
            LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
        }
        while (it.hasNext()) {
            row++;
            DataRow next = it.next();
            String message = "Caching row #" + row + " (\"" + next.getKey() + "\")";
            exec.setMessage(message);
            exec.checkCanceled();
            c.addRowToTable(next);
        }
        if (it.zippedSourceHasMoreEntries()) {
            // after reading to the end of the file this returns a valid result
            setWarningMessage("Source is a ZIP archive with multiple entries. Only reading first entry!");
        }
    } catch (DuplicateKeyException dke) {
        String msg = dke.getMessage();
        if (msg == null) {
            msg = "Duplicate row IDs";
        }
        msg += ". Consider making IDs unique in the advanced settings.";
        DuplicateKeyException newDKE = new DuplicateKeyException(msg);
        newDKE.initCause(dke);
        throw newDKE;
    } finally {
        c.close();
    }
    // user settings allow for truncating the table
    if (it.iteratorEndedEarly()) {
        setWarningMessage("Data was truncated due to user settings.");
    }
    BufferedDataTable out = c.getTable();
    // closes all sources
    fTable.dispose();
    return new BufferedDataTable[] { out };
}
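The interesting part for DuplicateKeyException is the catch block: the original exception is re-wrapped with a more actionable message while initCause preserves the stack trace. A minimal sketch of that re-wrapping pattern, assuming only the DuplicateKeyException(String) constructor and the standard Throwable.initCause used above (the helper name is hypothetical):

import org.knime.core.util.DuplicateKeyException;

// Re-wrap a DuplicateKeyException with a user-facing hint while keeping
// the original exception as the cause, as done in the catch block above.
static DuplicateKeyException withHint(final DuplicateKeyException dke) {
    String msg = dke.getMessage() == null ? "Duplicate row IDs" : dke.getMessage();
    DuplicateKeyException hinted =
        new DuplicateKeyException(msg + ". Consider making IDs unique in the advanced settings.");
    hinted.initCause(dke); // preserve the original stack trace
    return hinted;
}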
Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
The class DataContainerTest, method testDuplicateKey.
/**
 * method being tested: addRowToTable().
 */
public final void testDuplicateKey() {
    String[] colNames = new String[] { "Column 1", "Column 2" };
    DataType[] colTypes = new DataType[] { StringCell.TYPE, IntCell.TYPE };
    DataTableSpec spec1 = new DataTableSpec(colNames, colTypes);
    DataContainer c = new DataContainer(spec1);
    RowKey r1Key = new RowKey("row 1");
    DataCell r1Cell1 = new StringCell("Row 1, Cell 1");
    DataCell r1Cell2 = new IntCell(12);
    DataRow r1 = new DefaultRow(r1Key, new DataCell[] { r1Cell1, r1Cell2 });
    RowKey r2Key = new RowKey("row 2");
    DataCell r2Cell1 = new StringCell("Row 2, Cell 1");
    DataCell r2Cell2 = new IntCell(22);
    DataRow r2 = new DefaultRow(r2Key, new DataCell[] { r2Cell1, r2Cell2 });
    c.addRowToTable(r1);
    c.addRowToTable(r2);
    // add row 1 twice
    try {
        c.addRowToTable(r1);
        c.close();
        // ... eh eh, you don't do this
        fail("Expected " + DuplicateKeyException.class + " not thrown");
    } catch (DuplicateKeyException e) {
        NodeLogger.getLogger(getClass()).debug("Got expected exception: " + e.getClass(), e);
    }
}
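On JUnit 4.13 or newer the same expectation can be stated more compactly with assertThrows; a sketch of a hypothetical refactoring, assuming the same container and rows as in the test above:

import static org.junit.Assert.assertThrows;

// The duplicate may be detected by addRowToTable() right away or by the
// exhaustive check in close(), so both calls go into the same lambda.
assertThrows(DuplicateKeyException.class, () -> {
    c.addRowToTable(r1); // r1 was already added above
    c.close();
});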
Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
The class GroupLoopStartNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // /////////////////////////
    //
    // / DATA TABLES (SORTING)
    //
    // /////////////////////////
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getDataTableSpec();
    if (table.size() <= 0) {
        m_endLoop = true;
    }
    // parameters
    m_includedColIndices = getIncludedColIndices(table.getDataTableSpec());
    boolean checkDuplicates = m_sortedInputTableModel.getBooleanValue();
    // remember the table and sort it if necessary
    if (m_iteration == 0) {
        assert getLoopEndNode() == null : "1st iteration but end node set";
        m_table = table;
        m_spec = m_table.getDataTableSpec();
        // sort if not already sorted
        if (!m_sortedInputTableModel.getBooleanValue()) {
            // ascending
            final String[] includes = m_filterGroupColModel.applyTo(spec).getIncludes();
            boolean[] sortAsc = new boolean[includes.length];
            Arrays.fill(sortAsc, true);
            BufferedDataTableSorter tableSorter = new BufferedDataTableSorter(table, Arrays.asList(includes), sortAsc, false);
            m_sortedTable = tableSorter.sort(exec);
        } else {
            // no sort necessary
            m_sortedTable = table;
        }
        m_iterator = m_sortedTable.iterator();
    } else {
        assert getLoopEndNode() != null : "No end node set";
        assert table == m_table : "Input tables differ between iterations";
    }
    // /////////////////////////
    //
    // / INIT
    //
    // /////////////////////////
    BufferedDataContainer cont = exec.createDataContainer(table.getSpec());
    // create a new duplicate checker if null
    if (m_duplicateChecker == null) {
        m_duplicateChecker = new DuplicateChecker();
    }
    // initialize the grouping state if null
    if (m_currentGroupingState == null) {
        m_currentGroupingState = new GroupingState("", false, null);
    }
    m_lastGroupingState = m_currentGroupingState;
    // add the row carried over from the last iteration to the new group
    if (m_lastRow != null) {
        cont.addRowToTable(m_lastRow);
    }
    // if the final row has been reached and added, set the end-loop flag
    if (m_isFinalRow) {
        m_endLoop = true;
    }
    // walk through the input table and group the data as long as each new
    // row fits into the current group and rows are left
    boolean groupEnd = false;
    while (!groupEnd && m_iterator.hasNext()) {
        DataRow row = m_iterator.next();
        // get the grouping state according to the new row
        m_currentGroupingState = getGroupingState(row);
        groupEnd = m_currentGroupingState.isGroupEnd();
        // first row of the input table: add its group identifier to the
        // duplicate checker
        if (m_lastRow == null) {
            m_lastGroupingState = m_currentGroupingState;
            if (checkDuplicates) {
                m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
            }
        }
        m_lastRow = row;
        if (!groupEnd) {
            // group end has not been reached, add the row
            cont.addRowToTable(row);
            m_lastGroupingState = m_currentGroupingState;
        } else {
            // group end has been reached, add the identifier of the new
            // group to the duplicate checker
            if (checkDuplicates) {
                try {
                    m_duplicateChecker.addKey(m_currentGroupingState.getGroupIdentifier());
                } catch (DuplicateKeyException e) {
                    throw new DuplicateKeyException("Input table was not sorted, found duplicate (group identifier: " + m_currentGroupingState.getGroupIdentifier() + ")");
                }
            }
        }
        // if the iterator has no more rows, the final row has been reached;
        // if it still belongs to the current group, the loop can end here
        if (!m_iterator.hasNext() && !m_isFinalRow) {
            m_isFinalRow = true;
            if (!groupEnd) {
                m_endLoop = true;
            }
        }
    }
    cont.close();
    if (m_endLoop) {
        // check for duplicates and throw an exception if duplicates exist
        try {
            m_duplicateChecker.checkForDuplicates();
        } catch (DuplicateKeyException e) {
            throw new DuplicateKeyException("Input table was not sorted, found duplicate group identifier " + e.getKey());
        } finally {
            m_duplicateChecker.clear();
            m_duplicateChecker = null;
        }
    }
    // push variables
    pushFlowVariableInt("currentIteration", m_iteration);
    pushGroupColumnValuesAsFlowVariables(m_lastGroupingState);
    pushFlowVariableString("groupIdentifier", m_lastGroupingState.getGroupIdentifier());
    m_iteration++;
    return new BufferedDataTable[] { cont.getTable() };
}
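The group loop relies on org.knime.core.util.DuplicateChecker to detect unsorted input: one key per group is added, the exhaustive check runs when the loop ends, and clear() releases temporary resources. A minimal sketch of that life cycle, using only the addKey, checkForDuplicates, getKey, and clear calls visible above (checkForDuplicates may also signal an IOException, which is left to the caller here):

import java.io.IOException;
import org.knime.core.util.DuplicateChecker;
import org.knime.core.util.DuplicateKeyException;

// Hypothetical helper that validates a sequence of group identifiers.
static void checkGroupIdentifiers(final Iterable<String> groupIds) throws IOException {
    DuplicateChecker checker = new DuplicateChecker();
    try {
        for (String id : groupIds) {
            checker.addKey(id); // may throw DuplicateKeyException immediately
        }
        checker.checkForDuplicates(); // exhaustive check over all added keys
    } catch (DuplicateKeyException e) {
        throw new DuplicateKeyException("Duplicate group identifier " + e.getKey());
    } finally {
        checker.clear(); // always release temporary resources
    }
}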
Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
The class FileReaderNodeModel, method createStreamableOperator.
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    return new StreamableOperator() {
        @Override
        public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
            assert inputs.length == 0;
            LOGGER.info("Preparing to read from '" + m_frSettings.getDataFileLocation().toString() + "'.");
            // check the settings again - especially file existence (under Linux
            // files could be deleted/renamed since the last config call...)
            SettingsStatus status = m_frSettings.getStatusOfSettings(true, null);
            if (status.getNumOfErrors() > 0) {
                throw new InvalidSettingsException(status.getAllErrorMessages(10));
            }
            DataTableSpec tSpec = m_frSettings.createDataTableSpec();
            FileTable fTable = new FileTable(tSpec, m_frSettings, m_frSettings.getSkippedColumns(), exec);
            // data output port
            RowOutput rowOutput = (RowOutput) outputs[0];
            int row = 0;
            FileRowIterator it = fTable.iterator();
            try {
                if (it.getZipEntryName() != null) {
                    // seems we are reading a ZIP archive
                    LOGGER.info("Reading entry '" + it.getZipEntryName() + "' from the specified ZIP archive.");
                }
                while (it.hasNext()) {
                    row++;
                    DataRow next = it.next();
                    final int finalRow = row;
                    exec.setMessage(() -> "Reading row #" + finalRow + " (\"" + next.getKey() + "\")");
                    exec.checkCanceled();
                    rowOutput.push(next);
                }
                rowOutput.close();
                if (it.zippedSourceHasMoreEntries()) {
                    // after reading to the end of the file this returns a valid result
                    setWarningMessage("Source is a ZIP archive with multiple entries. Only reading first entry!");
                }
            } catch (DuplicateKeyException dke) {
                String msg = dke.getMessage();
                if (msg == null) {
                    msg = "Duplicate row IDs";
                }
                msg += ". Consider making IDs unique in the advanced settings.";
                DuplicateKeyException newDKE = new DuplicateKeyException(msg);
                newDKE.initCause(dke);
                throw newDKE;
            }
            // user settings allow for truncating the table
            if (it.iteratorEndedEarly()) {
                setWarningMessage("Data was truncated due to user settings.");
            }
            // closes all sources
            fTable.dispose();
        }
    };
}
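Compared to the buffered execute() in the first snippet, the streaming variant never caches rows: each row is pushed to the downstream RowOutput as soon as it is read, and close() signals the end of the stream. A bare skeleton of that pattern, using only the StreamableOperator and RowOutput calls visible above (createRows() stands in for a hypothetical row source):

return new StreamableOperator() {
    @Override
    public void runFinal(final PortInput[] inputs, final PortOutput[] outputs,
            final ExecutionContext exec) throws Exception {
        RowOutput out = (RowOutput) outputs[0];
        for (DataRow r : createRows()) { // hypothetical row source
            exec.checkCanceled(); // stay responsive to cancellation
            out.push(r); // hand the row downstream immediately
        }
        out.close(); // no more rows will follow
    }
};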
Use of org.knime.core.util.DuplicateKeyException in project knime-core by knime.
The class DataContainer, method close.
/**
 * Closes the container and creates the table that can be accessed by <code>getTable()</code>. Successive calls of
 * <code>addRowToTable</code> will fail with an exception.
 *
 * @throws IllegalStateException If the container is not open.
 * @throws DuplicateKeyException If the final check for duplicate row keys fails.
 * @throws DataContainerException If the duplicate check fails due to an unexpected I/O problem.
 */
public void close() {
    if (isClosed()) {
        return;
    }
    if (m_buffer == null) {
        m_buffer = m_bufferCreator.createBuffer(m_spec, m_maxRowsInMemory, createInternalBufferID(),
            getGlobalTableRepository(), getLocalTableRepository(), getFileStoreHandler());
    }
    if (!m_isSynchronousWrite) {
        try {
            offerToAsynchronousQueue(CONTAINER_CLOSE);
            m_asyncAddFuture.get();
            checkAsyncWriteThrowable();
        } catch (InterruptedException e) {
            throw new DataContainerException("Adding rows to table was interrupted", e);
        } catch (ExecutionException e) {
            throw new DataContainerException("Adding rows to table threw exception", e);
        }
    }
    // create the table spec _after_ all rows have been added (i.e. wait for
    // the asynchronous write thread to finish)
    DataTableSpec finalSpec = m_domainCreator.createSpec();
    m_buffer.close(finalSpec);
    try {
        m_duplicateChecker.checkForDuplicates();
    } catch (IOException ioe) {
        throw new DataContainerException("Failed to check for duplicate row IDs", ioe);
    } catch (DuplicateKeyException dke) {
        String key = dke.getKey();
        throw new DuplicateKeyException("Found duplicate row ID \"" + key + "\" (at unknown position)", key);
    }
    m_table = new ContainerTable(m_buffer);
    getLocalTableRepository().put(m_table.getBufferID(), m_table);
    m_buffer = null;
    m_spec = null;
    m_duplicateChecker.clear();
    m_duplicateChecker = null;
    m_domainCreator = null;
    m_size = -1;
}
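For callers, the practical consequence is that the duplicate check can surface as late as close(): a row that was accepted by addRowToTable may still make close() throw. A usage sketch, assuming the DataContainer API shown in the test above (spec, row1, and row2 are placeholders):

DataContainer container = new DataContainer(spec);
try {
    container.addRowToTable(row1);
    container.addRowToTable(row2);
    container.close(); // the final duplicate check happens here
    DataTable result = container.getTable();
} catch (DuplicateKeyException dke) {
    // row IDs were not unique; getKey() names the offending ID
    NodeLogger.getLogger("example").warn("Duplicate row ID: " + dke.getKey(), dke);
}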