Use of org.knime.core.data.DataRow in project knime-core by knime.
The class DBWriterImpl, method deleteRows.
/**
* Create connection to delete rows from a table in the database.
* @param schema table schema name. Could be <code>null</code>.
* @param table name of the table to delete rows from
* @param data The data providing the values for the WHERE columns.
* @param whereColumns columns used in the WHERE clause
* @param deleteStatus int array of length data#getRowCount; will be filled with
* the number of rows affected
* @param exec Used to cancel writing.
* @param cp {@link CredentialsProvider} providing user/password
* @param batchSize number of rows deleted in one batch
* @return error string, or null if none
* @throws Exception if connection could not be established
*/
@Override
public final String deleteRows(final String schema, final String table, final BufferedDataTable data, final String[] whereColumns, final int[] deleteStatus, final ExecutionMonitor exec, final CredentialsProvider cp, final int batchSize) throws Exception {
DatabaseConnectionSettings conSettings = getDatabaseConnectionSettings();
// synchronized (conSettings.syncConnection(conn)) {
return conSettings.execute(cp, conn -> {
exec.setMessage("Start deleting rows from database...");
final DataTableSpec spec = data.getDataTableSpec();
// create query connection object
final StringBuilder query = new StringBuilder("DELETE FROM " + table + " WHERE");
for (int i = 0; i < whereColumns.length; i++) {
if (i > 0) {
query.append(" AND");
}
final String newColumnName = replaceColumnName(whereColumns[i]);
query.append(" " + newColumnName + " = ?");
}
// there were problems writing more than 13 columns; the prepared statement
// ensures that we can set the column values directly row-by-row, and the
// database will handle the commit
long rowCount = data.size();
int cnt = 1;
int errorCnt = 0;
int allErrors = 0;
// count number of rows added to current batch
int curBatchSize = 0;
// selected timezone
final TimeZone timezone = conSettings.getTimeZone();
LOGGER.debug("Executing SQL statement as prepareStatement: " + query);
final PreparedStatement stmt = conn.prepareStatement(query.toString());
// remember auto-commit flag
final boolean autoCommit = conn.getAutoCommit();
DatabaseConnectionSettings.setAutoCommit(conn, false);
try {
for (RowIterator it = data.iterator(); it.hasNext(); cnt++) {
exec.checkCanceled();
exec.setProgress(1.0 * cnt / rowCount, "Row " + "#" + cnt);
final DataRow row = it.next();
// WHERE columns
for (int i = 0; i < whereColumns.length; i++) {
final int dbIdx = i + 1;
final int columnIndex = spec.findColumnIndex(whereColumns[i]);
final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
final DataCell cell = row.getCell(columnIndex);
fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
}
// if batch mode
if (batchSize > 1) {
// a new row will be added
stmt.addBatch();
}
curBatchSize++;
// execute once the batch is full or the end of the input table is reached
if ((curBatchSize == batchSize) || !it.hasNext()) {
curBatchSize = 0;
try {
// write batch
if (batchSize > 1) {
int[] status = stmt.executeBatch();
for (int i = 0; i < status.length; i++) {
deleteStatus[cnt - status.length + i] = status[i];
}
} else {
// or write single row
int status = stmt.executeUpdate();
deleteStatus[cnt - 1] = status;
}
} catch (Throwable t) {
// not auto-committing: commit what has been processed so far before recording the error
if (!conn.getAutoCommit()) {
conn.commit();
}
allErrors++;
if (errorCnt > -1) {
final String errorMsg;
if (batchSize > 1) {
errorMsg = "Error while deleting rows #" + (cnt - batchSize) + " - #" + cnt + ", reason: " + t.getMessage();
} else {
errorMsg = "Error while deleting row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
}
exec.setMessage(errorMsg);
if (errorCnt++ < 10) {
LOGGER.warn(errorMsg);
} else {
errorCnt = -1;
LOGGER.warn(errorMsg + " - more errors...", t);
}
}
} finally {
// clear batch if in batch mode
if (batchSize > 1) {
stmt.clearBatch();
}
}
}
}
if (!conn.getAutoCommit()) {
conn.commit();
}
if (allErrors == 0) {
return null;
} else {
return "Errors \"" + allErrors + "\" deleting " + rowCount + " rows.";
}
} finally {
DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
stmt.close();
}
});
}
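For context, here is a hedged sketch of how deleteRows might be invoked from a node implementation; the writer instance, input table, execution monitor, credentials provider and the table/column names below are assumptions, not part of the API shown above.
// hypothetical call site: "writer", "inputTable", "exec" and "cp" are assumed to exist
final int[] deleteStatus = new int[(int) inputTable.size()];
final String error = writer.deleteRows(
    null,                          // schema, may be null
    "CUSTOMERS",                   // hypothetical table name
    inputTable,                    // BufferedDataTable providing the WHERE values
    new String[] {"CUSTOMER_ID"},  // hypothetical WHERE column
    deleteStatus,                  // filled with the rows affected per input row
    exec,                          // ExecutionMonitor for progress/cancelation
    cp,                            // CredentialsProvider for user/password
    100);                          // batch size
if (error != null) {
    LOGGER.warn(error);            // errors are reported as a string rather than thrown
}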
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class AbstractTableSorter, method sortInMemory.
private DataTable sortInMemory(final ExecutionMonitor exec) throws CanceledExecutionException {
final DataTable dataTable = m_inputTable;
List<DataRow> rowList = new ArrayList<DataRow>();
int progress = 0;
final long rowCount = m_rowsInInputTable;
exec.setMessage("Reading data");
ExecutionMonitor readExec = exec.createSubProgress(0.5);
for (final DataRow r : dataTable) {
readExec.checkCanceled();
if (rowCount > 0) {
readExec.setProgress(progress / (double) rowCount, r.getKey().getString());
} else {
readExec.setMessage(r.getKey() + " (row " + progress + ")");
}
rowList.add(r);
progress++;
}
// nothing to sort if there is at most one row (can't rely on "rowCount" as it might not be set)
if (rowList.size() <= 1) {
return m_inputTable;
}
exec.setMessage("Sorting");
Collections.sort(rowList, m_rowComparator);
exec.setMessage("Creating sorted table");
final DataContainer dc = createDataContainer(dataTable.getDataTableSpec(), false);
ExecutionMonitor writeExec = exec.createSubProgress(0.5);
progress = 0;
for (DataRow r : rowList) {
exec.checkCanceled();
if (rowCount > 0) {
writeExec.setProgress(progress / (double) rowCount, r.getKey().getString());
} else {
writeExec.setMessage(r.getKey() + " (row " + progress + ")");
}
dc.addRowToTable(r);
progress++;
}
dc.close();
return dc.getTable();
}
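The m_rowComparator passed to Collections.sort is configured elsewhere in the sorter. As a rough illustration only (not the actual KNIME implementation), a comparator that orders rows by a single column could be built from that column's DataValueComparator; the column name "Score" is hypothetical and "dataTable" is the table being sorted.
// illustrative sketch: order rows by one column using the column type's comparator
final DataTableSpec spec = dataTable.getDataTableSpec();
final int colIndex = spec.findColumnIndex("Score");        // hypothetical column
final DataValueComparator cellComp = spec.getColumnSpec(colIndex).getType().getComparator();
final Comparator<DataRow> rowComparator =
    (r1, r2) -> cellComp.compare(r1.getCell(colIndex), r2.getCell(colIndex));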
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class AbstractTableSorter, method sortOnDisk.
/**
* Sorts the input table using a disk-based k-way merge sort.
*
* @param exec an execution context for reporting progress and creating BufferedDataContainers
* @return the sorted table
* @throws CanceledExecutionException if the user has canceled execution
*/
private DataTable sortOnDisk(final ExecutionMonitor exec) throws CanceledExecutionException {
final DataTable dataTable = m_inputTable;
m_progress = 0.0;
m_incProgress = m_rowsInInputTable <= 0 ? -1.0 : 1.0 / (2.0 * m_rowsInInputTable);
long counter = createInitialChunks(exec, dataTable);
// no need to sort if there is at most one row (can't rely on global rowCount - might not be set)
if (counter <= 1) {
return m_inputTable;
}
exec.setMessage("Merging temporary tables");
// merge chunks until there are at most m_maxOpenContainers left
Iterator<DataRow> result = mergeChunks(exec, false);
// add the results to the final output container; leave it to the
// system to do the caching (bug 1809)
DataContainer resultContainer = createDataContainer(dataTable.getDataTableSpec(), false);
while (result.hasNext()) {
resultContainer.addRowToTable(result.next());
}
resultContainer.close();
return resultContainer.getTable();
}
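mergeChunks itself is not shown here; conceptually it performs a k-way merge over the already sorted chunk iterators. Below is a minimal sketch of that idea only, under the assumptions that "sortedChunks" is a list of iterators over sorted chunks and "rowComparator" is the sorter's row comparator; the real KNIME code additionally limits the number of open containers to m_maxOpenContainers and reports progress.
// conceptual k-way merge: repeatedly emit the smallest head row among all sorted chunks
final class Head { DataRow row; Iterator<DataRow> rest; }
final PriorityQueue<Head> queue =
    new PriorityQueue<>((a, b) -> rowComparator.compare(a.row, b.row));
for (Iterator<DataRow> it : sortedChunks) {
    if (it.hasNext()) {
        Head h = new Head();
        h.row = it.next();
        h.rest = it;
        queue.add(h);
    }
}
while (!queue.isEmpty()) {
    Head h = queue.poll();
    resultContainer.addRowToTable(h.row);     // smallest remaining row across all chunks
    if (h.rest.hasNext()) {
        h.row = h.rest.next();                // advance that chunk and re-queue it
        queue.add(h);
    }
}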
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class PortObjectRepository, method copy.
/**
* Copies the argument object by means of the associated serializer.
* @param object The port object to be copied.
* @param exec Host for BDTs being created
* @param progress For progress/cancellation
* @return The deep copy.
* @throws IOException In case of exceptions while accessing the streams
* @throws CanceledExecutionException If canceled.
*/
public static final PortObject copy(final PortObject object, final ExecutionContext exec, final ExecutionMonitor progress) throws IOException, CanceledExecutionException {
if (object instanceof BufferedDataTable) {
// need to copy the table cell by cell
// this is to work around the standard KNIME philosophy according
// to which tables are referenced. A row-based copy will not work
// as it would still reference the blobs
BufferedDataTable in = (BufferedDataTable) object;
BufferedDataContainer con = exec.createDataContainer(in.getSpec(), true, 0);
final long rowCount = in.size();
long row = 0;
boolean hasLoggedCloneProblem = false;
for (DataRow r : in) {
DataCell[] cells = new DataCell[r.getNumCells()];
for (int i = 0; i < cells.length; i++) {
// deserialize blob
DataCell c = r.getCell(i);
if (c instanceof BlobDataCell) {
try {
c = cloneBlobCell(c);
} catch (Exception e) {
if (!hasLoggedCloneProblem) {
LOGGER.warn("Can't clone blob object: " + e.getMessage(), e);
hasLoggedCloneProblem = true;
LOGGER.debug("Suppressing further warnings.");
}
}
}
cells[i] = c;
}
con.addRowToTable(new DefaultRow(r.getKey(), cells));
progress.setProgress(row / (double) rowCount, "Copied row " + row + "/" + rowCount);
progress.checkCanceled();
row++;
}
con.close();
return con.getTable();
}
return Node.copyPortObject(object, exec);
}
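A hedged usage sketch of this method; the incoming port object and execution context below are assumptions.
// hypothetical call site, e.g. inside a NodeModel's execute method
final PortObject original = inData[0];                   // assumed incoming port object
final PortObject deepCopy = PortObjectRepository.copy(original, exec, exec.createSubProgress(1.0));
// BufferedDataTables are copied cell by cell (cloning blob cells); all other
// port object types are copied via Node.copyPortObject and their serializer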
Use of org.knime.core.data.DataRow in project knime-core by knime.
The class StreamableFunction, method runFinalInterwoven.
/**
* Helper function to run two {@link StreamableFunction}s that use the same input but different outputs.
*
* @param input the input
* @param func1 first streamable function
* @param output1 output for the first streamable function
* @param func2 second streamable function
* @param output2 output for the second streamable function
* @param exec for file store creation
* @throws Exception if computing or pushing a row fails
* @since 3.1
*/
public static void runFinalInterwoven(final RowInput input, final StreamableFunction func1, final RowOutput output1, final StreamableFunction func2, final RowOutput output2, final ExecutionContext exec) throws Exception {
func1.init(exec);
func2.init(exec);
try {
DataRow inputRow;
long index = 0;
while ((inputRow = input.poll()) != null) {
output1.push(func1.compute(inputRow));
output2.push(func2.compute(inputRow));
exec.setMessage(String.format("Row %d (\"%s\")", ++index, inputRow.getKey()));
}
input.close();
output1.close();
output2.close();
} finally {
func1.finish();
func2.finish();
}
}
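A minimal, hypothetical sketch of a StreamableFunction that could be passed as func1 or func2; rowInput, out1, out2 and exec are assumed to be provided by the surrounding node implementation.
// sketch: a pass-through function; a real one would typically compute new cells
final StreamableFunction passThrough = new StreamableFunction() {
    @Override
    public DataRow compute(final DataRow input) {
        return input;
    }
};
// for illustration only, the same function is used for both outputs
StreamableFunction.runFinalInterwoven(rowInput, passThrough, out1, passThrough, out2, exec);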