use of org.knime.core.data.RowIterator in project knime-core by knime.
the class DatabaseWriterConnection method updateTable.
/**
* Create connection to update table in database.
* @param dbConn a database connection object
* @param data The data to write.
* @param setColumns columns part of the SET clause
* @param whereColumns columns part of the WHERE clause
* @param updateStatus int array of length data#getRowCount; will be filled with
* update info from the database
* @param table name of table to write
* @param exec Used to cancel writing.
* @param cp {@link CredentialsProvider} providing user/password
* @param batchSize number of rows updated in one batch
* @return error string or null if none
* @throws Exception if connection could not be established
* @since 2.7
*/
public static final String updateTable(final DatabaseConnectionSettings dbConn, final String table, final BufferedDataTable data, final String[] setColumns, final String[] whereColumns, final int[] updateStatus, final ExecutionMonitor exec, final CredentialsProvider cp, final int batchSize) throws Exception {
// synchronized (dbConn.syncConnection(conn)) {
return dbConn.execute(cp, conn -> {
exec.setMessage("Start updating rows in database...");
final DataTableSpec spec = data.getDataTableSpec();
// create query connection object
final StringBuilder query = new StringBuilder("UPDATE " + table + " SET");
for (int i = 0; i < setColumns.length; i++) {
if (i > 0) {
query.append(",");
}
final String newColumnName = replaceColumnName(setColumns[i], dbConn);
query.append(" " + newColumnName + " = ?");
}
query.append(" WHERE");
for (int i = 0; i < whereColumns.length; i++) {
if (i > 0) {
query.append(" AND");
}
final String newColumnName = replaceColumnName(whereColumns[i], dbConn);
query.append(" " + newColumnName + " = ?");
}
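// For example (hypothetical table/column names), setColumns = {"A", "B"} and
// whereColumns = {"ID"} yield: UPDATE mytable SET A = ?, B = ? WHERE ID = ?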
// using a prepared statement avoids problems when writing more than 13
// columns: we can set the column values directly row-by-row, and the
// database will handle the commit
long rowCount = data.size();
int cnt = 1;
int errorCnt = 0;
int allErrors = 0;
// count number of rows added to current batch
int curBatchSize = 0;
// selected timezone
final TimeZone timezone = dbConn.getTimeZone();
LOGGER.debug("Executing SQL statement as prepareStatement: " + query);
final PreparedStatement stmt = conn.prepareStatement(query.toString());
// remember auto-commit flag
final boolean autoCommit = conn.getAutoCommit();
DatabaseConnectionSettings.setAutoCommit(conn, false);
try {
for (RowIterator it = data.iterator(); it.hasNext(); cnt++) {
exec.checkCanceled();
exec.setProgress(1.0 * cnt / rowCount, "Row #" + cnt);
final DataRow row = it.next();
// SET columns
for (int i = 0; i < setColumns.length; i++) {
final int dbIdx = i + 1;
final int columnIndex = spec.findColumnIndex(setColumns[i]);
final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
final DataCell cell = row.getCell(columnIndex);
fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
}
// WHERE columns
for (int i = 0; i < whereColumns.length; i++) {
final int dbIdx = i + 1 + setColumns.length;
final int columnIndex = spec.findColumnIndex(whereColumns[i]);
final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
final DataCell cell = row.getCell(columnIndex);
fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
}
// if batch mode
if (batchSize > 1) {
// a new row will be added
stmt.addBatch();
}
curBatchSize++;
// flush if the batch is full or the input table is at its end
if ((curBatchSize == batchSize) || !it.hasNext()) {
curBatchSize = 0;
try {
// write batch
if (batchSize > 1) {
int[] status = stmt.executeBatch();
for (int i = 0; i < status.length; i++) {
updateStatus[cnt - status.length + i] = status[i];
}
} else {
// or write single row
int status = stmt.executeUpdate();
updateStatus[cnt - 1] = status;
}
} catch (Throwable t) {
// commit what has succeeded so far, then record the error and continue anyway
if (!conn.getAutoCommit()) {
conn.commit();
}
allErrors++;
if (errorCnt > -1) {
final String errorMsg;
if (batchSize > 1) {
errorMsg = "Error while updating rows #" + (cnt - batchSize) + " - #" + cnt + ", reason: " + t.getMessage();
} else {
errorMsg = "Error while updating row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
}
exec.setMessage(errorMsg);
if (errorCnt++ < 10) {
LOGGER.warn(errorMsg);
} else {
errorCnt = -1;
LOGGER.warn(errorMsg + " - more errors...", t);
}
}
} finally {
// clear batch if in batch mode
if (batchSize > 1) {
stmt.clearBatch();
}
}
}
}
if (!conn.getAutoCommit()) {
conn.commit();
}
if (allErrors == 0) {
return null;
} else {
return "Errors \"" + allErrors + "\" updating " + rowCount + " rows.";
}
} finally {
DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
stmt.close();
}
});
}
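The batching skeleton above (auto-commit off, SET parameters bound before WHERE parameters, flush every batchSize rows, original auto-commit flag restored) can be reproduced with plain JDBC. The following is a minimal, self-contained sketch; the H2 in-memory database, the person table, and all sizes are hypothetical and not part of the KNIME API.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;

public final class BatchUpdateSketch {
    public static void main(final String[] args) throws Exception {
        final int batchSize = 100;
        final int rowCount = 1000;
        try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:demo")) {
            // hypothetical table, created here only to make the sketch runnable
            try (Statement init = conn.createStatement()) {
                init.execute("CREATE TABLE person(id INT PRIMARY KEY, age INT)");
                for (int i = 0; i < rowCount; i++) {
                    init.execute("INSERT INTO person VALUES (" + i + ", 0)");
                }
            }
            final boolean autoCommit = conn.getAutoCommit(); // remember auto-commit flag
            conn.setAutoCommit(false);
            try (PreparedStatement stmt =
                    conn.prepareStatement("UPDATE person SET age = ? WHERE id = ?")) {
                int curBatchSize = 0;
                for (int row = 0; row < rowCount; row++) {
                    stmt.setInt(1, 20 + row % 50); // SET columns use parameter indices 1..k
                    stmt.setInt(2, row);           // WHERE columns follow after all SET columns
                    stmt.addBatch();
                    if (++curBatchSize == batchSize || row == rowCount - 1) {
                        stmt.executeBatch(); // per-row update counts, cf. updateStatus above
                        stmt.clearBatch();
                        curBatchSize = 0;
                    }
                }
                conn.commit();
            } finally {
                conn.setAutoCommit(autoCommit); // restore original auto-commit flag
            }
        }
    }
}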
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class DatabaseWriterConnection method deleteRows.
/**
* Create connection to delete rows from a table in the database.
* @param dbConn a database connection object
* @param data The data to write.
* @param whereColumns columns part of the WHERE clause
* @param deleteStatus int array of length data#getRowCount; will be filled with
* the number of rows affected
* @param table name of table to write
* @param exec Used to cancel writing.
* @param cp {@link CredentialsProvider} providing user/password
* @param batchSize number of rows deleted in one batch
* @return error string or null if none
* @throws Exception if connection could not be established
* @since 2.7
*/
public static final String deleteRows(final DatabaseConnectionSettings dbConn, final String table, final BufferedDataTable data, final String[] whereColumns, final int[] deleteStatus, final ExecutionMonitor exec, final CredentialsProvider cp, final int batchSize) throws Exception {
// synchronized (dbConn.syncConnection(conn)) {
return dbConn.execute(cp, conn -> {
exec.setMessage("Start deleting rows from database...");
final DataTableSpec spec = data.getDataTableSpec();
// create query connection object
final StringBuilder query = new StringBuilder("DELETE FROM " + table + " WHERE");
for (int i = 0; i < whereColumns.length; i++) {
if (i > 0) {
query.append(" AND");
}
final String newColumnName = replaceColumnName(whereColumns[i], dbConn);
query.append(" " + newColumnName + " = ?");
}
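// For example (hypothetical names), whereColumns = {"ID", "Name"} yields:
// DELETE FROM mytable WHERE ID = ? AND Name = ?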
// using a prepared statement avoids problems when writing more than 13
// columns: we can set the column values directly row-by-row, and the
// database will handle the commit
long rowCount = data.size();
int cnt = 1;
int errorCnt = 0;
int allErrors = 0;
// count number of rows added to current batch
int curBatchSize = 0;
// selected timezone
final TimeZone timezone = dbConn.getTimeZone();
LOGGER.debug("Executing SQL statement as prepareStatement: " + query);
final PreparedStatement stmt = conn.prepareStatement(query.toString());
// remember auto-commit flag
final boolean autoCommit = conn.getAutoCommit();
DatabaseConnectionSettings.setAutoCommit(conn, false);
try {
for (RowIterator it = data.iterator(); it.hasNext(); cnt++) {
exec.checkCanceled();
exec.setProgress(1.0 * cnt / rowCount, "Row #" + cnt);
final DataRow row = it.next();
// WHERE columns
for (int i = 0; i < whereColumns.length; i++) {
final int dbIdx = i + 1;
final int columnIndex = spec.findColumnIndex(whereColumns[i]);
final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
final DataCell cell = row.getCell(columnIndex);
fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
}
// if batch mode
if (batchSize > 1) {
// a new row will be added
stmt.addBatch();
}
curBatchSize++;
// flush if the batch is full or the input table is at its end
if ((curBatchSize == batchSize) || !it.hasNext()) {
curBatchSize = 0;
try {
// write batch
if (batchSize > 1) {
int[] status = stmt.executeBatch();
for (int i = 0; i < status.length; i++) {
deleteStatus[cnt - status.length + i] = status[i];
}
} else {
// or write single row
int status = stmt.executeUpdate();
deleteStatus[cnt - 1] = status;
}
} catch (Throwable t) {
// commit what has succeeded so far, then record the error and continue anyway
if (!conn.getAutoCommit()) {
conn.commit();
}
allErrors++;
if (errorCnt > -1) {
final String errorMsg;
if (batchSize > 1) {
errorMsg = "Error while deleting rows #" + (cnt - batchSize) + " - #" + cnt + ", reason: " + t.getMessage();
} else {
errorMsg = "Error while deleting row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
}
exec.setMessage(errorMsg);
if (errorCnt++ < 10) {
LOGGER.warn(errorMsg);
} else {
errorCnt = -1;
LOGGER.warn(errorMsg + " - more errors...", t);
}
}
} finally {
// clear batch if in batch mode
if (batchSize > 1) {
stmt.clearBatch();
}
}
}
}
if (!conn.getAutoCommit()) {
conn.commit();
}
if (allErrors == 0) {
return null;
} else {
return "Errors \"" + allErrors + "\" deleting " + rowCount + " rows.";
}
} finally {
DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
stmt.close();
}
});
}
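The per-row counts written into deleteStatus come straight from Statement#executeBatch and Statement#executeUpdate, so they follow the usual JDBC conventions. A small illustrative snippet (with made-up values) for interpreting them:

import java.sql.Statement;

public final class DeleteStatusDemo {
    public static void main(final String[] args) {
        final int[] deleteStatus = {1, 0, Statement.SUCCESS_NO_INFO};
        for (int i = 0; i < deleteStatus.length; i++) {
            if (deleteStatus[i] == Statement.SUCCESS_NO_INFO) { // -2: executed, count unknown
                System.out.println("Row " + i + ": executed, no count reported");
            } else if (deleteStatus[i] == 0) {
                System.out.println("Row " + i + ": no matching row to delete");
            } else {
                System.out.println("Row " + i + ": deleted " + deleteStatus[i] + " row(s)");
            }
        }
    }
}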
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class NormalizerApplyNodeModel method createStreamableOperator.
/**
* {@inheritDoc}
*/
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
if (getNrOutPorts() == 2) {
// by default call the default implementation of this method
return super.createStreamableOperator(partitionInfo, inSpecs);
} else {
return new StreamableOperator() {
@Override
public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
assert outputs.length == 1;
NormalizerPortObject model = (NormalizerPortObject) ((PortObjectInput) inputs[0]).getPortObject();
RowInput rowInput = (RowInput) inputs[1];
AffineTransTable t = new AffineTransTable(rowInput, getAffineTrans(model.getConfiguration()));
RowOutput rowOutput = (RowOutput) outputs[0];
RowIterator it = t.iterator();
while (it.hasNext()) {
rowOutput.push(it.next());
}
if (t.getErrorMessage() != null) {
// TODO collect error message from remote nodes if run distributed
setWarningMessage(t.getErrorMessage());
}
rowInput.close();
rowOutput.close();
}
};
}
}
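Stripped of the KNIME types, the operator body is a plain pull-from-input, push-to-output loop. A minimal sketch with hypothetical stand-ins for the KNIME classes (a String iterator and a list; not the real RowInput/RowOutput API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.UnaryOperator;

public final class PushLoopSketch {
    public static void main(final String[] args) {
        final Iterator<String> rowInput = List.of("a", "b", "c").iterator(); // stands in for RowInput
        final List<String> rowOutput = new ArrayList<>();                    // stands in for RowOutput
        final UnaryOperator<String> normalize = String::toUpperCase;         // stands in for AffineTransTable
        while (rowInput.hasNext()) {
            rowOutput.add(normalize.apply(rowInput.next())); // rowOutput.push(it.next())
        }
        System.out.println(rowOutput); // [A, B, C]
    }
}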
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class AutoBinner method createEdgesFromQuantiles.
@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
double[] edges = new double[sampleQuantiles.length];
long n = data.size();
long c = 0;
int cc = 0;
RowIterator iter = data.iterator();
DataRow rowQ = null;
DataRow rowQ1 = null;
if (iter.hasNext()) {
rowQ1 = iter.next();
rowQ = rowQ1;
}
for (double p : sampleQuantiles) {
double h = (n - 1) * p + 1;
int q = (int) Math.floor(h);
while ((1.0 == p || c < q) && iter.hasNext()) {
rowQ = rowQ1;
rowQ1 = iter.next();
c++;
exec.setProgress(c / (double) n);
exec.checkCanceled();
}
rowQ = 1.0 != p ? rowQ : rowQ1;
final DataCell xqCell = rowQ.getCell(0);
final DataCell xq1Cell = rowQ1.getCell(0);
// quantile calculation cannot handle missing values
if (xqCell.isMissing() || xq1Cell.isMissing()) {
throw new RuntimeException("Missing values not supported for quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
}
// for quantile calculation see also
// http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population
// this implements R-7
double xq = ((DoubleValue) xqCell).getDoubleValue();
double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
double quantile = xq + (h - q) * (xq1 - xq);
edges[cc] = quantile;
cc++;
}
return edges;
}
use of org.knime.core.data.RowIterator in project knime-core by knime.
the class CAIMDiscretizationNodeModel method createAllIntervalBoundaries.
/**
* Sorts the data table in ascending order on the given column, then all
* distinct values are determined and finally a new table is created that
* holds the minimum, the maximum value and the midpoints of all adjacent
* values. These represent all possible boundaries.
*
* @param table the table with the data
* @param columnIndex the column of interest
* @param exec the execution context to set the progress
* @return a {@link BoundaryScheme} holding all candidate boundaries and their number
* @throws Exception if sorting the table or accessing its rows fails
*/
private BoundaryScheme createAllIntervalBoundaries(final BufferedDataTable table, final int columnIndex, final ExecutionContext exec) throws Exception {
// sort the data according to the column index
List<String> sortColumn = new ArrayList<String>();
sortColumn.add(table.getDataTableSpec().getColumnSpec(columnIndex).getName());
// if reduced boundaries are used, additionally sort according to the class column
if (m_reducedBoundaries) {
sortColumn.add(m_classColumnName.getStringValue());
}
// in ascending order; in case the class column is not used as a second
// sort criterion, the sort order of field 2 is ignored
boolean[] sortOrder = new boolean[sortColumn.size()];
Arrays.fill(sortOrder, true);
SortedTable sortedTable = new SortedTable(table, sortColumn, sortOrder, m_sortInMemory.getBooleanValue(), exec);
// the first different value is the minimum value of the sorted list
RowIterator rowIterator = sortedTable.iterator();
// get the first valid value (non-missing)
double lastDifferentValue = Double.NaN;
String firstClassValueOfCurrentValue = null;
while (rowIterator.hasNext()) {
DataRow firstRow = rowIterator.next();
if (!firstRow.getCell(columnIndex).isMissing()) {
lastDifferentValue = ((DoubleValue) firstRow.getCell(columnIndex)).getDoubleValue();
// also remember the corresponding class value
firstClassValueOfCurrentValue = firstRow.getCell(m_classifyColumnIndex).toString();
break;
}
}
// needed to create an already passed candidate boundary due
// to a class value change
double lastChangeValueWithoutNewBoundary = Double.NaN;
// create the head of the linked double list,
// marked by negative infinity
LinkedDouble head = new LinkedDouble(Double.NEGATIVE_INFINITY);
// set the last added element
LinkedDouble lastAdded = head;
// count the number of boundaries
int numBoundaries = 0;
// to determine if the class has changed during a single value sequence
boolean hasClassChanged = false;
while (rowIterator.hasNext()) {
DataRow row = rowIterator.next();
DataCell cell = row.getCell(columnIndex);
double value = ((DoubleValue) cell).getDoubleValue();
String classValue = row.getCell(m_classifyColumnIndex).toString();
if (!hasClassChanged && !firstClassValueOfCurrentValue.equals(classValue)) {
hasClassChanged = true;
// i.e. the value remembered without a new boundary is not necessary any more
if (value != lastDifferentValue) {
lastChangeValueWithoutNewBoundary = Double.NaN;
}
}
// as long as the values do not change no boundary is added
if (value != lastDifferentValue) {
// check whether the class has changed since the last value change
if (hasClassChanged) {
// if an earlier value change did not yet create a boundary, add it now
if (!Double.isNaN(lastChangeValueWithoutNewBoundary)) {
// a new boundary is the midpoint
double newBoundary = (lastDifferentValue + lastChangeValueWithoutNewBoundary) / 2.0D;
// add the new midpoint boundary to the linked list
lastAdded.m_next = new LinkedDouble(newBoundary);
numBoundaries++;
lastAdded.m_next.m_previous = lastAdded;
lastAdded = lastAdded.m_next;
}
// a new boundary is the midpoint
double newBoundary = (value + lastDifferentValue) / 2.0D;
// add the new midpoint boundary to the linked list
lastAdded.m_next = new LinkedDouble(newBoundary);
numBoundaries++;
lastAdded.m_next.m_previous = lastAdded;
lastAdded = lastAdded.m_next;
// reset the value
lastChangeValueWithoutNewBoundary = Double.NaN;
} else {
lastChangeValueWithoutNewBoundary = lastDifferentValue;
}
// remember the value change
lastDifferentValue = value;
// remember the first class value of this first value
firstClassValueOfCurrentValue = classValue;
// reset the hasClassChanged value
hasClassChanged = false;
}
}
return new BoundaryScheme(head, numBoundaries);
}
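Without the class-based reduction, the candidate boundaries are simply the midpoints between adjacent distinct values of the sorted column. A short sketch with made-up data (the class-change bookkeeping of m_reducedBoundaries is omitted):

import java.util.ArrayList;
import java.util.List;

public final class MidpointBoundaries {
    public static void main(final String[] args) {
        final double[] sorted = {1.0, 1.0, 2.0, 4.0, 4.0, 7.0}; // column already sorted
        final List<Double> boundaries = new ArrayList<>();
        double lastDifferentValue = sorted[0];
        for (double value : sorted) {
            if (value != lastDifferentValue) {
                // a new boundary is the midpoint of two adjacent distinct values
                boundaries.add((value + lastDifferentValue) / 2.0);
                lastDifferentValue = value;
            }
        }
        System.out.println(boundaries); // [1.5, 3.0, 5.5]
    }
}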