Search in sources :

Example 61 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DatabaseWriterConnection method updateTable.

/**
 * Create connection to update table in database.
 * @param dbConn a database connection object
 * @param data The data to write.
 * @param setColumns columns part of the SET clause
 * @param whereColumns columns part of the WHERE clause
 * @param updateStatus int array of length data#getRowCount; will be filled with
 *             update info from the database
 * @param table name of table to write
 * @param exec Used the cancel writing.
 * @param cp {@link CredentialsProvider} providing user/password
 * @param batchSize number of rows updated in one batch
 * @return error string or null, if non
 * @throws Exception if connection could not be established
 * @since 2.7
 */
public static final String updateTable(final DatabaseConnectionSettings dbConn, final String table, final BufferedDataTable data, final String[] setColumns, final String[] whereColumns, final int[] updateStatus, final ExecutionMonitor exec, final CredentialsProvider cp, final int batchSize) throws Exception {
    // synchronized (dbConn.syncConnection(conn)) {
    return dbConn.execute(cp, conn -> {
        exec.setMessage("Start updating rows in database...");
        final DataTableSpec spec = data.getDataTableSpec();
        // create query connection object
        final StringBuilder query = new StringBuilder("UPDATE " + table + " SET");
        for (int i = 0; i < setColumns.length; i++) {
            if (i > 0) {
                query.append(",");
            }
            final String newColumnName = replaceColumnName(setColumns[i], dbConn);
            query.append(" " + newColumnName + " = ?");
        }
        query.append(" WHERE");
        for (int i = 0; i < whereColumns.length; i++) {
            if (i > 0) {
                query.append(" AND");
            }
            final String newColumnName = replaceColumnName(whereColumns[i], dbConn);
            query.append(" " + newColumnName + " = ?");
        }
        // problems writing more than 13 columns. the prepare statement
        // ensures that we can set the columns directly row-by-row, the
        // database will handle the commit
        long rowCount = data.size();
        int cnt = 1;
        int errorCnt = 0;
        int allErrors = 0;
        // count number of rows added to current batch
        int curBatchSize = 0;
        // selected timezone
        final TimeZone timezone = dbConn.getTimeZone();
        LOGGER.debug("Executing SQL statement as prepareStatement: " + query);
        final PreparedStatement stmt = conn.prepareStatement(query.toString());
        // remember auto-commit flag
        final boolean autoCommit = conn.getAutoCommit();
        DatabaseConnectionSettings.setAutoCommit(conn, false);
        try {
            for (RowIterator it = data.iterator(); it.hasNext(); cnt++) {
                exec.checkCanceled();
                exec.setProgress(1.0 * cnt / rowCount, "Row " + "#" + cnt);
                final DataRow row = it.next();
                // SET columns
                for (int i = 0; i < setColumns.length; i++) {
                    final int dbIdx = i + 1;
                    final int columnIndex = spec.findColumnIndex(setColumns[i]);
                    final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
                    final DataCell cell = row.getCell(columnIndex);
                    fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
                }
                // WHERE columns
                for (int i = 0; i < whereColumns.length; i++) {
                    final int dbIdx = i + 1 + setColumns.length;
                    final int columnIndex = spec.findColumnIndex(whereColumns[i]);
                    final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
                    final DataCell cell = row.getCell(columnIndex);
                    fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
                }
                // if batch mode
                if (batchSize > 1) {
                    // a new row will be added
                    stmt.addBatch();
                }
                curBatchSize++;
                // if batch size equals number of row in batch or input table at end
                if ((curBatchSize == batchSize) || !it.hasNext()) {
                    curBatchSize = 0;
                    try {
                        // write batch
                        if (batchSize > 1) {
                            int[] status = stmt.executeBatch();
                            for (int i = 0; i < status.length; i++) {
                                updateStatus[cnt - status.length + i] = status[i];
                            }
                        } else {
                            // or write single row
                            int status = stmt.executeUpdate();
                            updateStatus[cnt - 1] = status;
                        }
                    } catch (Throwable t) {
                        // anyway.
                        if (!conn.getAutoCommit()) {
                            conn.commit();
                        }
                        allErrors++;
                        if (errorCnt > -1) {
                            final String errorMsg;
                            if (batchSize > 1) {
                                errorMsg = "Error while updating rows #" + (cnt - batchSize) + " - #" + cnt + ", reason: " + t.getMessage();
                            } else {
                                errorMsg = "Error while updating row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
                            }
                            exec.setMessage(errorMsg);
                            if (errorCnt++ < 10) {
                                LOGGER.warn(errorMsg);
                            } else {
                                errorCnt = -1;
                                LOGGER.warn(errorMsg + " - more errors...", t);
                            }
                        }
                    } finally {
                        // clear batch if in batch mode
                        if (batchSize > 1) {
                            stmt.clearBatch();
                        }
                    }
                }
            }
            if (!conn.getAutoCommit()) {
                conn.commit();
            }
            if (allErrors == 0) {
                return null;
            } else {
                return "Errors \"" + allErrors + "\" updating " + rowCount + " rows.";
            }
        } finally {
            DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
            stmt.close();
        }
    });
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PreparedStatement(java.sql.PreparedStatement) DataRow(org.knime.core.data.DataRow) TimeZone(java.util.TimeZone) DataColumnSpec(org.knime.core.data.DataColumnSpec) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell)

Example 62 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DatabaseWriterConnection method deleteRows.

/**
 * Create connection to update table in database.
 * @param dbConn a database connection object
 * @param data The data to write.
 * @param whereColumns columns part of the WHERE clause
 * @param deleteStatus int array of length data#getRowCount; will be filled with
 *             the number of rows effected
 * @param table name of table to write
 * @param exec Used the cancel writing.
 * @param cp {@link CredentialsProvider} providing user/password
 * @param batchSize number of rows deleted in one batch
 * @return error string or null, if non
 * @throws Exception if connection could not be established
 * @since 2.7
 */
public static final String deleteRows(final DatabaseConnectionSettings dbConn, final String table, final BufferedDataTable data, final String[] whereColumns, final int[] deleteStatus, final ExecutionMonitor exec, final CredentialsProvider cp, final int batchSize) throws Exception {
    // synchronized (dbConn.syncConnection(conn)) {
    return dbConn.execute(cp, conn -> {
        exec.setMessage("Start deleting rows from database...");
        final DataTableSpec spec = data.getDataTableSpec();
        // create query connection object
        final StringBuilder query = new StringBuilder("DELETE FROM " + table + " WHERE");
        for (int i = 0; i < whereColumns.length; i++) {
            if (i > 0) {
                query.append(" AND");
            }
            final String newColumnName = replaceColumnName(whereColumns[i], dbConn);
            query.append(" " + newColumnName + " = ?");
        }
        // problems writing more than 13 columns. the prepare statement
        // ensures that we can set the columns directly row-by-row, the
        // database will handle the commit
        long rowCount = data.size();
        int cnt = 1;
        int errorCnt = 0;
        int allErrors = 0;
        // count number of rows added to current batch
        int curBatchSize = 0;
        // selected timezone
        final TimeZone timezone = dbConn.getTimeZone();
        LOGGER.debug("Executing SQL statement as prepareStatement: " + query);
        final PreparedStatement stmt = conn.prepareStatement(query.toString());
        // remember auto-commit flag
        final boolean autoCommit = conn.getAutoCommit();
        DatabaseConnectionSettings.setAutoCommit(conn, false);
        try {
            for (RowIterator it = data.iterator(); it.hasNext(); cnt++) {
                exec.checkCanceled();
                exec.setProgress(1.0 * cnt / rowCount, "Row " + "#" + cnt);
                final DataRow row = it.next();
                // WHERE columns
                for (int i = 0; i < whereColumns.length; i++) {
                    final int dbIdx = i + 1;
                    final int columnIndex = spec.findColumnIndex(whereColumns[i]);
                    final DataColumnSpec cspec = spec.getColumnSpec(columnIndex);
                    final DataCell cell = row.getCell(columnIndex);
                    fillStatement(stmt, dbIdx, cspec, cell, timezone, null);
                }
                // if batch mode
                if (batchSize > 1) {
                    // a new row will be added
                    stmt.addBatch();
                }
                curBatchSize++;
                // if batch size equals number of row in batch or input table at end
                if ((curBatchSize == batchSize) || !it.hasNext()) {
                    curBatchSize = 0;
                    try {
                        // write batch
                        if (batchSize > 1) {
                            int[] status = stmt.executeBatch();
                            for (int i = 0; i < status.length; i++) {
                                deleteStatus[cnt - status.length + i] = status[i];
                            }
                        } else {
                            // or write single row
                            int status = stmt.executeUpdate();
                            deleteStatus[cnt - 1] = status;
                        }
                    } catch (Throwable t) {
                        // anyway.
                        if (!conn.getAutoCommit()) {
                            conn.commit();
                        }
                        allErrors++;
                        if (errorCnt > -1) {
                            final String errorMsg;
                            if (batchSize > 1) {
                                errorMsg = "Error while deleting rows #" + (cnt - batchSize) + " - #" + cnt + ", reason: " + t.getMessage();
                            } else {
                                errorMsg = "Error while deleting row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
                            }
                            exec.setMessage(errorMsg);
                            if (errorCnt++ < 10) {
                                LOGGER.warn(errorMsg);
                            } else {
                                errorCnt = -1;
                                LOGGER.warn(errorMsg + " - more errors...", t);
                            }
                        }
                    } finally {
                        // clear batch if in batch mode
                        if (batchSize > 1) {
                            stmt.clearBatch();
                        }
                    }
                }
            }
            if (!conn.getAutoCommit()) {
                conn.commit();
            }
            if (allErrors == 0) {
                return null;
            } else {
                return "Errors \"" + allErrors + "\" deleting " + rowCount + " rows.";
            }
        } finally {
            DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
            stmt.close();
        }
    });
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PreparedStatement(java.sql.PreparedStatement) DataRow(org.knime.core.data.DataRow) TimeZone(java.util.TimeZone) DataColumnSpec(org.knime.core.data.DataColumnSpec) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell)

Example 63 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class NormalizerApplyNodeModel method createStreamableOperator.

/**
 * {@inheritDoc}
 */
@Override
public StreamableOperator createStreamableOperator(final PartitionInfo partitionInfo, final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    if (getNrOutPorts() == 2) {
        // by default call the default implementation of this method
        return super.createStreamableOperator(partitionInfo, inSpecs);
    } else {
        return new StreamableOperator() {

            @Override
            public void runFinal(final PortInput[] inputs, final PortOutput[] outputs, final ExecutionContext exec) throws Exception {
                assert outputs.length == 1;
                NormalizerPortObject model = (NormalizerPortObject) ((PortObjectInput) inputs[0]).getPortObject();
                RowInput rowInput = (RowInput) inputs[1];
                AffineTransTable t = new AffineTransTable(rowInput, getAffineTrans(model.getConfiguration()));
                RowOutput rowOutput = (RowOutput) outputs[0];
                RowIterator it = t.iterator();
                while (it.hasNext()) {
                    rowOutput.push(it.next());
                }
                if (t.getErrorMessage() != null) {
                    // TODO collect error message from remote nodes if run distributed
                    setWarningMessage(t.getErrorMessage());
                }
                rowInput.close();
                rowOutput.close();
            }
        };
    }
}
Also used : NormalizerPortObject(org.knime.base.data.normalize.NormalizerPortObject) RowOutput(org.knime.core.node.streamable.RowOutput) ExecutionContext(org.knime.core.node.ExecutionContext) StreamableOperator(org.knime.core.node.streamable.StreamableOperator) RowIterator(org.knime.core.data.RowIterator) AffineTransTable(org.knime.base.data.normalize.AffineTransTable) RowInput(org.knime.core.node.streamable.RowInput)

Example 64 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class AutoBinner method createEdgesFromQuantiles.

@SuppressWarnings("null")
private static double[] createEdgesFromQuantiles(final BufferedDataTable data, final ExecutionContext exec, final double[] sampleQuantiles) throws CanceledExecutionException {
    double[] edges = new double[sampleQuantiles.length];
    long n = data.size();
    long c = 0;
    int cc = 0;
    RowIterator iter = data.iterator();
    DataRow rowQ = null;
    DataRow rowQ1 = null;
    if (iter.hasNext()) {
        rowQ1 = iter.next();
        rowQ = rowQ1;
    }
    for (double p : sampleQuantiles) {
        double h = (n - 1) * p + 1;
        int q = (int) Math.floor(h);
        while ((1.0 == p || c < q) && iter.hasNext()) {
            rowQ = rowQ1;
            rowQ1 = iter.next();
            c++;
            exec.setProgress(c / (double) n);
            exec.checkCanceled();
        }
        rowQ = 1.0 != p ? rowQ : rowQ1;
        final DataCell xqCell = rowQ.getCell(0);
        final DataCell xq1Cell = rowQ1.getCell(0);
        // data first?)
        if (xqCell.isMissing() || xq1Cell.isMissing()) {
            throw new RuntimeException("Missing values not support for " + "quantile calculation (error in row \"" + rowQ1.getKey() + "\")");
        }
        // for quantile calculation see also
        // http://en.wikipedia.org/wiki/
        // Quantile#Estimating_the_quantiles_of_a_population.
        // this implements R-7
        double xq = ((DoubleValue) xqCell).getDoubleValue();
        double xq1 = ((DoubleValue) xq1Cell).getDoubleValue();
        double quantile = xq + (h - q) * (xq1 - xq);
        edges[cc] = quantile;
        cc++;
    }
    return edges;
}
Also used : DoubleValue(org.knime.core.data.DoubleValue) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow)

Example 65 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class CAIMDiscretizationNodeModel method createAllIntervalBoundaries.

/**
 * Sorts the data table in ascending order on the given column, then all
 * distinct values are determined and finally a new table is created that
 * holds the minimum, the maximum value and the midpoints of all adjacent
 * values. These represent all possible boundaries.
 *
 * @param table the table with the data
 * @param columnIndex the column of interest
 * @param exec the execution context to set the progress
 */
private BoundaryScheme createAllIntervalBoundaries(final BufferedDataTable table, final int columnIndex, final ExecutionContext exec) throws Exception {
    // sort the data according to the column index
    List<String> sortColumn = new ArrayList<String>();
    sortColumn.add(table.getDataTableSpec().getColumnSpec(columnIndex).getName());
    // according to the class column
    if (m_reducedBoundaries) {
        sortColumn.add(m_classColumnName.getStringValue());
    }
    // in ascending order
    // in case the class column is not used as second sort criteria
    // the sort order of field 2 is ignored
    boolean[] sortOrder = new boolean[sortColumn.size()];
    Arrays.fill(sortOrder, true);
    SortedTable sortedTable = new SortedTable(table, sortColumn, sortOrder, m_sortInMemory.getBooleanValue(), exec);
    // the first different value is the minimum value of the sorted list
    RowIterator rowIterator = sortedTable.iterator();
    // get the first valid value (non-missing
    double lastDifferentValue = Double.NaN;
    String firstClassValueOfCurrentValue = null;
    while (rowIterator.hasNext()) {
        DataRow firstRow = rowIterator.next();
        if (!firstRow.getCell(columnIndex).isMissing()) {
            lastDifferentValue = ((DoubleValue) firstRow.getCell(columnIndex)).getDoubleValue();
            // also remember the corresponding class value
            firstClassValueOfCurrentValue = firstRow.getCell(m_classifyColumnIndex).toString();
            break;
        }
    }
    // needed to create a already passed candidate boundary due
    // to a class value change
    double lastChangeValueWithoutNewBoundary = Double.NaN;
    // create the head of the linked double list
    // marked by NaN
    LinkedDouble head = new LinkedDouble(Double.NEGATIVE_INFINITY);
    // set the last added element
    LinkedDouble lastAdded = head;
    // count the number of boundaries
    int numBoundaries = 0;
    // to determine if the class has changed during a single value sequence
    boolean hasClassChanged = false;
    while (rowIterator.hasNext()) {
        DataRow row = rowIterator.next();
        DataCell cell = row.getCell(columnIndex);
        double value = ((DoubleValue) cell).getDoubleValue();
        String classValue = row.getCell(m_classifyColumnIndex).toString();
        if (!hasClassChanged && !firstClassValueOfCurrentValue.equals(classValue)) {
            hasClassChanged = true;
            // i.e. this value is not necessary any more
            if (value != lastDifferentValue) {
                lastChangeValueWithoutNewBoundary = Double.NaN;
            }
        }
        // as long as the values do not change no boundary is added
        if (value != lastDifferentValue) {
            // since the last value change
            if (hasClassChanged) {
                // if the class value has changed since this time
                if (!Double.isNaN(lastChangeValueWithoutNewBoundary)) {
                    // a new boundary is the midpoint
                    double newBoundary = (lastDifferentValue + lastChangeValueWithoutNewBoundary) / 2.0D;
                    // add the new midpoint boundary to the linked list
                    lastAdded.m_next = new LinkedDouble(newBoundary);
                    numBoundaries++;
                    lastAdded.m_next.m_previous = lastAdded;
                    lastAdded = lastAdded.m_next;
                }
                // a new boundary is the midpoint
                double newBoundary = (value + lastDifferentValue) / 2.0D;
                // add the new midpoint boundary to the linked list
                lastAdded.m_next = new LinkedDouble(newBoundary);
                numBoundaries++;
                lastAdded.m_next.m_previous = lastAdded;
                lastAdded = lastAdded.m_next;
                // reset the value
                lastChangeValueWithoutNewBoundary = Double.NaN;
            } else {
                lastChangeValueWithoutNewBoundary = lastDifferentValue;
            }
            // remember the value change
            lastDifferentValue = value;
            // remember the first class value of this first value
            firstClassValueOfCurrentValue = classValue;
            // reset the hasClassChanged value
            hasClassChanged = false;
        }
    }
    return new BoundaryScheme(head, numBoundaries);
}
Also used : ArrayList(java.util.ArrayList) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DoubleValue(org.knime.core.data.DoubleValue) SortedTable(org.knime.base.data.sort.SortedTable) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell)

Aggregations

RowIterator (org.knime.core.data.RowIterator)77 DataRow (org.knime.core.data.DataRow)62 DataCell (org.knime.core.data.DataCell)28 DataTableSpec (org.knime.core.data.DataTableSpec)20 RowKey (org.knime.core.data.RowKey)16 DoubleValue (org.knime.core.data.DoubleValue)14 BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ArrayList (java.util.ArrayList)9 DefaultRow (org.knime.core.data.def.DefaultRow)8 PreparedStatement (java.sql.PreparedStatement)7 DataType (org.knime.core.data.DataType)6 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 HashSet (java.util.HashSet)5 Random (java.util.Random)5 TimeZone (java.util.TimeZone)5 DataTable (org.knime.core.data.DataTable)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 StringCell (org.knime.core.data.def.StringCell)5 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)5