use of org.knime.core.data.DataRow in project knime-core by knime.
the class PolyRegLearnerNodeModel method getCellFactory.
/**
 * Creates the cell factory that appends two double cells to every row: the value predicted by the
 * polynomial model and the absolute difference between that prediction and the dependent column.
 * As a side effect each processed row adds its squared error to {@code m_squaredError}.
 *
 * @param dependentIndex index of the dependent (target) column within the processed rows
 * @return the factory producing the "PolyReg prediction" and "Prediction Error" columns
 */
private CellFactory getCellFactory(final int dependentIndex) {
    final int maxDegree = m_settings.getDegree();
    return new CellFactory() {
        @Override
        public DataCell[] getCells(final DataRow row) {
            // m_betas[0] seeds the sum; the remaining coefficients are consumed in column order,
            // one per power (1..maxDegree) of every selected column
            double prediction = m_betas[0];
            double actual = 0;
            int nextBeta = 1;
            final int cellCount = row.getNumCells();
            for (int c = 0; c < cellCount; c++) {
                if (c == dependentIndex) {
                    actual = ((DoubleValue) row.getCell(c)).getDoubleValue();
                    continue;
                }
                if (!m_colSelected[c]) {
                    continue;
                }
                final double x = ((DoubleValue) row.getCell(c)).getDoubleValue();
                double power = 1;
                for (int exponent = 1; exponent <= maxDegree; exponent++) {
                    power *= x;
                    prediction += m_betas[nextBeta] * power;
                    nextBeta++;
                }
            }
            final double absError = Math.abs(prediction - actual);
            m_squaredError += absError * absError;
            return new DataCell[] { new DoubleCell(prediction), new DoubleCell(absError) };
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            final DataColumnSpec predictionSpec =
                new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
            final DataColumnSpec errorSpec =
                new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();
            return new DataColumnSpec[] { predictionSpec, errorSpec };
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor execMon) {
            // progress is intentionally not reported by this factory
        }
    };
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class RegressionPredictorNodeModel method createRearranger.
/**
 * Builds the rearranger that appends one double column holding the regression prediction.
 *
 * @param inSpec spec of the table the prediction is appended to
 * @param regModelSpec PMML spec of the regression model, must not be {@code null}
 * @param regModel the regression model; may be {@code null}, in which case the learning fields of
 *            {@code regModelSpec} are used for validating the input columns (the returned factory
 *            dereferences the model when it computes a cell)
 * @return a rearranger over {@code inSpec} with the prediction column appended
 * @throws InvalidSettingsException if {@code regModelSpec} is {@code null}, a regressor column is
 *             missing in {@code inSpec}, or a regressor column is not double compatible
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final PMMLPortObjectSpec regModelSpec, final PMMLRegressionTranslator regModel) throws InvalidSettingsException {
    if (regModelSpec == null) {
        throw new InvalidSettingsException("No input");
    }

    // the first target field (if any) names the response, otherwise a generic name is used
    String targetCol = "Response";
    for (String firstTarget : regModelSpec.getTargetFields()) {
        targetCol = firstTarget;
        break;
    }

    // collect the regressor names, preferably from the model itself
    final List<String> learnFields = new ArrayList<String>();
    if (regModel == null) {
        learnFields.addAll(regModelSpec.getLearningFields());
    } else {
        for (NumericPredictor predictor : regModel.getRegressionTable().getVariables()) {
            learnFields.add(predictor.getName());
        }
    }

    // resolve every regressor to a double compatible column of the input table
    final int[] colIndices = new int[learnFields.size()];
    for (int pos = 0; pos < colIndices.length; pos++) {
        final String learnCol = learnFields.get(pos);
        final int index = inSpec.findColumnIndex(learnCol);
        if (index < 0) {
            throw new InvalidSettingsException("Missing column for " + "regressor variable : \"" + learnCol + "\"");
        }
        final DataColumnSpec colSpec = inSpec.getColumnSpec(index);
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Incompatible type of " + "column \"" + colSpec.getName() + "\": " + colSpec.getType());
        }
        colIndices[pos] = index;
    }

    // name the appended column after the target; mark it as prediction if the target is in the input
    String appendName = targetCol;
    if (inSpec.containsName(appendName) && !appendName.toLowerCase().endsWith("(prediction)")) {
        appendName = appendName + " (prediction)";
    }
    final String uniqueName = DataTableSpec.getUniqueColumnName(inSpec, appendName);
    final DataColumnSpec predictionSpec = new DataColumnSpecCreator(uniqueName, DoubleCell.TYPE).createSpec();

    final SingleCellFactory factory = new SingleCellFactory(predictionSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final RegressionTable regTable = regModel.getRegressionTable();
            double prediction = regTable.getIntercept();
            int pos = 0;
            for (NumericPredictor predictor : regTable.getVariables()) {
                final DataCell cell = row.getCell(colIndices[pos]);
                pos++;
                if (cell.isMissing()) {
                    // a single missing regressor makes the whole prediction missing
                    return DataType.getMissingCell();
                }
                final double raw = ((DoubleValue) cell).getDoubleValue();
                final double term = predictor.getExponent() == 1 ? raw : Math.pow(raw, predictor.getExponent());
                prediction += predictor.getCoefficient() * term;
            }
            return new DoubleCell(prediction);
        }
    };

    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.append(factory);
    return rearranger;
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class RegressionTreeModel method createDecisionTree.
/**
 * Creates the decision tree for this regression tree model and, if a sample table is given,
 * registers its rows as covered patterns on the tree.
 *
 * @param sampleForHiliting rows to add as covered patterns, or {@code null} to add none
 * @return the created decision tree
 */
public DecisionTree createDecisionTree(final DataTable sampleForHiliting) {
    final DecisionTree tree = getTreeModelRegression().createDecisionTree(getMetaData());
    if (sampleForHiliting == null) {
        return tree;
    }
    final DataTableSpec learnSpec = getLearnAttributeSpec(sampleForHiliting.getDataTableSpec());
    for (DataRow sampleRow : sampleForHiliting) {
        try {
            tree.addCoveredPattern(createLearnAttributeRow(sampleRow, learnSpec), learnSpec);
        } catch (Exception e) {
            // best effort only: log the failure and stop adding further patterns
            NodeLogger.getLogger(getClass()).error("Error updating hilite info in tree view", e);
            break;
        }
    }
    return tree;
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class MissingValueHandling3Table method createMissingValueHandlingTable.
/**
 * Applies the missing value handling described by the column settings to the given table, copying
 * the handled rows into a new buffered table while reporting progress.
 *
 * @param table the table whose missing values are handled
 * @param colSettings the handling settings
 * @param exec used for progress/cancelation and for creating the output container
 * @param warningBuffer receives warning messages that come up during processing
 * @return the table holding the handled rows
 * @throws CanceledExecutionException if the execution is canceled
 * @since 2.10
 */
public static BufferedDataTable createMissingValueHandlingTable(final BufferedDataTable table, final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec, final StringBuilder warningBuffer) throws CanceledExecutionException {
    MissingValueHandling2ColSetting[] effectiveSettings;
    try {
        effectiveSettings = getColSetting(table.getDataTableSpec(), colSettings, false, warningBuffer);
    } catch (InvalidSettingsException ise) {
        // fall back to "no handling" for every column
        LOGGER.coding("getColSetting method is not supposed to throw an exception, ignoring settings", ise);
        final DataTableSpec tableSpec = table.getDataTableSpec();
        effectiveSettings = new MissingValueHandling2ColSetting[tableSpec.getNumColumns()];
        for (int col = 0; col < tableSpec.getNumColumns(); col++) {
            final MissingValueHandling2ColSetting noHandling = new MissingValueHandling2ColSetting(tableSpec.getColumnSpec(col));
            effectiveSettings[col] = noHandling;
            noHandling.setMethod(MissingValueHandling2ColSetting.METHOD_NO_HANDLING);
        }
    }

    // find out whether any column needs table statistics and which ones need the most frequent value
    boolean needStatistics = false;
    final Set<Integer> mostFrequentColumns = new HashSet<Integer>();
    for (int col = 0; col < effectiveSettings.length; col++) {
        switch (effectiveSettings[col].getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                mostFrequentColumns.add(col);
                // intentional fall-through: most frequent also requires the statistics table
            case MissingValueHandling2ColSetting.METHOD_MAX:
            case MissingValueHandling2ColSetting.METHOD_MIN:
            case MissingValueHandling2ColSetting.METHOD_MEAN:
                needStatistics = true;
                break;
            default:
        }
    }

    final MyStatisticsTable statistics;
    final ExecutionMonitor rowProgress;
    if (!needStatistics) {
        statistics = null;
        rowProgress = exec;
    } else {
        // first half of the progress: creating the statistics table
        statistics = new MyStatisticsTable(table, exec.createSubExecutionContext(0.5)) {
            // do not try to get this Iterable in the constructor, it will not work, as long as
            // Statistics3Table does the statistical computation in the constructor.
            @Override
            protected Iterable<Integer> getMostFrequentColumns() {
                return mostFrequentColumns;
            }
        };
        final String statisticsWarning = statistics.m_warningMessage;
        if (statisticsWarning != null) {
            if (warningBuffer.length() > 0) {
                warningBuffer.append('\n');
            }
            warningBuffer.append(statisticsWarning);
        }
        // second half of the progress: iterating the rows
        rowProgress = exec.createSubProgress(0.5);
    }

    final MissingValueHandling3Table handlingTable = new MissingValueHandling3Table(table, statistics, effectiveSettings);
    final BufferedDataContainer container = exec.createDataContainer(handlingTable.getDataTableSpec());
    rowProgress.setMessage("Adding rows...");
    int rowsAdded = 0;
    try {
        for (MissingValueHandling3TableIterator it = new MissingValueHandling3TableIterator(handlingTable, rowProgress); it.hasNext();) {
            final DataRow handledRow = it.next();
            rowProgress.setMessage("Adding row " + (rowsAdded + 1) + " (\"" + handledRow.getKey() + "\")");
            container.addRowToTable(handledRow);
            rowsAdded++;
        }
    } catch (MissingValueHandling3TableIterator.RuntimeCanceledExecutionException rcee) {
        // the iterator wraps the cancelation; rethrow the wrapped cause
        throw rcee.getCause();
    } finally {
        container.close();
    }
    return container.getTable();
}
use of org.knime.core.data.DataRow in project knime-core by knime.
the class DBWriterImpl method writeData.
/**
 * {@inheritDoc}
 *
 * <p>
 * Implementation notes:
 * </p>
 * <ul>
 * <li>If {@code appendData} is set and the table exists, input columns are matched to the database
 * columns by their (replaced, lower-cased) name and checked for type compatibility; input columns
 * without a database counterpart cause a {@link RuntimeException}.</li>
 * <li>If {@code appendData} is set and the table does not exist, it is created.</li>
 * <li>Otherwise the table is dropped (a failing drop is logged and ignored) and created again.</li>
 * <li>Rows are written through one prepared insert statement, either one by one or in batches of
 * {@code batchSize} rows. With {@code failOnError} the first failure triggers a rollback and an
 * exception; without it failures are counted and summarized in the returned message.</li>
 * </ul>
 *
 * @return {@code null} if all rows were written without error, otherwise a message with the number
 *         of errors and rows
 * @deprecated
 */
@Deprecated
@Override
public String writeData(final String table, final RowInput input, final long rowCount, final boolean appendData, final ExecutionMonitor exec, final Map<String, String> sqlTypes, final CredentialsProvider cp, final int batchSize, final boolean insertNullForMissingCols, final boolean failOnError) throws Exception {
    final DatabaseConnectionSettings conSettings = getDatabaseConnectionSettings();
    return conSettings.execute(cp, conn -> {
        exec.setMessage("Waiting for free database connection...");
        final StringBuilder columnNamesForInsertStatement = new StringBuilder("(");
        exec.setMessage("Start writing rows in database...");
        DataTableSpec spec = input.getDataTableSpec();
        // mapping from database column position to input spec column (-1: no input column)
        final int[] mapping;
        // append data to existing table
        if (appendData) {
            if (conSettings.getUtility().tableExists(conn, table)) {
                String query = conSettings.getUtility().getStatementManipulator().forMetadataOnly("SELECT * FROM " + table);
                // the statement is a resource of its own: close it together with its result set
                try (Statement metaStatement = conn.createStatement(); ResultSet rs = metaStatement.executeQuery(query)) {
                    ResultSetMetaData rsmd = rs.getMetaData();
                    final Map<String, Integer> columnNames = new LinkedHashMap<String, Integer>();
                    for (int i = 0; i < spec.getNumColumns(); i++) {
                        String colName = replaceColumnName(spec.getColumnSpec(i).getName());
                        columnNames.put(colName.toLowerCase(), i);
                    }
                    // sanity check: all input columns must exist in the database table
                    ArrayList<String> columnNotInSpec = new ArrayList<String>(columnNames.keySet());
                    for (int i = 0; i < rsmd.getColumnCount(); i++) {
                        String dbColName = replaceColumnName(rsmd.getColumnName(i + 1));
                        if (columnNames.containsKey(dbColName.toLowerCase())) {
                            columnNotInSpec.remove(dbColName.toLowerCase());
                            columnNamesForInsertStatement.append(dbColName).append(',');
                        } else if (insertNullForMissingCols) {
                            // append the column name of a missing column only if the insert null for missing
                            // column option is enabled
                            columnNamesForInsertStatement.append(dbColName).append(',');
                        }
                    }
                    if (rsmd.getColumnCount() > 0) {
                        columnNamesForInsertStatement.deleteCharAt(columnNamesForInsertStatement.length() - 1);
                    }
                    columnNamesForInsertStatement.append(')');
                    if (columnNotInSpec.size() > 0) {
                        throw new RuntimeException("No. of columns in input" + " table > in database; not existing columns: " + columnNotInSpec.toString());
                    }
                    mapping = new int[rsmd.getColumnCount()];
                    for (int i = 0; i < mapping.length; i++) {
                        String name = replaceColumnName(rsmd.getColumnName(i + 1)).toLowerCase();
                        if (!columnNames.containsKey(name)) {
                            mapping[i] = -1;
                            continue;
                        }
                        mapping[i] = columnNames.get(name);
                        DataColumnSpec cspec = spec.getColumnSpec(mapping[i]);
                        int type = rsmd.getColumnType(i + 1);
                        switch (type) {
                            // boolean compatible types
                            case Types.BIT:
                            case Types.BOOLEAN:
                                checkColumnCompatible(cspec, BooleanValue.class, name, rsmd, i);
                                break;
                            // int compatible types
                            case Types.TINYINT:
                            case Types.SMALLINT:
                            case Types.INTEGER:
                                checkColumnCompatible(cspec, IntValue.class, name, rsmd, i);
                                break;
                            // long compatible types
                            case Types.BIGINT:
                                checkColumnCompatible(cspec, LongValue.class, name, rsmd, i);
                                break;
                            // double compatible types
                            case Types.FLOAT:
                            case Types.DOUBLE:
                            case Types.NUMERIC:
                            case Types.DECIMAL:
                            case Types.REAL:
                                checkColumnCompatible(cspec, DoubleValue.class, name, rsmd, i);
                                break;
                            // date-and-time compatible types
                            case Types.DATE:
                            case Types.TIME:
                            case Types.TIMESTAMP:
                                checkColumnCompatible(cspec, DateAndTimeValue.class, name, rsmd, i);
                                break;
                            // blob compatible types
                            case Types.BLOB:
                            case Types.BINARY:
                            case Types.LONGVARBINARY:
                                checkColumnCompatible(cspec, BinaryObjectDataValue.class, name, rsmd, i);
                                break;
                            default:
                                // all other SQL types are not checked
                                break;
                        }
                    }
                }
            } else {
                LOGGER.info("Table \"" + table + "\" does not exist in database, " + "will create new table.");
                // and create new table
                final String query = "CREATE TABLE " + table + " " + createTableStmt(spec, sqlTypes, columnNamesForInsertStatement);
                LOGGER.debug("Executing SQL statement as execute: " + query);
                try (Statement statement = conn.createStatement()) {
                    statement.execute(query);
                }
                if (!conn.getAutoCommit()) {
                    conn.commit();
                }
                mapping = new int[spec.getNumColumns()];
                for (int k = 0; k < mapping.length; k++) {
                    mapping[k] = k;
                }
            }
        } else {
            LOGGER.debug("Append not enabled. Table " + table + " will be dropped if exists.");
            mapping = new int[spec.getNumColumns()];
            for (int k = 0; k < mapping.length; k++) {
                mapping[k] = k;
            }
            Statement statement = null;
            try {
                statement = conn.createStatement();
                // remove existing table (if any)
                final String query = "DROP TABLE " + table;
                LOGGER.debug("Executing SQL statement as execute: " + query);
                statement.execute(query);
            } catch (Throwable t) {
                if (statement == null) {
                    throw new SQLException("Could not create SQL statement," + " reason: " + t.getMessage(), t);
                }
                // a failing drop (e.g. table does not exist) is deliberately only logged
                LOGGER.info("Exception droping table \"" + table + "\": " + t.getMessage() + ". Will create new table.");
            } finally {
                if (!conn.getAutoCommit()) {
                    conn.commit();
                }
            }
            // and create new table; the statement is closed even if the creation fails
            try {
                final String query = "CREATE TABLE " + table + " " + createTableStmt(spec, sqlTypes, columnNamesForInsertStatement);
                LOGGER.debug("Executing SQL statement as execute: " + query);
                statement.execute(query);
            } finally {
                statement.close();
            }
            if (!conn.getAutoCommit()) {
                conn.commit();
            }
        }
        // this is a (temporary) workaround for bug #5802: if there is a DataValue column in the input table
        // we need to use the SQL type for creating the insert statements.
        Map<Integer, Integer> columnTypes = null;
        for (DataColumnSpec cs : spec) {
            if (cs.getType().getPreferredValueClass() == DataValue.class) {
                columnTypes = getColumnTypes(conn, table);
                break;
            }
        }
        final String insertStamtement = createInsertStatment(table, columnNamesForInsertStatement.toString(), mapping, insertNullForMissingCols);
        // 1-based number of the row currently processed (0 before the first row)
        long cnt = 0;
        long errorCnt = 0;
        long allErrors = 0;
        // count number of rows added to current batch
        int curBatchSize = 0;
        LOGGER.debug("Executing SQL statement as prepareStatement: " + insertStamtement);
        // the prepared statement lets us set the column values directly row-by-row; it is closed
        // on every exit path, including failures while switching the auto-commit mode
        try (PreparedStatement stmt = conn.prepareStatement(insertStamtement)) {
            // remember auto-commit flag
            final boolean autoCommit = conn.getAutoCommit();
            DatabaseConnectionSettings.setAutoCommit(conn, false);
            try {
                final TimeZone timezone = conSettings.getTimeZone();
                // look one row ahead to know whether the current row is the last one
                DataRow nextRow = input.poll();
                while (nextRow != null) {
                    final DataRow row = nextRow;
                    cnt++;
                    exec.checkCanceled();
                    if (rowCount > 0) {
                        exec.setProgress(1.0 * cnt / rowCount, "Row " + "#" + cnt);
                    } else {
                        exec.setProgress("Writing Row#" + cnt);
                    }
                    int dbIdx = 1;
                    for (int i = 0; i < mapping.length; i++) {
                        if (mapping[i] < 0) {
                            if (insertNullForMissingCols) {
                                // insert only null if the insert null for missing col option is enabled
                                stmt.setNull(dbIdx++, Types.NULL);
                            }
                        } else {
                            final DataColumnSpec cspec = spec.getColumnSpec(mapping[i]);
                            final DataCell cell = row.getCell(mapping[i]);
                            fillStatement(stmt, dbIdx++, cspec, cell, timezone, columnTypes);
                        }
                    }
                    // if batch mode
                    if (batchSize > 1) {
                        // a new row will be added
                        stmt.addBatch();
                    }
                    // get one more input row to check if 'row' is the last one
                    nextRow = input.poll();
                    curBatchSize++;
                    // flush when the batch is full or the input is exhausted; ">=" makes sure that a
                    // batch size below 1 writes every row instead of only the last one
                    if ((curBatchSize >= batchSize) || nextRow == null) {
                        final int rowsInBatch = curBatchSize;
                        curBatchSize = 0;
                        try {
                            // write batch
                            if (batchSize > 1) {
                                stmt.executeBatch();
                            } else {
                                // or write single row
                                stmt.execute();
                            }
                        } catch (Throwable t) {
                            final String errorMsg;
                            if (batchSize > 1) {
                                errorMsg = "Error while adding rows #" + (cnt - rowsInBatch + 1) + " - #" + cnt + ", reason: " + t.getMessage();
                            } else {
                                errorMsg = "Error while adding row #" + cnt + " (" + row.getKey() + "), reason: " + t.getMessage();
                            }
                            // introduced in KNIME 3.3.2
                            if (failOnError) {
                                try {
                                    // rollback all changes
                                    conn.rollback();
                                    LOGGER.debug("Rollback complete transaction with auto commit=" + autoCommit);
                                } catch (Throwable ex) {
                                    LOGGER.info("Failed rollback after db exception with auto commit=" + autoCommit + ". Rollback error: " + ex.getMessage(), ex);
                                }
                                throw new Exception(errorMsg, t);
                            }
                            // not failing on error: commit what the database accepted so far and go on
                            if (!conn.getAutoCommit()) {
                                conn.commit();
                            }
                            allErrors++;
                            // errorCnt == -1 marks that logging of individual errors has been stopped
                            if (errorCnt > -1) {
                                exec.setMessage(errorMsg);
                                if (errorCnt++ < 10) {
                                    LOGGER.warn(errorMsg);
                                } else {
                                    errorCnt = -1;
                                    LOGGER.warn(errorMsg + " - more errors...", t);
                                }
                            }
                        } finally {
                            // clear batch if in batch mode
                            if (batchSize > 1) {
                                stmt.clearBatch();
                            }
                        }
                    }
                }
                if (!conn.getAutoCommit()) {
                    conn.commit();
                }
                if (allErrors == 0) {
                    return null;
                } else {
                    return "Errors \"" + allErrors + "\" writing " + cnt + " rows.";
                }
            } finally {
                DatabaseConnectionSettings.setAutoCommit(conn, autoCommit);
            }
        }
    });
}

/**
 * Ensures that an input column can be written into a database column of a given SQL type family.
 *
 * @param cspec spec of the input column
 * @param valueClass value class the input column type has to be compatible with
 * @param name (lower-cased) database column name, used in the error message
 * @param rsmd meta data of the database table, used to look up the SQL type name for the message
 * @param position 0-based position of the column in the database table
 * @throws SQLException if the SQL type name cannot be read from the meta data
 * @throws RuntimeException if the input column type is not compatible with {@code valueClass}
 */
private static void checkColumnCompatible(final DataColumnSpec cspec, final Class<? extends DataValue> valueClass, final String name, final ResultSetMetaData rsmd, final int position) throws SQLException {
    if (!cspec.getType().isCompatible(valueClass)) {
        throw new RuntimeException("Column \"" + name + "\" of type \"" + cspec.getType() + "\" from input does not match type " + "\"" + rsmd.getColumnTypeName(position + 1) + "\" in database at position " + position);
    }
}
Aggregations