use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class AddEmptyRowsNodeModel method createNewRowsTable.
/**
 * Creates a table with {@code rowCount} rows that all carry the same cell values.
 *
 * <p>The fill cell of each column is chosen from the column type, checked in this
 * order: a type that is a super type of {@code DoubleCell.TYPE}, then of
 * {@code IntCell.TYPE}, then of {@code StringCell.TYPE}. For each of these the
 * configuration decides between a missing cell and the configured fill value.
 * Every other column gets a missing cell.</p>
 *
 * <p>Row keys are the configured key prefix followed by the zero-based row index.</p>
 *
 * @param inSpec the spec the generated rows must conform to
 * @param rowCount the number of rows to generate
 * @param subExec used to create the container, report progress and check for cancellation
 * @return the table holding the generated rows
 * @throws CanceledExecutionException if the execution was canceled while adding rows
 */
private BufferedDataTable createNewRowsTable(final DataTableSpec inSpec, final long rowCount, final ExecutionContext subExec) throws CanceledExecutionException {
    // The cell array is shared by all rows, so it is assembled once up front.
    final DataCell[] template = new DataCell[inSpec.getNumColumns()];
    for (int col = 0; col < template.length; col++) {
        final DataType colType = inSpec.getColumnSpec(col).getType();
        final DataCell fill;
        if (colType.isASuperTypeOf(DoubleCell.TYPE)) {
            fill = m_config.isUseMissingDouble()
                ? DataType.getMissingCell()
                : new DoubleCell(m_config.getFillValueDouble());
        } else if (colType.isASuperTypeOf(IntCell.TYPE)) {
            fill = m_config.isUseMissingInt()
                ? DataType.getMissingCell()
                : new IntCell(m_config.getFillValueInt());
        } else if (colType.isASuperTypeOf(StringCell.TYPE)) {
            fill = m_config.isUseMissingString()
                ? DataType.getMissingCell()
                : new StringCell(m_config.getFillValueString());
        } else {
            // no fill value is configurable for any other type
            fill = DataType.getMissingCell();
        }
        template[col] = fill;
    }

    final BufferedDataContainer container = subExec.createDataContainer(inSpec);
    for (long rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        final RowKey rowKey = new RowKey(m_config.getNewRowKeyPrefix() + rowIndex);
        final double fractionDone = rowIndex / (double) rowCount;
        final String message = "Creating row \"" + rowKey + "\", " + rowIndex + "/" + rowCount;
        subExec.setProgress(fractionDone, message);
        subExec.checkCanceled();
        container.addRowToTable(new DefaultRow(rowKey, template));
    }
    container.close();
    return container.getTable();
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class AggregateOutputNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Processes one loop iteration: appends the rows of the first input table to the
 * prediction container kept in {@code m_predictionTable}, computes one statistics row
 * for this iteration and stores it in {@code m_foldStatistics}. On every iteration but
 * the last it calls {@code continueLoop()} and returns an array of two {@code null}
 * tables; on the last iteration it returns the collected prediction table and the
 * statistics table.</p>
 *
 * <p>The statistics are computed in "numeric mode" (sum of squared differences and that
 * sum divided by the row count) when the prediction column is compatible with
 * {@code DoubleValue}; otherwise the number of rows whose target and prediction cells
 * differ is counted.</p>
 *
 * @throws Exception if the loop flow variables are missing or inconsistent, or if the
 *         prediction container does not exist although this is not the first iteration
 * @throws IllegalArgumentException if the input structure differs from the structure
 *         seen in the first iteration
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
// retrieve variables from the stack which the head of this
// loop hopefully put there:
int count;
int maxCount;
try {
count = peekFlowVariableInt("currentIteration");
maxCount = peekFlowVariableInt("maxIterations");
} catch (NoSuchElementException e) {
throw new Exception("No matching Loop Start node!", e);
}
// the iteration counter must lie in [0, maxCount)
if (count < 0 || count >= maxCount) {
throw new Exception("Conflicting loop variables, count is " + count + " and max count is " + maxCount);
}
final BufferedDataTable in = inData[0];
final DataTableSpec inSpec = in.getDataTableSpec();
if (count == 0) {
// first iteration: remember the reference spec and open the prediction container
// NOTE(review): m_foldStatistics is not cleared here - presumably reset elsewhere; confirm
m_firstIterationSpec = in.getDataTableSpec();
m_predictionTable = exec.createDataContainer(createPredictionSpec(in.getDataTableSpec()));
} else if (m_predictionTable == null) {
throw new Exception("Loop Head claims this is NOT the first iteration" + " but the tail believes it is?!");
} else {
// later iterations must deliver the same structure as the first one
if (!inSpec.equalStructure(m_firstIterationSpec)) {
// build a message that names either the differing column counts or each differing column
StringBuilder error = new StringBuilder("Input table's structure differs from reference " + "(first iteration) table: ");
if (inSpec.getNumColumns() != m_firstIterationSpec.getNumColumns()) {
error.append("different column counts ");
error.append(inSpec.getNumColumns());
error.append(" vs. ").append(m_firstIterationSpec.getNumColumns());
} else {
for (int i = 0; i < inSpec.getNumColumns(); i++) {
DataColumnSpec inCol = inSpec.getColumnSpec(i);
DataColumnSpec predCol = m_firstIterationSpec.getColumnSpec(i);
if (!inCol.equalStructure(predCol)) {
error.append("Column ").append(i).append(" [");
error.append(inCol).append("] vs. [");
error.append(predCol).append("]");
}
}
}
throw new IllegalArgumentException(error.toString());
}
}
final int rowCount = in.getRowCount();
final int targetColIndex = in.getDataTableSpec().findColumnIndex(m_settings.targetColumn());
final int predictColIndex = in.getDataTableSpec().findColumnIndex(m_settings.predictionColumn());
// numeric mode is decided by the type of the prediction column only
final boolean numericMode = in.getDataTableSpec().getColumnSpec(predictColIndex).getType().isCompatible(DoubleValue.class);
// on the last iteration only 90% of the progress is given to the row loop
// (presumably leaving room for writing the statistics table - confirm)
ExecutionMonitor subExec = exec.createSubProgress(count == maxCount - 1 ? 0.9 : 1);
// fold id = number of statistics rows collected before this iteration
final DataCell foldNumber = new IntCell(m_foldStatistics.size());
if (numericMode) {
// accumulate the sum of squared differences between target and prediction
double errorSum = 0;
int r = 0;
for (DataRow row : in) {
RowKey key = row.getKey();
// NOTE(review): both casts are unchecked; a cell that is not a DoubleValue
// (e.g. a missing cell, or a non-numeric target column) would fail here - confirm
// that this is ruled out upstream
DoubleValue target = (DoubleValue) row.getCell(targetColIndex);
DoubleValue predict = (DoubleValue) row.getCell(predictColIndex);
double d = (target.getDoubleValue() - predict.getDoubleValue());
errorSum += d * d;
r++;
// copy the row to the prediction table, optionally with the fold id appended
if (m_settings.addFoldId()) {
m_predictionTable.addRowToTable(new AppendedColumnRow(row.getKey(), row, foldNumber));
} else {
m_predictionTable.addRowToTable(row);
}
subExec.setProgress(r / (double) rowCount, "Calculating output " + r + "/" + rowCount + " (\"" + key + "\")");
subExec.checkCanceled();
}
// statistics row: sum of squared errors, that sum divided by the row count, row count
DataRow stats = new DefaultRow(new RowKey("fold " + m_foldStatistics.size()), new DoubleCell(errorSum), new DoubleCell(errorSum / rowCount), new IntCell(rowCount));
m_foldStatistics.add(stats);
} else {
// count the rows whose target and prediction cells are not equal
int incorrect = 0;
int r = 0;
for (DataRow row : in) {
RowKey key = row.getKey();
DataCell target = row.getCell(targetColIndex);
DataCell predict = row.getCell(predictColIndex);
if (!target.equals(predict)) {
incorrect++;
}
r++;
// copy the row to the prediction table, optionally with the fold id appended
if (m_settings.addFoldId()) {
m_predictionTable.addRowToTable(new AppendedColumnRow(row.getKey(), row, foldNumber));
} else {
m_predictionTable.addRowToTable(row);
}
subExec.setProgress(r / (double) rowCount, "Calculating output " + r + "/" + rowCount + " (\"" + key + "\")");
subExec.checkCanceled();
}
// statistics row: percentage of differing rows, row count, number of differing rows
DataRow stats = new DefaultRow(new RowKey("fold " + m_foldStatistics.size()), new DoubleCell(100.0 * incorrect / rowCount), new IntCell(rowCount), new IntCell(incorrect));
m_foldStatistics.add(stats);
}
if (count < maxCount - 1) {
// not the last iteration: request another one and return two null tables
continueLoop();
return new BufferedDataTable[2];
} else {
// last iteration: write the collected statistics rows and close both containers
BufferedDataContainer cont = exec.createDataContainer(numericMode ? NUMERIC_STATISTICS_SPEC : NOMINAL_STATISTICS_SPEC);
for (DataRow row : m_foldStatistics) {
cont.addRowToTable(row);
}
cont.close();
m_predictionTable.close();
return new BufferedDataTable[] { m_predictionTable.getTable(), cont.getTable() };
}
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class NaiveBayesCellFactory method getCells.
/**
 * {@inheritDoc}
 *
 * <p>Returns the most likely class cell for the row. If class probabilities are
 * enabled, the result holds one {@code DoubleCell} per probability delivered by the
 * model, followed by the class cell as last element.</p>
 *
 * @throws IllegalStateException if the model yields no class for the row
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final DataCell winner = m_model.getMostLikelyClassCell(m_attributeNames, row);
    if (winner == null) {
        throw new IllegalStateException("No class found for row with id " + row.getKey());
    }
    if (m_inclClassProbVals) {
        final List<DataCell> output = new ArrayList<>(m_sortedClassVals.size() + 1);
        final double[] probabilities =
            m_model.getClassProbabilities(m_attributeNames, row, m_sortedClassVals, true);
        // probabilities first ...
        for (int k = 0; k < probabilities.length; k++) {
            output.add(new DoubleCell(probabilities[k]));
        }
        // ... the predicted class goes last
        output.add(winner);
        return output.toArray(new DataCell[0]);
    }
    return new DataCell[] { winner };
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class LinReg2Predictor method getCells.
/**
 * {@inheritDoc}
 *
 * <p>Returns the missing output if the row has missing values. Otherwise fills a
 * 1 x n matrix (n = number of parameters) from the row, multiplies it with
 * {@code m_beta}, adds the offset value if one is set and returns the result as a
 * single {@code DoubleCell}.</p>
 */
@Override
public DataCell[] getCells(final DataRow row) {
    if (hasMissingValues(row)) {
        return createMissingOutput();
    }

    final int parameterCount = m_parameters.size();
    // one matrix row with one entry per parameter
    final RealMatrix design = MatrixUtils.createRealMatrix(1, parameterCount);
    for (int p = 0; p < parameterCount; p++) {
        final String parameter = m_parameters.get(p);

        // find the first predictor that has an entry for this parameter
        String matchedPredictor = null;
        String exponentText = null;
        for (final String candidate : m_predictors) {
            final String entry = m_ppMatrix.getValue(parameter, candidate, null);
            if (entry != null) {
                matchedPredictor = candidate;
                exponentText = entry;
                break;
            }
        }

        if (exponentText == null) {
            // no predictor is tied to this parameter: its entry is the constant 1
            design.setEntry(0, p, 1);
            continue;
        }

        if (m_factors.contains(matchedPredictor)) {
            final List<DataCell> categories = m_values.get(matchedPredictor);
            final DataCell observed = row.getCell(m_parameterI.get(parameter));
            final int categoryIndex = categories.indexOf(observed);
            /*
             * When a general regression model is built, one category of every
             * categorical field serves as the default baseline and therefore does
             * not show up in the PMML ParameterList. That is fine for training, but
             * during prediction an input equal to the baseline must leave all
             * parameters of that field at 0. See the commit message for an example
             * and more details.
             */
            if (categoryIndex > 0) {
                design.setEntry(0, p + categoryIndex - 1, 1);
                // skip ahead over the parameters of this factor
                p += categories.size() - 2;
            }
        } else {
            final DataCell observed = row.getCell(m_parameterI.get(parameter));
            final double base = ((DoubleValue) observed).getDoubleValue();
            final double power = Integer.parseInt(exponentText);
            design.setEntry(0, p, Math.pow(base, power));
        }
    }

    // 1 x n times beta; the estimate is read from entry (0, 0)
    final RealMatrix product = design.multiply(m_beta);
    double estimate = product.getEntry(0, 0);
    if (m_content.getOffsetValue() != null) {
        estimate += m_content.getOffsetValue();
    }
    return new DataCell[] { new DoubleCell(estimate) };
}
use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
the class RegressionPredictorCellFactory method createColumnSpec.
/**
 * Derives the specs of the columns the predictor appends, if that is possible.
 *
 * <p>First every learning column of the model is checked against the data table: it
 * must exist and have a compatible type. The result then holds, in this order, one
 * probability column per domain value of the target (only if probabilities are
 * requested and the target is nominal) and the prediction column.</p>
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return the specs of the appended columns, or {@code null} if probability columns
 *         are requested for a nominal target whose domain has no values
 * @throws InvalidSettingsException when tableSpec and portSpec do not match
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    // the model must name a target
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model does not specify a target column.");
    }

    // every learning column has to be present with a matching kind of type
    for (final DataColumnSpec learnSpec : portSpec.getLearningCols()) {
        final String name = learnSpec.getName();
        if (!tableSpec.containsName(name)) {
            throw new InvalidSettingsException("The table for prediction does not contain the column \"" + name + "\".");
        }
        final DataType actual = tableSpec.getColumnSpec(name).getType();
        final DataType expected = learnSpec.getType();
        if (expected.isCompatible(NominalValue.class)) {
            final boolean acceptable = actual.isCompatible(BitVectorValue.class)
                || actual.isCompatible(ByteVectorValue.class)
                || actual.isCompatible(NominalValue.class);
            if (!acceptable) {
                throw new InvalidSettingsException("The column \"" + name + "\" in the table of prediction is expected to be compatible with \"NominalValue\".");
            }
        } else if (expected.isCompatible(DoubleValue.class) && !actual.isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("The column \"" + name + "\" in the table of prediction is expected to be numeric.");
        }
    }

    final List<DataColumnSpec> appended = new ArrayList<>();
    final String targetCol = portSpec.getTargetFields().get(0);
    final DataColumnSpec targetColSpec = portSpec.getDataTableSpec().getColumnSpec(targetCol);
    final boolean nominalTarget = targetColSpec.getType().isCompatible(NominalValue.class);

    // one probability column in [0, 1] per possible target value
    if (settings.getIncludeProbabilities() && nominalTarget) {
        if (!targetColSpec.getDomain().hasValues()) {
            return null;
        }
        for (final DataCell category : targetColSpec.getDomain().getValues()) {
            final String wanted = "P (" + targetCol + "=" + category.toString() + ")" + settings.getPropColumnSuffix();
            final String unique = DataTableSpec.getUniqueColumnName(tableSpec, wanted);
            final DataColumnSpecCreator probCreator = new DataColumnSpecCreator(unique, DoubleCell.TYPE);
            final DataColumnDomainCreator unitInterval =
                new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0));
            probCreator.setDomain(unitInterval.createDomain());
            appended.add(probCreator.createSpec());
        }
    }

    // the prediction column: custom name if configured, default name otherwise
    final String predictionName;
    if (settings.getHasCustomPredictionName()) {
        predictionName = settings.getCustomPredictionName();
    } else {
        predictionName = "Prediction (" + targetCol + ")";
    }
    final String uniquePredictionName = DataTableSpec.getUniqueColumnName(tableSpec, predictionName);

    final DataColumnSpecCreator predictionCreator;
    if (nominalTarget) {
        // nominal predictions keep the type and the domain of the target column
        predictionCreator = new DataColumnSpecCreator(uniquePredictionName, targetColSpec.getType());
        predictionCreator.setDomain(new DataColumnDomainCreator(targetColSpec.getDomain()).createDomain());
    } else {
        predictionCreator = new DataColumnSpecCreator(uniquePredictionName, DoubleCell.TYPE);
    }
    appended.add(predictionCreator.createSpec());
    return appended.toArray(new DataColumnSpec[0]);
}
Aggregations