Search in sources :

Example 61 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class AddEmptyRowsNodeModel method createNewRowsTable.

private BufferedDataTable createNewRowsTable(final DataTableSpec inSpec, final long rowCount, final ExecutionContext subExec) throws CanceledExecutionException {
    DataCell[] cells = new DataCell[inSpec.getNumColumns()];
    for (int c = 0; c < cells.length; c++) {
        DataType type = inSpec.getColumnSpec(c).getType();
        if (type.isASuperTypeOf(DoubleCell.TYPE)) {
            if (m_config.isUseMissingDouble()) {
                cells[c] = DataType.getMissingCell();
            } else {
                cells[c] = new DoubleCell(m_config.getFillValueDouble());
            }
        } else if (type.isASuperTypeOf(IntCell.TYPE)) {
            if (m_config.isUseMissingInt()) {
                cells[c] = DataType.getMissingCell();
            } else {
                cells[c] = new IntCell(m_config.getFillValueInt());
            }
        } else if (type.isASuperTypeOf(StringCell.TYPE)) {
            if (m_config.isUseMissingString()) {
                cells[c] = DataType.getMissingCell();
            } else {
                cells[c] = new StringCell(m_config.getFillValueString());
            }
        } else {
            cells[c] = DataType.getMissingCell();
        }
    }
    BufferedDataContainer cont = subExec.createDataContainer(inSpec);
    for (long i = 0; i < rowCount; i++) {
        RowKey key = new RowKey(m_config.getNewRowKeyPrefix() + i);
        subExec.setProgress(i / (double) rowCount, "Creating row \"" + key + "\", " + i + "/" + rowCount);
        subExec.checkCanceled();
        cont.addRowToTable(new DefaultRow(key, cells));
    }
    cont.close();
    return cont.getTable();
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) StringCell(org.knime.core.data.def.StringCell) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) DefaultRow(org.knime.core.data.def.DefaultRow) IntCell(org.knime.core.data.def.IntCell)

Example 62 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class AggregateOutputNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // retrieve variables from the stack which the head of this
    // loop hopefully put there:
    int count;
    int maxCount;
    try {
        count = peekFlowVariableInt("currentIteration");
        maxCount = peekFlowVariableInt("maxIterations");
    } catch (NoSuchElementException e) {
        throw new Exception("No matching Loop Start node!", e);
    }
    if (count < 0 || count >= maxCount) {
        throw new Exception("Conflicting loop variables, count is " + count + " and max count is " + maxCount);
    }
    final BufferedDataTable in = inData[0];
    final DataTableSpec inSpec = in.getDataTableSpec();
    if (count == 0) {
        m_firstIterationSpec = in.getDataTableSpec();
        m_predictionTable = exec.createDataContainer(createPredictionSpec(in.getDataTableSpec()));
    } else if (m_predictionTable == null) {
        throw new Exception("Loop Head claims this is NOT the first iteration" + " but the tail believes it is?!");
    } else {
        if (!inSpec.equalStructure(m_firstIterationSpec)) {
            StringBuilder error = new StringBuilder("Input table's structure differs from reference " + "(first iteration) table: ");
            if (inSpec.getNumColumns() != m_firstIterationSpec.getNumColumns()) {
                error.append("different column counts ");
                error.append(inSpec.getNumColumns());
                error.append(" vs. ").append(m_firstIterationSpec.getNumColumns());
            } else {
                for (int i = 0; i < inSpec.getNumColumns(); i++) {
                    DataColumnSpec inCol = inSpec.getColumnSpec(i);
                    DataColumnSpec predCol = m_firstIterationSpec.getColumnSpec(i);
                    if (!inCol.equalStructure(predCol)) {
                        error.append("Column ").append(i).append(" [");
                        error.append(inCol).append("] vs. [");
                        error.append(predCol).append("]");
                    }
                }
            }
            throw new IllegalArgumentException(error.toString());
        }
    }
    final int rowCount = in.getRowCount();
    final int targetColIndex = in.getDataTableSpec().findColumnIndex(m_settings.targetColumn());
    final int predictColIndex = in.getDataTableSpec().findColumnIndex(m_settings.predictionColumn());
    final boolean numericMode = in.getDataTableSpec().getColumnSpec(predictColIndex).getType().isCompatible(DoubleValue.class);
    ExecutionMonitor subExec = exec.createSubProgress(count == maxCount - 1 ? 0.9 : 1);
    final DataCell foldNumber = new IntCell(m_foldStatistics.size());
    if (numericMode) {
        double errorSum = 0;
        int r = 0;
        for (DataRow row : in) {
            RowKey key = row.getKey();
            DoubleValue target = (DoubleValue) row.getCell(targetColIndex);
            DoubleValue predict = (DoubleValue) row.getCell(predictColIndex);
            double d = (target.getDoubleValue() - predict.getDoubleValue());
            errorSum += d * d;
            r++;
            if (m_settings.addFoldId()) {
                m_predictionTable.addRowToTable(new AppendedColumnRow(row.getKey(), row, foldNumber));
            } else {
                m_predictionTable.addRowToTable(row);
            }
            subExec.setProgress(r / (double) rowCount, "Calculating output " + r + "/" + rowCount + " (\"" + key + "\")");
            subExec.checkCanceled();
        }
        DataRow stats = new DefaultRow(new RowKey("fold " + m_foldStatistics.size()), new DoubleCell(errorSum), new DoubleCell(errorSum / rowCount), new IntCell(rowCount));
        m_foldStatistics.add(stats);
    } else {
        int incorrect = 0;
        int r = 0;
        for (DataRow row : in) {
            RowKey key = row.getKey();
            DataCell target = row.getCell(targetColIndex);
            DataCell predict = row.getCell(predictColIndex);
            if (!target.equals(predict)) {
                incorrect++;
            }
            r++;
            if (m_settings.addFoldId()) {
                m_predictionTable.addRowToTable(new AppendedColumnRow(row.getKey(), row, foldNumber));
            } else {
                m_predictionTable.addRowToTable(row);
            }
            subExec.setProgress(r / (double) rowCount, "Calculating output " + r + "/" + rowCount + " (\"" + key + "\")");
            subExec.checkCanceled();
        }
        DataRow stats = new DefaultRow(new RowKey("fold " + m_foldStatistics.size()), new DoubleCell(100.0 * incorrect / rowCount), new IntCell(rowCount), new IntCell(incorrect));
        m_foldStatistics.add(stats);
    }
    if (count < maxCount - 1) {
        continueLoop();
        return new BufferedDataTable[2];
    } else {
        BufferedDataContainer cont = exec.createDataContainer(numericMode ? NUMERIC_STATISTICS_SPEC : NOMINAL_STATISTICS_SPEC);
        for (DataRow row : m_foldStatistics) {
            cont.addRowToTable(row);
        }
        cont.close();
        m_predictionTable.close();
        return new BufferedDataTable[] { m_predictionTable.getTable(), cont.getTable() };
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) IntCell(org.knime.core.data.def.IntCell) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) DefaultRow(org.knime.core.data.def.DefaultRow) NoSuchElementException(java.util.NoSuchElementException) AppendedColumnRow(org.knime.base.data.append.column.AppendedColumnRow)

Example 63 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class NaiveBayesCellFactory method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final DataCell predictedClassCell = m_model.getMostLikelyClassCell(m_attributeNames, row);
    if (predictedClassCell == null) {
        throw new IllegalStateException("No class found for row with id " + row.getKey());
    }
    if (!m_inclClassProbVals) {
        return new DataCell[] { predictedClassCell };
    }
    final Collection<DataCell> resultCells = new ArrayList<>(m_sortedClassVals.size() + 1);
    final double[] classProbs = m_model.getClassProbabilities(m_attributeNames, row, m_sortedClassVals, true);
    // add the probability per class
    for (final double classVal : classProbs) {
        resultCells.add(new DoubleCell(classVal));
    }
    // add the class cell last
    resultCells.add(predictedClassCell);
    return resultCells.toArray(new DataCell[0]);
}
Also used : DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell)

Example 64 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class LinReg2Predictor method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    if (hasMissingValues(row)) {
        return createMissingOutput();
    }
    DataCell[] cells = new DataCell[1];
    // column vector
    RealMatrix x = MatrixUtils.createRealMatrix(1, m_parameters.size());
    for (int i = 0; i < m_parameters.size(); i++) {
        String parameter = m_parameters.get(i);
        String predictor = null;
        String value = null;
        boolean rowIsEmpty = true;
        for (Iterator<String> iter = m_predictors.iterator(); iter.hasNext(); ) {
            predictor = iter.next();
            value = m_ppMatrix.getValue(parameter, predictor, null);
            if (null != value) {
                rowIsEmpty = false;
                break;
            }
        }
        if (rowIsEmpty) {
            x.setEntry(0, i, 1);
        } else {
            if (m_factors.contains(predictor)) {
                List<DataCell> values = m_values.get(predictor);
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                int index = values.indexOf(cell);
                /* When building a general regression model, for each
                    categorical fields, there is one category used as the
                    default baseline and therefore it didn't show in the
                    ParameterList in PMML. This design for the training is fine,
                    but in the prediction, when the input of Employment is
                    the default baseline, the parameters should all be 0.
                    See the commit message for an example and more details.
                    */
                if (index > 0) {
                    x.setEntry(0, i + index - 1, 1);
                    i += values.size() - 2;
                }
            } else {
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                double radix = ((DoubleValue) cell).getDoubleValue();
                double exponent = Integer.valueOf(value);
                x.setEntry(0, i, Math.pow(radix, exponent));
            }
        }
    }
    // column vector
    RealMatrix r = x.multiply(m_beta);
    double estimate = r.getEntry(0, 0);
    if (m_content.getOffsetValue() != null) {
        estimate = estimate + m_content.getOffsetValue();
    }
    cells[0] = new DoubleCell(estimate);
    return cells;
}
Also used : RealMatrix(org.apache.commons.math3.linear.RealMatrix) DoubleValue(org.knime.core.data.DoubleValue) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell)

Example 65 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class RegressionPredictorCellFactory method createColumnSpec.

/**
 * Creates the spec of the output if possible.
 *
 * @param portSpec the spec of the pmml input port
 * @param tableSpec the spec of the data input port
 * @param settings settings for the predictor node
 * @return The spec of the output or null
 * @throws InvalidSettingsException when tableSpec and portSpec do not match
 */
public static DataColumnSpec[] createColumnSpec(final PMMLPortObjectSpec portSpec, final DataTableSpec tableSpec, final RegressionPredictorSettings settings) throws InvalidSettingsException {
    // Assertions
    if (portSpec.getTargetCols().isEmpty()) {
        throw new InvalidSettingsException("The general regression model" + " does not specify a target column.");
    }
    for (DataColumnSpec learningColSpec : portSpec.getLearningCols()) {
        String learningCol = learningColSpec.getName();
        if (tableSpec.containsName(learningCol)) {
            DataColumnSpec colSpec = tableSpec.getColumnSpec(learningCol);
            if (learningColSpec.getType().isCompatible(NominalValue.class)) {
                if (!colSpec.getType().isCompatible(BitVectorValue.class) && !colSpec.getType().isCompatible(ByteVectorValue.class) && !colSpec.getType().isCompatible(NominalValue.class)) {
                    throw new InvalidSettingsException("The column \"" + learningCol + "\" in the table of prediction " + "is expected to be  compatible with " + "\"NominalValue\".");
                }
            } else if (learningColSpec.getType().isCompatible(DoubleValue.class) && !colSpec.getType().isCompatible(DoubleValue.class)) {
                throw new InvalidSettingsException("The column \"" + learningCol + "\" in the table of prediction " + "is expected to be numeric.");
            }
        } else {
            throw new InvalidSettingsException("The table for prediction " + "does not contain the column \"" + learningCol + "\".");
        }
    }
    // The list of added columns
    List<DataColumnSpec> newColsSpec = new ArrayList<DataColumnSpec>();
    String targetCol = portSpec.getTargetFields().get(0);
    DataColumnSpec targetColSpec = portSpec.getDataTableSpec().getColumnSpec(targetCol);
    if (settings.getIncludeProbabilities() && targetColSpec.getType().isCompatible(NominalValue.class)) {
        if (!targetColSpec.getDomain().hasValues()) {
            return null;
        }
        List<DataCell> targetCategories = new ArrayList<DataCell>();
        targetCategories.addAll(targetColSpec.getDomain().getValues());
        for (DataCell value : targetCategories) {
            String name = "P (" + targetCol + "=" + value.toString() + ")" + settings.getPropColumnSuffix();
            String newColName = DataTableSpec.getUniqueColumnName(tableSpec, name);
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(newColName, DoubleCell.TYPE);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0));
            colSpecCreator.setDomain(domainCreator.createDomain());
            newColsSpec.add(colSpecCreator.createSpec());
        }
    }
    String targetColName = settings.getHasCustomPredictionName() ? settings.getCustomPredictionName() : "Prediction (" + targetCol + ")";
    String uniqueTargetColName = DataTableSpec.getUniqueColumnName(tableSpec, targetColName);
    DataType targetType = targetColSpec.getType().isCompatible(NominalValue.class) ? targetColSpec.getType() : DoubleCell.TYPE;
    DataColumnSpecCreator targetColSpecCreator = new DataColumnSpecCreator(uniqueTargetColName, targetType);
    if (targetColSpec.getType().isCompatible(NominalValue.class)) {
        DataColumnDomainCreator targetDomainCreator = new DataColumnDomainCreator(targetColSpec.getDomain());
        targetColSpecCreator.setDomain(targetDomainCreator.createDomain());
    }
    newColsSpec.add(targetColSpecCreator.createSpec());
    return newColsSpec.toArray(new DataColumnSpec[0]);
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) NominalValue(org.knime.core.data.NominalValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Aggregations

DoubleCell (org.knime.core.data.def.DoubleCell)189 DataCell (org.knime.core.data.DataCell)129 IntCell (org.knime.core.data.def.IntCell)67 DefaultRow (org.knime.core.data.def.DefaultRow)66 StringCell (org.knime.core.data.def.StringCell)65 DataRow (org.knime.core.data.DataRow)57 DataTableSpec (org.knime.core.data.DataTableSpec)55 ArrayList (java.util.ArrayList)42 DataColumnSpec (org.knime.core.data.DataColumnSpec)42 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)41 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)39 RowKey (org.knime.core.data.RowKey)37 DoubleValue (org.knime.core.data.DoubleValue)35 BufferedDataTable (org.knime.core.node.BufferedDataTable)28 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)22 DataType (org.knime.core.data.DataType)20 LinkedHashMap (java.util.LinkedHashMap)17 HashMap (java.util.HashMap)13 Point (java.awt.Point)12