Search in sources :

Example 11 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class Statistics2Table method createNominalValueTable.

/**
 * Builds a table listing the nominal values that were collected per column
 * together with the number of times each value occurred.
 * <p>
 * Every collected column contributes two adjacent output cells per row: the
 * nominal value followed by its count. Columns whose values are exhausted
 * before the others are padded with missing cells; the table ends as soon as
 * no column has a value left.
 *
 * @param nominal passed on to {@code createOutSpecNominal} (together with
 *            the input spec) to derive the output table spec
 * @return data table with nominal values for each column
 */
public DataTable createNominalValueTable(final List<String> nominal) {
    final DataTableSpec tableSpec = createOutSpecNominal(m_spec, nominal);
    // one iterator per (value, count) column pair of the output spec
    final Iterator<?>[] entryIterators = new Iterator<?>[tableSpec.getNumColumns() / 2];
    int slot = 0;
    for (int col = 0; col < m_nominalValues.length; col++) {
        if (m_nominalValues[col] == null) {
            continue;
        }
        entryIterators[slot] = m_nominalValues[col].entrySet().iterator();
        slot++;
    }
    final DataContainer container = new DataContainer(tableSpec);
    final int pairCount = entryIterators.length;
    for (int rowNr = 0;; rowNr++) {
        final DataCell[] rowCells = new DataCell[2 * pairCount];
        boolean anyValue = false;
        for (int pair = 0; pair < pairCount; pair++) {
            final Iterator<?> entries = entryIterators[pair];
            final int valuePos = 2 * pair;
            final int countPos = valuePos + 1;
            if (entries == null || !entries.hasNext()) {
                // nothing (left) for this column: pad with missing cells
                rowCells[valuePos] = DataType.getMissingCell();
                rowCells[countPos] = DataType.getMissingCell();
                continue;
            }
            @SuppressWarnings("unchecked")
            final Map.Entry<DataCell, Integer> entry = (Map.Entry<DataCell, Integer>) entries.next();
            rowCells[valuePos] = entry.getKey();
            rowCells[countPos] = new IntCell(entry.getValue());
            anyValue = true;
        }
        if (!anyValue) {
            // all iterators are exhausted, do not add an all-missing row
            break;
        }
        container.addRowToTable(new DefaultRow(RowKey.createRowKey(rowNr), rowCells));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) IntCell(org.knime.core.data.def.IntCell) MutableInteger(org.knime.core.util.MutableInteger) DataContainer(org.knime.core.data.container.DataContainer) Entry(java.util.Map.Entry) Iterator(java.util.Iterator) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 12 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class AppendVariableToTableNodeModel method createColumnRearranger.

/**
 * Creates a rearranger that appends one column per selected flow variable to
 * the given table spec. Every appended column holds the same constant value
 * (the current value of the flow variable) in each row.
 * <p>
 * A variable whose name collides with an existing column gets the suffix
 * {@code " (variable)"}; if the name is still taken, a running number
 * {@code " (#n)"} is appended until it is unique.
 *
 * @param spec the spec of the table the columns are appended to
 * @return the rearranger appending the variable columns
 * @throws InvalidSettingsException if no variable is selected, a selected
 *             variable does not exist with the expected type, or its type is
 *             not supported
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    ColumnRearranger arranger = new ColumnRearranger(spec);
    // names already in use; grows as new columns are added
    Set<String> nameHash = new HashSet<String>();
    for (DataColumnSpec c : spec) {
        nameHash.add(c.getName());
    }
    List<Pair<String, FlowVariable.Type>> vars;
    if (m_settings.getIncludeAll()) {
        vars = getAllVariables();
    } else {
        vars = m_settings.getVariablesOfInterest();
    }
    if (vars.isEmpty()) {
        throw new InvalidSettingsException("No variables selected");
    }
    DataColumnSpec[] specs = new DataColumnSpec[vars.size()];
    final DataCell[] values = new DataCell[vars.size()];
    for (int i = 0; i < vars.size(); i++) {
        Pair<String, FlowVariable.Type> c = vars.get(i);
        String name = c.getFirst();
        DataType type;
        switch(c.getSecond()) {
            case DOUBLE:
                type = DoubleCell.TYPE;
                try {
                    double dValue = peekFlowVariableDouble(name);
                    values[i] = new DoubleCell(dValue);
                } catch (NoSuchElementException e) {
                    // keep the original exception as cause for diagnostics
                    throw new InvalidSettingsException("No such flow variable (of type double): " + name, e);
                }
                break;
            case INTEGER:
                type = IntCell.TYPE;
                try {
                    int iValue = peekFlowVariableInt(name);
                    values[i] = new IntCell(iValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type int): " + name, e);
                }
                break;
            case STRING:
                type = StringCell.TYPE;
                try {
                    String sValue = peekFlowVariableString(name);
                    // a null string variable is represented as an empty string cell
                    sValue = sValue == null ? "" : sValue;
                    values[i] = new StringCell(sValue);
                } catch (NoSuchElementException e) {
                    throw new InvalidSettingsException("No such flow variable (of type String): " + name, e);
                }
                break;
            default:
                throw new InvalidSettingsException("Unsupported variable type: " + c.getSecond());
        }
        // Locale.ROOT keeps the suffix check independent of the default locale
        // (e.g. a Turkish locale lower-cases 'I' to a dotless 'i').
        if (nameHash.contains(name) && !name.toLowerCase(java.util.Locale.ROOT).endsWith("(variable)")) {
            name = name.concat(" (variable)");
        }
        String newName = name;
        int uniquifier = 1;
        // Set#add returns false while the candidate name is already taken
        while (!nameHash.add(newName)) {
            newName = name + " (#" + (uniquifier++) + ")";
        }
        specs[i] = new DataColumnSpecCreator(newName, type).createSpec();
    }
    arranger.append(new AbstractCellFactory(specs) {

        /**
         * Returns the same constant cells for every row.
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            return values;
        }
    });
    return arranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataType(org.knime.core.data.DataType) HashSet(java.util.HashSet) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) PortType(org.knime.core.node.port.PortType) DataType(org.knime.core.data.DataType) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) NoSuchElementException(java.util.NoSuchElementException) FlowVariable(org.knime.core.node.workflow.FlowVariable)

Example 13 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class LogRegPredictor method getCells.

/**
 * {@inheritDoc}
 * <p>
 * Computes the prediction for one row. If the row has missing values the
 * result of {@code createMissingOutput()} is returned. Otherwise a single-row
 * design matrix is built from the model parameters, multiplied with
 * {@code m_beta}, and the target category belonging to the largest entry of
 * the product is written to the last cell. When {@code m_includeProbs} is
 * set, the cells addressed by {@code m_targetCategoryIndex} additionally
 * receive the per-category probabilities.
 */
@Override
public DataCell[] getCells(final DataRow row) {
    if (hasMissingValues(row)) {
        return createMissingOutput();
    }
    final MissingHandling missingHandling = new MissingHandling(true);
    // one cell for the prediction, plus m_targetDomainValuesCount cells when probabilities are requested
    DataCell[] cells = m_includeProbs ? new DataCell[1 + m_targetDomainValuesCount] : new DataCell[1];
    // every slot initially references the same IntCell(0) instance; slots are overwritten below
    Arrays.fill(cells, new IntCell(0));
    // row vector: 1 x (number of parameters)
    final RealMatrix x = MatrixUtils.createRealMatrix(1, m_parameters.size());
    for (int i = 0; i < m_parameters.size(); i++) {
        String parameter = m_parameters.get(i);
        String predictor = null;
        String value = null;
        boolean rowIsEmpty = true;
        // find the first predictor that has an entry for this parameter in m_ppMatrix
        for (final Iterator<String> iter = m_predictors.iterator(); iter.hasNext(); ) {
            predictor = iter.next();
            value = m_ppMatrix.getValue(parameter, predictor, null);
            if (null != value) {
                rowIsEmpty = false;
                break;
            }
        }
        if (rowIsEmpty) {
            // no predictor is associated with this parameter
            // NOTE(review): presumably the intercept term - confirm
            x.setEntry(0, i, 1);
        } else {
            if (m_factors.contains(predictor)) {
                // predictor is listed in m_factors: locate the row's value in the predictor's value list
                List<DataCell> values = m_values.get(predictor);
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                int index = values.indexOf(cell);
                /* When building a general regression model, for each
                    categorical field there is one category used as the
                    default baseline and therefore it does not show up in the
                    ParameterList in PMML. This design is fine for training,
                    but in the prediction, when the input is the default
                    baseline, the parameters should all be 0.
                    See the commit message for an example and more details.
                    */
                if (index > 0) {
                    // set the entry belonging to the matched value and advance i;
                    // together with the loop's i++ this moves on by values.size() - 1 positions
                    x.setEntry(0, i + index - 1, 1);
                    i += values.size() - 2;
                }
            } else if (m_baseLabelToColName.containsKey(parameter) && m_vectorLengths.containsKey(m_baseLabelToColName.get(parameter))) {
                // parameter maps to a column for which a vector length is registered
                final DataCell cell = row.getCell(m_parameterI.get(parameter));
                Optional<NameAndIndex> vectorValue = VectorHandling.parse(predictor);
                // if the predictor name cannot be parsed, the entry is left untouched
                if (vectorValue.isPresent()) {
                    int j = vectorValue.get().getIndex();
                    value = m_ppMatrix.getValue(parameter, predictor, null);
                    // the matrix value is parsed as an integer exponent
                    double exponent = Integer.valueOf(value);
                    double radix = RegressionTrainingRow.getValue(cell, j, missingHandling);
                    x.setEntry(0, i, Math.pow(radix, exponent));
                }
            } else {
                // remaining case: the cell is cast to DoubleValue and raised to the integer exponent
                DataCell cell = row.getCell(m_parameterI.get(parameter));
                double radix = ((DoubleValue) cell).getDoubleValue();
                double exponent = Integer.valueOf(value);
                x.setEntry(0, i, Math.pow(radix, exponent));
            }
        }
    }
    // single-row product of the design row and the coefficients; only row 0 is read below
    RealMatrix r = x.multiply(m_beta);
    // determine the column with highest probability
    int maxIndex = 0;
    double maxValue = r.getEntry(0, 0);
    for (int i = 1; i < r.getColumnDimension(); i++) {
        if (r.getEntry(0, i) > maxValue) {
            maxValue = r.getEntry(0, i);
            maxIndex = i;
        }
    }
    if (m_includeProbs) {
        // compute probabilities of the target categories
        for (int i = 0; i < m_targetCategories.size(); i++) {
            // test if calculation would overflow
            // NOTE(review): 700 is presumably a safety margin below the point where Math.exp returns infinity - confirm
            boolean overflow = false;
            for (int k = 0; k < r.getColumnDimension(); k++) {
                if ((r.getEntry(0, k) - r.getEntry(0, i)) > 700) {
                    overflow = true;
                }
            }
            if (!overflow) {
                // probability of category i is 1 / sum_k exp(r_k - r_i)
                double sum = 0;
                for (int k = 0; k < r.getColumnDimension(); k++) {
                    sum += Math.exp(r.getEntry(0, k) - r.getEntry(0, i));
                }
                cells[m_targetCategoryIndex.get(i)] = new DoubleCell(1.0 / sum);
            } else {
                // another category dominates by more than the threshold: probability is reported as 0
                cells[m_targetCategoryIndex.get(i)] = new DoubleCell(0);
            }
        }
    }
    // the last cell is the prediction
    cells[cells.length - 1] = m_targetCategories.get(maxIndex);
    return cells;
}
Also used : Optional(java.util.Optional) DoubleCell(org.knime.core.data.def.DoubleCell) IntCell(org.knime.core.data.def.IntCell) RealMatrix(org.apache.commons.math3.linear.RealMatrix) MissingHandling(org.knime.base.node.mine.regression.RegressionTrainingRow.MissingHandling) DataCell(org.knime.core.data.DataCell)

Example 14 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class CategoryToNumberNodeDialogPane method saveSettingsTo.

/**
 * {@inheritDoc}
 * <p>
 * Transfers the current state of the dialog components into the settings
 * object and saves it. The "default value" and "map missing to" text fields
 * are optional: a blank field is stored as a missing cell, any other content
 * must be an integer (surrounding whitespace is ignored).
 *
 * @throws InvalidSettingsException if one of the optional text fields is
 *             non-blank but does not contain a valid integer
 */
@Override
protected void saveSettingsTo(final NodeSettingsWO settings) throws InvalidSettingsException {
    Set<String> included = m_includedColumns.getIncludedColumnSet();
    m_settings.setIncludedColumns(included.toArray(new String[included.size()]));
    m_settings.setIncludeAll(m_includedColumns.isKeepAllSelected());
    m_settings.setAppendColumns(m_appendColums.isSelected());
    m_settings.setColumnSuffix(m_columnSuffix.getText());
    m_settings.setStartIndex((Integer) m_startIndex.getValue());
    m_settings.setIncrement((Integer) m_increment.getValue());
    m_settings.setMaxCategories((Integer) m_maxCategories.getValue());
    // trim once so the emptiness check and the parse see the same text
    final String defaultValueText = m_defaultValue.getText().trim();
    if (!defaultValueText.isEmpty()) {
        m_settings.setDefaultValue(new IntCell(parseIntegerField(defaultValueText, "default value")));
    } else {
        m_settings.setDefaultValue(DataType.getMissingCell());
    }
    final String mapMissingToText = m_mapMissingTo.getText().trim();
    if (!mapMissingToText.isEmpty()) {
        m_settings.setMapMissingTo(new IntCell(parseIntegerField(mapMissingToText, "value for missing cells")));
    } else {
        m_settings.setMapMissingTo(DataType.getMissingCell());
    }
    m_settings.saveSettings(settings);
}

/**
 * Parses the (already trimmed) content of a text field as an integer,
 * reporting malformed input as an invalid setting instead of an unchecked
 * {@link NumberFormatException}.
 *
 * @param text the trimmed text to parse
 * @param fieldLabel human readable name of the field, used in the error message
 * @return the parsed integer
 * @throws InvalidSettingsException if {@code text} is not a valid integer
 */
private static int parseIntegerField(final String text, final String fieldLabel)
        throws InvalidSettingsException {
    try {
        return Integer.parseInt(text);
    } catch (NumberFormatException e) {
        throw new InvalidSettingsException(
            "Invalid " + fieldLabel + ": \"" + text + "\" is not a valid integer", e);
    }
}
Also used : IntCell(org.knime.core.data.def.IntCell)

Example 15 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory method getCells.

/**
 * {@inheritDoc}
 * <p>
 * Lets every applicable model of the ensemble vote on the given row and
 * returns, in this order: the majority class, optionally its confidence,
 * optionally one confidence per target value, and optionally the number of
 * votes. Models that were trained on the row are skipped when an out-of-bag
 * filter is present. If no prediction can be made the class related cells
 * are missing.
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final TreeEnsembleModelPortObject portObject = m_predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration config = m_predictor.getConfiguration();
    final TreeEnsembleModel ensemble = portObject.getEnsembleModel();

    // the predicted class is always present; the remaining cells are optional
    int cellCount = 1;
    final boolean withConfidence = config.isAppendPredictionConfidence();
    cellCount += withConfidence ? 1 : 0;
    final boolean withClassConfidences = config.isAppendClassConfidences();
    cellCount += withClassConfidences ? m_targetValueMap.size() : 0;
    final boolean withModelCount = config.isAppendModelCount();
    cellCount += withModelCount ? 1 : 0;
    final boolean outOfBagFiltering = m_predictor.hasOutOfBagFilter();

    final DataCell[] output = new DataCell[cellCount];
    final DataRow learnColumnsRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    final PredictorRecord predictorRecord = ensemble.createPredictorRecord(learnColumnsRow, m_learnSpec);
    if (predictorRecord == null) {
        // missing value
        Arrays.fill(output, DataType.getMissingCell());
        return output;
    }

    final Voting votes = m_votingFactory.createVoting();
    final int modelCount = ensemble.getNrModels();
    int votingModels = 0;
    for (int modelIdx = 0; modelIdx < modelCount; modelIdx++) {
        if (outOfBagFiltering && m_predictor.isRowPartOfTrainingData(row.getKey(), modelIdx)) {
            // row was used to train this model, so the model must not vote
            continue;
        }
        final TreeModelClassification tree = ensemble.getTreeModelClassification(modelIdx);
        final TreeNodeClassification leaf = tree.findMatchingNode(predictorRecord);
        votes.addVote(leaf);
        votingModels++;
    }

    // not read below; retained so the metadata lookup and cast still happen
    final NominalValueRepresentation[] targetVals =
        ((TreeTargetNominalColumnMetaData) ensemble.getMetaData().getTargetMetaData()).getValues();
    final String winner = votes.getMajorityClass();

    int next;
    if (winner != null) {
        next = 0;
        output[next++] = m_targetValueMap.get(winner);
        if (withConfidence) {
            output[next++] = new DoubleCell(votes.getClassProbabilityForClass(winner));
        }
        if (withClassConfidences) {
            for (final String targetValue : m_targetValueMap.keySet()) {
                output[next++] = new DoubleCell(votes.getClassProbabilityForClass(targetValue));
            }
        }
    } else {
        assert votingModels == 0;
        Arrays.fill(output, DataType.getMissingCell());
        // only the trailing slot (model count, if requested) may still be filled
        next = cellCount - 1;
    }
    if (withModelCount) {
        output[next] = new IntCell(votes.getNrVotes());
    }
    return output;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Aggregations

IntCell (org.knime.core.data.def.IntCell)109 DataCell (org.knime.core.data.DataCell)79 DoubleCell (org.knime.core.data.def.DoubleCell)67 StringCell (org.knime.core.data.def.StringCell)55 DefaultRow (org.knime.core.data.def.DefaultRow)46 DataRow (org.knime.core.data.DataRow)33 DataTableSpec (org.knime.core.data.DataTableSpec)21 RowKey (org.knime.core.data.RowKey)21 ArrayList (java.util.ArrayList)20 DataType (org.knime.core.data.DataType)20 LongCell (org.knime.core.data.def.LongCell)14 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)14 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 BufferedDataTable (org.knime.core.node.BufferedDataTable)12 Test (org.junit.Test)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)9 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 DataContainer (org.knime.core.data.container.DataContainer)8 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)8