Search in sources :

Example 21 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class AutoBinner method calcDomainBoundsIfNeccessary.

/**
 * Determines the per column min/max values of the given data if not already present in the domain.
 *
 * @param data the data
 * @param exec the execution context
 * @param recalcValuesFor The columns
 * @return The data with extended domain information
 * @throws InvalidSettingsException ...
 * @throws CanceledExecutionException ...
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = data.getDataTableSpec().getColumnSpec(colName);
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (recalcValuesFor.contains(colName) && !colSpec.getDomain().hasBounds()) {
            valuesI.add(data.getDataTableSpec().findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        min.put(col, Double.MAX_VALUE);
        max.put(col, Double.MIN_VALUE);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                min.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : data.getDataTableSpec()) {
        if (recalcValuesFor.contains(columnSpec.getName())) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            DataColumnSpec newColSpec = specCreator.createSpec();
            newColSpecList.add(newColSpec);
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    return newDataTable;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 22 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    int size = 1;
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    if (appendConfidence) {
        size += 1;
    }
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    final boolean appendModelCount = cfg.isAppendModelCount();
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    DataCell[] result = new DataCell[size];
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }
    final Voting voting = m_votingFactory.createVoting();
    final int nrModels = ensembleModel.getNrModels();
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
        // ignore, row was used to train the model
        } else {
            TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
            TreeNodeClassification match = m.findMatchingNode(record);
            voting.addVote(match);
            nrValidModels += 1;
        }
    }
    final NominalValueRepresentation[] targetVals = ((TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData()).getValues();
    String majorityClass = voting.getMajorityClass();
    int index = 0;
    if (majorityClass == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        index = size - 1;
    } else {
        result[index++] = m_targetValueMap.get(majorityClass);
        // final float[] distribution = voting.getClassProbabilities();
        if (appendConfidence) {
            result[index++] = new DoubleCell(voting.getClassProbabilityForClass(majorityClass));
        }
        if (appendClassConfidences) {
            for (String targetValue : m_targetValueMap.keySet()) {
                result[index++] = new DoubleCell(voting.getClassProbabilityForClass(targetValue));
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(voting.getNrVotes());
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 23 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class ProximityMatrix method createTable.

public BufferedDataTable createTable(final ExecutionContext exec) throws CanceledExecutionException {
    int numCols = getNumCols();
    int numRows = getNumRows();
    DataColumnSpec[] colSpecs = new DataColumnSpec[numCols];
    for (int i = 0; i < colSpecs.length; i++) {
        colSpecs[i] = new DataColumnSpecCreator(getRowKeyForTable(1, i).getString(), DoubleCell.TYPE).createSpec();
    }
    DataTableSpec tableSpec = new DataTableSpec(colSpecs);
    BufferedDataContainer container = exec.createDataContainer(tableSpec);
    for (int i = 0; i < numRows; i++) {
        exec.checkCanceled();
        exec.setProgress(((double) i) / numRows, "Row " + i + "/" + numRows);
        DataCell[] cells = new DataCell[numCols];
        for (int j = 0; j < numCols; j++) {
            cells[j] = new DoubleCell(getEntryAt(i, j));
        }
        container.addRowToTable(new DefaultRow(getRowKeyForTable(0, i), cells));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 24 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class TreeEnsembleStatisticsNodeModel method execute.

@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    TreeEnsembleModel treeEnsemble = ((TreeEnsembleModelPortObject) inObjects[0]).getEnsembleModel();
    EnsembleStatistic ensembleStats = new EnsembleStatistic(treeEnsemble);
    DataContainer containerEnsembleStats = exec.createDataContainer(createEnsembleStatsSpec());
    DataCell[] cells = new DataCell[7];
    cells[0] = new IntCell(treeEnsemble.getNrModels());
    cells[1] = new IntCell(ensembleStats.getMinLevel());
    cells[2] = new IntCell(ensembleStats.getMaxLevel());
    cells[3] = new DoubleCell(ensembleStats.getAvgLevel());
    cells[4] = new IntCell(ensembleStats.getMinNumNodes());
    cells[5] = new IntCell(ensembleStats.getMaxNumNodes());
    cells[6] = new DoubleCell(ensembleStats.getAvgNumNodes());
    containerEnsembleStats.addRowToTable(new DefaultRow(RowKey.createRowKey(0L), cells));
    containerEnsembleStats.close();
    DataContainer containerTreeStats = exec.createDataContainer(createTreeStatsSpec());
    for (int i = 0; i < treeEnsemble.getNrModels(); i++) {
        DataCell[] treeCells = new DataCell[2];
        TreeStatistic treeStat = ensembleStats.getTreeStatistic(i);
        treeCells[0] = new IntCell(treeStat.getNumLevels());
        treeCells[1] = new IntCell(treeStat.getNumNodes());
        containerTreeStats.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), treeCells));
    }
    containerTreeStats.close();
    return new PortObject[] { (PortObject) containerEnsembleStats.getTable(), (PortObject) containerTreeStats.getTable() };
}
Also used : TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) TreeStatistic(org.knime.base.node.mine.treeensemble2.statistics.TreeStatistic) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) DataContainer(org.knime.core.data.container.DataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) EnsembleStatistic(org.knime.base.node.mine.treeensemble2.statistics.EnsembleStatistic) DefaultRow(org.knime.core.data.def.DefaultRow) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PortObject(org.knime.core.node.port.PortObject) IntCell(org.knime.core.data.def.IntCell)

Example 25 with DoubleCell

use of org.knime.core.data.def.DoubleCell in project knime-core by knime.

the class GradientBoostingPredictorCellFactory method getCell.

/**
 * {@inheritDoc}
 */
@Override
public DataCell getCell(final DataRow row) {
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    double prediction = m_model.predict(m_model.createPredictorRecord(filterRow, m_learnSpec));
    return new DoubleCell(prediction);
}
Also used : DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow)

Aggregations

DoubleCell (org.knime.core.data.def.DoubleCell)189 DataCell (org.knime.core.data.DataCell)129 IntCell (org.knime.core.data.def.IntCell)67 DefaultRow (org.knime.core.data.def.DefaultRow)66 StringCell (org.knime.core.data.def.StringCell)65 DataRow (org.knime.core.data.DataRow)57 DataTableSpec (org.knime.core.data.DataTableSpec)55 ArrayList (java.util.ArrayList)42 DataColumnSpec (org.knime.core.data.DataColumnSpec)42 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)41 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)39 RowKey (org.knime.core.data.RowKey)37 DoubleValue (org.knime.core.data.DoubleValue)35 BufferedDataTable (org.knime.core.node.BufferedDataTable)28 DataColumnDomainCreator (org.knime.core.data.DataColumnDomainCreator)26 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)22 DataType (org.knime.core.data.DataType)20 LinkedHashMap (java.util.LinkedHashMap)17 HashMap (java.util.HashMap)13 Point (java.awt.Point)12