Search in sources :

Example 11 with TreeEnsemblePredictorConfiguration

use of org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration in project knime-core by knime.

the class TreeEnsembleRegressionLearnerNodeModel method createOutOfBagPredictor.

/**
 * @param ensembleSpec
 * @param ensembleModel
 * @param inSpec
 * @return
 * @throws InvalidSettingsException
 */
private TreeEnsemblePredictor createOutOfBagPredictor(final TreeEnsembleModelPortObjectSpec ensembleSpec, final TreeEnsembleModelPortObject ensembleModel, final DataTableSpec inSpec) throws InvalidSettingsException {
    String targetColumn = m_configuration.getTargetColumn();
    TreeEnsemblePredictorConfiguration ooBConfig = new TreeEnsemblePredictorConfiguration(true, targetColumn);
    String append = targetColumn + " (Out-of-bag)";
    ooBConfig.setPredictionColumnName(append);
    ooBConfig.setAppendPredictionConfidence(true);
    ooBConfig.setAppendClassConfidences(true);
    ooBConfig.setAppendModelCount(true);
    return new TreeEnsemblePredictor(ensembleSpec, ensembleModel, inSpec, ooBConfig);
}
Also used : TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) TreeEnsemblePredictor(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictor)

Example 12 with TreeEnsemblePredictorConfiguration

use of org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    int size = 1;
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    if (appendConfidence) {
        size += 1;
    }
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    final boolean appendModelCount = cfg.isAppendModelCount();
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    DataCell[] result = new DataCell[size];
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }
    OccurrenceCounter<String> counter = new OccurrenceCounter<String>();
    final int nrModels = ensembleModel.getNrModels();
    TreeTargetNominalColumnMetaData targetMeta = (TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData();
    final double[] classProbabilities = new double[targetMeta.getValues().length];
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
        // ignore, row was used to train the model
        } else {
            TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
            TreeNodeClassification match = m.findMatchingNode(record);
            String majorityClassName = match.getMajorityClassName();
            final float[] nodeClassProbs = match.getTargetDistribution();
            double instancesInNode = 0;
            for (int c = 0; c < nodeClassProbs.length; c++) {
                instancesInNode += nodeClassProbs[c];
            }
            for (int c = 0; c < classProbabilities.length; c++) {
                classProbabilities[c] += nodeClassProbs[c] / instancesInNode;
            }
            counter.add(majorityClassName);
            nrValidModels += 1;
        }
    }
    String bestValue = counter.getMostFrequent();
    int index = 0;
    if (bestValue == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        index = size - 1;
    } else {
        // result[index++] = m_targetValueMap.get(bestValue);
        int indexBest = -1;
        double probBest = -1;
        for (int c = 0; c < classProbabilities.length; c++) {
            double prob = classProbabilities[c];
            if (prob > probBest) {
                probBest = prob;
                indexBest = c;
            }
        }
        result[index++] = new StringCell(targetMeta.getValues()[indexBest].getNominalValue());
        if (appendConfidence) {
            // final int freqValue = counter.getFrequency(bestValue);
            // result[index++] = new DoubleCell(freqValue / (double)nrValidModels);
            result[index++] = new DoubleCell(probBest);
        }
        if (appendClassConfidences) {
            for (NominalValueRepresentation nomVal : targetMeta.getValues()) {
                double prob = classProbabilities[nomVal.getAssignedInteger()] / nrValidModels;
                result[index++] = new DoubleCell(prob);
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(nrValidModels);
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) StringCell(org.knime.core.data.def.StringCell) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 13 with TreeEnsemblePredictorConfiguration

use of org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method createFactory.

/**
 * Creates a TreeEnsembleClassificationPredictorCellFactory from the provided <b>predictor</b>
 * @param predictor
 * @return an instance of TreeEnsembleClassificationPredictorCellFactory configured according to the settings of the provided
 * <b>predictor<b>
 * @throws InvalidSettingsException
 */
public static TreeEnsembleClassificationPredictorCellFactory2 createFactory(final TreeEnsemblePredictor predictor) throws InvalidSettingsException {
    DataTableSpec testDataSpec = predictor.getDataSpec();
    TreeEnsembleModelPortObjectSpec modelSpec = predictor.getModelSpec();
    TreeEnsembleModelPortObject modelObject = predictor.getModelObject();
    TreeEnsemblePredictorConfiguration configuration = predictor.getConfiguration();
    UniqueNameGenerator nameGen = new UniqueNameGenerator(testDataSpec);
    Map<String, DataCell> targetValueMap = modelSpec.getTargetColumnPossibleValueMap();
    List<DataColumnSpec> newColsList = new ArrayList<DataColumnSpec>();
    DataType targetColType = modelSpec.getTargetColumn().getType();
    String targetColName = configuration.getPredictionColumnName();
    DataColumnSpec targetCol = nameGen.newColumn(targetColName, targetColType);
    newColsList.add(targetCol);
    if (configuration.isAppendPredictionConfidence()) {
        newColsList.add(nameGen.newColumn(targetCol.getName() + " (Confidence)", DoubleCell.TYPE));
    }
    if (configuration.isAppendClassConfidences()) {
        // and this class is not called)
        assert targetValueMap != null : "Target column has no possible values";
        for (String v : targetValueMap.keySet()) {
            newColsList.add(nameGen.newColumn(v, DoubleCell.TYPE));
        }
    }
    if (configuration.isAppendModelCount()) {
        newColsList.add(nameGen.newColumn("model count", IntCell.TYPE));
    }
    // assigned
    assert modelObject == null || targetValueMap != null : "Target values must be known during execution";
    DataColumnSpec[] newCols = newColsList.toArray(new DataColumnSpec[newColsList.size()]);
    int[] learnColumnInRealDataIndices = modelSpec.calculateFilterIndices(testDataSpec);
    return new TreeEnsembleClassificationPredictorCellFactory2(predictor, targetValueMap, newCols, learnColumnInRealDataIndices);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) ArrayList(java.util.ArrayList) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType)

Example 14 with TreeEnsemblePredictorConfiguration

use of org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration in project knime-core by knime.

the class TreeEnsembleRegressionPredictorCellFactory method createFactory.

/**
 * Creates a TreeEnsembleRegressionPredictorCellFactory from the provided <b>predictor</b>
 *
 * @param predictor
 * @return an instance of TreeEnsembleRegressionPredictorCellFactory configured according to the settings of the provided
 * <b>predictor<b>
 * @throws InvalidSettingsException
 */
public static TreeEnsembleRegressionPredictorCellFactory createFactory(final TreeEnsemblePredictor predictor) throws InvalidSettingsException {
    DataTableSpec testDataSpec = predictor.getDataSpec();
    TreeEnsembleModelPortObjectSpec modelSpec = predictor.getModelSpec();
    // TreeEnsembleModelPortObject modelObject = predictor.getModelObject();
    TreeEnsemblePredictorConfiguration configuration = predictor.getConfiguration();
    UniqueNameGenerator nameGen = new UniqueNameGenerator(testDataSpec);
    List<DataColumnSpec> newColsList = new ArrayList<DataColumnSpec>();
    String targetColName = configuration.getPredictionColumnName();
    DataColumnSpec targetCol = nameGen.newColumn(targetColName, DoubleCell.TYPE);
    newColsList.add(targetCol);
    if (configuration.isAppendPredictionConfidence()) {
        newColsList.add(nameGen.newColumn(targetCol.getName() + " (Prediction Variance)", DoubleCell.TYPE));
    }
    if (configuration.isAppendModelCount()) {
        newColsList.add(nameGen.newColumn("model count", IntCell.TYPE));
    }
    DataColumnSpec[] newCols = newColsList.toArray(new DataColumnSpec[newColsList.size()]);
    int[] learnColumnInRealDataIndices = modelSpec.calculateFilterIndices(testDataSpec);
    return new TreeEnsembleRegressionPredictorCellFactory(predictor, newCols, learnColumnInRealDataIndices);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) ArrayList(java.util.ArrayList) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator)

Example 15 with TreeEnsemblePredictorConfiguration

use of org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration in project knime-core by knime.

the class LKGradientBoostingPredictorCellFactory method createFactory.

public static LKGradientBoostingPredictorCellFactory createFactory(final GradientBoostingPredictor<MultiClassGradientBoostedTreesModel> predictor) throws InvalidSettingsException {
    TreeEnsemblePredictorConfiguration config = predictor.getConfiguration();
    DataTableSpec testSpec = predictor.getDataSpec();
    TreeEnsembleModelPortObjectSpec modelSpec = predictor.getModelSpec();
    ArrayList<DataColumnSpec> newColSpecs = new ArrayList<DataColumnSpec>();
    UniqueNameGenerator nameGen = new UniqueNameGenerator(testSpec);
    newColSpecs.add(nameGen.newColumn(config.getPredictionColumnName(), StringCell.TYPE));
    if (config.isAppendPredictionConfidence()) {
        newColSpecs.add(nameGen.newColumn("Confidence", DoubleCell.TYPE));
    }
    if (config.isAppendClassConfidences()) {
        final String targetColName = modelSpec.getTargetColumn().getName();
        final String suffix = config.getSuffixForClassProbabilities();
        for (String val : modelSpec.getTargetColumnPossibleValueMap().keySet()) {
            String colName = "P(" + targetColName + "=" + val + ")" + suffix;
            newColSpecs.add(nameGen.newColumn(colName, DoubleCell.TYPE));
        }
    }
    final Map<String, DataCell> targetValueMap = modelSpec.getTargetColumnPossibleValueMap();
    return new LKGradientBoostingPredictorCellFactory(newColSpecs.toArray(new DataColumnSpec[newColSpecs.size()]), predictor.getModel(), modelSpec.getLearnTableSpec(), modelSpec.calculateFilterIndices(testSpec), config, targetValueMap);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) UniqueNameGenerator(org.knime.core.util.UniqueNameGenerator)

Aggregations

TreeEnsemblePredictorConfiguration (org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration)25 DataCell (org.knime.core.data.DataCell)6 TreeEnsembleModelPortObject (org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject)5 ArrayList (java.util.ArrayList)4 TreeEnsembleModelPortObjectSpec (org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec)4 TreeEnsemblePredictor (org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictor)4 DataColumnSpec (org.knime.core.data.DataColumnSpec)4 DataTableSpec (org.knime.core.data.DataTableSpec)4 UniqueNameGenerator (org.knime.core.util.UniqueNameGenerator)4 FilterColumnRow (org.knime.base.data.filter.column.FilterColumnRow)3 PredictorRecord (org.knime.base.node.mine.treeensemble2.data.PredictorRecord)3 TreeEnsembleModel (org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel)3 DataRow (org.knime.core.data.DataRow)3 DoubleCell (org.knime.core.data.def.DoubleCell)3 IntCell (org.knime.core.data.def.IntCell)3 NominalValueRepresentation (org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation)2 TreeTargetNominalColumnMetaData (org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData)2 TreeModelClassification (org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)2 TreeNodeClassification (org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification)2 DataType (org.knime.core.data.DataType)2