Search in sources:

Example 1 with PredictorRecord

use of org.knime.base.node.mine.treeensemble2.data.PredictorRecord in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory method getCells.

/**
 * {@inheritDoc}
 *
 * <p>
 * The returned array holds, in this order: the majority class, optionally the confidence of that class,
 * optionally one confidence per key of {@code m_targetValueMap}, and optionally the number of votes.
 * All cells are missing when no predictor record can be created for the row. When the voting yields no
 * majority class, every cell is missing except a trailing vote count (if that output is enabled).
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    final boolean appendModelCount = cfg.isAppendModelCount();

    // one cell for the predicted class plus one per enabled optional output
    int size = 1;
    if (appendConfidence) {
        size += 1;
    }
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    final DataCell[] result = new DataCell[size];
    final DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    final PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }

    final Voting voting = m_votingFactory.createVoting();
    final int nrModels = ensembleModel.getNrModels();
    // only used by the sanity assertion below
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
            // ignore, row was used to train the model
            continue;
        }
        final TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
        final TreeNodeClassification match = m.findMatchingNode(record);
        voting.addVote(match);
        nrValidModels += 1;
    }

    final String majorityClass = voting.getMajorityClass();
    int index = 0;
    if (majorityClass == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        // leave the cursor on the last cell so that the vote count (if enabled) overwrites it
        index = size - 1;
    } else {
        result[index++] = m_targetValueMap.get(majorityClass);
        if (appendConfidence) {
            result[index++] = new DoubleCell(voting.getClassProbabilityForClass(majorityClass));
        }
        if (appendClassConfidences) {
            for (String targetValue : m_targetValueMap.keySet()) {
                result[index++] = new DoubleCell(voting.getClassProbabilityForClass(targetValue));
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(voting.getNrVotes());
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 2 with PredictorRecord

use of org.knime.base.node.mine.treeensemble2.data.PredictorRecord in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method getCells.

/**
 * {@inheritDoc}
 *
 * <p>
 * Each model that is allowed to vote contributes the normalized target distribution of its matching node.
 * The predicted class is the one with the largest accumulated probability. The returned array holds, in
 * this order: the predicted class, optionally its confidence, optionally one confidence per target value,
 * and optionally the number of models that voted. All confidences are averages over the voting models.
 * All cells are missing when no predictor record can be created for the row. When no majority class
 * exists, every cell is missing except a trailing model count (if that output is enabled).
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    final boolean appendModelCount = cfg.isAppendModelCount();

    // one cell for the predicted class plus one per enabled optional output
    int size = 1;
    if (appendConfidence) {
        size += 1;
    }
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    final DataCell[] result = new DataCell[size];
    final DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    final PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }

    final OccurrenceCounter<String> counter = new OccurrenceCounter<String>();
    final int nrModels = ensembleModel.getNrModels();
    final TreeTargetNominalColumnMetaData targetMeta =
        (TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData();
    final NominalValueRepresentation[] targetValues = targetMeta.getValues();
    // per-class sum of the per-tree probabilities; divide by nrValidModels to obtain the mean
    final double[] classProbabilities = new double[targetValues.length];
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
            // ignore, row was used to train the model
            continue;
        }
        final TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
        final TreeNodeClassification match = m.findMatchingNode(record);
        final String majorityClassName = match.getMajorityClassName();
        final float[] nodeClassProbs = match.getTargetDistribution();
        double instancesInNode = 0;
        for (int c = 0; c < nodeClassProbs.length; c++) {
            instancesInNode += nodeClassProbs[c];
        }
        // NOTE(review): a node whose distribution sums to zero would produce NaN here - confirm that
        // this cannot happen for trained models
        for (int c = 0; c < classProbabilities.length; c++) {
            classProbabilities[c] += nodeClassProbs[c] / instancesInNode;
        }
        counter.add(majorityClassName);
        nrValidModels += 1;
    }

    final String bestValue = counter.getMostFrequent();
    int index = 0;
    if (bestValue == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        // leave the cursor on the last cell so that the model count (if enabled) overwrites it
        index = size - 1;
    } else {
        // the prediction is the class with the highest accumulated probability (soft voting)
        int indexBest = -1;
        double probBest = -1;
        for (int c = 0; c < classProbabilities.length; c++) {
            final double prob = classProbabilities[c];
            if (prob > probBest) {
                probBest = prob;
                indexBest = c;
            }
        }
        result[index++] = new StringCell(targetValues[indexBest].getNominalValue());
        if (appendConfidence) {
            // probBest is a sum over all voting models; average it so that the confidence is a
            // probability and agrees with the per-class confidences emitted below
            result[index++] = new DoubleCell(probBest / nrValidModels);
        }
        if (appendClassConfidences) {
            for (NominalValueRepresentation nomVal : targetValues) {
                final double prob = classProbabilities[nomVal.getAssignedInteger()] / nrValidModels;
                result[index++] = new DoubleCell(prob);
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(nrValidModels);
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) StringCell(org.knime.core.data.def.StringCell) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 3 with PredictorRecord

use of org.knime.base.node.mine.treeensemble2.data.PredictorRecord in project knime-core by knime.

the class AbstractTreeEnsembleModel method createNominalNumericPredictorRecord.

/**
 * Builds a predictor record from a row whose columns are nominal or numeric.
 *
 * <p>
 * Every column of {@code trainSpec} is mapped by name to one value: {@code PredictorRecord.NULL} for
 * missing cells, for nominal values that are not found in the column's meta data and for NaN numbers;
 * the assigned integer for known nominal values; the double value for all other numeric cells.
 *
 * @param filterRow the row to convert; its cells are read at the same positions as the spec's columns
 * @param trainSpec the table spec providing column names and types
 * @return the record holding one entry per column of {@code trainSpec}
 * @throws IllegalStateException if a non-missing cell belongs to a column that is neither nominal nor
 *             numeric
 */
private PredictorRecord createNominalNumericPredictorRecord(final DataRow filterRow, final DataTableSpec trainSpec) {
    final int columnCount = trainSpec.getNumColumns();
    // sized so that all columns fit without rehashing at the default load factor
    final Map<String, Object> values = new LinkedHashMap<String, Object>((int) (columnCount / 0.75 + 1.0));
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
        final DataColumnSpec columnSpec = trainSpec.getColumnSpec(colIdx);
        final String name = columnSpec.getName();
        final DataType type = columnSpec.getType();
        final DataCell current = filterRow.getCell(colIdx);

        if (current.isMissing()) {
            values.put(name, PredictorRecord.NULL);
            continue;
        }

        if (type.isCompatible(NominalValue.class)) {
            final TreeNominalColumnMetaData nominalMeta =
                (TreeNominalColumnMetaData) m_metaData.getAttributeMetaData(colIdx);
            final NominalValueRepresentation[] knownValues = nominalMeta.getValues();
            final String text = current.toString();
            // look up the integer the model assigned to this nominal value; -1 means "not found"
            int code = -1;
            for (int k = 0; k < knownValues.length; k++) {
                if (knownValues[k].getNominalValue().equals(text)) {
                    code = knownValues[k].getAssignedInteger();
                    break;
                }
            }
            if (code != -1) {
                values.put(name, Integer.valueOf(code));
            } else {
                // the value is not known to the model, treat as missing value
                values.put(name, PredictorRecord.NULL);
            }
            continue;
        }

        if (type.isCompatible(DoubleValue.class)) {
            final double number = ((DoubleValue) current).getDoubleValue();
            if (!Double.isNaN(number)) {
                values.put(name, Double.valueOf(number));
            } else {
                // make sure that NaNs are treated as missing values
                // bug AP-7169
                values.put(name, PredictorRecord.NULL);
            }
            continue;
        }

        throw new IllegalStateException("Expected nominal or numeric column type for column \"" + name + "\" but got \"" + type + "\"");
    }
    return new PredictorRecord(values);
}
Also used : NominalValue(org.knime.core.data.NominalValue) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) LinkedHashMap(java.util.LinkedHashMap) TreeNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeNominalColumnMetaData) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell)

Example 4 with PredictorRecord

use of org.knime.base.node.mine.treeensemble2.data.PredictorRecord in project knime-core by knime.

the class AbstractTreeEnsembleModel method createByteVectorPredictorRecord.

/**
 * Builds a predictor record from a row that consists of a single byte vector cell.
 *
 * <p>
 * Each position of the vector is stored as an {@code Integer} under the attribute name returned by
 * {@code TreeNumericColumnMetaData.getAttributeNameByte(int)}.
 *
 * @param filterRow the row holding the byte vector in its first cell
 * @return the record, or {@code null} if the cell is missing
 * @throws IllegalArgumentException if the vector length differs from the number of attributes in the
 *             meta data
 */
private PredictorRecord createByteVectorPredictorRecord(final DataRow filterRow) {
    assert filterRow.getNumCells() == 1 : "Expected one cell as byte vector data";
    final DataCell vectorCell = filterRow.getCell(0);
    if (vectorCell.isMissing()) {
        return null;
    }
    final ByteVectorValue vector = (ByteVectorValue) vectorCell;
    final long vectorLength = vector.length();
    if (vectorLength != getMetaData().getNrAttributes()) {
        throw new IllegalArgumentException("The byte-vector in " + filterRow.getKey().getString() + " has the wrong length. (" + vectorLength + " instead of " + getMetaData().getNrAttributes() + ")");
    }
    // sized so that all positions fit without rehashing at the default load factor
    final Map<String, Object> values = new LinkedHashMap<String, Object>((int) (vectorLength / 0.75 + 1.0));
    int position = 0;
    while (position < vectorLength) {
        final String attributeName = TreeNumericColumnMetaData.getAttributeNameByte(position);
        values.put(attributeName, Integer.valueOf(vector.get(position)));
        position++;
    }
    return new PredictorRecord(values);
}
Also used : PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) LinkedHashMap(java.util.LinkedHashMap)

Example 5 with PredictorRecord

use of org.knime.base.node.mine.treeensemble2.data.PredictorRecord in project knime-core by knime.

the class RandomForestDistance method computeDistance.

/**
 * {@inheritDoc}
 *
 * <p>
 * The proximity of two rows is the fraction of models in the ensemble in which both rows end up in a
 * node with the same signature; the returned distance is {@code 1 - proximity}.
 *
 * @throws DistanceMeasurementException if no predictor record can be created for one of the rows
 */
@Override
public double computeDistance(final DataRow row1, final DataRow row2) throws DistanceMeasurementException {
    final List<Integer> filterIndicesList = getColumnIndices();
    final int[] filterIndices = new int[filterIndicesList.size()];
    int i = 0;
    for (Integer index : filterIndicesList) {
        filterIndices[i++] = index;
    }
    final DataRow filterRow1 = new FilterColumnRow(row1, filterIndices);
    final DataRow filterRow2 = new FilterColumnRow(row2, filterIndices);
    final PredictorRecord record1 = m_ensembleModel.createPredictorRecord(filterRow1, m_learnTableSpec);
    final PredictorRecord record2 = m_ensembleModel.createPredictorRecord(filterRow2, m_learnTableSpec);
    // record creation may yield null (e.g. for a missing byte vector cell); report that through the
    // declared exception instead of passing null on to the tree models
    if (record1 == null || record2 == null) {
        final DataRow offending = record1 == null ? row1 : row2;
        throw new DistanceMeasurementException("Unable to compute distance: no predictor record could be created for row \""
            + offending.getKey().getString() + "\" (missing value?)");
    }
    final int nrModels = m_ensembleModel.getNrModels();
    double proximity = 0.0;
    for (int t = 0; t < nrModels; t++) {
        final AbstractTreeModel<?> tree = m_ensembleModel.getTreeModel(t);
        final AbstractTreeNode leaf1 = tree.findMatchingNode(record1);
        final AbstractTreeNode leaf2 = tree.findMatchingNode(record2);
        if (leaf1.getSignature().equals(leaf2.getSignature())) {
            proximity += 1.0;
        }
    }
    // NOTE(review): an ensemble without models yields NaN here (0.0 / 0) - confirm this cannot occur
    proximity /= nrModels;
    // to get a distance measure, we have to subtract the proximity from 1
    return 1 - proximity;
}
Also used : PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) AbstractTreeNode(org.knime.base.node.mine.treeensemble2.model.AbstractTreeNode) DataRow(org.knime.core.data.DataRow) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow)

Aggregations

PredictorRecord (org.knime.base.node.mine.treeensemble2.data.PredictorRecord)16 DataCell (org.knime.core.data.DataCell)9 FilterColumnRow (org.knime.base.data.filter.column.FilterColumnRow)6 DataRow (org.knime.core.data.DataRow)6 DoubleCell (org.knime.core.data.def.DoubleCell)5 LinkedHashMap (java.util.LinkedHashMap)4 Test (org.junit.Test)3 NominalValueRepresentation (org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation)3 TestDataGenerator (org.knime.base.node.mine.treeensemble2.data.TestDataGenerator)3 TreeEnsembleModel (org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel)3 TreeEnsembleModelPortObject (org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject)3 TreeNodeRegression (org.knime.base.node.mine.treeensemble2.model.TreeNodeRegression)3 TreeEnsembleLearnerConfiguration (org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration)3 TreeEnsemblePredictorConfiguration (org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration)3 IntCell (org.knime.core.data.def.IntCell)3 HashMap (java.util.HashMap)2 TreeData (org.knime.base.node.mine.treeensemble2.data.TreeData)2 TreeNominalColumnData (org.knime.base.node.mine.treeensemble2.data.TreeNominalColumnData)2 TreeTargetNominalColumnMetaData (org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData)2 IDataIndexManager (org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager)2