Search in sources :

Example 6 with Mean

Use of org.apache.commons.math.stat.descriptive.moment.Mean in project knime-core by knime.

From the class TreeEnsembleRegressionPredictorCellFactory, method getCells.

/**
 * {@inheritDoc}
 *
 * <p>The returned array always starts with the prediction cell, which holds the mean of the
 * matching nodes' means over all trees that were taken into account. If configured, it is
 * followed by a confidence cell holding the variance of those node means, and then by a cell
 * holding the number of trees that were taken into account. When an out-of-bag filter is
 * present, trees for which the row is reported as part of the training data are skipped.
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final TreeEnsembleModelPortObject portObject = m_predictor.getModelObject();
    final TreeEnsemblePredictorConfiguration config = m_predictor.getConfiguration();
    final TreeEnsembleModel ensemble = portObject.getEnsembleModel();
    final boolean withConfidence = config.isAppendPredictionConfidence();
    final boolean withModelCount = config.isAppendModelCount();
    // The prediction cell is always present; each optional cell adds one more slot.
    final int cellCount = 1 + (withConfidence ? 1 : 0) + (withModelCount ? 1 : 0);
    final boolean oobFilterActive = m_predictor.hasOutOfBagFilter();
    final DataCell[] cells = new DataCell[cellCount];

    final DataRow learnColumnsRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    final PredictorRecord predictorRecord = ensemble.createPredictorRecord(learnColumnsRow, m_learnSpec);
    if (predictorRecord == null) {
        // No record could be created (missing value): every output cell becomes missing.
        Arrays.fill(cells, DataType.getMissingCell());
        return cells;
    }

    final Mean nodeMeanAverage = new Mean();
    final Variance nodeMeanVariance = new Variance();
    final int treeCount = ensemble.getNrModels();
    for (int treeIdx = 0; treeIdx < treeCount; treeIdx++) {
        if (oobFilterActive && m_predictor.isRowPartOfTrainingData(row.getKey(), treeIdx)) {
            // Row was used to train this tree, so the tree must not contribute.
            continue;
        }
        final TreeModelRegression tree = ensemble.getTreeModelRegression(treeIdx);
        final TreeNodeRegression matchingNode = tree.findMatchingNode(predictorRecord);
        final double matchedMean = matchingNode.getMean();
        nodeMeanAverage.increment(matchedMean);
        nodeMeanVariance.increment(matchedMean);
    }

    // The number of increments equals the number of trees that were not skipped.
    final int contributingTrees = (int) nodeMeanAverage.getN();
    final boolean noTreeContributed = contributingTrees == 0;
    int next = 0;

    if (noTreeContributed) {
        cells[next++] = DataType.getMissingCell();
    } else {
        cells[next++] = new DoubleCell(nodeMeanAverage.getResult());
    }

    if (withConfidence) {
        if (noTreeContributed) {
            cells[next++] = DataType.getMissingCell();
        } else {
            cells[next++] = new DoubleCell(nodeMeanVariance.getResult());
        }
    }

    if (withModelCount) {
        cells[next++] = new IntCell(contributingTrees);
    }
    return cells;
}
Also used : Mean(org.apache.commons.math.stat.descriptive.moment.Mean) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) DataRow(org.knime.core.data.DataRow) TreeNodeRegression(org.knime.base.node.mine.treeensemble2.model.TreeNodeRegression) Variance(org.apache.commons.math.stat.descriptive.moment.Variance) TreeModelRegression(org.knime.base.node.mine.treeensemble2.model.TreeModelRegression) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow)

Aggregations

Mean (org.apache.commons.math.stat.descriptive.moment.Mean)6 Variance (org.apache.commons.math.stat.descriptive.moment.Variance)3 DataCell (org.knime.core.data.DataCell)3 DataRow (org.knime.core.data.DataRow)3 StandardDeviation (org.apache.commons.math.stat.descriptive.moment.StandardDeviation)2 FilterColumnRow (org.knime.base.data.filter.column.FilterColumnRow)2 DoubleCell (org.knime.core.data.def.DoubleCell)2 IntCell (org.knime.core.data.def.IntCell)2 Topology (com.ibm.streamsx.topology.Topology)1 Random (java.util.Random)1 Median (org.apache.commons.math.stat.descriptive.rank.Median)1 SumOfSquares (org.apache.commons.math.stat.descriptive.summary.SumOfSquares)1 DrillConfig (org.apache.drill.common.config.DrillConfig)1 SchemaPath (org.apache.drill.common.expression.SchemaPath)1 DrillClient (org.apache.drill.exec.client.DrillClient)1 RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader)1 QueryDataBatch (org.apache.drill.exec.rpc.user.QueryDataBatch)1 BootStrapContext (org.apache.drill.exec.server.BootStrapContext)1 Drillbit (org.apache.drill.exec.server.Drillbit)1 RemoteServiceSet (org.apache.drill.exec.server.RemoteServiceSet)1