Search in sources :

Example 81 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class ProximityMatrix method createTable.

/**
 * Copies the matrix entries into a newly created data table.
 *
 * <p>The table has {@code getNumCols()} columns of type {@link DoubleCell#TYPE} and
 * {@code getNumRows()} rows. Column names come from {@code getRowKeyForTable(1, col)}, row keys
 * from {@code getRowKeyForTable(0, row)}, and each cell holds {@code getEntryAt(row, col)}.
 *
 * @param exec context used to create the container, report progress and check for cancellation
 * @return the table obtained from the closed container
 * @throws CanceledExecutionException propagated from {@code exec.checkCanceled()}, which is
 *         called once per row
 */
public BufferedDataTable createTable(final ExecutionContext exec) throws CanceledExecutionException {
    final int columnCount = getNumCols();
    final int rowCount = getNumRows();

    // One double column per matrix column.
    final DataColumnSpec[] columnSpecs = new DataColumnSpec[columnCount];
    for (int col = 0; col < columnSpecs.length; col++) {
        final String columnName = getRowKeyForTable(1, col).getString();
        columnSpecs[col] = new DataColumnSpecCreator(columnName, DoubleCell.TYPE).createSpec();
    }

    final BufferedDataContainer output = exec.createDataContainer(new DataTableSpec(columnSpecs));
    for (int row = 0; row < rowCount; row++) {
        exec.checkCanceled();
        exec.setProgress(((double) row) / rowCount, "Row " + row + "/" + rowCount);

        final DataCell[] values = new DataCell[columnCount];
        for (int col = 0; col < columnCount; col++) {
            values[col] = new DoubleCell(getEntryAt(row, col));
        }
        output.addRowToTable(new DefaultRow(getRowKeyForTable(0, row), values));
    }
    output.close();
    return output.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 82 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class TreeEnsembleStatisticsNodeModel method execute.

/**
 * Builds two statistics tables for the tree ensemble found at input port 0.
 *
 * <p>The first output is a single-row table with seven cells: number of models, minimum, maximum
 * and average level, followed by minimum, maximum and average number of nodes. The second output
 * has one row per model holding that tree's number of levels and number of nodes.
 *
 * @param inObjects input port objects; element 0 is cast to {@link TreeEnsembleModelPortObject}
 * @param exec context used to create the two output containers
 * @return a two-element array: ensemble statistics table, then per-tree statistics table
 * @throws Exception declared by the overridden method
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final TreeEnsembleModelPortObject modelPort = (TreeEnsembleModelPortObject) inObjects[0];
    final TreeEnsembleModel ensemble = modelPort.getEnsembleModel();
    final EnsembleStatistic statistic = new EnsembleStatistic(ensemble);

    // Ensemble-wide summary: exactly one row.
    final DataContainer summaryContainer = exec.createDataContainer(createEnsembleStatsSpec());
    final DataCell[] summaryCells = new DataCell[] {
        new IntCell(ensemble.getNrModels()),
        new IntCell(statistic.getMinLevel()),
        new IntCell(statistic.getMaxLevel()),
        new DoubleCell(statistic.getAvgLevel()),
        new IntCell(statistic.getMinNumNodes()),
        new IntCell(statistic.getMaxNumNodes()),
        new DoubleCell(statistic.getAvgNumNodes())
    };
    summaryContainer.addRowToTable(new DefaultRow(RowKey.createRowKey(0L), summaryCells));
    summaryContainer.close();

    // Per-tree details: one row for each model in the ensemble.
    final DataContainer perTreeContainer = exec.createDataContainer(createTreeStatsSpec());
    for (int tree = 0; tree < ensemble.getNrModels(); tree++) {
        final TreeStatistic treeStatistic = statistic.getTreeStatistic(tree);
        final DataCell[] perTreeCells = new DataCell[] {
            new IntCell(treeStatistic.getNumLevels()),
            new IntCell(treeStatistic.getNumNodes())
        };
        perTreeContainer.addRowToTable(new DefaultRow(RowKey.createRowKey((long) tree), perTreeCells));
    }
    perTreeContainer.close();

    final PortObject summaryTable = (PortObject) summaryContainer.getTable();
    final PortObject perTreeTable = (PortObject) perTreeContainer.getTable();
    return new PortObject[] { summaryTable, perTreeTable };
}
Also used : TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) TreeStatistic(org.knime.base.node.mine.treeensemble2.statistics.TreeStatistic) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) DataContainer(org.knime.core.data.container.DataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) EnsembleStatistic(org.knime.base.node.mine.treeensemble2.statistics.EnsembleStatistic) DefaultRow(org.knime.core.data.def.DefaultRow) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PortObject(org.knime.core.node.port.PortObject) IntCell(org.knime.core.data.def.IntCell)

Example 83 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class TreeEnsembleClassificationLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Reads the table at input port 0 into memory, learns a tree ensemble on it, runs an
 * out-of-bag prediction on the original input table and returns the out-of-bag table, the column
 * statistics table and the model port object, in that order.
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inObjects[0];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();

    // Restrict the input to the learn columns; this step gets a 0.0 share of the progress.
    final FilterLearnColumnRearranger learnRearranger = m_configuration.filterLearnColumns(inputSpec);
    String warning = learnRearranger.getWarning();
    final BufferedDataTable learnTable =
        exec.createColumnRearrangeTable(inputTable, learnRearranger, exec.createSubProgress(0.0));
    final DataTableSpec learnSpec = learnTable.getDataTableSpec();
    final TreeEnsembleModelPortObjectSpec ensembleSpec = m_configuration.createPortObjectSpec(learnSpec);
    if (ensembleSpec.getTargetColumnPossibleValueMap() == null) {
        throw new InvalidSettingsException("The target column does not " + "have possible values assigned. Most likely it " + "has too many different distinct values (learning an ID " + "column?) Fix it by preprocessing the table using " + "a \"Domain Calculator\".");
    }

    // Progress split: 10% reading, 80% learning, 10% out-of-bag prediction.
    final ExecutionMonitor readInExec = exec.createSubProgress(0.1);
    final ExecutionMonitor learnExec = exec.createSubProgress(0.8);
    final ExecutionMonitor outOfBagExec = exec.createSubProgress(0.1);

    final TreeDataCreator dataCreator = new TreeDataCreator(m_configuration, learnSpec, learnTable.getRowCount());
    exec.setProgress("Reading data into memory");
    final TreeData data = dataCreator.readData(learnTable, m_configuration, readInExec);
    m_hiliteRowSample = dataCreator.getDataRowsForHilite();
    m_viewMessage = dataCreator.getViewMessage();

    // Append the data creator's warning (if any) to the rearranger's warning, newline-separated.
    final String creatorWarning = dataCreator.getAndClearWarningMessage();
    if (creatorWarning != null) {
        warning = (warning == null) ? creatorWarning : warning + "\n" + creatorWarning;
    }
    readInExec.setProgress(1.0);

    exec.setMessage("Learning trees");
    final TreeEnsembleLearner learner = new TreeEnsembleLearner(m_configuration, data);
    TreeEnsembleModel model;
    try {
        model = learner.learnEnsemble(learnExec);
    } catch (ExecutionException e) {
        // Surface the wrapped exception when there is one; otherwise rethrow the wrapper itself.
        final Throwable cause = e.getCause();
        if (!(cause instanceof Exception)) {
            throw e;
        }
        throw (Exception) cause;
    }
    final String fileStoreName = UUID.randomUUID().toString();
    final TreeEnsembleModelPortObject modelPortObject =
        TreeEnsembleModelPortObject.createPortObject(ensembleSpec, model, exec.createFileStore(fileStoreName));
    learnExec.setProgress(1.0);

    exec.setMessage("Out of bag prediction");
    final TreeEnsemblePredictor outOfBagPredictor =
        createOutOfBagPredictor(ensembleSpec, modelPortObject, inputSpec);
    outOfBagPredictor.setOutofBagFilter(learner.getRowSamples(), data.getTargetColumn());
    final ColumnRearranger outOfBagRearranger = outOfBagPredictor.getPredictionRearranger();
    final BufferedDataTable outOfBagTable =
        exec.createColumnRearrangeTable(inputTable, outOfBagRearranger, outOfBagExec);
    final BufferedDataTable colStatsTable =
        learner.createColumnStatisticTable(exec.createSubExecutionContext(0.0));

    m_ensembleModelPortObject = modelPortObject;
    if (warning != null) {
        setWarningMessage(warning);
    }
    return new PortObject[] { outOfBagTable, colStatsTable, modelPortObject };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeEnsembleModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObjectSpec) TreeEnsembleLearner(org.knime.base.node.mine.treeensemble2.learner.TreeEnsembleLearner) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) FilterLearnColumnRearranger(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.FilterLearnColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BufferedDataTable(org.knime.core.node.BufferedDataTable) FilterLearnColumnRearranger(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.FilterLearnColumnRearranger) DataCell(org.knime.core.data.DataCell) TreeData(org.knime.base.node.mine.treeensemble2.data.TreeData) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) ExecutionException(java.util.concurrent.ExecutionException) TreeEnsemblePredictor(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictor) TreeDataCreator(org.knime.base.node.mine.treeensemble2.data.TreeDataCreator) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 84 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class TreeEnsembleModelExtractorNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Writes one row per model of the ensemble at input port 0; each row holds a single cell
 * created from the string form of that model's decision-tree PMML value.
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final TreeEnsembleModelPortObject ensemblePort = (TreeEnsembleModelPortObject) inObjects[0];
    final BufferedDataContainer output = exec.createDataContainer(createOutSpec(), false, 0);
    final int modelCount = ensemblePort.getEnsembleModel().getNrModels();

    for (int model = 0; model < modelCount; model++) {
        final PMMLPortObject pmml = ensemblePort.createDecisionTreePMMLPortObject(model);
        final String pmmlText = pmml.getPMMLValue().toString();
        final DataCell pmmlCell = PMMLCellFactory.create(pmmlText);
        output.addRowToTable(new DefaultRow(RowKey.createRowKey(model), pmmlCell));

        // Cancellation is checked after the row has been added, then progress is reported 1-based.
        exec.checkCanceled();
        exec.setProgress(model / (double) modelCount, "Exported model " + (model + 1) + "/" + modelCount);
    }

    output.close();
    return new BufferedDataTable[] { output.getTable() };
}
Also used : TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) RowKey(org.knime.core.data.RowKey) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 85 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method getCells.

/**
 * {@inheritDoc}
 *
 * <p>The returned array contains, in this order: the predicted class, then (if configured) the
 * prediction confidence, then (if configured) one confidence per target class, then (if
 * configured) the number of models that contributed to the prediction.
 *
 * <p>Each contributing tree adds the normalized target distribution of its matching node to a
 * per-class sum. The predicted class is the one with the largest sum. Both the prediction
 * confidence and the per-class confidences are that sum divided by the number of contributing
 * models, so the prediction confidence equals the per-class confidence of the predicted class.
 *
 * <p>If no predictor record can be created for the row, every cell is missing. If no model
 * contributed (every model was filtered out by the out-of-bag filter), every cell is missing
 * except the model count, when that column is configured.
 *
 * @param row the row to predict
 * @return the appended cells as described above
 */
@Override
public DataCell[] getCells(final DataRow row) {
    TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();

    // The prediction column is always present; the remaining columns are optional.
    int size = 1;
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    if (appendConfidence) {
        size += 1;
    }
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    final boolean appendModelCount = cfg.isAppendModelCount();
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    DataCell[] result = new DataCell[size];
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }

    OccurrenceCounter<String> counter = new OccurrenceCounter<String>();
    final int nrModels = ensembleModel.getNrModels();
    TreeTargetNominalColumnMetaData targetMeta =
        (TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData();
    final double[] classProbabilities = new double[targetMeta.getValues().length];
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
            // ignore, row was used to train the model
            continue;
        }
        TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
        TreeNodeClassification match = m.findMatchingNode(record);
        String majorityClassName = match.getMajorityClassName();
        final float[] nodeClassProbs = match.getTargetDistribution();
        // Normalize the node's target distribution by its total before adding it to the sums.
        double instancesInNode = 0;
        for (int c = 0; c < nodeClassProbs.length; c++) {
            instancesInNode += nodeClassProbs[c];
        }
        for (int c = 0; c < classProbabilities.length; c++) {
            classProbabilities[c] += nodeClassProbs[c] / instancesInNode;
        }
        counter.add(majorityClassName);
        nrValidModels += 1;
    }

    String bestValue = counter.getMostFrequent();
    int index = 0;
    if (bestValue == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        // Jump to the last slot so the model count (if configured) overwrites the missing cell.
        index = size - 1;
    } else {
        // Pick the class with the largest summed probability.
        int indexBest = -1;
        double probBest = -1;
        for (int c = 0; c < classProbabilities.length; c++) {
            double prob = classProbabilities[c];
            if (prob > probBest) {
                probBest = prob;
                indexBest = c;
            }
        }
        result[index++] = new StringCell(targetMeta.getValues()[indexBest].getNominalValue());
        if (appendConfidence) {
            // probBest is a sum over all contributing models; average it so that the confidence
            // agrees with the per-class confidences below. nrValidModels is at least 1 here
            // because the counter returned a value.
            result[index++] = new DoubleCell(probBest / nrValidModels);
        }
        if (appendClassConfidences) {
            for (NominalValueRepresentation nomVal : targetMeta.getValues()) {
                double prob = classProbabilities[nomVal.getAssignedInteger()] / nrValidModels;
                result[index++] = new DoubleCell(prob);
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(nrValidModels);
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) StringCell(org.knime.core.data.def.StringCell) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Aggregations

DataCell (org.knime.core.data.DataCell)780 DataRow (org.knime.core.data.DataRow)268 DataTableSpec (org.knime.core.data.DataTableSpec)175 DataColumnSpec (org.knime.core.data.DataColumnSpec)170 DefaultRow (org.knime.core.data.def.DefaultRow)169 ArrayList (java.util.ArrayList)141 StringCell (org.knime.core.data.def.StringCell)131 DoubleCell (org.knime.core.data.def.DoubleCell)129 DoubleValue (org.knime.core.data.DoubleValue)111 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType (org.knime.core.data.DataType)97 RowKey (org.knime.core.data.RowKey)94 BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)84 LinkedHashMap (java.util.LinkedHashMap)81 IntCell (org.knime.core.data.def.IntCell)79 HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)56