Search in sources :

Example 16 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TreeEnsembleStatisticsNodeModel method execute.

@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    TreeEnsembleModel treeEnsemble = ((TreeEnsembleModelPortObject) inObjects[0]).getEnsembleModel();
    EnsembleStatistic ensembleStats = new EnsembleStatistic(treeEnsemble);
    DataContainer containerEnsembleStats = exec.createDataContainer(createEnsembleStatsSpec());
    DataCell[] cells = new DataCell[7];
    cells[0] = new IntCell(treeEnsemble.getNrModels());
    cells[1] = new IntCell(ensembleStats.getMinLevel());
    cells[2] = new IntCell(ensembleStats.getMaxLevel());
    cells[3] = new DoubleCell(ensembleStats.getAvgLevel());
    cells[4] = new IntCell(ensembleStats.getMinNumNodes());
    cells[5] = new IntCell(ensembleStats.getMaxNumNodes());
    cells[6] = new DoubleCell(ensembleStats.getAvgNumNodes());
    containerEnsembleStats.addRowToTable(new DefaultRow(RowKey.createRowKey(0L), cells));
    containerEnsembleStats.close();
    DataContainer containerTreeStats = exec.createDataContainer(createTreeStatsSpec());
    for (int i = 0; i < treeEnsemble.getNrModels(); i++) {
        DataCell[] treeCells = new DataCell[2];
        TreeStatistic treeStat = ensembleStats.getTreeStatistic(i);
        treeCells[0] = new IntCell(treeStat.getNumLevels());
        treeCells[1] = new IntCell(treeStat.getNumNodes());
        containerTreeStats.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), treeCells));
    }
    containerTreeStats.close();
    return new PortObject[] { (PortObject) containerEnsembleStats.getTable(), (PortObject) containerTreeStats.getTable() };
}
Also used : TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) TreeStatistic(org.knime.base.node.mine.treeensemble2.statistics.TreeStatistic) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) DataContainer(org.knime.core.data.container.DataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) EnsembleStatistic(org.knime.base.node.mine.treeensemble2.statistics.EnsembleStatistic) DefaultRow(org.knime.core.data.def.DefaultRow) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) PortObject(org.knime.core.node.port.PortObject) IntCell(org.knime.core.data.def.IntCell)

Example 17 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TreeEnsembleClassificationPredictorCellFactory2 method getCells.

/**
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    TreeEnsembleModelPortObject modelObject = m_predictor.getModelObject();
    TreeEnsemblePredictorConfiguration cfg = m_predictor.getConfiguration();
    final TreeEnsembleModel ensembleModel = modelObject.getEnsembleModel();
    int size = 1;
    final boolean appendConfidence = cfg.isAppendPredictionConfidence();
    if (appendConfidence) {
        size += 1;
    }
    final boolean appendClassConfidences = cfg.isAppendClassConfidences();
    if (appendClassConfidences) {
        size += m_targetValueMap.size();
    }
    final boolean appendModelCount = cfg.isAppendModelCount();
    if (appendModelCount) {
        size += 1;
    }
    final boolean hasOutOfBagFilter = m_predictor.hasOutOfBagFilter();
    DataCell[] result = new DataCell[size];
    DataRow filterRow = new FilterColumnRow(row, m_learnColumnInRealDataIndices);
    PredictorRecord record = ensembleModel.createPredictorRecord(filterRow, m_learnSpec);
    if (record == null) {
        // missing value
        Arrays.fill(result, DataType.getMissingCell());
        return result;
    }
    OccurrenceCounter<String> counter = new OccurrenceCounter<String>();
    final int nrModels = ensembleModel.getNrModels();
    TreeTargetNominalColumnMetaData targetMeta = (TreeTargetNominalColumnMetaData) ensembleModel.getMetaData().getTargetMetaData();
    final double[] classProbabilities = new double[targetMeta.getValues().length];
    int nrValidModels = 0;
    for (int i = 0; i < nrModels; i++) {
        if (hasOutOfBagFilter && m_predictor.isRowPartOfTrainingData(row.getKey(), i)) {
        // ignore, row was used to train the model
        } else {
            TreeModelClassification m = ensembleModel.getTreeModelClassification(i);
            TreeNodeClassification match = m.findMatchingNode(record);
            String majorityClassName = match.getMajorityClassName();
            final float[] nodeClassProbs = match.getTargetDistribution();
            double instancesInNode = 0;
            for (int c = 0; c < nodeClassProbs.length; c++) {
                instancesInNode += nodeClassProbs[c];
            }
            for (int c = 0; c < classProbabilities.length; c++) {
                classProbabilities[c] += nodeClassProbs[c] / instancesInNode;
            }
            counter.add(majorityClassName);
            nrValidModels += 1;
        }
    }
    String bestValue = counter.getMostFrequent();
    int index = 0;
    if (bestValue == null) {
        assert nrValidModels == 0;
        Arrays.fill(result, DataType.getMissingCell());
        index = size - 1;
    } else {
        // result[index++] = m_targetValueMap.get(bestValue);
        int indexBest = -1;
        double probBest = -1;
        for (int c = 0; c < classProbabilities.length; c++) {
            double prob = classProbabilities[c];
            if (prob > probBest) {
                probBest = prob;
                indexBest = c;
            }
        }
        result[index++] = new StringCell(targetMeta.getValues()[indexBest].getNominalValue());
        if (appendConfidence) {
            // final int freqValue = counter.getFrequency(bestValue);
            // result[index++] = new DoubleCell(freqValue / (double)nrValidModels);
            result[index++] = new DoubleCell(probBest);
        }
        if (appendClassConfidences) {
            for (NominalValueRepresentation nomVal : targetMeta.getValues()) {
                double prob = classProbabilities[nomVal.getAssignedInteger()] / nrValidModels;
                result[index++] = new DoubleCell(prob);
            }
        }
    }
    if (appendModelCount) {
        result[index++] = new IntCell(nrValidModels);
    }
    return result;
}
Also used : TreeNodeClassification(org.knime.base.node.mine.treeensemble2.model.TreeNodeClassification) TreeEnsembleModel(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModel) TreeTargetNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnMetaData) DoubleCell(org.knime.core.data.def.DoubleCell) TreeEnsemblePredictorConfiguration(org.knime.base.node.mine.treeensemble2.node.predictor.TreeEnsemblePredictorConfiguration) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) TreeEnsembleModelPortObject(org.knime.base.node.mine.treeensemble2.model.TreeEnsembleModelPortObject) StringCell(org.knime.core.data.def.StringCell) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataCell(org.knime.core.data.DataCell) FilterColumnRow(org.knime.base.data.filter.column.FilterColumnRow) TreeModelClassification(org.knime.base.node.mine.treeensemble2.model.TreeModelClassification)

Example 18 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class RuleEngineNodeModel method createRearranger.

private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        if (m_settings.getDefaultLabel().length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!m_settings.getDefaultLabel().startsWith("$") || !m_settings.getDefaultLabel().endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        String colRef = m_settings.getDefaultLabel().substring(1, m_settings.getDefaultLabel().length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + m_settings.getDefaultLabel() + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        if (r.getOutcome() instanceof ColumnReference) {
            types.add(((ColumnReference) r.getOutcome()).spec.getType());
        } else if (r.getOutcome() instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (r.getOutcome() instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (r.getOutcome().toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (m_settings.getDefaultLabel().length() > 0) {
        try {
            Integer.parseInt(m_settings.getDefaultLabel());
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(m_settings.getDefaultLabel());
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (types.size() > 0) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {

        @Override
        public DataCell getCell(final DataRow row) {
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        DataCell cell = row.getCell(((ColumnReference) outcome).index);
                        if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                            return new StringCell(cell.toString());
                        } else {
                            return cell;
                        }
                    } else if (outType.equals(IntCell.TYPE)) {
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        return new DoubleCell((Double) outcome);
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            if (defaultLabelColumnIndex >= 0) {
                DataCell cell = row.getCell(defaultLabelColumnIndex);
                if (outType.equals(StringCell.TYPE) && !cell.getType().equals(StringCell.TYPE)) {
                    return new StringCell(cell.toString());
                } else {
                    return cell;
                }
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValue(org.knime.core.data.DataValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) ColumnReference(org.knime.base.node.rules.Rule.ColumnReference)

Example 19 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class AbstractTreeEnsembleModel method createLearnAttributeRow.

public DataRow createLearnAttributeRow(final DataRow learnRow, final DataTableSpec learnSpec) {
    final TreeType type = getType();
    final DataCell c = learnRow.getCell(0);
    final int nrAttributes = getMetaData().getNrAttributes();
    switch(type) {
        case Ordinary:
            return learnRow;
        case BitVector:
            if (c.isMissing()) {
                return null;
            }
            BitVectorValue bv = (BitVectorValue) c;
            final long length = bv.length();
            if (length != nrAttributes) {
                // TODO indicate error message
                return null;
            }
            DataCell trueCell = new StringCell("1");
            DataCell falseCell = new StringCell("0");
            DataCell[] cells = new DataCell[nrAttributes];
            for (int i = 0; i < nrAttributes; i++) {
                cells[i] = bv.get(i) ? trueCell : falseCell;
            }
            return new DefaultRow(learnRow.getKey(), cells);
        case ByteVector:
            if (c.isMissing()) {
                return null;
            }
            ByteVectorValue byteVector = (ByteVectorValue) c;
            final long bvLength = byteVector.length();
            if (bvLength != nrAttributes) {
                return null;
            }
            DataCell[] bvCells = new DataCell[nrAttributes];
            for (int i = 0; i < nrAttributes; i++) {
                bvCells[i] = new IntCell(byteVector.get(i));
            }
            return new DefaultRow(learnRow.getKey(), bvCells);
        case DoubleVector:
            if (c.isMissing()) {
                return null;
            }
            DoubleVectorValue doubleVector = (DoubleVectorValue) c;
            final int dvLength = doubleVector.getLength();
            if (dvLength != nrAttributes) {
                return null;
            }
            DataCell[] dvCells = new DataCell[nrAttributes];
            for (int i = 0; i < nrAttributes; i++) {
                dvCells[i] = new DoubleCell(doubleVector.getValue(i));
            }
            return new DefaultRow(learnRow.getKey(), dvCells);
        default:
            throw new IllegalStateException("Type unknown (not implemented): " + type);
    }
}
Also used : DoubleVectorValue(org.knime.core.data.vector.doublevector.DoubleVectorValue) DoubleCell(org.knime.core.data.def.DoubleCell) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 20 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            Value[] valueArray = dataField.getValueArray();
            DataCell[] cells;
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                cells = new StringCell[valueArray.length];
                if (valueArray.length > 0) {
                    for (int j = 0; j < cells.length; j++) {
                        cells[j] = new StringCell(valueArray[j].getValue());
                    }
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                Interval interval = dataField.getIntervalArray(0);
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else if (dataField.getValueArray() != null && dataField.getValueArray().length > 0) {
                // try to derive the bounds from the values
                Value[] valueArray = dataField.getValueArray();
                List<Double> values = new ArrayList<Double>();
                for (int j = 0; j < valueArray.length; j++) {
                    String value = "";
                    try {
                        value = valueArray[j].getValue();
                        values.add(Double.parseDouble(value));
                    } catch (Exception e) {
                        throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.");
                    }
                }
                leftMargin = Collections.min(values);
                rightMargin = Collections.max(values);
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Aggregations

IntCell (org.knime.core.data.def.IntCell)109 DataCell (org.knime.core.data.DataCell)79 DoubleCell (org.knime.core.data.def.DoubleCell)67 StringCell (org.knime.core.data.def.StringCell)55 DefaultRow (org.knime.core.data.def.DefaultRow)46 DataRow (org.knime.core.data.DataRow)33 DataTableSpec (org.knime.core.data.DataTableSpec)21 RowKey (org.knime.core.data.RowKey)21 ArrayList (java.util.ArrayList)20 DataType (org.knime.core.data.DataType)20 LongCell (org.knime.core.data.def.LongCell)14 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)14 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 BufferedDataTable (org.knime.core.node.BufferedDataTable)12 Test (org.junit.Test)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)9 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 DataContainer (org.knime.core.data.container.DataContainer)8 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)8