Search in sources :

Example 21 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class RuleEngineNodeModel method getRulesOutcome.

/**
 * @param outType
 * @param row
 * @param r
 * @param isDisallowlongOutputForCompatibility TODO
 * @param variableProvider TODO
 * @return
 * @noreference This method is not intended to be referenced by clients.
 */
public static final DataCell getRulesOutcome(final DataType outType, final DataRow row, final List<Rule> rules, final boolean isDisallowLongOutputForCompatibility, final VariableProvider variableProvider) {
    for (Rule r : rules) {
        if (r.getCondition().matches(row, variableProvider).getOutcome() == MatchState.matchedAndStop) {
            Outcome outcome2 = r.getOutcome();
            // r.getSideEffect().perform(row, this);
            DataCell cell = (DataCell) outcome2.getComputedResult(row, variableProvider);
            // ... don't want Booleans (also implementing Long), for instance)
            if (cell instanceof LongCell && isDisallowLongOutputForCompatibility) {
                long l = ((LongValue) cell).getLongValue();
                if (l > Integer.MAX_VALUE) {
                    throw new RuntimeException("Values larger than " + Integer.MAX_VALUE + " not supported in old instances of the node -- recreate the node " + "(node was created using an KNIME version < 3.2");
                }
                cell = new IntCell((int) l);
            }
            if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                return new StringCell(cell.toString());
            } else {
                return cell;
            }
        }
    }
    return DataType.getMissingCell();
}
Also used : LongCell(org.knime.core.data.def.LongCell) StringCell(org.knime.core.data.def.StringCell) Outcome(org.knime.base.node.rules.engine.Rule.Outcome) LongValue(org.knime.core.data.LongValue) DataCell(org.knime.core.data.DataCell) IntCell(org.knime.core.data.def.IntCell)

Example 22 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class FromDecisionTreeNodeModel method addRules.

/**
 * Adds the rules to {@code rs} (recursively on each leaf).
 *
 * @param rs The output {@link RuleSet}.
 * @param parents The parent stack.
 * @param node The actual node.
 */
private void addRules(final RuleSet rs, final List<DecisionTreeNode> parents, final DecisionTreeNode node) {
    if (node.isLeaf()) {
        SimpleRule rule = rs.addNewSimpleRule();
        if (m_rulesToTable.getScorePmmlRecordCount().getBooleanValue()) {
            // This increases the PMML quite significantly
            BigDecimal sum = BigDecimal.ZERO;
            final MathContext mc = new MathContext(7, RoundingMode.HALF_EVEN);
            final boolean computeProbability = m_rulesToTable.getScorePmmlProbability().getBooleanValue();
            if (computeProbability) {
                sum = new BigDecimal(node.getClassCounts().entrySet().stream().mapToDouble(e -> e.getValue().doubleValue()).sum(), mc);
            }
            for (final Entry<DataCell, Double> entry : node.getClassCounts().entrySet()) {
                final ScoreDistribution scoreDistrib = rule.addNewScoreDistribution();
                scoreDistrib.setValue(entry.getKey().toString());
                scoreDistrib.setRecordCount(entry.getValue());
                if (computeProbability) {
                    if (Double.compare(entry.getValue().doubleValue(), 0.0) == 0) {
                        scoreDistrib.setProbability(new BigDecimal(0.0));
                    } else {
                        scoreDistrib.setProbability(new BigDecimal(entry.getValue().doubleValue(), mc).divide(sum, mc));
                    }
                }
            }
        }
        CompoundPredicate and = rule.addNewCompoundPredicate();
        and.setBooleanOperator(BooleanOperator.AND);
        DecisionTreeNode n = node;
        do {
            PMMLPredicate pmmlPredicate = ((DecisionTreeNodeSplitPMML) n.getParent()).getSplitPred()[n.getParent().getIndex(n)];
            if (pmmlPredicate instanceof PMMLSimplePredicate) {
                PMMLSimplePredicate simple = (PMMLSimplePredicate) pmmlPredicate;
                SimplePredicate predicate = and.addNewSimplePredicate();
                copy(predicate, simple);
            } else if (pmmlPredicate instanceof PMMLCompoundPredicate) {
                PMMLCompoundPredicate compound = (PMMLCompoundPredicate) pmmlPredicate;
                CompoundPredicate predicate = and.addNewCompoundPredicate();
                copy(predicate, compound);
            } else if (pmmlPredicate instanceof PMMLSimpleSetPredicate) {
                PMMLSimpleSetPredicate simpleSet = (PMMLSimpleSetPredicate) pmmlPredicate;
                copy(and.addNewSimpleSetPredicate(), simpleSet);
            } else if (pmmlPredicate instanceof PMMLTruePredicate) {
                and.addNewTrue();
            } else if (pmmlPredicate instanceof PMMLFalsePredicate) {
                and.addNewFalse();
            }
            n = n.getParent();
        } while (n.getParent() != null);
        // Simple fix for the case when a single condition was used.
        while (and.getFalseList().size() + and.getCompoundPredicateList().size() + and.getSimplePredicateList().size() + and.getSimpleSetPredicateList().size() + and.getTrueList().size() < 2) {
            and.addNewTrue();
        }
        if (m_rulesToTable.getProvideStatistics().getBooleanValue()) {
            rule.setNbCorrect(node.getOwnClassCount());
            rule.setRecordCount(node.getEntireClassCount());
        }
        rule.setScore(node.getMajorityClass().toString());
    } else {
        parents.add(node);
        for (int i = 0; i < node.getChildCount(); ++i) {
            addRules(rs, parents, node.getChildAt(i));
        }
        parents.remove(node);
    }
}
Also used : PMMLDocument(org.dmg.pmml.PMMLDocument) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) BooleanOperator(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate.BooleanOperator) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) PMMLMiningSchemaTranslator(org.knime.core.node.port.pmml.PMMLMiningSchemaTranslator) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) BigDecimal(java.math.BigDecimal) PMML(org.dmg.pmml.PMMLDocument.PMML) PMMLFalsePredicate(org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate) RuleSetToTable(org.knime.base.node.rules.engine.totable.RuleSetToTable) PMMLSimpleSetPredicate(org.knime.base.node.mine.decisiontree2.PMMLSimpleSetPredicate) RoundingMode(java.math.RoundingMode) PortType(org.knime.core.node.port.PortType) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) SimpleSetPredicate(org.dmg.pmml.SimpleSetPredicateDocument.SimpleSetPredicate) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) MathContext(java.math.MathContext) NodeModel(org.knime.core.node.NodeModel) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode) List(java.util.List) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLTruePredicate(org.knime.base.node.mine.decisiontree2.PMMLTruePredicate) PMMLDataDictionaryTranslator(org.knime.core.node.port.pmml.PMMLDataDictionaryTranslator) Entry(java.util.Map.Entry) PortObject(org.knime.core.node.port.PortObject) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) MININGFUNCTION(org.dmg.pmml.MININGFUNCTION) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) PMMLOperator(org.knime.base.node.mine.decisiontree2.PMMLOperator) Criterion(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod.Criterion) RuleSet(org.dmg.pmml.RuleSetDocument.RuleSet) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) Enum(org.dmg.pmml.SimplePredicateDocument.SimplePredicate.Operator.Enum) ArrayList(java.util.ArrayList) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) DataCell(org.knime.core.data.DataCell) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMMLPortObjectSpecCreator(org.knime.core.node.port.pmml.PMMLPortObjectSpecCreator) PMMLPredicateTranslator(org.knime.base.node.mine.decisiontree2.PMMLPredicateTranslator) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) IOException(java.io.IOException) File(java.io.File) NodeSettingsWO(org.knime.core.node.NodeSettingsWO) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) PMMLFalsePredicate(org.knime.base.node.mine.decisiontree2.PMMLFalsePredicate) BigDecimal(java.math.BigDecimal) MathContext(java.math.MathContext) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) SimplePredicate(org.dmg.pmml.SimplePredicateDocument.SimplePredicate) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) PMMLTruePredicate(org.knime.base.node.mine.decisiontree2.PMMLTruePredicate) ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) PMMLSimpleSetPredicate(org.knime.base.node.mine.decisiontree2.PMMLSimpleSetPredicate) DataCell(org.knime.core.data.DataCell) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) PMMLCompoundPredicate(org.knime.base.node.mine.decisiontree2.PMMLCompoundPredicate) CompoundPredicate(org.dmg.pmml.CompoundPredicateDocument.CompoundPredicate) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 23 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 */
@Override
protected DataTableSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec original = (DataTableSpec) inSpecs[DATA_INDEX];
    ColumnRearranger rearranger = new ColumnRearranger(original);
    PMMLPortObjectSpec portObjectSpec = (PMMLPortObjectSpec) inSpecs[MODEL_INDEX];
    List<DataColumnSpec> activeColumnList = portObjectSpec.getActiveColumnList();
    List<DataColumnSpec> notFound = new ArrayList<DataColumnSpec>();
    for (DataColumnSpec dataColumnSpec : activeColumnList) {
        if (original.containsName(dataColumnSpec.getName())) {
            DataColumnSpec origSpec = original.getColumnSpec(dataColumnSpec.getName());
            if (!origSpec.getType().equals(dataColumnSpec.getType())) {
                notFound.add(dataColumnSpec);
            }
        } else {
            notFound.add(dataColumnSpec);
        }
    }
    if (!notFound.isEmpty()) {
        StringBuilder sb = new StringBuilder("Incompatible to the table, the following columns are not present, or have a wrong type:");
        for (DataColumnSpec dataColumnSpec : notFound) {
            sb.append("\n   ").append(dataColumnSpec);
        }
        throw new InvalidSettingsException(sb.toString());
    }
    List<DataColumnSpec> targetCols = portObjectSpec.getTargetCols();
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator;
    if (m_doReplaceColumn.getBooleanValue()) {
        String col = m_replaceColumn.getStringValue();
        specCreator = new DataColumnSpecCreator(col, dataType);
    } else {
        specCreator = new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(original, m_outputColumn.getStringValue()), dataType);
    }
    SingleCellFactory dummy = new SingleCellFactory(specCreator.createSpec()) {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell getCell(final DataRow row) {
            throw new IllegalStateException();
        }
    };
    if (m_addConfidence.getBooleanValue()) {
        rearranger.append(new SingleCellFactory(new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(rearranger.createSpec(), m_confidenceColumn.getStringValue()), DoubleCell.TYPE).createSpec()) {

            @Override
            public DataCell getCell(final DataRow row) {
                throw new IllegalStateException();
            }
        });
    }
    if (m_doReplaceColumn.getBooleanValue()) {
        rearranger.replace(dummy, m_replaceColumn.getStringValue());
    } else {
        rearranger.append(dummy);
    }
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 24 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method createRearranger.

/**
 * Constructs the {@link ColumnRearranger} for computing the new columns.
 *
 * @param obj The {@link PMMLPortObject} of the preprocessing model.
 * @param spec The {@link DataTableSpec} of the table.
 * @param replaceColumn Should replace the {@code outputColumnName}?
 * @param outputColumnName The output column name (which might be an existing).
 * @param addConfidence Should add the confidence values to a column?
 * @param confidenceColumnName The name of the confidence column.
 * @param validationColumnIdx Index of the validation column, {@code -1} if not specified.
 * @param processConcurrently Should be {@code false} when the statistics are to be computed.
 * @return The {@link ColumnRearranger} computing the result.
 * @throws InvalidSettingsException Problem with rules.
 */
private static ColumnRearranger createRearranger(final PMMLPortObject obj, final DataTableSpec spec, final boolean replaceColumn, final String outputColumnName, final boolean addConfidence, final String confidenceColumnName, final int validationColumnIdx, final boolean processConcurrently) throws InvalidSettingsException {
    List<Node> models = obj.getPMMLValue().getModels(PMMLModelType.RuleSetModel);
    if (models.size() != 1) {
        throw new InvalidSettingsException("Expected exactly on RuleSetModel, but got: " + models.size());
    }
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    obj.initializeModelTranslator(translator);
    if (!translator.isScorable()) {
        throw new UnsupportedOperationException("The model is not scorable.");
    }
    final List<PMMLRuleTranslator.Rule> rules = translator.getRules();
    ColumnRearranger ret = new ColumnRearranger(spec);
    final List<DataColumnSpec> targetCols = obj.getSpec().getTargetCols();
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator = new DataColumnSpecCreator(outputColumnName, dataType);
    Set<DataCell> outcomes = new LinkedHashSet<>();
    for (Rule rule : rules) {
        DataCell outcome;
        if (dataType.equals(BooleanCell.TYPE)) {
            outcome = BooleanCellFactory.create(rule.getOutcome());
        } else if (dataType.equals(StringCell.TYPE)) {
            outcome = new StringCell(rule.getOutcome());
        } else if (dataType.equals(DoubleCell.TYPE)) {
            try {
                outcome = new DoubleCell(Double.parseDouble(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(IntCell.TYPE)) {
            try {
                outcome = new IntCell(Integer.parseInt(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(LongCell.TYPE)) {
            try {
                outcome = new LongCell(Long.parseLong(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else {
            throw new UnsupportedOperationException("Unknown outcome type: " + dataType);
        }
        outcomes.add(outcome);
    }
    specCreator.setDomain(new DataColumnDomainCreator(outcomes).createDomain());
    DataColumnSpec colSpec = specCreator.createSpec();
    final RuleSelectionMethod ruleSelectionMethod = translator.getSelectionMethodList().get(0);
    final String defaultScore = translator.getDefaultScore();
    final Double defaultConfidence = translator.getDefaultConfidence();
    final DataColumnSpec[] specs;
    if (addConfidence) {
        specs = new DataColumnSpec[] { new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(ret.createSpec(), confidenceColumnName), DoubleCell.TYPE).createSpec(), colSpec };
    } else {
        specs = new DataColumnSpec[] { colSpec };
    }
    final int oldColumnIndex = replaceColumn ? ret.indexOf(outputColumnName) : -1;
    ret.append(new AbstractCellFactory(processConcurrently, specs) {

        private final List<String> m_values;

        {
            Map<String, List<String>> dd = translator.getDataDictionary();
            m_values = dd.get(targetCols.get(0).getName());
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // See http://www.dmg.org/v4-1/RuleSet.html#Rule
            switch(ruleSelectionMethod.getCriterion().intValue()) {
                case RuleSelectionMethod.Criterion.INT_FIRST_HIT:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectFirstHit(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_MAX:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedMax(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_SUM:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedSum(row);
                        return toCells(resultAndConfidence);
                    }
                default:
                    throw new UnsupportedOperationException(ruleSelectionMethod.getCriterion().toString());
            }
        }

        /**
         * Converts the pair to a {@link DataCell} array.
         *
         * @param resultAndConfidence The {@link Pair}.
         * @return The result and possibly the confidence.
         */
        private DataCell[] toCells(final Pair<DataCell, Double> resultAndConfidence) {
            if (!addConfidence) {
                return new DataCell[] { resultAndConfidence.getFirst() };
            }
            if (resultAndConfidence.getSecond() == null) {
                return new DataCell[] { DataType.getMissingCell(), resultAndConfidence.getFirst() };
            }
            return new DataCell[] { new DoubleCell(resultAndConfidence.getSecond()), resultAndConfidence.getFirst() };
        }

        /**
         * Computes the result and the confidence using the weighted sum method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedSum(final DataRow row) {
            final Map<String, Double> scoreToSumWeight = new LinkedHashMap<String, Double>();
            for (String val : m_values) {
                scoreToSumWeight.put(val, 0.0);
            }
            int matchedRuleCount = 0;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    ++matchedRuleCount;
                    Double sumWeight = scoreToSumWeight.get(rule.getOutcome());
                    if (sumWeight == null) {
                        throw new IllegalStateException("The score value: " + rule.getOutcome() + " is not in the data dictionary.");
                    }
                    final Double wRaw = rule.getWeight();
                    final double w = wRaw == null ? 0.0 : wRaw.doubleValue();
                    scoreToSumWeight.put(rule.getOutcome(), sumWeight + w);
                }
            }
            double maxSumWeight = Double.NEGATIVE_INFINITY;
            String bestScore = null;
            for (Entry<String, Double> entry : scoreToSumWeight.entrySet()) {
                final double d = entry.getValue().doubleValue();
                if (d > maxSumWeight) {
                    maxSumWeight = d;
                    bestScore = entry.getKey();
                }
            }
            if (bestScore == null || matchedRuleCount == 0) {
                return pair(result(defaultScore), defaultConfidence);
            }
            return pair(result(bestScore), maxSumWeight / matchedRuleCount);
        }

        /**
         * Helper method to create {@link Pair}s.
         *
         * @param f The first element.
         * @param s The second element.
         * @return The new pair.
         */
        private <F, S> Pair<F, S> pair(final F f, final S s) {
            return new Pair<F, S>(f, s);
        }

        /**
         * Computes the result and the confidence using the weighted max method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedMax(final DataRow row) {
            double maxWeight = Double.NEGATIVE_INFINITY;
            PMMLRuleTranslator.Rule bestRule = null;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    if (rule.getWeight() > maxWeight) {
                        maxWeight = rule.getWeight();
                        bestRule = rule;
                    }
                }
            }
            if (bestRule == null) {
                return pair(result(defaultScore), defaultConfidence);
            }
            bestRule.setRecordCount(bestRule.getRecordCount() + 1);
            DataCell result = result(bestRule);
            if (validationColumnIdx >= 0) {
                if (row.getCell(validationColumnIdx).equals(result)) {
                    bestRule.setNbCorrect(bestRule.getNbCorrect() + 1);
                }
            }
            Double confidence = bestRule.getConfidence();
            return pair(result, confidence == null ? defaultConfidence : confidence);
        }

        /**
         * Selects the outcome of the rule and converts it to the proper outcome type.
         *
         * @param rule A {@link Rule}.
         * @return The {@link DataCell} representing the result. (May be missing.)
         */
        private DataCell result(final PMMLRuleTranslator.Rule rule) {
            String outcome = rule.getOutcome();
            return result(outcome);
        }

        /**
         * Constructs the {@link DataCell} from its {@link String} representation ({@code outcome}) and its type.
         *
         * @param dataType The expected {@link DataType}
         * @param outcome The {@link String} representation.
         * @return The {@link DataCell}.
         */
        private DataCell result(final String outcome) {
            if (outcome == null) {
                return DataType.getMissingCell();
            }
            try {
                if (dataType.isCompatible(BooleanValue.class)) {
                    return BooleanCellFactory.create(outcome);
                }
                if (IntCell.TYPE.isASuperTypeOf(dataType)) {
                    return new IntCell(Integer.parseInt(outcome));
                }
                if (LongCell.TYPE.isASuperTypeOf(dataType)) {
                    return new LongCell(Long.parseLong(outcome));
                }
                if (DoubleCell.TYPE.isASuperTypeOf(dataType)) {
                    return new DoubleCell(Double.parseDouble(outcome));
                }
                return new StringCell(outcome);
            } catch (NumberFormatException e) {
                return new MissingCell(outcome + "\n" + e.getMessage());
            }
        }

        /**
         * Selects the first rule that matches and computes the confidence and result for the {@code row}.
         *
         * @param row A {@link DataRow}.
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectFirstHit(final DataRow row) {
            for (final PMMLRuleTranslator.Rule rule : rules) {
                Boolean eval = rule.getCondition().evaluate(row, spec);
                if (eval == Boolean.TRUE) {
                    rule.setRecordCount(rule.getRecordCount() + 1);
                    DataCell result = result(rule);
                    if (validationColumnIdx >= 0) {
                        if (row.getCell(validationColumnIdx).equals(result)) {
                            rule.setNbCorrect(rule.getNbCorrect() + 1);
                        }
                    }
                    Double confidence = rule.getConfidence();
                    return pair(result, confidence == null ? defaultConfidence : confidence);
                }
            }
            return pair(result(defaultScore), defaultConfidence);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void afterProcessing() {
            super.afterProcessing();
            obj.getPMMLValue();
            RuleSetModel ruleSet = translator.getOriginalRuleSetModel();
            assert rules.size() == ruleSet.getRuleSet().getSimpleRuleList().size() + ruleSet.getRuleSet().getCompoundRuleList().size();
            if (ruleSet.getRuleSet().getSimpleRuleList().size() == rules.size()) {
                for (int i = 0; i < rules.size(); ++i) {
                    Rule rule = rules.get(i);
                    final SimpleRule simpleRuleArray = ruleSet.getRuleSet().getSimpleRuleArray(i);
                    synchronized (simpleRuleArray) /*synchronized fixes AP-6766 */
                    {
                        simpleRuleArray.setRecordCount(rule.getRecordCount());
                        if (validationColumnIdx >= 0) {
                            simpleRuleArray.setNbCorrect(rule.getNbCorrect());
                        } else if (simpleRuleArray.isSetNbCorrect()) {
                            simpleRuleArray.unsetNbCorrect();
                        }
                    }
                }
            }
        }
    });
    if (replaceColumn) {
        ret.remove(outputColumnName);
        ret.move(ret.getColumnCount() - 1 - (addConfidence ? 1 : 0), oldColumnIndex);
    }
    return ret;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) Entry(java.util.Map.Entry) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BooleanValue(org.knime.core.data.BooleanValue) DataType(org.knime.core.data.DataType) SettingsModelBoolean(org.knime.core.node.defaultnodesettings.SettingsModelBoolean) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) RuleSelectionMethod(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) LongCell(org.knime.core.data.def.LongCell) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 25 with DataCell

use of org.knime.core.data.DataCell in project knime-core by knime.

the class NumericOutliersReviser method replaceOutliers.

/**
 * Replaces outliers found in the row input according to the selected replacement option. Additionally, the outlier
 * replacement counts and new domains are calculated.
 *
 * @param exec the execution context
 * @param in the row input whose outliers have to be treated
 * @param out the row output whose outliers have been treated
 * @param outlierModel the model storing the permitted intervals
 * @param memberCounter the member counter
 * @param outlierRepCounter the outlier replacement counter
 * @param missingGroupsCounter the missing groups counter
 * @throws Exception any exception to indicate an error, cancelation
 */
private void replaceOutliers(final ExecutionContext exec, final RowInput in, final RowOutput out, final NumericOutliersModel outlierModel, final MemberCounter memberCounter, final MemberCounter outlierRepCounter, final MemberCounter missingGroupsCounter) throws Exception {
    // total number of outlier columns
    final int noOutliers = m_outlierColNames.length;
    // the in table spec
    final DataTableSpec inSpec = in.getDataTableSpec();
    // create column re-arranger to overwrite cells corresponding to outliers
    final ColumnRearranger colRearranger = new ColumnRearranger(inSpec);
    // store the positions where the outlier column names can be found in the input table
    final int[] outlierIndices = calculateOutlierIndicies(inSpec);
    final DataColumnSpec[] outlierSpecs = new DataColumnSpec[noOutliers];
    for (int i = 0; i < noOutliers; i++) {
        outlierSpecs[i] = inSpec.getColumnSpec(outlierIndices[i]);
    }
    // values are copied anyways by the re-arranger so there is no need to
    // create new instances for each row
    final DataCell[] treatedVals = new DataCell[noOutliers];
    final AbstractCellFactory fac = new AbstractCellFactory(true, outlierSpecs) {

        @Override
        public DataCell[] getCells(final DataRow row) {
            final GroupKey key = outlierModel.getKey(row, inSpec);
            final Map<String, double[]> colsMap = outlierModel.getGroupIntervals(key);
            for (int i = 0; i < noOutliers; i++) {
                final DataCell curCell = row.getCell(outlierIndices[i]);
                final DataCell treatedCell;
                final String outlierColName = m_outlierColNames[i];
                if (!curCell.isMissing()) {
                    // if the key exists treat the value otherwise we process an unkown group
                    if (colsMap != null) {
                        // increment the member counter
                        memberCounter.incrementMemberCount(outlierColName, key);
                        // treat the value of the cell if its a outlier
                        treatedCell = treatCellValue(colsMap.get(outlierColName), curCell);
                    } else {
                        missingGroupsCounter.incrementMemberCount(outlierColName, key);
                        treatedCell = curCell;
                    }
                } else {
                    treatedCell = curCell;
                }
                // if we changed the value this is an outlier
                if (!treatedCell.equals(curCell)) {
                    outlierRepCounter.incrementMemberCount(outlierColName, key);
                }
                // update the domain if necessary
                if (m_updateDomain && !treatedCell.isMissing()) {
                    m_domainUpdater.updateDomain(outlierColName, ((DoubleValue) treatedCell).getDoubleValue());
                }
                treatedVals[i] = treatedCell;
            }
            return treatedVals;
        }
    };
    // replace the outlier columns by their updated versions
    colRearranger.replace(fac, outlierIndices);
    // stream it
    colRearranger.createStreamableFunction().runFinal(new PortInput[] { in }, new PortOutput[] { out }, exec);
    exec.setProgress(1);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) GroupKey(org.knime.base.node.preproc.groupby.GroupKey) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell)

Aggregations

DataCell (org.knime.core.data.DataCell)780 DataRow (org.knime.core.data.DataRow)268 DataTableSpec (org.knime.core.data.DataTableSpec)175 DataColumnSpec (org.knime.core.data.DataColumnSpec)170 DefaultRow (org.knime.core.data.def.DefaultRow)169 ArrayList (java.util.ArrayList)141 StringCell (org.knime.core.data.def.StringCell)131 DoubleCell (org.knime.core.data.def.DoubleCell)129 DoubleValue (org.knime.core.data.DoubleValue)111 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)109 DataType (org.knime.core.data.DataType)97 RowKey (org.knime.core.data.RowKey)94 BufferedDataTable (org.knime.core.node.BufferedDataTable)93 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)91 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)84 LinkedHashMap (java.util.LinkedHashMap)81 IntCell (org.knime.core.data.def.IntCell)79 HashMap (java.util.HashMap)60 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)57 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)56