Search in sources :

Example 1 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TSAverageHandler method getMean.

private DataCell getMean() {
    if (m_previous instanceof IntValue) {
        // get an int, create an int
        double mean = (((DoubleValue) m_previous).getDoubleValue() + ((DoubleValue) m_next).getDoubleValue()) * 0.5;
        return new IntCell((int) Math.round(mean));
    }
    if (m_previous instanceof LongValue) {
        // get an int, create an int
        double mean = (((DoubleValue) m_previous).getDoubleValue() + ((DoubleValue) m_next).getDoubleValue()) * 0.5;
        return new LongCell(Math.round(mean));
    }
    if (m_previous instanceof DoubleValue) {
        // get an double, create an double
        double mean = (((DoubleValue) m_previous).getDoubleValue() + ((DoubleValue) m_next).getDoubleValue()) * 0.5;
        return new DoubleCell(mean);
    }
    if (m_previous instanceof DateAndTimeValue) {
        // get an int, create an int
        DateAndTimeValue dataCell1 = (DateAndTimeValue) m_previous;
        DateAndTimeValue dataCell2 = ((DateAndTimeValue) m_next);
        boolean hasDate = dataCell1.hasDate() | dataCell2.hasDate();
        boolean hasTime = dataCell1.hasTime() | dataCell2.hasTime();
        boolean hasMilis = dataCell1.hasMillis() | dataCell2.hasMillis();
        double d = dataCell1.getUTCTimeInMillis() + dataCell2.getUTCTimeInMillis();
        d *= 0.5;
        return new DateAndTimeCell((long) d, hasDate, hasTime, hasMilis);
    }
    return DataType.getMissingCell();
}
Also used : LongCell(org.knime.core.data.def.LongCell) DoubleValue(org.knime.core.data.DoubleValue) DoubleCell(org.knime.core.data.def.DoubleCell) DateAndTimeValue(org.knime.core.data.date.DateAndTimeValue) LongValue(org.knime.core.data.LongValue) DateAndTimeCell(org.knime.core.data.date.DateAndTimeCell) IntValue(org.knime.core.data.IntValue) IntCell(org.knime.core.data.def.IntCell)

Example 2 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TSLinearHandler method getLinearInterpolation.

private DataCell getLinearInterpolation(final double stepnumber, final double stepcount) {
    if (m_previous instanceof DoubleValue || m_previous instanceof IntValue || m_previous instanceof LongValue) {
        double prev = ((DoubleValue) m_previous).getDoubleValue();
        double next = ((DoubleValue) m_next).getDoubleValue();
        double lin = prev + 1.0 * stepnumber / (1.0 * stepcount) * (next - prev);
        if (m_previous instanceof IntValue) {
            // get an int, create an int
            return new IntCell((int) Math.round(lin));
        }
        if (m_previous instanceof LongValue) {
            // get an long, create an long
            return new LongCell(Math.round(lin));
        }
        return new DoubleCell(lin);
    }
    if (m_previous instanceof DateAndTimeValue) {
        // get an int, create an int
        DateAndTimeValue dataCell1 = (DateAndTimeValue) m_previous;
        DateAndTimeValue dataCell2 = ((DateAndTimeValue) m_next);
        boolean hasDate = dataCell1.hasDate() | dataCell2.hasDate();
        boolean hasTime = dataCell1.hasTime() | dataCell2.hasTime();
        boolean hasMilis = dataCell1.hasMillis() | dataCell2.hasMillis();
        double prev = dataCell1.getUTCTimeInMillis();
        double next = dataCell2.getUTCTimeInMillis();
        double d = prev + stepnumber / stepcount * (next - prev);
        return new DateAndTimeCell((long) d, hasDate, hasTime, hasMilis);
    }
    return DataType.getMissingCell();
}
Also used : LongCell(org.knime.core.data.def.LongCell) DoubleValue(org.knime.core.data.DoubleValue) DoubleCell(org.knime.core.data.def.DoubleCell) DateAndTimeValue(org.knime.core.data.date.DateAndTimeValue) LongValue(org.knime.core.data.LongValue) DateAndTimeCell(org.knime.core.data.date.DateAndTimeCell) IntValue(org.knime.core.data.IntValue) IntCell(org.knime.core.data.def.IntCell)

Example 3 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class RuleEngineNodeModel method getRulesOutcome.

/**
 * @param outType
 * @param row
 * @param r
 * @param isDisallowlongOutputForCompatibility TODO
 * @param variableProvider TODO
 * @return
 * @noreference This method is not intended to be referenced by clients.
 */
public static final DataCell getRulesOutcome(final DataType outType, final DataRow row, final List<Rule> rules, final boolean isDisallowLongOutputForCompatibility, final VariableProvider variableProvider) {
    for (Rule r : rules) {
        if (r.getCondition().matches(row, variableProvider).getOutcome() == MatchState.matchedAndStop) {
            Outcome outcome2 = r.getOutcome();
            // r.getSideEffect().perform(row, this);
            DataCell cell = (DataCell) outcome2.getComputedResult(row, variableProvider);
            // ... don't want Booleans (also implementing Long), for instance)
            if (cell instanceof LongCell && isDisallowLongOutputForCompatibility) {
                long l = ((LongValue) cell).getLongValue();
                if (l > Integer.MAX_VALUE) {
                    throw new RuntimeException("Values larger than " + Integer.MAX_VALUE + " not supported in old instances of the node -- recreate the node " + "(node was created using an KNIME version < 3.2");
                }
                cell = new IntCell((int) l);
            }
            if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                return new StringCell(cell.toString());
            } else {
                return cell;
            }
        }
    }
    return DataType.getMissingCell();
}
Also used : LongCell(org.knime.core.data.def.LongCell) StringCell(org.knime.core.data.def.StringCell) Outcome(org.knime.base.node.rules.engine.Rule.Outcome) LongValue(org.knime.core.data.LongValue) DataCell(org.knime.core.data.DataCell) IntCell(org.knime.core.data.def.IntCell)

Example 4 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method createRearranger.

/**
 * Constructs the {@link ColumnRearranger} for computing the new columns.
 *
 * @param obj The {@link PMMLPortObject} of the preprocessing model.
 * @param spec The {@link DataTableSpec} of the table.
 * @param replaceColumn Should replace the {@code outputColumnName}?
 * @param outputColumnName The output column name (which might be an existing).
 * @param addConfidence Should add the confidence values to a column?
 * @param confidenceColumnName The name of the confidence column.
 * @param validationColumnIdx Index of the validation column, {@code -1} if not specified.
 * @param processConcurrently Should be {@code false} when the statistics are to be computed.
 * @return The {@link ColumnRearranger} computing the result.
 * @throws InvalidSettingsException Problem with rules.
 */
private static ColumnRearranger createRearranger(final PMMLPortObject obj, final DataTableSpec spec, final boolean replaceColumn, final String outputColumnName, final boolean addConfidence, final String confidenceColumnName, final int validationColumnIdx, final boolean processConcurrently) throws InvalidSettingsException {
    List<Node> models = obj.getPMMLValue().getModels(PMMLModelType.RuleSetModel);
    if (models.size() != 1) {
        throw new InvalidSettingsException("Expected exactly on RuleSetModel, but got: " + models.size());
    }
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    obj.initializeModelTranslator(translator);
    if (!translator.isScorable()) {
        throw new UnsupportedOperationException("The model is not scorable.");
    }
    final List<PMMLRuleTranslator.Rule> rules = translator.getRules();
    ColumnRearranger ret = new ColumnRearranger(spec);
    final List<DataColumnSpec> targetCols = obj.getSpec().getTargetCols();
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator = new DataColumnSpecCreator(outputColumnName, dataType);
    Set<DataCell> outcomes = new LinkedHashSet<>();
    for (Rule rule : rules) {
        DataCell outcome;
        if (dataType.equals(BooleanCell.TYPE)) {
            outcome = BooleanCellFactory.create(rule.getOutcome());
        } else if (dataType.equals(StringCell.TYPE)) {
            outcome = new StringCell(rule.getOutcome());
        } else if (dataType.equals(DoubleCell.TYPE)) {
            try {
                outcome = new DoubleCell(Double.parseDouble(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(IntCell.TYPE)) {
            try {
                outcome = new IntCell(Integer.parseInt(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(LongCell.TYPE)) {
            try {
                outcome = new LongCell(Long.parseLong(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else {
            throw new UnsupportedOperationException("Unknown outcome type: " + dataType);
        }
        outcomes.add(outcome);
    }
    specCreator.setDomain(new DataColumnDomainCreator(outcomes).createDomain());
    DataColumnSpec colSpec = specCreator.createSpec();
    final RuleSelectionMethod ruleSelectionMethod = translator.getSelectionMethodList().get(0);
    final String defaultScore = translator.getDefaultScore();
    final Double defaultConfidence = translator.getDefaultConfidence();
    final DataColumnSpec[] specs;
    if (addConfidence) {
        specs = new DataColumnSpec[] { new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(ret.createSpec(), confidenceColumnName), DoubleCell.TYPE).createSpec(), colSpec };
    } else {
        specs = new DataColumnSpec[] { colSpec };
    }
    final int oldColumnIndex = replaceColumn ? ret.indexOf(outputColumnName) : -1;
    ret.append(new AbstractCellFactory(processConcurrently, specs) {

        private final List<String> m_values;

        {
            Map<String, List<String>> dd = translator.getDataDictionary();
            m_values = dd.get(targetCols.get(0).getName());
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // See http://www.dmg.org/v4-1/RuleSet.html#Rule
            switch(ruleSelectionMethod.getCriterion().intValue()) {
                case RuleSelectionMethod.Criterion.INT_FIRST_HIT:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectFirstHit(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_MAX:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedMax(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_SUM:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedSum(row);
                        return toCells(resultAndConfidence);
                    }
                default:
                    throw new UnsupportedOperationException(ruleSelectionMethod.getCriterion().toString());
            }
        }

        /**
         * Converts the pair to a {@link DataCell} array.
         *
         * @param resultAndConfidence The {@link Pair}.
         * @return The result and possibly the confidence.
         */
        private DataCell[] toCells(final Pair<DataCell, Double> resultAndConfidence) {
            if (!addConfidence) {
                return new DataCell[] { resultAndConfidence.getFirst() };
            }
            if (resultAndConfidence.getSecond() == null) {
                return new DataCell[] { DataType.getMissingCell(), resultAndConfidence.getFirst() };
            }
            return new DataCell[] { new DoubleCell(resultAndConfidence.getSecond()), resultAndConfidence.getFirst() };
        }

        /**
         * Computes the result and the confidence using the weighted sum method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedSum(final DataRow row) {
            final Map<String, Double> scoreToSumWeight = new LinkedHashMap<String, Double>();
            for (String val : m_values) {
                scoreToSumWeight.put(val, 0.0);
            }
            int matchedRuleCount = 0;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    ++matchedRuleCount;
                    Double sumWeight = scoreToSumWeight.get(rule.getOutcome());
                    if (sumWeight == null) {
                        throw new IllegalStateException("The score value: " + rule.getOutcome() + " is not in the data dictionary.");
                    }
                    final Double wRaw = rule.getWeight();
                    final double w = wRaw == null ? 0.0 : wRaw.doubleValue();
                    scoreToSumWeight.put(rule.getOutcome(), sumWeight + w);
                }
            }
            double maxSumWeight = Double.NEGATIVE_INFINITY;
            String bestScore = null;
            for (Entry<String, Double> entry : scoreToSumWeight.entrySet()) {
                final double d = entry.getValue().doubleValue();
                if (d > maxSumWeight) {
                    maxSumWeight = d;
                    bestScore = entry.getKey();
                }
            }
            if (bestScore == null || matchedRuleCount == 0) {
                return pair(result(defaultScore), defaultConfidence);
            }
            return pair(result(bestScore), maxSumWeight / matchedRuleCount);
        }

        /**
         * Helper method to create {@link Pair}s.
         *
         * @param f The first element.
         * @param s The second element.
         * @return The new pair.
         */
        private <F, S> Pair<F, S> pair(final F f, final S s) {
            return new Pair<F, S>(f, s);
        }

        /**
         * Computes the result and the confidence using the weighted max method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedMax(final DataRow row) {
            double maxWeight = Double.NEGATIVE_INFINITY;
            PMMLRuleTranslator.Rule bestRule = null;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    if (rule.getWeight() > maxWeight) {
                        maxWeight = rule.getWeight();
                        bestRule = rule;
                    }
                }
            }
            if (bestRule == null) {
                return pair(result(defaultScore), defaultConfidence);
            }
            bestRule.setRecordCount(bestRule.getRecordCount() + 1);
            DataCell result = result(bestRule);
            if (validationColumnIdx >= 0) {
                if (row.getCell(validationColumnIdx).equals(result)) {
                    bestRule.setNbCorrect(bestRule.getNbCorrect() + 1);
                }
            }
            Double confidence = bestRule.getConfidence();
            return pair(result, confidence == null ? defaultConfidence : confidence);
        }

        /**
         * Selects the outcome of the rule and converts it to the proper outcome type.
         *
         * @param rule A {@link Rule}.
         * @return The {@link DataCell} representing the result. (May be missing.)
         */
        private DataCell result(final PMMLRuleTranslator.Rule rule) {
            String outcome = rule.getOutcome();
            return result(outcome);
        }

        /**
         * Constructs the {@link DataCell} from its {@link String} representation ({@code outcome}) and its type.
         *
         * @param dataType The expected {@link DataType}
         * @param outcome The {@link String} representation.
         * @return The {@link DataCell}.
         */
        private DataCell result(final String outcome) {
            if (outcome == null) {
                return DataType.getMissingCell();
            }
            try {
                if (dataType.isCompatible(BooleanValue.class)) {
                    return BooleanCellFactory.create(outcome);
                }
                if (IntCell.TYPE.isASuperTypeOf(dataType)) {
                    return new IntCell(Integer.parseInt(outcome));
                }
                if (LongCell.TYPE.isASuperTypeOf(dataType)) {
                    return new LongCell(Long.parseLong(outcome));
                }
                if (DoubleCell.TYPE.isASuperTypeOf(dataType)) {
                    return new DoubleCell(Double.parseDouble(outcome));
                }
                return new StringCell(outcome);
            } catch (NumberFormatException e) {
                return new MissingCell(outcome + "\n" + e.getMessage());
            }
        }

        /**
         * Selects the first rule that matches and computes the confidence and result for the {@code row}.
         *
         * @param row A {@link DataRow}.
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectFirstHit(final DataRow row) {
            for (final PMMLRuleTranslator.Rule rule : rules) {
                Boolean eval = rule.getCondition().evaluate(row, spec);
                if (eval == Boolean.TRUE) {
                    rule.setRecordCount(rule.getRecordCount() + 1);
                    DataCell result = result(rule);
                    if (validationColumnIdx >= 0) {
                        if (row.getCell(validationColumnIdx).equals(result)) {
                            rule.setNbCorrect(rule.getNbCorrect() + 1);
                        }
                    }
                    Double confidence = rule.getConfidence();
                    return pair(result, confidence == null ? defaultConfidence : confidence);
                }
            }
            return pair(result(defaultScore), defaultConfidence);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void afterProcessing() {
            super.afterProcessing();
            obj.getPMMLValue();
            RuleSetModel ruleSet = translator.getOriginalRuleSetModel();
            assert rules.size() == ruleSet.getRuleSet().getSimpleRuleList().size() + ruleSet.getRuleSet().getCompoundRuleList().size();
            if (ruleSet.getRuleSet().getSimpleRuleList().size() == rules.size()) {
                for (int i = 0; i < rules.size(); ++i) {
                    Rule rule = rules.get(i);
                    final SimpleRule simpleRuleArray = ruleSet.getRuleSet().getSimpleRuleArray(i);
                    synchronized (simpleRuleArray) /*synchronized fixes AP-6766 */
                    {
                        simpleRuleArray.setRecordCount(rule.getRecordCount());
                        if (validationColumnIdx >= 0) {
                            simpleRuleArray.setNbCorrect(rule.getNbCorrect());
                        } else if (simpleRuleArray.isSetNbCorrect()) {
                            simpleRuleArray.unsetNbCorrect();
                        }
                    }
                }
            }
        }
    });
    if (replaceColumn) {
        ret.remove(outputColumnName);
        ret.move(ret.getColumnCount() - 1 - (addConfidence ? 1 : 0), oldColumnIndex);
    }
    return ret;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) Entry(java.util.Map.Entry) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BooleanValue(org.knime.core.data.BooleanValue) DataType(org.knime.core.data.DataType) SettingsModelBoolean(org.knime.core.node.defaultnodesettings.SettingsModelBoolean) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) RuleSelectionMethod(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) LongCell(org.knime.core.data.def.LongCell) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 5 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class TwoSampleTTestStatistics method getGroupStatistics.

/**
 * Get descriptive statistics for the given Group.
 * @param group the group
 * @return the
 */
public List<DataCell> getGroupStatistics(final Group group) {
    List<DataCell> cells = new ArrayList<DataCell>();
    cells.add(new StringCell(m_column));
    cells.add(new StringCell(m_groups.get(group)));
    SummaryStatistics stats = m_gstats.get(group);
    cells.add(new IntCell((int) stats.getN()));
    cells.add(new IntCell(m_missing.get(group).intValue()));
    cells.add(new IntCell(m_missingGroup.intValue()));
    cells.add(new IntCell(m_ignoredGroup.intValue()));
    cells.add(new DoubleCell(stats.getMean()));
    cells.add(new DoubleCell(stats.getStandardDeviation()));
    cells.add(new DoubleCell(StatsUtil.getStandardError(stats)));
    return cells;
}
Also used : StringCell(org.knime.core.data.def.StringCell) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) SummaryStatistics(org.apache.commons.math3.stat.descriptive.SummaryStatistics) IntCell(org.knime.core.data.def.IntCell)

Aggregations

IntCell (org.knime.core.data.def.IntCell)109 DataCell (org.knime.core.data.DataCell)79 DoubleCell (org.knime.core.data.def.DoubleCell)67 StringCell (org.knime.core.data.def.StringCell)55 DefaultRow (org.knime.core.data.def.DefaultRow)46 DataRow (org.knime.core.data.DataRow)33 DataTableSpec (org.knime.core.data.DataTableSpec)21 RowKey (org.knime.core.data.RowKey)21 ArrayList (java.util.ArrayList)20 DataType (org.knime.core.data.DataType)20 LongCell (org.knime.core.data.def.LongCell)14 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)14 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 BufferedDataTable (org.knime.core.node.BufferedDataTable)12 Test (org.junit.Test)11 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)9 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)9 DataContainer (org.knime.core.data.container.DataContainer)8 DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue)8