Search in sources :

Example 21 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class PMMLRuleEditorNodeModel method createRearranger.

/**
 * Creates the {@link ColumnRearranger} that can compute the new column.
 *
 * @param tableSpec The spec of the input table.
 * @param ruleSet The {@link RuleSet} xml object where the rules should be added.
 * @param parser The parser for the rules.
 * @return The {@link ColumnRearranger}.
 * @throws ParseException Problem during parsing.
 * @throws InvalidSettingsException if settings are invalid
 */
private ColumnRearranger createRearranger(final DataTableSpec tableSpec, final RuleSet ruleSet, final PMMLRuleParser parser) throws ParseException, InvalidSettingsException {
    if (m_settings.isAppendColumn() && m_settings.getNewColName().isEmpty()) {
        throw new InvalidSettingsException("No name for prediction column provided");
    }
    Set<String> outcomes = new LinkedHashSet<String>();
    List<DataType> outcomeTypes = new ArrayList<DataType>();
    int line = 0;
    final List<Pair<PMMLPredicate, Expression>> rules = new ArrayList<Pair<PMMLPredicate, Expression>>();
    for (String ruleText : m_settings.rules()) {
        ++line;
        if (RuleSupport.isComment(ruleText)) {
            continue;
        }
        try {
            ParseState state = new ParseState(ruleText);
            PMMLPredicate expression = parser.parseBooleanExpression(state);
            SimpleRule simpleRule = ruleSet.addNewSimpleRule();
            setCondition(simpleRule, expression);
            state.skipWS();
            state.consumeText("=>");
            state.skipWS();
            Expression outcome = parser.parseOutcomeOperand(state, null);
            // Only constants are allowed in the outcomes.
            assert outcome.isConstant() : outcome;
            rules.add(new Pair<PMMLPredicate, Expression>(expression, outcome));
            outcomeTypes.add(outcome.getOutputType());
            simpleRule.setScore(outcome.toString());
            // simpleRule.setConfidence(confidenceForRule(simpleRule, line, ruleText));
            simpleRule.setWeight(weightForRule(simpleRule, line, ruleText));
            outcomes.add(simpleRule.getScore());
        } catch (ParseException e) {
            throw Util.addContext(e, ruleText, line);
        }
    }
    DataType outcomeType = RuleEngineNodeModel.computeOutputType(outcomeTypes, true);
    ColumnRearranger rearranger = new ColumnRearranger(tableSpec);
    DataColumnSpecCreator specProto = new DataColumnSpecCreator(m_settings.isAppendColumn() ? DataTableSpec.getUniqueColumnName(tableSpec, m_settings.getNewColName()) : m_settings.getReplaceColumn(), outcomeType);
    specProto.setDomain(new DataColumnDomainCreator(toCells(outcomes, outcomeType)).createDomain());
    SingleCellFactory cellFactory = new SingleCellFactory(true, specProto.createSpec()) {

        @Override
        public DataCell getCell(final DataRow row) {
            for (Pair<PMMLPredicate, Expression> pair : rules) {
                if (pair.getFirst().evaluate(row, tableSpec) == Boolean.TRUE) {
                    return pair.getSecond().evaluate(row, null).getValue();
                }
            }
            return DataType.getMissingCell();
        }
    };
    if (m_settings.isAppendColumn()) {
        rearranger.append(cellFactory);
    } else {
        rearranger.replace(cellFactory, m_settings.getReplaceColumn());
    }
    return rearranger;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) ParseState(org.knime.base.node.rules.engine.BaseRuleParser.ParseState) DataRow(org.knime.core.data.DataRow) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Expression(org.knime.base.node.rules.engine.Expression) DataType(org.knime.core.data.DataType) ParseException(java.text.ParseException) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) Pair(org.knime.core.util.Pair)

Example 22 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method configure.

/**
 * {@inheritDoc}
 */
@Override
protected DataTableSpec[] configure(final PortObjectSpec[] inSpecs) throws InvalidSettingsException {
    DataTableSpec original = (DataTableSpec) inSpecs[DATA_INDEX];
    ColumnRearranger rearranger = new ColumnRearranger(original);
    PMMLPortObjectSpec portObjectSpec = (PMMLPortObjectSpec) inSpecs[MODEL_INDEX];
    List<DataColumnSpec> activeColumnList = portObjectSpec.getActiveColumnList();
    List<DataColumnSpec> notFound = new ArrayList<DataColumnSpec>();
    for (DataColumnSpec dataColumnSpec : activeColumnList) {
        if (original.containsName(dataColumnSpec.getName())) {
            DataColumnSpec origSpec = original.getColumnSpec(dataColumnSpec.getName());
            if (!origSpec.getType().equals(dataColumnSpec.getType())) {
                notFound.add(dataColumnSpec);
            }
        } else {
            notFound.add(dataColumnSpec);
        }
    }
    if (!notFound.isEmpty()) {
        StringBuilder sb = new StringBuilder("Incompatible to the table, the following columns are not present, or have a wrong type:");
        for (DataColumnSpec dataColumnSpec : notFound) {
            sb.append("\n   ").append(dataColumnSpec);
        }
        throw new InvalidSettingsException(sb.toString());
    }
    List<DataColumnSpec> targetCols = portObjectSpec.getTargetCols();
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator;
    if (m_doReplaceColumn.getBooleanValue()) {
        String col = m_replaceColumn.getStringValue();
        specCreator = new DataColumnSpecCreator(col, dataType);
    } else {
        specCreator = new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(original, m_outputColumn.getStringValue()), dataType);
    }
    SingleCellFactory dummy = new SingleCellFactory(specCreator.createSpec()) {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell getCell(final DataRow row) {
            throw new IllegalStateException();
        }
    };
    if (m_addConfidence.getBooleanValue()) {
        rearranger.append(new SingleCellFactory(new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(rearranger.createSpec(), m_confidenceColumn.getStringValue()), DoubleCell.TYPE).createSpec()) {

            @Override
            public DataCell getCell(final DataRow row) {
                throw new IllegalStateException();
            }
        });
    }
    if (m_doReplaceColumn.getBooleanValue()) {
        rearranger.replace(dummy, m_replaceColumn.getStringValue());
    } else {
        rearranger.append(dummy);
    }
    return new DataTableSpec[] { rearranger.createSpec() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 23 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class PMMLRuleSetPredictorNodeModel method createRearranger.

/**
 * Constructs the {@link ColumnRearranger} for computing the new columns.
 *
 * @param obj The {@link PMMLPortObject} of the preprocessing model.
 * @param spec The {@link DataTableSpec} of the table.
 * @param replaceColumn Should replace the {@code outputColumnName}?
 * @param outputColumnName The output column name (which might be an existing).
 * @param addConfidence Should add the confidence values to a column?
 * @param confidenceColumnName The name of the confidence column.
 * @param validationColumnIdx Index of the validation column, {@code -1} if not specified.
 * @param processConcurrently Should be {@code false} when the statistics are to be computed.
 * @return The {@link ColumnRearranger} computing the result.
 * @throws InvalidSettingsException Problem with rules.
 */
private static ColumnRearranger createRearranger(final PMMLPortObject obj, final DataTableSpec spec, final boolean replaceColumn, final String outputColumnName, final boolean addConfidence, final String confidenceColumnName, final int validationColumnIdx, final boolean processConcurrently) throws InvalidSettingsException {
    List<Node> models = obj.getPMMLValue().getModels(PMMLModelType.RuleSetModel);
    if (models.size() != 1) {
        throw new InvalidSettingsException("Expected exactly on RuleSetModel, but got: " + models.size());
    }
    final PMMLRuleTranslator translator = new PMMLRuleTranslator();
    obj.initializeModelTranslator(translator);
    if (!translator.isScorable()) {
        throw new UnsupportedOperationException("The model is not scorable.");
    }
    final List<PMMLRuleTranslator.Rule> rules = translator.getRules();
    ColumnRearranger ret = new ColumnRearranger(spec);
    final List<DataColumnSpec> targetCols = obj.getSpec().getTargetCols();
    final DataType dataType = targetCols.isEmpty() ? StringCell.TYPE : targetCols.get(0).getType();
    DataColumnSpecCreator specCreator = new DataColumnSpecCreator(outputColumnName, dataType);
    Set<DataCell> outcomes = new LinkedHashSet<>();
    for (Rule rule : rules) {
        DataCell outcome;
        if (dataType.equals(BooleanCell.TYPE)) {
            outcome = BooleanCellFactory.create(rule.getOutcome());
        } else if (dataType.equals(StringCell.TYPE)) {
            outcome = new StringCell(rule.getOutcome());
        } else if (dataType.equals(DoubleCell.TYPE)) {
            try {
                outcome = new DoubleCell(Double.parseDouble(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(IntCell.TYPE)) {
            try {
                outcome = new IntCell(Integer.parseInt(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else if (dataType.equals(LongCell.TYPE)) {
            try {
                outcome = new LongCell(Long.parseLong(rule.getOutcome()));
            } catch (NumberFormatException e) {
                // ignore
                continue;
            }
        } else {
            throw new UnsupportedOperationException("Unknown outcome type: " + dataType);
        }
        outcomes.add(outcome);
    }
    specCreator.setDomain(new DataColumnDomainCreator(outcomes).createDomain());
    DataColumnSpec colSpec = specCreator.createSpec();
    final RuleSelectionMethod ruleSelectionMethod = translator.getSelectionMethodList().get(0);
    final String defaultScore = translator.getDefaultScore();
    final Double defaultConfidence = translator.getDefaultConfidence();
    final DataColumnSpec[] specs;
    if (addConfidence) {
        specs = new DataColumnSpec[] { new DataColumnSpecCreator(DataTableSpec.getUniqueColumnName(ret.createSpec(), confidenceColumnName), DoubleCell.TYPE).createSpec(), colSpec };
    } else {
        specs = new DataColumnSpec[] { colSpec };
    }
    final int oldColumnIndex = replaceColumn ? ret.indexOf(outputColumnName) : -1;
    ret.append(new AbstractCellFactory(processConcurrently, specs) {

        private final List<String> m_values;

        {
            Map<String, List<String>> dd = translator.getDataDictionary();
            m_values = dd.get(targetCols.get(0).getName());
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            // See http://www.dmg.org/v4-1/RuleSet.html#Rule
            switch(ruleSelectionMethod.getCriterion().intValue()) {
                case RuleSelectionMethod.Criterion.INT_FIRST_HIT:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectFirstHit(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_MAX:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedMax(row);
                        return toCells(resultAndConfidence);
                    }
                case RuleSelectionMethod.Criterion.INT_WEIGHTED_SUM:
                    {
                        Pair<DataCell, Double> resultAndConfidence = selectWeightedSum(row);
                        return toCells(resultAndConfidence);
                    }
                default:
                    throw new UnsupportedOperationException(ruleSelectionMethod.getCriterion().toString());
            }
        }

        /**
         * Converts the pair to a {@link DataCell} array.
         *
         * @param resultAndConfidence The {@link Pair}.
         * @return The result and possibly the confidence.
         */
        private DataCell[] toCells(final Pair<DataCell, Double> resultAndConfidence) {
            if (!addConfidence) {
                return new DataCell[] { resultAndConfidence.getFirst() };
            }
            if (resultAndConfidence.getSecond() == null) {
                return new DataCell[] { DataType.getMissingCell(), resultAndConfidence.getFirst() };
            }
            return new DataCell[] { new DoubleCell(resultAndConfidence.getSecond()), resultAndConfidence.getFirst() };
        }

        /**
         * Computes the result and the confidence using the weighted sum method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedSum(final DataRow row) {
            final Map<String, Double> scoreToSumWeight = new LinkedHashMap<String, Double>();
            for (String val : m_values) {
                scoreToSumWeight.put(val, 0.0);
            }
            int matchedRuleCount = 0;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    ++matchedRuleCount;
                    Double sumWeight = scoreToSumWeight.get(rule.getOutcome());
                    if (sumWeight == null) {
                        throw new IllegalStateException("The score value: " + rule.getOutcome() + " is not in the data dictionary.");
                    }
                    final Double wRaw = rule.getWeight();
                    final double w = wRaw == null ? 0.0 : wRaw.doubleValue();
                    scoreToSumWeight.put(rule.getOutcome(), sumWeight + w);
                }
            }
            double maxSumWeight = Double.NEGATIVE_INFINITY;
            String bestScore = null;
            for (Entry<String, Double> entry : scoreToSumWeight.entrySet()) {
                final double d = entry.getValue().doubleValue();
                if (d > maxSumWeight) {
                    maxSumWeight = d;
                    bestScore = entry.getKey();
                }
            }
            if (bestScore == null || matchedRuleCount == 0) {
                return pair(result(defaultScore), defaultConfidence);
            }
            return pair(result(bestScore), maxSumWeight / matchedRuleCount);
        }

        /**
         * Helper method to create {@link Pair}s.
         *
         * @param f The first element.
         * @param s The second element.
         * @return The new pair.
         */
        private <F, S> Pair<F, S> pair(final F f, final S s) {
            return new Pair<F, S>(f, s);
        }

        /**
         * Computes the result and the confidence using the weighted max method.
         *
         * @param row A {@link DataRow}
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectWeightedMax(final DataRow row) {
            double maxWeight = Double.NEGATIVE_INFINITY;
            PMMLRuleTranslator.Rule bestRule = null;
            for (final PMMLRuleTranslator.Rule rule : rules) {
                if (rule.getCondition().evaluate(row, spec) == Boolean.TRUE) {
                    if (rule.getWeight() > maxWeight) {
                        maxWeight = rule.getWeight();
                        bestRule = rule;
                    }
                }
            }
            if (bestRule == null) {
                return pair(result(defaultScore), defaultConfidence);
            }
            bestRule.setRecordCount(bestRule.getRecordCount() + 1);
            DataCell result = result(bestRule);
            if (validationColumnIdx >= 0) {
                if (row.getCell(validationColumnIdx).equals(result)) {
                    bestRule.setNbCorrect(bestRule.getNbCorrect() + 1);
                }
            }
            Double confidence = bestRule.getConfidence();
            return pair(result, confidence == null ? defaultConfidence : confidence);
        }

        /**
         * Selects the outcome of the rule and converts it to the proper outcome type.
         *
         * @param rule A {@link Rule}.
         * @return The {@link DataCell} representing the result. (May be missing.)
         */
        private DataCell result(final PMMLRuleTranslator.Rule rule) {
            String outcome = rule.getOutcome();
            return result(outcome);
        }

        /**
         * Constructs the {@link DataCell} from its {@link String} representation ({@code outcome}) and its type.
         *
         * @param dataType The expected {@link DataType}
         * @param outcome The {@link String} representation.
         * @return The {@link DataCell}.
         */
        private DataCell result(final String outcome) {
            if (outcome == null) {
                return DataType.getMissingCell();
            }
            try {
                if (dataType.isCompatible(BooleanValue.class)) {
                    return BooleanCellFactory.create(outcome);
                }
                if (IntCell.TYPE.isASuperTypeOf(dataType)) {
                    return new IntCell(Integer.parseInt(outcome));
                }
                if (LongCell.TYPE.isASuperTypeOf(dataType)) {
                    return new LongCell(Long.parseLong(outcome));
                }
                if (DoubleCell.TYPE.isASuperTypeOf(dataType)) {
                    return new DoubleCell(Double.parseDouble(outcome));
                }
                return new StringCell(outcome);
            } catch (NumberFormatException e) {
                return new MissingCell(outcome + "\n" + e.getMessage());
            }
        }

        /**
         * Selects the first rule that matches and computes the confidence and result for the {@code row}.
         *
         * @param row A {@link DataRow}.
         * @return The result and the confidence.
         */
        private Pair<DataCell, Double> selectFirstHit(final DataRow row) {
            for (final PMMLRuleTranslator.Rule rule : rules) {
                Boolean eval = rule.getCondition().evaluate(row, spec);
                if (eval == Boolean.TRUE) {
                    rule.setRecordCount(rule.getRecordCount() + 1);
                    DataCell result = result(rule);
                    if (validationColumnIdx >= 0) {
                        if (row.getCell(validationColumnIdx).equals(result)) {
                            rule.setNbCorrect(rule.getNbCorrect() + 1);
                        }
                    }
                    Double confidence = rule.getConfidence();
                    return pair(result, confidence == null ? defaultConfidence : confidence);
                }
            }
            return pair(result(defaultScore), defaultConfidence);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void afterProcessing() {
            super.afterProcessing();
            obj.getPMMLValue();
            RuleSetModel ruleSet = translator.getOriginalRuleSetModel();
            assert rules.size() == ruleSet.getRuleSet().getSimpleRuleList().size() + ruleSet.getRuleSet().getCompoundRuleList().size();
            if (ruleSet.getRuleSet().getSimpleRuleList().size() == rules.size()) {
                for (int i = 0; i < rules.size(); ++i) {
                    Rule rule = rules.get(i);
                    final SimpleRule simpleRuleArray = ruleSet.getRuleSet().getSimpleRuleArray(i);
                    synchronized (simpleRuleArray) /*synchronized fixes AP-6766 */
                    {
                        simpleRuleArray.setRecordCount(rule.getRecordCount());
                        if (validationColumnIdx >= 0) {
                            simpleRuleArray.setNbCorrect(rule.getNbCorrect());
                        } else if (simpleRuleArray.isSetNbCorrect()) {
                            simpleRuleArray.unsetNbCorrect();
                        }
                    }
                }
            }
        }
    });
    if (replaceColumn) {
        ret.remove(outputColumnName);
        ret.move(ret.getColumnCount() - 1 - (addConfidence ? 1 : 0), oldColumnIndex);
    }
    return ret;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RuleSetModel(org.dmg.pmml.RuleSetModelDocument.RuleSetModel) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) Entry(java.util.Map.Entry) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) BooleanValue(org.knime.core.data.BooleanValue) DataType(org.knime.core.data.DataType) SettingsModelBoolean(org.knime.core.node.defaultnodesettings.SettingsModelBoolean) Pair(org.knime.core.util.Pair) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) RuleSelectionMethod(org.dmg.pmml.RuleSelectionMethodDocument.RuleSelectionMethod) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) LongCell(org.knime.core.data.def.LongCell) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) SimpleRule(org.dmg.pmml.SimpleRuleDocument.SimpleRule) Rule(org.knime.base.node.rules.engine.pmml.PMMLRuleTranslator.Rule) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 24 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class NumericOutliersReviser method replaceOutliers.

/**
 * Replaces outliers found in the row input according to the selected replacement option. Additionally, the outlier
 * replacement counts and new domains are calculated.
 *
 * @param exec the execution context
 * @param in the row input whose outliers have to be treated
 * @param out the row output whose outliers have been treated
 * @param outlierModel the model storing the permitted intervals
 * @param memberCounter the member counter
 * @param outlierRepCounter the outlier replacement counter
 * @param missingGroupsCounter the missing groups counter
 * @throws Exception any exception to indicate an error, cancelation
 */
private void replaceOutliers(final ExecutionContext exec, final RowInput in, final RowOutput out, final NumericOutliersModel outlierModel, final MemberCounter memberCounter, final MemberCounter outlierRepCounter, final MemberCounter missingGroupsCounter) throws Exception {
    // total number of outlier columns
    final int noOutliers = m_outlierColNames.length;
    // the in table spec
    final DataTableSpec inSpec = in.getDataTableSpec();
    // create column re-arranger to overwrite cells corresponding to outliers
    final ColumnRearranger colRearranger = new ColumnRearranger(inSpec);
    // store the positions where the outlier column names can be found in the input table
    final int[] outlierIndices = calculateOutlierIndicies(inSpec);
    final DataColumnSpec[] outlierSpecs = new DataColumnSpec[noOutliers];
    for (int i = 0; i < noOutliers; i++) {
        outlierSpecs[i] = inSpec.getColumnSpec(outlierIndices[i]);
    }
    // values are copied anyways by the re-arranger so there is no need to
    // create new instances for each row
    final DataCell[] treatedVals = new DataCell[noOutliers];
    final AbstractCellFactory fac = new AbstractCellFactory(true, outlierSpecs) {

        @Override
        public DataCell[] getCells(final DataRow row) {
            final GroupKey key = outlierModel.getKey(row, inSpec);
            final Map<String, double[]> colsMap = outlierModel.getGroupIntervals(key);
            for (int i = 0; i < noOutliers; i++) {
                final DataCell curCell = row.getCell(outlierIndices[i]);
                final DataCell treatedCell;
                final String outlierColName = m_outlierColNames[i];
                if (!curCell.isMissing()) {
                    // if the key exists treat the value otherwise we process an unkown group
                    if (colsMap != null) {
                        // increment the member counter
                        memberCounter.incrementMemberCount(outlierColName, key);
                        // treat the value of the cell if its a outlier
                        treatedCell = treatCellValue(colsMap.get(outlierColName), curCell);
                    } else {
                        missingGroupsCounter.incrementMemberCount(outlierColName, key);
                        treatedCell = curCell;
                    }
                } else {
                    treatedCell = curCell;
                }
                // if we changed the value this is an outlier
                if (!treatedCell.equals(curCell)) {
                    outlierRepCounter.incrementMemberCount(outlierColName, key);
                }
                // update the domain if necessary
                if (m_updateDomain && !treatedCell.isMissing()) {
                    m_domainUpdater.updateDomain(outlierColName, ((DoubleValue) treatedCell).getDoubleValue());
                }
                treatedVals[i] = treatedCell;
            }
            return treatedVals;
        }
    };
    // replace the outlier columns by their updated versions
    colRearranger.replace(fac, outlierIndices);
    // stream it
    colRearranger.createStreamableFunction().runFinal(new PortInput[] { in }, new PortOutput[] { out }, exec);
    exec.setProgress(1);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) AbstractCellFactory(org.knime.core.data.container.AbstractCellFactory) GroupKey(org.knime.base.node.preproc.groupby.GroupKey) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell)

Example 25 with DataRow

use of org.knime.core.data.DataRow in project knime-core by knime.

the class HistogramColumn method createColumnRearranger.

/**
 * Creates the rearranger that adds the histograms.
 *
 * @param data The input data table that contains the columns referred by {@code histograms} keys.
 * @param stats The statistics table to be adjusted.
 * @param histograms The histograms.
 * @param columns The columns to be described.
 * @return The {@link ColumnRearranger}.
 */
ColumnRearranger createColumnRearranger(final BufferedDataTable data, final BufferedDataTable stats, final Map<Integer, HistogramNumericModel> histograms, final int maxBinCount, final String... columns) {
    ColumnRearranger rearranger = new ColumnRearranger(stats.getDataTableSpec());
    final DataColumnSpec spec = createHistogramColumnSpec();
    rearranger.append(new SingleCellFactory(true, spec) {

        String[] m_sortedColumns = columns.clone();

        {
            Arrays.sort(m_sortedColumns);
        }

        @Override
        public DataCell getCell(final DataRow row) {
            if (Arrays.binarySearch(m_sortedColumns, row.getKey().getString()) < 0) {
                return DataType.getMissingCell();
            }
            final int columnIndex = data.getSpec().findColumnIndex(row.getKey().getString());
            final HistogramNumericModel histogramData = histograms.get(Integer.valueOf(columnIndex));
            if (histogramData == null) {
                // Wrong bounds
                return DataType.getMissingCell();
            }
            assert columnIndex == histogramData.getColIndex() : "Expected: " + columnIndex + ", but got: " + histogramData.getColIndex();
            return createImageCell(histogramData, false);
        }
    });
    return rearranger;
}
Also used : ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) DataRow(org.knime.core.data.DataRow)

Aggregations

DataRow (org.knime.core.data.DataRow)482 DataCell (org.knime.core.data.DataCell)268 DataTableSpec (org.knime.core.data.DataTableSpec)159 BufferedDataTable (org.knime.core.node.BufferedDataTable)125 DataColumnSpec (org.knime.core.data.DataColumnSpec)109 RowKey (org.knime.core.data.RowKey)88 DefaultRow (org.knime.core.data.def.DefaultRow)88 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)80 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)76 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)73 DoubleValue (org.knime.core.data.DoubleValue)72 ArrayList (java.util.ArrayList)65 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)65 RowIterator (org.knime.core.data.RowIterator)62 DataType (org.knime.core.data.DataType)61 DoubleCell (org.knime.core.data.def.DoubleCell)57 StringCell (org.knime.core.data.def.StringCell)53 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)48 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)44 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)43