Search in sources :

Example 6 with DecisionTreeNodeLeaf

use of org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf in project knime-core by knime.

the class PMMLDecisionTreeTranslator method addKnimeTreeNode.

/**
 * Recursively converts a PMML tree node and all of its descendants into the
 * corresponding KNIME decision tree nodes.
 *
 * @param pmmlNode the PMML node to convert
 * @return a {@link DecisionTreeNodeLeaf} if the PMML node has no child
 *         nodes, otherwise a {@link DecisionTreeNodeSplitPMML} holding the
 *         converted children and their predicates
 * @throws IllegalArgumentException if the node id or the default child id
 *             cannot be parsed as an integer; the originating
 *             {@link NumberFormatException} is attached as cause
 */
private DecisionTreeNode addKnimeTreeNode(final Node pmmlNode) {
    Node[] pmmlChildrenNode = pmmlNode.getNodeArray();
    // Node ids are integers on the KNIME side, so a non-numeric PMML id is
    // rejected here instead of being mapped.
    String nodeId = pmmlNode.getId();
    int id;
    try {
        id = Integer.parseInt(nodeId);
    } catch (NumberFormatException e) {
        // keep the parse failure as cause so the stack trace stays complete
        throw new IllegalArgumentException("Only numeric node ids are supported in KNIME. Found \"" + nodeId + "\".", e);
    }
    if (pmmlChildrenNode.length == 0) {
        // no children: this PMML node becomes a leaf
        return new DecisionTreeNodeLeaf(id, getMajorityClass(pmmlNode), getClassCount(pmmlNode));
    }
    // convert every child subtree and collect the predicate leading to it
    PMMLPredicate[] pmmlPredicates = new PMMLPredicate[pmmlChildrenNode.length];
    DecisionTreeNode[] children = new DecisionTreeNode[pmmlChildrenNode.length];
    for (int i = 0; i < pmmlChildrenNode.length; i++) {
        children[i] = addKnimeTreeNode(pmmlChildrenNode[i]);
        pmmlPredicates[i] = getPredicate(pmmlChildrenNode[i]);
    }
    if (!pmmlNode.isSetDefaultChild()) {
        return new DecisionTreeNodeSplitPMML(id, getMajorityClass(pmmlNode), getClassCount(pmmlNode), getChildrenSplitAttribute(pmmlNode), pmmlPredicates, children);
    }
    // the default child is referenced by id and must be numeric as well
    String defaultChild = pmmlNode.getDefaultChild();
    Integer knimeDefaultChildIndex;
    try {
        knimeDefaultChildIndex = Integer.parseInt(defaultChild);
    } catch (NumberFormatException e) {
        throw new IllegalArgumentException("Only numeric node ids are supported in KNIME. " + "Found \"" + defaultChild + "\" as defaultChild.", e);
    }
    return new DecisionTreeNodeSplitPMML(id, getMajorityClass(pmmlNode), getClassCount(pmmlNode), getChildrenSplitAttribute(pmmlNode), pmmlPredicates, children, knimeDefaultChildIndex);
}
Also used : Node(org.dmg.pmml.NodeDocument.Node) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode) BigInteger(java.math.BigInteger) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 7 with DecisionTreeNodeLeaf

use of org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf in project knime-core by knime.

the class DecisionTreeLearnerNodeModel2 method buildTree.

/**
 * Recursively induces the decision tree.
 *
 * @param table the {@link InMemoryTable} representing the data for this
 *            node to determine the split and after that perform
 *            partitioning
 * @param exec the execution context for progress information
 * @param depth the current recursion depth
 * @param splitQualityMeasure the quality measure handed to every split
 *            evaluation
 * @param parallelProcessing queried for a free thread before a child
 *            subtree is built in parallel
 * @param firstSplitCol index of the column used for the root split; only
 *            evaluated at depth 0 and when the first-split-column option is
 *            enabled
 * @return the root node of the (sub)tree induced from the given table
 * @throws CanceledExecutionException if the execution has been canceled
 * @throws IllegalAccessException if raised by one of the helpers invoked
 *             during the induction
 * @throws RuntimeException if one of the parallel build threads reported an
 *             exception; that exception is attached as cause
 */
private DecisionTreeNode buildTree(final InMemoryTable table, final ExecutionContext exec, final int depth, final SplitQualityMeasure splitQualityMeasure, final ParallelProcessing parallelProcessing, final int firstSplitCol) throws CanceledExecutionException, IllegalAccessException {
    exec.checkCanceled();
    // derive this node's id from the counter
    int nodeId = m_counter.getAndIncrement();
    DataCell majorityClass = table.getMajorityClassAsCell();
    LinkedHashMap<DataCell, Double> frequencies = table.getClassFrequencies();
    // if the distribution allows for a leaf
    if (table.isPureEnough()) {
        // free memory
        table.freeUnderlyingDataRows();
        double value = m_finishedCounter.incrementAndGet(table.getSumOfWeights());
        exec.setProgress(value / m_alloverRowCount, "Created node with id " + nodeId + " at level " + depth);
        return new DecisionTreeNodeLeaf(nodeId, majorityClass, frequencies);
    } else {
        Split split = null;
        // find best split in specified column for first split
        if (depth == 0 && m_useFirstSplitCol.getBooleanValue()) {
            if (table.isNominal(firstSplitCol)) {
                if (m_binaryNominalSplitMode.getBooleanValue()) {
                    split = new SplitNominalBinary(table, firstSplitCol, splitQualityMeasure, m_minNumberRecordsPerNode.getIntValue(), m_maxNumNominalsForCompleteComputation.getIntValue());
                } else {
                    split = new SplitNominalNormal(table, firstSplitCol, splitQualityMeasure, m_minNumberRecordsPerNode.getIntValue());
                }
            } else {
                split = new SplitContinuous(table, firstSplitCol, splitQualityMeasure, m_averageSplitpoint.getBooleanValue(), m_minNumberRecordsPerNode.getIntValue());
            }
            // only warn here; whether the split is usable is decided by the
            // isValidSplit() check further down
            if (Double.isNaN(split.getBestQualityMeasure()) || split.getBestQualityMeasure() == 0.0) {
                m_warningMessageSb.append("The specified root split column \"").append(split.getSplitAttributeName()).append("\" does not contain a valid split.");
            }
        }
        if (split == null) {
            // no root split column found or selected
            // find the best splits for all attributes
            SplitFinder splittFinder = new SplitFinder(table, splitQualityMeasure, m_averageSplitpoint.getBooleanValue(), m_minNumberRecordsPerNode.getIntValue(), m_binaryNominalSplitMode.getBooleanValue(), m_maxNumNominalsForCompleteComputation.getIntValue());
            // check for enough memory
            checkMemory();
            // get the best split among the best attribute splits
            split = splittFinder.getSplit();
        }
        // if no best split could be evaluated, create a leaf node
        if (split == null || !split.isValidSplit()) {
            table.freeUnderlyingDataRows();
            double value = m_finishedCounter.incrementAndGet(table.getSumOfWeights());
            exec.setProgress(value / m_alloverRowCount, "Created node with id " + nodeId + " at level " + depth);
            return new DecisionTreeNodeLeaf(nodeId, majorityClass, frequencies);
        }
        // partition the attribute lists according to this split
        Partitioner partitioner = new Partitioner(table, split, m_minNumberRecordsPerNode.getIntValue());
        if (!partitioner.couldBeUsefulPartitioned()) {
            table.freeUnderlyingDataRows();
            double value = m_finishedCounter.incrementAndGet(table.getSumOfWeights());
            exec.setProgress(value / m_alloverRowCount, "Created node with id " + nodeId + " at level " + depth);
            return new DecisionTreeNodeLeaf(nodeId, majorityClass, frequencies);
        }
        // get the just created partitions
        InMemoryTable[] partitionTables = partitioner.getPartitionTables();
        // recursively build the child nodes
        DecisionTreeNode[] children = new DecisionTreeNode[partitionTables.length];
        ArrayList<ParallelBuilding> threads = new ArrayList<ParallelBuilding>();
        int i = 0;
        for (InMemoryTable partitionTable : partitionTables) {
            exec.checkCanceled();
            // Small partitions are built in the current thread. The product is
            // computed as long so that a very large partition cannot overflow
            // and be mistaken for a small one (assumes integer operands).
            if ((long) partitionTable.getNumberDataRows() * m_numberAttributes < 10000 || !parallelProcessing.isThreadAvailable()) {
                children[i] = buildTree(partitionTable, exec, depth + 1, splitQualityMeasure, parallelProcessing, firstSplitCol);
            } else {
                String threadName = "Build thread, node: " + nodeId + "." + i;
                ParallelBuilding buildThread = new ParallelBuilding(threadName, partitionTable, exec, depth + 1, i, splitQualityMeasure, parallelProcessing);
                LOGGER.debug("Start new parallel building thread: " + threadName);
                threads.add(buildThread);
                buildThread.start();
            }
            i++;
        }
        // collect the results of the parallel build threads; the children
        // built in the current thread are already assigned to the child array
        for (ParallelBuilding buildThread : threads) {
            children[buildThread.getThreadIndex()] = buildThread.getResultNode();
            exec.checkCanceled();
            Throwable failure = buildThread.getException();
            if (failure != null) {
                // NOTE(review): stop() looks like Thread#stop, which is
                // deprecated - confirm before touching it.
                for (ParallelBuilding buildThread2 : threads) {
                    buildThread2.stop();
                }
                // keep the worker's exception as cause instead of only its message
                throw new RuntimeException(failure.getMessage(), failure);
            }
        }
        threads.clear();
        if (split instanceof SplitContinuous) {
            // continuous split: two children, "<= splitValue" and "> splitValue"
            double splitValue = ((SplitContinuous) split).getBestSplitValue();
            String splitAttribute = split.getSplitAttributeName();
            PMMLPredicate[] splitPredicates = new PMMLPredicate[] {
                new PMMLSimplePredicate(splitAttribute, PMMLOperator.LESS_OR_EQUAL, Double.toString(splitValue)),
                new PMMLSimplePredicate(splitAttribute, PMMLOperator.GREATER_THAN, Double.toString(splitValue)) };
            return new DecisionTreeNodeSplitPMML(nodeId, majorityClass, frequencies, splitAttribute, splitPredicates, children);
        } else if (split instanceof SplitNominalNormal) {
            // nominal split: one "equals" predicate per child
            DataCell[] splitValues = ((SplitNominalNormal) split).getSplitValues();
            int num = children.length;
            PMMLPredicate[] splitPredicates = new PMMLPredicate[num];
            String splitAttribute = split.getSplitAttributeName();
            for (int j = 0; j < num; j++) {
                splitPredicates[j] = new PMMLSimplePredicate(splitAttribute, PMMLOperator.EQUAL, splitValues[j].toString());
            }
            return new DecisionTreeNodeSplitPMML(nodeId, majorityClass, frequencies, splitAttribute, splitPredicates, children);
        } else {
            // binary nominal split: children[0] is the left, children[1] the
            // right partition, each described by an "is in" set predicate
            SplitNominalBinary splitNominalBinary = (SplitNominalBinary) split;
            DataCell[] splitValues = splitNominalBinary.getSplitValues();
            String splitAttribute = split.getSplitAttributeName();
            int[][] indices = new int[][] { splitNominalBinary.getIntMappingsLeftPartition(), splitNominalBinary.getIntMappingsRightPartition() };
            PMMLPredicate[] splitPredicates = new PMMLPredicate[2];
            for (int j = 0; j < splitPredicates.length; j++) {
                PMMLSimpleSetPredicate pred = new PMMLSimpleSetPredicate(splitAttribute, PMMLSetOperator.IS_IN);
                pred.setArrayType(PMMLArrayType.STRING);
                LinkedHashSet<String> values = new LinkedHashSet<String>();
                for (int index : indices[j]) {
                    values.add(splitValues[index].toString());
                }
                pred.setValues(values);
                splitPredicates[j] = pred;
            }
            return new DecisionTreeNodeSplitPMML(nodeId, majorityClass, frequencies, splitAttribute, splitPredicates, children);
        }
    }
}
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) PMMLSimpleSetPredicate(org.knime.base.node.mine.decisiontree2.PMMLSimpleSetPredicate) PMMLSimplePredicate(org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) DataCell(org.knime.core.data.DataCell) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 8 with DecisionTreeNodeLeaf

use of org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf in project knime-core by knime.

the class Pruner method trainingErrorPruningRecurse.

/**
 * Recursive step of the training error based pruning.
 *
 * @param node the node to prune
 *
 * @return the pruning result, i.e. the error together with the node that
 *         should take the place of the given node; both are used by the
 *         parent level of the recursion
 */
private static PruningResult trainingErrorPruningRecurse(final DecisionTreeNode node) {
    // a leaf is returned as it is, together with its own error
    if (node.isLeaf()) {
        double ownError = node.getEntireClassCount() - node.getOwnClassCount();
        return new PruningResult(ownError, node);
    }

    // one error slot per child
    double[] errorPerChild = new double[node.getChildCount()];
    // everything that is not a leaf has to be a split node
    DecisionTreeNodeSplit split = (DecisionTreeNodeSplit) node;
    DecisionTreeNode[] subtrees = split.getChildren();

    // prune the children first and hook the (possibly new) nodes back in
    for (int idx = 0; idx < subtrees.length; idx++) {
        DecisionTreeNode subtree = subtrees[idx];
        PruningResult pruned = trainingErrorPruningRecurse(subtree);
        errorPerChild[idx] = pruned.getQualityValue();
        // the replacement may well be the very same node
        split.replaceChild(subtree, pruned.getNode());
    }

    // error this node would have if it were collapsed into a leaf
    double errorAsLeaf = node.getEntireClassCount() - node.getOwnClassCount();

    // error of the subtree as it is now: the sum over all children
    double errorAsSubtree = 0.0;
    for (int idx = 0; idx < errorPerChild.length; idx++) {
        errorAsSubtree += errorPerChild[idx];
    }

    // keep the subtree unless the leaf is at most 0.001 worse
    if (!(errorAsLeaf - 0.001 <= errorAsSubtree)) {
        return new PruningResult(errorAsSubtree, node);
    }

    // replace this node with a leaf that inherits index, parent and prefix
    DecisionTreeNodeLeaf replacement = new DecisionTreeNodeLeaf(node.getOwnIndex(), node.getMajorityClass(), node.getClassCounts());
    replacement.setParent(node.getParent());
    replacement.setPrefix(node.getPrefix());
    return new PruningResult(errorAsLeaf, replacement);
}
Also used : DecisionTreeNodeSplit(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplit) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 9 with DecisionTreeNodeLeaf

use of org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf in project knime-core by knime.

the class TreeNodeClassification method createDecisionTreeNode.

/**
 * Converts this node and its complete subtree into the DecisionTreeNode
 * model that is used in the Decision Tree of KNIME.
 *
 * @param idGenerator supplies the node ids; incremented once for every
 *            node that is created
 * @param metaData used to look up the name of the split attribute
 * @return a DecisionTreeNode
 */
public DecisionTreeNode createDecisionTreeNode(final MutableInteger idGenerator, final TreeMetaData metaData) {
    final DataCell majorityClass = new StringCell(getMajorityClassName());
    final double[] distribution = getTargetDistribution();
    // sized so that the entries fit below the 0.75 load factor
    final int capacity = (int) (distribution.length / 0.75 + 1.0);
    final LinkedHashMap<DataCell, Double> classCounts = new LinkedHashMap<DataCell, Double>(capacity);
    final NominalValueRepresentation[] targetValues = getTargetMetaData().getValues();
    int classIndex = 0;
    for (final double weight : distribution) {
        final String className = targetValues[classIndex].getNominalValue();
        classCounts.put(new StringCell(className), weight);
        classIndex++;
    }

    final int childCount = getNrChildren();
    // the node draws its id before any of its children does
    final int ownId = idGenerator.inc();
    if (childCount == 0) {
        return new DecisionTreeNodeLeaf(ownId, majorityClass, classCounts);
    }

    final DecisionTreeNode[] convertedChildren = new DecisionTreeNode[childCount];
    final int attributeIndex = getSplitAttributeIndex();
    assert attributeIndex >= 0 : "non-leaf node has no split";
    final String attributeName = metaData.getAttributeMetaData(attributeIndex).getAttributeName();
    final PMMLPredicate[] predicates = new PMMLPredicate[childCount];
    for (int c = 0; c < childCount; c++) {
        final TreeNodeClassification child = getChild(c);
        // the condition of a child is the predicate that leads to it
        predicates[c] = child.getCondition().toPMMLPredicate();
        convertedChildren[c] = child.createDecisionTreeNode(idGenerator, metaData);
    }
    return new DecisionTreeNodeSplitPMML(ownId, majorityClass, classCounts, attributeName, predicates, convertedChildren);
}
Also used : NominalValueRepresentation(org.knime.base.node.mine.treeensemble.data.NominalValueRepresentation) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) LinkedHashMap(java.util.LinkedHashMap) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) StringCell(org.knime.core.data.def.StringCell) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) DataCell(org.knime.core.data.DataCell) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 10 with DecisionTreeNodeLeaf

use of org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf in project knime-core by knime.

the class TreeNodeClassification method createDecisionTreeNode.

/**
 * Builds the DecisionTreeNode model used in the Decision Tree of KNIME for
 * this node and, recursively, for all of its children.
 *
 * @param idGenerator source of the node ids; one id is taken per created
 *            node
 * @param metaData provides the name of the attribute this node splits on
 * @return a DecisionTreeNode
 */
public DecisionTreeNode createDecisionTreeNode(final MutableInteger idGenerator, final TreeMetaData metaData) {
    final DataCell majority = new StringCell(getMajorityClassName());
    final float[] distribution = getTargetDistribution();
    // initial capacity that keeps all entries below the 0.75 load factor
    final LinkedHashMap<DataCell, Double> scores =
        new LinkedHashMap<DataCell, Double>((int) (distribution.length / 0.75 + 1.0));
    final NominalValueRepresentation[] targetValues = getTargetMetaData().getValues();
    for (int t = 0; t < distribution.length; t++) {
        final StringCell classCell = new StringCell(targetValues[t].getNominalValue());
        // widen explicitly: a float cannot be boxed into a Double
        final double weight = distribution[t];
        scores.put(classCell, weight);
    }

    final int numChildren = getNrChildren();
    // the id of this node is taken before the children take theirs
    final int nodeId = idGenerator.inc();
    if (numChildren == 0) {
        return new DecisionTreeNodeLeaf(nodeId, majority, scores);
    }

    final DecisionTreeNode[] subNodes = new DecisionTreeNode[numChildren];
    final int splitIndex = getSplitAttributeIndex();
    assert splitIndex >= 0 : "non-leaf node has no split";
    final String splitName = metaData.getAttributeMetaData(splitIndex).getAttributeName();
    final PMMLPredicate[] subPredicates = new PMMLPredicate[numChildren];
    int pos = 0;
    while (pos < numChildren) {
        final TreeNodeClassification subTree = getChild(pos);
        final TreeNodeCondition condition = subTree.getCondition();
        subPredicates[pos] = condition.toPMMLPredicate();
        subNodes[pos] = subTree.createDecisionTreeNode(idGenerator, metaData);
        pos++;
    }
    return new DecisionTreeNodeSplitPMML(nodeId, majority, scores, splitName, subPredicates, subNodes);
}
Also used : NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) LinkedHashMap(java.util.LinkedHashMap) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) StringCell(org.knime.core.data.def.StringCell) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) DataCell(org.knime.core.data.DataCell) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Aggregations

DecisionTreeNode (org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)12 DecisionTreeNodeLeaf (org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf)12 DecisionTreeNodeSplitPMML (org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML)8 DataCell (org.knime.core.data.DataCell)7 PMMLPredicate (org.knime.base.node.mine.decisiontree2.PMMLPredicate)6 LinkedHashMap (java.util.LinkedHashMap)4 DecisionTreeNodeSplit (org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplit)4 StringCell (org.knime.core.data.def.StringCell)4 BigInteger (java.math.BigInteger)2 ArrayList (java.util.ArrayList)2 LinkedHashSet (java.util.LinkedHashSet)2 PMMLSimplePredicate (org.knime.base.node.mine.decisiontree2.PMMLSimplePredicate)2 PMMLSimpleSetPredicate (org.knime.base.node.mine.decisiontree2.PMMLSimpleSetPredicate)2 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)2 Entry (java.util.Map.Entry)1 XmlCursor (org.apache.xmlbeans.XmlCursor)1 ArrayType (org.dmg.pmml.ArrayType)1 Node (org.dmg.pmml.NodeDocument.Node)1 ScoreDistribution (org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution)1 SimplePredicate (org.dmg.pmml.SimplePredicateDocument.SimplePredicate)1