Search in sources :

Example 6 with ColumnMemberships

use of org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships in project knime-core by knime.

the class TreeNominalColumnData method updateChildMembershipsBinary.

/**
 * Determines which rows of the parent node satisfy the given binary nominal condition.
 *
 * <p>The membership cursor of this column is reset and then walked once. The code advances a running
 * offset by {@code m_nominalValueCounts[value]} per nominal value, i.e. it presumably relies on the column
 * being stored as contiguous blocks, one per nominal value, with missing values (if any) in a trailing
 * block -- TODO confirm against the column layout.
 *
 * @param childBinaryCondition condition of the child node; queried per value index and for missing values
 * @param parentMemberships memberships of the parent node from which the column cursor is obtained
 * @return bit set, indexed by the position in the data memberships, with a bit set for every row that
 *         falls into the child
 */
private BitSet updateChildMembershipsBinary(final TreeNodeNominalBinaryCondition childBinaryCondition, final DataMemberships parentMemberships) {
    final ColumnMemberships members = parentMemberships.getColumnMemberships(getMetaData().getAttributeIndex());
    members.reset();
    final BitSet childRows = new BitSet(members.size());
    // TODO Check if this can be done more efficiently
    final NominalValueRepresentation[] values = getMetaData().getValues();
    // the last entry of the value array is excluded when the column contains missing values
    final int nonMissingCount;
    if (containsMissingValues()) {
        nonMissingCount = values.length - 1;
    } else {
        nonMissingCount = values.length;
    }
    int blockStart = 0;
    boolean exhausted = false;
    for (int value = 0; value < nonMissingCount; value++) {
        if (childBinaryCondition.testCondition(value)) {
            // position the cursor on the first member at or after the start of this value's block
            if (!members.nextIndexFrom(blockStart)) {
                // no members left at all
                break;
            }
            final int blockEnd = blockStart + m_nominalValueCounts[value];
            // collect every member whose column index still lies inside this value's block
            while (members.getIndexInColumn() < blockEnd) {
                childRows.set(members.getIndexInDataMemberships());
                if (!members.next()) {
                    exhausted = true;
                    break;
                }
            }
        }
        blockStart += m_nominalValueCounts[value];
    }
    // after the loop blockStart is the offset behind all visited non-missing blocks
    final boolean collectMissings = !exhausted && containsMissingValues() && childBinaryCondition.acceptsMissings();
    if (collectMissings && members.nextIndexFrom(blockStart)) {
        do {
            childRows.set(members.getIndexInDataMemberships());
        } while (members.next());
    }
    return childRows;
}
Also used : BitSet(java.util.BitSet) ColumnMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships)

Example 7 with ColumnMemberships

use of org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships in project knime-core by knime.

the class TreeNominalColumnData method calcBestSplitRegressionBinary.

/**
 * Searches for the best binary partition of this column's nominal values for a numeric target.
 *
 * <p>Phase 1 walks {@code columnMemberships} once and accumulates, per nominal value, the summed row
 * weight and the weighted target sum. Phase 2 enumerates binary partitions of the values (all of them for
 * at most 10 values, a random sample otherwise) and scores each one with
 * {@code ySumRight^2 / weightRight + ySumLeft^2 / weightLeft - ySumTotal^2 / nrRecordsTotal}.
 *
 * <p>Partitions that leave one branch without any weight are skipped: their score would be
 * {@code 0.0 / 0.0 = NaN}, and once a NaN is stored as best gain no later partition can replace it
 * because every comparison against NaN is false.
 *
 * @param columnMemberships cursor over the rows of the current node for this column; it is advanced by
 *            this method (it is not reset here, so the caller presumably hands it in reset -- verify)
 * @param targetPriors provides the total target sum and total record count of the current node
 * @param targetColumn provides the target value for a row's original index
 * @param nomVals the nominal values of this column; only its length is used here
 * @param rd random source used for sampling partitions and for breaking ties between equal gains
 * @return the best split that respects the configured minimum child size and has a positive gain, or
 *         {@code null} if there is no such split
 */
private NominalBinarySplitCandidate calcBestSplitRegressionBinary(final ColumnMemberships columnMemberships, final RegressionPriors targetPriors, final TreeTargetNumericColumnData targetColumn, final NominalValueRepresentation[] nomVals, final RandomData rd) {
    final int minChildSize = getConfiguration().getMinChildSize();
    final double ySumTotal = targetPriors.getYSum();
    final double nrRecordsTotal = targetPriors.getNrRecords();
    final double criterionTotal = ySumTotal * ySumTotal / nrRecordsTotal;
    final double[] ySums = new double[nomVals.length];
    final double[] sumWeightsAttributes = new double[nomVals.length];
    if (!columnMemberships.next()) {
        // no rows in this node for this column: there is nothing to split, and reading the cursor
        // position below would not be meaningful
        return null;
    }
    int start = 0;
    for (int att = 0; att < nomVals.length; att++) {
        // rows of value 'att' occupy the column index range [start, end)
        int end = start + m_nominalValueCounts[att];
        double weightSum = 0.0;
        double ySum = 0.0;
        boolean reachedEnd = false;
        for (int index = columnMemberships.getIndexInColumn(); index < end; index = columnMemberships.getIndexInColumn()) {
            final double weight = columnMemberships.getRowWeight();
            assert weight > EPSILON : "Instances in columnMemberships must have weights larger than EPSILON.";
            ySum += weight * targetColumn.getValueFor(columnMemberships.getOriginalIndex());
            weightSum += weight;
            if (!columnMemberships.next()) {
                // reached end of columnMemberships
                reachedEnd = true;
                break;
            }
        }
        sumWeightsAttributes[att] = weightSum;
        ySums[att] = ySum;
        start = end;
        if (reachedEnd) {
            // remaining values keep their initial sums of 0.0
            break;
        }
    }
    BinarySplitEnumeration splitEnumeration;
    if (nomVals.length <= 10) {
        splitEnumeration = new FullBinarySplitEnumeration(nomVals.length);
    } else {
        // '-' binds tighter than '<<', so the original "1 << 10 - 2" has always been 1 << 8 = 256.
        // NOTE(review): confirm whether (1 << 10) - 2 was intended; the value is left unchanged here.
        int maxSearch = 1 << (10 - 2);
        splitEnumeration = new RandomBinarySplitEnumeration(nomVals.length, maxSearch, rd);
    }
    BigInteger bestPartitionMask = null;
    boolean isBestSplitValid = false;
    double bestPartitionGain = Double.NEGATIVE_INFINITY;
    do {
        double weightLeft = 0.0;
        double ySumLeft = 0.0;
        double weightRight = 0.0;
        double ySumRight = 0.0;
        for (int i = 0; i < nomVals.length; i++) {
            final boolean isAttributeInRightBranch = splitEnumeration.isInRightBranch(i);
            if (isAttributeInRightBranch) {
                weightRight += sumWeightsAttributes[i];
                ySumRight += ySums[i];
            } else {
                weightLeft += sumWeightsAttributes[i];
                ySumLeft += ySums[i];
            }
        }
        if (weightLeft <= 0.0 || weightRight <= 0.0) {
            // degenerate partition: one child would be empty and the gain below would be NaN
            continue;
        }
        final boolean isValidSplit = weightRight >= minChildSize && weightLeft >= minChildSize;
        double gain = ySumRight * ySumRight / weightRight + ySumLeft * ySumLeft / weightLeft - criterionTotal;
        // use random tie breaker if gains are equal
        boolean randomTieBreaker = gain == bestPartitionGain ? rd.nextInt(0, 1) == 1 : false;
        // store if better than before or first valid split
        if (gain > bestPartitionGain || (!isBestSplitValid && isValidSplit) || randomTieBreaker) {
            // never let an invalid split replace a valid one
            if (isValidSplit || !isBestSplitValid) {
                bestPartitionGain = gain;
                bestPartitionMask = splitEnumeration.getValueMask();
                isBestSplitValid = isValidSplit;
            }
        }
    } while (splitEnumeration.next());
    // only report splits that honor the minimum child size
    if (isBestSplitValid && bestPartitionGain > 0.0) {
        return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, getMissedRows(columnMemberships), NominalBinarySplitCandidate.NO_MISSINGS);
    }
    return null;
}
Also used : BigInteger(java.math.BigInteger) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate)

Example 8 with ColumnMemberships

use of org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships in project knime-core by knime.

the class TreeNominalColumnData method calcBestSplitClassification.

/**
 * {@inheritDoc}
 */
@Override
public SplitCandidate calcBestSplitClassification(final DataMemberships dataMemberships, final ClassificationPriors targetPriors, final TreeTargetNominalColumnData targetColumn, final RandomData rd) {
    // gather everything the specialised split searches need
    final NominalValueRepresentation[] classValues = targetColumn.getMetaData().getValues();
    final IImpurity impurity = targetPriors.getImpurityCriterion();
    final NominalValueRepresentation[] attributeValues = getMetaData().getValues();
    final boolean binarySplits = getConfiguration().isUseBinaryNominalSplits();
    final ColumnMemberships members = dataMemberships.getColumnMemberships(getMetaData().getAttributeIndex());

    if (!binarySplits) {
        // one child per nominal value
        return calcBestSplitClassificationMultiway(members, targetPriors, targetColumn, impurity, attributeValues, classValues, rd);
    }
    if (classValues.length == 2) {
        // dedicated search for a target with exactly two classes
        return calcBestSplitClassificationBinaryTwoClass(members, targetPriors, targetColumn, impurity, attributeValues, classValues, rd);
    }
    // any other number of target classes goes through the PCA based search
    return calcBestSplitClassificationBinaryPCA(members, targetPriors, targetColumn, impurity, attributeValues, classValues, rd);
}
Also used : ColumnMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships) IImpurity(org.knime.base.node.mine.treeensemble2.learner.IImpurity)

Example 9 with ColumnMemberships

use of org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships in project knime-core by knime.

the class TreeBitVectorColumnData method calcBestSplitRegression.

/**
 * {@inheritDoc}
 */
@Override
public SplitCandidate calcBestSplitRegression(final DataMemberships dataMemberships, final RegressionPriors targetPriors, final TreeTargetNumericColumnData targetColumn, final RandomData rd) {
    final double totalYSum = targetPriors.getYSum();
    final double totalRecords = targetPriors.getNrRecords();
    final double baseline = totalYSum * totalYSum / totalRecords;
    final int smallestChild = getConfiguration().getMinChildSize();
    final ColumnMemberships members = dataMemberships.getColumnMemberships(getMetaData().getAttributeIndex());

    // weight and weighted target sum, separately for rows whose bit is set and rows whose bit is not set
    double weightSet = 0.0;
    double weightUnset = 0.0;
    double ySumSet = 0.0;
    double ySumUnset = 0.0;
    while (members.next()) {
        final double rowWeight = members.getRowWeight();
        if (rowWeight < EPSILON) {
            // ignore record: not in current branch or not in sample
            continue;
        }
        final double target = targetColumn.getValueFor(members.getOriginalIndex());
        if (m_columnBitSet.get(members.getIndexInColumn())) {
            weightSet += rowWeight;
            ySumSet += rowWeight * target;
        } else {
            weightUnset += rowWeight;
            ySumUnset += rowWeight * target;
        }
    }

    // both children must reach the configured minimum size
    final boolean childTooSmall = weightSet < smallestChild || weightUnset < smallestChild;
    if (childTooSmall) {
        return null;
    }
    final double criterionSet = ySumSet * ySumSet / weightSet;
    final double criterionUnset = ySumUnset * ySumUnset / weightUnset;
    final double gain = criterionSet + criterionUnset - baseline;
    // a split is only proposed when it improves on the unsplit node
    return gain > 0 ? new BitSplitCandidate(this, gain) : null;
}
Also used : BitSplitCandidate(org.knime.base.node.mine.treeensemble2.learner.BitSplitCandidate) ColumnMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships)

Example 10 with ColumnMemberships

use of org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships in project knime-core by knime.

the class TreeBitVectorColumnData method updateChildMemberships.

/**
 * {@inheritDoc}
 *
 * <p>A row belongs to the child when its bit in this column equals the value required by the (bit)
 * condition. If the parent has no memberships for this column, an empty bit set is returned.
 *
 * @throws ClassCastException if {@code childCondition} is not a {@code TreeNodeBitCondition}
 */
@Override
public BitSet updateChildMemberships(final TreeNodeCondition childCondition, final DataMemberships parentMemberships) {
    TreeNodeBitCondition bitCondition = (TreeNodeBitCondition) childCondition;
    assert getMetaData().getAttributeName().equals(bitCondition.getColumnMetaData().getAttributeName());
    final boolean value = bitCondition.getValue();
    final ColumnMemberships columnMemberships = parentMemberships.getColumnMemberships(getMetaData().getAttributeIndex());
    BitSet inChild = new BitSet(columnMemberships.size());
    columnMemberships.reset();
    // Only read the cursor after next() reported a valid position; the previous version ignored the
    // result of the first next() and therefore also read the cursor when there were no members at all.
    while (columnMemberships.next()) {
        if (m_columnBitSet.get(columnMemberships.getIndexInColumn()) == value) {
            inChild.set(columnMemberships.getIndexInDataMemberships());
        }
    }
    return inChild;
}
Also used : BitSet(java.util.BitSet) ColumnMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships) TreeNodeBitCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeBitCondition)

Aggregations

ColumnMemberships (org.knime.base.node.mine.treeensemble2.data.memberships.ColumnMemberships)10 BitSet (java.util.BitSet)7 BigInteger (java.math.BigInteger)5 NominalBinarySplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate)5 TreeEnsembleLearnerConfiguration (org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration)5 IImpurity (org.knime.base.node.mine.treeensemble2.learner.IImpurity)3 BitSplitCandidate (org.knime.base.node.mine.treeensemble2.learner.BitSplitCandidate)2 NumericSplitCandidate (org.knime.base.node.mine.treeensemble2.learner.NumericSplitCandidate)2 ArrayList (java.util.ArrayList)1 LinkedHashMap (java.util.LinkedHashMap)1 RealVector (org.apache.commons.math3.linear.RealVector)1 Test (org.junit.Test)1 CombinedAttributeValues (org.knime.base.node.mine.treeensemble2.data.BinaryNominalSplitsPCA.CombinedAttributeValues)1 TestDataGenerator (org.knime.base.node.mine.treeensemble2.data.TestDataGenerator)1 TreeData (org.knime.base.node.mine.treeensemble2.data.TreeData)1 TreeNodeBitCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeBitCondition)1 TreeNodeNumericCondition (org.knime.base.node.mine.treeensemble2.model.TreeNodeNumericCondition)1 NumericOperator (org.knime.base.node.mine.treeensemble2.model.TreeNodeNumericCondition.NumericOperator)1 DefaultRowSample (org.knime.base.node.mine.treeensemble2.sample.row.DefaultRowSample)1 RowSample (org.knime.base.node.mine.treeensemble2.sample.row.RowSample)1