use of in project knime-core by knime.
the class TreeNominalColumnData method calcBestSplitClassificationBinaryPCA.
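/**
 * Implements the approach proposed by Coppersmith et al. (1999) in their paper
 * "Partitioning Nominal Attributes in Decision Trees".
 *
 * @param columnMemberships iterator-style view of this column's rows; supplies index in column, row weight
 *            and original index of the current row
 * @param targetPriors supplies the prior impurity and the class distribution
 * @param targetColumn nominal target column; supplies the class index of a row
 * @param impCriterion impurity criterion used to compute partition impurities and the gain
 * @param nomVals the nominal values of this column
 * @param targetVals the nominal values (classes) of the target column
 * @param rd random source used as tie breaker between partitions with equal gain
 * @return the best binary split candidate or null if there is no valid split with positive gain
 */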
// Searches for the binary partition of this column's nominal values with the highest gain for a
// nominal (classification) target: per-value class frequencies are collected, values with an identical
// class-probability vector share one map entry, the entries are ordered by
// BinaryNominalSplitsPCA.calculatePCAOrdering and every proper prefix of that ordering is scored.
// NOTE(review): this excerpt is damaged - closing braces, some statements and some expressions (the
// operand of "if (!" and the condition of "} while (") are missing, so block nesting cannot be read
// off the text. Comments below that depend on nesting are marked as assumptions.
private NominalBinarySplitCandidate calcBestSplitClassificationBinaryPCA(final ColumnMemberships columnMemberships, final ClassificationPriors targetPriors, final TreeTargetNominalColumnData targetColumn, final IImpurity impCriterion, final NominalValueRepresentation[] nomVals, final NominalValueRepresentation[] targetVals, final RandomData rd) {
final TreeEnsembleLearnerConfiguration config = getConfiguration();
final int minChildSize = config.getMinChildSize();
final boolean useXGBoostMissingValueHandling = config.getMissingValueHandling() == MissingValueHandling.XGBoost;
// The algorithm combines attribute values with the same class probabilities into a single attribute
// therefore it is necessary to track the known classProbabilities
final LinkedHashMap<ClassProbabilityVector, CombinedAttributeValues> combinedAttValsMap = new LinkedHashMap<ClassProbabilityVector, CombinedAttributeValues>();;
// sum of the weights of all rows counted for a non-missing value
double totalWeight = 0.0;
boolean branchContainsMissingValues = containsMissingValues();
// running start offset of the current nominal value within the column
int start = 0;
// number of nominal values without the missing-value representation (the last one, if the column has one)
final int lengthNonMissing = containsMissingValues() ? nomVals.length - 1 : nomVals.length;
// NOTE(review): attToConsider is not read anywhere in the visible code; the loop below uses lengthNonMissing.
final int attToConsider = useXGBoostMissingValueHandling ? nomVals.length : lengthNonMissing;
for (int att = 0; att < lengthNonMissing; /*attToConsider*/
att++) {
// rows whose index in column is below end are counted for value att
int end = start + m_nominalValueCounts[att];
double attWeight = 0.0;
// weighted class counts of value att
final double[] classFrequencies = new double[targetVals.length];
boolean reachedEnd = false;
for (int index = columnMemberships.getIndexInColumn(); index < end; index = columnMemberships.getIndexInColumn()) {
double weight = columnMemberships.getRowWeight();
assert weight > EPSILON : "Instances in columnMemberships must have weights larger than EPSILON.";
int instanceClass = targetColumn.getValueFor(columnMemberships.getOriginalIndex());
classFrequencies[instanceClass] += weight;
attWeight += weight;
totalWeight += weight;
// NOTE(review): the operand of this condition is missing in the excerpt; presumably it advances the cursor.
if (! {
// reached end of columnMemberships
reachedEnd = true;
if (att == nomVals.length - 1) {
// if the column contains no missing values, the last possible nominal value is
// not the missing value and therefore branchContainsMissingValues needs to be false
// NOTE(review): "x && true" leaves the flag unchanged; att only reaches nomVals.length - 1 when
// containsMissingValues() is false, in which case the flag was initialised to false already.
branchContainsMissingValues = branchContainsMissingValues && true;
start = end;
if (attWeight < EPSILON) {
// attribute value did not occur in this branch or sample
// NOTE(review): the body of this branch is not visible; without a skip the division below would
// divide by a weight smaller than EPSILON.
final double[] classProbabilities = new double[targetVals.length];
for (int i = 0; i < classProbabilities.length; i++) {
// relative class frequency of value att, passed through truncateDouble(8, ...)
classProbabilities[i] = truncateDouble(8, classFrequencies[i] / attWeight);
CombinedAttributeValues attVal = new CombinedAttributeValues(classFrequencies, classProbabilities, attWeight, nomVals[att]);
ClassProbabilityVector classProbabilityVector = new ClassProbabilityVector(classProbabilities);
// look up whether a value with the same probability vector has been seen before
CombinedAttributeValues knownAttVal = combinedAttValsMap.get(classProbabilityVector);
if (knownAttVal == null) {
combinedAttValsMap.put(classProbabilityVector, attVal);
} else {
// NOTE(review): the else-branch body (handling of an already known probability vector) is not
// visible in this excerpt.
if (reachedEnd) {
// account for missing values and their weight
double missingWeight = 0.0;
double[] missingClassCounts = null;
// otherwise the current indexInColumn won't be larger than start
if (columnMemberships.getIndexInColumn() >= start) {
missingClassCounts = new double[targetVals.length];
do {
final double recordWeight = columnMemberships.getRowWeight();
final int recordClass = targetColumn.getValueFor(columnMemberships.getOriginalIndex());
missingWeight += recordWeight;
missingClassCounts[recordClass] += recordWeight;
// NOTE(review): loop condition is missing in the excerpt.
} while (;
// the branch is treated as containing missing values only if they carry weight
if (missingWeight > EPSILON) {
branchContainsMissingValues = true;
} else {
branchContainsMissingValues = false;
// attValList keeps the map's insertion order; attVals is replaced by the PCA ordering
ArrayList<CombinedAttributeValues> attValList = Lists.newArrayList(combinedAttValsMap.values());
CombinedAttributeValues[] attVals = combinedAttValsMap.values().toArray(new CombinedAttributeValues[combinedAttValsMap.size()]);
attVals = BinaryNominalSplitsPCA.calculatePCAOrdering(attVals, totalWeight, targetVals.length);
// EigenDecomposition failed
if (attVals == null) {
return null;
// Start searching for split candidates
// bit position of the non-missing nominal value with the highest index
final int highestBitPosition = containsMissingValues() ? nomVals.length - 2 : nomVals.length - 1;
// scratch arrays reused for every candidate: [0] = current partition, [1] = remaining values
final double[] binaryImpurityValues = new double[2];
final double[] binaryPartitionWeights = new double[2];
double sumRemainingWeights = totalWeight;
double sumCurrPartitionWeight = 0.0;
RealVector targetFrequenciesCurrentPartition = MatrixUtils.createRealVector(new double[targetVals.length]);
RealVector targetFrequenciesRemaining = MatrixUtils.createRealVector(new double[targetVals.length]);
// initially all class frequencies are on the "remaining" side
for (CombinedAttributeValues attVal : attValList) {
targetFrequenciesRemaining = targetFrequenciesRemaining.add(attVal.m_classFrequencyVector);
BigInteger currPartitionBitMask = BigInteger.ZERO;
double bestPartitionGain = Double.NEGATIVE_INFINITY;
BigInteger bestPartitionMask = null;
boolean isBestSplitValid = false;
boolean missingsGoLeft = false;
// without XGBoost handling the prior impurity is computed on the distribution minus the missing-value counts
final double priorImpurity = useXGBoostMissingValueHandling ? targetPriors.getPriorImpurity() : impCriterion.getPartitionImpurity(subtractMissingClassCounts(targetPriors.getDistribution(), missingClassCounts), totalWeight);
// no need to iterate over full list because at least one value must remain on the other side of the split
for (int i = 0; i < attVals.length - 1; i++) {
// move the i-th value of the ordering from "remaining" into the current partition
CombinedAttributeValues currAttVal = attVals[i];
sumCurrPartitionWeight += currAttVal.m_totalWeight;
sumRemainingWeights -= currAttVal.m_totalWeight;
// NOTE(review): exact equality on accumulated doubles; may trip under -ea because of rounding.
assert sumCurrPartitionWeight + sumRemainingWeights == totalWeight : "The weights of the partitions do not sum up to the total weight.";
targetFrequenciesCurrentPartition = targetFrequenciesCurrentPartition.add(currAttVal.m_classFrequencyVector);
targetFrequenciesRemaining = targetFrequenciesRemaining.subtract(currAttVal.m_classFrequencyVector);
currPartitionBitMask = currPartitionBitMask.or(currAttVal.m_bitMask);
// the partition is the right branch if it holds the bit at highestBitPosition
boolean partitionIsRightBranch = currPartitionBitMask.testBit(highestBitPosition);
boolean isValidSplit;
double gain;
boolean tempMissingsGoLeft = true;
if (branchContainsMissingValues && useXGBoostMissingValueHandling) {
// send missing values with partition
boolean isValidSplitFirst = sumCurrPartitionWeight + missingWeight >= minChildSize && sumRemainingWeights >= minChildSize;
binaryImpurityValues[0] = impCriterion.getPartitionImpurity(addMissingClassCounts(targetFrequenciesCurrentPartition.toArray(), missingClassCounts), sumCurrPartitionWeight + missingWeight);
binaryImpurityValues[1] = impCriterion.getPartitionImpurity(targetFrequenciesRemaining.toArray(), sumRemainingWeights);
binaryPartitionWeights[0] = sumCurrPartitionWeight + missingWeight;
binaryPartitionWeights[1] = sumRemainingWeights;
double postSplitImpurity = impCriterion.getPostSplitImpurity(binaryImpurityValues, binaryPartitionWeights, totalWeight + missingWeight);
double gainFirst = impCriterion.getGain(priorImpurity, postSplitImpurity, binaryPartitionWeights, totalWeight + missingWeight);
// send missing values with remaining
boolean isValidSplitSecond = sumCurrPartitionWeight >= minChildSize && sumRemainingWeights + missingWeight >= minChildSize;
binaryImpurityValues[0] = impCriterion.getPartitionImpurity(targetFrequenciesCurrentPartition.toArray(), sumCurrPartitionWeight);
binaryImpurityValues[1] = impCriterion.getPartitionImpurity(addMissingClassCounts(targetFrequenciesRemaining.toArray(), missingClassCounts), sumRemainingWeights + missingWeight);
binaryPartitionWeights[0] = sumCurrPartitionWeight;
binaryPartitionWeights[1] = sumRemainingWeights + missingWeight;
postSplitImpurity = impCriterion.getPostSplitImpurity(binaryImpurityValues, binaryPartitionWeights, totalWeight + missingWeight);
double gainSecond = impCriterion.getGain(priorImpurity, postSplitImpurity, binaryPartitionWeights, totalWeight + missingWeight);
// choose alternative with better gain
if (gainFirst >= gainSecond) {
gain = gainFirst;
isValidSplit = isValidSplitFirst;
// missing values follow the partition: left exactly when the partition is the left branch
tempMissingsGoLeft = !partitionIsRightBranch;
} else {
gain = gainSecond;
isValidSplit = isValidSplitSecond;
// missing values follow the remaining values: left exactly when the partition is the right branch
tempMissingsGoLeft = partitionIsRightBranch;
} else {
// TODO if invalid splits should not be considered skip partition
isValidSplit = sumCurrPartitionWeight >= minChildSize && sumRemainingWeights >= minChildSize;
binaryImpurityValues[0] = impCriterion.getPartitionImpurity(targetFrequenciesCurrentPartition.toArray(), sumCurrPartitionWeight);
binaryImpurityValues[1] = impCriterion.getPartitionImpurity(targetFrequenciesRemaining.toArray(), sumRemainingWeights);
binaryPartitionWeights[0] = sumCurrPartitionWeight;
binaryPartitionWeights[1] = sumRemainingWeights;
double postSplitImpurity = impCriterion.getPostSplitImpurity(binaryImpurityValues, binaryPartitionWeights, totalWeight);
gain = impCriterion.getGain(priorImpurity, postSplitImpurity, binaryPartitionWeights, totalWeight);
// use random tie breaker if gains are equal
boolean randomTieBreaker = gain == bestPartitionGain ? rd.nextInt(0, 1) == 1 : false;
// store if better than before or first valid split
if (gain > bestPartitionGain || (!isBestSplitValid && isValidSplit) || randomTieBreaker) {
// never replace a valid best split by an invalid one
if (isValidSplit || !isBestSplitValid) {
bestPartitionGain = gain;
// store the mask of the right branch: the partition itself, or its complement within bits 0..highestBitPosition
bestPartitionMask = partitionIsRightBranch ? currPartitionBitMask : BigInteger.ZERO.setBit(highestBitPosition + 1).subtract(BigInteger.ONE).xor(currPartitionBitMask);
isBestSplitValid = isValidSplit;
if (branchContainsMissingValues) {
missingsGoLeft = tempMissingsGoLeft;
// missing values are encountered during the search for the best split
// missingsGoLeft = partitionIsRightBranch;
} else {
// no missing values were encountered during the search for the best split
// missing values should be sent with the majority
missingsGoLeft = partitionIsRightBranch ? sumCurrPartitionWeight < sumRemainingWeights : sumCurrPartitionWeight >= sumRemainingWeights;
// only a valid split with strictly positive gain is reported
if (isBestSplitValid && bestPartitionGain > 0.0) {
if (useXGBoostMissingValueHandling) {
return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, NO_MISSED_ROWS, missingsGoLeft ? NominalBinarySplitCandidate.MISSINGS_GO_LEFT : NominalBinarySplitCandidate.MISSINGS_GO_RIGHT);
return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, getMissedRows(columnMemberships), NominalBinarySplitCandidate.NO_MISSINGS);
return null;
use of in project knime-core by knime.
the class TreeNominalColumnData method calcBestSplitClassificationBinary.
/**
 * Searches for the best binary split of this nominal column for a classification target by scoring the
 * partitions delivered by a BinarySplitEnumeration: a FullBinarySplitEnumeration for at most 10 nominal
 * values, otherwise a RandomBinarySplitEnumeration limited to maxSearch = 1 << 8 candidates.
 *
 * NOTE(review): this excerpt is damaged - closing braces, the cursor-advancing expression of the inner
 * for loop and the condition of the do/while loop are missing. Only the defect named below was changed.
 *
 * @param columnMemberships iterator-style view of this column's rows (index in column, row weight,
 *            original index)
 * @param targetPriors supplies the prior impurity
 * @param targetColumn nominal target column; supplies the class index of a row
 * @param impCriterion impurity criterion used to compute partition impurities and the gain
 * @param nomVals the nominal values of this column
 * @param targetVals the nominal values (classes) of the target column
 * @param rd random source for the random enumeration and for tie breaking
 * @return the best split candidate, or null if there are fewer than two nominal values or no partition
 *         has positive gain
 */
NominalBinarySplitCandidate calcBestSplitClassificationBinary(final ColumnMemberships columnMemberships, final ClassificationPriors targetPriors, final TreeTargetNominalColumnData targetColumn, final IImpurity impCriterion, final NominalValueRepresentation[] nomVals, final NominalValueRepresentation[] targetVals, final RandomData rd) {
// a single value cannot be partitioned
if (nomVals.length <= 1) {
return null;
final int minChildSize = getConfiguration().getMinChildSize();
// number of nominal values without the missing-value representation
final int lengthNonMissing = containsMissingValues() ? nomVals.length - 1 : nomVals.length;
// distribution of target for each attribute value
final double[][] targetCountsSplitPerAttribute = new double[lengthNonMissing][targetVals.length];
// number of valid records for each attribute value
final double[] attWeights = new double[lengthNonMissing];
// number (sum) of total valid values
double totalWeight = 0.0;
int start = 0;;
for (int att = 0; att < lengthNonMissing; att++) {
// rows whose index in column is below end are counted for value att
final int end = start + m_nominalValueCounts[att];
double currentAttValWeight = 0.0;
// NOTE(review): the update part of this loop header is truncated in the excerpt.
for (int index = columnMemberships.getIndexInColumn(); index < end;, index = columnMemberships.getIndexInColumn()) {
final double weight = columnMemberships.getRowWeight();
assert weight > EPSILON : "The usage of datamemberships should ensure that no rows with zero weight are encountered";
int target = targetColumn.getValueFor(columnMemberships.getOriginalIndex());
targetCountsSplitPerAttribute[att][target] += weight;
currentAttValWeight += weight;
totalWeight += currentAttValWeight;
attWeights[att] = currentAttValWeight;
start = end;
BinarySplitEnumeration splitEnumeration;
if (nomVals.length <= 10) {
splitEnumeration = new FullBinarySplitEnumeration(nomVals.length);
} else {
// '-' binds tighter than '<<', so this is 1 << 8
int maxSearch = (1 << 10 - 2);
splitEnumeration = new RandomBinarySplitEnumeration(nomVals.length, maxSearch, rd);
BigInteger bestPartitionMask = null;
boolean isBestSplitValid = false;
double bestPartitionGain = Double.NEGATIVE_INFINITY;
// scratch arrays, reset for every candidate partition
final double[] targetCountsSplitLeft = new double[targetVals.length];
final double[] targetCountsSplitRight = new double[targetVals.length];
final double[] binaryImpurityValues = new double[2];
final double[] binaryPartitionWeights = new double[2];
do {
Arrays.fill(targetCountsSplitLeft, 0.0);
Arrays.fill(targetCountsSplitRight, 0.0);
double weightLeft = 0.0;
double weightRight = 0.0;
// Only the non-missing values have collected counts: targetCountsSplitPerAttribute and attWeights
// are sized lengthNonMissing, so iterating up to nomVals.length would read past their end whenever
// the column has a missing-value representation.
for (int i = 0; i < lengthNonMissing; i++) {
final boolean isAttributeInRightBranch = splitEnumeration.isInRightBranch(i);
double[] targetCountsCurrentAttribute = targetCountsSplitPerAttribute[i];
for (int targetVal = 0; targetVal < targetVals.length; targetVal++) {
if (isAttributeInRightBranch) {
targetCountsSplitRight[targetVal] += targetCountsCurrentAttribute[targetVal];
} else {
targetCountsSplitLeft[targetVal] += targetCountsCurrentAttribute[targetVal];
if (isAttributeInRightBranch) {
weightRight += attWeights[i];
} else {
weightLeft += attWeights[i];
// index 0 = right branch, index 1 = left branch
binaryPartitionWeights[0] = weightRight;
binaryPartitionWeights[1] = weightLeft;
boolean isValidSplit = weightRight >= minChildSize && weightLeft >= minChildSize;
binaryImpurityValues[0] = impCriterion.getPartitionImpurity(targetCountsSplitRight, weightRight);
binaryImpurityValues[1] = impCriterion.getPartitionImpurity(targetCountsSplitLeft, weightLeft);
double postSplitImpurity = impCriterion.getPostSplitImpurity(binaryImpurityValues, binaryPartitionWeights, totalWeight);
double gain = impCriterion.getGain(targetPriors.getPriorImpurity(), postSplitImpurity, binaryPartitionWeights, totalWeight);
// use random tie breaker if gains are equal
boolean randomTieBreaker = gain == bestPartitionGain ? rd.nextInt(0, 1) == 1 : false;
// store if better than before or first valid split
if (gain > bestPartitionGain || (!isBestSplitValid && isValidSplit) || randomTieBreaker) {
// never replace a valid best split by an invalid one
if (isValidSplit || !isBestSplitValid) {
bestPartitionGain = gain;
bestPartitionMask = splitEnumeration.getValueMask();
isBestSplitValid = isValidSplit;
// NOTE(review): loop condition is missing in the excerpt.
} while (;
// NOTE(review): unlike the PCA and regression variants this does not check isBestSplitValid - confirm intent.
if (bestPartitionGain > 0.0) {
return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, getMissedRows(columnMemberships), NominalBinarySplitCandidate.NO_MISSINGS);
return null;
use of in project knime-core by knime.
the class TreeNominalColumnData method updateChildMembershipsMultiway.
// Returns a BitSet (inChild) sized by the parent's column memberships for the child described by a
// multiway nominal condition.
// NOTE(review): this excerpt is damaged - closing braces are missing, the loop bodies that presumably set
// bits in inChild are not visible, and the conditions "if (!" and "} while (" are truncated.
private BitSet updateChildMembershipsMultiway(final TreeNodeNominalCondition nomCondition, final DataMemberships parentMemberships) {
String value = nomCondition.getValue();
// resolve the condition's nominal value to its assigned integer; -1 marks "not found"
int att = -1;
final NominalValueRepresentation[] reps = getMetaData().getValues();
for (final NominalValueRepresentation rep : reps) {
if (rep.getNominalValue().equals(value)) {
att = rep.getAssignedInteger();
if (att == -1) {
throw new IllegalStateException("Unknown value: " + value);
ColumnMemberships columnMemberships = parentMemberships.getColumnMemberships(getMetaData().getAttributeIndex());
BitSet inChild = new BitSet(columnMemberships.size());
// start = sum of the counts of all values preceding att
int start = 0;
for (int a = 0; a < att; a++) {
start += m_nominalValueCounts[a];
// Make sure that we are using an index >= start
if (!columnMemberships.nextIndexFrom(start)) {
// nextIndexFrom(start) returned false: return the still empty set
return inChild;
boolean reachedEnd = false;
// rows whose index in column is below end are visited for value att
int end = start + m_nominalValueCounts[att];
for (int index = columnMemberships.getIndexInColumn(); index < end; index = columnMemberships.getIndexInColumn()) {
if (! {
reachedEnd = true;
// missing values are only visited if rows remain, the column has missing values and the condition accepts them
if (!reachedEnd && containsMissingValues() && nomCondition.acceptsMissings()) {
// move to missing values
// skip the counts of value att and all following values except the last representation
for (int i = att; i < reps.length - 1; i++) {
start += m_nominalValueCounts[i];
if (columnMemberships.nextIndexFrom(start)) {
do {
} while (;
return inChild;
use of in project knime-core by knime.
the class TreeNominalColumnData method calcBestSplitRegression.
* {@inheritDoc}
public SplitCandidate calcBestSplitRegression(final DataMemberships dataMemberships, final RegressionPriors targetPriors, final TreeTargetNumericColumnData targetColumn, final RandomData rd) {
final NominalValueRepresentation[] nomVals = getMetaData().getValues();
final ColumnMemberships columnMemberships = dataMemberships.getColumnMemberships(getMetaData().getAttributeIndex());
final boolean useBinaryNominalSplits = getConfiguration().isUseBinaryNominalSplits();
if (useBinaryNominalSplits) {
return calcBestSplitRegressionBinaryBreiman(columnMemberships, targetPriors, targetColumn, nomVals, rd);
} else {
return calcBestSplitRegressionMultiway(columnMemberships, targetPriors, targetColumn, nomVals, rd);
use of in project knime-core by knime.
the class TreeNominalColumnData method calcBestSplitRegressionBinaryBreiman.
* If an attribute value does not appear in the current branch, it is not guaranteed in which child branch this
* value will fall. (This should not be a problem since we cannot make any assumptions about this attribute value
* anyway)
* @param membershipController
* @param rowWeights
* @param targetPriors
* @param targetColumn
* @param nomVals
* @param originalIndexInColumnList
* @return best split candidate or null if there is no split candidate with positive gain or too small child nodes
private NominalBinarySplitCandidate calcBestSplitRegressionBinaryBreiman(final ColumnMemberships columnMemberships, final RegressionPriors targetPriors, final TreeTargetNumericColumnData targetColumn, final NominalValueRepresentation[] nomVals, final RandomData rd) {
final int minChildSize = getConfiguration().getMinChildSize();
double sumYTotal = targetPriors.getYSum();
double sumWeightTotal = targetPriors.getNrRecords();
final boolean useXGBoostMissingValueHandling = getConfiguration().getMissingValueHandling() == MissingValueHandling.XGBoost;
boolean branchContainsMissingValues = containsMissingValues();
double missingWeight = 0.0;
double missingY = 0.0;
if (branchContainsMissingValues) {
while (columnMemberships.getIndexInColumn() >= m_idxOfFirstMissing) {
final double weight = columnMemberships.getRowWeight();
missingWeight += weight;
missingY += weight * targetColumn.getValueFor(columnMemberships.getOriginalIndex());
if (!columnMemberships.previous()) {
sumYTotal -= missingY;
sumWeightTotal -= missingWeight;
branchContainsMissingValues = missingWeight > 0.0;
final double criterionTotal;
if (useXGBoostMissingValueHandling) {
criterionTotal = (sumYTotal + missingY) * (sumYTotal + missingY) / (sumWeightTotal + missingWeight);
} else {
criterionTotal = sumYTotal + sumYTotal / sumWeightTotal;
final ArrayList<AttValTupleRegression> attValList = Lists.newArrayList();;
int start = 0;
final int lengthNonMissing = containsMissingValues() ? nomVals.length - 1 : nomVals.length;
for (int att = 0; att < lengthNonMissing; att++) {
double sumY = 0.0;
double sumWeight = 0.0;
int end = start + m_nominalValueCounts[att];
boolean reachedEnd = false;
for (int index = columnMemberships.getIndexInColumn(); index < end; index = columnMemberships.getIndexInColumn()) {
double weight = columnMemberships.getRowWeight();
assert weight > EPSILON : "Instances in columnMemberships must have weights larger than EPSILON.";
sumY += targetColumn.getValueFor(columnMemberships.getOriginalIndex());
sumWeight += weight;
if (! {
reachedEnd = true;
start = end;
if (sumWeight < EPSILON) {
// we cannot make any assumptions about this attribute value
attValList.add(new AttValTupleRegression(sumY, sumWeight, sumY / sumWeight, nomVals[att]));
if (reachedEnd) {
assert sumWeights(attValList) == sumWeightTotal : "The weights of the attribute values does not sum up to the total weight";
// sort attribute values according to their mean Y value
BigInteger bestPartitionMask = null;
boolean isBestSplitValid = false;
double bestPartitionGain = Double.NEGATIVE_INFINITY;
final int highestBitPosition = containsMissingValues() ? nomVals.length - 2 : nomVals.length - 1;
double sumYPartition = 0.0;
double sumWeightPartition = 0.0;
BigInteger partitionMask = BigInteger.ZERO;
double sumYRemaining = sumYTotal;
double sumWeightRemaining = sumWeightTotal;
boolean missingsGoLeft = true;
// no need to iterate over full list because at least one value must remain on the other side of the split
for (int i = 0; i < attValList.size() - 1; i++) {
AttValTupleRegression attVal = attValList.get(i);
sumYPartition += attVal.m_sumY;
sumWeightPartition += attVal.m_sumWeight;
sumYRemaining -= attVal.m_sumY;
sumWeightRemaining -= attVal.m_sumWeight;
assert AbsIsSmallerEpsilon(sumWeightTotal - sumWeightRemaining - sumWeightPartition) : "The weights left and right of the split do not add up to the total weight.";
assert sumWeightPartition > 0.0 : "The weight of the partition is zero.";
assert sumWeightRemaining > 0.0 : "The weight of the remaining is zero.";
partitionMask = partitionMask.or(attVal.m_bitMask);
double gain;
boolean isValidSplit;
boolean tempMissingsGoLeft = true;
if (branchContainsMissingValues && useXGBoostMissingValueHandling) {
boolean isValidSplitPartitionWithMissing = sumWeightPartition + missingWeight >= minChildSize && sumWeightRemaining >= minChildSize;
double sumYMissingWithPartition = sumYPartition + missingY;
double gainMissingWithPartition = sumYMissingWithPartition * sumYMissingWithPartition / (sumWeightPartition + missingWeight) + sumYRemaining * sumYRemaining / sumWeightRemaining - criterionTotal;
boolean isValidSplitRemainingWithMissing = sumWeightPartition >= minChildSize && sumWeightRemaining + missingWeight >= minChildSize;
double sumYMissingWithRemaining = sumYRemaining + missingY;
double gainMissingWithRemaining = sumYPartition * sumYPartition / sumWeightPartition + sumYMissingWithRemaining * sumYMissingWithRemaining / (sumWeightRemaining + missingWeight) - criterionTotal;
if (gainMissingWithPartition >= gainMissingWithRemaining) {
gain = gainMissingWithPartition;
isValidSplit = isValidSplitPartitionWithMissing;
tempMissingsGoLeft = !partitionMask.testBit(highestBitPosition);
} else {
gain = gainMissingWithRemaining;
isValidSplit = isValidSplitRemainingWithMissing;
tempMissingsGoLeft = partitionMask.testBit(highestBitPosition);
} else {
isValidSplit = sumWeightPartition >= minChildSize && sumWeightRemaining >= minChildSize;
gain = sumYPartition * sumYPartition / sumWeightPartition + sumYRemaining * sumYRemaining / sumWeightRemaining - criterionTotal;
// use random tie breaker if gains are equal
boolean randomTieBreaker = gain == bestPartitionGain ? rd.nextInt(0, 1) == 1 : false;
// store if better than before or first valid split
if (gain > bestPartitionGain || (!isBestSplitValid && isValidSplit) || randomTieBreaker) {
if (isValidSplit || !isBestSplitValid) {
bestPartitionGain = gain;
// right branch must by convention always contain the nominal value
// with the highest assigned integer
bestPartitionMask = partitionMask.testBit(highestBitPosition) ? partitionMask : BigInteger.ZERO.setBit(highestBitPosition + 1).subtract(BigInteger.ONE).xor(partitionMask);
isBestSplitValid = isValidSplit;
if (branchContainsMissingValues) {
missingsGoLeft = tempMissingsGoLeft;
} else {
// no missings in this branch, but we still have to provide a direction for missing values
// send missings in the direction the most records in the node are sent to
boolean sendWithPartition = sumWeightPartition >= sumWeightRemaining;
missingsGoLeft = sendWithPartition ? !partitionMask.testBit(highestBitPosition) : partitionMask.testBit(highestBitPosition);
if (bestPartitionGain > 0.0 && isBestSplitValid) {
if (useXGBoostMissingValueHandling) {
return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, NO_MISSED_ROWS, missingsGoLeft ? NominalBinarySplitCandidate.MISSINGS_GO_LEFT : NominalBinarySplitCandidate.MISSINGS_GO_RIGHT);
return new NominalBinarySplitCandidate(this, bestPartitionGain, bestPartitionMask, getMissedRows(columnMemberships), NominalBinarySplitCandidate.NO_MISSINGS);
return null;