Use of org.apache.ignite.ml.trees.CategoricalSplitInfo in the Apache Ignite project.
Shown below: the split method of the class CategoricalFeatureProcessor.
/**
 * Evaluates one candidate categorical split: partitions the given samples into a "left" and a
 * "right" group, computes the impurity of each group and the resulting information gain.
 *
 * <p>A sample goes to the left group when the bit of {@code leftCats} at index
 * {@code mapping.get((int) values[sample])} is set; otherwise it goes to the right group.
 *
 * @param leftCats Bit set marking which (mapped) categories belong to the left group.
 * @param intervalIdx Index passed through unchanged to the resulting {@link CategoricalSplitInfo}.
 * @param mapping Maps a feature value (cast to {@code int}) to its bit index in {@code leftCats}.
 * @param sampleIndexes Indexes of the samples to partition; used to index {@code values} and {@code labels}.
 * @param values Feature values, addressed by sample index.
 * @param labels Labels, addressed by sample index.
 * @param impurity Impurity from which the size-weighted left/right impurities are subtracted
 *      (presumably the impurity of the region before splitting - confirm against the caller).
 * @return Split info holding both group impurities, the left categories and the information gain.
 */
private SplitInfo<CategoricalRegionInfo> split(BitSet leftCats, int intervalIdx, Map<Integer, Integer> mapping, Integer[] sampleIndexes, double[] values, double[] labels, double impurity) {
    // Key 'true' collects samples whose category bit is set in leftCats, key 'false' collects the rest.
    Map<Boolean, List<Integer>> partitioned = Arrays.stream(sampleIndexes)
        .collect(Collectors.partitioningBy(idx -> {
            Integer catBit = mapping.get((int) values[idx]);

            return leftCats.get(catBit);
        }));

    List<Integer> leftSamples = partitioned.get(true);
    double leftImp = impurityOfSamples(leftSamples, labels);

    List<Integer> rightSamples = partitioned.get(false);
    double rightImp = impurityOfSamples(rightSamples, labels);

    int leftCnt = leftSamples.size();
    int rightCnt = rightSamples.size();
    int totalCnt = leftCnt + rightCnt;

    // Each group's impurity is weighted by its share of all samples.
    double leftWeight = (double) leftCnt / totalCnt;
    double rightWeight = (double) rightCnt / totalCnt;

    // The result is sent back to the trainer node, so no vectors are put into it:
    // the categories of each region can be computed on the last step.
    CategoricalRegionInfo leftRegion = new CategoricalRegionInfo(leftImp, null);
    CategoricalRegionInfo rightRegion = new CategoricalRegionInfo(rightImp, null);

    CategoricalSplitInfo<CategoricalRegionInfo> splitInfo =
        new CategoricalSplitInfo<>(intervalIdx, leftRegion, rightRegion, leftCats);

    splitInfo.setInfoGain(impurity - leftWeight * leftImp - rightWeight * rightImp);

    return splitInfo;
}

/**
 * Applies the impurity calculator to the labels of the given samples.
 *
 * @param samples Sample indexes into {@code labels}.
 * @param labels Labels, addressed by sample index.
 * @return Value produced by {@code calc} for the labels of the given samples.
 */
private double impurityOfSamples(List<Integer> samples, double[] labels) {
    return calc.apply(samples.stream().mapToDouble(idx -> labels[idx]));
}
Aggregations