use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.
the class GradHistHelper method findBestSplitOfOneFeature.
/**
 * Finds the best split of one feature from its slice of the gradient histogram.
 *
 * <p>The slice begins at {@code startIdx}. For a bin at index {@code i} the gradient sum is read
 * from {@code histogram[i]} and the hessian sum from {@code histogram[numSplit + i]}. Bins are
 * accumulated from left to right; the last bin of the slice is never tried as a split point.
 * A candidate is evaluated only when both the left and the right hessian sums reach
 * {@code minChildWeight}.
 *
 * @param fid feature id recorded in the result and used to locate the split value in the
 *     controller's sketches
 * @param histogram gradient histogram holding the slice of this feature
 * @param startIdx index of the first bin of this feature inside {@code histogram}
 * @param rootStats gradient statistics of the node being split
 * @return the best split found for this feature; when the slice does not fit into the histogram
 *     an error is logged and the entry is returned with only its feature id set
 */
public SplitEntry findBestSplitOfOneFeature(int fid, TDoubleVector histogram, int startIdx, GradStats rootStats) {
  final int numSplit = this.controller.param.numSplit;

  SplitEntry result = new SplitEntry();
  result.setFid(fid);

  // Guard: the slice [startIdx, startIdx + 2 * numSplit) must lie inside the histogram.
  if (startIdx + 2 * numSplit > histogram.getDimension()) {
    LOG.error("index out of grad histogram size.");
    return result;
  }

  GradStats bestLeft = new GradStats();
  GradStats bestRight = new GradStats();
  float rootGain = rootStats.calcGain(this.controller.param);

  // Running statistics of the candidate left / right children.
  GradStats left = new GradStats();
  GradStats right = new GradStats();

  final int lastBin = startIdx + numSplit - 1;
  for (int bin = startIdx; bin < lastBin; bin++) {
    float grad = (float) histogram.get(bin);
    float hess = (float) histogram.get(numSplit + bin);
    left.add(grad, hess);

    // Written as !(a >= b) so that a NaN hessian is rejected exactly as a failed >= test.
    if (!(left.sumHess >= this.controller.param.minChildWeight)) {
      continue;
    }

    // right = root - left
    right.setSubstract(rootStats, left);
    if (!(right.sumHess >= this.controller.param.minChildWeight)) {
      continue;
    }

    float leftGain = left.calcGain(this.controller.param);
    float rightGain = right.calcGain(this.controller.param);
    float lossChg = leftGain + rightGain - rootGain;

    // The split value of this bin is stored in the sketches at fid * numSplit + (bin offset).
    int sketchIdx = fid * numSplit + (bin - startIdx);
    if (result.update(lossChg, fid, this.controller.sketches[sketchIdx])) {
      // Remember the child statistics that belong to the new best split.
      bestLeft.update(left.sumGrad, left.sumHess);
      bestRight.update(right.sumGrad, right.sumHess);
    }
  }

  result.leftGradStat = bestLeft;
  result.rightGradStat = bestRight;
  return result;
}
use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.
the class GradHistHelper method findBestSplit.
/**
 * Finds the best split of this tree node over all features of the sampled feature set.
 *
 * <p>The root gradient statistics are computed from the histogram with {@code calGradStats}.
 * For node 0 they are pushed through the controller exactly once. Each sampled feature owns a
 * slice of {@code 2 * numSplit} histogram entries; the best split of every slice is merged into
 * the returned entry. When a split was found (feature id is not -1) the gradient statistics of
 * the two children {@code 2 * nid + 1} and {@code 2 * nid + 2} are pushed as well.
 *
 * <p>NOTE(review): earlier code pushed the root statistics a second time after the feature loop.
 * That call was always redundant; if {@code updateNodeGradStats} accumulates rather than
 * overwrites, it also double-counted the root statistics - confirm against the controller.
 *
 * @param histogram gradient histogram of this node; may be {@code null}, in which case errors
 *     are logged and an empty split entry is returned
 * @return the best split entry over all sampled features
 * @throws Exception if computing the statistics or updating the controller fails
 */
public SplitEntry findBestSplit(TDoubleVector histogram) throws Exception {
  LOG.info(String.format("------To find the best split of node[%d]------", this.nid));
  SplitEntry splitEntry = new SplitEntry();
  LOG.info(String.format("The best split before looping the histogram: fid[%d], fvalue[%f]", splitEntry.fid, splitEntry.fvalue));

  // 1. calculate the gradStats of the root node
  GradStats rootStats = null;
  if (null != histogram) {
    rootStats = calGradStats(histogram);
    // 1.1. update the grad stats of the root node on PS, only called once by leader worker
    if (this.nid == 0) {
      this.controller.updateNodeGradStats(this.nid, rootStats);
    }
  } else {
    LOG.error("null histogram.");
  }

  // 2. loop over features
  if (null == rootStats) {
    LOG.error("null root stat.");
    return splitEntry;
  }
  for (int fid = 0; fid < this.controller.fSet.length; fid++) {
    // 2.1. get the true feature id in the sampled feature set
    int trueFid = this.controller.fSet[fid];
    // 2.2. get the start index of the histogram slice of this feature
    int startIdx = 2 * this.controller.param.numSplit * fid;
    // 2.3. find the best split of current feature
    SplitEntry curSplit = findBestSplitOfOneFeature(trueFid, histogram, startIdx, rootStats);
    // 2.4. update the best split result if possible
    splitEntry.update(curSplit);
  }

  // 3. update the grad stats of children node
  if (splitEntry.fid != -1) {
    // 3.1. update the left child
    this.controller.updateNodeGradStats(2 * this.nid + 1, splitEntry.leftGradStat);
    // 3.2. update the right child
    this.controller.updateNodeGradStats(2 * this.nid + 2, splitEntry.rightGradStat);
  }

  LOG.info(String.format("The best split after looping the histogram: fid[%d], fvalue[%f], loss gain[%f]", splitEntry.fid, splitEntry.fvalue, splitEntry.lossChg));
  return splitEntry;
}
use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.
the class GradHistHelper method findBestFromServerSplit.
/**
 * Picks the best split among the per-partition results encoded in {@code histogram}.
 *
 * <p>The vector is divided into as many equally sized segments as the configured number of
 * parameter servers. Relative to the start of a segment the entries are read as:
 * 0 = index into the sampled feature set (-1 means the partition has no split),
 * 1 = split index, 2 = loss gain, 3/4 = left sumGrad/sumHess, 5/6 = right sumGrad/sumHess.
 * The split value is looked up in the controller's sketches.
 *
 * @param histogram vector holding one encoded split result per partition
 * @return the best split entry over all partitions
 * @throws Exception declared by the signature; kept for callers
 */
public SplitEntry findBestFromServerSplit(TDoubleVector histogram) throws Exception {
  LOG.debug(String.format("------To find the best split of node[%d]------", this.nid));
  SplitEntry best = new SplitEntry();
  LOG.debug(String.format("The best split before looping the histogram: fid[%d], fvalue[%f]", best.fid, best.fvalue));

  // Number of partitions and width of each partition's segment.
  int partitionNum = WorkerContext.get().getConf().getInt(AngelConf.ANGEL_PS_NUMBER, AngelConf.DEFAULT_ANGEL_PS_NUMBER);
  int dimension = histogram.getDimension();
  int colPerPartition = dimension / partitionNum;
  assert dimension == partitionNum * colPerPartition;

  for (int pid = 0, base = 0; pid < partitionNum; pid++, base += colPerPartition) {
    int sampledFid = (int) histogram.get(base);
    if (sampledFid != -1) {
      // Map the position in the sampled feature set back to the real feature id.
      int trueSplitFid = this.controller.fSet[sampledFid];
      int splitIdx = (int) histogram.get(base + 1);
      float splitValue = this.controller.sketches[trueSplitFid * this.controller.param.numSplit + splitIdx];
      float lossChg = (float) histogram.get(base + 2);
      float leftSumGrad = (float) histogram.get(base + 3);
      float leftSumHess = (float) histogram.get(base + 4);
      float rightSumGrad = (float) histogram.get(base + 5);
      float rightSumHess = (float) histogram.get(base + 6);
      LOG.debug(String.format("The best split of the %d-th partition: " + "split feature[%d], split index[%d], split value[%f], loss gain[%f], " + "left sumGrad[%f], left sumHess[%f], right sumGrad[%f], right sumHess[%f]", pid, trueSplitFid, splitIdx, splitValue, lossChg, leftSumGrad, leftSumHess, rightSumGrad, rightSumHess));

      SplitEntry candidate = new SplitEntry(trueSplitFid, splitValue, lossChg);
      candidate.leftGradStat = new GradStats(leftSumGrad, leftSumHess);
      candidate.rightGradStat = new GradStats(rightSumGrad, rightSumHess);
      best.update(candidate);
    }
  }

  LOG.debug(String.format("The best split after looping the histogram: fid[%d], fvalue[%f], loss gain[%f]", best.fid, best.fvalue, best.lossChg));
  return best;
}
use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.
the class GBDTGradHistGetRowFunc method merge.
/**
 * Merges the per-partition split results into the single best split.
 *
 * <p>Every partition result is expected to carry a {@code ServerDenseDoubleRow} whose data is
 * read as: 0 = feature id (-1 means no split), 1 = split index, 2 = loss gain,
 * 3/4 = left sumGrad/sumHess, 5/6 = right sumGrad/sumHess. The split index is stored in the
 * value slot of the resulting {@code SplitEntry}.
 *
 * @param partResults results returned by the partitions
 * @return a successful result wrapping the best split entry; when no partition reports a split
 *     the wrapped entry is a freshly constructed {@code SplitEntry}
 */
@Override
public GetResult merge(List<PartitionGetResult> partResults) {
  SplitEntry splitEntry = new SplitEntry();
  for (PartitionGetResult partResult : partResults) {
    ServerDenseDoubleRow row = (ServerDenseDoubleRow) ((PartitionGetRowResult) partResult).getRowSplit();
    int fid = (int) row.getData().get(0);
    if (fid != -1) {
      int splitIndex = (int) row.getData().get(1);
      float lossGain = (float) row.getData().get(2);
      float leftSumGrad = (float) row.getData().get(3);
      float leftSumHess = (float) row.getData().get(4);
      float rightSumGrad = (float) row.getData().get(5);
      float rightSumHess = (float) row.getData().get(6);
      LOG.debug(String.format("psFunc: the best split after looping a split: fid[%d], fvalue[%d], loss gain[%f]" + ", leftSumGrad[%f], leftSumHess[%f], rightSumGrad[%f], rightSumHess[%f]", fid, splitIndex, lossGain, leftSumGrad, leftSumHess, rightSumGrad, rightSumHess));
      GradStats curLeftGradStat = new GradStats(leftSumGrad, leftSumHess);
      GradStats curRightGradStat = new GradStats(rightSumGrad, rightSumHess);
      SplitEntry curSplitEntry = new SplitEntry(fid, splitIndex, lossGain);
      curSplitEntry.leftGradStat = curLeftGradStat;
      curSplitEntry.rightGradStat = curRightGradStat;
      splitEntry.update(curSplitEntry);
    }
  }
  return new GBDTGradHistGetRowResult(ResponseType.SUCCESS, splitEntry);
}
use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.
the class AfterSplitRunner method run.
/**
 * Applies the split decision of node {@code nid} to the current tree.
 *
 * <p>When the node has no split feature (-1) it is turned into a leaf whose weight is computed
 * from its gradient statistics. Otherwise the split entry and statistics are stored on the node,
 * the two children {@code 2 * nid + 1} and {@code 2 * nid + 2} are created together with their
 * statistics, instance positions are reset, and the children either become active nodes or -
 * when the current depth has reached {@code maxDepth - 1} - leaves. In both cases the node is
 * deactivated at the end.
 */
@Override
public void run() {
  int splitFeature = splitFeatureVec.get(nid);
  float splitValue = (float) splitValueVec.get(nid);
  float splitGain = (float) splitGainVec.get(nid);
  // Gradient sums are stored at [nid], hessian sums at [nid + maxNodeNum].
  float nodeSumGrad = (float) nodeGradStatsVec.get(nid);
  float nodeSumHess = (float) nodeGradStatsVec.get(nid + this.controller.maxNodeNum);
  LOG.info(String.format("Active node[%d]: split feature[%d] value[%f], lossChg[%f], sumGrad[%f], sumHess[%f]", nid, splitFeature, splitValue, splitGain, nodeSumGrad, nodeSumHess));

  if (splitFeature == -1) {
    // No split available: this node becomes a leaf.
    this.controller.setNodeToLeaf(nid, this.controller.param.calcWeight(nodeSumGrad, nodeSumHess));
  } else {
    final int leftId = 2 * nid + 1;
    final int rightId = 2 * nid + 2;

    // Link the children to this node.
    this.controller.forest[this.controller.currentTree].nodes.get(nid).setLeftChild(leftId);
    this.controller.forest[this.controller.currentTree].nodes.get(nid).setRightChild(rightId);

    // Record the split and the node's own statistics.
    SplitEntry nodeSplit = new SplitEntry(splitFeature, splitValue, splitGain);
    this.controller.forest[this.controller.currentTree].stats.get(nid).setSplitEntry(nodeSplit);
    this.controller.forest[this.controller.currentTree].stats.get(nid).lossChg = splitGain;
    this.controller.forest[this.controller.currentTree].stats.get(nid).setStats(nodeSumGrad, nodeSumHess);

    // Create the children nodes and put them into the tree.
    this.controller.forest[this.controller.currentTree].nodes.set(leftId, new TNode(leftId, nid, -1, -1));
    this.controller.forest[this.controller.currentTree].nodes.set(rightId, new TNode(rightId, nid, -1, -1));

    // Create the statistics of the children and add them to the tree.
    RegTNodeStat leftChildStat = new RegTNodeStat(this.controller.param);
    RegTNodeStat rightChildStat = new RegTNodeStat(this.controller.param);
    leftChildStat.setStats(
        (float) nodeGradStatsVec.get(leftId),
        (float) nodeGradStatsVec.get(leftId + this.controller.maxNodeNum));
    rightChildStat.setStats(
        (float) nodeGradStatsVec.get(rightId),
        (float) nodeGradStatsVec.get(rightId + this.controller.maxNodeNum));
    this.controller.forest[this.controller.currentTree].stats.set(leftId, leftChildStat);
    this.controller.forest[this.controller.currentTree].stats.set(rightId, rightChildStat);

    // Reset instance positions according to the split of this node.
    this.controller.resetInsPos(nid, splitFeature, splitValue);

    boolean canGrowDeeper = this.controller.currentDepth < this.controller.param.maxDepth - 1;
    if (canGrowDeeper) {
      LOG.debug(String.format("Add children nodes of node[%d]:[%d][%d] to active nodes", nid, leftId, rightId));
      this.controller.addActiveNode(leftId);
      this.controller.addActiveNode(rightId);
    } else {
      // Depth limit reached: the children are final leaves.
      LOG.debug(String.format("Set children nodes of node[%d]:[%d][%d] to leaf nodes", nid, leftId, rightId));
      this.controller.setNodeToLeaf(leftId, leftChildStat.baseWeight);
      this.controller.setNodeToLeaf(rightId, rightChildStat.baseWeight);
    }
  }

  // Deactivate this node in every case.
  this.controller.resetActiveTNodes(nid);
  this.controller.activeNodeStat[nid] = 0;
}
Aggregations