Use of com.tencent.angel.ml.param.GBDTParam in project angel by Tencent.
Class GradHistHelper, method findBestSplitOfOneFeatureHelper.
/**
 * Finds the best split of one feature from its slice of the gradient histogram.
 *
 * <p>The slice is read as {@code splitNum} gradient sums starting at {@code startIdx},
 * with the hessian sum of bin {@code i} read at index {@code i + splitNum}; it therefore
 * spans {@code 2 * splitNum} entries. {@code splitNum} is taken from the worker
 * configuration ({@code MLConf.ML_GBDT_SPLIT_NUM}).
 *
 * <p>NOTE(review): the child-weight threshold and the gain computation use a
 * default-constructed {@code GBDTParam}, not one populated from the job configuration —
 * confirm that this is intended.
 *
 * @param fid       feature id under which candidate splits are recorded
 * @param histogram gradient histogram containing the feature's slice
 * @param startIdx  index of the feature's first gradient bin inside {@code histogram}
 * @return the split entry updated with the best candidate found; if the slice does not
 *         fit inside the histogram, an error is logged and the freshly constructed
 *         entry is returned as-is
 */
public static SplitEntry findBestSplitOfOneFeatureHelper(int fid, TDoubleVector histogram, int startIdx) {
  LOG.info(String.format("Find best split for fid[%d] in histogram size[%d], startIdx[%d]", fid, histogram.getDimension(), startIdx));
  int splitNum = WorkerContext.get().getConf().getInt(MLConf.ML_GBDT_SPLIT_NUM(), MLConf.DEFAULT_ML_GBDT_SPLIT_NUM());
  SplitEntry splitEntry = new SplitEntry();

  // Validate the slice BEFORE anything reads it: the root statistics below are
  // computed from the same [startIdx, startIdx + 2 * splitNum) range, so checking
  // afterwards would hand an out-of-range slice to calGradStats first.
  if (startIdx + 2 * splitNum > histogram.getDimension()) {
    LOG.error("index out of grad histogram size.");
    return splitEntry;
  }

  // 1. create the best left stats and right stats
  GradStats bestLeftStat = new GradStats();
  GradStats bestRightStat = new GradStats();
  GradStats rootStats = calGradStats(histogram, startIdx, splitNum);
  GBDTParam param = new GBDTParam();

  // 2. the gain of the root node
  float rootGain = rootStats.calcGain(param);
  LOG.info(String.format("Feature[%d]: sumGrad[%f], sumHess[%f], gain[%f]", fid, rootStats.sumGrad, rootStats.sumHess, rootGain));

  // 3. running left/right statistics while scanning the bins
  GradStats leftStats = new GradStats();
  GradStats rightStats = new GradStats();

  // 4. scan every bin except the last one of the feature
  for (int histIdx = startIdx; histIdx < startIdx + splitNum - 1; histIdx++) {
    // 4.1. accumulate the grad and hess of the current bin into the left side
    float grad = (float) histogram.get(histIdx);
    float hess = (float) histogram.get(splitNum + histIdx);
    leftStats.add(grad, hess);

    // 4.2. the left child must carry enough hessian weight
    if (leftStats.sumHess < param.minChildWeight) {
      continue;
    }

    // 4.3. right = root - left, and it must carry enough hessian weight too
    rightStats.setSubstract(rootStats, leftStats);
    if (rightStats.sumHess < param.minChildWeight) {
      continue;
    }

    // 4.4. loss change of splitting after this bin
    float lossChg = leftStats.calcGain(param) + rightStats.calcGain(param) - rootGain;

    // 4.5. split index is the 1-based position of the bin inside the feature's slice
    int splitIdx = histIdx - startIdx + 1;
    if (splitEntry.update(lossChg, fid, splitIdx)) {
      // 4.6. the entry accepted this candidate, so remember its child statistics
      bestLeftStat.update(leftStats.sumGrad, leftStats.sumHess);
      bestRightStat.update(rightStats.sumGrad, rightStats.sumHess);
    }
  }

  // 5. set the best left and right grad stats
  splitEntry.leftGradStat = bestLeftStat;
  splitEntry.rightGradStat = bestRightStat;
  LOG.info(String.format("Find best split for fid[%d], split feature[%d]: split index[%f], lossChg[%f]", fid, splitEntry.fid, splitEntry.fvalue, splitEntry.lossChg));
  return splitEntry;
}
Use of com.tencent.angel.ml.param.GBDTParam in project angel by Tencent.
Class GBDTGradHistGetRowFunc, method partitionGet.
/**
 * Finds the best split over this partition's gradient-histogram row and returns it
 * packed into a small server row.
 *
 * <p>The returned row covers seven columns starting at the source row's start column
 * and holds, in order: feature id, split index, loss gain, left gradient sum,
 * left hessian sum, right gradient sum, right hessian sum.
 *
 * @param partParam partition request; cast to {@code HistAggrParam.HistPartitionAggrParam},
 *                  which supplies the matrix/row/partition ids and the GBDT parameters
 * @return a {@code PartitionGetRowResult} wrapping the packed split row
 */
@Override
public PartitionGetResult partitionGet(PartitionGetParam partParam) {
  HistAggrParam.HistPartitionAggrParam param = (HistAggrParam.HistPartitionAggrParam) partParam;
  LOG.info("For the gradient histogram of GBT, we use PS to find the optimal split");

  // Rebuild the GBDT parameters carried by the request.
  GBDTParam gbtparam = new GBDTParam();
  gbtparam.numSplit = param.getSplitNum();
  gbtparam.minChildWeight = param.getMinChildWeight();
  gbtparam.regAlpha = param.getRegAlpha();
  gbtparam.regLambda = param.getRegLambda();

  // Look up the histogram row of this partition and search it for the best split.
  ServerDenseDoubleRow row = (ServerDenseDoubleRow) psContext.getMatrixStorageManager().getRow(param.getMatrixId(), param.getRowId(), param.getPartKey().getPartitionId());
  SplitEntry splitEntry = GradHistHelper.findSplitOfServerRow(row, gbtparam);

  int fid = splitEntry.getFid();
  // The split index is carried in the entry's feature-value field; truncate it to int.
  int splitIndex = (int) splitEntry.getFvalue();
  double lossGain = splitEntry.getLossChg();
  GradStats leftGradStat = splitEntry.leftGradStat;
  GradStats rightGradStat = splitEntry.rightGradStat;
  double leftSumGrad = leftGradStat.sumGrad;
  double leftSumHess = leftGradStat.sumHess;
  double rightSumGrad = rightGradStat.sumGrad;
  double rightSumHess = rightGradStat.sumHess;
  LOG.info(String.format("split of matrix[%d] part[%d] row[%d]: fid[%d], split index[%d], loss gain[%f], " + "left sumGrad[%f], left sum hess[%f], right sumGrad[%f], right sum hess[%f]", param.getMatrixId(), param.getPartKey().getPartitionId(), param.getRowId(), fid, splitIndex, lossGain, leftSumGrad, leftSumHess, rightSumGrad, rightSumHess));

  // Each split result is packed into this many doubles.
  final int splitResultSize = 7;
  int sendStartCol = (int) row.getStartCol();
  int sendEndCol = sendStartCol + splitResultSize;
  ServerDenseDoubleRow sendRow = new ServerDenseDoubleRow(param.getRowId(), sendStartCol, sendEndCol);
  LOG.info(String.format("Create server row of split result: row id[%d], start col[%d], end col[%d]", param.getRowId(), sendStartCol, sendEndCol));

  // Pack the result; positions are relative to the start of the new row's data.
  sendRow.getData().put(0, fid);
  sendRow.getData().put(1, splitIndex);
  sendRow.getData().put(2, lossGain);
  sendRow.getData().put(3, leftSumGrad);
  sendRow.getData().put(4, leftSumHess);
  sendRow.getData().put(5, rightSumGrad);
  sendRow.getData().put(6, rightSumHess);
  return new PartitionGetRowResult(sendRow);
}
Aggregations