Search in sources :

Example 11 with SplitEntry

use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.

the class GradHistHelper method findBestSplit.

/**
 * Finds the best split of this tree node from its gradient histogram.
 *
 * <p>The node's own gradient statistics are derived from the histogram with
 * {@code calGradStats}; every entry of {@code controller.fSet} is then evaluated with
 * {@link #findBestSplitOfOneFeature} and folded into the result via {@code SplitEntry.update}.
 * When the winning entry has a valid feature id ({@code fid != -1}) the statistics of the two
 * children ({@code 2 * nid + 1} and {@code 2 * nid + 2}) are sent to the controller.
 *
 * <p>For the root node ({@code nid == 0}) the node statistics are sent to the controller
 * exactly once. An earlier revision repeated that call after the feature loop.
 * NOTE(review): if {@code updateNodeGradStats} is additive on the PS side, the repeated call
 * double-counted the root statistics - confirm against the controller implementation.
 *
 * @param histogram gradient histogram of this node; when {@code null} an error is logged and a
 *                  freshly constructed {@code SplitEntry} is returned
 * @return the best split found, or the untouched default {@code SplitEntry} when no statistics
 *         could be computed
 * @throws Exception propagated from the helper and controller calls made here
 */
public SplitEntry findBestSplit(IntDoubleVector histogram) throws Exception {
    LOG.debug(String.format("------To find the best split of node[%d]------", this.nid));
    SplitEntry splitEntry = new SplitEntry();
    LOG.debug(String.format("The best split before looping the histogram: fid[%d], fvalue[%f]", splitEntry.fid, splitEntry.fvalue));
    // 1. calculate the gradStats of the root node
    GradStats rootStats = null;
    if (null != histogram) {
        rootStats = calGradStats(histogram);
        // 1.1. update the grad stats of the root node on PS; this is the only place where the
        // root statistics are pushed, so the update happens once per invocation
        if (this.nid == 0) {
            this.controller.updateNodeGradStats(this.nid, rootStats);
        }
    } else {
        LOG.error("null histogram.");
    }
    // 2. loop over features
    if (null == rootStats) {
        LOG.error("null root stat.");
        return splitEntry;
    }
    for (int fid = 0; fid < this.controller.fSet.length; fid++) {
        // 2.1. get the true feature id in the sampled feature set
        int trueFid = this.controller.fSet[fid];
        // 2.2. each sampled feature owns 2 * numSplit consecutive histogram slots
        int startIdx = 2 * this.controller.param.numSplit * fid;
        // 2.3. find the best split of current feature
        SplitEntry curSplit = findBestSplitOfOneFeature(trueFid, histogram, startIdx, rootStats);
        // 2.4. update the best split result if possible
        splitEntry.update(curSplit);
    }
    // 3. update the grad stats of children node (only when a valid split was found)
    if (splitEntry.fid != -1) {
        // 3.1. update the left child
        this.controller.updateNodeGradStats(2 * this.nid + 1, splitEntry.leftGradStat);
        // 3.2. update the right child
        this.controller.updateNodeGradStats(2 * this.nid + 2, splitEntry.rightGradStat);
    }
    LOG.debug(String.format("The best split after looping the histogram: fid[%d], fvalue[%f], loss gain[%f]", splitEntry.fid, splitEntry.fvalue, splitEntry.lossChg));
    return splitEntry;
}
Also used : SplitEntry(com.tencent.angel.ml.GBDT.algo.tree.SplitEntry)

Example 12 with SplitEntry

use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.

the class GradHistHelper method findBestFromServerSplit.

/**
 * Selects the best split among the per-partition candidates contained in {@code histogram}.
 *
 * <p>The vector is treated as {@code ANGEL_PS_NUMBER} equally sized slices. From the start of
 * each slice seven values are read, in order: sampled-feature index, split index, loss gain,
 * left sumGrad, left sumHess, right sumGrad, right sumHess. A slice whose feature index is
 * {@code -1} is skipped. The sampled-feature index is mapped through {@code controller.fSet}
 * and the split value is looked up in {@code controller.sketches}.
 *
 * @param histogram vector holding one split candidate per partition
 * @return the best candidate according to {@code SplitEntry.update}, or the default
 *         {@code SplitEntry} when every slice was skipped
 * @throws Exception declared by the original signature
 */
public SplitEntry findBestFromServerSplit(IntDoubleVector histogram) throws Exception {
    LOG.debug(String.format("------To find the best split of node[%d]------", this.nid));
    SplitEntry best = new SplitEntry();
    LOG.debug(String.format("The best split before looping the histogram: fid[%d], fvalue[%f]", best.fid, best.fvalue));
    // how many partitions contributed a candidate
    int numParts = WorkerContext.get().getConf().getInt(AngelConf.ANGEL_PS_NUMBER, AngelConf.DEFAULT_ANGEL_PS_NUMBER);
    // number of columns owned by each partition
    int sliceWidth = histogram.getDim() / numParts;
    assert histogram.getDim() == numParts * sliceWidth;
    // 'base' always equals pid * sliceWidth, i.e. the first column of the current slice
    for (int pid = 0, base = 0; pid < numParts; pid++, base += sliceWidth) {
        int sampledFid = (int) histogram.get(base);
        if (sampledFid == -1) {
            // this partition reported no split
            continue;
        }
        int realFid = this.controller.fSet[sampledFid];
        int binIdx = (int) histogram.get(base + 1);
        float binValue = this.controller.sketches[realFid * this.controller.param.numSplit + binIdx];
        float gain = (float) histogram.get(base + 2);
        float lGrad = (float) histogram.get(base + 3);
        float lHess = (float) histogram.get(base + 4);
        float rGrad = (float) histogram.get(base + 5);
        float rHess = (float) histogram.get(base + 6);
        LOG.debug(String.format("The best split of the %d-th partition: " + "split feature[%d], split index[%d], split value[%f], loss gain[%f], " + "left sumGrad[%f], left sumHess[%f], right sumGrad[%f], right sumHess[%f]", pid, realFid, binIdx, binValue, gain, lGrad, lHess, rGrad, rHess));
        // wrap the candidate together with the statistics of both children
        SplitEntry candidate = new SplitEntry(realFid, binValue, gain);
        candidate.leftGradStat = new GradStats(lGrad, lHess);
        candidate.rightGradStat = new GradStats(rGrad, rHess);
        best.update(candidate);
    }
    LOG.debug(String.format("The best split after looping the histogram: fid[%d], fvalue[%f], loss gain[%f]", best.fid, best.fvalue, best.lossChg));
    return best;
}
Also used : SplitEntry(com.tencent.angel.ml.GBDT.algo.tree.SplitEntry)

Example 13 with SplitEntry

use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.

the class GradHistHelper method findBestSplitOfOneFeature.

/**
 * Scans the histogram bins of one feature and returns the best split among them.
 *
 * <p>The feature occupies {@code 2 * numSplit} slots starting at {@code startIdx}: the gradient
 * of a bin is read at its index and the hessian {@code numSplit} slots later. Bins are
 * accumulated left to right into a running "left" statistic; the "right" statistic is obtained
 * as {@code rootStats - left}. A candidate is considered only when both hessian sums reach
 * {@code minChildWeight}. The last bin is never used as a split point.
 *
 * @param fid       feature id stored in the result and used to index {@code controller.sketches}
 * @param histogram gradient histogram to read from
 * @param startIdx  index of the first histogram slot belonging to this feature
 * @param rootStats gradient statistics of the node being split
 * @return a {@code SplitEntry} carrying {@code fid}; when the slots of this feature would exceed
 *         the histogram dimension an error is logged and the entry is returned without
 *         left/right statistics assigned
 */
public SplitEntry findBestSplitOfOneFeature(int fid, IntDoubleVector histogram, int startIdx, GradStats rootStats) {
    SplitEntry result = new SplitEntry();
    result.setFid(fid);
    // statistics that belong to the best candidate accepted so far
    GradStats winnerLeft = new GradStats();
    GradStats winnerRight = new GradStats();
    final int numSplit = this.controller.param.numSplit;
    // bail out when this feature's slots do not fit into the histogram
    if (startIdx + 2 * numSplit > histogram.getDim()) {
        LOG.error("index out of grad histogram size.");
        return result;
    }
    // gain of the unsplit node, subtracted from every candidate
    float parentGain = rootStats.calcGain(this.controller.param);
    // running accumulators for the two sides of the candidate split
    GradStats left = new GradStats();
    GradStats right = new GradStats();
    for (int bin = 0; bin < numSplit - 1; bin++) {
        int gradPos = startIdx + bin;
        float grad = (float) histogram.get(gradPos);
        float hess = (float) histogram.get(numSplit + gradPos);
        left.add(grad, hess);
        // negated ">=" (rather than "<") so a NaN sum is skipped exactly as before
        if (!(left.sumHess >= this.controller.param.minChildWeight)) {
            continue;
        }
        // right = root - left
        right.setSubstract(rootStats, left);
        if (!(right.sumHess >= this.controller.param.minChildWeight)) {
            continue;
        }
        float gain = left.calcGain(this.controller.param) + right.calcGain(this.controller.param) - parentGain;
        // the split value of this bin lives at sketches[fid * numSplit + bin]
        int sketchPos = fid * numSplit + bin;
        boolean improved = result.update(gain, fid, this.controller.sketches[sketchPos]);
        if (improved) {
            // remember the child statistics that go with the new best candidate
            winnerLeft.update(left.sumGrad, left.sumHess);
            winnerRight.update(right.sumGrad, right.sumHess);
        }
    }
    result.leftGradStat = winnerLeft;
    result.rightGradStat = winnerRight;
    return result;
}
Also used : SplitEntry(com.tencent.angel.ml.GBDT.algo.tree.SplitEntry)

Example 14 with SplitEntry

use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.

the class GBDTGradHistGetRowFunc method partitionGet.

/**
 * Computes the best split for the requested server row and returns it as a 7-column row.
 *
 * <p>The split is found with {@code GradHistHelper.findSplitOfServerRow} using the parameters
 * carried by the request. The result row starts at column {@code startFid * 7}, where
 * {@code startFid = startCol / (2 * numSplit)}, and holds in order: feature id, split index,
 * loss gain, left sumGrad, left sumHess, right sumGrad, right sumHess.
 *
 * @param partParam partition request; cast to {@code HistAggrParam.HistPartitionAggrParam}
 * @return a {@code PartitionGetRowResult} wrapping the 7-column result row
 */
@Override
public PartitionGetResult partitionGet(PartitionGetParam partParam) {
    HistAggrParam.HistPartitionAggrParam aggrParam = (HistAggrParam.HistPartitionAggrParam) partParam;
    LOG.info("For the gradient histogram of GBDT, we use PS to find the optimal split");
    // rebuild the settings needed for the split search from the request
    GBDTParam treeParam = new GBDTParam();
    treeParam.numSplit = aggrParam.getSplitNum();
    treeParam.minChildWeight = aggrParam.getMinChildWeight();
    treeParam.regAlpha = aggrParam.getRegAlpha();
    treeParam.regLambda = aggrParam.getRegLambda();
    ServerIntDoubleRow histRow = (ServerIntDoubleRow) psContext.getMatrixStorageManager().getRow(aggrParam.getMatrixId(), aggrParam.getRowId(), aggrParam.getPartKey().getPartitionId());
    SplitEntry bestSplit = GradHistHelper.findSplitOfServerRow(histRow, treeParam);
    // unpack the split into the seven values that are sent back
    int fid = bestSplit.getFid();
    int splitIndex = (int) bestSplit.getFvalue();
    double lossGain = bestSplit.getLossChg();
    double leftSumGrad = bestSplit.leftGradStat.sumGrad;
    double leftSumHess = bestSplit.leftGradStat.sumHess;
    double rightSumGrad = bestSplit.rightGradStat.sumGrad;
    double rightSumHess = bestSplit.rightGradStat.sumHess;
    LOG.info(String.format("split of matrix[%d] part[%d] row[%d]: fid[%d], split index[%d], loss gain[%f], " + "left sumGrad[%f], left sum hess[%f], right sumGrad[%f], right sum hess[%f]", aggrParam.getMatrixId(), aggrParam.getPartKey().getPartitionId(), aggrParam.getRowId(), fid, splitIndex, lossGain, leftSumGrad, leftSumHess, rightSumGrad, rightSumHess));
    // each split result is encoded as 7 doubles
    final int resultWidth = 7;
    int startFid = ((int) histRow.getStartCol()) / (2 * treeParam.numSplit);
    int sendStartCol = startFid * resultWidth;
    int sendEndCol = sendStartCol + resultWidth;
    ServerIntDoubleRow sendRow = new ServerIntDoubleRow(aggrParam.getRowId(), RowType.T_DOUBLE_DENSE, sendStartCol, sendEndCol, resultWidth, RouterType.RANGE);
    LOG.info(String.format("Create server row of split result: row id[%d], start col[%d], end col[%d]", aggrParam.getRowId(), sendStartCol, sendEndCol));
    // write the seven fields into consecutive columns, starting at sendStartCol
    double[] payload = {fid, splitIndex, lossGain, leftSumGrad, leftSumHess, rightSumGrad, rightSumHess};
    for (int offset = 0; offset < payload.length; offset++) {
        sendRow.set(offset + sendStartCol, payload[offset]);
    }
    return new PartitionGetRowResult(sendRow);
}
Also used : ServerIntDoubleRow(com.tencent.angel.ps.storage.vector.ServerIntDoubleRow) SplitEntry(com.tencent.angel.ml.GBDT.algo.tree.SplitEntry) GradStats(com.tencent.angel.ml.GBDT.algo.RegTree.GradStats) PartitionGetRowResult(com.tencent.angel.ml.matrix.psf.get.getrow.PartitionGetRowResult) GBDTParam(com.tencent.angel.ml.GBDT.param.GBDTParam)

Example 15 with SplitEntry

use of com.tencent.angel.ml.GBDT.algo.tree.SplitEntry in project angel by Tencent.

the class AfterSplitRunner method run.

/**
 * Applies the split decision stored for node {@code nid} to the tree currently being built.
 *
 * <p>The split feature, value and gain and the node's gradient statistics are read from the
 * corresponding vectors. If the split feature is {@code -1} the node becomes a leaf. Otherwise
 * the node is linked to children {@code 2 * nid + 1} and {@code 2 * nid + 2}, child nodes and
 * their statistics are created, instance positions are reset, and the children are either added
 * as active nodes (while {@code currentDepth < maxDepth - 1}) or turned into leaves. In every
 * case the node is deactivated at the end.
 */
@Override
public void run() {
    int splitFeature = splitFeatureVec.get(nid);
    float splitValue = (float) splitValueVec.get(nid);
    float splitGain = (float) splitGainVec.get(nid);
    // sumGrad is stored at [nid], sumHess at [nid + maxNodeNum]
    float nodeSumGrad = (float) nodeGradStatsVec.get(nid);
    float nodeSumHess = (float) nodeGradStatsVec.get(nid + this.controller.maxNodeNum);
    LOG.info(String.format("Active node[%d]: split feature[%d] value[%f], lossChg[%f], sumGrad[%f], sumHess[%f]", nid, splitFeature, splitValue, splitGain, nodeSumGrad, nodeSumHess));
    if (splitFeature == -1) {
        // no split was found: this node becomes a leaf
        this.controller.setNodeToLeaf(nid, this.controller.param.calcWeight(nodeSumGrad, nodeSumHess));
    } else {
        final int leftId = 2 * nid + 1;
        final int rightId = 2 * nid + 2;
        // link this node to its two children
        this.controller.forest[this.controller.currentTree].nodes.get(nid).setLeftChild(leftId);
        this.controller.forest[this.controller.currentTree].nodes.get(nid).setRightChild(rightId);
        // record the split and the gradient statistics on this node
        SplitEntry chosenSplit = new SplitEntry(splitFeature, splitValue, splitGain);
        this.controller.forest[this.controller.currentTree].stats.get(nid).setSplitEntry(chosenSplit);
        this.controller.forest[this.controller.currentTree].stats.get(nid).lossChg = splitGain;
        this.controller.forest[this.controller.currentTree].stats.get(nid).setStats(nodeSumGrad, nodeSumHess);
        // create the children and register them in the tree
        TNode leftNode = new TNode(leftId, nid, -1, -1);
        TNode rightNode = new TNode(rightId, nid, -1, -1);
        this.controller.forest[this.controller.currentTree].nodes.set(leftId, leftNode);
        this.controller.forest[this.controller.currentTree].nodes.set(rightId, rightNode);
        // build the statistics of both children from the node grad stats vector
        RegTNodeStat leftStat = new RegTNodeStat(this.controller.param);
        RegTNodeStat rightStat = new RegTNodeStat(this.controller.param);
        float leftGrad = (float) nodeGradStatsVec.get(leftId);
        float rightGrad = (float) nodeGradStatsVec.get(rightId);
        float leftHess = (float) nodeGradStatsVec.get(leftId + this.controller.maxNodeNum);
        float rightHess = (float) nodeGradStatsVec.get(rightId + this.controller.maxNodeNum);
        leftStat.setStats(leftGrad, leftHess);
        rightStat.setStats(rightGrad, rightHess);
        this.controller.forest[this.controller.currentTree].stats.set(leftId, leftStat);
        this.controller.forest[this.controller.currentTree].stats.set(rightId, rightStat);
        // reset instance positions for the new split
        this.controller.resetInsPos(nid, splitFeature, splitValue);
        // children stay active while the depth limit allows it, otherwise they become leaves
        boolean canGrow = this.controller.currentDepth < this.controller.param.maxDepth - 1;
        if (canGrow) {
            LOG.debug(String.format("Add children nodes of node[%d]:[%d][%d] to active nodes", nid, leftId, rightId));
            this.controller.addActiveNode(leftId);
            this.controller.addActiveNode(rightId);
        } else {
            LOG.debug(String.format("Set children nodes of node[%d]:[%d][%d] to leaf nodes", nid, leftId, rightId));
            this.controller.setNodeToLeaf(leftId, leftStat.baseWeight);
            this.controller.setNodeToLeaf(rightId, rightStat.baseWeight);
        }
    }
    // whatever happened above, this node is no longer active
    this.controller.resetActiveTNodes(nid);
    this.controller.activeNodeStat[nid] = 0;
}
Also used : TNode(com.tencent.angel.ml.GBDT.algo.tree.TNode) SplitEntry(com.tencent.angel.ml.GBDT.algo.tree.SplitEntry) RegTNodeStat(com.tencent.angel.ml.GBDT.algo.RegTree.RegTNodeStat)

Aggregations

SplitEntry (com.tencent.angel.ml.GBDT.algo.tree.SplitEntry)19 GradStats (com.tencent.angel.ml.GBDT.algo.RegTree.GradStats)2 RegTNodeStat (com.tencent.angel.ml.GBDT.algo.RegTree.RegTNodeStat)2 TNode (com.tencent.angel.ml.GBDT.algo.tree.TNode)2 GBDTParam (com.tencent.angel.ml.GBDT.param.GBDTParam)2 ServerIntDoubleRow (com.tencent.angel.ps.storage.vector.ServerIntDoubleRow)2 GBDTGradHistGetRowFunc (com.tencent.angel.ml.GBDT.psf.GBDTGradHistGetRowFunc)1 HistAggrParam (com.tencent.angel.ml.GBDT.psf.HistAggrParam)1 IntDoubleDenseVectorStorage (com.tencent.angel.ml.math2.storage.IntDoubleDenseVectorStorage)1 IntIntDenseVectorStorage (com.tencent.angel.ml.math2.storage.IntIntDenseVectorStorage)1 IntDoubleVector (com.tencent.angel.ml.math2.vector.IntDoubleVector)1 IntIntVector (com.tencent.angel.ml.math2.vector.IntIntVector)1 PartitionGetRowResult (com.tencent.angel.ml.matrix.psf.get.getrow.PartitionGetRowResult)1 PSModel (com.tencent.angel.ml.model.PSModel)1 GBDTParam (com.tencent.angel.ml.param.GBDTParam)1 ServerRow (com.tencent.angel.ps.storage.vector.ServerRow)1 ArrayList (java.util.ArrayList)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1