Search in sources :

Example 1 with TreeStats

use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.

the class SpeeDRFModel method generateHTMLTreeStats.

/**
 * Appends a "Tree stats" HTML table (min / mean / max of tree depth and of leaf count)
 * to {@code sb} and refreshes {@code this.treeStats} from the same numbers.
 *
 * <p>The numbers come from passing the {@code Constants.TREE_DEPTH} and
 * {@code Constants.TREE_LEAVES} entries of {@code trees} to {@code stats(...)}; each result is
 * read as {@code [min, mean, max]}. A {@code null} result renders as {@code NA} in that row.
 *
 * <p>If either result is {@code null}, {@code this.treeStats} is set to {@code null};
 * otherwise it is replaced by a new {@code TreeStats} populated from both results.
 *
 * @param sb    buffer the HTML is appended to
 * @param trees JSON object that the depth and leaf entries are read from
 */
public void generateHTMLTreeStats(StringBuilder sb, JsonObject trees) {
    DocGen.HTML.section(sb, "Tree stats");
    DocGen.HTML.arrayHead(sb);
    sb.append("<tr><th>&nbsp;</th>").append("<th>Min</th><th>Mean</th><th>Max</th></tr>");

    double[] depth_stats = stats(trees.get(Constants.TREE_DEPTH));
    double[] leaf_stats = stats(trees.get(Constants.TREE_LEAVES));

    // Min and max are shown as integers, the mean is shown as-is.
    sb.append("<tr><th>Depth</th>")
        .append("<td>").append(depth_stats != null ? (int) depth_stats[0] : NA).append("</td>")
        .append("<td>").append(depth_stats != null ? depth_stats[1] : NA).append("</td>")
        .append("<td>").append(depth_stats != null ? (int) depth_stats[2] : NA).append("</td></tr>");
    // The row must open with <tr>; it was previously missing, leaving an unbalanced </tr>.
    sb.append("<tr><th>Leaves</th>")
        .append("<td>").append(leaf_stats != null ? (int) leaf_stats[0] : NA).append("</td>")
        .append("<td>").append(leaf_stats != null ? leaf_stats[1] : NA).append("</td>")
        .append("<td>").append(leaf_stats != null ? (int) leaf_stats[2] : NA).append("</td></tr>");
    DocGen.HTML.arrayTail(sb);

    // Only build the holder when both statistics are available; otherwise the field is cleared.
    TreeStats treeStats = null;
    if (depth_stats != null && leaf_stats != null) {
        treeStats = new TreeStats();
        treeStats.minDepth = (int) depth_stats[0];
        treeStats.meanDepth = (float) depth_stats[1];
        treeStats.maxDepth = (int) depth_stats[2];
        treeStats.minLeaves = (int) leaf_stats[0];
        treeStats.meanLeaves = (float) leaf_stats[1];
        treeStats.maxLeaves = (int) leaf_stats[2];
        treeStats.setNumTrees(N);
    }
    this.treeStats = treeStats;
}
Also used : TreeStats(hex.gbm.DTree.TreeModel.TreeStats)

Example 2 with TreeStats

use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.

the class GBM method buildModel.

/**
 * Computes the GBM trees for {@code model}.
 *
 * <p>Start by splitting all the data according to some criteria (minimize variance at the
 * leaves). Record on each row which split it goes to, and assign a split number to it (for the
 * next pass). On <em>this</em> pass, use the split-number to build a per-split histogram, with a
 * per-histogram-bucket variance.
 *
 * <p>Each iteration scores the model (except on the very first iteration when a checkpoint is
 * present), runs {@code ComputeProb} and {@code ComputeRes} over {@code fr}, builds the next
 * K trees and folds them into the tree statistics. The loop stops early if the job is no longer
 * running, in which case the final scoring is skipped as well.
 *
 * @return the model as last returned by {@code doScoring}, or the incoming model if no scoring ran
 */
@Override
protected GBMModel buildModel(GBMModel model, final Frame fr, String[] names, String[][] domains, Timer t_build) {
    // Continue from statistics already attached to the model, otherwise start from scratch.
    TreeStats runningStats = model.treeStats;
    if (runningStats == null) {
        runningStats = new TreeStats();
    }

    DTree[] ktrees = null;
    int tid = 0;
    // Build trees until we hit the limit.
    while (tid < ntrees) {
        // Skip the initial scoring when the model already exists (checkpoint restart).
        boolean resumedFirstPass = (tid == 0) && (checkpoint != null);
        if (!resumedFirstPass) {
            model = doScoring(model, fr, ktrees, tid, runningStats, false, false, false);
        }

        // ESL2, page 387, Step 2a: compute prediction (prob distribution) from prior tree
        // results: Work <== f(Tree)
        new ComputeProb().doAll(fr);

        // ESL2, page 387, Step 2b i: compute residuals from the prediction (probability
        // distribution): Work <== f(Work)
        new ComputeRes().doAll(fr);

        // ESL2, page 387, Step 2b ii, iii, iv
        Timer treeTimer = new Timer();
        ktrees = buildNextKTrees(fr);
        Log.info(Sys.GBM__, (tid + 1) + ". tree was built in " + treeTimer.toString());

        // If canceled during building, stop here; tid is intentionally not advanced.
        if (!Job.isRunning(self())) {
            break;
        }

        // Fold the freshly built trees into the statistics.
        runningStats.updateBy(ktrees);
        tid++;
    }

    // Final scoring is skipped if the job was cancelled.
    if (!Job.isRunning(self())) {
        return model;
    }
    return doScoring(model, fr, ktrees, tid, runningStats, true, false, false);
}
Also used : TreeStats(hex.gbm.DTree.TreeModel.TreeStats)

Example 3 with TreeStats

use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.

the class DRF method buildModel.

/**
 * Builds the DRF trees for {@code model}.
 *
 * <p>Each iteration scores the model (except on the very first iteration when a checkpoint is
 * present), builds the next K trees and folds them into the tree statistics. The loop stops
 * early if the job is no longer running, in which case the final scoring is skipped as well.
 *
 * @return the model as last returned by {@code doScoring}, or the incoming model if no scoring ran
 */
@Override
protected DRFModel buildModel(DRFModel model, final Frame fr, String[] names, String[][] domains, final Timer t_build) {
    // The RNG used to pick split columns.
    Random rand = createRNG(_seed);
    // Advance the generator once per checkpointed tree to put it into the same state.
    for (int skipped = 0; skipped < _ntreesFromCheckpoint; skipped++) {
        rand.nextLong();
    }

    // Continue from statistics already attached to the model, otherwise start from scratch.
    TreeStats runningStats = model.treeStats;
    if (runningStats == null) {
        runningStats = new TreeStats();
    }

    DTree[] ktrees = null;
    int tid = 0;
    // Build trees until we hit the limit.
    while (tid < ntrees) {
        // Skip the initial scoring when the model already exists (checkpoint restart).
        boolean resumedFirstPass = (tid == 0) && (checkpoint != null);
        if (!resumedFirstPass) {
            model = doScoring(model, fr, ktrees, tid, runningStats, tid == 0, !hasValidation(), build_tree_one_node);
        }

        // At each iteration build K trees (K = nclass = response column domain size).
        // TODO: parallelize more? build more than k trees at each time, we need to care about temporary data
        // Idea: launch more DRF at once.
        Timer treeTimer = new Timer();
        ktrees = buildNextKTrees(fr, _mtry, sample_rate, rand, tid);
        Log.info(logTag(), (tid + 1) + ". tree was built " + treeTimer.toString());

        // If canceled during building, stop here; tid is intentionally not advanced.
        if (!Job.isRunning(self())) {
            break;
        }

        // Fold the freshly built trees into the statistics.
        runningStats.updateBy(ktrees);
        tid++;
    }

    // Final scoring happens only while the job is still running.
    if (!Job.isRunning(self())) {
        return model;
    }
    model = doScoring(model, fr, ktrees, tid, runningStats, true, !hasValidation(), build_tree_one_node);
    // Make sure that we did not miss any votes
    //      assert !importance || _treeMeasuresOnOOB.npredictors() == _treeMeasuresOnSOOB[0/*variable*/].npredictors() : "Missing some tree votes in variable importance voting?!";
    return model;
}
Also used : TreeStats(hex.gbm.DTree.TreeModel.TreeStats) Random(java.util.Random) DTree(hex.gbm.DTree)

Aggregations

TreeStats (hex.gbm.DTree.TreeModel.TreeStats): 3, DTree (hex.gbm.DTree): 1, Random (java.util.Random): 1