use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.
the class SpeeDRFModel method generateHTMLTreeStats.
/**
 * Appends a "Tree stats" HTML table (min / mean / max of tree depth and of leaf count)
 * to {@code sb} and stores the same figures in {@code this.treeStats}.
 *
 * <p>Cells whose statistics are unavailable ({@code stats(...)} returned {@code null})
 * are rendered with {@code NA}. When either the depth or the leaf statistics are
 * missing, {@code this.treeStats} is set to {@code null}.
 *
 * @param sb    builder that receives the generated HTML
 * @param trees JSON object queried with {@code Constants.TREE_DEPTH} and
 *              {@code Constants.TREE_LEAVES}
 */
public void generateHTMLTreeStats(StringBuilder sb, JsonObject trees) {
  DocGen.HTML.section(sb, "Tree stats");
  DocGen.HTML.arrayHead(sb);
  sb.append("<tr><th> </th>").append("<th>Min</th><th>Mean</th><th>Max</th></tr>");

  // Indices 0, 1, 2 are rendered under the Min, Mean and Max columns respectively.
  double[] depth_stats = stats(trees.get(Constants.TREE_DEPTH));
  double[] leaf_stats = stats(trees.get(Constants.TREE_LEAVES));

  sb.append("<tr><th>Depth</th>")
    .append("<td>").append(depth_stats != null ? (int) depth_stats[0] : NA).append("</td>")
    .append("<td>").append(depth_stats != null ? depth_stats[1] : NA).append("</td>")
    .append("<td>").append(depth_stats != null ? (int) depth_stats[2] : NA).append("</td></tr>");
  // The row must open with <tr>; previously only the closing </tr> was emitted here,
  // leaving the table markup unbalanced.
  sb.append("<tr><th>Leaves</th>")
    .append("<td>").append(leaf_stats != null ? (int) leaf_stats[0] : NA).append("</td>")
    .append("<td>").append(leaf_stats != null ? leaf_stats[1] : NA).append("</td>")
    .append("<td>").append(leaf_stats != null ? (int) leaf_stats[2] : NA).append("</td></tr>");
  DocGen.HTML.arrayTail(sb);

  // Only build a TreeStats when both statistics exist; otherwise the field is cleared.
  TreeStats treeStats = null;
  if (depth_stats != null && leaf_stats != null) {
    treeStats = new TreeStats();
    treeStats.minDepth = (int) depth_stats[0];
    treeStats.meanDepth = (float) depth_stats[1];
    treeStats.maxDepth = (int) depth_stats[2];
    treeStats.minLeaves = (int) leaf_stats[0];
    treeStats.meanLeaves = (float) leaf_stats[1];
    treeStats.maxLeaves = (int) leaf_stats[2];
    treeStats.setNumTrees(N);
  }
  this.treeStats = treeStats;
}
use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.
the class GBM method buildModel.
// ==========================================================================
// Compute a GBM tree.
// Start by splitting all the data according to some criteria (minimize
// variance at the leaves). Record on each row which split it goes to, and
// assign a split number to it (for next pass). On *this* pass, use the
// split-number to build a per-split histogram, with a per-histogram-bucket
// variance.
/**
 * Builds up to {@code ntrees} rounds of trees on {@code fr}, scoring the model before
 * each round and once more at the end, and returns the most recently scored model.
 *
 * <p>The loop stops early, and the final scoring is skipped, as soon as
 * {@code Job.isRunning(self())} reports that the job is no longer running.
 *
 * @param model   model to extend; its {@code treeStats} are reused when non-null
 * @param fr      frame the trees are built on
 * @param names   not read by this method
 * @param domains not read by this method
 * @param t_build not read by this method
 * @return the model returned by the last {@code doScoring} call, or {@code model}
 *         unchanged if no scoring took place
 */
@Override
protected GBMModel buildModel(GBMModel model, final Frame fr, String[] names, String[][] domains, Timer t_build) {
  // Keep accumulating into the statistics the model already carries, if any.
  TreeStats stats = model.treeStats;
  if (stats == null) {
    stats = new TreeStats();
  }

  // Trees produced by the most recent round; null until the first round completes.
  DTree[] latestTrees = null;

  // Build trees until we hit the limit.
  int tid = 0;
  while (tid < ntrees) {
    // Do not make the initial scoring if a checkpoint is set (model already exists).
    final boolean firstRoundFromCheckpoint = (tid == 0) && (checkpoint != null);
    if (!firstRoundFromCheckpoint) {
      model = doScoring(model, fr, latestTrees, tid, stats, false, false, false);
    }

    // ESL2, page 387
    // Step 2a: Compute prediction (prob distribution) from prior tree results:
    //   Work <== f(Tree)
    new ComputeProb().doAll(fr);

    // ESL2, page 387
    // Step 2b i: Compute residuals from the prediction (probability distribution)
    //   Work <== f(Work)
    new ComputeRes().doAll(fr);

    // ESL2, page 387, Step 2b ii, iii, iv
    final Timer roundTimer = new Timer();
    latestTrees = buildNextKTrees(fr);
    Log.info(Sys.GBM__, (tid + 1) + ". tree was built in " + roundTimer.toString());

    // If canceled during building, stop here; tid is left un-incremented.
    if (!Job.isRunning(self())) {
      break;
    }

    // Fold the freshly built trees into the running statistics.
    stats.updateBy(latestTrees);
    tid++;
  }

  // Final scoring (skipped if the job was cancelled).
  if (Job.isRunning(self())) {
    model = doScoring(model, fr, latestTrees, tid, stats, true, false, false);
  }
  return model;
}
use of hex.gbm.DTree.TreeModel.TreeStats in project h2o-2 by h2oai.
the class DRF method buildModel.
/**
 * Builds up to {@code ntrees} rounds of trees on {@code fr}, scoring the model before
 * each round and once more at the end, and returns the most recently scored model.
 *
 * <p>The loop stops early, and the final scoring is skipped, as soon as
 * {@code Job.isRunning(self())} reports that the job is no longer running.
 *
 * @param model   model to extend; its {@code treeStats} are reused when non-null
 * @param fr      frame the trees are built on
 * @param names   not read by this method
 * @param domains not read by this method
 * @param t_build not read by this method
 * @return the model returned by the last {@code doScoring} call, or {@code model}
 *         unchanged if no scoring took place
 */
@Override
protected DRFModel buildModel(DRFModel model, final Frame fr, String[] names, String[][] domains, final Timer t_build) {
  // The RNG handed to buildNextKTrees (used to pick split columns).
  final Random rng = createRNG(_seed);
  // Draw and discard one value per checkpointed tree to put the generator
  // into the same state.
  for (int skipped = 0; skipped < _ntreesFromCheckpoint; skipped++) {
    rng.nextLong();
  }

  // Keep accumulating into the statistics the model already carries, if any.
  TreeStats stats = model.treeStats;
  if (stats == null) {
    stats = new TreeStats();
  }

  // Trees produced by the most recent round; null until the first round completes.
  DTree[] latestTrees = null;

  // Build trees until we hit the limit.
  int tid = 0;
  while (tid < ntrees) {
    // Do not make the initial scoring if a checkpoint is set (model already exists).
    final boolean firstRoundFromCheckpoint = (tid == 0) && (checkpoint != null);
    if (!firstRoundFromCheckpoint) {
      final boolean isFirstRound = (tid == 0);
      final boolean noValidation = !hasValidation();
      model = doScoring(model, fr, latestTrees, tid, stats, isFirstRound, noValidation, build_tree_one_node);
    }

    // At each iteration build K trees (K = nclass = response column domain size)
    // TODO: parallelize more? build more than k trees at each time, we need to care about temporary data
    // Idea: launch more DRF at once.
    final Timer roundTimer = new Timer();
    latestTrees = buildNextKTrees(fr, _mtry, sample_rate, rng, tid);
    Log.info(logTag(), (tid + 1) + ". tree was built " + roundTimer.toString());

    // If canceled during building, stop here; tid is left un-incremented.
    if (!Job.isRunning(self())) {
      break;
    }

    // Fold the freshly built trees into the running statistics.
    stats.updateBy(latestTrees);
    tid++;
  }

  // Perform the final scoring only while the job is still running.
  if (Job.isRunning(self())) {
    final boolean noValidation = !hasValidation();
    model = doScoring(model, fr, latestTrees, tid, stats, true, noValidation, build_tree_one_node);
    // Make sure that we did not miss any votes
    // assert !importance || _treeMeasuresOnOOB.npredictors() == _treeMeasuresOnSOOB[0/*variable*/].npredictors() : "Missing some tree votes in variable importance voting?!";
  }
  return model;
}