Use of water.Timer in project h2o-2 by h2oai.
From class SpeeDRF, method build().
/** Build random forest for data stored on this node. */
public static void build(final Key jobKey, final Key modelKey, final DRFParams drfParams, final Data localData, int ntrees, int numSplitFeatures, int[] rowsPerChunks) {
    Timer t_alltrees = new Timer();
    Tree[] trees = new Tree[ntrees];
    Log.info(Log.Tag.Sys.RANDF, "Building " + ntrees + " trees");
    Log.info(Log.Tag.Sys.RANDF, "Number of split features: " + numSplitFeatures);
    Log.info(Log.Tag.Sys.RANDF, "Starting RF computation with " + localData.rows() + " rows");
    Random rnd = Utils.getRNG(localData.seed() + ROOT_SEED_ADD);
    Sampling sampler = createSampler(drfParams, rowsPerChunks);
    byte producerId = (byte) H2O.SELF.index();
    for (int i = 0; i < ntrees; ++i) {
        // Make sure that enough bits are initialized.
        long treeSeed = rnd.nextLong() + TREE_SEED_INIT;
        trees[i] = new Tree(jobKey, modelKey, localData, producerId, drfParams.max_depth, drfParams.stat_type,
                numSplitFeatures, treeSeed, i, drfParams._exclusiveSplitLimit, sampler, drfParams._verbose,
                drfParams.regression, !drfParams._useNonLocalData, ((SpeeDRFModel) UKV.get(modelKey)).score_pojo);
    }
    Log.info("Invoking the tree build tasks on all nodes.");
    DRemoteTask.invokeAll(trees);
    Log.info(Log.Tag.Sys.RANDF, "All trees (" + ntrees + ") done in " + t_alltrees);
}
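
This snippet shows the characteristic water.Timer idiom in h2o-2: construct a Timer when a phase starts, and let string concatenation call its toString() to report the elapsed time in the log. The minimal sketch below illustrates the same idiom under stated assumptions; TaskTimer is a hypothetical stand-in written for illustration, not the actual water.Timer source.

// Minimal sketch of the Timer-around-a-phase pattern used above.
// TaskTimer is a hypothetical stand-in for water.Timer: it records its
// creation time and reports the elapsed interval from toString().
public class TaskTimer {
    private final long start = System.currentTimeMillis();

    public long millis() {
        return System.currentTimeMillis() - start;
    }

    @Override
    public String toString() {
        return millis() + " ms";
    }

    public static void main(String[] args) throws InterruptedException {
        TaskTimer t = new TaskTimer();  // start timing, like `new Timer()` above
        Thread.sleep(250);              // stand-in for the tree-building work
        // String concatenation calls toString(), mirroring "done in " + t_alltrees.
        System.out.println("All trees done in " + t);
    }
}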
Use of water.Timer in project h2o-2 by h2oai.
From class DABuilder, method inhaleData().
/** Build a data adapter for the given frame. */
protected DataAdapter inhaleData(Frame fr, boolean useNonLocal) {
    Log.info("Prepping for data inhale.");
    long id = getChunkId(fr);
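    // -99999 is a sentinel from getChunkId: judging by the early return below,
    // it means there is nothing for this node to inhale (inferred, not
    // documented in the snippet).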
    if (id == -99999) {
        return null;
    }
    Timer t_inhale = new Timer();
    final SpeeDRFModel rfmodel = UKV.get(_rfModel);
    boolean[] _isByteCol = new boolean[fr.numCols()];
    long[] _naCnts = new long[fr.numCols()];
    for (int i = 0; i < _isByteCol.length; ++i) {
        _isByteCol[i] = DataAdapter.isByteCol(fr.vecs()[i], (int) fr.numRows(), i == _isByteCol.length - 1, rfmodel.regression);
        _naCnts[i] = fr.vecs()[i].naCnt();
    }
    // The model columns are densely packed, but there will be columns in the
    // data being ignored. This is a map from the model's columns to the
    // building dataset's columns.
    final int[] modelDataMap = colMap(fr._names, rfmodel._names);
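    // For example (hypothetical names): if the model was trained on columns
    // {"a", "b", "response"} and fr holds {"a", "ignored", "b", "response"},
    // then modelDataMap would be {0, 2, 3}.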
    final int totalRows = getRowCount(fr);
    final DataAdapter dapt = new DataAdapter(fr, rfmodel, modelDataMap, totalRows, getChunkId(fr), _rfParams.seed, _rfParams.bin_limit, _rfParams.class_weights);
    // Check that we have a proper number of valid columns vs. features selected; if not, cap it.
    checkAndLimitFeatureUsedPerSplit(fr);
    // Collect the jobs that load the chunks (local ones, plus remote ones if requested).
    ArrayList<RecursiveAction> dataInhaleJobs = new ArrayList<RecursiveAction>();
    Log.info("\n\nTotal Number of Chunks: " + fr.anyVec().nChunks() + "\n\n");
    int cnter_local = 0;
    int cnter_remote = 0;
    for (int i = 0; i < fr.anyVec().nChunks(); ++i) {
        if (useNonLocal) {
            if (fr.anyVec().chunkKey(i).home()) {
                cnter_local++;
            } else {
                cnter_remote++;
            }
            dataInhaleJobs.add(loadChunkAction(dapt, fr, i, _isByteCol, _naCnts, rfmodel.regression));
        } else if (fr.anyVec().chunkKey(i).home()) {
            cnter_local++;
            dataInhaleJobs.add(loadChunkAction(dapt, fr, i, _isByteCol, _naCnts, rfmodel.regression));
        }
    }
    Log.info("\n\nTotal local chunks to load: " + cnter_local + "\n\nTotal remote chunks to load: " + cnter_remote);
    SpeeDRF.DRFTask.updateRFModelStatus(_rfModel, "Inhaling Data.");
    Log.info(Log.Tag.Sys.RANDF, "Beginning Random Forest Inhale.");
    ForkJoinTask.invokeAll(dataInhaleJobs);
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    // Shrink the data.
    dapt.shrink();
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    Log.info(Log.Tag.Sys.RANDF, "Inhale done in " + t_inhale);
    return dapt;
}
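
The inhale above queues one RecursiveAction per chunk and then blocks on ForkJoinTask.invokeAll, with a single Timer spanning the whole load. The sketch below reproduces that collect-then-invokeAll shape using only JDK fork/join classes; ChunkLoadDemo and its fake chunk loader are illustrative stand-ins for DataAdapter and loadChunkAction, and plain millisecond arithmetic stands in for water.Timer.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveAction;

// Sketch of the collect-then-invokeAll pattern from inhaleData: build one
// RecursiveAction per chunk, then block until every action has completed.
public class ChunkLoadDemo {
    static RecursiveAction loadChunkAction(final int chunkId) {
        return new RecursiveAction() {
            @Override
            protected void compute() {
                // Stand-in for copying one chunk's rows into the DataAdapter.
                System.out.println("loaded chunk " + chunkId);
            }
        };
    }

    public static void main(String[] args) {
        long start = System.currentTimeMillis();   // plays the role of Timer
        List<RecursiveAction> jobs = new ArrayList<RecursiveAction>();
        for (int i = 0; i < 8; ++i)
            jobs.add(loadChunkAction(i));
        ForkJoinTask.invokeAll(jobs);              // runs all jobs, waits for all
        System.out.println("Inhale done in " + (System.currentTimeMillis() - start) + " ms");
    }
}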
Use of water.Timer in project h2o-2 by h2oai.
From class SpeeDRF, method buildForest().
private void buildForest() {
    logStart();
    SpeeDRFModel model = null;
    try {
        Frame train = setTrain();
        Frame test = setTest();
        Vec resp = regression ? null : train.lastVec().toEnum();
        if (resp != null)
            gtrash(resp);
        float[] priorDist = setPriorDist(train);
        train = setStrat(train, test, resp);
        model = initModel(train, test, priorDist);
        model.start_training(null);
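        // Lock the model against concurrent writers for the duration of the build.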
        model.write_lock(self());
        drfParams = DRFParams.create(train.find(resp), model.N, model.max_depth, (int) train.numRows(), model.nbins, model.statType, use_seed, model.weights, mtries, model.sampling_strategy, (float) sample_rate, model.strata_samples, model.verbose ? 100 : 1, _exclusiveSplitLimit, true, regression);
        DRFTask tsk = new DRFTask(self(), train, drfParams, model._key, model.src_key);
        tsk.validateInputData(train);
        tsk.invokeOnAllNodes();
        Log.info("Tree building complete. Scoring...");
        model = UKV.get(dest());
        model.scoreAllTrees(test == null ? train : test, resp);
        // Launch a variable importance task.
        if (importance && !regression) {
            Log.info("Scoring complete. Performing Variable Importance Calculations.");
            model.current_status = "Performing Variable Importance Calculation.";
            Timer VITimer = new Timer();
            model.variableImportanceCalc(train, resp);
            Log.info("Variable Importance on " + (train.numCols() - 1) + " variables and " + ntrees + " trees done in " + VITimer);
        }
        Log.info("Generating Tree Stats");
        JsonObject trees = new JsonObject();
        trees.addProperty(Constants.TREE_COUNT, model.size());
        if (model.size() > 0) {
            trees.add(Constants.TREE_DEPTH, model.depth().toJson());
            trees.add(Constants.TREE_LEAVES, model.leaves().toJson());
        }
        model.generateHTMLTreeStats(new StringBuilder(), trees);
        model.current_status = "Model Complete";
    } finally {
        if (model != null) {
            model.unlock(self());
            model.stop_training();
        }
    }
}
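
Note the shape of this method: the model is write-locked inside the try block and unconditionally unlocked in the finally block, so a failure anywhere in the build cannot leave the model locked. A minimal sketch of that acquire-in-try, release-in-finally pattern follows; the Model class and its method names are hypothetical stand-ins, not the SpeeDRFModel API.

// Sketch of the lock-in-try, unlock-in-finally pattern from buildForest.
// Model is a hypothetical stand-in for SpeeDRFModel's write_lock/unlock
// API; it only logs the state transitions.
public class BuildDemo {
    static class Model {
        void writeLock() { System.out.println("model locked"); }
        void unlock()    { System.out.println("model unlocked"); }
    }

    static Model initModel() { return new Model(); }

    static void buildTrees(Model model) {
        // Stand-in for the distributed tree build; the real one may throw,
        // e.g. when the job is cancelled mid-build.
        System.out.println("building forest");
    }

    public static void main(String[] args) {
        Model model = null;
        try {
            model = initModel();
            model.writeLock();
            buildTrees(model);
        } finally {
            // Runs on success and failure alike, so the lock is never leaked.
            if (model != null) model.unlock();
        }
    }
}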