Search in sources :

Example 1 with Timer

Example use of the water.Timer class in the h2o-2 project by h2oai.

From the class SpeeDRF, method build.

/** Build random forest for data stored on this node. */
/**
 * Builds the random forest trees for the data stored on this node.
 *
 * <p>Creates {@code ntrees} {@link Tree} tasks seeded from the local data's RNG,
 * then dispatches them to all nodes via {@code DRemoteTask.invokeAll} and logs the
 * total wall-clock time.
 *
 * @param jobKey           key of the driving job (passed to each Tree for cancellation checks)
 * @param modelKey         key of the SpeeDRFModel under construction
 * @param drfParams        forest construction parameters (depth, stat type, sampling, ...)
 * @param localData        training data resident on this node
 * @param ntrees           number of trees to build
 * @param numSplitFeatures number of candidate features evaluated per split
 * @param rowsPerChunks    per-chunk row counts used to configure the sampler
 */
public static void build(final Key jobKey, final Key modelKey, final DRFParams drfParams, final Data localData, int ntrees, int numSplitFeatures, int[] rowsPerChunks) {
    Timer t_alltrees = new Timer();
    Tree[] trees = new Tree[ntrees];
    Log.info(Log.Tag.Sys.RANDF, "Building " + ntrees + " trees");
    Log.info(Log.Tag.Sys.RANDF, "Number of split features: " + numSplitFeatures);
    Log.info(Log.Tag.Sys.RANDF, "Starting RF computation with " + localData.rows() + " rows ");
    Random rnd = Utils.getRNG(localData.seed() + ROOT_SEED_ADD);
    Sampling sampler = createSampler(drfParams, rowsPerChunks);
    byte producerId = (byte) H2O.SELF.index();
    // Hoisted out of the loop: the original fetched the model from the distributed
    // KV store once per tree just to read score_pojo. Fetch it once instead.
    // NOTE(review): assumes score_pojo does not change while trees are being
    // constructed — TODO confirm against the model update path.
    final boolean scorePojo = ((SpeeDRFModel) UKV.get(modelKey)).score_pojo;
    for (int i = 0; i < ntrees; ++i) {
        // Offset the per-tree seed so that enough bits are initialized.
        long treeSeed = rnd.nextLong() + TREE_SEED_INIT;
        trees[i] = new Tree(jobKey, modelKey, localData, producerId, drfParams.max_depth, drfParams.stat_type, numSplitFeatures, treeSeed, i, drfParams._exclusiveSplitLimit, sampler, drfParams._verbose, drfParams.regression, !drfParams._useNonLocalData, scorePojo);
    }
    Log.info("Invoking the tree build tasks on all nodes.");
    DRemoteTask.invokeAll(trees);
    Log.info(Log.Tag.Sys.RANDF, "All trees (" + ntrees + ") done in " + t_alltrees);
}
Also used : Timer(water.Timer)

Example 2 with Timer

Example use of the water.Timer class in the h2o-2 project by h2oai.

From the class DABuilder, method inhaleData.

/** Build data adapter for given frame */
protected DataAdapter inhaleData(Frame fr, boolean useNonLocal) {
    Log.info("Prepping for data inhale.");
    long id = getChunkId(fr);
    if (id == -99999) {
        return null;
    }
    Timer t_inhale = new Timer();
    final SpeeDRFModel rfmodel = UKV.get(_rfModel);
    boolean[] _isByteCol = new boolean[fr.numCols()];
    long[] _naCnts = new long[fr.numCols()];
    for (int i = 0; i < _isByteCol.length; ++i) {
        _isByteCol[i] = DataAdapter.isByteCol(fr.vecs()[i], (int) fr.numRows(), i == _isByteCol.length - 1, rfmodel.regression);
        _naCnts[i] = fr.vecs()[i].naCnt();
    }
    // The model columns are dense packed - but there will be columns in the
    // data being ignored.  This is a map from the model's columns to the
    // building dataset's columns.
    final int[] modelDataMap = colMap(fr._names, rfmodel._names);
    final int totalRows = getRowCount(fr);
    final DataAdapter dapt = new DataAdapter(fr, rfmodel, modelDataMap, totalRows, getChunkId(fr), _rfParams.seed, _rfParams.bin_limit, _rfParams.class_weights);
    // Check that we have proper number of valid columns vs. features selected, if not cap.
    checkAndLimitFeatureUsedPerSplit(fr);
    // Collects jobs loading local chunks
    ArrayList<RecursiveAction> dataInhaleJobs = new ArrayList<RecursiveAction>();
    Log.info("\n\nTotal Number of Chunks: " + fr.anyVec().nChunks() + "\n\n");
    int cnter_local = 0;
    int cnter_remote = 0;
    for (int i = 0; i < fr.anyVec().nChunks(); ++i) {
        if (useNonLocal) {
            if (fr.anyVec().chunkKey(i).home()) {
                cnter_local++;
            } else {
                cnter_remote++;
            }
            dataInhaleJobs.add(loadChunkAction(dapt, fr, i, _isByteCol, _naCnts, rfmodel.regression));
        } else if (fr.anyVec().chunkKey(i).home()) {
            cnter_local++;
            dataInhaleJobs.add(loadChunkAction(dapt, fr, i, _isByteCol, _naCnts, rfmodel.regression));
        }
    }
    Log.info("\n\nTotal local  chunks to load: " + cnter_local + "\n\nTotal remote chunks to load:" + cnter_remote);
    SpeeDRF.DRFTask.updateRFModelStatus(_rfModel, "Inhaling Data.");
    Log.info(Log.Tag.Sys.RANDF, "Beginning Random Forest Inhale.");
    ForkJoinTask.invokeAll(dataInhaleJobs);
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    // Shrink data
    dapt.shrink();
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    Log.info(Log.Tag.Sys.RANDF, "Inhale done in " + t_inhale);
    return dapt;
}
Also used : ArrayList(java.util.ArrayList) RecursiveAction(jsr166y.RecursiveAction) Timer(water.Timer) Job(water.Job)

Example 3 with Timer

Example use of the water.Timer class in the h2o-2 project by h2oai.

From the class SpeeDRF, method buildForest.

/**
 * Drives the end-to-end forest build: prepares the training/test frames,
 * initializes and write-locks the model, runs the distributed tree-build task
 * on all nodes, scores the result, optionally computes variable importance,
 * and records tree statistics. The model is always unlocked in the finally
 * block, even on failure.
 */
private void buildForest() {
    logStart();
    SpeeDRFModel model = null;
    try {
        Frame train = setTrain();
        Frame test = setTest();
        // Classification needs an enum response column; regression keeps none.
        Vec resp = regression ? null : train.lastVec().toEnum();
        if (resp != null)
            gtrash(resp);
        float[] priorDist = setPriorDist(train);
        train = setStrat(train, test, resp);
        model = initModel(train, test, priorDist);
        model.start_training(null);
        // Write-lock so no other job mutates the model during the build.
        model.write_lock(self());
        drfParams = DRFParams.create(train.find(resp), model.N, model.max_depth, (int) train.numRows(), model.nbins, model.statType, use_seed, model.weights, mtries, model.sampling_strategy, (float) sample_rate, model.strata_samples, model.verbose ? 100 : 1, _exclusiveSplitLimit, true, regression);
        DRFTask tsk = new DRFTask(self(), train, drfParams, model._key, model.src_key);
        tsk.validateInputData(train);
        tsk.invokeOnAllNodes();
        Log.info("Tree building complete. Scoring...");
        // Refetch: the distributed task updated the model in the KV store, so the
        // local reference is stale. The finally block unlocks this fresh instance.
        model = UKV.get(dest());
        // Score against the test frame when present, otherwise the training frame.
        model.scoreAllTrees(test == null ? train : test, resp);
        // Launch a Variable Importance Task
        if (importance && !regression) {
            Log.info("Scoring complete. Performing Variable Importance Calculations.");
            model.current_status = "Performing Variable Importance Calculation.";
            Timer VITimer = new Timer();
            model.variableImportanceCalc(train, resp);
            // numCols() - 1: the response column is excluded from the count.
            Log.info("Variable Importance on " + (train.numCols() - 1) + " variables and " + ntrees + " trees done in " + VITimer);
        }
        Log.info("Generating Tree Stats");
        JsonObject trees = new JsonObject();
        trees.addProperty(Constants.TREE_COUNT, model.size());
        if (model.size() > 0) {
            trees.add(Constants.TREE_DEPTH, model.depth().toJson());
            trees.add(Constants.TREE_LEAVES, model.leaves().toJson());
        }
        model.generateHTMLTreeStats(new StringBuilder(), trees);
        model.current_status = "Model Complete";
    } finally {
        // Unlock whichever model instance we hold (possibly the refetched one).
        // NOTE(review): if write_lock itself threw, unlock is still attempted on
        // the initialized-but-unlocked model — presumably unlock tolerates this;
        // verify against the Lockable contract.
        if (model != null) {
            model.unlock(self());
            model.stop_training();
        }
    }
}
Also used : Frame(water.fvec.Frame) Timer(water.Timer) Vec(water.fvec.Vec) JsonObject(dontweave.gson.JsonObject)

Aggregations

Timer (water.Timer)3 JsonObject (dontweave.gson.JsonObject)1 ArrayList (java.util.ArrayList)1 RecursiveAction (jsr166y.RecursiveAction)1 Job (water.Job)1 Frame (water.fvec.Frame)1 Vec (water.fvec.Vec)1