Search in sources:

Example 6 with RecursiveAction

Use of jsr166y.RecursiveAction in the project h2o-2 by h2oai.

The method inhaleData of the class DABuilder.

/**
 * Build a {@link DataAdapter} for the given frame by inhaling its chunks
 * (local chunks always; remote chunks too when requested) in parallel.
 *
 * @param fr          the frame to inhale
 * @param useNonLocal if true, also load chunks homed on other nodes
 * @return the populated (and shrunk) DataAdapter, or {@code null} if this
 *         node owns no chunk of the frame
 * @throws Job.JobCancelledException if the enclosing job is cancelled
 *         during or after the inhale
 */
protected DataAdapter inhaleData(Frame fr, boolean useNonLocal) {
    Log.info("Prepping for data inhale.");
    // getChunkId uses -99999 as a "no chunk homed here" sentinel; nothing
    // for this node to inhale in that case.
    long id = getChunkId(fr);
    if (id == -99999) {
        return null;
    }
    Timer t_inhale = new Timer();
    final SpeeDRFModel rfmodel = UKV.get(_rfModel);
    // Per-column metadata: whether the column's values fit in a byte, and
    // the column's NA count.
    boolean[] _isByteCol = new boolean[fr.numCols()];
    long[] _naCnts = new long[fr.numCols()];
    for (int i = 0; i < _isByteCol.length; ++i) {
        // The last column is treated specially (presumably the response
        // column — TODO confirm) and regression mode affects byte-packing.
        _isByteCol[i] = DataAdapter.isByteCol(fr.vecs()[i], (int) fr.numRows(), i == _isByteCol.length - 1, rfmodel.regression);
        _naCnts[i] = fr.vecs()[i].naCnt();
    }
    // The model columns are dense packed - but there will be columns in the
    // data being ignored.  This is a map from the model's columns to the
    // building dataset's columns.
    final int[] modelDataMap = colMap(fr._names, rfmodel._names);
    final int totalRows = getRowCount(fr);
    // Reuse the chunk id computed above rather than recomputing getChunkId(fr).
    final DataAdapter dapt = new DataAdapter(fr, rfmodel, modelDataMap, totalRows, id, _rfParams.seed, _rfParams.bin_limit, _rfParams.class_weights);
    // Check that we have proper number of valid columns vs. features selected, if not cap.
    checkAndLimitFeatureUsedPerSplit(fr);
    // Collects jobs loading local (and, optionally, remote) chunks.
    ArrayList<RecursiveAction> dataInhaleJobs = new ArrayList<RecursiveAction>();
    // Hoist the loop-invariant chunk count out of the loop.
    final int nchks = fr.anyVec().nChunks();
    Log.info("\n\nTotal Number of Chunks: " + nchks + "\n\n");
    int cnter_local = 0;
    int cnter_remote = 0;
    for (int i = 0; i < nchks; ++i) {
        final boolean isLocal = fr.anyVec().chunkKey(i).home();
        // Load every chunk when useNonLocal, otherwise only home chunks.
        if (useNonLocal || isLocal) {
            if (isLocal) {
                cnter_local++;
            } else {
                cnter_remote++;
            }
            dataInhaleJobs.add(loadChunkAction(dapt, fr, i, _isByteCol, _naCnts, rfmodel.regression));
        }
    }
    // NOTE(review): message lacks a space after the second colon; kept
    // byte-identical to preserve existing log output/parsers.
    Log.info("\n\nTotal local  chunks to load: " + cnter_local + "\n\nTotal remote chunks to load:" + cnter_remote);
    SpeeDRF.DRFTask.updateRFModelStatus(_rfModel, "Inhaling Data.");
    Log.info(Log.Tag.Sys.RANDF, "Beginning Random Forest Inhale.");
    // Run all chunk-load actions on the fork/join pool; blocks until done.
    ForkJoinTask.invokeAll(dataInhaleJobs);
    // Abort promptly if the enclosing job was cancelled while inhaling.
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    // Shrink data
    dapt.shrink();
    // Re-check cancellation after the (potentially long) shrink step.
    if (dapt._jobKey != null && !Job.isRunning(dapt._jobKey))
        throw new Job.JobCancelledException();
    Log.info(Log.Tag.Sys.RANDF, "Inhale done in " + t_inhale);
    return dapt;
}
Also used : ArrayList(java.util.ArrayList) RecursiveAction(jsr166y.RecursiveAction) Timer(water.Timer) Job(water.Job)

Aggregations

RecursiveAction (jsr166y.RecursiveAction)6 ArrayList (java.util.ArrayList)2 ForkJoinTask (jsr166y.ForkJoinTask)2 CholeskyDecomposition (Jama.CholeskyDecomposition)1 Matrix (Jama.Matrix)1 Job (water.Job)1 Timer (water.Timer)1 PrettyPrint (water.util.PrettyPrint)1