Search in sources :

Example 1 with C0DChunk

use of water.fvec.C0DChunk in project h2o-3 by h2oai.

the class ScoreBuildHistogram method map.

/**
 * Prepares per-row node assignments for this chunk set and then accumulates
 * the rows into histograms via {@code accum_all}.
 *
 * @param chks chunks of the frame being processed; the work, node-id and
 *             (optional) weight chunks are picked by their configured indices
 */
@Override
public void map(Chunk[] chks) {
    final Chunk workChunk = chks[_workIdx];
    final Chunk nodeIdChunk = chks[_nidIdx];
    // Without a weight column every row gets the constant weight 1.
    final Chunk weightChunk;
    if (_weightIdx >= 0) {
        weightChunk = chks[_weightIdx];
    } else {
        weightChunk = new C0DChunk(1, chks[0].len());
    }

    // Step 1: compute the new node assignment of every row.
    final int rowCount = nodeIdChunk._len;
    final int[] nextNodeIds = new int[rowCount];
    if (_leaf > 0) {
        // A prior pass exists: delegate the row-to-node decision to score_decide.
        score_decide(chks, nodeIdChunk, nextNodeIds);
    } else {
        // No prior pass: only mark rows with a non-zero weight whose node id
        // is reported as decided by isDecidedRow.
        for (int r = 0; r < rowCount; r++) {
            if (weightChunk.atd(r) != 0 && isDecidedRow((int) nodeIdChunk.atd(r))) {
                nextNodeIds[r] = DECIDED_ROW;
            }
        }
    }

    // Step 2: accumulate all rows and columns into histograms.
    accum_all(chks, workChunk, weightChunk, nextNodeIds);
}
Also used : C0DChunk(water.fvec.C0DChunk) Chunk(water.fvec.Chunk) C0DChunk(water.fvec.C0DChunk)

Example 2 with C0DChunk

use of water.fvec.C0DChunk in project h2o-3 by h2oai.

the class TreeMeasuresCollector method map.

/**
 * Scores each tree on the rows returned by {@code ModelUtils.sampleOOBRows} for this
 * chunk set and accumulates per-tree results into {@code _nrows} and either
 * {@code _votes} (classification) or {@code _sse} (regression).
 * <p>
 * When {@code _var >= 0}, the value of column {@code _var} is read from a shuffled
 * row instead of the row being scored before the tree is evaluated.
 *
 * @param chks chunks of the frame being processed; columns {@code 0.._ncols-1} are read
 *             as scoring inputs, the response and weight chunks are looked up via {@code _st}
 */
@Override
public void map(Chunk[] chks) {
    // Reusable buffers: one row of input values and the prediction output of score0.
    double[] data = new double[_ncols];
    double[] preds = new double[_nclasses + 1];
    Chunk cresp = _st.chk_resp(chks);
    // Without a weight column every row gets the constant weight 1.
    Chunk weights = _st.hasWeightCol() ? _st.chk_weight(chks) : new C0DChunk(1, chks[0]._len);
    int nrows = cresp._len;
    // Preallocate the sample buffer; sampleOOBRows receives it and returns the array to use.
    int[] oob = new int[2 + Math.round((1f - _rate) * nrows * 1.2f + 0.5f)];
    // Shuffled copy of the sampled rows; only allocated when a variable is permuted (_var >= 0).
    int[] soob = null;
    // Prepare output data: one slot per tree.
    _nrows = new double[_ntrees];
    _votes = _classification ? new double[_ntrees] : null;
    _sse = _classification ? null : new float[_ntrees];
    // Seed for shuffling oob samples, derived from the chunk index.
    long seedForOob = ShuffleTask.seed(cresp.cidx());
    // Start iteration over trees
    for (int tidx = 0; tidx < _ntrees; tidx++) {
        // OOB RNG for this tree
        Random rng = rngForTree(_trees[tidx], cresp.cidx());
        // Collect oob rows; the same array is passed back in so it can be reused.
        oob = ModelUtils.sampleOOBRows(nrows, _rate, rng, oob);
        // Slot 0 holds the number of sampled rows; the row indices follow in oob[1..oobcnt].
        int oobcnt = oob[0];
        if (_var >= 0) {
            // Grow the shuffle buffer only when it is too small for this tree's sample.
            if (soob == null || soob.length < oobcnt)
                soob = new int[oobcnt];
            // Shuffle array and copy results into <code>soob</code>
            ArrayUtils.shuffleArray(oob, oobcnt, soob, seedForOob, 1);
        }
        for (int j = 1; j < 1 + oobcnt; j++) {
            int row = oob[j];
            double w = weights.atd(row);
            // Rows with a missing response cannot be evaluated.
            if (cresp.isNA(row))
                continue;
            // Zero-weight rows contribute nothing.
            if (w == 0)
                continue;
            // Copy the row's column values into the scoring buffer.
            // NOTE(review): the previous comment here mentioned a "1+i" offset, but the
            // code writes to data[i] - the old comment appears to be stale.
            for (int i = 0; i < _ncols; i++) data[i] = chks[i].atd(row);
            // - permute variable: take column _var from the shuffled row (soob is 0-based, j is 1-based)
            if (_var >= 0)
                data[_var] = chks[_var].atd(soob[j - 1]);
            else
                assert soob == null;
            // - reset the prediction buffer before scoring
            Arrays.fill(preds, 0);
            // - score only the current tree
            score0(data, preds, _trees[tidx]);
            // - derive a prediction
            if (_classification) {
                int pred = getPrediction(preds, null, /*FIXME: should use model's _priorClassDistribution*/
                data, _threshold);
                int actu = (int) cresp.at8(row);
                // - collect only correct votes, weighted by the row weight
                if (pred == actu)
                    _votes[tidx] += w;
            } else {
                /* regression */
                // Important: the regression prediction is read from slot 0.
                double pred = preds[0];
                double actu = cresp.atd(row);
                // NOTE(review): the squared error is accumulated without the row weight w,
                // while _nrows and _votes are weighted - confirm this is intended.
                _sse[tidx] += (actu - pred) * (actu - pred);
            }
            // - collect (weighted) rows which were used for voting
            _nrows[tidx] += w;
        //if (_var<0) System.err.println("VARIMP OOB row: " + (cresp._start+row) + " : " + Arrays.toString(data) + " tree/actu: " + pred + "/" + actu);
        }
    }
    // Clean-up: drop the reference to the trees once this chunk set is processed.
    _trees = null;
}
Also used : C0DChunk(water.fvec.C0DChunk) Random(java.util.Random) Chunk(water.fvec.Chunk) C0DChunk(water.fvec.C0DChunk)

Example 3 with C0DChunk

use of water.fvec.C0DChunk in project h2o-3 by h2oai.

the class VecUtils method numericToStringVec.

/**
   * Create a new {@link Vec} of string values from a numeric {@link Vec}.
   *
   * Every non-missing value is formatted with {@code PrettyPrint.number(chk, value, 4)};
   * missing values stay missing in the result.
   *
   * Currently only uses a default pretty printer. Would be better if
   * it accepted a format string PUBDEV-2211
   *
   * @param src a numeric {@link Vec}
   * @return a string {@link Vec}
   * @throws H2OIllegalValueException if {@code src} is a categorical or UUID column
   */
public static Vec numericToStringVec(Vec src) {
    if (src.isCategorical() || src.isUUID())
        throw new H2OIllegalValueException("Cannot convert a non-numeric column" + " using numericToStringVec() ", src);
    Vec res = new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            // Deliberately no "instanceof C0DChunk => all NAs" shortcut: a C0DChunk is a
            // constant-value chunk and the constant may be a regular number, in which
            // case the rows must be converted like any other. The per-row isNA check
            // below still emits NAs for a constant-NA chunk.
            for (int i = 0; i < chk._len; i++) {
                if (chk.isNA(i))
                    newChk.addNA();
                else
                    newChk.addStr(PrettyPrint.number(chk, chk.atd(i), 4));
            }
        }
    }.doAll(Vec.T_STR, src).outputFrame().anyVec();
    assert res != null;
    return res;
}
Also used : C0DChunk(water.fvec.C0DChunk) Vec(water.fvec.Vec) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) H2OIllegalValueException(water.exceptions.H2OIllegalValueException) NewChunk(water.fvec.NewChunk)

Example 4 with C0DChunk

use of water.fvec.C0DChunk in project h2o-3 by h2oai.

the class ReconstructTreeState method map.

/**
 * Replays every tree in {@code _trees} over the rows of this chunk set and writes the
 * resulting per-class predictions into the tree chunks obtained from {@code _st};
 * rows treated as out-of-bag also have their weight added to the OOB counter chunk.
 *
 * @param chks chunks of the frame being processed; columns {@code 0.._ncols-1} are read
 *             as scoring inputs, the other chunks are looked up via {@code _st}
 */
@Override
public void map(Chunk[] chks) {
    // Reusable buffers: one row of input values and the prediction output of score0.
    double[] data = new double[_ncols];
    double[] preds = new double[_nclass + 1];
    int ntrees = _trees.length;
    // Without a weight column every row gets the constant weight 1.
    Chunk weight = _st.hasWeightCol() ? _st.chk_weight(chks) : new C0DChunk(1, chks[0]._len);
    Chunk oobt = _st.chk_oobt(chks);
    Chunk resp = _st.chk_resp(chks);
    for (int tidx = 0; tidx < ntrees; tidx++) {
        // OOB RNG for this tree
        Random rng = rngForTree(_trees[tidx], oobt.cidx());
        for (int row = 0; row < oobt._len; row++) {
            double w = weight.atd(row);
            // Zero-weight rows are skipped (and do not consume a random number).
            if (w == 0)
                continue;
            double y = resp.atd(row);
            // Rows with a missing response are skipped as well.
            if (Double.isNaN(y))
                continue;
            // The RNG is only advanced when OOB handling is enabled (short-circuit &&).
            boolean rowIsOOB = _OOBEnabled && rng.nextFloat() >= _rate;
            // Score every row when OOB is disabled, otherwise only the out-of-bag rows.
            if (!_OOBEnabled || rowIsOOB) {
                // Make a prediction
                for (int i = 0; i < _ncols; i++) data[i] = chks[i].atd(row);
                Arrays.fill(preds, 0);
                score0(data, preds, _trees[tidx]);
                // Only for regression, keep consistency: copy slot 0 into slot 1 so the
                // class loop below (which reads preds[1 + c]) sees the value.
                if (_nclass == 1)
                    preds[1] = preds[0];
                // Write tree predictions
                for (int c = 0; c < _nclass; c++) {
                    // over all classes; zero predictions leave the stored value untouched
                    double prediction = preds[1 + c];
                    if (preds[1 + c] != 0) {
                        Chunk ctree = _st.chk_tree(chks, c);
                        // Accumulated OOB weight of this row before the current tree.
                        double wcount = oobt.atd(row);
                        // NOTE(review): in the averaging branch the (float) cast applies to the
                        // numerator only (the division is then done in double), and prediction
                        // is not multiplied by w - confirm both are intended.
                        if (_OOBEnabled && _nclass >= 2)
                            //store avg prediction
                            ctree.set(row, (float) (ctree.atd(row) * wcount + prediction) / (wcount + w));
                        else
                            // otherwise keep a running sum of predictions
                            ctree.set(row, (float) (ctree.atd(row) + prediction));
                    }
                }
                // Mark oob row by adding its weight to the OOB counter; this happens after
                // the class loop, so every class used the same wcount.
                if (rowIsOOB)
                    oobt.set(row, oobt.atd(row) + w);
            }
        }
    }
    // Drop the reference held in _st once this chunk set is processed.
    _st = null;
}
Also used : C0DChunk(water.fvec.C0DChunk) Random(java.util.Random) Chunk(water.fvec.Chunk) C0DChunk(water.fvec.C0DChunk)

Example 5 with C0DChunk

use of water.fvec.C0DChunk in project h2o-3 by h2oai.

the class VecUtils method UUIDToStringVec.

/**
   * Converts a UUID {@link Vec} into a string {@link Vec}.
   *
   * Each non-missing row is rendered through {@code PrettyPrint.UUID} from the
   * low and high halves of the stored value; missing rows stay missing. A chunk
   * that is a {@link C0DChunk} is emitted as all-missing without inspecting its rows.
   *
   * @param src a UUID {@link Vec}
   * @return a string {@link Vec}
   * @throws H2OIllegalArgumentException if {@code src} is not a UUID column
   */
public static Vec UUIDToStringVec(Vec src) {
    if (!src.isUUID()) {
        throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
    }
    Vec res = new MRTask() {

        @Override
        public void map(Chunk chk, NewChunk newChk) {
            // A C0DChunk is handled as "every row missing"; the short-circuit below
            // keeps isNA from being called on it.
            final boolean everyRowMissing = chk instanceof C0DChunk;
            for (int row = 0; row < chk._len; row++) {
                if (everyRowMissing || chk.isNA(row)) {
                    newChk.addNA();
                } else {
                    newChk.addStr(PrettyPrint.UUID(chk.at16l(row), chk.at16h(row)));
                }
            }
        }
    }.doAll(Vec.T_STR, src).outputFrame().anyVec();
    assert res != null;
    return res;
}
Also used : C0DChunk(water.fvec.C0DChunk) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) NewChunk(water.fvec.NewChunk)

Aggregations

C0DChunk (water.fvec.C0DChunk): 5, Chunk (water.fvec.Chunk): 5, Random (java.util.Random): 2, NewChunk (water.fvec.NewChunk): 2, Vec (water.fvec.Vec): 2, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 1, H2OIllegalValueException (water.exceptions.H2OIllegalValueException): 1