Search in sources :

Example 56 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class DeepLearningModel method scoreAutoEncoder.

/**
 * Computes the auto-encoder reconstruction error of every row on the fly, i.e. without
 * allocating the reconstructed columns the way {@code score(Frame)} does.
 *
 * @param frame original data; a response column may be present and is ignored
 * @return a Frame with a single Vec ("Reconstruction.MSE") holding the reconstruction error
 *         (MSE) of each row; the caller is responsible for deleting it
 */
public Frame scoreAutoEncoder(Frame frame) {
    final int len = _names.length;
    // adapt() hands back two frames:
    //   [0] the complete adapted layout - original vectors mixed with new adaptor vectors
    //   [1] just the newly created vectors, kept separately so they are easy to delete
    final Frame[] adapted = adapt(frame, false, false);
    final Frame scoringFrame = adapted[0];
    final Frame createdVecs = adapted[1];
    // Append a zero-filled column that will receive the per-row error.
    scoringFrame.add("Reconstruction.MSE", scoringFrame.anyVec().makeZero());
    new MRTask2() {

        @Override
        public void map(Chunk[] chks) {
            final double[] rowValues = new double[len];
            final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
            final int rowCount = chks[0]._len;
            for (int r = 0; r < rowCount; r++) {
                // Gather the original data of this row.
                for (int col = 0; col < _names.length; col++) {
                    rowValues[col] = chks[col].at0(r);
                }
                // The error of the row goes into the last (appended) column.
                final double rowError = score_autoencoder(rowValues, null, neurons);
                chks[len].set0(r, rowError);
            }
        }
    }.doAll(scoringFrame);
    // Hand back only the output column(s) that follow the input columns.
    final Frame l2 = scoringFrame.extractFrame(_names.length, scoringFrame.numCols());
    createdVecs.delete();
    return l2;
}
Also used : Frame(water.fvec.Frame) Chunk(water.fvec.Chunk)

Example 57 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class DeepLearningModel method score.

/**
 * Overrides Model.score(): yields a regular prediction unless the model is an auto-encoder,
 * in which case the reconstruction of the input is produced instead.
 *
 * @param frame test dataset
 * @return a Frame containing the prediction or, for auto-encoders, the reconstructed columns
 */
@Override
public Frame score(Frame frame) {
    if (!get_params().autoencoder) {
        return super.score(frame);
    }
    // Reconstruction path.
    // adapt() hands back two frames:
    //   [0] the complete adapted layout - original vectors mixed with new adaptor vectors
    //   [1] just the newly created vectors, kept separately so they are easy to delete
    final Frame[] adapted = adapt(frame, false, false);
    final Frame scoringFrame = adapted[0];
    final Frame createdVecs = adapted[1];
    final int len = model_info().data_info().fullN();
    assert (model_info().data_info()._responses == 0);
    final String[] coefnames = model_info().data_info().coefNames();
    assert (len == coefnames.length);
    // One zero-filled output column per coefficient, named "reconstr_<coefficient>".
    for (int c = 0; c < len; c++) {
        scoringFrame.add("reconstr_" + coefnames[c], scoringFrame.anyVec().makeZero());
    }
    new MRTask2() {

        @Override
        public void map(Chunk[] chks) {
            final int firstOutputCol = _names.length;
            final double[] rowValues = new double[firstOutputCol];
            final float[] preds = new float[len];
            final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
            final int rowCount = chks[0]._len;
            for (int r = 0; r < rowCount; r++) {
                final float[] reconstructed = score_autoencoder(chks, r, rowValues, preds, neurons);
                // Write the reconstruction into the columns appended after the inputs.
                for (int c = 0; c < preds.length; c++) {
                    chks[firstOutputCol + c].set0(r, reconstructed[c]);
                }
            }
        }
    }.doAll(scoringFrame);
    // Extract the reconstructed columns first (this will call vec_impl()); only afterwards
    // is it possible to delete the temporary adaptor vectors.
    final Frame f = scoringFrame.extractFrame(_names.length, scoringFrame.numCols());
    createdVecs.delete();
    return f;
}
Also used : Frame(water.fvec.Frame) Chunk(water.fvec.Chunk)

Example 58 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class OOBScorer method map.

/**
 * Walks over all trees and, for every row treated as out-of-bag for a tree, scores the row
 * with that tree and adds the tree's output to the per-class tree chunks.
 *
 * @param chks chunks of the processed frame; the first {@code _ncols} are read as row data
 */
@Override
public void map(Chunk[] chks) {
    final double[] rowData = new double[_ncols];
    final float[] treePreds = new float[_nclass + 1];
    final int treeCount = _trees.length;
    final Chunk oobChunk = chk_oobt(chks);
    final Chunk respChunk = chk_resp(chks);
    for (int t = 0; t < treeCount; t++) {
        // Per-tree RNG used to decide which rows count as OOB for this tree.
        final Random treeRng = rngForTree(_trees[t], oobChunk.cidx());
        for (int r = 0; r < oobChunk._len; r++) {
            // A row is handled when the draw is at or above the sample rate,
            // or when its response value is NaN.
            final boolean handled = treeRng.nextFloat() >= _rate || Double.isNaN(respChunk.at0(r));
            if (!handled) {
                continue;
            }
            // Mark the row as OOB; for a single class (regression) keep a running count of
            // the trees that voted for this row instead of a plain flag.
            oobChunk.set0(r, _nclass > 1 ? 1 : oobChunk.at0(r) + 1);
            // Collect the row and score it with the current tree only.
            for (int col = 0; col < _ncols; col++) {
                rowData[col] = chks[col].at0(r);
            }
            Arrays.fill(treePreds, 0);
            score0(rowData, treePreds, _trees[t]);
            // Regression only: mirror the value into slot 1 so the loop below stays uniform.
            if (_nclass == 1) {
                treePreds[1] = treePreds[0];
            }
            // Accumulate the non-zero per-class outputs of the tree.
            for (int cls = 0; cls < _nclass; cls++) {
                final float vote = treePreds[1 + cls];
                if (vote == 0) {
                    continue;
                }
                final Chunk treeChunk = chk_tree(chks, cls);
                treeChunk.set0(r, (float) (treeChunk.at0(r) + vote));
            }
        }
    }
}
Also used : Random(java.util.Random) Chunk(water.fvec.Chunk)

Example 59 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class TreeMeasuresCollector method map.

/**
 * For every tree, scores that tree alone on the rows returned by
 * {@code ModelUtils.sampleOOBRows} for this chunk and accumulates per-tree statistics:
 * {@code _nrows} (rows scored) plus either {@code _votes} (correct predictions, when
 * {@code _classification} is set) or {@code _sse} (sum of squared errors, otherwise).
 * When {@code _var >= 0}, the value of column {@code _var} in each scored row is replaced by
 * the value of that column taken from a shuffled sampled row before scoring.
 * Rows whose response is NA are skipped. {@code _trees} is nulled at the end.
 *
 * @param chks chunks of the processed frame; the first {@code _ncols} are read as row data
 */
@Override
public void map(Chunk[] chks) {
    double[] data = new double[_ncols];
    float[] preds = new float[_nclasses + 1];
    Chunk cresp = chk_resp(chks);
    int nrows = cresp._len;
    // Preallocate the sampling buffer: sized from the (1 - _rate) fraction of the rows
    // with a 1.2x margin plus two extra slots; it is handed to sampleOOBRows for reuse
    int[] oob = new int[2 + Math.round((1f - _rate) * nrows * 1.2f + 0.5f)];
    // Buffer for the shuffled row indices; allocated lazily and only when _var >= 0
    int[] soob = null;
    // Prepare output data (one slot per tree)
    _nrows = new long[_ntrees];
    _votes = _classification ? new long[_ntrees] : null;
    _sse = _classification ? null : new float[_ntrees];
    // seed for shuffling oob samples, derived from the chunk index
    long seedForOob = ShuffleTask.seed(cresp.cidx());
    // Start iteration
    for (int tidx = 0; tidx < _ntrees; tidx++) {
        // tree
        // OOB RNG for this tree
        Random rng = rngForTree(_trees[tidx], cresp.cidx());
        // Collect oob rows; the same array is passed back in so it can be reused
        // (the returned reference is kept, so a replacement array is picked up as well)
        oob = ModelUtils.sampleOOBRows(nrows, _rate, rng, oob);
        // Number of sampled rows is stored in slot 0; row indices follow from slot 1
        int oobcnt = oob[0];
        if (_var >= 0) {
            // Grow the shuffle buffer only when the current one is too small
            if (soob == null || soob.length < oobcnt)
                soob = new int[oobcnt];
            // Shuffle array and copy results into <code>soob</code>
            Utils.shuffleArray(oob, oobcnt, soob, seedForOob, 1);
        }
        for (int j = 1; j < 1 + oobcnt; j++) {
            int row = oob[j];
            // we cannot deal with this row anyhow (response is NA)
            if (cresp.isNA0(row))
                continue;
            // - copy the values of this row into the scoring buffer
            for (int i = 0; i < _ncols; i++) data[i] = chks[i].at0(row);
            // - permute variable: take column _var from the shuffled row paired with this one
            //   (soob is 0-based while j starts at 1, hence j - 1)
            if (_var >= 0)
                data[_var] = chks[_var].at0(soob[j - 1]);
            else
                assert soob == null;
            // - score data
            Arrays.fill(preds, 0);
            // - score only the tree
            score0(data, preds, _trees[tidx]);
            // - derive a prediction
            if (_classification) {
                int pred = ModelUtils.getPrediction(preds, data);
                int actu = (int) cresp.at80(row);
                // - collect only correct votes
                if (pred == actu)
                    _votes[tidx]++;
            } else {
                /* regression */
                // Important: the regression prediction is read from slot 0 of preds
                float pred = preds[0];
                float actu = (float) cresp.at0(row);
                // - accumulate the squared error of this tree
                _sse[tidx] += (actu - pred) * (actu - pred);
            }
            // - collect rows which were used for voting
            _nrows[tidx]++;
        //if (_var<0) System.err.println("VARIMP OOB row: " + (cresp._start+row) + " : " + Arrays.toString(data) + " tree/actu: " + pred + "/" + actu);
        }
    }
    // Clean-up: drop the reference to the trees once the chunk has been processed
    _trees = null;
}
Also used : Random(java.util.Random) Chunk(water.fvec.Chunk)

Example 60 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class VecChunkDemo method frame_001.

/**
 * Demo: parses the iris dataset into a Frame, fetches it back from the DKV by key and logs
 * details of every vector (summary, length, group, NA count, domain, cardinality, one sample
 * value and the chunk holding a given row). The parsed frame is deleted when the demo ends.
 */
@Test
public void frame_001() {
    // Row indices are 0-based; keeping them in locals ensures the log labels always match
    // the rows that are actually read.
    final long sampleRow = 50;
    final long chunkRow = 100;
    String fileName = "../smalldata/iris/iris.csv";
    File file = new File(fileName);
    Key fkey = NFSFileVec.make(file);
    Key okey = Key.make("iris.hex");
    Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
    try {
        // Demonstrates looking the parsed frame up in the DKV through its key.
        Value v = DKV.get(okey);
        Frame f = v.get();
        Log.info("frame              : " + f);
        int len = f.numCols();
        for (int i = 0; i < len; i++) {
            Log.info("vector                        :" + i);
            // looping through the vectors of a frame and printing specifics
            Vec vv = f.vec(i);
            Log.info("vector     summary                :" + vv);
            Log.info("vector     length                 :" + vv.length());
            Log.info("vector     group                  :" + vv.group());
            Log.info("vector     na count               :" + vv.naCnt());
            // null if not enum; Arrays.toString prints the labels (or "null") instead of
            // the identity string that plain concatenation of an array would produce
            Log.info("vector     domain null if not enum:" + java.util.Arrays.toString(vv.domain()));
            int cardinality = vv.cardinality();
            Log.info("vector     cardinality            :" + cardinality);
            if (cardinality != -1) {
                for (int j = 0; j < cardinality; j++) {
                    Log.info("labels                    :" + vv.domain(j));
                }
            }
            // gives the element at that row; count starts from 0.
            Log.info("vector value at row " + sampleRow + "            :" + vv.at(sampleRow));
            int chunk_count = vv.nChunks();
            Log.info("chunk     count                   :" + chunk_count);
            Chunk c = vv.chunkForRow(chunkRow);
            Log.info("chunk     for row " + chunkRow + "             :" + c);
        }
    } finally {
        // Release the parsed frame so the demo leaves nothing behind in the store.
        fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) NFSFileVec(water.fvec.NFSFileVec) Value(water.Value) Chunk(water.fvec.Chunk) File(java.io.File) Key(water.Key) Test(org.junit.Test)

Aggregations

Chunk (water.fvec.Chunk)74 Frame (water.fvec.Frame)50 NewChunk (water.fvec.NewChunk)36 MRTask (water.MRTask)33 Vec (water.fvec.Vec)30 ValFrame (water.rapids.vals.ValFrame)26 C0DChunk (water.fvec.C0DChunk)7 BufferedString (water.parser.BufferedString)7 Random (java.util.Random)6 Test (org.junit.Test)5 MRTask2 (water.MRTask2)4 Val (water.rapids.Val)4 Key (water.Key)3 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)3 AstRoot (water.rapids.ast.AstRoot)3 AstNumList (water.rapids.ast.params.AstNumList)3 File (java.io.File)2 IOException (java.io.IOException)2 ValNum (water.rapids.vals.ValNum)2 PrettyPrint (water.util.PrettyPrint)2