Search in sources :

Example 51 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class SVDModel method predictScoreImpl.

/**
 * Scores the rows of {@code adaptedFr} with this model and returns the per-row results as a new frame.
 *
 * <p>The visible code appends {@code _parms._nv} zero-filled columns named {@code PC1..PCn} to a new
 * {@link Frame} built from {@code adaptedFr}, fills them row by row with the values returned by
 * {@code score0}, and returns just those columns under a key made from {@code destination_key}.
 *
 * <p>NOTE(review): this listing has lost closing braces and several statements (flagged inline),
 * so it does not compile as shown — confirm every flagged spot against the upstream source.
 *
 * @param orig            frame handed to {@code makeModelMetrics}
 * @param adaptedFr       frame whose columns are read for scoring
 * @param destination_key name used to build the key of the returned frame
 * @param j               job polled via {@code stop_requested()}; may be {@code null}
 * @param computeMetrics  not read anywhere in the visible code — TODO confirm whether it should gate the metrics call
 * @return frame holding only the newly added {@code PC*} columns
 */
protected Frame predictScoreImpl(Frame orig, Frame adaptedFr, String destination_key, final Job j, boolean computeMetrics) {
    Frame adaptFrm = new Frame(adaptedFr);
    // Append one zero-filled output column per component, named PC1 .. PC<_parms._nv>.
    for (int i = 0; i < _parms._nv; i++) adaptFrm.add("PC" + String.valueOf(i + 1), adaptFrm.anyVec().makeZero());
    // NOTE(review): no call that launches this task (e.g. a doAll on adaptFrm) is visible in the listing — verify.
    new MRTask() {

        public void map(Chunk[] chks) {
            // NOTE(review): the statement guarded by this check is missing here — presumably an early return; verify.
            if (isCancelled() || j != null && j.stop_requested())
            // Scratch buffers allocated once per chunk and reused for every row.
            double[] tmp = new double[_output._names.length];
            double[] preds = new double[_parms._nv];
            for (int row = 0; row < chks[0]._len; row++) {
                double[] p = score0(chks, row, tmp, preds);
                // Results are written at column offset _output._names.length, the same offset extractFrame uses below.
                for (int c = 0; c < preds.length; c++) chks[_output._names.length + c].set(row, p[c]);
            // NOTE(review): the statement guarded by this null check is missing — presumably a job progress update; verify.
            if (j != null)
    // Return the projection into right singular vector (V) space
    int x = _output._names.length, y = adaptFrm.numCols();
    // this will call vec_impl() and we cannot call the delete() below just yet
    Frame f = adaptFrm.extractFrame(x, y);
    // Re-wrap the extracted columns under a key derived from destination_key.
    f = new Frame(Key.<Frame>make(destination_key), f.names(), f.vecs());
    // The value returned by makeModelMetrics is discarded in the visible code.
    makeMetricBuilder(null).makeModelMetrics(this, orig, null, null);
    return f;
Also used : Frame(water.fvec.Frame) Chunk(water.fvec.Chunk)

Example 52 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class ReconstructTreeState method map.

// Re-scores every row of this chunk with each tree in _trees and accumulates the per-class
// predictions into the tree columns obtained from _st.chk_tree, while updating the per-row
// counter held in the oobt chunk.
// NOTE(review): this listing has lost closing braces and some statements (flagged inline),
// so it does not compile as shown — confirm against the upstream source.
public void map(Chunk[] chks) {
    // Scratch buffers reused for every row/tree: feature values and raw predictions.
    double[] data = new double[_ncols];
    double[] preds = new double[_nclass + 1];
    int ntrees = _trees.length;
    // Without a weight column, a C0DChunk built with value 1 is used — presumably a constant weight of 1 per row; verify.
    Chunk weight = _st.hasWeightCol() ? _st.chk_weight(chks) : new C0DChunk(1, chks[0]._len);
    Chunk oobt = _st.chk_oobt(chks);
    Chunk resp = _st.chk_resp(chks);
    for (int tidx = 0; tidx < ntrees; tidx++) {
        // tree
        // OOB RNG for this tree
        Random rng = rngForTree(_trees[tidx], oobt.cidx());
        for (int row = 0; row < oobt._len; row++) {
            double w = weight.atd(row);
            // NOTE(review): the statement guarded by this check is missing — presumably skips zero-weight rows; verify.
            if (w == 0)
            double y = resp.atd(row);
            // NOTE(review): the statement guarded by this check is missing — presumably skips rows with a NaN response; verify.
            if (Double.isNaN(y))
            // A row counts as out-of-bag when OOB is enabled and the RNG draw is >= _rate.
            boolean rowIsOOB = _OOBEnabled && rng.nextFloat() >= _rate;
            if (!_OOBEnabled || rowIsOOB) {
                // Make a prediction
                for (int i = 0; i < _ncols; i++) data[i] = chks[i].atd(row);
                Arrays.fill(preds, 0);
                score0(data, preds, _trees[tidx]);
                // Only for regression, keep consistency
                if (_nclass == 1)
                    preds[1] = preds[0];
                // Write tree predictions
                for (int c = 0; c < _nclass; c++) {
                    // over all class
                    double prediction = preds[1 + c];
                    if (preds[1 + c] != 0) {
                        Chunk ctree = _st.chk_tree(chks, c);
                        double wcount = oobt.atd(row);
                        // NOTE(review): as listed, only the first set() is guarded and the second always runs;
                        // an `else` between the two calls appears to have been lost — verify.
                        // Also note the (float) cast binds to the parenthesised numerator only, not to the quotient.
                        if (_OOBEnabled && _nclass >= 2)
                            //store avg prediction
                            ctree.set(row, (float) (ctree.atd(row) * wcount + prediction) / (wcount + w));
                            ctree.set(row, (float) (ctree.atd(row) + prediction));
                // Mark oob row and store number of trees voting for this row
                if (rowIsOOB)
                    oobt.set(row, oobt.atd(row) + w);
    // Clear the _st reference once the chunk has been processed.
    _st = null;
Also used : C0DChunk(water.fvec.C0DChunk) Random(java.util.Random) Chunk(water.fvec.Chunk) C0DChunk(water.fvec.C0DChunk)

Example 53 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class VecUtils method categoricalToInt.

   * Create a new {@link Vec} of numeric values from a categorical {@link Vec}.
   * If the first value in the domain of the src Vec is a stringified int,
   * then it will use those ints. Otherwise, it will use the raw enumeration level mapping.
   * If the domain is stringified ints, then all of the domain must be able to be parsed as
   * an int. If it cannot be parsed as such, a NumberFormatException will be caught and
   * rethrown as an H2OIllegalArgumentException that declares the illegal domain value.
   * Otherwise, the src Vec is copied to a new Vec whose domain is null.
   * The magic of this method should be eliminated. It should just use enumeration level
   * maps. If the user wants domains to be used, call categoricalDomainsToNumeric().
   * PUBDEV-2209
   * @param src a categorical {@link Vec}
   * @return a numeric {@link Vec}
// Converts a categorical Vec into a numeric Vec (see the description preceding this method).
// NOTE(review): this listing has lost closing braces and some statements (flagged inline),
// so it does not compile as shown — confirm against the upstream source.
public static Vec categoricalToInt(final Vec src) {
    // An integer column with a null or empty domain is simply copied over as T_NUM.
    if (src.isInt() && (src.domain() == null || src.domain().length == 0))
        return copyOver(src, Vec.T_NUM, null);
    // Note: this path throws the plain IllegalArgumentException, not H2OIllegalArgumentException.
    if (!src.isCategorical())
        throw new IllegalArgumentException("categoricalToInt conversion only works on categorical columns.");
    // check if the 1st lvl of the domain can be parsed as int
    boolean useDomain = false;
    Vec newVec = copyOver(src, Vec.T_NUM, null);
    // NOTE(review): as listed, nothing inside this try can throw NumberFormatException — the parse of the
    // first domain level appears to be missing, and the catch body is not visible; verify.
    try {
        useDomain = true;
    } catch (NumberFormatException e) {
    // makeCopy and return...
    if (useDomain) {
        // NOTE(review): no call that launches this task (e.g. a doAll on newVec) is visible in the listing — verify.
        new MRTask() {

            public void map(Chunk c) {
                // Replace each non-NA value with the int parsed from the src domain string at that index.
                for (int i = 0; i < c._len; ++i) if (!c.isNA(i))
                    c.set(i, Integer.parseInt(src.domain()[(int) c.at8(i)]));
    return newVec;
Also used : Vec(water.fvec.Vec) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException)

Example 54 with Chunk

use of water.fvec.Chunk in project h2o-3 by h2oai.

the class VecUtils method UUIDToStringVec.

   * Create a new {@link Vec} of string values from a UUID {@link Vec}.
   * Each value of the resulting string {@link Vec} is the standard hexadecimal representation of a UUID.
   * @param src a UUID {@link Vec}
   * @return a string {@link Vec}
// Builds a string Vec from a UUID Vec by formatting each value with PrettyPrint.UUID.
// NOTE(review): this listing has lost closing braces and possibly a statement (flagged inline),
// so it does not compile as shown — confirm against the upstream source.
public static Vec UUIDToStringVec(Vec src) {
    // Reject any source column that is not a UUID column.
    if (!src.isUUID())
        throw new H2OIllegalArgumentException("UUIDToStringVec() conversion only works on UUID columns");
    Vec res = new MRTask() {

        public void map(Chunk chk, NewChunk newChk) {
            if (chk instanceof C0DChunk) {
                // all NAs
                for (int i = 0; i < chk._len; i++) newChk.addNA();
            } else {
                for (int i = 0; i < chk._len; i++) {
                    // NOTE(review): only non-NA rows append a value in the visible code; an else branch adding
                    // an NA for NA rows appears to be missing, which would shorten the output — verify.
                    if (!chk.isNA(i))
                        newChk.addStr(PrettyPrint.UUID(chk.at16l(i), chk.at16h(i)));
    // Run over src producing a single T_STR output column, then take that column from the output frame.
    }.doAll(Vec.T_STR, src).outputFrame().anyVec();
    assert res != null;
    return res;
Also used : C0DChunk(water.fvec.C0DChunk) Vec(water.fvec.Vec) H2OIllegalArgumentException(water.exceptions.H2OIllegalArgumentException) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) C0DChunk(water.fvec.C0DChunk) NewChunk(water.fvec.NewChunk)

Example 55 with Chunk

use of water.fvec.Chunk in project h2o-2 by h2oai.

the class DeepLearningModel method scoreDeepFeatures.

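   * Score deep features: compute, for each row, the activations of the given hidden layer (on-the-fly, written into newly created dense columns)
   * @param frame Original data (can contain response; if the model is supervised and the response is present, it is prepended to the result)
   * @param layer Index of the hidden layer to extract (0-based, must be smaller than the number of hidden layers)
   * @return Frame containing one Vec per neuron of that hidden layer (named DF.C1, DF.C2, ...), caller is responsible for deletion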
// Computes, for every row of the frame, the activations of hidden layer `layer` and returns them
// as new columns named DF.C1, DF.C2, ... (with the response column prepended when it was found).
// NOTE(review): this listing has lost closing braces and some statements (flagged inline),
// so it does not compile as shown — confirm against the upstream source.
public Frame scoreDeepFeatures(Frame frame, final int layer) {
    // layer must index into the configured hidden-layer sizes.
    assert (layer >= 0 && layer < model_info().get_params().hidden.length);
    final int len = nfeatures();
    Vec resp = null;
    if (isSupervised()) {
        int ridx = frame.find(responseName());
        if (ridx != -1) {
            // drop the response for scoring!
            // NOTE(review): only the frame copy and the lookup of the response Vec are visible here;
            // the statement that actually removes the column appears to be missing — verify.
            frame = new Frame(frame);
            resp = frame.vecs()[ridx];
    // Adapt the Frame layout - returns adapted frame and frame containing only
    // newly created vectors
    Frame[] adaptFrms = adapt(frame, false, false);
    // Adapted frame containing all columns - mix of original vectors from fr
    // and newly created vectors serving as adaptors
    Frame adaptFrm = adaptFrms[0];
    // Contains only newly created vectors. The frame eases deletion of these vectors.
    // NOTE(review): onlyAdaptFrm is never used in the visible code — presumably it is deleted in a lost line; verify.
    Frame onlyAdaptFrm = adaptFrms[1];
    //create new features, will be dense
    final int features = model_info().get_params().hidden[layer];
    Vec[] vecs = adaptFrm.anyVec().makeZeros(features);
    for (int j = 0; j < features; ++j) {
        adaptFrm.add("DF.C" + (j + 1), vecs[j]);
    // NOTE(review): no call that launches this task (e.g. a doAll on adaptFrm) is visible in the listing — verify.
    new MRTask2() {

        public void map(Chunk[] chks) {
            double[] tmp = new double[len];
            // df is only used for its length (the number of output features) in the visible code.
            float[] df = new float[features];
            final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
            for (int row = 0; row < chks[0]._len; row++) {
                // Load the first len column values of this row as the network input.
                for (int i = 0; i < len; i++) tmp[i] = chks[i].at0(row);
                ((Neurons.Input) neurons[0]).setInput(-1, tmp);
                DeepLearningTask.step(-1, neurons, model_info, false, null);
                //extract the layer-th hidden feature
                // neurons[0] is used as the input layer above, so hidden layer `layer` is read at index layer + 1.
                float[] out = neurons[layer + 1]._a.raw();
                // Output columns are written at column offset _names.length, the same offset extractFrame uses below.
                for (int c = 0; c < df.length; c++) chks[_names.length + c].set0(row, out[c]);
    // Return just the output columns
    int x = _names.length, y = adaptFrm.numCols();
    Frame ret = adaptFrm.extractFrame(x, y);
    // Put the response column (when one was found above) in front of the extracted feature columns.
    if (resp != null)
        ret.prepend(responseName(), resp);
    return ret;
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Chunk(water.fvec.Chunk)


Chunk (water.fvec.Chunk)74 Frame (water.fvec.Frame)50 NewChunk (water.fvec.NewChunk)36 MRTask (water.MRTask)33 Vec (water.fvec.Vec)30 ValFrame (water.rapids.vals.ValFrame)26 C0DChunk (water.fvec.C0DChunk)7 BufferedString (water.parser.BufferedString)7 Random (java.util.Random)6 Test (org.junit.Test)5 MRTask2 (water.MRTask2)4 Val (water.rapids.Val)4 Key (water.Key)3 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)3 AstRoot (water.rapids.ast.AstRoot)3 AstNumList (water.rapids.ast.params.AstNumList)3 File ( IOException ( ValNum (water.rapids.vals.ValNum)2 PrettyPrint (water.util.PrettyPrint)2