Search in sources :

Example 41 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstCumu method apply.

/**
 * Applies this cumulative operator to the frame produced by {@code asts[1]}, along the
 * axis produced by {@code asts[2]}.
 *
 * <p>For axis 0 the work is done in two passes: a first task ({@code CumuTask} for a
 * single column, {@code CumuTaskWholeFrame} otherwise) produces the output and fills
 * {@code _chkCumu}; a second {@link MRTask} then visits every chunk except chunk 0 and
 * combines, through {@code op}, each element with the {@code _chkCumu} entry recorded for
 * the preceding chunk. For axis 1 a multi-column frame is handled by {@code CumuTaskAxis1},
 * while a single-column frame is simply returned as a new {@link Frame} built from the input.
 *
 * @param env  execution environment used to evaluate the arguments
 * @param stk  stack helper that tracks the evaluated frame argument
 * @param asts argument ASTs; index 1 yields the frame and index 2 yields the axis
 * @return the resulting frame wrapped in a {@link ValFrame}
 * @throws IllegalArgumentException if any column is categorical, string or UUID, or if the
 *                                  axis is neither 0 nor 1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    Frame input = stk.track(asts[1].exec(env)).getFrame();
    AstRoot axisArg = asts[2];

    // Column types are checked before the axis argument is even evaluated.
    for (Vec col : input.vecs()) {
        boolean unsupported = col.isCategorical() || col.isString() || col.isUUID();
        if (unsupported)
            throw new IllegalArgumentException("Cumulative functions not applicable to enum, string, or UUID values");
    }

    double axis = axisArg.exec(env).getNum();
    if (!(axis == 0.0 || axis == 1.0))
        throw new IllegalArgumentException("Axis must be 0 or 1");
    boolean downColumns = axis == 0.0;

    if (input.numCols() == 1) {
        if (!downColumns) {
            // Single column along axis 1: hand back a new Frame built from the input.
            return new ValFrame(new Frame(input));
        }
        // First pass over the single column.
        AstCumu.CumuTask firstPass = new AstCumu.CumuTask(input.anyVec().nChunks(), init());
        firstPass.doAll(new byte[] { Vec.T_NUM }, input.anyVec());
        final double[] chunkCumu = firstPass._chkCumu;
        Vec resultVec = firstPass.outputFrame().anyVec();
        // Second pass: chunk 0 is left untouched, every later chunk is combined with the
        // value recorded for the chunk before it.
        new MRTask() {

            @Override
            public void map(Chunk chunk) {
                int idx = chunk.cidx();
                if (idx == 0)
                    return;
                double prev = chunkCumu[idx - 1];
                for (int row = 0; row < chunk._len; ++row) {
                    chunk.set(row, op(chunk.atd(row), prev));
                }
            }
        }.doAll(resultVec);
        Key<Frame> resultKey = Key.make();
        return new ValFrame(new Frame(resultKey, null, new Vec[] { resultVec }));
    }

    if (!downColumns) {
        // Several columns along axis 1.
        AstCumu.CumuTaskAxis1 rowTask = new AstCumu.CumuTaskAxis1(init());
        Frame rowResult = rowTask.doAll(input.numCols(), Vec.T_NUM, input).outputFrame(null, input.names(), null);
        return new ValFrame(new Frame(rowResult));
    }

    // Several columns, down the column implementation.
    AstCumu.CumuTaskWholeFrame firstPass = new AstCumu.CumuTaskWholeFrame(input.anyVec().nChunks(), init(), input.numCols());
    Frame colResult = firstPass.doAll(input.numCols(), Vec.T_NUM, input).outputFrame(null, input.names(), null);
    final double[][] chunkCumu = firstPass._chkCumu;
    new MRTask() {

        @Override
        public void map(Chunk[] chunks) {
            if (chunks[0].cidx() == 0)
                return;
            for (int col = 0; col < chunks.length; col++) {
                Chunk chunk = chunks[col];
                double prev = chunkCumu[col][chunk.cidx() - 1];
                for (int row = 0; row < chunk._len; ++row) {
                    chunk.set(row, op(chunk.atd(row), prev));
                }
            }
        }
    }.doAll(colResult);
    return new ValFrame(new Frame(colResult));
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk) Vec(water.fvec.Vec) MRTask(water.MRTask) AstRoot(water.rapids.ast.AstRoot)

Example 42 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class RefCntTest method testBasic.

/**
 * Test basic Copy-On-Write optimization is working, by witnessing that the
 * correct (small) number of real vec copies are made, despite many virtual
 * copies being made.
 *
 * <p>After each Rapids expression the test re-reads the VectorGroup of the parsed
 * frame from the DKV and compares its {@code len()} with the previous reading; the
 * difference is what the test treats as the number of newly created vectors.
 * All expressions run in one {@link Session}, which is ended at the very end and is
 * expected to release everything the test created.
 */
@Test
public void testBasic() {
    Session session = new Session();
    Frame crimes = parse_test_file(Key.make("chicagoCrimes10k.hex"), "smalldata/chicago/chicagoCrimes10k.csv.zip");
    Vec.VectorGroup vg = crimes.anyVec().group();
    // Expect to compute and update crimes.hex "Date" column in-place, but the
    // result is called py_1.  Exactly 1 new vector is made (result of +)
    // Baseline reading: pull latest value from DKV (no caching allowed)
    int key1 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    Assert.assertTrue(crimes.vec("Date").isTime());
    Rapids.exec("(tmp= py_1 (:= chicagoCrimes10k.hex (+ (cols_py chicagoCrimes10k.hex \"Date\") 1) 2 []))", session);
    // User named frame is unchanged
    Assert.assertTrue(crimes.vec("Date").isTime());
    Frame py_1 = DKV.getGet(Key.make("py_1"));
    // tmp= py_1 holds the changed column
    Assert.assertTrue(py_1.vec("Date").isNumeric());
    // msec since epoch is generally >1.3b msec
    Assert.assertTrue(py_1.vec("Date").mean() > 1300000000L);
    // Pull latest value from DKV (no caching allowed)
    int key2 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector is made: the result of the + above
    Assert.assertEquals(key1 + 1, key2);
    // Remove original hex key - even though most columns are shared.  Note
    // that this remove is only valid when done in the session context -
    // otherwise the sharing can't be tracked.  Since most columns are shared,
    // the DKV key should be removed, but NOT most data.
    Rapids.exec("(rm chicagoCrimes10k.hex)", session);
    // Drop the local reference to the frame that was just removed
    crimes = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_1.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key_tmp = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // No New Vectors, and VecGroup never rolls backwards
    Assert.assertEquals(key2, key_tmp);
    // Both append, and nuke a dead temp, in one expression
    Rapids.exec("(, (tmp= py_2 (append py_1 (day (cols_py py_1 \"Date\")) \"Day\")) (rm py_1))", session);
    py_1 = null;
    Frame py_2 = DKV.getGet(Key.make("py_2"));
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_2.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key3 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector
    Assert.assertEquals(key2 + 1, key3);
    // Start a series of computations that append columns
    Rapids.exec("(tmp= py_3 (append py_2 (month (cols_py py_2 \"Date\")) \"Month\"))", session);
    Frame py_3 = DKV.getGet(Key.make("py_3"));
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_3.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key4 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector
    Assert.assertEquals(key3 + 1, key4);
    // This one does 2 computations to append 1 column, also does an over-write
    // instead of append.
    Rapids.exec("(, (rm py_2) (tmp= py_4 (:= py_3 (+ (year (cols_py py_3 \"Date\")) 1900) 17 [])))", session);
    Frame py_4 = DKV.getGet(Key.make("py_4"));
    py_2 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_4.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key5 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 2 new vectors, for two ops: "year" and "+1900".
    Assert.assertEquals(key4 + 2, key5);
    // Append a "WeekNum" column while removing the previous temp
    Rapids.exec("(, (rm py_3) (tmp= py_5 (append py_4 (week (cols_py py_4 \"Date\")) \"WeekNum\")))", session);
    Frame py_5 = DKV.getGet(Key.make("py_5"));
    py_3 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_5.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key6 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector
    Assert.assertEquals(key5 + 1, key6);
    // Append a "WeekDay" column while removing the previous temp
    Rapids.exec("(,  (rm py_4) (tmp= py_6 (append py_5 (dayOfWeek (cols_py py_5 \"Date\")) \"WeekDay\")))", session);
    Frame py_6 = DKV.getGet(Key.make("py_6"));
    py_4 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_6.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key7 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector
    Assert.assertEquals(key6 + 1, key7);
    // Append an "HourOfDay" column while removing the previous temp
    Rapids.exec("(, (rm py_5) (tmp= py_7 (append py_6 (hour (cols_py py_6 \"Date\")) \"HourOfDay\")))", session);
    Frame py_7 = DKV.getGet(Key.make("py_7"));
    py_5 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_7.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key8 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector
    Assert.assertEquals(key7 + 1, key8);
    // A more involved expression; lots of internal temps
    Rapids.exec("(, (rm py_6) (tmp= py_8 (append py_7 (| (== (cols_py py_7 \"WeekDay\") \"Sun\") (== (cols_py py_7 \"WeekDay\") \"Sat\")) \"Weekend\")))", session);
    Frame py_8 = DKV.getGet(Key.make("py_8"));
    py_6 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_8.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key9 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 3 new vectors, one for each of {==, ==, |}
    Assert.assertEquals(key8 + 3, key9);
    // A more involved expression; lots of internal temps
    Rapids.exec("(, (rm py_7) (tmp= py_9 (append py_8 (cut (cols_py py_8 \"Month\") [0 2 5 7 10 12] [\"Winter\" \"Spring\" \"Summer\" \"Autumn\" \"Winter\"] FALSE TRUE 3) \"Season\")))", session);
    Frame py_9 = DKV.getGet(Key.make("py_9"));
    py_7 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_9.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    int key10 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // Exactly 1 new vector, despite lots of internal vecs
    Assert.assertEquals(key9 + 1, key10);
    // Drop a column
    Rapids.exec("(, (rm py_8) (tmp= py_10 (cols py_9 -3)))", session);
    Frame py_10 = DKV.getGet(Key.make("py_10"));
    py_8 = null;
    // Verify we can compute rollups on all cols; will crash if some cols are deleted
    for (Vec vec : py_10.vecs()) vec.mean();
    // Pull latest value from DKV (no caching allowed)
    key_tmp = DKV.<Vec.VectorGroup>getGet(vg._key).len();
    // No new vectors
    Assert.assertEquals(key10, key_tmp);
    // End the session; freeing all resources
    session.end(null);
    // NO FINALLY FRAME DELETES HERE PLEASE...
    // Session ending should clean up; if it does not we need to detect the leak
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 43 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class RapidsTest method checkSaneFrame_impl.

/**
 * Checks that every Frame found among the keys of {@code H2O.localKeySet()} only
 * references Vecs whose keys can still be fetched with {@code DKV.get}.
 *
 * <p>On the first Vec that cannot be fetched, a diagnostic naming the frame key, the
 * Vec key and the column name is printed to {@code System.err}.
 *
 * @return {@code false} as soon as one missing Vec is found, {@code true} otherwise
 */
static boolean checkSaneFrame_impl() {
    for (Key localKey : H2O.localKeySet()) {
        Value stored = Value.STORE_get(localKey);
        // Only stored values that hold a Frame are inspected.
        if (stored == null || !stored.isFrame())
            continue;
        Frame frame = stored.get();
        int col = 0;
        for (Vec column : frame.vecs()) {
            if (DKV.get(column._key) == null) {
                System.err.println("Frame " + frame._key + " in the DKV, is missing Vec " + column._key + ", name=" + frame._names[col]);
                return false;
            }
            col++;
        }
    }
    return true;
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) NFSFileVec(water.fvec.NFSFileVec) Vec(water.fvec.Vec)

Example 44 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstRectangleConditionalAssignTest method testConditionalAssignCategorical.

/**
 * Runs a Rapids conditional assignment of the string {@code "b"} into column 4 of the
 * {@code data} frame, using {@code (== (cols_py data 4) "a")} as the row selector, and
 * compares column 4 of the result with an expected vec built by {@code cvec}.
 *
 * <p>All frames and vecs created by the test are removed in {@code finally}, so a failing
 * comparison no longer leaves the result frame ({@code py_1}) behind.
 */
@Test
public void testConditionalAssignCategorical() {
    Frame fr = makeTestFrame();
    Vec expected = cvec(new String[] { "a", "b" }, "b", "b", "b", "b", "b");
    // Declared outside the try block so that cleanup can reach it even if the assertion throws.
    Frame fr2 = null;
    try {
        Val val = Rapids.exec("(tmp= py_1 (:= data \"b\" 4 (== (cols_py data 4) \"a\")))");
        // NOTE(review): a non-frame result skips the comparison and the test still passes;
        // confirm whether that leniency is intended.
        if (val instanceof ValFrame) {
            fr2 = val.getFrame();
            assertCatVecEquals(expected, fr2.vec(4));
        }
    } finally {
        // Same removal order as before: result frame first, then the inputs.
        if (fr2 != null) {
            fr2.remove();
        }
        fr.remove();
        expected.remove();
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 45 with Vec

use of water.fvec.Vec in project h2o-3 by h2oai.

the class AstRectangleConditionalAssignTest method makeTestFrame.

/**
 * Builds the five-column frame stored under the key {@code "data"} that the tests operate on.
 *
 * <p>A temporary int vec holding 1..5 drives an {@link MRTask} that emits, for each input
 * value {@code r}: {@code r} (v1), {@code 11.2 * r} (v2), a UUID from {@code (r, r * 10)}
 * (v3), the string {@code "row" + r} (v4) and a categorical level (v5, domain
 * {@code {"a", "b"}}; level 0 for even {@code r}, level 1 otherwise). The temporary vec is
 * removed before returning.
 *
 * @return the newly created frame
 */
private Frame makeTestFrame() {
    final byte[] columnTypes = { Vec.T_NUM, Vec.T_NUM, Vec.T_UUID, Vec.T_STR, Vec.T_CAT };
    final String[] columnNames = { "v1", "v2", "v3", "v4", "v5" };
    // Only the last (categorical) column carries a domain.
    final String[][] columnDomains = { null, null, null, null, new String[] { "a", "b" } };

    Frame result = null;
    Vec seed = ivec(1, 2, 3, 4, 5);
    try {
        result = new MRTask() {

            @Override
            public void map(Chunk[] cs, NewChunk[] ncs) {
                Chunk source = cs[0];
                for (int row = 0; row < source._len; row++) {
                    int r = (int) source.atd(row);
                    int level;
                    if (r % 2 == 0) {
                        level = 0;
                    } else {
                        level = 1;
                    }
                    ncs[0].addNum(r);
                    ncs[1].addNum(11.2 * r);
                    ncs[2].addUUID(r, r * 10);
                    ncs[3].addStr("row" + r);
                    ncs[4].addCategorical(level);
                }
            }
        }.doAll(columnTypes, seed).outputFrame(Key.make("data"), columnNames, columnDomains);
    } finally {
        // The seed vec is only needed to drive the task.
        seed.remove();
    }
    assert result != null;
    return result;
}
Also used : ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) MRTask(water.MRTask) Chunk(water.fvec.Chunk) NewChunk(water.fvec.NewChunk)

Aggregations

Vec (water.fvec.Vec): 280, Frame (water.fvec.Frame): 213, Test (org.junit.Test): 82, NFSFileVec (water.fvec.NFSFileVec): 48, ValFrame (water.rapids.vals.ValFrame): 47, Chunk (water.fvec.Chunk): 30, Random (java.util.Random): 25, NewChunk (water.fvec.NewChunk): 23, DeepLearningParameters (hex.deeplearning.DeepLearningModel.DeepLearningParameters): 22, Key (water.Key): 21, MRTask (water.MRTask): 17, Val (water.rapids.Val): 14, File (java.io.File): 11, ArrayList (java.util.ArrayList): 11, Futures (water.Futures): 11, H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException): 11, ValNum (water.rapids.vals.ValNum): 11, ShuffleSplitFrame (hex.splitframe.ShuffleSplitFrame): 10, BufferedString (water.parser.BufferedString): 10, AppendableVec (water.fvec.AppendableVec): 9