Use of water.fvec.Vec in project h2o-3 by h2oai.
Example: method apply of class AstCumu.
/**
 * Evaluates a cumulative function (sum/prod/min/max, per the concrete AstCumu
 * subclass' {@code op}/{@code init}) over a frame.
 *
 * <p>Arguments: asts[1] is the frame expression, asts[2] is the axis
 * (0 = down the columns, 1 = across each row). Categorical, string, and UUID
 * columns are rejected up front.
 *
 * @return a new ValFrame holding the cumulated result
 * @throws IllegalArgumentException on non-numeric columns or an axis other than 0/1
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // Evaluate the frame argument first and register it with the session tracker.
  Frame frame = stk.track(asts[1].exec(env)).getFrame();
  AstRoot axisArg = asts[2];
  // Cumulative ops are only meaningful on numeric data.
  for (Vec vec : frame.vecs()) {
    if (vec.isCategorical() || vec.isString() || vec.isUUID())
      throw new IllegalArgumentException("Cumulative functions not applicable to enum, string, or UUID values");
  }
  double axis = axisArg.exec(env).getNum();
  if (axis != 0.0 && axis != 1.0)
    throw new IllegalArgumentException("Axis must be 0 or 1");
  if (frame.numCols() == 1) {
    if (axis == 0.0) {
      // Pass 1: cumulate within each chunk; task records each chunk's total.
      AstCumu.CumuTask task = new AstCumu.CumuTask(frame.anyVec().nChunks(), init());
      task.doAll(new byte[] { Vec.T_NUM }, frame.anyVec());
      final double[] perChunkCumu = task._chkCumu;
      Vec cumuVec = task.outputFrame().anyVec();
      // Pass 2: fold the running total of all preceding chunks into each row.
      new MRTask() {
        @Override
        public void map(Chunk c) {
          int cidx = c.cidx();
          if (cidx == 0) return;  // first chunk has no carry-in
          double carry = perChunkCumu[cidx - 1];
          for (int row = 0; row < c._len; row++)
            c.set(row, op(c.atd(row), carry));
        }
      }.doAll(cumuVec);
      Key<Frame> resultKey = Key.make();
      return new ValFrame(new Frame(resultKey, null, new Vec[] { cumuVec }));
    }
    // Axis 1 on a single column is the identity: return a copy of the frame.
    return new ValFrame(new Frame(frame));
  }
  if (axis == 0.0) {
    // Down-the-columns implementation over the whole multi-column frame.
    AstCumu.CumuTaskWholeFrame task = new AstCumu.CumuTaskWholeFrame(frame.anyVec().nChunks(), init(), frame.numCols());
    Frame result = task.doAll(frame.numCols(), Vec.T_NUM, frame).outputFrame(null, frame.names(), null);
    final double[][] perChunkCumu = task._chkCumu;
    // Same carry-in fix-up as the single-column case, one carry per column.
    new MRTask() {
      @Override
      public void map(Chunk[] cs) {
        if (cs[0].cidx() == 0) return;  // first chunk row-range: nothing to fold in
        for (int col = 0; col < cs.length; col++) {
          double carry = perChunkCumu[col][cs[col].cidx() - 1];
          for (int row = 0; row < cs[col]._len; row++)
            cs[col].set(row, op(cs[col].atd(row), carry));
        }
      }
    }.doAll(result);
    return new ValFrame(new Frame(result));
  }
  // Axis 1: cumulate across each row; chunks are independent, no second pass needed.
  AstCumu.CumuTaskAxis1 rowTask = new AstCumu.CumuTaskAxis1(init());
  Frame rowResult = rowTask.doAll(frame.numCols(), Vec.T_NUM, frame).outputFrame(null, frame.names(), null);
  return new ValFrame(new Frame(rowResult));
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Example: method testBasic of class RefCntTest.
// Test basic Copy-On-Write optimization is working, by witnessing that the
// correct (small) number of real vec copies are made, despite many virtual
// copies being made.
@Test
public void testBasic() {
Session session = new Session();
Frame crimes = parse_test_file(Key.make("chicagoCrimes10k.hex"), "smalldata/chicago/chicagoCrimes10k.csv.zip");
// The VectorGroup of the parsed frame; its len() is re-read from the DKV after
// each Rapids expression. NOTE(review): the deltas below assume len() grows by
// one per freshly-allocated Vec in the group — confirm against VectorGroup.
Vec.VectorGroup vg = crimes.anyVec().group();
// Expect to compute and update crimes.hex "Date" column in-place, but the
// result is called py_1. Exactly 1 new vector is made (result of +)
// Pull latest value from DKV (no caching allowed)
int key1 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
Assert.assertTrue(crimes.vec("Date").isTime());
Rapids.exec("(tmp= py_1 (:= chicagoCrimes10k.hex (+ (cols_py chicagoCrimes10k.hex \"Date\") 1) 2 []))", session);
// User named frame is unchanged
Assert.assertTrue(crimes.vec("Date").isTime());
Frame py_1 = DKV.getGet(Key.make("py_1"));
// tmp= py_1 holds the changed column
Assert.assertTrue(py_1.vec("Date").isNumeric());
// msec since epoch is generally >1.3b msec
Assert.assertTrue(py_1.vec("Date").mean() > 1300000000L);
// Pull latest value from DKV (no caching allowed)
int key2 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector is made: as.Date
Assert.assertEquals(key1 + 1, key2);
// Remove original hex key - even though most columns are shared. Note
// that this remove is only valid when done in the session context -
// otherwise the sharing can't be tracked. Since most columns are shared,
// the DKV key should be removed, but NOT most data.
Rapids.exec("(rm chicagoCrimes10k.hex)", session);
crimes = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_1.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key_tmp = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// No New Vectors, and VecGroup never rolls backwards
Assert.assertEquals(key2, key_tmp);
// Both append, and nuke a dead temp, in one expression
Rapids.exec("(, (tmp= py_2 (append py_1 (day (cols_py py_1 \"Date\")) \"Day\")) (rm py_1))", session);
py_1 = null;
Frame py_2 = DKV.getGet(Key.make("py_2"));
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_2.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key3 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector
Assert.assertEquals(key2 + 1, key3);
// Start a series of computations that append columns
Rapids.exec("(tmp= py_3 (append py_2 (month (cols_py py_2 \"Date\")) \"Month\"))", session);
Frame py_3 = DKV.getGet(Key.make("py_3"));
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_3.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key4 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector
Assert.assertEquals(key3 + 1, key4);
// This one does 2 computations to append 1 column, also does an over-write
// instead of append.
Rapids.exec("(, (rm py_2) (tmp= py_4 (:= py_3 (+ (year (cols_py py_3 \"Date\")) 1900) 17 [])))", session);
Frame py_4 = DKV.getGet(Key.make("py_4"));
py_2 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_4.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key5 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 2 new vector, for two ops: "year" and "+1900".
Assert.assertEquals(key4 + 2, key5);
Rapids.exec("(, (rm py_3) (tmp= py_5 (append py_4 (week (cols_py py_4 \"Date\")) \"WeekNum\")))", session);
Frame py_5 = DKV.getGet(Key.make("py_5"));
py_3 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_5.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key6 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector
Assert.assertEquals(key5 + 1, key6);
Rapids.exec("(, (rm py_4) (tmp= py_6 (append py_5 (dayOfWeek (cols_py py_5 \"Date\")) \"WeekDay\")))", session);
Frame py_6 = DKV.getGet(Key.make("py_6"));
py_4 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_6.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key7 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector
Assert.assertEquals(key6 + 1, key7);
Rapids.exec("(, (rm py_5) (tmp= py_7 (append py_6 (hour (cols_py py_6 \"Date\")) \"HourOfDay\")))", session);
Frame py_7 = DKV.getGet(Key.make("py_7"));
py_5 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_7.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key8 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector
Assert.assertEquals(key7 + 1, key8);
// A more involved expression; lots of internal temps
Rapids.exec("(, (rm py_6) (tmp= py_8 (append py_7 (| (== (cols_py py_7 \"WeekDay\") \"Sun\") (== (cols_py py_7 \"WeekDay\") \"Sat\")) \"Weekend\")))", session);
Frame py_8 = DKV.getGet(Key.make("py_8"));
py_6 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_8.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key9 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 3 new vectors, one for each of {==, ==, |}
Assert.assertEquals(key8 + 3, key9);
// A more involved expression; lots of internal temps
Rapids.exec("(, (rm py_7) (tmp= py_9 (append py_8 (cut (cols_py py_8 \"Month\") [0 2 5 7 10 12] [\"Winter\" \"Spring\" \"Summer\" \"Autumn\" \"Winter\"] FALSE TRUE 3) \"Season\")))", session);
Frame py_9 = DKV.getGet(Key.make("py_9"));
py_7 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_9.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
int key10 = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// Exactly 1 new vector, despite lots of internal vecs
Assert.assertEquals(key9 + 1, key10);
// Drop a column
Rapids.exec("(, (rm py_8) (tmp= py_10 (cols py_9 -3)))", session);
Frame py_10 = DKV.getGet(Key.make("py_10"));
py_8 = null;
// Verify we can compute rollups on all cols; will crash if some cols are deleted
for (Vec vec : py_10.vecs()) vec.mean();
// Pull latest value from DKV (no caching allowed)
key_tmp = DKV.<Vec.VectorGroup>getGet(vg._key).len();
// No new vectors
Assert.assertEquals(key10, key_tmp);
// End the session; freeing all resources
session.end(null);
// NO FINALLY FRAME DELETES HERE PLEASE...
// Session ending should clean up; if it does not we need to detect the leak
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Example: method checkSaneFrame_impl of class RapidsTest.
/**
 * Verifies that every Frame stored locally still has all of its Vecs present
 * in the DKV. Prints a diagnostic for the first dangling Vec found.
 *
 * @return true when all frames are sane, false on the first missing Vec
 */
static boolean checkSaneFrame_impl() {
  for (Key key : H2O.localKeySet()) {
    Value value = Value.STORE_get(key);
    if (value == null || !value.isFrame()) continue;  // only inspect Frames
    Frame frame = value.get();
    Vec[] columns = frame.vecs();
    for (int col = 0; col < columns.length; col++) {
      if (DKV.get(columns[col]._key) != null) continue;  // Vec is present; keep scanning
      System.err.println("Frame " + frame._key + " in the DKV, is missing Vec " + columns[col]._key + ", name=" + frame._names[col]);
      return false;
    }
  }
  return true;
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Example: method testConditionalAssignCategorical of class AstRectangleConditionalAssignTest.
/**
 * Conditional rectangle-assign on a categorical column: rows of column 4 equal
 * to "a" are overwritten with "b", so the whole column becomes "b".
 *
 * Fix: the result frame {@code fr2} was previously removed only after
 * {@code assertCatVecEquals} succeeded, leaking the frame when the assertion
 * threw. The remove now runs in a finally block.
 */
@Test
public void testConditionalAssignCategorical() {
  Frame fr = makeTestFrame();
  // All five rows expected to be "b" over the domain {"a","b"}.
  Vec expected = cvec(new String[] { "a", "b" }, "b", "b", "b", "b", "b");
  try {
    Val val = Rapids.exec("(tmp= py_1 (:= data \"b\" 4 (== (cols_py data 4) \"a\")))");
    if (val instanceof ValFrame) {
      Frame fr2 = val.getFrame();
      try {
        assertCatVecEquals(expected, fr2.vec(4));
      } finally {
        fr2.remove();  // always release the result frame, even on assertion failure
      }
    }
  } finally {
    fr.remove();
    expected.remove();
  }
}
Use of water.fvec.Vec in project h2o-3 by h2oai.
Example: method makeTestFrame of class AstRectangleConditionalAssignTest.
// Builds a 5-row test frame keyed as "data" with one column per supported type:
//   v1 (T_NUM):  the row index r (1..5)
//   v2 (T_NUM):  11.2 * r
//   v3 (T_UUID): UUID assembled from (r, r * 10)
//   v4 (T_STR):  the string "row" + r
//   v5 (T_CAT):  over domain {"a","b"} -> even r maps to level 0 ("a"), odd r to level 1 ("b")
// The seed Vec v only drives row count/indices and is removed once the frame is built.
private Frame makeTestFrame() {
Frame fr = null;
Vec v = ivec(1, 2, 3, 4, 5);
try {
fr = new MRTask() {
@Override
public void map(Chunk[] cs, NewChunk[] ncs) {
// One output row per input row; r is the seed value for every column.
for (int i = 0; i < cs[0]._len; i++) {
int r = (int) cs[0].atd(i);
ncs[0].addNum(r);
ncs[1].addNum(11.2 * r);
ncs[2].addUUID(r, r * 10);
ncs[3].addStr("row" + r);
ncs[4].addCategorical(r % 2 == 0 ? 0 : 1);
}
}
}.doAll(new byte[] { Vec.T_NUM, Vec.T_NUM, Vec.T_UUID, Vec.T_STR, Vec.T_CAT }, v).outputFrame(Key.make("data"), new String[] { "v1", "v2", "v3", "v4", "v5" }, new String[][] { null, null, null, null, new String[] { "a", "b" } });
} finally {
// The seed vec is no longer needed once outputFrame has produced the columns.
v.remove();
}
assert fr != null;
return fr;
}
Aggregations