Use of water.rapids.Val in project h2o-3 by h2oai.
Class AstRBind, method apply.
/**
 * Row-binds the evaluated arguments into a single new frame.
 *
 * <p>Every argument from index 1 onward is executed. A frame argument contributes all of
 * its rows; any other value is read with {@code getNum()} and expanded into a one-row
 * frame that repeats the number in every column. All frames must agree on column count,
 * column names and column types. Categorical domains may differ between inputs: the
 * output domain of a categorical column is the union of the input domains, in order of
 * first appearance.
 *
 * <p>Temporary objects created here (the one-row frames for scalars and the zero-length
 * placeholder vec used when no argument is a frame) are released in a {@code finally}
 * block, so they are cleaned up even when validation or the rbind task throws.
 *
 * @param env  environment passed to each argument's {@code exec}
 * @param stk  stack helper used to track every evaluated argument
 * @param asts the call's AST nodes; index 0 is skipped by this method
 * @return a {@link ValFrame} wrapping the newly built frame; when {@code asts} has a
 *         single element, a zero-row, one-column frame with the default column name
 * @throws IllegalArgumentException if the inputs differ in column count, names or types
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  // Pass 1: execute all args and pick a canonical frame (the last frame argument seen);
  // all other inputs are later validated against it.
  Frame fr = null;
  // Total number of chunks in the output layout
  int nchks = 0;
  // Computed argument results, reused in pass 2
  Val[] vals = new Val[asts.length];
  for (int i = 1; i < asts.length; i++) {
    vals[i] = stk.track(asts[i].exec(env));
    if (vals[i].isFrame()) {
      fr = vals[i].getFrame();
      nchks += fr.anyVec().nChunks();
    } else {
      // One chunk per scalar
      nchks++;
    }
  }

  // No frame at all, just a pile of scalars (or nothing): fall back to a zero-length,
  // 1-column frame with the default column name as the canonical layout.
  Vec zz = null;
  if (fr == null) {
    fr = new Frame(new String[] { Frame.defaultColName(0) }, new Vec[] { zz = Vec.makeZero(0) });
    if (asts.length == 1) {
      // Nothing to bind; the placeholder vec is handed to the caller inside the frame.
      return new ValFrame(fr);
    }
  }

  // Input frames, indexed like asts (slot 0 unused)
  final Frame[] frs = new Frame[asts.length];
  // Column types of the canonical frame
  final byte[] types = fr.types();
  // Cumulative row offsets of the output chunks
  final long[] espc = new long[nchks + 1];
  // One-row frames created for scalar arguments; must be deleted before returning
  final Frame[] tmp_frs = new Frame[asts.length];
  AstRBind.ParallelRbinds t;
  try {
    // Pass 2: validate every input against the canonical frame and compute the new layout.
    int coffset = 0;
    for (int i = 1; i < asts.length; i++) {
      Val val = vals[i];
      Frame fr0;
      if (val.isFrame()) {
        fr0 = val.getFrame();
      } else {
        // Scalar: auto-expand into a 1-row frame with the canonical column names
        fr0 = tmp_frs[i] = new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols()));
      }
      // Check that all frames are compatible
      if (fr.numCols() != fr0.numCols())
        throw new IllegalArgumentException("rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns.");
      if (!Arrays.deepEquals(fr._names, fr0._names))
        throw new IllegalArgumentException("rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names));
      if (!Arrays.equals(types, fr0.types()))
        throw new IllegalArgumentException("rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types()));
      frs[i] = fr0;
      // Roll up the ESPC row counts: shift this input's offsets by the rows seen so far
      long roffset = espc[coffset];
      long[] espc2 = fr0.anyVec().espc();
      for (int j = 1; j < espc2.length; j++) {
        espc[coffset + j] = roffset + espc2[j];
      }
      // Advance the chunk offset by this input's chunk count
      coffset += espc2.length - 1;
    }

    // The placeholder vec is no longer needed once validation has passed.
    if (zz != null) {
      zz.remove();
      zz = null; // mark as released so the finally block does not remove it twice
    }

    // Build the new (union) domain for each categorical column, plus per-input maps from
    // an input's level index to the level's index in the union domain.
    String[][] domains = new String[types.length][];
    int[][][] cmaps = new int[types.length][][];
    for (int k = 0; k < types.length; ++k) {
      HashMap<String, Integer> dmap = new HashMap<>();
      int c = 0;
      if (types[k] == Vec.T_CAT) {
        int[][] maps = new int[frs.length][];
        for (int i = 1; i < frs.length; i++) {
          String[] dom = frs[i].vec(k).domain();
          maps[i] = new int[dom.length];
          for (int j = 0; j < dom.length; j++) {
            Integer known = dmap.get(dom[j]);
            if (known == null) {
              // First time this level is seen: give it the next index in the union
              dmap.put(dom[j], c);
              maps[i][j] = c++;
            } else {
              maps[i][j] = known;
            }
          }
        }
        cmaps[k] = maps;
      } else {
        cmaps[k] = new int[frs.length][];
      }
      // Non-categorical columns (and categoricals with no levels) get a null domain
      domains[k] = c == 0 ? null : new String[c];
      for (Map.Entry<String, Integer> e : dmap.entrySet()) {
        domains[k][e.getValue()] = e.getKey();
      }
    }

    // Now make Keys for the new Vecs
    Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols());
    Vec[] vecs = new Vec[fr.numCols()];
    int rowLayout = Vec.ESPC.rowLayout(keys[0], espc);
    for (int i = 0; i < vecs.length; i++) {
      vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]);
    }

    // Do the row-binds column-by-column.
    // Switch to F/J thread for continuations
    H2O.submitTask(t = new AstRBind.ParallelRbinds(frs, espc, vecs, cmaps)).join();
  } finally {
    // Release temporaries on every path, including validation failures.
    if (zz != null) {
      zz.remove();
    }
    for (Frame tfr : tmp_frs) {
      if (tfr != null) {
        tfr.delete();
      }
    }
  }
  return new ValFrame(new Frame(fr.names(), t._vecs));
}
Use of water.rapids.Val in project h2o-3 by h2oai.
Class StratifiedSplitTest, method testStratifiedSampling.
/**
 * Runs {@code h2o.random_stratified_split} on a 12-row response column, once as a numeric
 * column and once as a categorical column with domain {"dog", "cat"}, and checks the
 * returned single-column frame.
 *
 * <p>In both inputs the value 1 appears only in rows 0 and 11. The test expects the
 * result to hold 1 in row 0, 0 in row 11 and to have a mean of 1/3.
 * NOTE(review): presumably 1 marks the test split and 0 the train split, and 0.3333333
 * is the test fraction and 123 the seed; confirm against the split implementation.
 */
@Test
public void testStratifiedSampling() {
  f = ArrayUtils.frame("response", vec(ari(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)));
  fanimal = ArrayUtils.frame("response", vec(ar("dog", "cat"), ari(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)));
  // Re-wrap the frames, give them fresh keys and publish them so the Rapids
  // expressions below can refer to them by key.
  f = new Frame(f);
  fanimal = new Frame(fanimal);
  f._key = Key.make();
  fanimal._key = Key.make();
  DKV.put(f);
  DKV.put(fanimal);

  // Numeric response
  Val res1 = Rapids.exec("(h2o.random_stratified_split (cols_py " + f._key + " 0) 0.3333333 123)");
  fr1 = res1.getFrame();
  // Row 0 (a minority-class row) is expected to be assigned 1
  Assert.assertEquals(1, fr1.vec(0).at8(0));
  // Row 11 (the other minority-class row) is expected to be assigned 0
  Assert.assertEquals(0, fr1.vec(0).at8(11));
  // One third of all rows are expected to be assigned 1
  Assert.assertEquals(1.0 / 3.0, fr1.vec(0).mean(), 1e-5);

  // Categorical response: same expectations as for the numeric column
  Val res2 = Rapids.exec("(h2o.random_stratified_split (cols_py " + fanimal._key + " 0) 0.3333333 123)");
  fr2 = res2.getFrame();
  // Row 0 (a minority-class row) is expected to be assigned 1
  Assert.assertEquals(1, fr2.vec(0).at8(0));
  // Row 11 (the other minority-class row) is expected to be assigned 0
  Assert.assertEquals(0, fr2.vec(0).at8(11));
  // One third of all rows are expected to be assigned 1
  Assert.assertEquals(1.0 / 3.0, fr2.vec(0).mean(), 1e-5);
}
Use of water.rapids.Val in project h2o-3 by h2oai.
Class AstRectangleConditionalAssignTest, method testConditionalAssignCategorical.
/**
 * Executes a conditional assignment ({@code :=}) through Rapids that writes the level
 * "b" into column 4 of the frame named {@code data}, using the mask
 * {@code (== (cols_py data 4) "a")}, and compares column 4 of the result with a
 * categorical vec whose five rows are all "b".
 *
 * <p>The result frame is released in the {@code finally} block so that it is cleaned up
 * even when the comparison fails.
 * NOTE(review): if the result is not a {@code ValFrame} the test passes without checking
 * anything; confirm whether that is intended.
 */
@Test
public void testConditionalAssignCategorical() {
  Frame fr = makeTestFrame();
  Vec expected = cvec(new String[] { "a", "b" }, "b", "b", "b", "b", "b");
  Frame fr2 = null;
  try {
    Val val = Rapids.exec("(tmp= py_1 (:= data \"b\" 4 (== (cols_py data 4) \"a\")))");
    if (val instanceof ValFrame) {
      fr2 = val.getFrame();
      assertCatVecEquals(expected, fr2.vec(4));
    }
  } finally {
    // Same removal order as on the success path: result first, then the inputs.
    if (fr2 != null) {
      fr2.remove();
    }
    fr.remove();
    expected.remove();
  }
}
Use of water.rapids.Val in project h2o-3 by h2oai.
Class AstRectangleConditionalAssignTest, method testConditionalAssignString.
/**
 * Executes a conditional assignment ({@code :=}) through Rapids that writes the string
 * "tst" into column 3 of the frame named {@code data}, using the mask
 * {@code (== (cols_py data 4) "a")}, and compares column 3 of the result with the
 * string vec {"row1", "tst", "row3", "tst", "row5"}.
 *
 * <p>The result frame is released in the {@code finally} block so that it is cleaned up
 * even when the comparison fails.
 * NOTE(review): if the result is not a {@code ValFrame} the test passes without checking
 * anything; confirm whether that is intended.
 */
@Test
public void testConditionalAssignString() {
  Frame fr = makeTestFrame();
  Vec expected = svec("row1", "tst", "row3", "tst", "row5");
  Frame fr2 = null;
  try {
    Val val = Rapids.exec("(tmp= py_1 (:= data \"tst\" 3 (== (cols_py data 4) \"a\")))");
    if (val instanceof ValFrame) {
      fr2 = val.getFrame();
      assertStringVecEquals(expected, fr2.vec(3));
    }
  } finally {
    // Same removal order as on the success path: result first, then the inputs.
    if (fr2 != null) {
      fr2.remove();
    }
    fr.remove();
    expected.remove();
  }
}
Use of water.rapids.Val in project h2o-3 by h2oai.
Class AstRectangleFrameSliceAssignTest, method testAssignFrameSlice.
/**
 * Parses the airlines sample file under the key {@code data}, runs a Rapids slice
 * assignment that takes columns 8 and 11 of the rows starting at 10000 and assigns them
 * to the same columns of the rows starting at 0, and checks both columns of the result.
 *
 * <p>The expected arrays are built from the original columns by copying {@code _nRows}
 * entries from index 10000 to index 0. Nothing is checked when the Rapids result is not
 * a {@code ValFrame}. Both frames are deleted in the {@code finally} block.
 */
@Test
public void testAssignFrameSlice() throws Exception {
  final Frame data = parse_test_file(Key.make("data"), "smalldata/airlines/allyears2k_headers.zip");
  Frame output = null;
  try {
    final String expr = "(tmp= tst (:= data (rows (cols data [8.0, 11.0] ) [10000.0:" + _nRows + ".0] ) [8.0, 11.0] [0.0:" + _nRows + ".0] ) )";
    final Val result = Rapids.exec(expr);
    if (!(result instanceof ValFrame)) {
      // Not a frame result: nothing to verify (cleanup still runs in finally)
      return;
    }
    output = result.getFrame();

    // Column 8 is compared as categorical labels
    final String[] wantLabels = catVec2array(data.vec(8));
    System.arraycopy(wantLabels, 10000, wantLabels, 0, _nRows);
    final String[] gotLabels = catVec2array(output.vec(8));
    assertArrayEquals(wantLabels, gotLabels);

    // Column 11 is compared as numbers, with a small tolerance
    final double[] wantNums = vec2array(data.vec(11));
    System.arraycopy(wantNums, 10000, wantNums, 0, _nRows);
    final double[] gotNums = vec2array(output.vec(11));
    assertArrayEquals(wantNums, gotNums, 0.0001d);
  } finally {
    data.delete();
    if (output != null) {
      output.delete();
    }
  }
}
Aggregations