Search in sources :

Example 6 with Val

use of water.rapids.Val in project h2o-3 by h2oai.

the class AstRBind method apply.

/**
 * Row-binds the results of {@code asts[1..]} into a single new frame.
 *
 * <p>Each argument is executed and is either a frame, whose rows are appended as-is, or a
 * scalar, which is expanded into a temporary 1-row frame with the value replicated across
 * every column. All frames must agree on column count, column names and column types;
 * categorical domains may differ and are merged into a union domain per column.
 *
 * <p>Temporary frames created for scalars, and the zero-length placeholder vec used when no
 * frame argument is present, are released in a {@code finally} block so they are not left
 * behind when validation or the parallel bind fails.
 *
 * @param env  execution environment handed to each argument's {@code exec}
 * @param stk  stack helper used to track every computed argument value
 * @param asts the AST nodes; index 0 is skipped, indices 1.. are the values to bind
 * @return a {@link ValFrame} wrapping the newly built frame (or a zero-length, 1-column frame
 *         when there are no arguments to bind)
 * @throws IllegalArgumentException if the frames differ in column count, names or types
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
    // Pass 1: execute all args and find a canonical frame; all frames must look like it.
    // The last frame-valued argument seen becomes the canonical one.
    Frame fr = null;
    // Total number of chunks in the output
    int nchks = 0;
    // Computed AstRoot results, saved for pass 2
    Val[] vals = new Val[asts.length];
    for (int i = 1; i < asts.length; i++) {
        vals[i] = stk.track(asts[i].exec(env));
        if (vals[i].isFrame()) {
            fr = vals[i].getFrame();
            nchks += fr.anyVec().nChunks();
        } else {
            // One chunk per scalar
            nchks++;
        }
    }
    // No frame, just a pile of scalars? Use a zero-length, 1-column, default-named frame
    // as the canonical layout. zz remembers its vec so it can be released afterwards.
    Vec zz = null;
    if (fr == null) {
        fr = new Frame(new String[] { Frame.defaultColName(0) }, new Vec[] { zz = Vec.makeZero(0) });
        if (asts.length == 1)
            // Nothing to bind: the placeholder frame itself is the result, so zz must survive.
            return new ValFrame(fr);
    }
    // Input frames, one per argument (scalars are replaced by their expanded frame)
    final Frame[] frs = new Frame[asts.length];
    // Only the frames created here for scalars; these must be deleted before returning
    final Frame[] tmp_frs = new Frame[asts.length];
    try {
        // Column types of the canonical frame
        final byte[] types = fr.types();
        // Compute a new layout: espc[c] is the starting row of output chunk c
        final long[] espc = new long[nchks + 1];
        int coffset = 0;
        for (int i = 1; i < asts.length; i++) {
            Val val = vals[i];
            // Scalar: auto-expand into a 1-row frame with the canonical names
            Frame fr0 = val.isFrame()
                    ? val.getFrame()
                    : (tmp_frs[i] = new Frame(fr._names, Vec.makeCons(val.getNum(), 1L, fr.numCols())));
            // Check that all frames are compatible
            if (fr.numCols() != fr0.numCols())
                throw new IllegalArgumentException("rbind frames must have all the same columns, found " + fr.numCols() + " and " + fr0.numCols() + " columns.");
            if (!Arrays.deepEquals(fr._names, fr0._names))
                throw new IllegalArgumentException("rbind frames must have all the same column names, found " + Arrays.toString(fr._names) + " and " + Arrays.toString(fr0._names));
            if (!Arrays.equals(types, fr0.types()))
                throw new IllegalArgumentException("rbind frames must have all the same column types, found " + Arrays.toString(types) + " and " + Arrays.toString(fr0.types()));
            frs[i] = fr0;
            // Roll up the row counts: shift this frame's chunk starts by the rows seen so far
            long roffset = espc[coffset];
            long[] espc2 = fr0.anyVec().espc();
            for (int j = 1; j < espc2.length; j++)
                espc[coffset + j] = roffset + espc2[j];
            // Advance the chunk offset
            coffset += espc2.length - 1;
        }
        // Build the union domain for each categorical column, plus a per-frame map from the
        // frame's own level index to the index in the union domain.
        String[][] domains = new String[types.length][];
        int[][][] cmaps = new int[types.length][][];
        for (int k = 0; k < types.length; ++k) {
            int[][] maps = new int[frs.length][];
            if (types[k] == Vec.T_CAT) {
                Map<String, Integer> dmap = new HashMap<>();
                for (int i = 1; i < frs.length; i++) {
                    String[] dom = frs[i].vec(k).domain();
                    maps[i] = new int[dom.length];
                    for (int j = 0; j < dom.length; j++) {
                        Integer idx = dmap.get(dom[j]);
                        if (idx == null) {
                            // First time this level is seen: it gets the next free index
                            idx = dmap.size();
                            dmap.put(dom[j], idx);
                        }
                        maps[i][j] = idx;
                    }
                }
                // A domain is only materialised when at least one level was found
                if (!dmap.isEmpty()) {
                    domains[k] = new String[dmap.size()];
                    for (Map.Entry<String, Integer> e : dmap.entrySet())
                        domains[k][e.getValue()] = e.getKey();
                }
            }
            cmaps[k] = maps;
        }
        // Now make Keys for the new Vecs
        Key<Vec>[] keys = fr.anyVec().group().addVecs(fr.numCols());
        Vec[] vecs = new Vec[fr.numCols()];
        int rowLayout = Vec.ESPC.rowLayout(keys[0], espc);
        for (int i = 0; i < vecs.length; i++)
            vecs[i] = new Vec(keys[i], rowLayout, domains[i], types[i]);
        // Do the row-binds column-by-column and wait for the submitted task to finish.
        AstRBind.ParallelRbinds t = new AstRBind.ParallelRbinds(frs, espc, vecs, cmaps);
        H2O.submitTask(t).join();
        return new ValFrame(new Frame(fr.names(), t._vecs));
    } finally {
        // Release the scalar-expansion frames and the placeholder vec on every exit path.
        for (Frame tfr : tmp_frs)
            if (tfr != null)
                tfr.delete();
        if (zz != null)
            zz.remove();
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) HashMap(java.util.HashMap) ValFrame(water.rapids.vals.ValFrame) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Vec(water.fvec.Vec) HashMap(java.util.HashMap) Map(java.util.Map) Key(water.Key)

Example 7 with Val

use of water.rapids.Val in project h2o-3 by h2oai.

the class StratifiedSplitTest method testStratifiedSampling.

/**
 * Runs {@code h2o.random_stratified_split} with ratio 0.3333333 and seed 123 on a 12-row
 * response column whose first and last rows hold the value 1, once as a numeric column and
 * once as a categorical ("dog"/"cat") column, and checks the resulting split vector.
 *
 * <p>NOTE(review): the original comments suggest 1 marks the test split and 0 the train
 * split in the returned vector — confirm against the split implementation.
 */
@Test
public void testStratifiedSampling() {
    f = ArrayUtils.frame("response", vec(ari(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)));
    fanimal = ArrayUtils.frame("response", vec(ar("dog", "cat"), ari(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)));
    // Re-wrap under fresh keys and publish so Rapids can look the frames up by key
    f = new Frame(f);
    fanimal = new Frame(fanimal);
    f._key = Key.make();
    fanimal._key = Key.make();
    DKV.put(f);
    DKV.put(fanimal);
    // Numeric response
    Val res1 = Rapids.exec("(h2o.random_stratified_split (cols_py " + f._key + " 0) 0.3333333 123)");
    fr1 = res1.getFrame();
    // First minority-class row (row 0) is expected to be flagged 1
    Assert.assertEquals(1, fr1.vec(0).at8(0));
    // Second minority-class row (row 11) is expected to be flagged 0
    Assert.assertEquals(0, fr1.vec(0).at8(11));
    // About one third of all rows are expected to be flagged 1
    Assert.assertEquals(1.0 / 3.0, fr1.vec(0).mean(), 1e-5);
    // Categorical response: same expectations
    Val res2 = Rapids.exec("(h2o.random_stratified_split (cols_py " + fanimal._key + " 0) 0.3333333 123)");
    fr2 = res2.getFrame();
    Assert.assertEquals(1, fr2.vec(0).at8(0));
    Assert.assertEquals(0, fr2.vec(0).at8(11));
    Assert.assertEquals(1.0 / 3.0, fr2.vec(0).mean(), 1e-5);
}
Also used : Val(water.rapids.Val) CreateFrame(hex.CreateFrame) Frame(water.fvec.Frame) Test(org.junit.Test)

Example 8 with Val

use of water.rapids.Val in project h2o-3 by h2oai.

the class AstRectangleConditionalAssignTest method testConditionalAssignCategorical.

/**
 * Assigns {@code "b"} into column 4 of {@code data} on the rows where column 4 equals
 * {@code "a"}, and checks that column 4 of the result matches the all-"b" expected vec.
 *
 * <p>The result frame is released in {@code finally}, so a failing assertion does not leave
 * it behind.
 */
@Test
public void testConditionalAssignCategorical() {
    Frame fr = makeTestFrame();
    Vec expected = cvec(new String[] { "a", "b" }, "b", "b", "b", "b", "b");
    // Result of the Rapids expression; stays null unless a frame came back
    Frame fr2 = null;
    try {
        Val val = Rapids.exec("(tmp= py_1 (:= data \"b\" 4 (== (cols_py data 4) \"a\")))");
        if (val instanceof ValFrame) {
            fr2 = val.getFrame();
            assertCatVecEquals(expected, fr2.vec(4));
        }
    } finally {
        // Same removal order as before: result first, then the inputs
        if (fr2 != null) {
            fr2.remove();
        }
        fr.remove();
        expected.remove();
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 9 with Val

use of water.rapids.Val in project h2o-3 by h2oai.

the class AstRectangleConditionalAssignTest method testConditionalAssignString.

/**
 * Assigns {@code "tst"} into column 3 of {@code data} on the rows where column 4 equals
 * {@code "a"}, and checks that column 3 of the result matches the expected string vec.
 *
 * <p>The result frame is released in {@code finally}, so a failing assertion does not leave
 * it behind.
 */
@Test
public void testConditionalAssignString() {
    Frame fr = makeTestFrame();
    Vec expected = svec("row1", "tst", "row3", "tst", "row5");
    // Result of the Rapids expression; stays null unless a frame came back
    Frame fr2 = null;
    try {
        Val val = Rapids.exec("(tmp= py_1 (:= data \"tst\" 3 (== (cols_py data 4) \"a\")))");
        if (val instanceof ValFrame) {
            fr2 = val.getFrame();
            assertStringVecEquals(expected, fr2.vec(3));
        }
    } finally {
        // Same removal order as before: result first, then the inputs
        if (fr2 != null) {
            fr2.remove();
        }
        fr.remove();
        expected.remove();
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Frame(water.fvec.Frame) Vec(water.fvec.Vec) Test(org.junit.Test)

Example 10 with Val

use of water.rapids.Val in project h2o-3 by h2oai.

the class AstRectangleFrameSliceAssignTest method testAssignFrameSlice.

/**
 * Copies a slice of columns 8 and 11 (rows starting at 10000) of the airlines data over the
 * first {@code _nRows} rows of the same columns, then compares both columns of the result
 * against arrays built by applying the same shift to the source data.
 */
@Test
public void testAssignFrameSlice() throws Exception {
    final Frame data = parse_test_file(Key.make("data"), "smalldata/airlines/allyears2k_headers.zip");
    Frame output = null;
    try {
        String rapids = "(tmp= tst (:= data (rows (cols data [8.0, 11.0] ) [10000.0:" + _nRows + ".0] ) [8.0, 11.0] [0.0:" + _nRows + ".0] ) )";
        Val result = Rapids.exec(rapids);
        if (!(result instanceof ValFrame)) {
            // Not a frame: nothing to compare; cleanup still runs in finally
            return;
        }
        output = result.getFrame();

        // Categorical column (index 8): shift the source levels the same way the assign did
        String[] wantCats = catVec2array(data.vec(8));
        System.arraycopy(wantCats, 10000, wantCats, 0, _nRows);
        String[] gotCats = catVec2array(output.vec(8));
        assertArrayEquals(wantCats, gotCats);

        // Numerical column (index 11): same shift, compared with a small tolerance
        double[] wantNums = vec2array(data.vec(11));
        System.arraycopy(wantNums, 10000, wantNums, 0, _nRows);
        double[] gotNums = vec2array(output.vec(11));
        assertArrayEquals(wantNums, gotNums, 0.0001d);
    } finally {
        data.delete();
        if (output != null) {
            output.delete();
        }
    }
}
Also used : Val(water.rapids.Val) ValFrame(water.rapids.vals.ValFrame) ValFrame(water.rapids.vals.ValFrame) Test(org.junit.Test)

Aggregations

Val (water.rapids.Val)76 Frame (water.fvec.Frame)65 Test (org.junit.Test)56 ValFrame (water.rapids.vals.ValFrame)52 Vec (water.fvec.Vec)14 ValRow (water.rapids.vals.ValRow)8 MRTask (water.MRTask)5 Session (water.rapids.Session)5 Chunk (water.fvec.Chunk)4 TestFrameBuilder (water.fvec.TestFrameBuilder)4 ValNum (water.rapids.vals.ValNum)4 NewChunk (water.fvec.NewChunk)2 AstParameter (water.rapids.ast.AstParameter)2 AstNumList (water.rapids.ast.params.AstNumList)2 CreateFrame (hex.CreateFrame)1 GLRMParameters (hex.glrm.GLRMModel.GLRMParameters)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1