Search in sources :

Example 1 with TestFrameBuilder

use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.

the class Word2VecTest method testTransformAggregate.

/**
 * Checks sentence-level aggregation of word vectors with
 * {@code Word2VecModel.AggregateMethod.AVERAGE}.
 *
 * A tiny model is trained on the words "a" and "b" and its learned vectors are then
 * overwritten with fixed values so the expected averages are known. The input column
 * holds sentences separated by {@code null} entries and is split over two chunks
 * (10 + 4 rows) so that one sentence spans the chunk boundary. The word "c" is not in
 * the training data; the expected values treat a sentence made only of "c" as NaN.
 */
@Test
public void testTransformAggregate() {
    Scope.enter();
    try {
        Vec v = Scope.track(svec("a", "b"));
        Frame fr = Scope.track(new Frame(Key.<Frame>make(), new String[] { "Words" }, new Vec[] { v }));
        DKV.put(fr);
        // build an arbitrary w2v model & overwrite the learned vector with fixed values
        Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
        p._train = fr._key;
        p._min_word_freq = 0;
        p._epochs = 1;
        p._vec_size = 2;
        Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
        w2vm._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
        DKV.put(w2vm);
        String[] sentences = { "a", "b", null, "a", "c", null, "c", null, "a", "a", /*chunk end*/
        "a", "b", null, // no terminator at the end
        "b" };
        // Track the input frame as well, so that Scope.exit() releases it together with
        // every other object created by this test instead of leaving it behind.
        Frame f = Scope.track(new TestFrameBuilder()
                .withName("data")
                .withColNames("Sentences")
                .withVecTypes(Vec.T_STR)
                .withDataForCol(0, sentences)
                .withChunkLayout(10, 4)
                .build());
        Frame result = Scope.track(w2vm.transform(f.vec(0), Word2VecModel.AggregateMethod.AVERAGE));
        // One expected row per sentence; the output column is looked up via the vocabulary index of the word.
        Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
        Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));
        assertVecEquals(expectedAs, result.vec(w2vm._output._vocab.get(new BufferedString("a"))), 0.0001);
        assertVecEquals(expectedBs, result.vec(w2vm._output._vocab.get(new BufferedString("b"))), 0.0001);
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) Vec(water.fvec.Vec) BufferedString(water.parser.BufferedString)

Example 2 with TestFrameBuilder

use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.

the class Word2VecTest method testW2V_pretrained.

/**
 * Checks that a Word2Vec model built from a pre-trained embedding frame returns, for
 * every word, exactly the vector stored in that frame.
 *
 * The pre-trained frame has 1000 rows ("word0" .. "word999") with two numeric columns
 * V1 = i / 1000 and V2 = 1 - V1, laid out over chunks of uneven size.
 */
@Test
public void testW2V_pretrained() {
    String[] words = new String[1000];
    double[] v1 = new double[words.length];
    double[] v2 = new double[words.length];
    for (int i = 0; i < words.length; i++) {
        words[i] = "word" + i;
        v1[i] = i / (float) words.length;
        v2[i] = 1 - v1[i];
    }
    Scope.enter();
    try {
        // Build the frame inside the try block: if the build fails, the finally clause
        // still calls Scope.exit() and balances the Scope.enter() above.
        Frame pretrained = new TestFrameBuilder()
                .withName("w2v-pretrained")
                .withColNames("Word", "V1", "V2")
                .withVecTypes(Vec.T_STR, Vec.T_NUM, Vec.T_NUM)
                .withDataForCol(0, words)
                .withDataForCol(1, v1)
                .withDataForCol(2, v2)
                .withChunkLayout(100, 100, 20, 80, 100, 100, 100, 100, 100, 100, 100)
                .build();
        Scope.track(pretrained);
        Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
        p._vec_size = 2;
        p._pre_trained = pretrained._key;
        Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());
        for (int i = 0; i < words.length; i++) {
            float[] wordVector = w2vm.transform(words[i]);
            assertArrayEquals("wordvec " + i, new float[] { (float) v1[i], (float) v2[i] }, wordVector, 0.0001f);
        }
    } finally {
        Scope.exit();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) BufferedString(water.parser.BufferedString)

Example 3 with TestFrameBuilder

use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.

the class AstMomentTest method testOneRowFrame.

/**
 * Runs the Rapids {@code moment} expression with a one-row frame column mixed with a
 * two-row frame column and checks the shape and type of the result.
 *
 * The first expression takes the day from the one-row frame; the second takes the hour
 * from the same frame, where the value is NaN, and therefore expects NaN in both rows.
 */
@Test
public void testOneRowFrame() {
    Scope.enter();
    try {
        final Session session = new Session();
        // Single-row frame: a valid day and a missing (NaN) hour.
        new TestFrameBuilder()
                .withName("$frame1", session)
                .withColNames("day", "hour")
                .withDataForCol(0, ar(1))
                .withDataForCol(1, ard(Double.NaN))
                .build();
        // Two-row frame that determines the expected number of result rows.
        new TestFrameBuilder()
                .withName("$month", session)
                .withColNames("month")
                .withDataForCol(0, ar(2, 3))
                .build();

        // Day taken from the one-row frame.
        final Val dayResult = Rapids.exec("(moment 2010 $month (cols $frame1 'day') 0 0 0 0)->$res1", session);
        assertTrue(dayResult.isFrame());
        final Frame dayFrame = dayResult.getFrame();
        Scope.track(dayFrame);
        assertEquals(1, dayFrame.numCols());
        assertEquals(2, dayFrame.numRows());
        assertEquals(Vec.T_TIME, dayFrame.vec(0).get_type());

        // Hour taken from the one-row frame, where it is NaN.
        final Val hourResult = Rapids.exec("(moment 2010 $month 1 (cols $frame1 'hour') 0 0 0)->$res2", session);
        assertTrue(hourResult.isFrame());
        final Frame hourFrame = hourResult.getFrame();
        Scope.track(hourFrame);
        assertEquals(1, hourFrame.numCols());
        assertEquals(2, hourFrame.numRows());
        assertEquals(Vec.T_TIME, hourFrame.vec(0).get_type());
        assertTrue(Double.isNaN(hourFrame.vec(0).at(0)));
        assertTrue(Double.isNaN(hourFrame.vec(0).at(1)));
    } finally {
        Scope.exit();
    }
}
Also used : Val(water.rapids.Val) Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) Session(water.rapids.Session) Test(org.junit.Test)

Example 4 with TestFrameBuilder

use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.

the class ExternalFrameReaderClientTest method testReading.

/**
 * Reads every chunk of a small test frame in parallel through
 * {@code ExternalFrameReaderClient}, one reader thread per chunk, and checks the row
 * counts and NA flags seen by each reader.
 *
 * Failures raised inside reader threads are handed to the test thread through the
 * {@code exc} field and rethrown after the corresponding thread has been joined.
 *
 * @throws IOException          declared by the test signature
 * @throws InterruptedException if the test thread is interrupted while joining a reader thread
 */
@Test
public void testReading() throws IOException, InterruptedException {
    final String frameName = "testFrame";
    final long[] chunkLayout = { 2, 2, 2, 1 };
    // create the frame that the readers will fetch
    final Frame testFrame = new TestFrameBuilder().withName(frameName).withColNames("ColA", "ColB").withVecTypes(Vec.T_NUM, Vec.T_STR).withDataForCol(0, ard(Double.NaN, 1, 2, 3, 4, 5.6, 7, -1, 3.14)).withDataForCol(1, ar("A", "B", "C", "E", "F", "I", "J", "", null)).withChunkLayout(chunkLayout).build();
    final String[] nodes = new String[H2O.CLOUD._memary.length];
    // get ip and ports of h2o nodes
    for (int i = 0; i < nodes.length; i++) {
        nodes[i] = H2O.CLOUD._memary[i].getIpPortString();
    }
    final int[] selectedColumnIndices = { 0, 1 };
    // specify expected types for selected columns
    final byte[] expectedTypes = { ExternalFrameUtils.EXPECTED_DOUBLE, ExternalFrameUtils.EXPECTED_STRING };
    final int nChunks = testFrame.anyVec().nChunks();
    // we will read from all chunks at the same time
    Thread[] threads = new Thread[nChunks];
    try {
        // open one connection per chunk, spreading the chunks over the available nodes
        for (int idx = 0; idx < nChunks; idx++) {
            final int currentChunkIdx = idx;
            threads[idx] = new Thread() {

                @Override
                public void run() {
                    try {
                        ByteChannel sock = ExternalFrameUtils.getConnection(nodes[currentChunkIdx % nodes.length]);
                        ExternalFrameReaderClient reader = new ExternalFrameReaderClient(sock, frameName, currentChunkIdx, selectedColumnIndices, expectedTypes);
                        int rowsRead = 0;
                        // expected value first, so a failure message reports the two sides correctly
                        assertEquals(chunkLayout[currentChunkIdx], reader.getNumRows());
                        while (rowsRead < reader.getNumRows()) {
                            // only the very first numeric value of the frame is NaN
                            if (rowsRead == 0 && currentChunkIdx == 0) {
                                reader.readDouble();
                                assertTrue("[0,0] in chunk 0 should be NA", reader.isLastNA());
                            } else {
                                reader.readDouble();
                                assertFalse("Should not be NA", reader.isLastNA());
                            }
                            reader.readString();
                            assertFalse("Should not be NA", reader.isLastNA());
                            rowsRead++;
                        }
                        assertEquals("Num or rows read was " + rowsRead + ", expecting " + reader.getNumRows(), reader.getNumRows(), rowsRead);
                        reader.waitUntilAllReceived();
                        sock.close();
                    } catch (AssertionError e) {
                        exc = e;
                    } catch (IOException e) {
                        // An I/O failure means this chunk was not verified; report it as a
                        // test failure instead of only printing it. The IOException becomes
                        // the cause of the AssertionError.
                        exc = new AssertionError(e);
                    }
                }
            };
            threads[idx].start();
        }
        // wait for the reader threads and rethrow the first failure seen
        for (Thread t : threads) {
            t.join();
            if (exc != null) {
                throw exc;
            }
        }
    } finally {
        testFrame.remove();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) IOException(java.io.IOException) ByteChannel(java.nio.channels.ByteChannel) Test(org.junit.Test)

Example 5 with TestFrameBuilder

use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.

the class WordCountTaskTest method testWordCount.

/**
 * Counts words in a single string column with {@code WordCountTask} and checks the totals.
 *
 * The column has 10000 rows that repeat a pattern of ten values: three "A", two "B"
 * and five "C", giving 3000, 2000 and 5000 occurrences respectively. The rows are
 * spread over four chunks of different sizes.
 */
@Test
public void testWordCount() {
    final int rowCount = 10000;
    final String[] words = new String[rowCount];
    for (int row = 0; row < rowCount; row++) {
        final int bucket = row % 10;
        // buckets 0-2 -> "A", 3-4 -> "B", 5-9 -> "C"
        words[row] = bucket < 3 ? "A" : (bucket < 5 ? "B" : "C");
    }
    final Frame fr = new TestFrameBuilder()
            .withName("data")
            .withColNames("Str")
            .withVecTypes(Vec.T_STR)
            .withDataForCol(0, words)
            .withChunkLayout(100, 900, 5000, 4000)
            .build();
    try {
        final Map<BufferedString, IcedLong> wordCounts = new WordCountTask().doAll(fr.vec(0))._counts;
        assertEquals(3, wordCounts.size());
        assertEquals(3000L, wordCounts.get(new BufferedString("A"))._val);
        assertEquals(2000L, wordCounts.get(new BufferedString("B"))._val);
        assertEquals(5000L, wordCounts.get(new BufferedString("C"))._val);
        System.out.println(wordCounts);
    } finally {
        // the frame is not Scope-tracked here, so it is removed explicitly
        fr.remove();
    }
}
Also used : Frame(water.fvec.Frame) TestFrameBuilder(water.fvec.TestFrameBuilder) IcedLong(water.util.IcedLong) BufferedString(water.parser.BufferedString) Test(org.junit.Test)

Aggregations

TestFrameBuilder (water.fvec.TestFrameBuilder)12 Frame (water.fvec.Frame)9 Test (org.junit.Test)8 Session (water.rapids.Session)6 Val (water.rapids.Val)4 Random (java.util.Random)3 BufferedString (water.parser.BufferedString)3 IOException (java.io.IOException)1 ByteChannel (java.nio.channels.ByteChannel)1 Vec (water.fvec.Vec)1 ValFrame (water.rapids.vals.ValFrame)1 IcedLong (water.util.IcedLong)1