use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.
the class Word2VecTest method testTransformAggregate.
/**
 * Checks {@code Word2VecModel.transform} with {@code AggregateMethod.AVERAGE} on a
 * string column in which {@code null} entries separate word sequences.
 *
 * A throw-away model is trained on the words "a" and "b"; its learned vectors are then
 * replaced by fixed values so the aggregated output can be compared with hand-computed
 * expectations. The input column is laid out in two chunks (10 + 4 rows): one sequence
 * straddles the chunk boundary and the last sequence has no trailing {@code null}.
 */
@Test
public void testTransformAggregate() {
  Scope.enter();
  try {
    // Minimal single-column training frame holding the two known words.
    Vec trainWords = Scope.track(svec("a", "b"));
    String[] trainNames = { "Words" };
    Vec[] trainVecs = { trainWords };
    Frame trainFrame = Scope.track(new Frame(Key.<Frame>make(), trainNames, trainVecs));
    DKV.put(trainFrame);

    // Train an arbitrary model, then overwrite what it learned with fixed vectors.
    Word2VecModel.Word2VecParameters params = new Word2VecModel.Word2VecParameters();
    params._vec_size = 2;
    params._epochs = 1;
    params._min_word_freq = 0;
    params._train = trainFrame._key;
    Word2VecModel model = (Word2VecModel) Scope.track_generic(new Word2Vec(params).trainModel().get());
    model._output._vecs = new float[] { 1.0f, 0.0f, 0.0f, 1.0f };
    DKV.put(model);

    String[] sentences = {
        "a", "b", null,
        "a", "c", null,      // "c" was not in the training words
        "c", null,
        "a", "a",            // first chunk ends here, sequence continues below
        "a", "b", null,
        "b"                  // final sequence is not null-terminated
    };
    Frame input = new TestFrameBuilder()
        .withName("data")
        .withColNames("Sentences")
        .withVecTypes(Vec.T_STR)
        .withDataForCol(0, sentences)
        .withChunkLayout(10, 4)
        .build();

    Frame aggregated = Scope.track(model.transform(input.vec(0), Word2VecModel.AggregateMethod.AVERAGE));

    // One expected value per sequence; NaN for the sequence made up only of "c".
    Vec expectedAs = Scope.track(dvec(0.5, 1.0, Double.NaN, 0.75, 0.0));
    Vec expectedBs = Scope.track(dvec(0.5, 0.0, Double.NaN, 0.25, 1.0));

    // Result columns are looked up by each word's index in the model vocabulary.
    BufferedString wordA = new BufferedString("a");
    BufferedString wordB = new BufferedString("b");
    assertVecEquals(expectedAs, aggregated.vec(model._output._vocab.get(wordA)), 0.0001);
    assertVecEquals(expectedBs, aggregated.vec(model._output._vocab.get(wordB)), 0.0001);
  } finally {
    Scope.exit();
  }
}
use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.
the class Word2VecTest method testW2V_pretrained.
/**
 * Builds a Word2Vec model from a pre-trained embedding frame and checks that
 * {@code transform(word)} returns the two vector components supplied for each word.
 *
 * The frame holds 1000 words with components {@code i/1000} and {@code 1 - i/1000},
 * spread over chunks of uneven size (the layout sums to 1000 rows).
 */
@Test
public void testW2V_pretrained() {
  String[] words = new String[1000];
  double[] v1 = new double[words.length];
  double[] v2 = new double[words.length];
  for (int i = 0; i < words.length; i++) {
    words[i] = "word" + i;
    v1[i] = i / (double) words.length;
    v2[i] = 1 - v1[i];
  }
  Scope.enter();
  try {
    // Build the frame inside the try so Scope.exit() still runs if construction fails;
    // otherwise the entered scope would stay open for later tests.
    Frame pretrained = new TestFrameBuilder()
        .withName("w2v-pretrained")
        .withColNames("Word", "V1", "V2")
        .withVecTypes(Vec.T_STR, Vec.T_NUM, Vec.T_NUM)
        .withDataForCol(0, words)
        .withDataForCol(1, v1)
        .withDataForCol(2, v2)
        .withChunkLayout(100, 100, 20, 80, 100, 100, 100, 100, 100, 100, 100)
        .build();
    Scope.track(pretrained);

    Word2VecModel.Word2VecParameters p = new Word2VecModel.Word2VecParameters();
    p._vec_size = 2;
    p._pre_trained = pretrained._key;
    Word2VecModel w2vm = (Word2VecModel) Scope.track_generic(new Word2Vec(p).trainModel().get());

    for (int i = 0; i < words.length; i++) {
      float[] wordVector = w2vm.transform(words[i]);
      float[] expected = { (float) v1[i], (float) v2[i] };
      assertArrayEquals("wordvec " + i, expected, wordVector, 0.0001f);
    }
  } finally {
    Scope.exit();
  }
}
use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.
the class AstMomentTest method testOneRowFrame.
/**
 * Exercises the Rapids {@code moment} primitive when one argument comes from a
 * single-row frame and another from a two-row frame.
 *
 * Both executions must yield a one-column, two-row time frame. In the second execution
 * the single-row argument is the NaN-valued "hour" column, so both result rows are
 * expected to be NaN.
 */
@Test
public void testOneRowFrame() {
  Scope.enter();
  try {
    Session session = new Session();

    // One-row frame: day = 1, hour = NaN.
    new TestFrameBuilder()
        .withName("$frame1", session)
        .withColNames("day", "hour")
        .withDataForCol(0, ar(1))
        .withDataForCol(1, ard(Double.NaN))
        .build();
    // Two-row frame: month = 2, 3.
    new TestFrameBuilder()
        .withName("$month", session)
        .withColNames("month")
        .withDataForCol(0, ar(2, 3))
        .build();

    // Case 1: the day is taken from the single-row frame.
    String dayExpr = "(moment 2010 $month (cols $frame1 'day') 0 0 0 0)->$res1";
    Val dayVal = Rapids.exec(dayExpr, session);
    assertTrue(dayVal.isFrame());
    Frame dayFrame = dayVal.getFrame();
    Scope.track(dayFrame);
    assertEquals(1, dayFrame.numCols());
    assertEquals(2, dayFrame.numRows());
    assertEquals(Vec.T_TIME, dayFrame.vec(0).get_type());

    // Case 2: the hour is taken from the single-row frame and is NaN.
    String hourExpr = "(moment 2010 $month 1 (cols $frame1 'hour') 0 0 0)->$res2";
    Val hourVal = Rapids.exec(hourExpr, session);
    assertTrue(hourVal.isFrame());
    Frame hourFrame = hourVal.getFrame();
    Scope.track(hourFrame);
    assertEquals(1, hourFrame.numCols());
    assertEquals(2, hourFrame.numRows());
    Vec hourTimes = hourFrame.vec(0);
    assertEquals(Vec.T_TIME, hourTimes.get_type());
    assertTrue(Double.isNaN(hourTimes.at(0)));
    assertTrue(Double.isNaN(hourTimes.at(1)));
  } finally {
    Scope.exit();
  }
}
use of water.fvec.TestFrameBuilder in project h2o-3 by h2oai.
the class ExternalFrameReaderClientTest method testReading.
@Test
public void testReading() throws IOException, InterruptedException {
final String frameName = "testFrame";
final long[] chunkLayout = { 2, 2, 2, 1 };
final Frame testFrame = new TestFrameBuilder().withName(frameName).withColNames("ColA", "ColB").withVecTypes(Vec.T_NUM, Vec.T_STR).withDataForCol(0, ard(Double.NaN, 1, 2, 3, 4, 5.6, 7, -1, 3.14)).withDataForCol(1, ar("A", "B", "C", "E", "F", "I", "J", "