Search in sources :

Example 26 with Key

use of water.Key in project h2o-3 by h2oai.

the class ParseTimeTest method testMonthParseNoDay.

/**
 * Checks that dates given only as month and year (no day, no time) parse into a Time column.
 *
 * <p>Two spellings are exercised back to back: {@code MMMyy} (e.g. {@code JAN14}) and
 * {@code yy-MMM} (e.g. {@code 14-JAN}). Both halves of the input name the same 17 months, so
 * the expected array repeats the same 17 values twice.
 *
 * <p>The expected values are epoch milliseconds for midnight on the first of each month in
 * America/Los_Angeles; they are shifted into the JVM's default time zone before comparing so
 * the test does not depend on where it runs.
 */
@Test
public void testMonthParseNoDay() {
    DateTimeZone pst = DateTimeZone.forID("America/Los_Angeles");
    DateTimeZone localTZ = DateTimeZone.getDefault();
    // First 17 rows: just mmmyy, no time no day.
    // Last 17 rows:  just yy-mmm, no time no day.
    String data = "Date\n" + "JAN14\n" + "FEB14\n" + "MAR14\n" + "APR14\n" + "MAY14\n" + "JUN14\n" + "JUL14\n" + "AUG14\n" + "SEP14\n" + "OCT14\n" + "NOV14\n" + "DEC14\n" + "JAN16\n" + "MAR17\n" + "JUN18\n" + "SEP19\n" + "DEC20\n" + "14-JAN\n" + "14-FEB\n" + "14-MAR\n" + "14-APR\n" + "14-MAY\n" + "14-JUN\n" + "14-JUL\n" + "14-AUG\n" + "14-SEP\n" + "14-OCT\n" + "14-NOV\n" + "14-DEC\n" + "16-JAN\n" + "17-MAR\n" + "18-JUN\n" + "19-SEP\n" + "20-DEC\n";

    // Expected results in msec, as if parsed in America/Los_Angeles.
    long[] exp = new long[] {
        // mmmyy form: jan, feb, mar, apr 2014
        1388563200000L, 1391241600000L, 1393660800000L, 1396335600000L,
        // may, jun, jul, aug 2014
        1398927600000L, 1401606000000L, 1404198000000L, 1406876400000L,
        // sep, oct, nov, dec 2014
        1409554800000L, 1412146800000L, 1414825200000L, 1417420800000L,
        // jan 2016, mar 2017, jun 2018, sep 2019, dec 2020
        1451635200000L, 1488355200000L, 1527836400000L, 1567321200000L, 1606809600000L,
        // yy-mmm form: jan, feb, mar, apr 2014
        1388563200000L, 1391241600000L, 1393660800000L, 1396335600000L,
        // may, jun, jul, aug 2014
        1398927600000L, 1401606000000L, 1404198000000L, 1406876400000L,
        // sep, oct, nov, dec 2014
        1409554800000L, 1412146800000L, 1414825200000L, 1417420800000L,
        // jan 2016, mar 2017, jun 2018, sep 2019, dec 2020
        1451635200000L, 1488355200000L, 1527836400000L, 1567321200000L, 1606809600000L
    };
    // Adjust exp[] to local time
    for (int i = 0; i < exp.length; i++) {
        exp[i] += pst.getOffset(exp[i]) - localTZ.getOffset(exp[i]);
    }

    Key k1 = ParserTest.makeByteVec(data);
    Key r1 = Key.make("r1");
    Frame fr = ParseDataset.parse(r1, k1);
    try {
        Assert.assertEquals("Time", fr.vec(0).get_type_str());
        Vec vec = fr.vec("Date");
        for (int i = 0; i < exp.length; i++) {
            Assert.assertEquals("row " + i, exp[i], vec.at8(i));
        }
    } finally {
        // Always release the parsed frame, even when an assertion above fails.
        fr.delete();
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Key(water.Key)

Example 27 with Key

use of water.Key in project h2o-3 by h2oai.

the class ParserTest2 method testSparse2.

// Test correctness of sparse chunks.
// Added after the following data failed to encode properly: 0s were not
// considered when computing the compression strategy, so lemin was 6108 and
// there was a short overflow when encoding the zeros. As a result the first
// column was compressed into a C2SChunk, with the 0s causing the overflow.
@Test
public void testSparse2() {
    // 33 rows x 6 columns, all zero except four entries in the first column:
    // rows 9 and 24 hold 35351, row 15 holds 6108, row 30 holds 6334.
    final int rowCount = 33;
    StringBuilder csv = new StringBuilder();
    double[][] expected = new double[rowCount][];
    for (int row = 0; row < rowCount; row++) {
        switch (row) {
            case 9:
            case 24:
                csv.append("35351, 0,0,0,0,0\n");
                expected[row] = ard(35351, 0, 0, 0, 0, 0);
                break;
            case 15:
                csv.append("6108,  0,0,0,0,0\n");
                expected[row] = ard(6108, 0, 0, 0, 0, 0);
                break;
            case 30:
                csv.append("6334,  0,0,0,0,0\n");
                expected[row] = ard(6334, 0, 0, 0, 0, 0);
                break;
            default:
                csv.append("0,     0,0,0,0,0\n");
                expected[row] = ard(0, 0, 0, 0, 0, 0);
                break;
        }
    }
    Key dataKey = ParserTest.makeByteVec(csv.toString());
    ParserTest.testParsed(ParseDataset.parse(Key.make(), dataKey), expected, 33);
}
Also used : Key(water.Key) Test(org.junit.Test)

Example 28 with Key

use of water.Key in project h2o-3 by h2oai.

the class ParserTest2 method testNAs.

/**
 * Checks NA handling across nine columns (eight numeric, one categorical).
 *
 * <p>The input has a header plus nine data rows. The last two rows are blank and
 * {@code ?}/{@code NA} respectively; every cell in them must read back as NA, while every
 * cell in the seven rows before them must not. The column names suggest which chunk
 * encoding each column is meant to exercise; the test itself only reports the chunk class
 * in the failure message and does not assert it.
 */
@Test
public void testNAs() {
    String[] data = new String[] { "'C1Chunk',C1SChunk, 'C2Chunk', 'C2SChunk',  'C4Chunk',  'C4FChunk',  'C8Chunk',  'C8DChunk',   'Categorical'\n" + "0,       0.0,          0,           0,           0,          0 ,          0,   8.878979,           A \n", "1,       0.1,          1,         0.1,           1,          1 ,          1,   1.985934,           B \n", "2,       0.2,          2,         0.2,           2,          2 ,          2,   3.398018,           C \n", "3,       0.3,          3,         0.3,           3,          3 ,          3,   9.329589,           D \n", "4,       0.4,          4,           4,           4,          4 , 2147483649,   0.290184,           A \n", "0,       0.5,          0,           0,     -100000,    1.234e2 ,-2147483650,   1e-30,              B \n", "254,    0.25,       2550,      6553.4,      100000,    2.345e-2,          0,    1e30,              C \n", " ,          ,           ,            ,            ,            ,           ,        ,                \n", "?,        NA,          ?,           ?,           ?,           ?,          ?,       ?,                \n" };
    Key rkey = ParserTest.makeByteVec(data);
    ParseSetup ps = new ParseSetup(CSV_INFO, (byte) ',', false, ParseSetup.HAS_HEADER, 9, new String[] { "'C1Chunk'", "C1SChunk", "'C2Chunk'", "'C2SChunk'", "'C4Chunk'", "'C4FChunk'", "'C8Chunk'", "'C8DChunk'", "'Categorical'" }, ParseSetup.strToColumnTypes(new String[] { "Numeric", "Numeric", "Numeric", "Numeric", "Numeric", "Numeric", "Numeric", "Numeric", "Enum" }), null, null, null);
    Frame fr = ParseDataset.parse(Key.make("na_test.hex"), new Key[] { rkey }, true, ps);
    try {
        int nlines = (int) fr.numRows();
        Assert.assertEquals(9, nlines);
        Assert.assertEquals(9, fr.numCols());
        // All rows except the last two carry real values in every column.
        for (int i = 0; i < nlines - 2; ++i) {
            for (Vec v : fr.vecs()) {
                Assert.assertTrue("error at line " + i + ", vec " + v.chunkForChunkIdx(0).getClass().getSimpleName(), !Double.isNaN(v.at(i)) && !v.isNA(i));
            }
        }
        // The last two rows must be NA in every column.
        for (int j = 0; j < fr.vecs().length; j++) {
            Vec v = fr.vecs()[j];
            for (int i = nlines - 2; i < nlines; ++i) {
                Assert.assertTrue(i + ", " + j + ":" + v.at(i) + ", " + v.isNA(i), Double.isNaN(v.at(i)) && v.isNA(i));
            }
        }
    } finally {
        // Always release the parsed frame, even when an assertion above fails.
        fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) Vec(water.fvec.Vec) Key(water.Key) PrettyPrint(water.util.PrettyPrint) Test(org.junit.Test)

Example 29 with Key

use of water.Key in project h2o-3 by h2oai.

the class ParserTestARFF method testEnumSplit.

/**
 * Checks that an ARFF {@code ENUM} attribute declared in the first input still applies to
 * data that continues in a second, header-less input.
 *
 * <p>The first key holds the ARFF header and three values; the second holds three more
 * values only. The parsed column must be categorical (not string) with six levels.
 */
@Test
public void testEnumSplit() {
    String data1 = "@RELATION type\n" + "\n" + "@ATTRIBUTE num ENUM\n" + "\n" + "@DATA\n" + "0\n" + "1.324e-13\n" + "-2\n";
    String data2 = "4\n" + "5\n" + "6\n";
    Key k1 = ParserTest.makeByteVec(data1);
    Key k2 = ParserTest.makeByteVec(data2);
    Key[] k = new Key[] { k1, k2 };
    Frame fr = ParseDataset.parse(Key.make(), k);
    try {
        Assert.assertTrue(fr.anyVec().isCategorical());
        Assert.assertFalse(fr.anyVec().isString());
        // assertEquals reports the actual cardinality on failure.
        Assert.assertEquals(6, fr.anyVec().cardinality());
    } finally {
        // Always release the parsed frame, even when an assertion above fails.
        fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) Key(water.Key) Test(org.junit.Test)

Example 30 with Key

use of water.Key in project h2o-3 by h2oai.

the class ParserTestARFF method testMultipleFilesEnum.

/**
 * Checks that parsing the same ARFF {@code enum} file twice as one dataset yields a single
 * categorical column with consistent level codes across both inputs.
 *
 * <p>Each input contributes the values {@code 0}, {@code 1.324e-13}, {@code -2}; the test
 * expects them to read back as level codes 1, 2, 0, and the same three codes again for the
 * second input.
 */
@Test
public void testMultipleFilesEnum() {
    String data1 = "@RELATION type\n" + "\n" + "@ATTRIBUTE num enum\n" + "\n" + "@DATA\n" + "0\n" + "1.324e-13\n" + "-2\n";
    Key k1 = ParserTest.makeByteVec(data1);
    Key k2 = ParserTest.makeByteVec(data1);
    Key[] k = new Key[] { k1, k2 };
    Frame fr = ParseDataset.parse(Key.make(), k);
    try {
        Assert.assertFalse(fr.anyVec().isString());
        Assert.assertTrue(fr.anyVec().isCategorical());
        Assert.assertFalse(fr.anyVec().isUUID());
        // Expected level code per row: the three-row pattern repeats for the second input.
        int[] expectedLevels = { 1, 2, 0, 1, 2, 0 };
        for (int row = 0; row < expectedLevels.length; row++) {
            // Delta 0 keeps the exact-equality check of the original `==` comparison.
            Assert.assertEquals("row " + row, expectedLevels[row], fr.anyVec().at(row), 0);
        }
    } finally {
        // Always release the parsed frame, even when an assertion above fails.
        fr.delete();
    }
}
Also used : Frame(water.fvec.Frame) Key(water.Key) Test(org.junit.Test)

Aggregations

Key (water.Key)94 Frame (water.fvec.Frame)56 Test (org.junit.Test)42 Vec (water.fvec.Vec)21 File (java.io.File)18 NFSFileVec (water.fvec.NFSFileVec)17 Futures (water.Futures)10 Random (java.util.Random)7 H2OIllegalArgumentException (water.exceptions.H2OIllegalArgumentException)6 ValFrame (water.rapids.vals.ValFrame)6 DateTimeZone (org.joda.time.DateTimeZone)5 Model (hex.Model)4 SplitFrame (hex.SplitFrame)4 DeepLearning (hex.deeplearning.DeepLearning)4 DeepLearningModel (hex.deeplearning.DeepLearningModel)4 AppendableVec (water.fvec.AppendableVec)4 NewChunk (water.fvec.NewChunk)4 Grid (hex.grid.Grid)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3