Search in sources :

Example 31 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class Frame method toTwoDimTable.

/**
 * Renders a window of this frame's rows as a {@link TwoDimTable}.
 *
 * NOTE(review): this listing appears to have lost its closing braces, the statements that
 * end each switch case, and part of the numeric-column expression (see the note inside the
 * T_NUM case) — confirm every detail against the upstream source before relying on it.
 *
 * @param off     index of the first row to show; lowered to numRows() when larger
 * @param len     number of rows to show; reduced so that off + len does not exceed numRows()
 * @param rollups when true, five extra header rows ("min", "mean", "stddev", "max",
 *                "missing") are reserved and filled from each Vec
 * @return a table whose string and double cell arrays both have len + H rows and one
 *         column per frame column, where H is 5 with rollups and 0 without
 */
public TwoDimTable toTwoDimTable(long off, int len, boolean rollups) {
    // Clamp the requested window to the rows that actually exist.
    if (off > numRows())
        off = numRows();
    if (off + len > numRows())
        len = (int) (numRows() - off);
    String[] rowHeaders = new String[len];
    // H = number of summary rows placed ahead of the data rows.
    int H = 0;
    if (rollups) {
        H = 5;
        rowHeaders = new String[len + H];
        rowHeaders[0] = "min";
        rowHeaders[1] = "mean";
        rowHeaders[2] = "stddev";
        rowHeaders[3] = "max";
        rowHeaders[4] = "missing";
        // Data rows are labelled with their absolute row index (off + i).
        for (int i = 0; i < len; i++) rowHeaders[i + H] = "" + (off + i);
    final int ncols = numCols();
    final Vec[] vecs = vecs();
    String[] coltypes = new String[ncols];
    // Two parallel cell grids are filled: one of strings, one of doubles.
    String[][] strCells = new String[len + H][ncols];
    double[][] dblCells = new double[len + H][ncols];
    // Single BufferedString instance passed to every atStr call below.
    final BufferedString tmpStr = new BufferedString();
    for (int i = 0; i < ncols; i++) {
        if (DKV.get(_keys[i]) == null) {
            // deleted Vec in Frame
            coltypes[i] = "string";
            for (int j = 0; j < len + H; j++) dblCells[j][i] = TwoDimTable.emptyDouble;
            for (int j = 0; j < len; j++) strCells[j + H][i] = "NO_VEC";
            // NOTE(review): nothing visible here skips to the next column — presumably a
            // dropped statement; confirm.
        Vec vec = vecs[i];
        if (rollups) {
            // Summary rows 0..4 match the row headers set above.
            dblCells[0][i] = vec.min();
            dblCells[1][i] = vec.mean();
            dblCells[2][i] = vec.sigma();
            dblCells[3][i] = vec.max();
            dblCells[4][i] = vec.naCnt();
        // Fill the data rows according to the column's type.
        switch(vec.get_type()) {
            case Vec.T_BAD:
                coltypes[i] = "string";
                for (int j = 0; j < len; j++) {
                    strCells[j + H][i] = null;
                    dblCells[j + H][i] = TwoDimTable.emptyDouble;
            case Vec.T_STR:
                coltypes[i] = "string";
                for (int j = 0; j < len; j++) {
                    // NA rows become the empty string.
                    strCells[j + H][i] = vec.isNA(off + j) ? "" : vec.atStr(tmpStr, off + j).toString();
                    dblCells[j + H][i] = TwoDimTable.emptyDouble;
            case Vec.T_CAT:
                coltypes[i] = "string";
                for (int j = 0; j < len; j++) {
                    // The stored long value is passed to vec.factor to obtain the text shown.
                    strCells[j + H][i] = vec.isNA(off + j) ? "" : vec.factor(vec.at8(off + j));
                    dblCells[j + H][i] = TwoDimTable.emptyDouble;
            case Vec.T_TIME:
                coltypes[i] = "string";
                DateTimeFormatter fmt = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
                for (int j = 0; j < len; j++) {
                    strCells[j + H][i] = vec.isNA(off + j) ? "" : fmt.print(vec.at8(off + j));
                    dblCells[j + H][i] = TwoDimTable.emptyDouble;
            case Vec.T_NUM:
                coltypes[i] = vec.isInt() ? "long" : "double";
                for (int j = 0; j < len; j++) {
                    // NOTE(review): the else-operand of this ternary is truncated in this
                    // listing (": + j)"); presumably a numeric read of row off + j — confirm.
                    dblCells[j + H][i] = vec.isNA(off + j) ? TwoDimTable.emptyDouble : + j);
                    strCells[j + H][i] = null;
            case Vec.T_UUID:
                throw H2O.unimpl();
                // NOTE(review): as listed, this statement follows a throw; a case label
                // (likely a default) was presumably dropped — confirm.
                System.err.println("bad vector type during debug print: " + vec.get_type());
    return new TwoDimTable("Frame " + _key, numRows() + " rows and " + numCols() + " cols", rowHeaders, /* clone the names, the TwoDimTable will replace nulls with ""*/
    _names.clone(), coltypes, null, "", strCells, dblCells);
Also used: BufferedString (water.parser.BufferedString), DateTimeFormatter (org.joda.time.format.DateTimeFormatter)

Example 32 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class NewChunk method add2Chunk_impl.

/**
 * Appends element {@code i} of this chunk to {@code c}, choosing the call by which
 * backing field is populated.
 *
 * NOTE(review): several branches are empty and the method's closing brace is absent in
 * this listing; statements were presumably dropped when the snippet was extracted —
 * confirm against the upstream source.
 *
 * @param c destination chunk that receives the value
 * @param i index of the element to copy
 * @throws IllegalStateException if the element is neither NA nor UUID and {@code _ms},
 *         {@code _ds} and {@code _ss} are all null
 */
private void add2Chunk_impl(NewChunk c, int i) {
    if (isNA2(i)) {
        // NOTE(review): empty in this listing — presumably the NA should be appended to c; confirm.
    } else if (isUUID()) {
        // A UUID is passed as two longs: _ms.get(i) and the raw bits of _ds[i].
        c.addUUID(_ms.get(i), Double.doubleToRawLongBits(_ds[i]));
    } else if (_ms != null) {
        c.addNum(_ms.get(i), _xs.get(i));
    } else if (_ds != null) {
        // NOTE(review): empty in this listing — presumably _ds[i] should be appended to c; confirm.
    } else if (_ss != null) {
        // Start offset of this string; -1 marks an NA entry (see the bStr line below).
        int sidx = _is[i];
        int nextNotNAIdx = i + 1;
        // Find next not-NA value (_is[idx] != -1)
        while (nextNotNAIdx < _is.length && _is[nextNotNAIdx] == -1) nextNotNAIdx++;
        // End offset: start of the next non-NA string, or _sslen when there is none.
        int send = nextNotNAIdx < _is.length ? _is[nextNotNAIdx] : _sslen;
        // NOTE(review): the "- 1" presumably excludes a separator byte between strings — confirm.
        int slen = send - sidx - 1;
        // null-BufferedString represents NA value
        BufferedString bStr = sidx == -1 ? null : _bfstr.set(_ss, sidx, slen);
        // NOTE(review): bStr is never used in this listing — presumably it should be appended to c; confirm.
    } else
        throw new IllegalStateException();
Also used : BufferedString(water.parser.BufferedString) PrettyPrint(water.util.PrettyPrint)

Example 33 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class ParquetFileGenerator method testParseAvroPrimitiveTypes.

/** Test parsing of a Parquet file originally made from Avro records (Avro &lt; 1.8, before the introduction of logical types). */
/**
 * Builds a {@code GenFrameAssertion} for "avroPrimitiveTypes.parquet" that generates the
 * file and then checks the parsed frame's column names, column types and per-row values.
 *
 * NOTE(review): the closing braces and whatever runs the assertion are absent in this
 * listing — confirm against the upstream source.
 */
public void testParseAvroPrimitiveTypes() {
    FrameAssertion assertion = new GenFrameAssertion("avroPrimitiveTypes.parquet", TestUtil.ari(9, 100)) {

        // Generates the input file in a fresh temporary directory.
        protected File prepareFile() throws IOException {
            return ParquetFileGenerator.generateAvroPrimitiveTypes(Files.createTempDir(), file, nrows(), new Date());

        // Verifies the parsed frame: schema first, then every row.
        public void check(Frame f) {
            assertArrayEquals("Column names need to match!", ar("myboolean", "myint", "mylong", "myfloat", "mydouble", "mydate", "myuuid", "mystring", "myenum"), f.names());
            assertArrayEquals("Column types need to match!", ar(Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_TIME, Vec.T_UUID, Vec.T_STR, Vec.T_CAT), f.types());
            // Single BufferedString instance passed to every atStr call below.
            BufferedString bs = new BufferedString();
            for (int row = 0; row < nrows(); row++) {
                // Expected values are all derived from the row index.
                assertEquals("Value in column myboolean", 1 - (row % 2), f.vec(0).at8(row));
                assertEquals("Value in column myint", 1 + row, f.vec(1).at8(row));
                assertEquals("Value in column mylong", 2 + row, f.vec(2).at8(row));
                assertEquals("Value in column myfloat", 3.1f + row, f.vec(3).at(row), EPSILON);
                // NOTE(review): the message says "myfloat" but column 4 is named "mydouble"
                // in the names assertion above — likely a copy-paste slip in the message.
                assertEquals("Value in column myfloat", 4.1 + row, f.vec(4).at(row), EPSILON);
                // Columns 5 ("mydate") and 6 ("myuuid") are not checked per row in this listing.
                assertEquals("Value in column mystring", "hello world: " + row, f.vec(7).atStr(bs, row).bytesToString());
                assertEquals("Value in column myenum", row % 2 == 0 ? "a" : "b", f.vec(8).factor(f.vec(8).at8(row)));
Also used : Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) Test(org.junit.Test)

Example 34 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class FrameTestUtil method collectS.

public static String[] collectS(Vec v) {
    String[] res = new String[(int) v.length()];
    BufferedString tmpStr = new BufferedString();
    for (int i = 0; i < v.length(); i++) res[i] = v.isNA(i) ? null : v.atStr(tmpStr, i).toString();
    return res;
Also used: BufferedString (water.parser.BufferedString)

Example 35 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

the class CStrChunkTest method test_inflate_impl.

/**
 * Round-trips 1,000,001 strings ("Foo0", "Foo1", ...) through a NewChunk and checks the
 * values after compress(), after IcedUtils.deepCopy, after extractRows into a fresh
 * NewChunk, and after compressing that NewChunk again. The body runs twice, for l = 0 and
 * l = 1; l is added to every read index and to the expected chunk length.
 *
 * NOTE(review): the statements that followed each "if (l == 1)" and the closing braces
 * are absent in this listing, so as printed each dangling "if" would guard the next
 * statement. The expected length of vals.length + 1 + l suggests NA entries were added
 * around the values — confirm against the upstream source.
 */
public void test_inflate_impl() {
    for (int l = 0; l < 2; ++l) {
        NewChunk nc = new NewChunk(null, 0);
        BufferedString[] vals = new BufferedString[1000001];
        for (int i = 0; i < vals.length; i++) {
            vals[i] = new BufferedString("Foo" + i);
        // NOTE(review): guarded statement missing in this listing.
        if (l == 1)
        for (BufferedString v : vals) nc.addStr(v);
        int len = nc.len();
        // Phase 1: compress and check the type, the length and every value.
        Chunk cc = nc.compress();
        Assert.assertEquals(vals.length + 1 + l, cc._len);
        Assert.assertTrue(cc instanceof CStrChunk);
        // NOTE(review): guarded statements missing in this listing.
        if (l == 1)
        if (l == 1)
        // Single BufferedString instance passed to every atStr/atStr_abs call below.
        BufferedString tmpStr = new BufferedString();
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.atStr_abs(tmpStr, l + i));
        // The entry after the last value must be NA.
        Assert.assertTrue(cc.isNA(vals.length + l));
        Assert.assertTrue(cc.isNA_abs(vals.length + l));
        // Phase 2: a deep copy must pass the same checks.
        Chunk cc2 = IcedUtils.deepCopy(cc);
        Assert.assertEquals(vals.length + 1 + l, cc2._len);
        Assert.assertTrue(cc2 instanceof CStrChunk);
        // NOTE(review): guarded statements missing in this listing.
        if (l == 1)
        if (l == 1)
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(cc2.isNA(vals.length + l));
        Assert.assertTrue(cc2.isNA_abs(vals.length + l));
        // Phase 3: extract the first len rows into a new NewChunk and re-check.
        nc = cc.extractRows(new NewChunk(null, 0), 0, len);
        Assert.assertEquals(vals.length + 1 + l, nc._len);
        // NOTE(review): guarded statements missing in this listing.
        if (l == 1)
        if (l == 1)
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(nc.isNA(vals.length + l));
        Assert.assertTrue(nc.isNA_abs(vals.length + l));
        // Phase 4: compress the extracted chunk again and re-check.
        cc2 = nc.compress();
        // NOTE(review): this checks cc._len although cc2 was just reassigned — possibly
        // cc2._len was intended; confirm.
        Assert.assertEquals(vals.length + 1 + l, cc._len);
        Assert.assertTrue(cc2 instanceof CStrChunk);
        // NOTE(review): guarded statements missing in this listing.
        if (l == 1)
        if (l == 1)
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(cc2.isNA(vals.length + l));
        Assert.assertTrue(cc2.isNA_abs(vals.length + l));
        // The re-compressed chunk's _mem must equal the first compressed chunk's _mem.
        Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
Also used : BufferedString(water.parser.BufferedString)


BufferedString (water.parser.BufferedString)43 Frame (water.fvec.Frame)12 Test (org.junit.Test)9 MRTask (water.MRTask)8 Vec (water.fvec.Vec)8 Chunk (water.fvec.Chunk)7 NewChunk (water.fvec.NewChunk)6 ValFrame (water.rapids.vals.ValFrame)5 IcedLong (water.util.IcedLong)5 IOException ( ByteBuffer (java.nio.ByteBuffer)2 Random (java.util.Random)2 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)2 TestFrameBuilder (water.fvec.TestFrameBuilder)2 BackendModel (deepwater.backends.BackendModel)1 BackendParams (deepwater.backends.BackendParams)1 RuntimeOptions (deepwater.backends.RuntimeOptions)1 ImageDataSet (deepwater.datasets.ImageDataSet)1 GenModel (hex.genmodel.GenModel)1 EasyPredictModelWrapper (hex.genmodel.easy.EasyPredictModelWrapper)1