Search in sources :

Example 31 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

From the class Frame, method toTwoDimTable.

/**
 * Renders a window of this frame's rows as a {@link TwoDimTable}.
 *
 * <p>The window begins at row {@code off} and covers {@code len} rows; both values are clamped so
 * the window never extends past {@code numRows()}. When {@code rollups} is set, five summary rows
 * (min, mean, stddev, max, missing) precede the data rows and every row gets a header; otherwise
 * the row headers are left unset.
 *
 * <p>Bad, string, categorical and time columns are reported with column type {@code "string"} and
 * fill the string cells; numeric columns are reported as {@code "long"} or {@code "double"} and
 * fill the double cells. Columns whose key is no longer present in the DKV show {@code "NO_VEC"}.
 * UUID columns are not supported and cause {@code H2O.unimpl()} to be thrown.
 *
 * @param off index of the first row to include
 * @param len number of rows to include
 * @param rollups whether to prepend the five per-column summary rows
 * @return a table holding the selected rows (and summary rows, if requested)
 */
public TwoDimTable toTwoDimTable(long off, int len, boolean rollups) {
    // Clamp the requested window to the rows that actually exist.
    if (off > numRows()) {
        off = numRows();
    }
    if (off + len > numRows()) {
        len = (int) (numRows() - off);
    }

    String[] rowHeaders = new String[len];
    int summaryRows = 0;
    if (rollups) {
        final String[] summaryLabels = { "min", "mean", "stddev", "max", "missing" };
        summaryRows = 5;
        rowHeaders = new String[len + summaryRows];
        System.arraycopy(summaryLabels, 0, rowHeaders, 0, summaryRows);
        for (int row = 0; row < len; row++) {
            rowHeaders[row + summaryRows] = "" + (off + row);
        }
    }

    final int columnCount = numCols();
    final Vec[] columns = vecs();
    final int totalRows = len + summaryRows;
    final String[] columnTypes = new String[columnCount];
    final String[][] textCells = new String[totalRows][columnCount];
    final double[][] numberCells = new double[totalRows][columnCount];
    // Scratch buffer handed to atStr for every string lookup.
    final BufferedString scratch = new BufferedString();

    for (int col = 0; col < columnCount; col++) {
        if (DKV.get(_keys[col]) == null) {
            // The Vec backing this column has been deleted from the frame.
            columnTypes[col] = "string";
            for (int row = 0; row < totalRows; row++) {
                numberCells[row][col] = TwoDimTable.emptyDouble;
            }
            for (int row = summaryRows; row < totalRows; row++) {
                textCells[row][col] = "NO_VEC";
            }
            continue;
        }

        final Vec column = columns[col];
        if (rollups) {
            numberCells[0][col] = column.min();
            numberCells[1][col] = column.mean();
            numberCells[2][col] = column.sigma();
            numberCells[3][col] = column.max();
            numberCells[4][col] = column.naCnt();
        }

        switch (column.get_type()) {
            case Vec.T_BAD: {
                columnTypes[col] = "string";
                for (int row = 0; row < len; row++) {
                    final int dst = row + summaryRows;
                    textCells[dst][col] = null;
                    numberCells[dst][col] = TwoDimTable.emptyDouble;
                }
                break;
            }
            case Vec.T_STR: {
                columnTypes[col] = "string";
                for (int row = 0; row < len; row++) {
                    final long src = off + row;
                    final int dst = row + summaryRows;
                    if (column.isNA(src)) {
                        textCells[dst][col] = "";
                    } else {
                        textCells[dst][col] = column.atStr(scratch, src).toString();
                    }
                    numberCells[dst][col] = TwoDimTable.emptyDouble;
                }
                break;
            }
            case Vec.T_CAT: {
                columnTypes[col] = "string";
                for (int row = 0; row < len; row++) {
                    final long src = off + row;
                    final int dst = row + summaryRows;
                    if (column.isNA(src)) {
                        textCells[dst][col] = "";
                    } else {
                        textCells[dst][col] = column.factor(column.at8(src));
                    }
                    numberCells[dst][col] = TwoDimTable.emptyDouble;
                }
                break;
            }
            case Vec.T_TIME: {
                columnTypes[col] = "string";
                final DateTimeFormatter timeFormat = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
                for (int row = 0; row < len; row++) {
                    final long src = off + row;
                    final int dst = row + summaryRows;
                    if (column.isNA(src)) {
                        textCells[dst][col] = "";
                    } else {
                        textCells[dst][col] = timeFormat.print(column.at8(src));
                    }
                    numberCells[dst][col] = TwoDimTable.emptyDouble;
                }
                break;
            }
            case Vec.T_NUM: {
                columnTypes[col] = column.isInt() ? "long" : "double";
                for (int row = 0; row < len; row++) {
                    final long src = off + row;
                    final int dst = row + summaryRows;
                    if (column.isNA(src)) {
                        numberCells[dst][col] = TwoDimTable.emptyDouble;
                    } else {
                        numberCells[dst][col] = column.at(src);
                    }
                    textCells[dst][col] = null;
                }
                break;
            }
            case Vec.T_UUID:
                throw H2O.unimpl();
            default:
                System.err.println("bad vector type during debug print: " + column.get_type());
                throw H2O.fail();
        }
    }

    final String tableHeader = "Frame " + _key;
    final String tableDescription = numRows() + " rows and " + numCols() + " cols";
    // The names are cloned because the TwoDimTable replaces nulls with "".
    return new TwoDimTable(tableHeader, tableDescription, rowHeaders, _names.clone(), columnTypes, null, "", textCells, numberCells);
}
Also used : BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 32 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

From the class NewChunk, method add2Chunk_impl.

/**
 * Appends element {@code i} of this chunk to the chunk {@code c}.
 *
 * <p>The element is dispatched by how this chunk stores its data: NA, UUID, mantissa/exponent
 * number ({@code _ms}/{@code _xs}), double ({@code _ds}) or string ({@code _ss}).
 *
 * @param c destination chunk that receives the element
 * @param i index of the element within this chunk
 * @throws IllegalStateException if none of the backing stores is present
 */
private void add2Chunk_impl(NewChunk c, int i) {
    if (isNA2(i)) {
        c.addNA();
        return;
    }
    if (isUUID()) {
        c.addUUID(_ms.get(i), Double.doubleToRawLongBits(_ds[i]));
        return;
    }
    if (_ms != null) {
        c.addNum(_ms.get(i), _xs.get(i));
        return;
    }
    if (_ds != null) {
        c.addNum(_ds[i]);
        return;
    }
    if (_ss == null) {
        throw new IllegalStateException();
    }

    final int start = _is[i];
    // Skip forward over NA slots (offset -1) to find where the next stored string begins.
    int next = i + 1;
    while (next < _is.length && _is[next] == -1) {
        next++;
    }
    final int end;
    if (next < _is.length) {
        end = _is[next];
    } else {
        // No later string exists, so this one runs to the end of the used string bytes.
        end = _sslen;
    }
    // NOTE(review): the extra -1 presumably drops a terminator byte between strings - confirm.
    final int length = end - start - 1;

    // A null BufferedString represents an NA value.
    BufferedString value = null;
    if (start != -1) {
        value = _bfstr.set(_ss, start, length);
    }
    c.addStr(value);
}
Also used : BufferedString(water.parser.BufferedString) PrettyPrint(water.util.PrettyPrint)

Example 33 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

From the class ParquetFileGenerator, method testParseAvroPrimitiveTypes.

/**
 * Tests parsing of a Parquet file originally made from Avro records (avro &lt; 1.8, before the
 * introduction of logical types).
 *
 * <p>The generated file is parsed into a frame whose column names and types are checked, and
 * whose boolean, int, long, float, double, string and enum columns are compared row by row
 * against the values expected for that row index.
 */
@Test
public void testParseAvroPrimitiveTypes() {
    FrameAssertion assertion = new GenFrameAssertion("avroPrimitiveTypes.parquet", TestUtil.ari(9, 100)) {

        @Override
        protected File prepareFile() throws IOException {
            return ParquetFileGenerator.generateAvroPrimitiveTypes(Files.createTempDir(), file, nrows(), new Date());
        }

        @Override
        public void check(Frame f) {
            assertArrayEquals("Column names need to match!", ar("myboolean", "myint", "mylong", "myfloat", "mydouble", "mydate", "myuuid", "mystring", "myenum"), f.names());
            assertArrayEquals("Column types need to match!", ar(Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_TIME, Vec.T_UUID, Vec.T_STR, Vec.T_CAT), f.types());
            // Scratch buffer reused for every string lookup.
            BufferedString bs = new BufferedString();
            for (int row = 0; row < nrows(); row++) {
                assertEquals("Value in column myboolean", 1 - (row % 2), f.vec(0).at8(row));
                assertEquals("Value in column myint", 1 + row, f.vec(1).at8(row));
                assertEquals("Value in column mylong", 2 + row, f.vec(2).at8(row));
                assertEquals("Value in column myfloat", 3.1f + row, f.vec(3).at(row), EPSILON);
                // Column 4 is "mydouble"; the failure message must name it, not "myfloat".
                assertEquals("Value in column mydouble", 4.1 + row, f.vec(4).at(row), EPSILON);
                assertEquals("Value in column mystring", "hello world: " + row, f.vec(7).atStr(bs, row).bytesToString());
                assertEquals("Value in column myenum", row % 2 == 0 ? "a" : "b", f.vec(8).factor(f.vec(8).at8(row)));
            }
        }
    };
    assertFrameAssertion(assertion);
}
Also used : Frame(water.fvec.Frame) BufferedString(water.parser.BufferedString) Test(org.junit.Test)

Example 34 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

From the class FrameTestUtil, method collectS.

/**
 * Copies every element of a string vector into a Java array.
 *
 * @param v vector to read; its length is narrowed to {@code int} to size the result
 * @return one entry per row, with {@code null} wherever the vector holds an NA
 */
public static String[] collectS(Vec v) {
    final String[] collected = new String[(int) v.length()];
    // Scratch buffer reused for every atStr call.
    final BufferedString scratch = new BufferedString();
    for (int row = 0; row < v.length(); row++) {
        if (v.isNA(row)) {
            collected[row] = null;
        } else {
            collected[row] = v.atStr(scratch, row).toString();
        }
    }
    return collected;
}
Also used : BufferedString(water.parser.BufferedString) BufferedString(water.parser.BufferedString)

Example 35 with BufferedString

use of water.parser.BufferedString in project h2o-3 by h2oai.

From the class CStrChunkTest, method test_inflate_impl.

/**
 * Round-trips a large string chunk through compression, deep copy, row extraction and
 * re-compression, checking length, contents and NA positions after each step.
 *
 * <p>The body runs twice: on the second pass ({@code l == 1}) a leading NA is added before the
 * strings, so every string index is shifted by {@code l}. A trailing NA is always added.
 */
@Test
public void test_inflate_impl() {
    for (int l = 0; l < 2; ++l) {
        NewChunk nc = new NewChunk(null, 0);
        BufferedString[] vals = new BufferedString[1000001];
        for (int i = 0; i < vals.length; i++) {
            vals[i] = new BufferedString("Foo" + i);
        }
        if (l == 1)
            nc.addNA();
        for (BufferedString v : vals) nc.addStr(v);
        nc.addNA();
        int len = nc.len();

        // Step 1: compress the NewChunk and verify the resulting chunk.
        Chunk cc = nc.compress();
        Assert.assertEquals(vals.length + 1 + l, cc._len);
        Assert.assertTrue(cc instanceof CStrChunk);
        if (l == 1) {
            Assert.assertTrue(cc.isNA(0));
            Assert.assertTrue(cc.isNA_abs(0));
        }
        BufferedString tmpStr = new BufferedString();
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(cc.isNA(vals.length + l));
        Assert.assertTrue(cc.isNA_abs(vals.length + l));

        // Step 2: a deep copy must expose the same contents.
        Chunk cc2 = IcedUtils.deepCopy(cc);
        Assert.assertEquals(vals.length + 1 + l, cc2._len);
        Assert.assertTrue(cc2 instanceof CStrChunk);
        if (l == 1) {
            Assert.assertTrue(cc2.isNA(0));
            Assert.assertTrue(cc2.isNA_abs(0));
        }
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(cc2.isNA(vals.length + l));
        Assert.assertTrue(cc2.isNA_abs(vals.length + l));

        // Step 3: extract all rows of the compressed chunk back into a fresh NewChunk.
        nc = cc.extractRows(new NewChunk(null, 0), 0, len);
        Assert.assertEquals(vals.length + 1 + l, nc._len);
        if (l == 1) {
            Assert.assertTrue(nc.isNA(0));
            Assert.assertTrue(nc.isNA_abs(0));
        }
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], nc.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(nc.isNA(vals.length + l));
        Assert.assertTrue(nc.isNA_abs(vals.length + l));

        // Step 4: re-compress the extracted rows; the length check must target the NEW chunk
        // (cc2), not the original cc whose length was already verified in step 1.
        cc2 = nc.compress();
        Assert.assertEquals(vals.length + 1 + l, cc2._len);
        Assert.assertTrue(cc2 instanceof CStrChunk);
        if (l == 1) {
            Assert.assertTrue(cc2.isNA(0));
            Assert.assertTrue(cc2.isNA_abs(0));
        }
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
        for (int i = 0; i < vals.length; ++i) Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
        Assert.assertTrue(cc2.isNA(vals.length + l));
        Assert.assertTrue(cc2.isNA_abs(vals.length + l));
        // The re-compressed bytes must match the original compressed bytes exactly.
        Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
    }
}
Also used : BufferedString(water.parser.BufferedString)

Aggregations

BufferedString (water.parser.BufferedString): 43, Frame (water.fvec.Frame): 12, Test (org.junit.Test): 9, MRTask (water.MRTask): 8, Vec (water.fvec.Vec): 8, Chunk (water.fvec.Chunk): 7, NewChunk (water.fvec.NewChunk): 6, ValFrame (water.rapids.vals.ValFrame): 5, IcedLong (water.util.IcedLong): 5, IOException (java.io.IOException): 2, ByteBuffer (java.nio.ByteBuffer): 2, Random (java.util.Random): 2, DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 2, TestFrameBuilder (water.fvec.TestFrameBuilder): 2, BackendModel (deepwater.backends.BackendModel): 1, BackendParams (deepwater.backends.BackendParams): 1, RuntimeOptions (deepwater.backends.RuntimeOptions): 1, ImageDataSet (deepwater.datasets.ImageDataSet): 1, GenModel (hex.genmodel.GenModel): 1, EasyPredictModelWrapper (hex.genmodel.easy.EasyPredictModelWrapper): 1