Use of water.parser.BufferedString in project h2o-3 by h2oai.
From class Frame, method toTwoDimTable.
/**
 * Renders a window of this frame as a {@link TwoDimTable}.
 *
 * <p>The window starts at row {@code off} and covers at most {@code len} rows; both values are
 * clamped so the window does not extend past {@code numRows()}. When {@code rollups} is set, five
 * summary rows (min, mean, stddev, max, missing) precede the data rows and every row receives a
 * header. Without rollups the row-header array is allocated but its entries are left unset.
 *
 * <p>Bad, string, categorical and time columns are reported with column type "string"; numeric
 * columns are reported as "long" or "double" depending on {@code Vec.isInt()}. A column whose key
 * is no longer present in the DKV is shown as a "string" column filled with "NO_VEC". UUID columns
 * are rejected via {@code H2O.unimpl()}, and any other type is reported on stderr and rejected via
 * {@code H2O.fail()}.
 *
 * @param off     index of the first frame row to show
 * @param len     maximum number of frame rows to show
 * @param rollups whether to prepend the five summary rows
 * @return the populated table
 */
public TwoDimTable toTwoDimTable(long off, int len, boolean rollups) {
  // Clamp the requested window to the rows that exist.
  if (off > numRows()) {
    off = numRows();
  }
  if (off + len > numRows()) {
    len = (int) (numRows() - off);
  }

  String[] rowLabels = new String[len];
  int summaryRows = 0;
  if (rollups) {
    final String[] summaryLabels = {"min", "mean", "stddev", "max", "missing"};
    summaryRows = summaryLabels.length;
    rowLabels = new String[len + summaryRows];
    System.arraycopy(summaryLabels, 0, rowLabels, 0, summaryRows);
    // Data rows are labelled with their absolute row index.
    for (int r = 0; r < len; r++) {
      rowLabels[summaryRows + r] = String.valueOf(off + r);
    }
  }

  final int columnCount = numCols();
  final Vec[] columns = vecs();
  final String[] columnTypes = new String[columnCount];
  final String[][] textCells = new String[len + summaryRows][columnCount];
  final double[][] numericCells = new double[len + summaryRows][columnCount];
  // Scratch buffer reused for every string lookup.
  final BufferedString scratch = new BufferedString();

  for (int col = 0; col < columnCount; col++) {
    if (DKV.get(_keys[col]) == null) {
      // deleted Vec in Frame
      columnTypes[col] = "string";
      for (int r = 0; r < len + summaryRows; r++) {
        numericCells[r][col] = TwoDimTable.emptyDouble;
      }
      for (int r = 0; r < len; r++) {
        textCells[summaryRows + r][col] = "NO_VEC";
      }
      continue;
    }

    final Vec vec = columns[col];
    if (rollups) {
      numericCells[0][col] = vec.min();
      numericCells[1][col] = vec.mean();
      numericCells[2][col] = vec.sigma();
      numericCells[3][col] = vec.max();
      numericCells[4][col] = vec.naCnt();
    }

    switch (vec.get_type()) {
      case Vec.T_BAD: {
        columnTypes[col] = "string";
        for (int r = 0; r < len; r++) {
          final int cell = summaryRows + r;
          textCells[cell][col] = null;
          numericCells[cell][col] = TwoDimTable.emptyDouble;
        }
        break;
      }
      case Vec.T_STR: {
        columnTypes[col] = "string";
        for (int r = 0; r < len; r++) {
          final long row = off + r;
          final int cell = summaryRows + r;
          if (vec.isNA(row)) {
            textCells[cell][col] = "";
          } else {
            textCells[cell][col] = vec.atStr(scratch, row).toString();
          }
          numericCells[cell][col] = TwoDimTable.emptyDouble;
        }
        break;
      }
      case Vec.T_CAT: {
        columnTypes[col] = "string";
        for (int r = 0; r < len; r++) {
          final long row = off + r;
          final int cell = summaryRows + r;
          if (vec.isNA(row)) {
            textCells[cell][col] = "";
          } else {
            textCells[cell][col] = vec.factor(vec.at8(row));
          }
          numericCells[cell][col] = TwoDimTable.emptyDouble;
        }
        break;
      }
      case Vec.T_TIME: {
        columnTypes[col] = "string";
        final DateTimeFormatter timeFormat = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
        for (int r = 0; r < len; r++) {
          final long row = off + r;
          final int cell = summaryRows + r;
          if (vec.isNA(row)) {
            textCells[cell][col] = "";
          } else {
            textCells[cell][col] = timeFormat.print(vec.at8(row));
          }
          numericCells[cell][col] = TwoDimTable.emptyDouble;
        }
        break;
      }
      case Vec.T_NUM: {
        columnTypes[col] = vec.isInt() ? "long" : "double";
        for (int r = 0; r < len; r++) {
          final long row = off + r;
          final int cell = summaryRows + r;
          if (vec.isNA(row)) {
            numericCells[cell][col] = TwoDimTable.emptyDouble;
          } else {
            numericCells[cell][col] = vec.at(row);
          }
          textCells[cell][col] = null;
        }
        break;
      }
      case Vec.T_UUID:
        throw H2O.unimpl();
      default:
        System.err.println("bad vector type during debug print: " + vec.get_type());
        throw H2O.fail();
    }
  }

  final String tableHeader = "Frame " + _key;
  final String tableDescription = numRows() + " rows and " + numCols() + " cols";
  // Clone the names: the TwoDimTable will replace nulls with "".
  return new TwoDimTable(tableHeader, tableDescription, rowLabels,
      _names.clone(), columnTypes, null, "", textCells, numericCells);
}
Use of water.parser.BufferedString in project h2o-3 by h2oai.
From class NewChunk, method add2Chunk_impl.
/**
 * Appends element {@code i} of this chunk to {@code c}, choosing the add method that matches how
 * the element is stored: NA, UUID, mantissa/exponent number, double, or string.
 *
 * @param c destination chunk that receives the element
 * @param i index of the element to copy
 * @throws IllegalStateException if none of the backing stores ({@code _ms}, {@code _ds},
 *     {@code _ss}) is present
 */
private void add2Chunk_impl(NewChunk c, int i) {
  if (isNA2(i)) {
    c.addNA();
    return;
  }
  if (isUUID()) {
    c.addUUID(_ms.get(i), Double.doubleToRawLongBits(_ds[i]));
    return;
  }
  if (_ms != null) {
    c.addNum(_ms.get(i), _xs.get(i));
    return;
  }
  if (_ds != null) {
    c.addNum(_ds[i]);
    return;
  }
  if (_ss == null) {
    throw new IllegalStateException();
  }

  // String element: _is[i] is its start offset in _ss, or -1 for an NA.
  final int start = _is[i];
  // Skip following NA entries (_is[idx] == -1) to find where the next stored string begins.
  int next = i + 1;
  for (; next < _is.length && _is[next] == -1; next++) {
    // scanning only
  }
  // With no later string, this one runs to the end of the used part of _ss.
  final int end = (next < _is.length) ? _is[next] : _sslen;
  // NOTE(review): the extra -1 presumably drops a trailing terminator byte; confirm against the
  // code that writes _ss.
  final int length = end - start - 1;
  // A null BufferedString represents an NA value.
  c.addStr(start == -1 ? null : _bfstr.set(_ss, start, length));
}
Use of water.parser.BufferedString in project h2o-3 by h2oai.
From class ParquetFileGenerator, method testParseAvroPrimitiveTypes.
/**
 * Test parsing of a Parquet file originally made from Avro records (Avro versions before 1.8,
 * i.e. before the introduction of logical types).
 *
 * <p>Verifies the column names and types of the parsed frame, then checks the per-row values of
 * the boolean, int, long, float, double, string and enum columns. The values of the mydate and
 * myuuid columns are not checked row by row; only their column types are asserted.
 */
@Test
public void testParseAvroPrimitiveTypes() {
  FrameAssertion assertion = new GenFrameAssertion("avroPrimitiveTypes.parquet", TestUtil.ari(9, 100)) {
    @Override
    protected File prepareFile() throws IOException {
      return ParquetFileGenerator.generateAvroPrimitiveTypes(Files.createTempDir(), file, nrows(), new Date());
    }

    @Override
    public void check(Frame f) {
      assertArrayEquals("Column names need to match!", ar("myboolean", "myint", "mylong", "myfloat", "mydouble", "mydate", "myuuid", "mystring", "myenum"), f.names());
      assertArrayEquals("Column types need to match!", ar(Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_NUM, Vec.T_TIME, Vec.T_UUID, Vec.T_STR, Vec.T_CAT), f.types());
      // Scratch buffer reused for every string lookup.
      BufferedString bs = new BufferedString();
      for (int row = 0; row < nrows(); row++) {
        assertEquals("Value in column myboolean", 1 - (row % 2), f.vec(0).at8(row));
        assertEquals("Value in column myint", 1 + row, f.vec(1).at8(row));
        assertEquals("Value in column mylong", 2 + row, f.vec(2).at8(row));
        assertEquals("Value in column myfloat", 3.1f + row, f.vec(3).at(row), EPSILON);
        // Column 4 is mydouble; the failure message must name it, not myfloat.
        assertEquals("Value in column mydouble", 4.1 + row, f.vec(4).at(row), EPSILON);
        assertEquals("Value in column mystring", "hello world: " + row, f.vec(7).atStr(bs, row).bytesToString());
        assertEquals("Value in column myenum", row % 2 == 0 ? "a" : "b", f.vec(8).factor(f.vec(8).at8(row)));
      }
    }
  };
  assertFrameAssertion(assertion);
}
Use of water.parser.BufferedString in project h2o-3 by h2oai.
From class FrameTestUtil, method collectS.
/**
 * Copies every element of a string vector into a Java array.
 *
 * @param v vector to read
 * @return one entry per row of {@code v}; rows that are NA are returned as {@code null}
 */
public static String[] collectS(Vec v) {
  final String[] collected = new String[(int) v.length()];
  // Scratch buffer reused for every lookup.
  final BufferedString scratch = new BufferedString();
  int row = 0;
  while (row < v.length()) {
    if (v.isNA(row)) {
      collected[row] = null;
    } else {
      collected[row] = v.atStr(scratch, row).toString();
    }
    row++;
  }
  return collected;
}
Use of water.parser.BufferedString in project h2o-3 by h2oai.
From class CStrChunkTest, method test_inflate_impl.
/**
 * Round-trips string data through compress, deep copy, row extraction and re-compression, and
 * checks after every step that lengths, values and NA positions are preserved.
 *
 * <p>The scenario runs twice: with {@code l == 0} the values are followed by one trailing NA;
 * with {@code l == 1} a leading NA is added as well, shifting every value by one row.
 */
@Test
public void test_inflate_impl() {
  for (int l = 0; l < 2; ++l) {
    NewChunk nc = new NewChunk(null, 0);
    BufferedString[] vals = new BufferedString[1000001];
    for (int i = 0; i < vals.length; i++) {
      vals[i] = new BufferedString("Foo" + i);
    }
    if (l == 1) {
      nc.addNA();
    }
    for (BufferedString v : vals) {
      nc.addStr(v);
    }
    nc.addNA();
    int len = nc.len();

    // Step 1: compress the freshly built chunk.
    Chunk cc = nc.compress();
    Assert.assertEquals(vals.length + 1 + l, cc._len);
    Assert.assertTrue(cc instanceof CStrChunk);
    if (l == 1) {
      Assert.assertTrue(cc.isNA(0));
      Assert.assertTrue(cc.isNA_abs(0));
    }
    // Scratch buffer reused for every string lookup below.
    BufferedString tmpStr = new BufferedString();
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc.atStr(tmpStr, l + i));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc.atStr_abs(tmpStr, l + i));
    }
    Assert.assertTrue(cc.isNA(vals.length + l));
    Assert.assertTrue(cc.isNA_abs(vals.length + l));

    // Step 2: a deep copy must expose the same contents.
    Chunk cc2 = IcedUtils.deepCopy(cc);
    Assert.assertEquals(vals.length + 1 + l, cc2._len);
    Assert.assertTrue(cc2 instanceof CStrChunk);
    if (l == 1) {
      Assert.assertTrue(cc2.isNA(0));
      Assert.assertTrue(cc2.isNA_abs(0));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
    }
    Assert.assertTrue(cc2.isNA(vals.length + l));
    Assert.assertTrue(cc2.isNA_abs(vals.length + l));

    // Step 3: extract all rows of the compressed chunk into a new NewChunk.
    nc = cc.extractRows(new NewChunk(null, 0), 0, len);
    Assert.assertEquals(vals.length + 1 + l, nc._len);
    if (l == 1) {
      Assert.assertTrue(nc.isNA(0));
      Assert.assertTrue(nc.isNA_abs(0));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], nc.atStr(tmpStr, l + i));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], nc.atStr_abs(tmpStr, l + i));
    }
    Assert.assertTrue(nc.isNA(vals.length + l));
    Assert.assertTrue(nc.isNA_abs(vals.length + l));

    // Step 4: re-compress the extracted rows; the result must match the first compression.
    cc2 = nc.compress();
    // Check the re-compressed chunk itself; cc's length was already verified in step 1.
    Assert.assertEquals(vals.length + 1 + l, cc2._len);
    Assert.assertTrue(cc2 instanceof CStrChunk);
    if (l == 1) {
      Assert.assertTrue(cc2.isNA(0));
      Assert.assertTrue(cc2.isNA_abs(0));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc2.atStr(tmpStr, l + i));
    }
    for (int i = 0; i < vals.length; ++i) {
      Assert.assertEquals(vals[i], cc2.atStr_abs(tmpStr, l + i));
    }
    Assert.assertTrue(cc2.isNA(vals.length + l));
    Assert.assertTrue(cc2.isNA_abs(vals.length + l));
    Assert.assertTrue(Arrays.equals(cc._mem, cc2._mem));
  }
}
Aggregations