Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
The class TestVectorizedColumnReader, method testNullSplitForParquetReader.
@Test
public void testNullSplitForParquetReader() throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "int32_field");
  conf.set(IOConstants.COLUMNS_TYPES, "int");
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
  initialVectorizedRowBatchCtx(conf);
  // A reader constructed over a null split must behave as an empty source:
  // the first next() call returns false instead of throwing.
  VectorizedParquetRecordReader reader =
      new VectorizedParquetRecordReader((org.apache.hadoop.mapred.InputSplit) null, new JobConf(conf));
  assertFalse(reader.next(reader.createKey(), reader.createValue()));
}
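The empty-source contract means callers can drain such a reader with the ordinary loop and simply see zero rows. A minimal sketch of that, reusing the conf from the test above (NullWritable and VectorizedRowBatch are this reader's key and value types):

// Sketch: a reader over a null split is drained without producing rows.
VectorizedParquetRecordReader reader =
    new VectorizedParquetRecordReader((org.apache.hadoop.mapred.InputSplit) null, new JobConf(conf));
NullWritable key = reader.createKey();
VectorizedRowBatch batch = reader.createValue();
int rows = 0;
while (reader.next(key, batch)) {
  rows += batch.size;  // never entered: next() returns false immediately
}
reader.close();
// rows == 0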
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
The class VectorizedColumnReaderTestBase, method structRead.
protected void structRead(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "struct_field");
  conf.set(IOConstants.COLUMNS_TYPES, "struct<a:int,b:double>");
  // Project only column 0 (the struct) instead of reading all columns.
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // Resolves to:
  // message hive_schema {
  //   group struct_field {
  //     optional int32 a;
  //     optional double b;
  //   }
  // }
  String schema = "message hive_schema {\n"
      + "group struct_field {\n"
      + " optional int32 a;\n"
      + " optional double b;\n"
      + "}\n"
      + "}\n";
  VectorizedParquetRecordReader reader = createParquetReader(schema, conf);
  VectorizedRowBatch previous = reader.createValue();
  int c = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      // The struct column surfaces as a StructColumnVector whose children
      // are the per-field vectors.
      StructColumnVector vector = (StructColumnVector) previous.cols[0];
      LongColumnVector cv = (LongColumnVector) vector.fields[0];
      DoubleColumnVector dv = (DoubleColumnVector) vector.fields[1];
      for (int i = 0; i < cv.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals(getIntValue(isDictionaryEncoding, c), cv.vector[i]);
        assertEquals(getDoubleValue(isDictionaryEncoding, c), dv.vector[i], 0);
        assertFalse(vector.isNull[i]);
        assertFalse(vector.isRepeating);
        c++;
      }
    }
    assertEquals("It doesn't exit at expected position", nElements, c);
  } finally {
    reader.close();
  }
}
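One detail worth flagging: cv.vector.length is the batch capacity (VectorizedRowBatch.DEFAULT_SIZE, 1024 by default), not the number of valid rows, which is why the inner loop needs the c == nElements guard. A sketch of the same loop bounded by previous.size, the count of rows actually populated in the current batch, assuming the reader sets batch.size in the usual way:

while (reader.next(NullWritable.get(), previous)) {
  StructColumnVector vector = (StructColumnVector) previous.cols[0];
  LongColumnVector cv = (LongColumnVector) vector.fields[0];
  DoubleColumnVector dv = (DoubleColumnVector) vector.fields[1];
  for (int i = 0; i < previous.size; i++) {  // only rows valid in this batch
    assertEquals(getIntValue(isDictionaryEncoding, c), cv.vector[i]);
    assertEquals(getDoubleValue(isDictionaryEncoding, c), dv.vector[i], 0);
    c++;
  }
}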
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
The class VectorizedColumnReaderTestBase, method structReadSomeNull.
protected void structReadSomeNull(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "struct_field_some_null");
  conf.set(IOConstants.COLUMNS_TYPES, "struct<f:int,g:double>");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  // Resolves to:
  // message hive_schema {
  //   group struct_field_some_null {
  //     optional int32 f;
  //     optional double g;
  //   }
  // }
  String schema = "message hive_schema {\n"
      + "group struct_field_some_null {\n"
      + " optional int32 f;\n"
      + " optional double g;\n"
      + "}\n"
      + "}\n";
  VectorizedParquetRecordReader reader = createParquetReader(schema, conf);
  VectorizedRowBatch previous = reader.createValue();
  int c = 0;
  try {
    while (reader.next(NullWritable.get(), previous)) {
      StructColumnVector sv = (StructColumnVector) previous.cols[0];
      LongColumnVector fv = (LongColumnVector) sv.fields[0];
      DoubleColumnVector gv = (DoubleColumnVector) sv.fields[1];
      for (int i = 0; i < fv.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        // f is null on every 2nd row, g on every 3rd; the struct itself is
        // null only when both children are, i.e. on every 6th row.
        assertEquals(c % 2 == 0, fv.isNull[i]);
        assertEquals(c % 3 == 0, gv.isNull[i]);
        assertEquals(c % /* 2*3 = */ 6 == 0, sv.isNull[i]);
        if (!sv.isNull[i]) {
          if (!fv.isNull[i]) {
            assertEquals(getIntValue(isDictionaryEncoding, c), fv.vector[i]);
          }
          if (!gv.isNull[i]) {
            assertEquals(getDoubleValue(isDictionaryEncoding, c), gv.vector[i], 0);
          }
        }
        assertFalse(fv.isRepeating);
        c++;
      }
    }
    assertEquals("It doesn't exit at expected position", nElements, c);
  } finally {
    reader.close();
  }
}
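The three null assertions are mutually consistent: since 2 and 3 are coprime, c % 6 == 0 holds exactly when both c % 2 == 0 and c % 3 == 0, so the struct-level null (both children null) must line up with every 6th row. A tiny self-contained check of that equivalence:

// Demonstrates the modular arithmetic behind the struct-null assertion.
// (Run with -ea to enable assertions, or read it as documentation.)
for (int c = 0; c < 12; c++) {
  boolean fNull = c % 2 == 0;
  boolean gNull = c % 3 == 0;
  assert (fNull && gNull) == (c % 6 == 0);
}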
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
The class VectorizedColumnReaderTestBase, method longRead.
protected void longRead(boolean isDictionaryEncoding) throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "int64_field");
  conf.set(IOConstants.COLUMNS_TYPES, "bigint");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader =
      createParquetReader("message test { required int64 int64_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      LongColumnVector vector = (LongColumnVector) previous.cols[0];
      // The column is declared required, so the reader should report no nulls
      // for the whole batch.
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        if (c == nElements) {
          break;
        }
        assertEquals("Failed at " + c, getLongValue(isDictionaryEncoding, c), vector.vector[i]);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
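Because the column is required in the Parquet schema, the reader can assert noNulls once per batch. Were the column optional instead, nulls would be possible and the checks would move to per-row guards; a hypothetical variant (names mirror the test above, the "optional" schema is an assumption for illustration):

// Hypothetical variant for an "optional int64 int64_field" column.
while (reader.next(NullWritable.get(), previous)) {
  LongColumnVector vector = (LongColumnVector) previous.cols[0];
  for (int i = 0; i < previous.size; i++) {
    if (!vector.isNull[i]) {
      long v = vector.vector[i];
      // ... compare v against the expected value ...
    }
  }
}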
Use of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader in project hive by apache.
The class VectorizedColumnReaderTestBase, method createParquetReader.
// Builds a vectorized reader over the shared test file, using the given
// Parquet read schema and the first input split.
protected VectorizedParquetRecordReader createParquetReader(String schemaString, Configuration conf)
    throws IOException, InterruptedException, HiveException {
  conf.set(PARQUET_READ_SCHEMA, schemaString);
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
  Job vectorJob = new Job(conf, "read vector");
  ParquetInputFormat.setInputPaths(vectorJob, file);
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);
  ParquetInputSplit split = (ParquetInputSplit) parquetInputFormat.getSplits(vectorJob).get(0);
  initialVectorizedRowBatchCtx(conf);
  return new VectorizedParquetRecordReader(split, new JobConf(conf));
}
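Every read test above reduces to the same shape around this helper: configure the projected columns, build the reader from a schema string, then drain batches. A condensed usage sketch, assuming the helper and constants from this base class:

Configuration conf = new Configuration();
conf.set(IOConstants.COLUMNS, "int64_field");
conf.set(IOConstants.COLUMNS_TYPES, "bigint");
VectorizedParquetRecordReader reader =
    createParquetReader("message test { required int64 int64_field;}", conf);
VectorizedRowBatch batch = reader.createValue();
try {
  while (reader.next(NullWritable.get(), batch)) {
    // consume batch.cols[0] for the first batch.size rows
  }
} finally {
  reader.close();
}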