Usage example of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader from the Apache Hive project.
From class VectorizedColumnReaderTestBase, method createTestParquetReader.
/**
 * Builds a {@link VectorizedParquetRecordReader} over the shared test file, reading it
 * with the supplied Parquet message schema and vectorization enabled.
 *
 * @param schemaString Parquet message-type schema the reader should project
 * @param conf configuration that is populated in place and handed to the reader
 * @return a vectorized record reader positioned on the test file's split
 * @throws IOException if the split cannot be resolved
 * @throws InterruptedException if split computation is interrupted
 * @throws HiveException if the row-batch context cannot be initialized
 */
public static VectorizedParquetRecordReader createTestParquetReader(String schemaString, Configuration conf) throws IOException, InterruptedException, HiveException {
  conf.set(PARQUET_READ_SCHEMA, schemaString);
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  // NOTE(review): "//tmp" (double slash) is kept verbatim — presumably intentional; confirm against HiveConf.ConfVars.PLAN usage.
  HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");
  Job readJob = new Job(conf, "read vector");
  ParquetInputFormat.setInputPaths(readJob, file);
  initialVectorizedRowBatchCtx(conf);
  return new VectorizedParquetRecordReader(getFileSplit(readJob), new JobConf(conf));
}
Usage example of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader from the Apache Hive project.
From class VectorizedColumnReaderTestBase, method booleanRead.
/**
 * Reads the test file's required boolean column through the vectorized Parquet reader
 * and verifies every value against {@code getBooleanValue}, checking that exactly
 * {@code nElements} non-null values are produced.
 *
 * @throws Exception on any reader or assertion failure
 */
protected void booleanRead() throws Exception {
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "boolean_field");
  conf.set(IOConstants.COLUMNS_TYPES, "boolean");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader = createTestParquetReader("message test { required boolean boolean_field;}", conf);
  VectorizedRowBatch batch = reader.createValue();
  try {
    int count = 0;
    while (reader.next(NullWritable.get(), batch)) {
      // Booleans are surfaced as 0/1 in a LongColumnVector.
      LongColumnVector col = (LongColumnVector) batch.cols[0];
      assertTrue(col.noNulls);
      // Stop consuming slots once all expected elements have been checked;
      // the vector may be longer than the remaining data.
      for (int idx = 0; idx < col.vector.length && count < nElements; idx++) {
        assertEquals("Failed at " + count, (getBooleanValue(count) ? 1 : 0), col.vector[idx]);
        assertFalse(col.isNull[idx]);
        count++;
      }
    }
    assertEquals(nElements, count);
  } finally {
    reader.close();
  }
}
Usage example of org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader from the Apache Hive project.
From class VectorizedColumnReaderTestBase, method floatReadInt.
/**
 * Reads the test file's required int32 column as a Hive float column and verifies
 * every value against {@code getIntValue}, checking that exactly {@code nElements}
 * non-null values are produced.
 *
 * @param isDictionaryEncoding whether the expected values come from the
 *        dictionary-encoded variant of the test data
 * @throws InterruptedException if split computation is interrupted
 * @throws HiveException if the row-batch context cannot be initialized
 * @throws IOException on read failure
 */
protected void floatReadInt(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
  // Fix: declare a fresh local Configuration, matching the sibling read methods
  // (e.g. booleanRead); the original referenced an undeclared `conf`.
  Configuration conf = new Configuration();
  conf.set(IOConstants.COLUMNS, "int32_field");
  conf.set(IOConstants.COLUMNS_TYPES, "float");
  conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
  VectorizedParquetRecordReader reader = createTestParquetReader("message test { required int32 int32_field;}", conf);
  VectorizedRowBatch previous = reader.createValue();
  try {
    int c = 0;
    while (reader.next(NullWritable.get(), previous)) {
      // Hive floats are surfaced through a DoubleColumnVector.
      DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
      assertTrue(vector.noNulls);
      for (int i = 0; i < vector.vector.length; i++) {
        // The last batch's vector may be longer than the remaining data.
        if (c == nElements) {
          break;
        }
        // Exact delta of 0: int -> float widening is lossless for test values.
        assertEquals("Failed at " + c, getIntValue(isDictionaryEncoding, c), vector.vector[i], 0);
        assertFalse(vector.isNull[i]);
        c++;
      }
    }
    assertEquals(nElements, c);
  } finally {
    reader.close();
  }
}
Aggregations