Use of org.apache.parquet.hadoop.ParquetInputFormat in the Apache Hive project.
From the class VectorizedColumnReaderTestBase, method createParquetReader.
/**
 * Builds a {@link VectorizedParquetRecordReader} over the first input split of {@code file}.
 *
 * <p>Note that {@code conf} is modified in place: the requested read schema is stored under
 * {@code PARQUET_READ_SCHEMA}, vectorization is switched on, and the plan path is set to
 * {@code "//tmp"}.
 *
 * @param schemaString the Parquet schema to request when reading
 * @param conf the configuration to populate and hand to the reader
 * @return a reader created from the first split reported for {@code file}
 * @throws IOException if the job cannot be created, the splits cannot be computed, or no
 *     split is produced for the input path
 * @throws InterruptedException declared for callers; propagated from the underlying calls
 * @throws HiveException declared for callers; propagated from the underlying calls
 */
protected VectorizedParquetRecordReader createParquetReader(String schemaString, Configuration conf) throws IOException, InterruptedException, HiveException {
  conf.set(PARQUET_READ_SCHEMA, schemaString);
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
  HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp");

  // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
  Job vectorJob = Job.getInstance(conf, "read vector");
  ParquetInputFormat.setInputPaths(vectorJob, file);
  ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class);

  // Fail with a descriptive error instead of a bare IndexOutOfBoundsException
  // when the input yields no splits.
  java.util.List<?> splits = parquetInputFormat.getSplits(vectorJob);
  if (splits.isEmpty()) {
    throw new IOException("No Parquet input splits found for " + file);
  }
  ParquetInputSplit split = (ParquetInputSplit) splits.get(0);

  initialVectorizedRowBatchCtx(conf);
  return new VectorizedParquetRecordReader(split, new JobConf(conf));
}
Aggregations