Example use of org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat in the Apache Hive project: the orcRecordReader method of the HiveVectorizedReader class.
/**
 * Builds a vectorized ORC record reader for the given file scan task. When LLAP I/O is enabled,
 * an LLAP-backed reader is attempted first; if LLAP is disabled or declines to serve the path,
 * a plain {@link VectorizedOrcInputFormat} reader over a metadata-enriched {@link OrcSplit} is
 * returned instead.
 *
 * @param job           job configuration; mutated to disable positional schema evolution and, on
 *                      the LLAP path, to clear the supported-features list
 * @param reporter      progress reporter passed through to the underlying reader
 * @param task          Iceberg file scan task used to set up the projection mapping
 * @param inputFile     Iceberg input file whose serialized ORC tail is fetched
 * @param path          path of the ORC data file
 * @param start         split start offset within the file
 * @param length        split length in bytes
 * @param readColumnIds ids of the columns to read (projection)
 * @param fileId        synthetic file id used for tail caching and the split
 * @return a vectorized record reader producing {@link VectorizedRowBatch} values
 * @throws IOException if reading the ORC tail or constructing the reader fails
 */
private static RecordReader<NullWritable, VectorizedRowBatch> orcRecordReader(JobConf job, Reporter reporter, FileScanTask task, InputFile inputFile, Path path, long start, long length, List<Integer> readColumnIds, SyntheticFileId fileId) throws IOException {
  // Projection uses column-name based schema evolution (Iceberg maps the file schema onto the
  // current reading schema), so positional evolution has to be switched off.
  job.setBoolean(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), false);

  // The ORC tail metadata must travel along in the OrcSplit. Without it, the vectorized reader
  // assumes the ORC file ends at start + length and might fail while reading the tail.
  ByteBuffer serializedTail = VectorizedReadUtils.getSerializedOrcTail(inputFile, fileId, job);
  OrcTail tail = VectorizedReadUtils.deserializeToOrcTail(serializedTail);
  VectorizedReadUtils.handleIcebergProjection(task, job,
      VectorizedReadUtils.deserializeToShadedOrcTail(serializedTail).getSchema());

  // With LLAP enabled, try the LLAP reader first - it may still come back null in special cases.
  if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) && LlapProxy.getIo() != null) {
    // Clearing the supported features prevents LLAP from dealing with decimal64,
    // see HiveIcebergInputFormat.getSupportedFeatures()
    HiveConf.setVar(job, HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED, "");
    RecordReader<NullWritable, VectorizedRowBatch> llapReader =
        LlapProxy.getIo().llapVectorizedOrcReaderForPath(fileId, path, null, readColumnIds, job, start, length, reporter);
    if (llapReader != null) {
      return llapReader;
    }
  }

  // Non-LLAP fallback: hand a fully described OrcSplit to the vectorized input format.
  InputSplit fallbackSplit = new OrcSplit(path, fileId, start, length, (String[]) null, tail,
      false, false, com.google.common.collect.Lists.newArrayList(), 0, length, path.getParent(), null);
  return new VectorizedOrcInputFormat().getRecordReader(fallbackSplit, job, reporter);
}
Aggregations