Usage of org.apache.hive.iceberg.org.apache.orc.impl.ReaderImpl in the Apache Hive project.
Class VectorizedReadUtils, method getSerializedOrcTail.
/**
 * Opens the ORC inputFile and reads the metadata information to construct a byte buffer with OrcTail content.
 * <p>
 * When LLAP IO is enabled and available, the tail is requested from LLAP's metadata cache first. If LLAP is not
 * in use, or the cache lookup fails with an {@link IOException}, the file is opened with a plain ORC reader and
 * its serialized file footer is returned instead.
 *
 * @param inputFile - the original ORC file - this needs to be accessed to retrieve the original schema for mapping
 * @param fileId - FileID for the input file, serves as cache key in an LLAP setup
 * @param job - JobConf instance holding the LLAP related settings, also handed to the ORC reader
 * @return byte buffer with the serialized ORC tail, obtained from the LLAP cache or read from the file directly
 * @throws IOException - errors relating to accessing the ORC file
 */
public static ByteBuffer getSerializedOrcTail(InputFile inputFile, SyntheticFileId fileId, JobConf job)
    throws IOException {
  ByteBuffer result = null;

  if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) &&
      LlapProxy.getIo() != null) {
    Path path = new Path(inputFile.location());

    // The cache tag is only needed for cache usage tracking, so MapWork and partition information are resolved
    // lazily, and only when tracking is switched on.
    CacheTag cacheTag = null;
    if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE)) {
      MapWork mapWork = LlapHiveUtils.findMapWork(job);
      if (mapWork != null) {
        // Note: Since Hive doesn't know about partition information of Iceberg tables, partitionDesc is only used
        // to deduct the table (and DB) name here.
        PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(path, mapWork.getPathToPartitionInfo());
        cacheTag = LlapHiveUtils.getDbAndTableNameForMetrics(path, true, partitionDesc);
      } else {
        // Without a MapWork the table name cannot be deduced; proceed untagged rather than failing with an NPE.
        LOG.debug("No MapWork found for {}, its LLAP cache usage will not be tagged", path);
      }
    }

    try {
      // The tail is passed around in serialized form as it crosses different packages of TypeDescription:
      // Iceberg expects org.apache.hive.iceberg.org.apache.orc.TypeDescription as it shades ORC, while LLAP
      // provides the unshaded org.apache.orc.TypeDescription type.
      BufferChunk tailBuffer = LlapProxy.getIo().getOrcTailFromCache(path, job, cacheTag, fileId).getTailBuffer();
      // NOTE(review): this buffer presumably is the one held by the LLAP cache entry - confirm whether callers
      // are allowed to change its position/limit.
      result = tailBuffer.getData();
    } catch (IOException ioe) {
      // Best effort only: a cache failure must not fail the read, the plain reader below takes over.
      LOG.warn("LLAP is turned on but was unable to get file metadata information through its cache for {}", path, ioe);
    }
  }

  // Fallback to simple ORC reader file opening method in lack of or failure of LLAP.
  if (result == null) {
    try (ReaderImpl orcFileReader = (ReaderImpl) ORC.newFileReader(inputFile, job)) {
      result = orcFileReader.getSerializedFileFooter();
    }
  }

  return result;
}
Aggregations