Use of org.apache.orc.impl.OrcTail in project hive by apache.
The class VectorizedOrcAcidRowBatchReader, method getOrcReaderData.
/**
 * Gets the OrcTail from cache if LLAP IO is enabled, otherwise creates the reader to get the tail.
 * Always store the Reader along with the Tail as part of ReaderData so we can reuse it.
 * @param path The Orc file path we want to get the OrcTail for
 * @param conf The Configuration to access LLAP
 * @param cacheTag The cacheTag needed to get OrcTail from LLAP IO cache
 * @param fileKey fileId of the Orc file (either the Long fileId of HDFS or the SyntheticFileId).
 *                Optional; if it is not provided, it will be generated, see:
 *                {@link org.apache.hadoop.hive.ql.io.HdfsUtils#getFileId}
 * @return ReaderData object where the orcTail is not null. Reader can be null, but if we had to create
 *         one we return that as well for further reuse.
 */
private static ReaderData getOrcReaderData(Path path, Configuration conf, CacheTag cacheTag, Object fileKey) throws IOException {
  ReaderData readerData = new ReaderData();
  if (shouldReadDeleteDeltasWithLlap(conf, true)) {
    try {
      readerData.orcTail = LlapProxy.getIo().getOrcTailFromCache(path, conf, cacheTag, fileKey);
      readerData.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf).orcTail(readerData.orcTail));
      // Return early: the tail came from the LLAP cache and the reader was built from it;
      // without this return the fallback below would overwrite both.
      return readerData;
    } catch (IllegalCacheConfigurationException icce) {
      throw new IOException("LLAP cache is not configured properly while delete delta caching is turned on", icce);
    }
  }
  readerData.reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  readerData.orcTail = new OrcTail(readerData.reader.getFileTail(), readerData.reader.getSerializedFileFooter());
  return readerData;
}
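For context, a hedged sketch of how this private helper might be invoked from elsewhere in VectorizedOrcAcidRowBatchReader; the delete-delta path and the CacheTag value below are illustrative assumptions, not taken from the Hive source.

// Hypothetical call site inside VectorizedOrcAcidRowBatchReader.
Path deleteDeltaFile = new Path("/warehouse/t/delete_delta_0000005_0000005/bucket_00000"); // assumed layout
CacheTag cacheTag = CacheTag.build("default.t"); // assumed tag; real callers derive it from table properties
ReaderData data = getOrcReaderData(deleteDeltaFile, conf, cacheTag, null); // null fileKey: generated if needed
// data.orcTail is never null on return; reuse data.reader rather than opening the file a second time.
if (data.reader != null) {
  RecordReader rows = data.reader.rows();
}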
Use of org.apache.orc.impl.OrcTail in project hive by apache.
The class OrcFileFormatProxy, method applySargToMetadata.
@Override
public SplitInfos applySargToMetadata(SearchArgument sarg, ByteBuffer fileMetadata, Configuration conf) throws IOException {
  // TODO: ideally we should store shortened representation of only the necessary fields
  // in HBase; it will probably require custom SARG application code.
  OrcTail orcTail = ReaderImpl.extractFileTail(fileMetadata);
  OrcProto.Footer footer = orcTail.getFooter();
  int stripeCount = footer.getStripesCount();
  // Always convert to PROLEPTIC_GREGORIAN
  List<StripeStatistics> stripeStats;
  try (org.apache.orc.Reader dummyReader = new org.apache.orc.impl.ReaderImpl(null,
      org.apache.orc.OrcFile.readerOptions(org.apache.orc.OrcFile.readerOptions(conf).getConfiguration())
          .useUTCTimestamp(true)
          .convertToProlepticGregorian(true)
          .orcTail(orcTail))) {
    stripeStats = dummyReader.getVariantStripeStatistics(null);
  }
  boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg(
      sarg, orcTail.getWriterVersion(), footer.getTypesList(), stripeStats, stripeCount);
  // For ORC case, send the boundaries of the stripes so we don't have to send the footer.
  SplitInfos.Builder sb = SplitInfos.newBuilder();
  List<StripeInformation> stripes = orcTail.getStripes();
  boolean isEliminated = true;
  for (int i = 0; i < result.length; ++i) {
    // result cannot be null here (result.length was already dereferenced above).
    if (!result[i]) {
      continue;
    }
    isEliminated = false;
    StripeInformation si = stripes.get(i);
    if (LOG.isDebugEnabled()) {
      LOG.debug("PPD is adding a split " + i + ": " + si.getOffset() + ", " + si.getLength());
    }
    sb.addInfos(SplitInfo.newBuilder().setIndex(i).setOffset(si.getOffset()).setLength(si.getLength()));
  }
  return isEliminated ? null : sb.build();
}
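A hedged sketch of driving this method: SearchArgumentFactory and its builder are the standard Hive SARG API, but the proxy instance, the column name, the predicate, and the metadata source below are assumptions for illustration.

// Build a SARG for "WHERE id < 100" and prune stripes using only cached footer bytes.
SearchArgument sarg = SearchArgumentFactory.newBuilder()
    .startAnd()
    .lessThan("id", PredicateLeaf.Type.LONG, 100L) // "id" is an assumed column name
    .end()
    .build();
ByteBuffer fileMetadata = fetchCachedFooter();     // hypothetical helper returning serialized tail bytes
SplitInfos infos = proxy.applySargToMetadata(sarg, fileMetadata, conf); // 'proxy' is an OrcFileFormatProxy
if (infos == null) {
  // All stripes were eliminated by the predicate: the file can be skipped entirely.
}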
Use of org.apache.orc.impl.OrcTail in project hive by apache.
The class TestEncodedOrcFile, method testFileSystemIsNotInitializedWithKnownTail.
@Test
public void testFileSystemIsNotInitializedWithKnownTail() throws IOException {
  JobConf conf = new JobConf();
  Path path = new Path("fmock:///testtable/bucket_0");
  conf.set("hive.orc.splits.include.file.footer", "true");
  conf.set("fs.defaultFS", "fmock:///");
  conf.set("fs.mock.impl", FailingMockFileSystem.class.getName());
  OrcProto.FileTail tail = OrcProto.FileTail.newBuilder()
      .setFooter(Footer.newBuilder()
          .addTypes(OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.BINARY).build())
          .build())
      .build();
  OrcFile.ReaderOptions readerOptions = EncodedOrcFile.readerOptions(conf)
      .filesystem(() -> {
        throw new RuntimeException("Filesystem should not have been initialized");
      })
      .orcTail(new OrcTail(tail, new BufferChunk(0, 0), -1));
  // An ORC reader is created; this should not cause filesystem initialization
  // because the ORC tail is already provided and we are not making any real reads.
  Reader reader = EncodedOrcFile.createReader(path, readerOptions);
  // The following initiates the creation of a data reader in the ORC reader. This should
  // not cause file system initialization either, as we are still not making any
  // real read.
  reader.rows();
}
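FailingMockFileSystem is referenced but not shown on this page; a minimal sketch of what such a mock might look like, assuming it only needs to fail loudly on any real access (the base class and the overridden methods are assumptions).

// Hypothetical mock: any attempt to touch the filesystem throws, proving the reader
// never reached it once the OrcTail was supplied up front.
public static class FailingMockFileSystem extends RawLocalFileSystem {
  @Override
  public FSDataInputStream open(Path p, int bufferSize) throws IOException {
    throw new IOException("Unexpected filesystem access: open(" + p + ")");
  }
  @Override
  public FileStatus getFileStatus(Path p) throws IOException {
    throw new IOException("Unexpected filesystem access: getFileStatus(" + p + ")");
  }
}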
Use of org.apache.orc.impl.OrcTail in project hive by apache.
The class OrcSplit, method readFields.
@Override
public void readFields(DataInput in) throws IOException {
  // deserialize path, offset, length using FileSplit
  super.readFields(in);
  byte flags = in.readByte();
  hasFooter = (FOOTER_FLAG & flags) != 0;
  isOriginal = (ORIGINAL_FLAG & flags) != 0;
  hasBase = (BASE_FLAG & flags) != 0;
  boolean hasLongFileId = (HAS_LONG_FILEID_FLAG & flags) != 0;
  boolean hasWritableFileId = (HAS_SYNTHETIC_FILEID_FLAG & flags) != 0;
  boolean hasSyntheticProps = (HAS_SYNTHETIC_ACID_PROPS_FLAG & flags) != 0;
  if (hasLongFileId && hasWritableFileId) {
    throw new IOException("Invalid split - both file ID types present");
  }
  deltas.clear();
  int numDeltas = in.readInt();
  for (int i = 0; i < numDeltas; i++) {
    AcidInputFormat.DeltaMetaData dmd = new AcidInputFormat.DeltaMetaData();
    dmd.readFields(in);
    deltas.add(dmd);
  }
  if (hasFooter) {
    int tailLen = WritableUtils.readVInt(in);
    byte[] tailBuffer = new byte[tailLen];
    in.readFully(tailBuffer);
    OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer);
    orcTail = new OrcTail(fileTail, null);
  }
  if (hasLongFileId) {
    fileKey = in.readLong();
  } else if (hasWritableFileId) {
    SyntheticFileId fileId = new SyntheticFileId();
    fileId.readFields(in);
    this.fileKey = fileId;
  }
  fileLen = in.readLong();
  rootDir = new Path(in.readUTF());
  if (hasSyntheticProps) {
    long rowId = in.readLong();
    int bucket = in.readInt();
    long writeId = in.readLong();
    syntheticAcidProps = new OffsetAndBucketProperty(rowId, bucket, writeId);
  }
}
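Because OrcSplit is a Hadoop Writable, readFields is the consuming half of a round trip with write; a hedged sketch of that round trip (the no-arg constructor is assumed, as the Writable contract requires one).

// Round-trip sketch: write() must emit exactly the byte layout readFields() consumes.
ByteArrayOutputStream bos = new ByteArrayOutputStream();
split.write(new DataOutputStream(bos));  // 'split' is a populated OrcSplit
OrcSplit copy = new OrcSplit();          // no-arg constructor assumed per the Writable contract
copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
// 'copy' now carries the same flags, deltas, optional OrcTail, fileKey, fileLen and rootDir.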
Use of org.apache.orc.impl.OrcTail in project hive by apache.
The class OrcEncodedDataReader, method ensureOrcReader.
/**
* Ensures orcReader is initialized for the split.
*/
private void ensureOrcReader() throws IOException {
  if (orcReader != null) {
    return;
  }
  path = split.getPath();
  if (fileKey instanceof Long && HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)) {
    path = HdfsUtils.getFileIdPath(path, (long) fileKey);
  }
  LlapIoImpl.ORC_LOGGER.trace("Creating reader for {} ({})", path, split.getPath());
  long startTime = counters.startTimeCounter();
  ReaderOptions opts = EncodedOrcFile.readerOptions(jobConf).filesystem(fsSupplier).fileMetadata(fileMetadata);
  if (split instanceof OrcSplit) {
    OrcTail orcTail = ((OrcSplit) split).getOrcTail();
    if (orcTail != null) {
      LlapIoImpl.ORC_LOGGER.debug("Setting OrcTail. path={}", path);
      opts.orcTail(orcTail);
    }
  }
  orcReader = EncodedOrcFile.createReader(path, opts);
  counters.incrWallClockCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
}
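Inside OrcEncodedDataReader this is the usual lazy-initialization idiom: any path that needs orcReader calls ensureOrcReader() first. A hedged sketch of such a caller; the method name and the accessor on orcReader are assumptions for illustration.

// Hypothetical caller: guarantee the reader exists before touching it.
private OrcProto.StripeFooter getStripeFooter(StripeInformation stripe) throws IOException {
  ensureOrcReader(); // cheap no-op after the first call; lazily creates the reader otherwise
  return orcReader.readStripeFooter(stripe); // assumed accessor, for illustration only
}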