Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.
The class SerDeEncodedDataReader, method readFileWithCache.
public Boolean readFileWithCache(long startTime) throws IOException, InterruptedException {
if (fileKey == null)
return false;
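// gotAllData is an out-parameter: getFileData sets its value to true only if the entire
// requested range [split start, end of split) was found in the cache.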
BooleanRef gotAllData = new BooleanRef();
long endOfSplit = split.getStart() + split.getLength();
this.cachedData = cache.getFileData(fileKey, split.getStart(), endOfSplit, writerIncludes, CC_FACTORY, counters, gotAllData);
if (cachedData == null) {
if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
LlapIoImpl.CACHE_LOGGER.trace("No data for the split found in cache");
}
return false;
}
String[] hosts = extractHosts(split, false), inMemoryHosts = extractHosts(split, true);
List<StripeData> slices = cachedData.getData();
if (slices.isEmpty())
return false;
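// The cached slices cover the range from the first slice's known torn start to the last
// slice's end; anything in the split before or after that range is not cached and must
// still be read from the file (the uncached prefix and suffix handled below).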
long uncachedPrefixEnd = slices.get(0).getKnownTornStart(), uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd();
Ref<Integer> stripeIx = Ref.from(0);
if (uncachedPrefixEnd > split.getStart()) {
// TODO: can we merge neighboring splits? So we don't init so many readers.
FileSplit sliceSplit = new FileSplit(split.getPath(), split.getStart(), uncachedPrefixEnd - split.getStart(), hosts, inMemoryHosts);
if (!processOneFileSplit(sliceSplit, startTime, stripeIx, null))
return null;
}
while (!slices.isEmpty()) {
StripeData slice = slices.remove(0);
long start = slice.getKnownTornStart();
// Will also read the last row.
long len = slice.getLastStart() - start;
FileSplit sliceSplit = new FileSplit(split.getPath(), start, len, hosts, inMemoryHosts);
if (!processOneFileSplit(sliceSplit, startTime, stripeIx, slice))
return null;
}
boolean isUnfortunate = false;
if (uncachedSuffixStart == endOfSplit) {
// This is rather obscure. The end of last row cached is precisely at the split end offset.
// If the split is in the middle of the file, LRR would read one more row after that,
// therefore as unfortunate as it is, we have to do a one-row read. However, for that to
// have happened, someone should have supplied a split that ends inside the last row, i.e.
// a few bytes earlier than the current split, which is pretty unlikely. What is more likely
// is that the split, and the last row, both end at the end of file. Check for this.
long size = split.getPath().getFileSystem(daemonConf).getFileStatus(split.getPath()).getLen();
isUnfortunate = size > endOfSplit;
if (isUnfortunate) {
// Log at warn, given how unfortunate this is.
LlapIoImpl.LOG.warn("One-row mismatch at the end of split " + split.getPath() + " at " + endOfSplit + "; file size is " + size);
}
}
if (uncachedSuffixStart < endOfSplit || isUnfortunate) {
// Note: we assume a 0-length split is correct given how LRR interprets offsets (reading an
// extra row). Should we instead assume 1+ chars and add 1 for isUnfortunate?
FileSplit splitPart = new FileSplit(split.getPath(), uncachedSuffixStart, endOfSplit - uncachedSuffixStart, hosts, inMemoryHosts);
if (!processOneFileSplit(splitPart, startTime, stripeIx, null))
return null;
}
return true;
}
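The common thread in these examples is that BooleanRef is a mutable holder used as an out-parameter, which lets a method return its data while also reporting whether everything came from the cache. Below is a minimal, self-contained sketch of that pattern; the BooleanRefSketch class, the lookup helper, and its int[] "cache" are invented for illustration, and only the BooleanRef handling mirrors the Hive code.

import org.apache.hadoop.hive.common.io.DataCache.BooleanRef;

public class BooleanRefSketch {
  // Hypothetical lookup: returns a value and reports, through the out-parameter,
  // whether everything requested was served from the "cache".
  static int lookup(int[] cache, int key, BooleanRef gotAllData) {
    boolean hit = key >= 0 && key < cache.length;
    if (gotAllData != null) {
      gotAllData.value = hit;
    }
    return hit ? cache[key] : -1;
  }

  public static void main(String[] args) {
    BooleanRef gotAllData = new BooleanRef();
    int value = lookup(new int[] { 10, 20, 30 }, 1, gotAllData);
    System.out.println(value + ", fully cached: " + gotAllData.value);
  }
}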
Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.
The class EncodedReaderImpl, method getDataFromCacheAndDisk.
private DiskRangeList.MutateHelper getDataFromCacheAndDisk(DiskRangeList listToRead, long stripeOffset, boolean hasFileId, IdentityHashMap<ByteBuffer, Boolean> toRelease) throws IOException {
DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead);
if (LOG.isInfoEnabled()) {
LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
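// Like gotAllData above, isAllInCache is an out-parameter; getFileData sets it to true
// only when every requested disk range was satisfied from the cache.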
BooleanRef isAllInCache = new BooleanRef();
if (hasFileId) {
cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
if (LOG.isInfoEnabled()) {
LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.CACHE);
}
// When a buffer can be freed in advance, it is removed from the toRelease map.
if (!isAllInCache.value) {
boolean hasError = true;
try {
if (!isDataReaderOpen) {
this.dataReader.open();
isDataReaderOpen = true;
}
dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
toRelease = new IdentityHashMap<>();
DiskRangeList drl = toRead.next;
while (drl != null) {
if (drl instanceof BufferChunk) {
toRelease.put(drl.getData(), true);
}
drl = drl.next;
}
hasError = false;
} finally {
// We are assuming here that toRelease will not be present in such cases.
if (hasError) {
releaseInitialRefcounts(toRead.next);
}
}
}
return toRead;
}
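The try/finally above uses a hasError flag so that already-acquired buffers are released only when something goes wrong partway through. A small sketch of that idiom in isolation; the Resource class and the acquireAll helper are placeholders, not Hive APIs.

import java.util.ArrayList;
import java.util.List;

public class CleanupOnErrorSketch {
  static class Resource {
    void release() {
      System.out.println("released");
    }
  }

  // Acquire n resources; if any acquisition throws, release what was already acquired.
  static List<Resource> acquireAll(int n) {
    List<Resource> acquired = new ArrayList<>();
    boolean hasError = true;
    try {
      for (int i = 0; i < n; ++i) {
        acquired.add(new Resource()); // may throw partway through
      }
      hasError = false; // reached only if every acquisition succeeded
      return acquired;
    } finally {
      if (hasError) {
        // Undo the partial work; on success the caller owns the resources.
        for (Resource r : acquired) {
          r.release();
        }
      }
    }
  }

  public static void main(String[] args) {
    System.out.println("acquired " + acquireAll(3).size() + " resources");
  }
}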
Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.
The class EncodedReaderImpl, method readEncodedColumns.
@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] included, boolean[][] colRgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
// Note: for now we don't have to setError here, caller will setError if we throw.
// We are also not supposed to call setDone, since we are only part of the operation.
long stripeOffset = stripe.getOffset();
// 1. Figure out what we have to read.
// Stream offset in relation to the stripe.
long offset = 0;
// 1.1. Figure out which columns have a present stream
boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
if (isTracingEnabled) {
LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
}
// We assume stream list is sorted by column and that non-data
// streams do not interleave data streams for the same column.
// 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
ColumnReadContext[] colCtxs = new ColumnReadContext[included.length];
int colRgIx = -1;
// Don't create a context for column 0 (the root struct column).
for (int i = 1; i < included.length; ++i) {
if (!included[i])
continue;
colCtxs[i] = new ColumnReadContext(i, encodings.get(i), indexes[i], ++colRgIx);
if (isTracingEnabled) {
LOG.trace("Creating context: " + colCtxs[i].toString());
}
}
boolean isCompressed = (codec != null);
CreateHelper listToRead = new CreateHelper();
boolean hasIndexOnlyCols = false;
// Will always be the same for all cols at the moment.
boolean[] includedRgs = null;
for (OrcProto.Stream stream : streamList) {
long length = stream.getLength();
int colIx = stream.getColumn();
OrcProto.Stream.Kind streamKind = stream.getKind();
if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
// We have a stream for an included column, but in the future it might have no data streams.
// It's more like "has at least one column included that has an index stream".
hasIndexOnlyCols = hasIndexOnlyCols || included[colIx];
if (isTracingEnabled) {
LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
}
offset += length;
continue;
}
ColumnReadContext ctx = colCtxs[colIx];
assert ctx != null;
includedRgs = colRgs[ctx.includedIx];
int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
ctx.addStream(offset, stream, indexIx);
if (isTracingEnabled) {
LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
}
if (includedRgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
if (isTracingEnabled) {
LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
}
} else {
RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRgs, codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
}
offset += length;
}
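// Cache lookups below are gated on hasFileId: without a stable file key there is no way
// to address this file's data in the cache.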
boolean hasFileId = this.fileKey != null;
if (listToRead.get() == null) {
// TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
if (hasIndexOnlyCols && (includedRgs == null)) {
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
consumer.consumeData(ecb);
} else {
LOG.warn("Nothing to read for stripe [" + stripe + "]");
}
return;
}
// 2. Now, read all of the ranges from cache or disk.
DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead.get());
if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
BooleanRef isAllInCache = new BooleanRef();
if (hasFileId) {
cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
}
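// Anything the cache could not supply is read from the file below.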
if (!isAllInCache.value) {
if (!isDataReaderOpen) {
this.dataReader.open();
isDataReaderOpen = true;
}
dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
}
// 3. For the uncompressed case, we need some special processing before reading.
// Keep "toRead" list for future use, don't extract().
DiskRangeList iter = toRead.next;
if (codec == null) {
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
DiskRangeList newIter = preReadUncompressedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length);
if (newIter != null) {
iter = newIter;
}
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after pre-read (file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
// Reset the iter to start.
iter = toRead.next;
}
// 4. Finally, decompress data, map per RG, and return to caller.
// We go by RG and not by column because that is how data is processed.
int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
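// For example, a stripe of 1,000,000 rows with the default row index stride of 10,000 yields 100 row groups.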
for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
boolean isLastRg = rgIx == rgCount - 1;
// Create the batch we will use to return data for this RG.
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
ecb.init(fileKey, stripeIx, rgIx, included.length);
boolean isRGSelected = true;
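// isRGSelected is cleared below when this RG is filtered out for a column; only selected
// RGs are handed to the consumer at the end of the loop.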
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
if (isTracingEnabled) {
LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
}
// TODO: simplify this now that high-level cache has been removed. Same RGs for all cols.
if (colRgs[ctx.includedIx] != null && !colRgs[ctx.includedIx][rgIx]) {
// RG x col filtered.
isRGSelected = false;
if (isTracingEnabled) {
LOG.trace("colIxMod: {} rgIx: {} colRgs[{}]: {} colRgs[{}][{}]: {}", ctx.includedIx, rgIx, ctx.includedIx, Arrays.toString(colRgs[ctx.includedIx]), ctx.includedIx, rgIx, colRgs[ctx.includedIx][rgIx]);
}
continue;
}
OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
ecb.initOrcColumn(ctx.colIx);
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
ColumnStreamData cb = null;
try {
if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
// This stream is for entire stripe and needed for every RG; uncompress once and reuse.
if (isTracingEnabled) {
LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
}
if (sctx.stripeLevelStream == null) {
sctx.stripeLevelStream = POOLS.csdPool.take();
// We will be using this for each RG while also sending RGs to processing.
// To avoid buffers being unlocked, run refcount one ahead; we will not increase
// it when building the last RG, so each RG processing will decref once, and the
// last one will unlock the buffers.
sctx.stripeLevelStream.incRef();
// For stripe-level streams we don't need the extra refcount on the block.
// See class comment about refcounts.
long unlockUntilCOffset = sctx.offset + sctx.length;
DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset);
if (lastCached != null) {
iter = lastCached;
}
}
if (!isLastRg) {
sctx.stripeLevelStream.incRef();
}
cb = sctx.stripeLevelStream;
} else {
// This stream can be separated by RG using index. Let's do that.
// Offset to where this RG begins.
long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
// Offset relative to the beginning of the stream of where this RG ends.
long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
// Offset before which this RG is guaranteed to end. Can only be estimated.
// We estimate the same way for compressed and uncompressed for now.
long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
// As we read, we can unlock initial refcounts for the buffers that end before
// the data that we need for this RG.
long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed);
boolean isStartOfStream = sctx.bufferIter == null;
DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset);
if (lastCached != null) {
sctx.bufferIter = iter = lastCached;
}
}
ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
} catch (Exception ex) {
DiskRangeList drl = toRead == null ? null : toRead.next;
LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
}
}
}
if (isRGSelected) {
consumer.consumeData(ecb);
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
// Release the unreleased buffers. See class comment about refcounts.
releaseInitialRefcounts(toRead.next);
releaseCacheChunksIntoObjectPool(toRead.next);
}
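The row-group slicing in step 4 is driven by the ORC row index: each index entry records where a row group starts within each stream, and the end of a row group is taken from the start of the next one (or from the stream length for the last one). Below is a simplified, self-contained sketch of that arithmetic with made-up numbers; the RgOffsetSketch class is invented for illustration and it skips the compression-aware padding that estimateRgEndOffset applies.

public class RgOffsetSketch {
  public static void main(String[] args) {
    long streamOffset = 4000L;            // where the stream starts within the stripe (sctx.offset)
    long streamLength = 1200L;            // total stream length (sctx.length)
    long[] rgStarts = { 0L, 500L, 900L }; // row-group start positions from the row index

    for (int rgIx = 0; rgIx < rgStarts.length; ++rgIx) {
      boolean isLastRg = rgIx == rgStarts.length - 1;
      long cOffset = streamOffset + rgStarts[rgIx];                // where this RG begins
      long nextRel = isLastRg ? streamLength : rgStarts[rgIx + 1]; // next RG start, or end of stream
      long endCOffset = streamOffset + nextRel;                    // conservative end for this RG
      System.out.println("RG " + rgIx + ": read [" + cOffset + ", " + endCOffset + ")");
    }
  }
}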