Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
Class EncodedReaderImpl, method readIndexStreams:
@Override
public void readIndexStreams(OrcIndex index, StripeInformation stripe, List<OrcProto.Stream> streams, boolean[] physicalFileIncludes, boolean[] sargColumns) throws IOException {
long stripeOffset = stripe.getOffset();
DiskRangeList indexRanges = planIndexReading(fileSchema, streams, true, physicalFileIncludes, sargColumns, version, index.getBloomFilterKinds());
if (indexRanges == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Nothing to read for stripe [" + stripe + "]");
}
return;
}
ReadContext[] colCtxs = new ReadContext[physicalFileIncludes.length];
int colRgIx = -1;
for (int i = 0; i < physicalFileIncludes.length; ++i) {
if (!physicalFileIncludes[i] && (sargColumns == null || !sargColumns[i]))
continue;
colCtxs[i] = new ReadContext(i, ++colRgIx);
if (isTracingEnabled) {
LOG.trace("Creating context: " + colCtxs[i].toString());
}
// Bogus encoding.
trace.logColumnRead(i, colRgIx, ColumnEncoding.Kind.DIRECT);
}
long offset = 0;
for (OrcProto.Stream stream : streams) {
long length = stream.getLength();
int colIx = stream.getColumn();
OrcProto.Stream.Kind streamKind = stream.getKind();
// See planIndexReading - only read non-row-index streams if involved in SARGs.
if ((StreamName.getArea(streamKind) == StreamName.Area.INDEX) && ((sargColumns != null && sargColumns[colIx]) || (physicalFileIncludes[colIx] && streamKind == Kind.ROW_INDEX))) {
trace.logAddStream(colIx, streamKind, offset, length, -1, true);
colCtxs[colIx].addStream(offset, stream, -1);
if (isTracingEnabled) {
LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
}
}
offset += length;
}
boolean hasFileId = this.fileKey != null;
// 2. Now, read all of the ranges from cache or disk.
IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
MutateHelper toRead = getDataFromCacheAndDisk(indexRanges, stripeOffset, hasFileId, toRelease);
// 3. For uncompressed case, we need some special processing before read.
DiskRangeList iter = preReadUncompressedStreams(stripeOffset, colCtxs, toRead, toRelease);
// 4. Decompress the data.
boolean hasError = true;
try {
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
try {
if (isTracingEnabled) {
LOG.trace("Getting index stream " + sctx.kind + " for column " + ctx.colIx + " at " + sctx.offset + ", " + sctx.length);
}
ColumnStreamData csd = POOLS.csdPool.take();
long endCOffset = sctx.offset + sctx.length;
DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, endCOffset, csd, endCOffset, sctx.offset, toRelease);
if (lastCached != null) {
iter = lastCached;
}
if (isTracingEnabled) {
traceLogBuffersUsedToParse(csd);
}
CodedInputStream cis = CodedInputStream.newInstance(new IndexStream(csd.getCacheBuffers(), sctx.length));
cis.setSizeLimit(InStream.PROTOBUF_MESSAGE_MAX_LIMIT);
switch(sctx.kind) {
case ROW_INDEX:
OrcProto.RowIndex tmp = index.getRowGroupIndex()[colIx] = OrcProto.RowIndex.parseFrom(cis);
if (isTracingEnabled) {
LOG.trace("Index is " + tmp.toString().replace('\n', ' '));
}
break;
case BLOOM_FILTER:
case BLOOM_FILTER_UTF8:
index.getBloomFilterIndex()[colIx] = OrcProto.BloomFilterIndex.parseFrom(cis);
break;
default:
throw new AssertionError("Unexpected index stream type " + sctx.kind);
}
// We are done with the buffers; unlike data blocks, we are also the consumer. Release.
for (MemoryBuffer buf : csd.getCacheBuffers()) {
if (buf == null)
continue;
cacheWrapper.releaseBuffer(buf);
}
} catch (Exception ex) {
DiskRangeList drl = toRead == null ? null : toRead.next;
LOG.error("Error getting stream " + sctx.kind + " for column " + ctx.colIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
}
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
hasError = false;
} finally {
// Release the unreleased buffers. See class comment about refcounts.
try {
releaseInitialRefcounts(toRead.next);
releaseBuffers(toRelease.keySet(), true);
} catch (Throwable t) {
if (!hasError)
throw new IOException(t);
LOG.error("Error during the cleanup after another error; ignoring", t);
}
}
}
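The stream loop in readIndexStreams depends on ORC laying streams out back to back inside the stripe, so each stream's absolute offset is just the running sum of the lengths that precede it. The standalone sketch below (not part of Hive; the stream kinds and lengths are invented for illustration) shows that accumulation on its own.

public class StreamOffsetSketch {
  public static void main(String[] args) {
    // Hypothetical per-stream lengths, in stripe order.
    String[] kinds = { "ROW_INDEX", "BLOOM_FILTER_UTF8", "DATA" };
    long[] lengths = { 120, 64, 4096 };
    long offset = 0;
    for (int i = 0; i < kinds.length; ++i) {
      System.out.println(kinds[i] + " at " + offset + ", " + lengths[i]);
      offset += lengths[i]; // the next stream starts where this one ends
    }
    // Prints offsets 0, 120 and 184 - the same arithmetic the loop above performs
    // before deciding whether an index stream is worth reading.
  }
}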
Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
Class EncodedReaderImpl, method planIndexReading:
// TODO: temporary, need to expose from ORC utils (note the difference in null checks)
static DiskRangeList planIndexReading(TypeDescription fileSchema, List<OrcProto.Stream> streams, boolean ignoreNonUtf8BloomFilter, boolean[] fileIncluded, boolean[] sargColumns, WriterVersion version, OrcProto.Stream.Kind[] bloomFilterKinds) {
DiskRangeList.CreateHelper result = new DiskRangeList.CreateHelper();
// Picks bloom_filter_utf8 if it's available, otherwise bloom_filter.
if (sargColumns != null) {
for (OrcProto.Stream stream : streams) {
if (stream.hasKind() && stream.hasColumn()) {
int column = stream.getColumn();
if (sargColumns[column]) {
switch(stream.getKind()) {
case BLOOM_FILTER:
if (bloomFilterKinds[column] == null && !(ignoreNonUtf8BloomFilter && hadBadBloomFilters(fileSchema.findSubtype(column).getCategory(), version))) {
bloomFilterKinds[column] = OrcProto.Stream.Kind.BLOOM_FILTER;
}
break;
case BLOOM_FILTER_UTF8:
bloomFilterKinds[column] = OrcProto.Stream.Kind.BLOOM_FILTER_UTF8;
break;
default:
break;
}
}
}
}
}
long offset = 0;
for (OrcProto.Stream stream : streams) {
if (stream.hasKind() && stream.hasColumn()) {
int column = stream.getColumn();
if (fileIncluded == null || fileIncluded[column]) {
boolean needStream = false;
switch(stream.getKind()) {
case ROW_INDEX:
needStream = true;
break;
case BLOOM_FILTER:
case BLOOM_FILTER_UTF8:
needStream = (sargColumns != null) && (bloomFilterKinds[column] == stream.getKind());
break;
default:
// PASS
break;
}
if (needStream) {
result.addOrMerge(offset, offset + stream.getLength(), true, false);
}
}
}
offset += stream.getLength();
}
return result.get();
}
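planIndexReading leans on DiskRangeList.CreateHelper.addOrMerge to coalesce index streams that sit next to each other into a single disk range. Below is a minimal standalone sketch (not Hive code; the offsets are made up), assuming addOrMerge with doMerge=true merges contiguous ranges, which is what the planning code relies on.

import org.apache.hadoop.hive.common.io.DiskRangeList;

public class RangePlanSketch {
  public static void main(String[] args) {
    DiskRangeList.CreateHelper helper = new DiskRangeList.CreateHelper();
    // Two streams laid out back to back, then one after a gap.
    helper.addOrMerge(0, 100, true, false);
    helper.addOrMerge(100, 250, true, false); // contiguous with the previous range, so merged
    helper.addOrMerge(400, 500, true, false); // gap, so a new range is started
    for (DiskRangeList r = helper.get(); r != null; r = r.next) {
      System.out.println(r.getOffset() + " .. " + r.getEnd());
    }
    // Expected two ranges, [0, 250) and [400, 500), i.e. two reads instead of three.
  }
}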
Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
Class EncodedReaderImpl, method readEncodedColumns:
@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] physicalFileIncludes, boolean[] rgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
// Note: for now we don't have to setError here, caller will setError if we throw.
// We are also not supposed to call setDone, since we are only part of the operation.
long stripeOffset = stripe.getOffset();
// 1. Figure out what we have to read.
// Stream offset in relation to the stripe.
long offset = 0;
// 1.1. Figure out which columns have a present stream
boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
if (isTracingEnabled) {
LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
}
// We assume stream list is sorted by column and that non-data
// streams do not interleave data streams for the same column.
// 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
ColumnReadContext[] colCtxs = new ColumnReadContext[physicalFileIncludes.length];
int colRgIx = -1;
// Don't create context for the 0-s column.
for (int i = 1; i < physicalFileIncludes.length; ++i) {
if (!physicalFileIncludes[i])
continue;
ColumnEncoding enc = encodings.get(i);
colCtxs[i] = new ColumnReadContext(i, enc, indexes[i], ++colRgIx);
if (isTracingEnabled) {
LOG.trace("Creating context: " + colCtxs[i].toString());
}
trace.logColumnRead(i, colRgIx, enc.getKind());
}
CreateHelper listToRead = new CreateHelper();
boolean hasIndexOnlyCols = false;
for (OrcProto.Stream stream : streamList) {
long length = stream.getLength();
int colIx = stream.getColumn();
OrcProto.Stream.Kind streamKind = stream.getKind();
if (!physicalFileIncludes[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
// We have a stream for an included column, but in the future it might have no data streams.
// It's more like "has at least one column included that has an index stream".
hasIndexOnlyCols = hasIndexOnlyCols || physicalFileIncludes[colIx];
if (isTracingEnabled) {
LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
}
trace.logSkipStream(colIx, streamKind, offset, length);
offset += length;
continue;
}
ColumnReadContext ctx = colCtxs[colIx];
assert ctx != null;
int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
ctx.addStream(offset, stream, indexIx);
if (isTracingEnabled) {
LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
}
if (rgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
trace.logAddStream(colIx, streamKind, offset, length, indexIx, true);
RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
if (isTracingEnabled) {
LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
}
} else {
trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
RecordReaderUtils.addRgFilteredStreamToRanges(stream, rgs, isCompressed, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
}
offset += length;
}
boolean hasFileId = this.fileKey != null;
if (listToRead.get() == null) {
// TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
if (hasIndexOnlyCols && (rgs == null)) {
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, physicalFileIncludes.length);
try {
consumer.consumeData(ecb);
} catch (InterruptedException e) {
LOG.error("IO thread interrupted while queueing data");
throw new IOException(e);
}
} else {
LOG.warn("Nothing to read for stripe [" + stripe + "]");
}
return;
}
// 2. Now, read all of the ranges from cache or disk.
IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
MutateHelper toRead = getDataFromCacheAndDisk(listToRead.get(), stripeOffset, hasFileId, toRelease);
// 3. For uncompressed case, we need some special processing before read.
// Basically, we are trying to create artificial, consistent ranges to cache, as there are
// no CBs in an uncompressed file. At the end of this processing, the list would contain
// either cache buffers, or buffers allocated by us and not cached (if we are only reading
// parts of the data for some ranges and don't want to cache it). Both are represented by
// CacheChunks, so the list is just CacheChunk-s from that point on.
DiskRangeList iter = preReadUncompressedStreams(stripeOffset, colCtxs, toRead, toRelease);
// 4. Finally, decompress data, map per RG, and return to caller.
// We go by RG and not by column because that is how data is processed.
boolean hasError = true;
try {
int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
if (rgs != null && !rgs[rgIx]) {
// RG filtered.
continue;
}
boolean isLastRg = rgIx == rgCount - 1;
// Create the batch we will use to return data for this RG.
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
trace.logStartRg(rgIx);
boolean hasErrorForEcb = true;
try {
ecb.init(fileKey, stripeIx, rgIx, physicalFileIncludes.length);
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
if (isTracingEnabled) {
LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
}
OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
ecb.initOrcColumn(ctx.colIx);
trace.logStartCol(ctx.colIx);
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
ColumnStreamData cb = null;
try {
if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
// This stream is for the entire stripe and needed for every RG; uncompress once and reuse.
if (isTracingEnabled) {
LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
}
trace.logStartStripeStream(sctx.kind);
if (sctx.stripeLevelStream == null) {
sctx.stripeLevelStream = POOLS.csdPool.take();
// We will be using this for each RG while also sending RGs to processing.
// To avoid buffers being unlocked, run refcount one ahead; so each RG
// processing will decref once, and the last one will unlock the buffers.
sctx.stripeLevelStream.incRef();
// For stripe-level streams we don't need the extra refcount on the block.
// See class comment about refcounts.
long unlockUntilCOffset = sctx.offset + sctx.length;
DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset, toRelease);
if (lastCached != null) {
iter = lastCached;
}
}
sctx.stripeLevelStream.incRef();
cb = sctx.stripeLevelStream;
} else {
// This stream can be separated by RG using index. Let's do that.
// Offset to where this RG begins.
long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
// Offset relative to the beginning of the stream of where this RG ends.
long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
// Offset before which this RG is guaranteed to end. Can only be estimated.
// We estimate the same way for compressed and uncompressed for now.
long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
// As we read, we can unlock initial refcounts for the buffers that end before
// the data that we need for this RG.
long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed, unlockUntilCOffset);
boolean isStartOfStream = sctx.bufferIter == null;
DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset, toRelease);
if (lastCached != null) {
sctx.bufferIter = iter = lastCached;
}
}
ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
} catch (Exception ex) {
DiskRangeList drl = toRead == null ? null : toRead.next;
LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
}
}
}
hasErrorForEcb = false;
} finally {
if (hasErrorForEcb) {
releaseEcbRefCountsOnError(ecb);
}
}
try {
consumer.consumeData(ecb);
// After this, the non-initial refcounts are the responsibility of the consumer.
} catch (InterruptedException e) {
LOG.error("IO thread interrupted while queueing data");
releaseEcbRefCountsOnError(ecb);
throw new IOException(e);
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.PREREAD);
hasError = false;
} finally {
try {
// Release the unreleased stripe-level buffers. See class comment about refcounts.
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
if (sctx == null || sctx.stripeLevelStream == null)
continue;
if (0 != sctx.stripeLevelStream.decRef())
continue;
// essentially the "consumer" refcount being released here.
for (MemoryBuffer buf : sctx.stripeLevelStream.getCacheBuffers()) {
if (LOG.isTraceEnabled()) {
LOG.trace("Unlocking {} at the end of processing", buf);
}
cacheWrapper.releaseBuffer(buf);
}
}
}
releaseInitialRefcounts(toRead.next);
// Release buffers as we are done with all the streams... also see toRelease comment.
releaseBuffers(toRelease.keySet(), true);
} catch (Throwable t) {
if (!hasError)
throw new IOException(t);
LOG.error("Error during the cleanup after another error; ignoring", t);
}
}
}
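The row-group loop above sizes itself from the stripe row count and the row index stride. A small worked example of that arithmetic (the numbers are invented):

public class RgCountSketch {
  public static void main(String[] args) {
    long numberOfRows = 1_234_567L; // rows in the stripe (hypothetical)
    int rowIndexStride = 10_000;    // rows per row group, from the file footer (hypothetical)
    int rgCount = (int) Math.ceil((double) numberOfRows / rowIndexStride);
    long lastRgRows = numberOfRows - (long) (rgCount - 1) * rowIndexStride;
    // 124 row groups; the last, partial one covers 4567 rows and is the reason
    // for the isLastRg special-casing when computing RG end offsets.
    System.out.println(rgCount + " row groups, last one has " + lastRgRows + " rows");
  }
}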
Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
Class EncodedReaderImpl, method readLengthBytesFromSmallBuffers:
@VisibleForTesting
static BufferChunk readLengthBytesFromSmallBuffers(BufferChunk first, long cbStartOffset, int[] result, List<IncompleteCb> badEstimates, boolean isTracingEnabled, IoTrace trace) throws IOException {
if (!first.hasContiguousNext()) {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, first, 0, isTracingEnabled, trace));
// This is impossible to read from this chunk.
return null;
}
int ix = readLengthBytes(first.getChunk(), result, 0);
assert ix < 3; // Otherwise we wouldn't be here.
DiskRangeList current = first.next;
first.removeSelf();
while (true) {
if (!(current instanceof BufferChunk)) {
throw new IOException("Trying to extend compressed block into uncompressed block " + current);
}
BufferChunk currentBc = (BufferChunk) current;
ix = readLengthBytes(currentBc.getChunk(), result, ix);
// Done, we have 3 bytes. Continue reading this buffer.
if (ix == 3)
return currentBc;
DiskRangeList tmp = current;
current = current.hasContiguousNext() ? current.next : null;
if (current != null) {
if (isTracingEnabled) {
LOG.trace("Removing partial CB " + tmp + " from ranges after copying its contents");
}
trace.logPartialCb(tmp);
tmp.removeSelf();
} else {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, tmp, -1, isTracingEnabled, trace));
// This is impossible to read from this chunk.
return null;
}
}
}
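readLengthBytes itself is not shown in this snippet; the sketch below (my own illustration, not the Hive helper) captures the contract the loop above depends on: pull at most three header bytes out of the current chunk, starting from however many were already read, and return the new count so the caller can continue in the next contiguous chunk.

import java.nio.ByteBuffer;

public class LengthBytesSketch {
  // Hypothetical stand-in for the private readLengthBytes helper.
  static int readLengthBytes(ByteBuffer compressed, int[] bytes, int ix) {
    while (ix < 3 && compressed.hasRemaining()) {
      bytes[ix++] = compressed.get() & 0xff;
    }
    return ix; // anything below 3 means the header continues in the next chunk
  }

  public static void main(String[] args) {
    int[] header = new int[3];
    // The 3-byte header is split across two tiny chunks: two bytes, then one.
    int ix = readLengthBytes(ByteBuffer.wrap(new byte[] { 0x40, 0x0D }), header, 0);
    ix = readLengthBytes(ByteBuffer.wrap(new byte[] { 0x03 }), header, ix);
    System.out.println("header bytes read: " + ix); // prints 3
  }
}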
Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
Class EncodedReaderImpl, method addOneCompressionBuffer:
/**
* Reads one compression block from the source; handles compression blocks read from
* multiple ranges (usually, that would only happen with zcr).
* Adds stuff to cacheBuffers, toDecompress and toRelease (see below for what each does).
* @param current BufferChunk where compression block starts.
* @param cacheBuffers The result buffer array to add pre-allocated target cache buffer.
* @param toDecompress The list of work to decompress - pairs of compressed buffers and the
* target buffers (same as the ones added to cacheBuffers).
* @param toRelease The list of buffers to release to zcr because they are no longer in use.
* @param toReleaseCopies The list of temporary buffer copies (made to consolidate compression blocks split across ranges) that are always released at the end.
* @param badEstimates The list of bad estimates that cannot be decompressed.
* @return The resulting cache chunk.
*/
private ProcCacheChunk addOneCompressionBuffer(BufferChunk current, List<MemoryBuffer> cacheBuffers, List<ProcCacheChunk> toDecompress, IdentityHashMap<ByteBuffer, Boolean> toRelease, List<ByteBuffer> toReleaseCopies, List<IncompleteCb> badEstimates) throws IOException {
ByteBuffer slice = null;
ByteBuffer compressed = current.getChunk();
long cbStartOffset = current.getOffset();
int b0 = -1, b1 = -1, b2 = -1;
// First, read the CB header. Due to ORC estimates, ZCR, etc. this can be complex.
if (compressed.remaining() >= 3) {
// The overwhelming majority of cases will go here. Read 3 bytes. Tada!
b0 = compressed.get() & 0xff;
b1 = compressed.get() & 0xff;
b2 = compressed.get() & 0xff;
} else {
// Bad luck! Handle the corner cases where 3 bytes are in multiple blocks.
int[] bytes = new int[3];
current = readLengthBytesFromSmallBuffers(current, cbStartOffset, bytes, badEstimates, isTracingEnabled, trace);
if (current == null)
return null;
compressed = current.getChunk();
b0 = bytes[0];
b1 = bytes[1];
b2 = bytes[2];
}
int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
if (chunkLength > bufferSize) {
throw new IllegalArgumentException("Buffer size too small. size = " + bufferSize + " needed = " + chunkLength);
}
int consumedLength = chunkLength + OutStream.HEADER_SIZE;
long cbEndOffset = cbStartOffset + consumedLength;
boolean isUncompressed = ((b0 & 0x01) == 1);
if (isTracingEnabled) {
LOG.trace("Found CB at " + cbStartOffset + ", chunk length " + chunkLength + ", total " + consumedLength + ", " + (isUncompressed ? "not " : "") + "compressed");
}
trace.logOrcCb(cbStartOffset, chunkLength, isUncompressed);
if (compressed.remaining() >= chunkLength) {
// Simple case - CB fits entirely in the disk range.
slice = compressed.slice();
slice.limit(chunkLength);
return addOneCompressionBlockByteBuffer(slice, isUncompressed, cbStartOffset, cbEndOffset, chunkLength, current, toDecompress, cacheBuffers, false);
}
if (current.getEnd() < cbEndOffset && !current.hasContiguousNext()) {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, current, 0, isTracingEnabled, trace));
// This is impossible to read from this chunk.
return null;
}
// TODO: we could remove extra copy for isUncompressed case by copying directly to cache.
// We need to consolidate 2 or more buffers into one to decompress.
ByteBuffer copy = allocateBuffer(chunkLength, compressed.isDirect());
// We will always release copies at the end.
toReleaseCopies.add(copy);
int remaining = chunkLength - compressed.remaining();
int originalPos = compressed.position();
copy.put(compressed);
if (isTracingEnabled) {
LOG.trace("Removing partial CB " + current + " from ranges after copying its contents");
}
trace.logPartialCb(current);
DiskRangeList next = current.next;
current.removeSelf();
if (originalPos == 0 && toRelease.remove(compressed)) {
releaseBuffer(compressed, true);
}
int extraChunkCount = 0;
while (true) {
if (!(next instanceof BufferChunk)) {
throw new IOException("Trying to extend compressed block into uncompressed block " + next);
}
compressed = next.getData();
++extraChunkCount;
if (compressed.remaining() >= remaining) {
// This is the last range for this compression block. Yay!
slice = compressed.slice();
slice.limit(remaining);
copy.put(slice);
ProcCacheChunk cc = addOneCompressionBlockByteBuffer(copy, isUncompressed, cbStartOffset, cbEndOffset, remaining, (BufferChunk) next, toDecompress, cacheBuffers, true);
if (compressed.remaining() <= 0 && toRelease.remove(compressed)) {
// We copied the entire buffer.
releaseBuffer(compressed, true);
}
// else there's more data to process; will be handled in next call.
return cc;
}
remaining -= compressed.remaining();
// TODO: move into the if below; account for release call
copy.put(compressed);
if (toRelease.remove(compressed)) {
// We copied the entire buffer.
releaseBuffer(compressed, true);
}
DiskRangeList tmp = next;
next = next.hasContiguousNext() ? next.next : null;
if (next != null) {
if (isTracingEnabled) {
LOG.trace("Removing partial CB " + tmp + " from ranges after copying its contents");
}
trace.logPartialCb(tmp);
tmp.removeSelf();
} else {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, tmp, extraChunkCount, isTracingEnabled, trace));
// This is impossible to read from this chunk.
return null;
}
}
}
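The header decoding at the top of addOneCompressionBuffer follows the ORC compression block format: a 3-byte little-endian header whose low bit marks an uncompressed ("original") chunk and whose remaining 23 bits are the chunk length. A standalone worked example (the header bytes are made up):

public class OrcCbHeaderSketch {
  public static void main(String[] args) {
    int b0 = 0x40, b1 = 0x0D, b2 = 0x03; // hypothetical header bytes, in stream order
    int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
    boolean isUncompressed = (b0 & 0x01) == 1;
    // 32 + 1664 + 98304 = 100000 bytes; the low bit of b0 is clear, so the chunk is compressed.
    System.out.println("chunk length " + chunkLength + ", uncompressed = " + isUncompressed);
  }
}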