use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class EncodedReaderImpl method addOneCompressionBuffer.
/**
* Reads one compression block from the source; handles compression blocks read from
* multiple ranges (usually, that would only happen with zcr).
* Adds stuff to cachedBuffers, toDecompress and toRelease (see below what each does).
* @param current BufferChunk where compression block starts.
* @param cacheBuffers The result buffer list to which the pre-allocated target cache buffers are added.
* @param toDecompress The list of work to decompress - pairs of compressed buffers and the
* target buffers (same as the ones added to cacheBuffers).
* @param toRelease The list of buffers to release to zcr because they are no longer in use.
* @param badEstimates The list of bad estimates that cannot be decompressed.
* @return The resulting cache chunk.
*/
private ProcCacheChunk addOneCompressionBuffer(BufferChunk current, List<MemoryBuffer> cacheBuffers, List<ProcCacheChunk> toDecompress, List<ByteBuffer> toRelease, List<IncompleteCb> badEstimates) throws IOException {
ByteBuffer slice = null;
ByteBuffer compressed = current.getChunk();
long cbStartOffset = current.getOffset();
int b0 = compressed.get() & 0xff;
int b1 = compressed.get() & 0xff;
int b2 = compressed.get() & 0xff;
int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
if (chunkLength > bufferSize) {
throw new IllegalArgumentException("Buffer size too small. size = " + bufferSize + " needed = " + chunkLength);
}
int consumedLength = chunkLength + OutStream.HEADER_SIZE;
long cbEndOffset = cbStartOffset + consumedLength;
boolean isUncompressed = ((b0 & 0x01) == 1);
if (isTracingEnabled) {
LOG.trace("Found CB at " + cbStartOffset + ", chunk length " + chunkLength + ", total " + consumedLength + ", " + (isUncompressed ? "not " : "") + "compressed");
}
if (compressed.remaining() >= chunkLength) {
// Simple case - CB fits entirely in the disk range.
slice = compressed.slice();
slice.limit(chunkLength);
ProcCacheChunk cc = addOneCompressionBlockByteBuffer(slice, isUncompressed, cbStartOffset, cbEndOffset, chunkLength, current, toDecompress, cacheBuffers);
if (compressed.remaining() <= 0 && dataReader.isTrackingDiskRanges()) {
toRelease.add(compressed);
}
return cc;
}
if (current.getEnd() < cbEndOffset && !current.hasContiguousNext()) {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, current, 0));
// This CB cannot be read from this chunk.
return null;
}
// TODO: we could remove extra copy for isUncompressed case by copying directly to cache.
// We need to consolidate 2 or more buffers into one to decompress.
ByteBuffer copy = allocateBuffer(chunkLength, compressed.isDirect());
int remaining = chunkLength - compressed.remaining();
int originalPos = compressed.position();
copy.put(compressed);
if (isTracingEnabled) {
LOG.trace("Removing partial CB " + current + " from ranges after copying its contents");
}
DiskRangeList next = current.next;
current.removeSelf();
if (dataReader.isTrackingDiskRanges()) {
if (originalPos == 0) {
// We copied the entire buffer.
dataReader.releaseBuffer(compressed);
} else {
// There might be slices depending on this buffer.
toRelease.add(compressed);
}
}
int extraChunkCount = 0;
while (true) {
if (!(next instanceof BufferChunk)) {
throw new IOException("Trying to extend compressed block into uncompressed block " + next);
}
compressed = next.getData();
++extraChunkCount;
if (compressed.remaining() >= remaining) {
// This is the last range for this compression block. Yay!
slice = compressed.slice();
slice.limit(remaining);
copy.put(slice);
ProcCacheChunk cc = addOneCompressionBlockByteBuffer(copy, isUncompressed, cbStartOffset, cbEndOffset, remaining, (BufferChunk) next, toDecompress, cacheBuffers);
if (compressed.remaining() <= 0 && dataReader.isTrackingDiskRanges()) {
// We copied the entire buffer.
dataReader.releaseBuffer(compressed);
}
return cc;
}
remaining -= compressed.remaining();
copy.put(compressed);
if (dataReader.isTrackingDiskRanges()) {
// We copied the entire buffer.
dataReader.releaseBuffer(compressed);
}
DiskRangeList tmp = next;
next = next.hasContiguousNext() ? next.next : null;
if (next != null) {
if (isTracingEnabled) {
LOG.trace("Removing partial CB " + tmp + " from ranges after copying its contents");
}
tmp.removeSelf();
} else {
badEstimates.add(addIncompleteCompressionBuffer(cbStartOffset, tmp, extraChunkCount));
// This CB cannot be read from this chunk.
return null;
}
}
}
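The header parsing at the top of this method follows the ORC compressed-chunk layout: each chunk begins with a three-byte little-endian header (the OutStream.HEADER_SIZE prefix) whose low bit flags an uncompressed ("original") chunk and whose remaining bits hold the chunk length. The following standalone sketch mirrors that bit arithmetic; OrcChunkHeader is a hypothetical helper written for illustration, not part of Hive.

import java.nio.ByteBuffer;

// Minimal sketch of the 3-byte ORC chunk header used above: low bit = "original"
// (uncompressed) flag, remaining 23 bits = chunk length.
final class OrcChunkHeader {
  final int chunkLength;
  final boolean isUncompressed;

  private OrcChunkHeader(int chunkLength, boolean isUncompressed) {
    this.chunkLength = chunkLength;
    this.isUncompressed = isUncompressed;
  }

  // Same bit arithmetic as addOneCompressionBuffer.
  static OrcChunkHeader decode(ByteBuffer buf) {
    int b0 = buf.get() & 0xff;
    int b1 = buf.get() & 0xff;
    int b2 = buf.get() & 0xff;
    int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >> 1);
    return new OrcChunkHeader(chunkLength, (b0 & 0x01) == 1);
  }

  // Inverse operation, handy for building test inputs.
  static void encode(ByteBuffer buf, int chunkLength, boolean isUncompressed) {
    int header = (chunkLength << 1) | (isUncompressed ? 1 : 0);
    buf.put((byte) (header & 0xff));
    buf.put((byte) ((header >> 8) & 0xff));
    buf.put((byte) ((header >> 16) & 0xff));
  }
}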
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class EncodedReaderImpl method prepareRangesForUncompressedRead.
private CacheChunk prepareRangesForUncompressedRead(long cOffset, long endCOffset, long streamOffset, long unlockUntilCOffset, DiskRangeList current, ColumnStreamData columnStreamData) throws IOException {
long currentOffset = cOffset;
CacheChunk lastUncompressed = null;
boolean isFirst = true;
while (true) {
DiskRangeList next = null;
assert current instanceof CacheChunk;
lastUncompressed = (CacheChunk) current;
if (isTracingEnabled) {
LOG.trace("Locking " + lastUncompressed.getBuffer() + " due to reuse");
}
cacheWrapper.reuseBuffer(lastUncompressed.getBuffer());
if (isFirst) {
columnStreamData.setIndexBaseOffset((int) (lastUncompressed.getOffset() - streamOffset));
isFirst = false;
}
columnStreamData.getCacheBuffers().add(lastUncompressed.getBuffer());
currentOffset = lastUncompressed.getEnd();
if (isTracingEnabled) {
LOG.trace("Adding an uncompressed buffer " + lastUncompressed.getBuffer());
}
ponderReleaseInitialRefcount(unlockUntilCOffset, streamOffset, lastUncompressed);
next = current.next;
if (next == null || (endCOffset >= 0 && currentOffset >= endCOffset)) {
break;
}
current = next;
}
return lastUncompressed;
}
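prepareRangesForUncompressedRead simply walks the linked list through current.next, expecting every node to already be a CacheChunk. Below is a minimal sketch of building and traversing such a list with DiskRangeList.CreateHelper, using only calls that appear elsewhere on this page; the boolean arguments to addOrMerge mirror the test usage further down, and DiskRangeListWalk is a hypothetical class name.

import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;

public class DiskRangeListWalk {
  public static void main(String[] args) {
    CreateHelper helper = new CreateHelper();
    helper.addOrMerge(0, 100, true, false);    // range [0, 100)
    helper.addOrMerge(100, 200, true, false);  // contiguous, may merge into [0, 200)
    helper.addOrMerge(500, 600, true, false);  // separate range
    // Walk the resulting doubly-linked list head to tail.
    for (DiskRangeList current = helper.get(); current != null; current = current.next) {
      System.out.println("range [" + current.getOffset() + ", " + current.getEnd() + ")");
    }
  }
}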
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class EncodedReaderImpl method prepareRangesForCompressedRead.
private CacheChunk prepareRangesForCompressedRead(long cOffset, long endCOffset, long streamOffset, long unlockUntilCOffset, DiskRangeList current, ColumnStreamData columnStreamData, List<ByteBuffer> toRelease, List<ProcCacheChunk> toDecompress, List<IncompleteCb> badEstimates) throws IOException {
if (cOffset > current.getOffset()) {
// Target compression block is in the middle of the range; slice the range in two.
current = current.split(cOffset).next;
}
long currentOffset = cOffset;
CacheChunk lastUncompressed = null;
while (true) {
DiskRangeList next = null;
if (current instanceof CacheChunk) {
// 2a. This is a decoded compression buffer, add as is.
CacheChunk cc = (CacheChunk) current;
if (isTracingEnabled) {
LOG.trace("Locking " + cc.getBuffer() + " due to reuse");
}
cacheWrapper.reuseBuffer(cc.getBuffer());
columnStreamData.getCacheBuffers().add(cc.getBuffer());
currentOffset = cc.getEnd();
if (isTracingEnabled) {
LOG.trace("Adding an already-uncompressed buffer " + cc.getBuffer());
}
ponderReleaseInitialRefcount(unlockUntilCOffset, streamOffset, cc);
lastUncompressed = cc;
next = current.next;
if (next != null && (endCOffset >= 0 && currentOffset < endCOffset) && next.getOffset() >= endCOffset) {
throw new IOException("Expected data at " + currentOffset + " (reading until " + endCOffset + "), but the next buffer starts at " + next.getOffset());
}
} else if (current instanceof IncompleteCb) {
// 2b. This is a known incomplete CB caused by ORC CB end boundaries being estimates.
if (isTracingEnabled) {
LOG.trace("Cannot read " + current);
}
next = null;
currentOffset = -1;
} else {
// This is a compressed buffer; it can comprise several disk ranges, so we might need to combine them.
if (!(current instanceof BufferChunk)) {
String msg = "Found an unexpected " + current.getClass().getSimpleName() + ": " + current + " while looking at " + currentOffset;
LOG.error(msg);
throw new RuntimeException(msg);
}
BufferChunk bc = (BufferChunk) current;
ProcCacheChunk newCached = addOneCompressionBuffer(bc, columnStreamData.getCacheBuffers(), toDecompress, toRelease, badEstimates);
lastUncompressed = (newCached == null) ? lastUncompressed : newCached;
next = (newCached != null) ? newCached.next : null;
currentOffset = (next != null) ? next.getOffset() : -1;
}
if (next == null || (endCOffset >= 0 && currentOffset >= endCOffset)) {
break;
}
current = next;
}
return lastUncompressed;
}
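The IOException thrown in branch 2a guards against gaps between cached chunks while data is still expected before endCOffset. A standalone sketch of that contiguity check is shown below, assuming hasContiguousNext() is true only when the next node starts exactly at getEnd(); RangeGapCheck is a hypothetical class written for illustration, not the actual Hive check.

import java.io.IOException;
import org.apache.hadoop.hive.common.io.DiskRangeList;

public class RangeGapCheck {
  // Walk the list and fail if a gap appears before the requested end offset,
  // mirroring the boundary check in prepareRangesForCompressedRead.
  static void checkContiguous(DiskRangeList head, long endCOffset) throws IOException {
    for (DiskRangeList current = head; current != null && current.next != null; current = current.next) {
      boolean stillNeedData = endCOffset < 0 || current.getEnd() < endCOffset;
      if (stillNeedData && !current.hasContiguousNext()) {
        throw new IOException("Expected data at " + current.getEnd() + " (reading until "
            + endCOffset + "), but the next buffer starts at " + current.next.getOffset());
      }
    }
  }
}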
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class EncodedReaderImpl method copyAndReplaceUncompressedChunks.
private static void copyAndReplaceUncompressedChunks(UncompressedCacheChunk candidateCached, ByteBuffer dest, CacheChunk tcc) {
int startPos = dest.position(), startLim = dest.limit();
DiskRangeList next = null;
for (int i = 0; i < candidateCached.getCount(); ++i) {
BufferChunk chunk = (i == 0) ? candidateCached.getChunk() : (BufferChunk) next;
dest.put(chunk.getData());
next = chunk.next;
if (i == 0) {
chunk.replaceSelfWith(tcc);
} else {
chunk.removeSelf();
}
}
int newPos = dest.position();
if (newPos > startLim) {
throw new AssertionError("After copying, buffer [" + startPos + ", " + startLim + ") became [" + newPos + ", " + dest.limit() + ")");
}
dest.position(startPos);
dest.limit(newPos);
}
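The last three lines rewind dest so that only the bytes just written become readable. A plain ByteBuffer.flip() would reset the position to 0, which is only correct when the copy started at position 0; the manual position/limit update preserves an arbitrary start offset. A small self-contained illustration of that bookkeeping (WindowFlipDemo is a hypothetical name):

import java.nio.ByteBuffer;

public class WindowFlipDemo {
  public static void main(String[] args) {
    ByteBuffer dest = ByteBuffer.allocate(64);
    dest.position(16);                  // pretend earlier content occupies [0, 16)
    int startPos = dest.position();
    dest.put(new byte[] {1, 2, 3, 4});  // copy some chunk contents
    int newPos = dest.position();
    // Expose only the just-written window [startPos, newPos) for reading.
    dest.position(startPos);
    dest.limit(newPos);
    System.out.println("readable window: [" + startPos + ", " + newPos + "), remaining=" + dest.remaining());
  }
}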
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class TestLowLevelCacheImpl method testMTTWithCleanup.
@Test
public void testMTTWithCleanup() {
final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
final long fn1 = 1, fn2 = 2;
final int offsetsToUse = 8;
final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);
final AtomicInteger rdmsDone = new AtomicInteger(0);
Callable<Long> rdmCall = new Callable<Long>() {
public Long call() {
int gets = 0, puts = 0;
try {
Random rdm = new Random(1234 + Thread.currentThread().getId());
syncThreadStart(cdlIn, cdlOut);
for (int i = 0; i < 20000; ++i) {
boolean isGet = rdm.nextBoolean(), isFn1 = rdm.nextBoolean();
long fileName = isFn1 ? fn1 : fn2;
int fileIndex = isFn1 ? 1 : 2;
int count = rdm.nextInt(offsetsToUse);
if (isGet) {
int[] offsets = new int[count];
count = generateOffsets(offsetsToUse, rdm, offsets);
CreateHelper list = new CreateHelper();
for (int j = 0; j < count; ++j) {
list.addOrMerge(offsets[j], offsets[j] + 1, true, false);
}
DiskRangeList iter = cache.getFileData(fileName, list.get(), 0, testFactory, null, null);
int j = -1;
while (iter != null) {
++j;
if (!(iter instanceof CacheChunk)) {
iter = iter.next;
continue;
}
++gets;
LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) iter).getBuffer();
assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), result.arenaIndex);
cache.decRefBuffer(result);
iter = iter.next;
}
} else {
DiskRange[] ranges = new DiskRange[count];
int[] offsets = new int[count];
for (int j = 0; j < count; ++j) {
int next = rdm.nextInt(offsetsToUse);
ranges[j] = dr(next, next + 1);
offsets[j] = next;
}
MemoryBuffer[] buffers = new MemoryBuffer[count];
for (int j = 0; j < offsets.length; ++j) {
LlapDataBuffer buf = LowLevelCacheImpl.allocateFake();
buf.arenaIndex = makeFakeArenaIndex(fileIndex, offsets[j]);
buffers[j] = buf;
}
long[] mask = cache.putFileData(fileName, ranges, buffers, 0, Priority.NORMAL, null);
puts += buffers.length;
long maskVal = 0;
if (mask != null) {
assertEquals(1, mask.length);
maskVal = mask[0];
}
for (int j = 0; j < offsets.length; ++j) {
LlapDataBuffer buf = (LlapDataBuffer) (buffers[j]);
if ((maskVal & 1) == 1) {
assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), buf.arenaIndex);
}
maskVal >>= 1;
cache.decRefBuffer(buf);
}
}
}
} finally {
rdmsDone.incrementAndGet();
}
return (((long) gets) << 32) | puts;
}
private int makeFakeArenaIndex(int fileIndex, long offset) {
return (int) ((fileIndex << 16) + offset);
}
};
FutureTask<Integer> evictionTask = new FutureTask<Integer>(new Callable<Integer>() {
public Integer call() {
boolean isFirstFile = false;
Random rdm = new Random(1234 + Thread.currentThread().getId());
int evictions = 0;
syncThreadStart(cdlIn, cdlOut);
while (rdmsDone.get() < 3) {
DiskRangeList head = new DiskRangeList(0, offsetsToUse + 1);
isFirstFile = !isFirstFile;
long fileId = isFirstFile ? fn1 : fn2;
head = cache.getFileData(fileId, head, 0, testFactory, null, null);
DiskRange[] results = head.listToArray();
int startIndex = rdm.nextInt(results.length), index = startIndex;
LlapDataBuffer victim = null;
do {
DiskRange r = results[index];
if (r instanceof CacheChunk) {
LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) r).getBuffer();
cache.decRefBuffer(result);
if (victim == null && result.invalidate()) {
++evictions;
victim = result;
}
}
++index;
if (index == results.length)
index = 0;
} while (index != startIndex);
if (victim == null)
continue;
cache.notifyEvicted(victim);
}
return evictions;
}
});
FutureTask<Long> rdmTask1 = new FutureTask<Long>(rdmCall), rdmTask2 = new FutureTask<Long>(rdmCall), rdmTask3 = new FutureTask<Long>(rdmCall);
Executor threadPool = Executors.newFixedThreadPool(4);
threadPool.execute(rdmTask1);
threadPool.execute(rdmTask2);
threadPool.execute(rdmTask3);
threadPool.execute(evictionTask);
try {
cdlIn.await();
cdlOut.countDown();
long result1 = rdmTask1.get(), result2 = rdmTask2.get(), result3 = rdmTask3.get();
int evictions = evictionTask.get();
LOG.info("MTT test: task 1: " + descRdmTask(result1) + ", task 2: " + descRdmTask(result2) + ", task 3: " + descRdmTask(result3) + "; " + evictions + " evictions");
} catch (Throwable t) {
throw new RuntimeException(t);
}
}
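syncThreadStart is a helper in TestLowLevelCacheImpl whose body is not shown here. Judging from the driver side (cdlIn.await() followed by cdlOut.countDown(), with cdlIn sized to 4 and cdlOut to 1), it is presumably a start-line rendezvous of the following shape; this is an inferred reconstruction, not the actual Hive code.

import java.util.concurrent.CountDownLatch;

// Inferred sketch: each worker announces readiness, then blocks until the driver
// releases all workers at once so the cache is exercised concurrently.
private void syncThreadStart(CountDownLatch cdlIn, CountDownLatch cdlOut) {
  cdlIn.countDown();
  try {
    cdlOut.await();
  } catch (InterruptedException e) {
    throw new AssertionError(e);
  }
}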