use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class TestLowLevelCacheImpl method testMTTWithCleanup.
/**
 * Multi-threaded stress test of the low-level cache with concurrent cleanup.
 *
 * Three worker tasks randomly get from and put into the cache for two fake files, while a
 * fourth task repeatedly fetches everything cached for one of the files, invalidates one
 * buffer and notifies the cache about the eviction. Every buffer that is put is tagged with a
 * fake arena index derived from its file and offset, so any buffer returned by a get can be
 * checked against the location it was returned for.
 *
 * Any assertion failure or exception in a task surfaces as a RuntimeException from this method.
 */
@Test
public void testMTTWithCleanup() {
  final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
  final long fn1 = 1, fn2 = 2;
  final int offsetsToUse = 8;
  // All four tasks check in on cdlIn; the main thread then releases them together via cdlOut.
  final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);
  // Number of finished worker tasks; the eviction task stops once all three are done.
  final AtomicInteger rdmsDone = new AtomicInteger(0);
  Callable<Long> rdmCall = new Callable<Long>() {
    public Long call() {
      int gets = 0, puts = 0;
      try {
        Random rdm = new Random(1234 + Thread.currentThread().getId());
        syncThreadStart(cdlIn, cdlOut);
        for (int i = 0; i < 20000; ++i) {
          boolean isGet = rdm.nextBoolean(), isFn1 = rdm.nextBoolean();
          long fileName = isFn1 ? fn1 : fn2;
          int fileIndex = isFn1 ? 1 : 2;
          int count = rdm.nextInt(offsetsToUse);
          if (isGet) {
            int[] offsets = new int[count];
            count = generateOffsets(offsetsToUse, rdm, offsets);
            CreateHelper list = new CreateHelper();
            // Iterate with the inner index; the previous loop header tested and incremented
            // the outer "i", so it only ever added offsets[0] and cut the outer loop short.
            for (int j = 0; j < count; ++j) {
              list.addOrMerge(offsets[j], offsets[j] + 1, true, false);
            }
            DiskRangeList iter = cache.getFileData(fileName, list.get(), 0, testFactory, null, null);
            while (iter != null) {
              if (iter instanceof CacheChunk) {
                ++gets;
                LlapAllocatorBuffer result = (LlapAllocatorBuffer) ((CacheChunk) iter).getBuffer();
                // Derive the expected tag from the chunk's own offset rather than from its
                // position in the result list: requested ranges may have been merged, so
                // list positions do not necessarily line up with the offsets array.
                assertEquals(makeFakeArenaIndex(fileIndex, iter.getOffset()), result.getArenaIndex());
                cache.decRefBuffer(result);
              }
              iter = iter.next;
            }
          } else {
            DiskRange[] ranges = new DiskRange[count];
            int[] offsets = new int[count];
            for (int j = 0; j < count; ++j) {
              int next = rdm.nextInt(offsetsToUse);
              ranges[j] = dr(next, next + 1);
              offsets[j] = next;
            }
            MemoryBuffer[] buffers = new MemoryBuffer[count];
            for (int j = 0; j < offsets.length; ++j) {
              LlapDataBuffer buf = LowLevelCacheImpl.allocateFake();
              // Tag the buffer with its (file, offset) so that gets can verify it.
              buf.setNewAllocLocation(makeFakeArenaIndex(fileIndex, offsets[j]), 0);
              buffers[j] = buf;
            }
            long[] mask = cache.putFileData(fileName, ranges, buffers, 0, Priority.NORMAL, null, null);
            puts += buffers.length;
            long maskVal = 0;
            if (mask != null) {
              assertEquals(1, mask.length);
              maskVal = mask[0];
            }
            for (int j = 0; j < offsets.length; ++j) {
              LlapDataBuffer buf = (LlapDataBuffer) (buffers[j]);
              // A set mask bit marks a slot whose buffer is checked against the expected tag.
              if ((maskVal & 1) == 1) {
                assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), buf.getArenaIndex());
              }
              maskVal >>= 1;
              cache.decRefBuffer(buf);
            }
          }
        }
      } finally {
        // Always report completion so that the eviction task cannot spin forever.
        rdmsDone.incrementAndGet();
      }
      // Pack both counters into one long: gets in the high 32 bits, puts in the low bits.
      return (((long) gets) << 32) | puts;
    }

    private int makeFakeArenaIndex(int fileIndex, long offset) {
      return (int) ((fileIndex << 12) + offset);
    }
  };
  FutureTask<Integer> evictionTask = new FutureTask<Integer>(new Callable<Integer>() {
    public Integer call() {
      boolean isFirstFile = false;
      Random rdm = new Random(1234 + Thread.currentThread().getId());
      int evictions = 0;
      syncThreadStart(cdlIn, cdlOut);
      while (rdmsDone.get() < 3) {
        DiskRangeList head = new DiskRangeList(0, offsetsToUse + 1);
        // Alternate between the two files on every pass.
        isFirstFile = !isFirstFile;
        long fileId = isFirstFile ? fn1 : fn2;
        head = cache.getFileData(fileId, head, 0, testFactory, null, null);
        DiskRange[] results = head.listToArray();
        // Walk the whole result array once, starting from a random position and wrapping.
        int startIndex = rdm.nextInt(results.length), index = startIndex;
        LlapDataBuffer victim = null;
        do {
          DiskRange r = results[index];
          if (r instanceof CacheChunk) {
            LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) r).getBuffer();
            cache.decRefBuffer(result);
            // Only the first buffer that can be invalidated becomes the victim.
            if (victim == null && result.invalidate() == LlapCacheableBuffer.INVALIDATE_OK) {
              ++evictions;
              victim = result;
            }
          }
          ++index;
          if (index == results.length) {
            index = 0;
          }
        } while (index != startIndex);
        if (victim != null) {
          cache.notifyEvicted(victim);
        }
      }
      return evictions;
    }
  });
  FutureTask<Long> rdmTask1 = new FutureTask<Long>(rdmCall), rdmTask2 = new FutureTask<Long>(rdmCall), rdmTask3 = new FutureTask<Long>(rdmCall);
  // Keep the ExecutorService handle (fully qualified to avoid a new import) so that the
  // pool threads can be released when the test is over.
  java.util.concurrent.ExecutorService threadPool = Executors.newFixedThreadPool(4);
  try {
    threadPool.execute(rdmTask1);
    threadPool.execute(rdmTask2);
    threadPool.execute(rdmTask3);
    threadPool.execute(evictionTask);
    cdlIn.await();
    cdlOut.countDown();
    long result1 = rdmTask1.get(), result2 = rdmTask2.get(), result3 = rdmTask3.get();
    int evictions = evictionTask.get();
    LOG.info("MTT test: task 1: " + descRdmTask(result1) + ", task 2: " + descRdmTask(result2) + ", task 3: " + descRdmTask(result3) + "; " + evictions + " evictions");
  } catch (Throwable t) {
    throw new RuntimeException(t);
  } finally {
    threadPool.shutdown();
  }
}
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class TestOrcMetadataCache method testIncompleteCbs.
/**
 * Exercises putIncompleteCbs / getIncompleteCbs on a MetadataCache for a single file key:
 * stores two incomplete CBs and checks which requested ranges come back marked INCOMPLETE,
 * which stay plain disk ranges, and how the "got all data" flag is reported in each case.
 */
@Test
public void testIncompleteCbs() throws Exception {
  final int maxAllocation = 64;
  DummyMemoryManager memoryManager = new DummyMemoryManager();
  DummyCachePolicy policy = new DummyCachePolicy();
  LlapDaemonCacheMetrics cacheMetrics = LlapDaemonCacheMetrics.create("", "");
  BuddyAllocator allocator = new BuddyAllocator(false, false, 8, maxAllocation, 1, 4096, 0, null, memoryManager, cacheMetrics, null);
  MetadataCache metadataCache = new MetadataCache(allocator, memoryManager, policy, true, cacheMetrics);
  DataCache.BooleanRef foundEverything = new DataCache.BooleanRef();
  Object fileKey = new Object();

  // Note: incomplete CBs are always an exact match.
  // Step 1: store [0, 3) and read it back with an identical request.
  DiskRange[] firstPut = new DiskRange[] { new DiskRangeList(0, 3) };
  metadataCache.putIncompleteCbs(fileKey, firstPut, 0);
  policy.verifyEquals(1);
  DiskRangeList lookup = metadataCache.getIncompleteCbs(fileKey, new DiskRangeList(0, 3), 0, foundEverything);
  assertTrue(foundEverything.value);
  verifyResult(lookup, INCOMPLETE, 0, 3);

  // Step 2: store [5, 6) as well; a request for [4, 6) is not an exact match for it.
  DiskRange[] secondPut = new DiskRange[] { new DiskRangeList(5, 6) };
  metadataCache.putIncompleteCbs(fileKey, secondPut, 0);
  policy.verifyEquals(3);
  DiskRangeList request = new DiskRangeList(0, 3);
  request.insertAfter(new DiskRangeList(4, 6));
  lookup = metadataCache.getIncompleteCbs(fileKey, request, 0, foundEverything);
  assertFalse(foundEverything.value);
  verifyResult(lookup, INCOMPLETE, 0, 3, DRL, 4, 6);

  // Step 3: three ranges where only the middle one is unknown to the cache.
  request = new DiskRangeList(0, 3);
  request.insertAfter(new DiskRangeList(3, 5)).insertAfter(new DiskRangeList(5, 6));
  lookup = metadataCache.getIncompleteCbs(fileKey, request, 0, foundEverything);
  assertFalse(foundEverything.value);
  verifyResult(lookup, INCOMPLETE, 0, 3, DRL, 3, 5, INCOMPLETE, 5, 6);

  // Step 4: single-range requests, one stored and one not stored.
  lookup = metadataCache.getIncompleteCbs(fileKey, new DiskRangeList(5, 6), 0, foundEverything);
  assertTrue(foundEverything.value);
  verifyResult(lookup, INCOMPLETE, 5, 6);
  lookup = metadataCache.getIncompleteCbs(fileKey, new DiskRangeList(4, 5), 0, foundEverything);
  assertFalse(foundEverything.value);
  verifyResult(lookup, DRL, 4, 5);
}
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class OrcFileEstimateErrors method getIncompleteCbs.
/**
 * Walks the given range list and replaces every data-less range that has a recorded bad
 * length covering it with an IncompleteCb spanning the same offsets.
 *
 * @param ranges first node of the list to process; nodes are replaced in place
 * @param baseOffset added to each range offset to form the lookup key into the cache map
 * @param factory not used by this method
 * @param gotAllData set to true up front and reset to false as soon as one data-less range
 *                   has no sufficiently long entry recorded
 * @return the first node of the resulting list
 */
public DiskRangeList getIncompleteCbs(DiskRangeList ranges, long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
  // Anchor on the node before "ranges" (creating a helper head if there is none) so that the
  // list head can still be found after the first node has been replaced.
  DiskRangeList anchor = ranges.prev;
  if (anchor == null) {
    anchor = new MutateHelper(ranges);
  }
  // Optimistic default; cleared below on the first miss.
  gotAllData.value = true;
  // Ranges are assumed to be non-overlapping, so the successor is captured before the
  // current node may be replaced.
  for (DiskRangeList node = ranges, following; node != null; node = following) {
    following = node.next;
    if (node.hasData()) {
      continue;
    }
    Integer knownBadLength = cache.get(Long.valueOf(node.getOffset() + baseOffset));
    boolean isCovered = knownBadLength != null && knownBadLength >= node.getLength();
    if (isCovered) {
      // Removing the range and handling the missing tail during read would also work, but
      // that can be dangerous; add an explicit incomplete CB instead.
      node.replaceSelfWith(new IncompleteCb(node.getOffset(), node.getEnd()));
    } else {
      gotAllData.value = false;
    }
  }
  return anchor.next;
}
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class TestLowLevelCacheImpl method verifyCacheGet.
/**
 * Issues one cache get and verifies its result list element by element.
 *
 * The varargs start with Integer pairs, each pair being the start and end of one range to
 * request. They are followed by the expected results in list order: a MemoryBuffer means the
 * corresponding result must be a CacheChunk wrapping that very buffer, anything else must be
 * equal to the corresponding result node.
 *
 * @param cache cache to query
 * @param fileId file to query the cache for
 * @param stuff requested range boundaries followed by the expected results
 */
private void verifyCacheGet(LowLevelCacheImpl cache, long fileId, Object... stuff) {
  CreateHelper request = new CreateHelper();
  DiskRangeList cursor = null;
  // 0: waiting for a range start; 1: start seen, waiting for its end; -1: request was issued.
  int boundaryState = 0;
  int pendingStart = -1;
  // Every leading Integer is a range boundary, not an expected result.
  int expectedResults = stuff.length;
  for (int ix = 0; ix < stuff.length; ++ix) {
    Object item = stuff[ix];
    if (item instanceof Integer) {
      --expectedResults;
      assertTrue(boundaryState >= 0);
      if (boundaryState == 0) {
        pendingStart = (Integer) item;
        boundaryState = 1;
      } else {
        request.addOrMerge(pendingStart, (Integer) item, true, true);
        boundaryState = 0;
      }
      continue;
    }
    if (boundaryState >= 0) {
      // First expected result: all boundaries must be paired up; issue the get exactly once.
      assertTrue(boundaryState == 0);
      boundaryState = -1;
      cursor = cache.getFileData(fileId, request.get(), 0, testFactory, null, null);
      assertEquals(expectedResults, cursor.listSize());
    }
    assertTrue(cursor != null);
    if (item instanceof MemoryBuffer) {
      assertTrue(cursor instanceof CacheChunk);
      assertSame(item, ((CacheChunk) cursor).getBuffer());
    } else {
      assertTrue(cursor.equals(item));
    }
    cursor = cursor.next;
  }
}
use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.
the class EncodedReaderImpl method readEncodedColumns.
/**
 * Reads the encoded data of the included columns of one stripe and hands it to the consumer,
 * one OrcEncodedColumnBatch per row group (RG).
 *
 * The method works in four steps: (1) determine the disk ranges needed for the data streams of
 * the included columns, (2) get those ranges from the cache when a file key is available and
 * read whatever is still missing through the data reader, (3) for uncompressed files, pre-process
 * the ranges per stream, and (4) build the per-RG stream data and pass each selected RG batch to
 * the consumer. Finally the initial refcounts are released and cache chunks are returned to the
 * object pool.
 *
 * @param stripeIx index of the stripe; passed into every batch that is produced
 * @param stripe stripe descriptor; its offset and number of rows are used
 * @param indexes row indexes, addressed by column index
 * @param encodings column encodings, addressed by column index
 * @param streamList streams of the stripe; assumed sorted by column (see comment below)
 * @param included per-column inclusion flags; element 0 never gets a read context
 * @param colRgs per included column, the RG inclusion flags; a null element means no RG filtering
 * @param consumer receives the produced batches
 * @throws IOException if reading or preparing any stream fails; other exceptions raised while
 *         preparing a stream are wrapped into an IOException
 */
@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] included, boolean[][] colRgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
// Note: for now we don't have to setError here, caller will setError if we throw.
// We are also not supposed to call setDone, since we are only part of the operation.
long stripeOffset = stripe.getOffset();
// 1. Figure out what we have to read.
// Stream offset in relation to the stripe.
long offset = 0;
// 1.1. Figure out which columns have a present stream
boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
if (isTracingEnabled) {
LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
}
// We assume stream list is sorted by column and that non-data
// streams do not interleave data streams for the same column.
// 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
ColumnReadContext[] colCtxs = new ColumnReadContext[included.length];
// Running index over included columns only; handed to each context (presumably what is later
// read back as ctx.includedIx to address colRgs - confirm against ColumnReadContext).
int colRgIx = -1;
// Don't create context for the 0-s column.
for (int i = 1; i < included.length; ++i) {
if (!included[i])
continue;
colCtxs[i] = new ColumnReadContext(i, encodings.get(i), indexes[i], ++colRgIx);
if (isTracingEnabled) {
LOG.trace("Creating context: " + colCtxs[i].toString());
}
}
boolean isCompressed = (codec != null);
CreateHelper listToRead = new CreateHelper();
boolean hasIndexOnlyCols = false;
// Will always be the same for all cols at the moment.
boolean[] includedRgs = null;
for (OrcProto.Stream stream : streamList) {
long length = stream.getLength();
int colIx = stream.getColumn();
OrcProto.Stream.Kind streamKind = stream.getKind();
if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
// We have a stream for included column, but in future it might have no data streams.
// It's more like "has at least one column included that has an index stream".
hasIndexOnlyCols = hasIndexOnlyCols || included[colIx];
if (isTracingEnabled) {
LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
}
// Skipped streams still occupy space in the stripe, so the running offset must advance.
offset += length;
continue;
}
ColumnReadContext ctx = colCtxs[colIx];
assert ctx != null;
includedRgs = colRgs[ctx.includedIx];
int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
ctx.addStream(offset, stream, indexIx);
if (isTracingEnabled) {
LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
}
// Without RG filtering, or for dictionary streams, the whole stream is requested; otherwise
// only the parts needed for the included RGs are added.
if (includedRgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
if (isTracingEnabled) {
LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
}
} else {
RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRgs, codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
}
offset += length;
}
boolean hasFileId = this.fileKey != null;
if (listToRead.get() == null) {
// No data ranges were collected at all.
// TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
if (hasIndexOnlyCols && (includedRgs == null)) {
// Still deliver a single, stream-less batch marked as covering all RGs.
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
consumer.consumeData(ecb);
} else {
LOG.warn("Nothing to read for stripe [" + stripe + "]");
}
return;
}
// 2. Now, read all of the ranges from cache or disk.
DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead.get());
if (/*isTracingEnabled && */
LOG.isInfoEnabled()) {
LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
BooleanRef isAllInCache = new BooleanRef();
// The cache is only consulted when there is a file key to look data up by.
if (hasFileId) {
cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
if (/*isTracingEnabled && */
LOG.isInfoEnabled()) {
LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
}
// Whatever the cache did not provide is read through the data reader, which is opened lazily
// on first use and then kept open.
if (!isAllInCache.value) {
if (!isDataReaderOpen) {
this.dataReader.open();
isDataReaderOpen = true;
}
dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
}
// 3. For uncompressed case, we need some special processing before read.
// Keep "toRead" list for future use, don't extract().
DiskRangeList iter = toRead.next;
if (codec == null) {
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
DiskRangeList newIter = preReadUncompressedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length);
// Only advance the shared position when the pre-read returned one.
if (newIter != null) {
iter = newIter;
}
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after pre-read (file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
// Reset the iter to start.
iter = toRead.next;
}
// 4. Finally, decompress data, map per RG, and return to caller.
// We go by RG and not by column because that is how data is processed.
// Number of RGs in the stripe: row count divided by the row index stride, rounded up.
int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
boolean isLastRg = rgIx == rgCount - 1;
// Create the batch we will use to return data for this RG.
OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
ecb.init(fileKey, stripeIx, rgIx, included.length);
boolean isRGSelected = true;
for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
ColumnReadContext ctx = colCtxs[colIx];
// This column is not included.
if (ctx == null)
continue;
if (isTracingEnabled) {
LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
}
// TODO: simplify this now that high-level cache has been removed. Same RGs for all cols.
if (colRgs[ctx.includedIx] != null && !colRgs[ctx.includedIx][rgIx]) {
// RG x col filtered.
isRGSelected = false;
if (isTracingEnabled) {
LOG.trace("colIxMod: {} rgIx: {} colRgs[{}]: {} colRgs[{}][{}]: {}", ctx.includedIx, rgIx, ctx.includedIx, Arrays.toString(colRgs[ctx.includedIx]), ctx.includedIx, rgIx, colRgs[ctx.includedIx][rgIx]);
}
continue;
}
// Index entries of this RG and of the next one; there is no next entry for the last RG.
OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
ecb.initOrcColumn(ctx.colIx);
for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
StreamContext sctx = ctx.streams[streamIx];
ColumnStreamData cb = null;
try {
if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
// This stream is for entire stripe and needed for every RG; uncompress once and reuse.
if (isTracingEnabled) {
LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
}
if (sctx.stripeLevelStream == null) {
sctx.stripeLevelStream = POOLS.csdPool.take();
// We will be using this for each RG while also sending RGs to processing.
// To avoid buffers being unlocked, run refcount one ahead; we will not increase
// it when building the last RG, so each RG processing will decref once, and the
// last one will unlock the buffers.
sctx.stripeLevelStream.incRef();
// For stripe-level streams we don't need the extra refcount on the block.
// See class comment about refcounts.
long unlockUntilCOffset = sctx.offset + sctx.length;
DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset);
if (lastCached != null) {
iter = lastCached;
}
}
if (!isLastRg) {
sctx.stripeLevelStream.incRef();
}
cb = sctx.stripeLevelStream;
} else {
// This stream can be separated by RG using index. Let's do that.
// Offset to where this RG begins.
long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
// Offset relative to the beginning of the stream of where this RG ends.
long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
// Offset before which this RG is guaranteed to end. Can only be estimated.
// We estimate the same way for compressed and uncompressed for now.
long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
// As we read, we can unlock initial refcounts for the buffers that end before
// the data that we need for this RG.
long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed);
// On the first RG of a stream, start from the shared position; afterwards continue from
// where the previous RG of this same stream left off.
boolean isStartOfStream = sctx.bufferIter == null;
DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset);
if (lastCached != null) {
sctx.bufferIter = iter = lastCached;
}
}
ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
} catch (Exception ex) {
// Log the stream and the current range list for diagnosis, then rethrow as IOException.
DiskRangeList drl = toRead == null ? null : toRead.next;
LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
}
}
}
// The batch is only delivered when no included column filtered this RG out.
if (isRGSelected) {
consumer.consumeData(ecb);
}
}
if (isTracingEnabled) {
LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
}
// Release the unreleased buffers. See class comment about refcounts.
releaseInitialRefcounts(toRead.next);
releaseCacheChunksIntoObjectPool(toRead.next);
}
Aggregations