Examples with DiskRangeList - org.apache.hadoop.hive.common.io.DiskRangeList

Example 11 with DiskRangeList

use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

the class TestLowLevelCacheImpl method testMTTWithCleanup.

@Test
public void testMTTWithCleanup() {
    final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
    final long fn1 = 1, fn2 = 2;
    final int offsetsToUse = 8;
    final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);
    final AtomicInteger rdmsDone = new AtomicInteger(0);
    Callable<Long> rdmCall = new Callable<Long>() {

        public Long call() {
            int gets = 0, puts = 0;
            try {
                Random rdm = new Random(1234 + Thread.currentThread().getId());
                syncThreadStart(cdlIn, cdlOut);
                for (int i = 0; i < 20000; ++i) {
                    boolean isGet = rdm.nextBoolean(), isFn1 = rdm.nextBoolean();
                    long fileName = isFn1 ? fn1 : fn2;
                    int fileIndex = isFn1 ? 1 : 2;
                    int count = rdm.nextInt(offsetsToUse);
                    if (isGet) {
                        int[] offsets = new int[count];
                        count = generateOffsets(offsetsToUse, rdm, offsets);
                        CreateHelper list = new CreateHelper();
                        for (int j = 0; i < count; ++i) {
                            list.addOrMerge(offsets[j], offsets[j] + 1, true, false);
                        }
                        DiskRangeList iter = cache.getFileData(fileName, list.get(), 0, testFactory, null, null);
                        int j = -1;
                        while (iter != null) {
                            ++j;
                            if (!(iter instanceof CacheChunk)) {
                                iter = iter.next;
                                continue;
                            }
                            ++gets;
                            LlapAllocatorBuffer result = (LlapAllocatorBuffer) ((CacheChunk) iter).getBuffer();
                            assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), result.getArenaIndex());
                            cache.decRefBuffer(result);
                            iter = iter.next;
                        }
                    } else {
                        DiskRange[] ranges = new DiskRange[count];
                        int[] offsets = new int[count];
                        for (int j = 0; j < count; ++j) {
                            int next = rdm.nextInt(offsetsToUse);
                            ranges[j] = dr(next, next + 1);
                            offsets[j] = next;
                        }
                        MemoryBuffer[] buffers = new MemoryBuffer[count];
                        for (int j = 0; j < offsets.length; ++j) {
                            LlapDataBuffer buf = LowLevelCacheImpl.allocateFake();
                            buf.setNewAllocLocation(makeFakeArenaIndex(fileIndex, offsets[j]), 0);
                            buffers[j] = buf;
                        }
                        long[] mask = cache.putFileData(fileName, ranges, buffers, 0, Priority.NORMAL, null, null);
                        puts += buffers.length;
                        long maskVal = 0;
                        if (mask != null) {
                            assertEquals(1, mask.length);
                            maskVal = mask[0];
                        }
                        for (int j = 0; j < offsets.length; ++j) {
                            LlapDataBuffer buf = (LlapDataBuffer) (buffers[j]);
                            if ((maskVal & 1) == 1) {
                                assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), buf.getArenaIndex());
                            }
                            maskVal >>= 1;
                            cache.decRefBuffer(buf);
                        }
                    }
                }
            } finally {
                rdmsDone.incrementAndGet();
            }
            return (((long) gets) << 32) | puts;
        }

        private int makeFakeArenaIndex(int fileIndex, long offset) {
            return (int) ((fileIndex << 12) + offset);
        }
    };
    FutureTask<Integer> evictionTask = new FutureTask<Integer>(new Callable<Integer>() {

        public Integer call() {
            boolean isFirstFile = false;
            Random rdm = new Random(1234 + Thread.currentThread().getId());
            int evictions = 0;
            syncThreadStart(cdlIn, cdlOut);
            while (rdmsDone.get() < 3) {
                DiskRangeList head = new DiskRangeList(0, offsetsToUse + 1);
                isFirstFile = !isFirstFile;
                long fileId = isFirstFile ? fn1 : fn2;
                head = cache.getFileData(fileId, head, 0, testFactory, null, null);
                DiskRange[] results = head.listToArray();
                int startIndex = rdm.nextInt(results.length), index = startIndex;
                LlapDataBuffer victim = null;
                do {
                    DiskRange r = results[index];
                    if (r instanceof CacheChunk) {
                        LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) r).getBuffer();
                        cache.decRefBuffer(result);
                        if (victim == null && result.invalidate() == LlapCacheableBuffer.INVALIDATE_OK) {
                            ++evictions;
                            victim = result;
                        }
                    }
                    ++index;
                    if (index == results.length)
                        index = 0;
                } while (index != startIndex);
                if (victim == null)
                    continue;
                cache.notifyEvicted(victim);
            }
            return evictions;
        }
    });
    FutureTask<Long> rdmTask1 = new FutureTask<Long>(rdmCall), rdmTask2 = new FutureTask<Long>(rdmCall), rdmTask3 = new FutureTask<Long>(rdmCall);
    Executor threadPool = Executors.newFixedThreadPool(4);
    threadPool.execute(rdmTask1);
    threadPool.execute(rdmTask2);
    threadPool.execute(rdmTask3);
    threadPool.execute(evictionTask);
    try {
        cdlIn.await();
        cdlOut.countDown();
        long result1 = rdmTask1.get(), result2 = rdmTask2.get(), result3 = rdmTask3.get();
        int evictions = evictionTask.get();
        LOG.info("MTT test: task 1: " + descRdmTask(result1) + ", task 2: " + descRdmTask(result2) + ", task 3: " + descRdmTask(result3) + "; " + evictions + " evictions");
    } catch (Throwable t) {
        throw new RuntimeException(t);
    }
}

Also used : DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) Callable(java.util.concurrent.Callable) CreateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper) Executor(java.util.concurrent.Executor) Random(java.util.Random) FutureTask(java.util.concurrent.FutureTask) CacheChunk(org.apache.hadoop.hive.ql.io.orc.encoded.CacheChunk) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemoryBuffer(org.apache.hadoop.hive.common.io.encoded.MemoryBuffer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DiskRange(org.apache.hadoop.hive.common.io.DiskRange) Test(org.junit.Test)

Example 12 with DiskRangeList

use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

the class TestOrcMetadataCache method testIncompleteCbs.

@Test
public void testIncompleteCbs() throws Exception {
    DummyMemoryManager mm = new DummyMemoryManager();
    DummyCachePolicy cp = new DummyCachePolicy();
    final int MAX_ALLOC = 64;
    LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null);
    MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
    DataCache.BooleanRef gotAllData = new DataCache.BooleanRef();
    Object fileKey1 = new Object();
    // Note: incomplete CBs are always an exact match.
    cache.putIncompleteCbs(fileKey1, new DiskRange[] { new DiskRangeList(0, 3) }, 0);
    cp.verifyEquals(1);
    DiskRangeList result = cache.getIncompleteCbs(fileKey1, new DiskRangeList(0, 3), 0, gotAllData);
    assertTrue(gotAllData.value);
    verifyResult(result, INCOMPLETE, 0, 3);
    cache.putIncompleteCbs(fileKey1, new DiskRange[] { new DiskRangeList(5, 6) }, 0);
    cp.verifyEquals(3);
    DiskRangeList ranges = new DiskRangeList(0, 3);
    ranges.insertAfter(new DiskRangeList(4, 6));
    result = cache.getIncompleteCbs(fileKey1, ranges, 0, gotAllData);
    assertFalse(gotAllData.value);
    verifyResult(result, INCOMPLETE, 0, 3, DRL, 4, 6);
    ranges = new DiskRangeList(0, 3);
    ranges.insertAfter(new DiskRangeList(3, 5)).insertAfter(new DiskRangeList(5, 6));
    result = cache.getIncompleteCbs(fileKey1, ranges, 0, gotAllData);
    assertFalse(gotAllData.value);
    verifyResult(result, INCOMPLETE, 0, 3, DRL, 3, 5, INCOMPLETE, 5, 6);
    result = cache.getIncompleteCbs(fileKey1, new DiskRangeList(5, 6), 0, gotAllData);
    assertTrue(gotAllData.value);
    verifyResult(result, INCOMPLETE, 5, 6);
    result = cache.getIncompleteCbs(fileKey1, new DiskRangeList(4, 5), 0, gotAllData);
    assertFalse(gotAllData.value);
    verifyResult(result, DRL, 4, 5);
}

Also used : LlapDaemonCacheMetrics(org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics) DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) MetadataCache(org.apache.hadoop.hive.llap.io.metadata.MetadataCache) DataCache(org.apache.hadoop.hive.common.io.DataCache) Test(org.junit.Test)

Example 13 with DiskRangeList

use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

the class OrcFileEstimateErrors method getIncompleteCbs.

public DiskRangeList getIncompleteCbs(DiskRangeList ranges, long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
    DiskRangeList prev = ranges.prev;
    if (prev == null) {
        prev = new MutateHelper(ranges);
    }
    DiskRangeList current = ranges;
    // Assume by default that we would find everything.
    gotAllData.value = true;
    while (current != null) {
        // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
        DiskRangeList check = current;
        current = current.next;
        if (check.hasData())
            continue;
        Integer badLength = cache.get(Long.valueOf(check.getOffset() + baseOffset));
        if (badLength == null || badLength < check.getLength()) {
            gotAllData.value = false;
            continue;
        }
        // We could just remove here and handle the missing tail during read, but that can be
        // dangerous; let's explicitly add an incomplete CB.
        check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd()));
    }
    return prev.next;
}

Also used : MutateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper) DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) IncompleteCb(org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb)

Example 14 with DiskRangeList

use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

the class TestLowLevelCacheImpl method verifyCacheGet.

private void verifyCacheGet(LowLevelCacheImpl cache, long fileId, Object... stuff) {
    CreateHelper list = new CreateHelper();
    DiskRangeList iter = null;
    int intCount = 0, lastInt = -1;
    int resultCount = stuff.length;
    for (Object obj : stuff) {
        if (obj instanceof Integer) {
            --resultCount;
            assertTrue(intCount >= 0);
            if (intCount == 0) {
                lastInt = (Integer) obj;
                intCount = 1;
            } else {
                list.addOrMerge(lastInt, (Integer) obj, true, true);
                intCount = 0;
            }
            continue;
        } else if (intCount >= 0) {
            assertTrue(intCount == 0);
            intCount = -1;
            iter = cache.getFileData(fileId, list.get(), 0, testFactory, null, null);
            assertEquals(resultCount, iter.listSize());
        }
        assertTrue(iter != null);
        if (obj instanceof MemoryBuffer) {
            assertTrue(iter instanceof CacheChunk);
            assertSame(obj, ((CacheChunk) iter).getBuffer());
        } else {
            assertTrue(iter.equals(obj));
        }
        iter = iter.next;
    }
}

Also used : CreateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MemoryBuffer(org.apache.hadoop.hive.common.io.encoded.MemoryBuffer) DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) CacheChunk(org.apache.hadoop.hive.ql.io.orc.encoded.CacheChunk)

Example 15 with DiskRangeList

use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

the class EncodedReaderImpl method readEncodedColumns.

@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] included, boolean[][] colRgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
    // Note: for now we don't have to setError here, caller will setError if we throw.
    // We are also not supposed to call setDone, since we are only part of the operation.
    long stripeOffset = stripe.getOffset();
    // 1. Figure out what we have to read.
    // Stream offset in relation to the stripe.
    long offset = 0;
    // 1.1. Figure out which columns have a present stream
    boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
    if (isTracingEnabled) {
        LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
    }
    // We assume stream list is sorted by column and that non-data
    // streams do not interleave data streams for the same column.
    // 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
    ColumnReadContext[] colCtxs = new ColumnReadContext[included.length];
    int colRgIx = -1;
    // Don't create context for the 0-s column.
    for (int i = 1; i < included.length; ++i) {
        if (!included[i])
            continue;
        colCtxs[i] = new ColumnReadContext(i, encodings.get(i), indexes[i], ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
    }
    boolean isCompressed = (codec != null);
    CreateHelper listToRead = new CreateHelper();
    boolean hasIndexOnlyCols = false;
    // Will always be the same for all cols at the moment.
    boolean[] includedRgs = null;
    for (OrcProto.Stream stream : streamList) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
            // We have a stream for included column, but in future it might have no data streams.
            // It's more like "has at least one column included that has an index stream".
            hasIndexOnlyCols = hasIndexOnlyCols || included[colIx];
            if (isTracingEnabled) {
                LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
            offset += length;
            continue;
        }
        ColumnReadContext ctx = colCtxs[colIx];
        assert ctx != null;
        includedRgs = colRgs[ctx.includedIx];
        int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
        ctx.addStream(offset, stream, indexIx);
        if (isTracingEnabled) {
            LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
        }
        if (includedRgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
            RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
            if (isTracingEnabled) {
                LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
            }
        } else {
            RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRgs, codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    if (listToRead.get() == null) {
        // TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
        if (hasIndexOnlyCols && (includedRgs == null)) {
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
            consumer.consumeData(ecb);
        } else {
            LOG.warn("Nothing to read for stripe [" + stripe + "]");
        }
        return;
    }
    // 2. Now, read all of the ranges from cache or disk.
    DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead.get());
    if (/*isTracingEnabled && */
    LOG.isInfoEnabled()) {
        LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    BooleanRef isAllInCache = new BooleanRef();
    if (hasFileId) {
        cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
        if (/*isTracingEnabled && */
        LOG.isInfoEnabled()) {
            LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
    }
    if (!isAllInCache.value) {
        if (!isDataReaderOpen) {
            this.dataReader.open();
            isDataReaderOpen = true;
        }
        dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
    }
    // 3. For uncompressed case, we need some special processing before read.
    // Keep "toRead" list for future use, don't extract().
    DiskRangeList iter = toRead.next;
    if (codec == null) {
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                DiskRangeList newIter = preReadUncompressedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length);
                if (newIter != null) {
                    iter = newIter;
                }
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after pre-read (file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        // Reset the iter to start.
        iter = toRead.next;
    }
    // 4. Finally, decompress data, map per RG, and return to caller.
    // We go by RG and not by column because that is how data is processed.
    int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
    for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
        boolean isLastRg = rgIx == rgCount - 1;
        // Create the batch we will use to return data for this RG.
        OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
        ecb.init(fileKey, stripeIx, rgIx, included.length);
        boolean isRGSelected = true;
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            if (isTracingEnabled) {
                LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
            }
            // TODO: simplify this now that high-level cache has been removed. Same RGs for all cols.
            if (colRgs[ctx.includedIx] != null && !colRgs[ctx.includedIx][rgIx]) {
                // RG x col filtered.
                isRGSelected = false;
                if (isTracingEnabled) {
                    LOG.trace("colIxMod: {} rgIx: {} colRgs[{}]: {} colRgs[{}][{}]: {}", ctx.includedIx, rgIx, ctx.includedIx, Arrays.toString(colRgs[ctx.includedIx]), ctx.includedIx, rgIx, colRgs[ctx.includedIx][rgIx]);
                }
                continue;
            }
            OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
            ecb.initOrcColumn(ctx.colIx);
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                ColumnStreamData cb = null;
                try {
                    if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
                        // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
                        if (isTracingEnabled) {
                            LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
                        }
                        if (sctx.stripeLevelStream == null) {
                            sctx.stripeLevelStream = POOLS.csdPool.take();
                            // We will be using this for each RG while also sending RGs to processing.
                            // To avoid buffers being unlocked, run refcount one ahead; we will not increase
                            // it when building the last RG, so each RG processing will decref once, and the
                            // last one will unlock the buffers.
                            sctx.stripeLevelStream.incRef();
                            // For stripe-level streams we don't need the extra refcount on the block.
                            // See class comment about refcounts.
                            long unlockUntilCOffset = sctx.offset + sctx.length;
                            DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset);
                            if (lastCached != null) {
                                iter = lastCached;
                            }
                        }
                        if (!isLastRg) {
                            sctx.stripeLevelStream.incRef();
                        }
                        cb = sctx.stripeLevelStream;
                    } else {
                        // This stream can be separated by RG using index. Let's do that.
                        // Offset to where this RG begins.
                        long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
                        // Offset relative to the beginning of the stream of where this RG ends.
                        long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
                        // Offset before which this RG is guaranteed to end. Can only be estimated.
                        // We estimate the same way for compressed and uncompressed for now.
                        long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
                        // As we read, we can unlock initial refcounts for the buffers that end before
                        // the data that we need for this RG.
                        long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
                        cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed);
                        boolean isStartOfStream = sctx.bufferIter == null;
                        DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset);
                        if (lastCached != null) {
                            sctx.bufferIter = iter = lastCached;
                        }
                    }
                    ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
                } catch (Exception ex) {
                    DiskRangeList drl = toRead == null ? null : toRead.next;
                    LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                    throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                }
            }
        }
        if (isRGSelected) {
            consumer.consumeData(ecb);
        }
    }
    if (isTracingEnabled) {
        LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    // Release the unreleased buffers. See class comment about refcounts.
    releaseInitialRefcounts(toRead.next);
    releaseCacheChunksIntoObjectPool(toRead.next);
}

Also used : DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) OrcProto(org.apache.orc.OrcProto) BooleanRef(org.apache.hadoop.hive.common.io.DataCache.BooleanRef) IOException(java.io.IOException) IOException(java.io.IOException) CreateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper) OrcEncodedColumnBatch(org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch) OutStream(org.apache.orc.impl.OutStream) ColumnStreamData(org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)

Aggregations

DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)29 IOException (java.io.IOException)11 BufferChunk (org.apache.orc.impl.BufferChunk)11 MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer)10 ByteBuffer (java.nio.ByteBuffer)9 DiskRange (org.apache.hadoop.hive.common.io.DiskRange)6 MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper)6 CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper)5 OrcProto (org.apache.orc.OrcProto)5 ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)3 Stream (org.apache.orc.OrcProto.Stream)3 OutStream (org.apache.orc.impl.OutStream)3 CodedInputStream (com.google.protobuf.CodedInputStream)2 InputStream (java.io.InputStream)2 ArrayList (java.util.ArrayList)2 IdentityHashMap (java.util.IdentityHashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ConcurrentSkipListMap (java.util.concurrent.ConcurrentSkipListMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2