Example 26 with DiskRangeList

Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

From class EncodedReaderImpl, method releaseInitialRefcounts.

private void releaseInitialRefcounts(DiskRangeList current) {
    while (current != null) {
        DiskRangeList toFree = current;
        current = current.next;
        if (toFree instanceof ProcCacheChunk) {
            ProcCacheChunk pcc = (ProcCacheChunk) toFree;
            if (pcc.originalData != null) {
                // This can only happen in case of failure - we read some data, but didn't
                // decompress it. Deallocate the buffer directly, do not decref.
                if (pcc.getBuffer() != null) {
                    cacheWrapper.getAllocator().deallocate(pcc.getBuffer());
                }
                continue;
            }
        }
        if (!(toFree instanceof CacheChunk)) {
            continue;
        }
        CacheChunk cc = (CacheChunk) toFree;
        if (cc.getBuffer() == null) {
            continue;
        }
        MemoryBuffer buffer = cc.getBuffer();
        cacheWrapper.releaseBuffer(buffer);
        cc.setBuffer(null);
    }
}
Also used: MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)
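
The loop above relies on an idiom that recurs in all of these examples: the iterator advances to current.next before the current node is touched, because releasing or replacing a node can unlink it from the list. A minimal sketch of just that idiom, using only the DiskRangeList fields and getters seen in these examples (the class and method names below are hypothetical):

import org.apache.hadoop.hive.common.io.DiskRangeList;

public class RangeListWalk {

    /** Visits every node even when the visit mutates or unlinks the node. */
    static void visitAll(DiskRangeList head) {
        DiskRangeList current = head;
        while (current != null) {
            DiskRangeList node = current;
            // Save the successor first; "node" may be unlinked below.
            current = current.next;
            System.out.println("range [" + node.getOffset() + ", " + node.getEnd() + ")");
        }
    }
}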

Example 27 with DiskRangeList

Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

From class EncodedReaderImpl, method ponderReleaseInitialRefcount.

private void ponderReleaseInitialRefcount(long unlockUntilCOffset, long streamStartOffset, CacheChunk cc) {
    // Don't release if the buffer contains any data beyond the acceptable boundary.
    if (cc.getEnd() > unlockUntilCOffset) {
        return;
    }
    assert cc.getBuffer() != null;
    try {
        releaseInitialRefcount(cc, false);
    } catch (AssertionError e) {
        LOG.error("BUG: releasing initial refcount; stream start " + streamStartOffset + ", " + "unlocking until " + unlockUntilCOffset + " from [" + cc + "]: " + e.getMessage());
        throw e;
    }
    // Release all the previous buffers that we may not have been able to release due to reuse,
    // as long as they are still in the same stream and are not already released.
    DiskRangeList prev = cc.prev;
    while (true) {
        // Do not release beyond current stream (we don't know which RGs that buffer is for).
        if (prev == null || prev.getEnd() <= streamStartOffset) {
            break;
        }
        // Only release cache chunks; do not release ProcCacheChunks - they may not yet have data.
        if (prev.getClass() != CacheChunk.class) {
            break;
        }
        CacheChunk prevCc = (CacheChunk) prev;
        if (prevCc.buffer == null) {
            break;
        }
        try {
            releaseInitialRefcount(prevCc, true);
        } catch (AssertionError e) {
            LOG.error("BUG: releasing initial refcount; stream start " + streamStartOffset + ", " + "unlocking until " + unlockUntilCOffset + " from [" + cc + "] and backtracked to [" + prevCc + "]: " + e.getMessage());
            throw e;
        }
        prev = prev.prev;
    }
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)
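
The backtracking loop above is bounded in two ways: it never walks past streamStartOffset, since buffers before that belong to another stream, and it stops at the first predecessor that is not a plain CacheChunk with a live buffer. A condensed sketch of the boundary check alone, using only the prev links and getters shown above (countPrecedingInStream is a hypothetical name):

import org.apache.hadoop.hive.common.io.DiskRangeList;

final class StreamBounds {

    /** Counts how many predecessors of "node" still fall inside the current stream. */
    static int countPrecedingInStream(DiskRangeList node, long streamStartOffset) {
        int count = 0;
        // A predecessor whose end is at or before the stream start belongs
        // to a previous stream and must not be counted.
        for (DiskRangeList p = node.prev; p != null && p.getEnd() > streamStartOffset; p = p.prev) {
            count++;
        }
        return count;
    }
}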

Example 28 with DiskRangeList

Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

From class OrcFileEstimateErrors, method getIncompleteCbs.

public DiskRangeList getIncompleteCbs(DiskRangeList ranges, long baseOffset, BooleanRef gotAllData) {
    DiskRangeList prev = ranges.prev;
    if (prev == null) {
        prev = new MutateHelper(ranges);
    }
    DiskRangeList current = ranges;
    // Assume by default that we would find everything.
    gotAllData.value = true;
    while (current != null) {
        // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
        DiskRangeList check = current;
        current = current.next;
        if (check.hasData()) {
            continue;
        }
        Integer badLength = cache.get(Long.valueOf(check.getOffset() + baseOffset));
        if (badLength == null || badLength < check.getLength()) {
            gotAllData.value = false;
            continue;
        }
        // We could just remove here and handle the missing tail during read, but that can be
        // dangerous; let's explicitly add an incomplete CB.
        check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd()));
    }
    return prev.next;
}
Also used: MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), IncompleteCb (org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb)
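
The MutateHelper at the top is what makes the final "return prev.next" correct: if the very first range is replaced via replaceSelfWith, the caller's original head reference would point at an unlinked node, but a dummy node anchored in front of the head always reaches the current head through its next link. A small sketch of that trick in isolation (HeadSwap and replaceHead are hypothetical names):

import org.apache.hadoop.hive.common.io.DiskRangeList;
import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper;

final class HeadSwap {

    /** Replaces the head node of a list and still returns the correct new head. */
    static DiskRangeList replaceHead(DiskRangeList head) {
        MutateHelper anchor = new MutateHelper(head); // anchor.next == head
        head.replaceSelfWith(new DiskRangeList(head.getOffset(), head.getEnd()));
        // "head" is now unlinked; the anchor still points at its replacement.
        return anchor.next;
    }
}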

Example 29 with DiskRangeList

Use of org.apache.hadoop.hive.common.io.DiskRangeList in project hive by apache.

From class LowLevelCacheImpl, method addCachedBufferToIter.

/**
 * Adds cached buffer to buffer list.
 * @param currentNotCached Pointer to the list node where we are inserting.
 * @param currentCached The cached buffer found for this node, to insert.
 * @return The new currentNotCached pointer, following the cached buffer insertion.
 */
private DiskRangeList addCachedBufferToIter(DiskRangeList currentNotCached, DiskRangeList currentCached, BooleanRef gotAllData) {
    if (currentNotCached.getOffset() >= currentCached.getOffset()) {
        // Cached buffer has the same (or lower) offset as the requested buffer.
        if (currentNotCached.getEnd() <= currentCached.getEnd()) {
            // Replace the entire current DiskRange with new cached range.
            // In case of an inexact match in either of the below it may throw. We do not currently
            // support the case where the caller requests a single cache buffer via multiple smaller
            // sub-ranges; if that happens, this may throw. No one does it now, though.
            // TODO: should we actively assert here for cache buffers larger than range?
            currentNotCached.replaceSelfWith(currentCached);
            return null;
        } else {
            // This cache range is a prefix of the requested one; the above also applies.
            // The cache may still contain the rest of the requested range, so don't set gotAllData.
            currentNotCached.insertPartBefore(currentCached);
            return currentNotCached;
        }
    }
    // Some part of the requested range is not cached - the cached offset is past the requested.
    if (gotAllData != null) {
        gotAllData.value = false;
    }
    if (currentNotCached.getEnd() <= currentCached.getEnd()) {
        // The cache buffer comprises the tail of the requested range (and possibly overshoots it).
        // The same as above applies - may throw if cache buffer is larger than the requested range,
        // and there's another range after this that starts in the middle of this cache buffer.
        // Currently, we cache at exact offsets, so the latter should never happen.
        currentNotCached.insertPartAfter(currentCached);
        // No more matches expected.
        return null;
    } else {
        // The cached buffer is in the middle of the requested range.
        // The remaining tail of the latter may still be available further.
        DiskRangeList tail = new DiskRangeList(currentCached.getEnd(), currentNotCached.getEnd());
        currentNotCached.insertPartAfter(currentCached);
        currentCached.insertAfter(tail);
        return tail;
    }
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)
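
For intuition, take a requested range [100, 200). A cached buffer covering [100, 200) or more hits the replaceSelfWith branch; [100, 150) is the prefix case (insertPartBefore, keep scanning the remainder); [150, 200) is the tail case (insertPartAfter, done); and [130, 170) is the middle case in the final branch. A hypothetical walk-through of that middle case, assuming (as the branches above rely on) that insertPartAfter trims the current node to end where the inserted node begins:

import org.apache.hadoop.hive.common.io.DiskRangeList;

final class MiddleOverlap {

    public static void main(String[] args) {
        DiskRangeList requested = new DiskRangeList(100, 200);
        DiskRangeList cached = new DiskRangeList(130, 170);
        // Same steps as the final branch: build the uncovered tail [170, 200),
        // trim the request to [100, 130) while splicing in the cached node,
        // then append the tail after it.
        DiskRangeList tail = new DiskRangeList(cached.getEnd(), requested.getEnd());
        requested.insertPartAfter(cached);
        cached.insertAfter(tail);
        // The list now reads [100, 130) -> [130, 170) -> [170, 200).
        for (DiskRangeList r = requested; r != null; r = r.next) {
            System.out.println("[" + r.getOffset() + ", " + r.getEnd() + ")");
        }
    }
}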

Aggregations

DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList): 29
IOException (java.io.IOException): 11
BufferChunk (org.apache.orc.impl.BufferChunk): 11
MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer): 10
ByteBuffer (java.nio.ByteBuffer): 9
DiskRange (org.apache.hadoop.hive.common.io.DiskRange): 6
MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper): 6
CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper): 5
OrcProto (org.apache.orc.OrcProto): 5
ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData): 3
Stream (org.apache.orc.OrcProto.Stream): 3
OutStream (org.apache.orc.impl.OutStream): 3
CodedInputStream (com.google.protobuf.CodedInputStream): 2
InputStream (java.io.InputStream): 2
ArrayList (java.util.ArrayList): 2
IdentityHashMap (java.util.IdentityHashMap): 2
Map (java.util.Map): 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2
ConcurrentSkipListMap (java.util.concurrent.ConcurrentSkipListMap): 2
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2