
Example 6 with MemoryBuffer

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBuffer in the Apache Hive project.

From the class OrcEncodedDataReader, the method getStripeFooterFromCacheOrDisk:

private OrcProto.StripeFooter getStripeFooterFromCacheOrDisk(StripeInformation si, OrcBatchKey stripeKey) throws IOException {
    boolean hasCache = fileKey != null && metadataCache != null;
    if (hasCache) {
        LlapBufferOrBuffers footerBuffers = metadataCache.getStripeTail(stripeKey);
        if (footerBuffers != null) {
            try {
                counters.incrCounter(LlapIOCounters.METADATA_CACHE_HIT);
                ensureCodecFromFileMetadata();
                MemoryBuffer footerBuffer = footerBuffers.getSingleBuffer();
                if (footerBuffer != null) {
                    ByteBuffer bb = footerBuffer.getByteBufferDup();
                    return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), bb.remaining(), codec, fileMetadata.getCompressionBufferSize());
                } else {
                    MemoryBuffer[] footerBufferArray = footerBuffers.getMultipleBuffers();
                    int pos = 0;
                    List<DiskRange> bcs = new ArrayList<>(footerBufferArray.length);
                    for (MemoryBuffer buf : footerBufferArray) {
                        ByteBuffer bb = buf.getByteBufferDup();
                        bcs.add(new BufferChunk(bb, pos));
                        pos += bb.remaining();
                    }
                    return buildStripeFooter(bcs, pos, codec, fileMetadata.getCompressionBufferSize());
                }
            } finally {
                metadataCache.decRefBuffer(footerBuffers);
            }
        }
        counters.incrCounter(LlapIOCounters.METADATA_CACHE_MISS);
    }
    long offset = si.getOffset() + si.getIndexLength() + si.getDataLength();
    long startTime = counters.startTimeCounter();
    ensureRawDataReader(true);
    // TODO: add this to metadatareader in ORC - SI => metadata buffer, not just metadata.
    if (LOG.isTraceEnabled()) {
        LOG.trace("Reading [" + offset + ", " + (offset + si.getFooterLength()) + ") based on " + si);
    }
    DiskRangeList footerRange = rawDataReader.readFileData(new DiskRangeList(offset, offset + si.getFooterLength()), 0, false);
    // LOG.error("Got " + RecordReaderUtils.stringifyDiskRanges(footerRange));
    counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
    // Can only happen w/zcr for a single input buffer.
    assert footerRange.next == null;
    if (hasCache) {
        LlapBufferOrBuffers cacheBuf = metadataCache.putStripeTail(stripeKey, footerRange.getData().duplicate(), cacheTag);
        // We don't use this one.
        metadataCache.decRefBuffer(cacheBuf);
    }
    ByteBuffer bb = footerRange.getData().duplicate();
    CompressionKind kind = orcReader.getCompressionKind();
    boolean isPool = useCodecPool;
    CompressionCodec codec = isPool ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
    boolean isCodecError = true;
    try {
        OrcProto.StripeFooter result = buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), bb.remaining(), codec, orcReader.getCompressionSize());
        isCodecError = false;
        return result;
    } finally {
        try {
            if (isPool && !isCodecError) {
                OrcCodecPool.returnCodec(kind, codec);
            } else {
                codec.close();
            }
        } catch (Exception ex) {
            LOG.error("Ignoring codec cleanup error", ex);
        }
    }
}
Also used: CompressionKind (org.apache.orc.CompressionKind), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), OrcProto (org.apache.orc.OrcProto), ArrayList (java.util.ArrayList), BufferChunk (org.apache.orc.impl.BufferChunk), ByteBuffer (java.nio.ByteBuffer), IOException (java.io.IOException), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), CompressionCodec (org.apache.orc.CompressionCodec), LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers), DiskRange (org.apache.hadoop.hive.common.io.DiskRange)
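
The method above follows a recurring LLAP caching pattern: probe the metadata cache, read from the pinned buffers, release the pin in a finally block, and on a miss read from disk and re-populate the cache, immediately dropping the extra reference. The following sketch restates that control flow with simplified, hypothetical Cache and RefCountedBuffer types; it illustrates the refcount discipline and is not Hive's actual API.

import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical stand-ins for the cache and buffer types used above; not Hive classes.
class RefCountedBuffer {
    private final AtomicInteger refCount = new AtomicInteger(1);
    private final ByteBuffer data;
    RefCountedBuffer(ByteBuffer data) { this.data = data; }
    ByteBuffer duplicate() { return data.duplicate(); }
    void decRef() { refCount.decrementAndGet(); }
}

interface SimpleMetadataCache {
    RefCountedBuffer get(String key);                   // returns a pinned buffer on a hit, or null
    RefCountedBuffer put(String key, ByteBuffer data);  // caches a copy and returns it pinned
}

class StripeTailReader {
    private final SimpleMetadataCache cache;
    StripeTailReader(SimpleMetadataCache cache) { this.cache = cache; }

    byte[] readStripeTail(String key) {
        RefCountedBuffer cached = cache.get(key);
        if (cached != null) {
            try {
                // Cache hit: copy the bytes out while the buffer is still pinned.
                ByteBuffer bb = cached.duplicate();
                byte[] out = new byte[bb.remaining()];
                bb.get(out);
                return out;
            } finally {
                cached.decRef(); // always unpin, mirroring metadataCache.decRefBuffer(footerBuffers)
            }
        }
        // Cache miss: read from disk, re-populate the cache, and drop the reference we do not keep.
        ByteBuffer fromDisk = readFromDisk(key);
        cache.put(key, fromDisk.duplicate()).decRef();
        byte[] out = new byte[fromDisk.remaining()];
        fromDisk.duplicate().get(out);
        return out;
    }

    private ByteBuffer readFromDisk(String key) {
        return ByteBuffer.wrap(key.getBytes()); // placeholder for the real file read
    }
}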

Example 7 with MemoryBuffer

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBuffer in the Apache Hive project.

From the class TestBuddyAllocator, the method allocateAndUseBuffer:

private void allocateAndUseBuffer(BuddyAllocator a, MemoryBuffer[][] allocs, long[][] testValues, int allocCount, int index, int sizeLog2) throws Exception {
    allocs[index] = new MemoryBuffer[allocCount];
    testValues[index] = new long[allocCount];
    int size = (1 << sizeLog2) - 1;
    try {
        a.allocateMultiple(allocs[index], size);
    } catch (AllocatorOutOfMemoryException ex) {
        LOG.error("Failed to allocate " + allocCount + " of " + size + "; " + a.testDump());
        throw ex;
    }
    // LOG.info("Allocated " + allocCount + " of " + size + "; " + a.debugDump());
    for (int j = 0; j < allocCount; ++j) {
        MemoryBuffer mem = allocs[index][j];
        long testValue = testValues[index][j] = rdm.nextLong();
        putTestValue(mem, testValue);
    }
}
Also used: MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), AllocatorOutOfMemoryException (org.apache.hadoop.hive.common.io.Allocator.AllocatorOutOfMemoryException)
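
Note that the test asks for (1 << sizeLog2) - 1 bytes, one byte short of a power of two, which a buddy allocator still has to serve from a full power-of-two block. Below is a minimal sketch of that rounding, assuming the classic buddy scheme with a fixed minimum block size; it is not Hive's BuddyAllocator code.

public class BuddySizeDemo {
    // Round a requested allocation up to the power-of-two block a buddy allocator would use.
    static int buddyBlockSize(int requested, int minBlockSize) {
        int size = Math.max(requested, minBlockSize);
        int highest = Integer.highestOneBit(size);
        return (highest == size) ? size : highest << 1; // next power of two
    }

    public static void main(String[] args) {
        int sizeLog2 = 13;
        int requested = (1 << sizeLog2) - 1;                 // 8191, one byte short of 8 KB, as in the test
        System.out.println(buddyBlockSize(requested, 256));  // prints 8192: the full 8 KB block is used
    }
}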

Example 8 with MemoryBuffer

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBuffer in the Apache Hive project.

From the class TestLowLevelCacheImpl, the method testMTTWithCleanup:

@Test
public void testMTTWithCleanup() {
    final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
    final long fn1 = 1, fn2 = 2;
    final int offsetsToUse = 8;
    final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);
    final AtomicInteger rdmsDone = new AtomicInteger(0);
    Callable<Long> rdmCall = new Callable<Long>() {

        public Long call() {
            int gets = 0, puts = 0;
            try {
                Random rdm = new Random(1234 + Thread.currentThread().getId());
                syncThreadStart(cdlIn, cdlOut);
                for (int i = 0; i < 20000; ++i) {
                    boolean isGet = rdm.nextBoolean(), isFn1 = rdm.nextBoolean();
                    long fileName = isFn1 ? fn1 : fn2;
                    int fileIndex = isFn1 ? 1 : 2;
                    int count = rdm.nextInt(offsetsToUse);
                    if (isGet) {
                        int[] offsets = new int[count];
                        count = generateOffsets(offsetsToUse, rdm, offsets);
                        CreateHelper list = new CreateHelper();
                        for (int j = 0; j < count; ++j) {
                            list.addOrMerge(offsets[j], offsets[j] + 1, true, false);
                        }
                        DiskRangeList iter = cache.getFileData(fileName, list.get(), 0, testFactory, null, null);
                        int j = -1;
                        while (iter != null) {
                            ++j;
                            if (!(iter instanceof CacheChunk)) {
                                iter = iter.next;
                                continue;
                            }
                            ++gets;
                            LlapAllocatorBuffer result = (LlapAllocatorBuffer) ((CacheChunk) iter).getBuffer();
                            assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), result.getArenaIndex());
                            cache.decRefBuffer(result);
                            iter = iter.next;
                        }
                    } else {
                        DiskRange[] ranges = new DiskRange[count];
                        int[] offsets = new int[count];
                        for (int j = 0; j < count; ++j) {
                            int next = rdm.nextInt(offsetsToUse);
                            ranges[j] = dr(next, next + 1);
                            offsets[j] = next;
                        }
                        MemoryBuffer[] buffers = new MemoryBuffer[count];
                        for (int j = 0; j < offsets.length; ++j) {
                            LlapDataBuffer buf = LowLevelCacheImpl.allocateFake();
                            buf.setNewAllocLocation(makeFakeArenaIndex(fileIndex, offsets[j]), 0);
                            buffers[j] = buf;
                        }
                        long[] mask = cache.putFileData(fileName, ranges, buffers, 0, Priority.NORMAL, null, null);
                        puts += buffers.length;
                        long maskVal = 0;
                        if (mask != null) {
                            assertEquals(1, mask.length);
                            maskVal = mask[0];
                        }
                        for (int j = 0; j < offsets.length; ++j) {
                            LlapDataBuffer buf = (LlapDataBuffer) (buffers[j]);
                            if ((maskVal & 1) == 1) {
                                assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), buf.getArenaIndex());
                            }
                            maskVal >>= 1;
                            cache.decRefBuffer(buf);
                        }
                    }
                }
            } finally {
                rdmsDone.incrementAndGet();
            }
            return (((long) gets) << 32) | puts;
        }

        private int makeFakeArenaIndex(int fileIndex, long offset) {
            return (int) ((fileIndex << 12) + offset);
        }
    };
    FutureTask<Integer> evictionTask = new FutureTask<Integer>(new Callable<Integer>() {

        public Integer call() {
            boolean isFirstFile = false;
            Random rdm = new Random(1234 + Thread.currentThread().getId());
            int evictions = 0;
            syncThreadStart(cdlIn, cdlOut);
            while (rdmsDone.get() < 3) {
                DiskRangeList head = new DiskRangeList(0, offsetsToUse + 1);
                isFirstFile = !isFirstFile;
                long fileId = isFirstFile ? fn1 : fn2;
                head = cache.getFileData(fileId, head, 0, testFactory, null, null);
                DiskRange[] results = head.listToArray();
                int startIndex = rdm.nextInt(results.length), index = startIndex;
                LlapDataBuffer victim = null;
                do {
                    DiskRange r = results[index];
                    if (r instanceof CacheChunk) {
                        LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) r).getBuffer();
                        cache.decRefBuffer(result);
                        if (victim == null && result.invalidate() == LlapCacheableBuffer.INVALIDATE_OK) {
                            ++evictions;
                            victim = result;
                        }
                    }
                    ++index;
                    if (index == results.length)
                        index = 0;
                } while (index != startIndex);
                if (victim == null)
                    continue;
                cache.notifyEvicted(victim);
            }
            return evictions;
        }
    });
    FutureTask<Long> rdmTask1 = new FutureTask<Long>(rdmCall), rdmTask2 = new FutureTask<Long>(rdmCall), rdmTask3 = new FutureTask<Long>(rdmCall);
    Executor threadPool = Executors.newFixedThreadPool(4);
    threadPool.execute(rdmTask1);
    threadPool.execute(rdmTask2);
    threadPool.execute(rdmTask3);
    threadPool.execute(evictionTask);
    try {
        cdlIn.await();
        cdlOut.countDown();
        long result1 = rdmTask1.get(), result2 = rdmTask2.get(), result3 = rdmTask3.get();
        int evictions = evictionTask.get();
        LOG.info("MTT test: task 1: " + descRdmTask(result1) + ", task 2: " + descRdmTask(result2) + ", task 3: " + descRdmTask(result3) + "; " + evictions + " evictions");
    } catch (Throwable t) {
        throw new RuntimeException(t);
    }
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), Callable (java.util.concurrent.Callable), CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper), Executor (java.util.concurrent.Executor), Random (java.util.Random), FutureTask (java.util.concurrent.FutureTask), CacheChunk (org.apache.hadoop.hive.ql.io.orc.encoded.CacheChunk), CountDownLatch (java.util.concurrent.CountDownLatch), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), DiskRange (org.apache.hadoop.hive.common.io.DiskRange), Test (org.junit.Test)
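
The syncThreadStart(cdlIn, cdlOut) calls implement a common start gate: each worker counts down cdlIn to signal it is ready, then blocks on cdlOut until the coordinating thread, which does cdlIn.await() followed by cdlOut.countDown() near the end of the test, releases all workers at once. A standalone sketch of the same gate, with illustrative names rather than Hive's:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class StartGateDemo {
    public static void main(String[] args) throws InterruptedException {
        final int workers = 4;
        final CountDownLatch ready = new CountDownLatch(workers); // plays the role of cdlIn
        final CountDownLatch go = new CountDownLatch(1);          // plays the role of cdlOut
        ExecutorService pool = Executors.newFixedThreadPool(workers);
        for (int i = 0; i < workers; ++i) {
            final int id = i;
            pool.execute(() -> {
                try {
                    ready.countDown(); // report "I'm ready"
                    go.await();        // wait for the coordinator to open the gate
                    System.out.println("worker " + id + " started");
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            });
        }
        ready.await();   // wait until every worker is parked at the gate
        go.countDown();  // release them all at (approximately) the same instant
        pool.shutdown();
    }
}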

Example 9 with MemoryBuffer

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBuffer in the Apache Hive project.

From the class SerDeEncodedDataReader, the method discardUncachedBuffers:

private void discardUncachedBuffers(List<MemoryBuffer> list) {
    for (MemoryBuffer buffer : list) {
        boolean isInvalidated = ((LlapDataBuffer) buffer).invalidate();
        assert isInvalidated;
        bufferManager.getAllocator().deallocate(buffer);
    }
}
Also used: MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), LlapDataBuffer (org.apache.hadoop.hive.llap.cache.LlapDataBuffer)
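
discardUncachedBuffers relies on an ordering that matters: a buffer must be invalidated, so no reader can pin it again, before its memory goes back to the allocator. A simplified sketch of that discipline with hypothetical buffer and allocator types (not Hive's LlapDataBuffer or Allocator API):

import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical buffer/allocator pair used only to illustrate the ordering.
class DiscardableBuffer {
    private final AtomicBoolean valid = new AtomicBoolean(true);
    /** Returns true only for the caller that actually flips the buffer to invalid. */
    boolean invalidate() { return valid.compareAndSet(true, false); }
}

class SimpleAllocator {
    void deallocate(DiscardableBuffer buffer) {
        // Memory is only safe to reuse because invalidate() has already succeeded.
    }
}

class Discarder {
    private final SimpleAllocator allocator = new SimpleAllocator();

    void discardUncached(List<DiscardableBuffer> buffers) {
        for (DiscardableBuffer buffer : buffers) {
            boolean isInvalidated = buffer.invalidate();
            assert isInvalidated; // these buffers were never published, so no one else holds them
            allocator.deallocate(buffer);
        }
    }
}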

Example 10 with MemoryBuffer

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBuffer in the Apache Hive project.

From the class SerDeEncodedDataReader, the method returnData:

@Override
public void returnData(OrcEncodedColumnBatch ecb) {
    for (int colIx = 0; colIx < ecb.getTotalColCount(); ++colIx) {
        if (!ecb.hasData(colIx))
            continue;
        // TODO: reuse columnvector-s on hasBatch - save the array by column? take apart each list.
        ColumnStreamData[] datas = ecb.getColumnData(colIx);
        for (ColumnStreamData data : datas) {
            if (data == null || data.decRef() != 0)
                continue;
            if (LlapIoImpl.LOCKING_LOGGER.isTraceEnabled()) {
                for (MemoryBuffer buf : data.getCacheBuffers()) {
                    LlapIoImpl.LOCKING_LOGGER.trace("Unlocking {} at the end of processing", buf);
                }
            }
            bufferManager.decRefBuffers(data.getCacheBuffers());
            CSD_POOL.offer(data);
        }
    }
    // We can offer ECB even with some streams not discarded; reset() will clear the arrays.
    ECB_POOL.offer(ecb);
}
Also used: MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)
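
returnData combines two recycling mechanisms: reference counting on each ColumnStreamData (only the caller that drops the count to zero may recycle it) and object pools (CSD_POOL, ECB_POOL) that take the emptied objects back. A minimal sketch of that combination, using an illustrative pool and holder class rather than Hive's actual implementations:

import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;

// Illustrative pooled, refcounted holder; not Hive's ColumnStreamData.
class PooledStreamData {
    private final AtomicInteger refCount = new AtomicInteger(1);
    void incRef() { refCount.incrementAndGet(); }
    int decRef() { return refCount.decrementAndGet(); }
    void reset() { refCount.set(1); }
}

class SimplePool<T> {
    private final ConcurrentLinkedQueue<T> queue = new ConcurrentLinkedQueue<>();
    void offer(T item) { queue.offer(item); }
    T take() { return queue.poll(); } // null when empty; the caller allocates a fresh object
}

class ReturnDataDemo {
    private static final SimplePool<PooledStreamData> POOL = new SimplePool<>();

    static void returnData(PooledStreamData data) {
        if (data == null || data.decRef() != 0) {
            return; // someone else still holds a reference
        }
        data.reset();
        POOL.offer(data); // the last holder returns the object to the pool
    }
}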

Aggregations

MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer): 22 usages
ByteBuffer (java.nio.ByteBuffer): 12 usages
DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList): 10 usages
DiskRange (org.apache.hadoop.hive.common.io.DiskRange): 6 usages
IOException (java.io.IOException): 5 usages
CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper): 4 usages
ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData): 4 usages
OrcProto (org.apache.orc.OrcProto): 4 usages
BufferChunk (org.apache.orc.impl.BufferChunk): 4 usages
CodedInputStream (com.google.protobuf.CodedInputStream): 2 usages
InputStream (java.io.InputStream): 2 usages
ArrayList (java.util.ArrayList): 2 usages
IdentityHashMap (java.util.IdentityHashMap): 2 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2 usages
MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper): 2 usages
LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers): 2 usages
CacheChunk (org.apache.hadoop.hive.ql.io.orc.encoded.CacheChunk): 2 usages
Stream (org.apache.orc.OrcProto.Stream): 2 usages
Kind (org.apache.orc.OrcProto.Stream.Kind): 2 usages
InStream (org.apache.orc.impl.InStream): 2 usages