
Example 1 with LlapBufferOrBuffers

Use of org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers in project hive by apache.

From the class OrcEncodedDataReader, the method getFileFooterFromCacheOrDisk:

/**
 *  Gets file metadata for the split from cache, or reads it from the file.
 */
private OrcFileMetadata getFileFooterFromCacheOrDisk() throws IOException {
    LlapBufferOrBuffers tailBuffers = null;
    List<StripeStatistics> stats = null;
    List<StripeInformation> stripes = null;
    boolean hasCache = fileKey != null && metadataCache != null;
    if (hasCache) {
        tailBuffers = metadataCache.getFileMetadata(fileKey);
        if (tailBuffers != null) {
            try {
                MemoryBuffer tailBuffer = tailBuffers.getSingleBuffer();
                ByteBuffer bb = null;
                if (tailBuffer != null) {
                    // TODO: remove the copy after ORC-158 and ORC-197.
                    // Copy the cached bytes into a heap buffer so the cached object is never modified.
                    ByteBuffer dupBb = tailBuffer.getByteBufferDup();
                    bb = ByteBuffer.allocate(dupBb.remaining());
                    bb.put(dupBb);
                    bb.flip();
                } else {
                    // TODO: add the ability for extractFileTail to read from multiple buffers?
                    MemoryBuffer[] tailBufferArray = tailBuffers.getMultipleBuffers();
                    int totalSize = 0;
                    for (MemoryBuffer buf : tailBufferArray) {
                        totalSize += buf.getByteBufferRaw().remaining();
                    }
                    bb = ByteBuffer.allocate(totalSize);
                    for (MemoryBuffer buf : tailBufferArray) {
                        bb.put(buf.getByteBufferDup());
                    }
                    bb.flip();
                }
                OrcTail orcTail = ReaderImpl.extractFileTail(bb);
                FileTail tail = orcTail.getFileTail();
                stats = orcTail.getStripeStatisticsProto();
                stripes = new ArrayList<>(tail.getFooter().getStripesCount());
                for (OrcProto.StripeInformation stripeProto : tail.getFooter().getStripesList()) {
                    stripes.add(new ReaderImpl.StripeInformationImpl(stripeProto));
                }
                return new OrcFileMetadata(fileKey, tail.getFooter(), tail.getPostscript(), stats, stripes);
            } finally {
                // We don't need the buffer anymore.
                metadataCache.decRefBuffer(tailBuffers);
                counters.incrCounter(LlapIOCounters.METADATA_CACHE_HIT);
            }
        }
        counters.incrCounter(LlapIOCounters.METADATA_CACHE_MISS);
    }
    // Cache miss, or no cache configured: read the serialized footer from the file.
    ensureOrcReader();
    ByteBuffer tailBufferBb = orcReader.getSerializedFileFooter();
    if (hasCache) {
        tailBuffers = metadataCache.putFileMetadata(fileKey, tailBufferBb, cacheTag);
        // We don't use the cache's copy of the buffer.
        metadataCache.decRefBuffer(tailBuffers);
    }
    FileTail ft = orcReader.getFileTail();
    return new OrcFileMetadata(fileKey, ft.getFooter(), ft.getPostscript(), orcReader.getOrcProtoStripeStatistics(), orcReader.getStripes());
}
Also used: OrcFileMetadata (org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata), OrcProto (org.apache.orc.OrcProto), StripeStatistics (org.apache.orc.OrcProto.StripeStatistics), RecordReaderImpl (org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl), ReaderImpl (org.apache.orc.impl.ReaderImpl), ByteBuffer (java.nio.ByteBuffer), FileTail (org.apache.orc.OrcProto.FileTail), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers), StripeInformation (org.apache.orc.StripeInformation), OrcTail (org.apache.orc.impl.OrcTail)
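
Both cache branches above follow the same discipline: the lookup returns either a single buffer or an array of buffers, the caller copies the bytes it needs while still holding the reference, and decRefBuffer is called in a finally block once the copy exists. A minimal sketch of that pattern, assuming the metadataCache field is wired up as in OrcEncodedDataReader (copyTailFromCache is a hypothetical helper name, not part of the Hive source):

// Hypothetical helper illustrating the cache-read pattern used in the example above.
private ByteBuffer copyTailFromCache(Object fileKey) {
    LlapBufferOrBuffers tailBuffers = metadataCache.getFileMetadata(fileKey);
    if (tailBuffers == null) {
        return null; // Cache miss; the caller falls back to reading from the file.
    }
    try {
        MemoryBuffer single = tailBuffers.getSingleBuffer();
        MemoryBuffer[] parts = (single != null)
            ? new MemoryBuffer[] { single } : tailBuffers.getMultipleBuffers();
        int totalSize = 0;
        for (MemoryBuffer part : parts) {
            totalSize += part.getByteBufferRaw().remaining();
        }
        // Copy out of the cache so the cached buffers are never modified.
        ByteBuffer copy = ByteBuffer.allocate(totalSize);
        for (MemoryBuffer part : parts) {
            copy.put(part.getByteBufferDup());
        }
        copy.flip();
        return copy;
    } finally {
        // Release the reference taken by getFileMetadata once the copy exists.
        metadataCache.decRefBuffer(tailBuffers);
    }
}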

Example 2 with LlapBufferOrBuffers

Use of org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers in project hive by apache.

From the class OrcEncodedDataReader, the method getStripeFooterFromCacheOrDisk:

private OrcProto.StripeFooter getStripeFooterFromCacheOrDisk(StripeInformation si, OrcBatchKey stripeKey) throws IOException {
    boolean hasCache = fileKey != null && metadataCache != null;
    if (hasCache) {
        LlapBufferOrBuffers footerBuffers = metadataCache.getStripeTail(stripeKey);
        if (footerBuffers != null) {
            try {
                counters.incrCounter(LlapIOCounters.METADATA_CACHE_HIT);
                ensureCodecFromFileMetadata();
                MemoryBuffer footerBuffer = footerBuffers.getSingleBuffer();
                if (footerBuffer != null) {
                    ByteBuffer bb = footerBuffer.getByteBufferDup();
                    return buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), bb.remaining(), codec, fileMetadata.getCompressionBufferSize());
                } else {
                    MemoryBuffer[] footerBufferArray = footerBuffers.getMultipleBuffers();
                    int pos = 0;
                    List<DiskRange> bcs = new ArrayList<>(footerBufferArray.length);
                    for (MemoryBuffer buf : footerBufferArray) {
                        ByteBuffer bb = buf.getByteBufferDup();
                        bcs.add(new BufferChunk(bb, pos));
                        pos += bb.remaining();
                    }
                    return buildStripeFooter(bcs, pos, codec, fileMetadata.getCompressionBufferSize());
                }
            } finally {
                metadataCache.decRefBuffer(footerBuffers);
            }
        }
        counters.incrCounter(LlapIOCounters.METADATA_CACHE_MISS);
    }
    // The stripe footer starts immediately after the stripe's index and data streams.
    long offset = si.getOffset() + si.getIndexLength() + si.getDataLength();
    long startTime = counters.startTimeCounter();
    ensureRawDataReader(true);
    // TODO: add this to metadatareader in ORC - SI => metadata buffer, not just metadata.
    if (LOG.isTraceEnabled()) {
        LOG.trace("Reading [" + offset + ", " + (offset + si.getFooterLength()) + ") based on " + si);
    }
    DiskRangeList footerRange = rawDataReader.readFileData(new DiskRangeList(offset, offset + si.getFooterLength()), 0, false);
    // LOG.error("Got " + RecordReaderUtils.stringifyDiskRanges(footerRange));
    counters.incrTimeCounter(LlapIOCounters.HDFS_TIME_NS, startTime);
    // This can only happen with ZCR (zero-copy read) for a single input buffer.
    assert footerRange.next == null;
    if (hasCache) {
        LlapBufferOrBuffers cacheBuf = metadataCache.putStripeTail(stripeKey, footerRange.getData().duplicate(), cacheTag);
        // We don't use this one.
        metadataCache.decRefBuffer(cacheBuf);
    }
    ByteBuffer bb = footerRange.getData().duplicate();
    CompressionKind kind = orcReader.getCompressionKind();
    boolean isPool = useCodecPool;
    CompressionCodec codec = isPool ? OrcCodecPool.getCodec(kind) : WriterImpl.createCodec(kind);
    boolean isCodecError = true;
    try {
        OrcProto.StripeFooter result = buildStripeFooter(Lists.<DiskRange>newArrayList(new BufferChunk(bb, 0)), bb.remaining(), codec, orcReader.getCompressionSize());
        isCodecError = false;
        return result;
    } finally {
        try {
            if (isPool && !isCodecError) {
                OrcCodecPool.returnCodec(kind, codec);
            } else {
                codec.close();
            }
        } catch (Exception ex) {
            LOG.error("Ignoring codec cleanup error", ex);
        }
    }
}
Also used: CompressionKind (org.apache.orc.CompressionKind), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), OrcProto (org.apache.orc.OrcProto), ArrayList (java.util.ArrayList), BufferChunk (org.apache.orc.impl.BufferChunk), ByteBuffer (java.nio.ByteBuffer), IOException (java.io.IOException), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), CompressionCodec (org.apache.orc.CompressionCodec), LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers), DiskRange (org.apache.hadoop.hive.common.io.DiskRange)
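
The disk path above also shows the codec handling: a codec borrowed from OrcCodecPool is returned to the pool only if decompression finished cleanly, otherwise it is closed and discarded. A minimal sketch of that discipline, assuming the codec pool is enabled; decodeFooter is a hypothetical stand-in for buildStripeFooter, and LOG is the surrounding class logger:

// Sketch of the codec-pool discipline from the disk path above (pool assumed enabled).
private OrcProto.StripeFooter decodeWithPooledCodec(CompressionKind kind, ByteBuffer bb)
        throws IOException {
    CompressionCodec codec = OrcCodecPool.getCodec(kind);
    boolean isCodecError = true;
    try {
        // decodeFooter is a hypothetical placeholder for buildStripeFooter.
        OrcProto.StripeFooter result = decodeFooter(bb, codec);
        isCodecError = false; // Only a codec that finished cleanly is safe to reuse.
        return result;
    } finally {
        try {
            if (!isCodecError) {
                OrcCodecPool.returnCodec(kind, codec); // Healthy codec goes back to the pool.
            } else {
                codec.close(); // A codec that failed mid-operation is discarded.
            }
        } catch (Exception ex) {
            LOG.error("Ignoring codec cleanup error", ex);
        }
    }
}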

Example 3 with LlapBufferOrBuffers

Use of org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers in project hive by apache.

From the class TestOrcMetadataCache, the method testBuffers:

@Test
public void testBuffers() throws Exception {
    DummyMemoryManager mm = new DummyMemoryManager();
    DummyCachePolicy cp = new DummyCachePolicy();
    final int MAX_ALLOC = 64;
    LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null);
    MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
    Object fileKey1 = new Object();
    Random rdm = new Random();
    // A buffer smaller than MAX_ALLOC fits in a single cached allocation.
    ByteBuffer smallBuffer = ByteBuffer.allocate(MAX_ALLOC - 1);
    rdm.nextBytes(smallBuffer.array());
    LlapBufferOrBuffers result = cache.putFileMetadata(fileKey1, smallBuffer);
    cache.decRefBuffer(result);
    ByteBuffer cacheBuf = result.getSingleBuffer().getByteBufferDup();
    assertEquals(smallBuffer, cacheBuf);
    result = cache.putFileMetadata(fileKey1, smallBuffer);
    cache.decRefBuffer(result);
    cacheBuf = result.getSingleBuffer().getByteBufferDup();
    assertEquals(smallBuffer, cacheBuf);
    result = cache.getFileMetadata(fileKey1);
    cacheBuf = result.getSingleBuffer().getByteBufferDup();
    assertEquals(smallBuffer, cacheBuf);
    cache.decRefBuffer(result);
    // Simulate eviction of the cached buffer; the next lookup should return null.
    cache.notifyEvicted((LlapMetadataBuffer<?>) result.getSingleBuffer());
    result = cache.getFileMetadata(fileKey1);
    assertNull(result);
    // A buffer larger than MAX_ALLOC must be split across multiple cache buffers.
    ByteBuffer largeBuffer = ByteBuffer.allocate((int) (MAX_ALLOC * 2.5));
    rdm.nextBytes(largeBuffer.array());
    result = cache.putFileMetadata(fileKey1, largeBuffer);
    cache.decRefBuffer(result);
    assertNull(result.getSingleBuffer());
    assertEquals(largeBuffer, extractResultBbs(result));
    result = cache.getFileMetadata(fileKey1);
    assertNull(result.getSingleBuffer());
    assertEquals(largeBuffer, extractResultBbs(result));
    LlapAllocatorBuffer b0 = result.getMultipleLlapBuffers()[0], b1 = result.getMultipleLlapBuffers()[1];
    cache.decRefBuffer(result);
    // Evicting one buffer of a multi-buffer entry invalidates the whole entry.
    cache.notifyEvicted((LlapMetadataBuffer<?>) b1);
    result = cache.getFileMetadata(fileKey1);
    assertNull(result);
    // Should have also been thrown out.
    assertFalse(b0.incRef() > 0);
}
Also used: LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics), Random (java.util.Random), MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache), ByteBuffer (java.nio.ByteBuffer), LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers), Test (org.junit.Test)
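
The test compares the original buffer against extractResultBbs(result), a helper that is not included in this excerpt. Presumably it concatenates the duplicated byte buffers of a multi-buffer result back into one ByteBuffer so assertEquals can compare it with the input; a minimal sketch under that assumption:

// Hypothetical reconstruction of the extractResultBbs helper referenced by the test:
// concatenate the duplicated byte buffers of a multi-buffer result into one ByteBuffer.
private static ByteBuffer extractResultBbs(LlapBufferOrBuffers result) {
    int totalLen = 0;
    for (LlapAllocatorBuffer buf : result.getMultipleLlapBuffers()) {
        totalLen += buf.getByteBufferRaw().remaining();
    }
    ByteBuffer combined = ByteBuffer.allocate(totalLen);
    for (LlapAllocatorBuffer buf : result.getMultipleLlapBuffers()) {
        combined.put(buf.getByteBufferDup());
    }
    combined.flip();
    return combined;
}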

Aggregations

Classes used across the examples above, with the number of examples using each:

ByteBuffer (java.nio.ByteBuffer): 3
LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers): 3
MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer): 2
OrcProto (org.apache.orc.OrcProto): 2
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
Random (java.util.Random): 1
DiskRange (org.apache.hadoop.hive.common.io.DiskRange): 1
DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList): 1
MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache): 1
OrcFileMetadata (org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata): 1
LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics): 1
RecordReaderImpl (org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl): 1
CompressionCodec (org.apache.orc.CompressionCodec): 1
CompressionKind (org.apache.orc.CompressionKind): 1
FileTail (org.apache.orc.OrcProto.FileTail): 1
StripeStatistics (org.apache.orc.OrcProto.StripeStatistics): 1
StripeInformation (org.apache.orc.StripeInformation): 1
BufferChunk (org.apache.orc.impl.BufferChunk): 1
OrcTail (org.apache.orc.impl.OrcTail): 1