Example 1 with MemoryBufferOrBuffers

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers in project hive by apache.

From class VectorizedParquetRecordReader, method readSplitFooter:

private ParquetMetadata readSplitFooter(JobConf configuration, final Path file, Object cacheKey, MetadataFilter filter, String tag) throws IOException {
    // Try the LLAP metadata cache first, when both a cache and a cache key are available.
    MemoryBufferOrBuffers footerData = (cacheKey == null || metadataCache == null) ? null : metadataCache.getFileMetadata(cacheKey);
    if (footerData != null) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Found the footer in cache for " + cacheKey);
        }
        try {
            return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
        } finally {
            // Release the cache buffer once the footer has been parsed.
            metadataCache.decRefBuffer(footerData);
        }
    }
    final FileSystem fs = file.getFileSystem(configuration);
    final FileStatus stat = fs.getFileStatus(file);
    if (cacheKey == null || metadataCache == null) {
        return readFooterFromFile(file, fs, stat, filter);
    }
    // Parquet calls protobuf methods directly on the stream and we can't get bytes after the fact.
    try (SeekableInputStream stream = HadoopStreams.wrap(fs.open(file))) {
        // The Parquet file tail is [footer][4-byte little-endian footer length][4-byte magic]; locate the length field.
        long footerLengthIndex = stat.getLen() - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
        stream.seek(footerLengthIndex);
        int footerLength = BytesUtils.readIntLittleEndian(stream);
        stream.seek(footerLengthIndex - footerLength);
        if (LOG.isInfoEnabled()) {
            LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);
        }
        // Copy the footer bytes into the cache while reading them from the stream.
        footerData = metadataCache.putFileMetadata(cacheKey, footerLength, stream, tag);
        try {
            return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
        } finally {
            metadataCache.decRefBuffer(footerData);
        }
    }
}
Also used: MemoryBufferOrBuffers (org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers), FileStatus (org.apache.hadoop.fs.FileStatus), SeekableInputStream (org.apache.parquet.io.SeekableInputStream), FileSystem (org.apache.hadoop.fs.FileSystem)
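
The seek arithmetic above relies on the Parquet file layout: a file ends with the serialized footer, followed by a 4-byte little-endian footer length and the 4-byte magic. The following is a minimal standalone sketch of the same offset computation against a plain local file; FooterOffsets and footerStart are illustrative names, not part of Hive or Parquet.

import java.io.IOException;
import java.io.RandomAccessFile;

// Illustrative sketch only: mirrors the tail layout readSplitFooter assumes,
// i.e. [footer bytes][4-byte little-endian footer length][4-byte magic].
public final class FooterOffsets {

    private static final int FOOTER_LENGTH_SIZE = 4; // same role as ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE
    private static final int MAGIC_LENGTH = 4;       // same role as ParquetFileWriter.MAGIC.length

    /** Returns the offset of the first footer byte, mirroring the math in readSplitFooter. */
    public static long footerStart(RandomAccessFile file) throws IOException {
        long footerLengthIndex = file.length() - FOOTER_LENGTH_SIZE - MAGIC_LENGTH;
        file.seek(footerLengthIndex);
        // Equivalent of BytesUtils.readIntLittleEndian: least significant byte first.
        int b0 = file.read(), b1 = file.read(), b2 = file.read(), b3 = file.read();
        int footerLength = (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
        return footerLengthIndex - footerLength;
    }
}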

Example 2 with MemoryBufferOrBuffers

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers in project hive by apache.

From class VectorizedParquetRecordReader, method readSplitFooter (a variant that takes a CacheTag and guards cache-only reads):

private ParquetMetadata readSplitFooter(JobConf configuration, final Path file, Object cacheKey, MetadataFilter filter, CacheTag tag) throws IOException {
    MemoryBufferOrBuffers footerData = (cacheKey == null || metadataCache == null) ? null : metadataCache.getFileMetadata(cacheKey);
    if (footerData != null) {
        LOG.info("Found the footer in cache for " + cacheKey);
        try {
            return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
        } finally {
            metadataCache.decRefBuffer(footerData);
        }
    } else {
        // Fail fast when reads are restricted to the cache and the footer was not found there.
        throwIfCacheOnlyRead(isReadCacheOnly);
    }
    final FileSystem fs = file.getFileSystem(configuration);
    final FileStatus stat = fs.getFileStatus(file);
    if (cacheKey == null || metadataCache == null) {
        return readFooterFromFile(file, fs, stat, filter);
    }
    // Parquet calls protobuf methods directly on the stream and we can't get bytes after the fact.
    try (SeekableInputStream stream = HadoopStreams.wrap(fs.open(file))) {
        long footerLengthIndex = stat.getLen() - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
        stream.seek(footerLengthIndex);
        int footerLength = BytesUtils.readIntLittleEndian(stream);
        stream.seek(footerLengthIndex - footerLength);
        LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);
        // Note: we don't pass in isStopped here - this is not on an IO thread.
        footerData = metadataCache.putFileMetadata(cacheKey, footerLength, stream, tag, null);
        try {
            return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
        } finally {
            metadataCache.decRefBuffer(footerData);
        }
    }
}
Also used: MemoryBufferOrBuffers (org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers), FileStatus (org.apache.hadoop.fs.FileStatus), SeekableInputStream (org.apache.parquet.io.SeekableInputStream), FileSystem (org.apache.hadoop.fs.FileSystem)
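
Both variants follow the same reference-counting discipline: a buffer returned by getFileMetadata or putFileMetadata stays pinned until the caller releases it, so decRefBuffer runs in a finally block once ParquetFileReader.readFooter has consumed the bytes. A generic sketch of that acquire-use-release shape, with hypothetical acquire/release/use callbacks standing in for the real LLAP cache calls:

import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;

// Illustrative sketch only: the acquire-use-release pattern from readSplitFooter,
// generalized over hypothetical callbacks rather than the real LLAP cache API.
final class RefCountedRead {

    static <B, R> R withBuffer(Supplier<B> acquire,  // e.g. the getFileMetadata/putFileMetadata call
                               Consumer<B> release,  // e.g. metadataCache::decRefBuffer
                               Function<B, R> use) { // e.g. the ParquetFileReader.readFooter call
        B buffer = acquire.get();
        try {
            return use.apply(buffer);
        } finally {
            // Release unconditionally: a parse failure must not leak a pinned buffer.
            release.accept(buffer);
        }
    }
}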

Example 3 with MemoryBufferOrBuffers

Use of org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers in project hive by apache.

From class LlapOrcCacheLoader, method loadFileFooter:

/**
 * Pre-read the file footer into the cache.
 */
public void loadFileFooter() {
    MemoryBufferOrBuffers tailBuffers = metadataCache.getFileMetadata(fileKey);
    if (tailBuffers == null) {
        // The ORC reader already holds the serialized footer in memory; publish it under the file key.
        ByteBuffer tailBufferBb = orcReader.getSerializedFileFooter();
        metadataCache.putFileMetadata(fileKey, tailBufferBb, cacheTag, null);
    }
}
Also used: MemoryBufferOrBuffers (org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers), ByteBuffer (java.nio.ByteBuffer)
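
The ORC path is simpler: the reader already holds the serialized footer in memory, so the loader only publishes it when nothing is cached yet. A sketch of that check-then-put idiom over a plain ConcurrentHashMap, which stands in for the real metadataCache (the LLAP cache additionally refcounts buffers and supports cache tags, which this sketch omits):

import java.nio.ByteBuffer;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative sketch only: a map stands in for the LLAP metadata cache.
final class FooterWarmup {

    private final ConcurrentHashMap<Object, ByteBuffer> cache = new ConcurrentHashMap<>();

    /** Publish the serialized footer for fileKey unless one is already cached. */
    void loadFileFooter(Object fileKey, ByteBuffer serializedFooter) {
        // putIfAbsent makes the check-then-put atomic here; loadFileFooter above does a
        // separate get and put, presumably relying on the cache's own put semantics to
        // resolve concurrent loaders.
        cache.putIfAbsent(fileKey, serializedFooter.asReadOnlyBuffer());
    }
}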

Aggregations

MemoryBufferOrBuffers (org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers): 3 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 2 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 2 uses
SeekableInputStream (org.apache.parquet.io.SeekableInputStream): 2 uses
ByteBuffer (java.nio.ByteBuffer): 1 use