Use of org.apache.parquet.io.SeekableInputStream in project Hive by Apache.
The class VectorizedParquetRecordReader, method readSplitFooter:
private ParquetMetadata readSplitFooter(JobConf configuration, final Path file, Object cacheKey,
    MetadataFilter filter, String tag) throws IOException {
  // Check the metadata cache first.
  MemoryBufferOrBuffers footerData = (cacheKey == null || metadataCache == null)
      ? null : metadataCache.getFileMetadata(cacheKey);
  if (footerData != null) {
    if (LOG.isInfoEnabled()) {
      LOG.info("Found the footer in cache for " + cacheKey);
    }
    try {
      return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
    } finally {
      metadataCache.decRefBuffer(footerData);
    }
  }
  final FileSystem fs = file.getFileSystem(configuration);
  final FileStatus stat = fs.getFileStatus(file);
  if (cacheKey == null || metadataCache == null) {
    // No cache to populate; read the footer straight from the file.
    return readFooterFromFile(file, fs, stat, filter);
  }
  // Parquet calls protobuf methods directly on the stream and we can't get bytes after the fact.
  try (SeekableInputStream stream = HadoopStreams.wrap(fs.open(file))) {
    // The 4-byte little-endian footer length sits just before the trailing magic.
    long footerLengthIndex = stat.getLen()
        - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
    stream.seek(footerLengthIndex);
    int footerLength = BytesUtils.readIntLittleEndian(stream);
    // Seek back to the start of the footer and cache its bytes.
    stream.seek(footerLengthIndex - footerLength);
    if (LOG.isInfoEnabled()) {
      LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);
    }
    footerData = metadataCache.putFileMetadata(cacheKey, footerLength, stream, tag);
    try {
      return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
    } finally {
      metadataCache.decRefBuffer(footerData);
    }
  }
}
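The seek arithmetic above follows the Parquet trailer layout: the file ends with the serialized footer, a 4-byte little-endian footer length, and the 4-byte magic "PAR1". A minimal sketch of that locate-and-read step in isolation, without Hive's cache machinery; the class name, method name, and FOOTER_LENGTH_SIZE constant here are illustrative, not part of the Hive code:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.util.HadoopStreams;
import org.apache.parquet.io.SeekableInputStream;

public class FooterLocator {
  // The footer length field occupies 4 bytes before the trailing magic.
  private static final int FOOTER_LENGTH_SIZE = 4;

  // Returns the raw serialized footer bytes of a Parquet file.
  static byte[] readRawFooter(FileSystem fs, Path file) throws IOException {
    long fileLen = fs.getFileStatus(file).getLen();
    try (SeekableInputStream in = HadoopStreams.wrap(fs.open(file))) {
      long footerLengthIndex = fileLen - FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
      in.seek(footerLengthIndex);
      int footerLength = BytesUtils.readIntLittleEndian(in);
      byte[] footer = new byte[footerLength];
      in.seek(footerLengthIndex - footerLength);
      in.readFully(footer); // readFully blocks until the array is filled
      return footer;
    }
  }
}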
Use of org.apache.parquet.io.SeekableInputStream in project Drill by Apache.
The class DirectBufInputStream, method read:
public synchronized int read(DrillBuf buf, int off, int len) throws IOException {
  buf.clear();
  ByteBuffer directBuffer = buf.nioBuffer(0, len);
  int lengthLeftToRead = len;
  SeekableInputStream seekableInputStream = HadoopStreams.wrap(getInputStream());
  // read(ByteBuffer) may return fewer bytes than requested, so loop until the
  // buffer is full; the ByteBuffer position advances with each read.
  while (lengthLeftToRead > 0) {
    if (logger.isTraceEnabled()) {
      logger.trace("PERF: Disk read start. {}, StartOffset: {}, TotalByteSize: {}",
          this.streamId, this.startOffset, this.totalByteSize);
    }
    Stopwatch timer = Stopwatch.createStarted();
    int bytesRead = seekableInputStream.read(directBuffer);
    if (bytesRead < 0) {
      // EOF before the requested length was read; partial data is discarded.
      return bytesRead;
    }
    lengthLeftToRead -= bytesRead;
    if (logger.isTraceEnabled()) {
      logger.trace("PERF: Disk read complete. {}, StartOffset: {}, TotalByteSize: {}, BytesRead: {}, Time: {} ms",
          this.streamId, this.startOffset, this.totalByteSize, bytesRead,
          ((double) timer.elapsed(TimeUnit.MICROSECONDS)) / 1000);
    }
  }
  buf.writerIndex(len);
  return len;
}
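The manual loop is necessary because SeekableInputStream.read(ByteBuffer) is allowed to return short reads. The interface also provides readFully(ByteBuffer), which runs the same fill-until-complete loop internally and throws EOFException if the stream ends first. A minimal sketch of the equivalent read without Drill's DrillBuf and trace logging (the method name is illustrative):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.parquet.hadoop.util.HadoopStreams;
import org.apache.parquet.io.SeekableInputStream;

// Reads exactly len bytes from the stream's current position into a direct buffer.
static ByteBuffer readExactly(FSDataInputStream in, int len) throws IOException {
  SeekableInputStream stream = HadoopStreams.wrap(in);
  ByteBuffer buffer = ByteBuffer.allocateDirect(len);
  stream.readFully(buffer); // throws EOFException on a short stream
  buffer.flip(); // prepare the buffer for reading by the caller
  return buffer;
}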
Use of org.apache.parquet.io.SeekableInputStream in project Hive by Apache.
The class VectorizedParquetRecordReader, method readSplitFooter (a later variant that takes a CacheTag and supports cache-only reads):
private ParquetMetadata readSplitFooter(JobConf configuration, final Path file, Object cacheKey,
    MetadataFilter filter, CacheTag tag) throws IOException {
  MemoryBufferOrBuffers footerData = (cacheKey == null || metadataCache == null)
      ? null : metadataCache.getFileMetadata(cacheKey);
  if (footerData != null) {
    LOG.info("Found the footer in cache for " + cacheKey);
    try {
      return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
    } finally {
      metadataCache.decRefBuffer(footerData);
    }
  } else {
    // In cache-only mode a cache miss is an error rather than a fallback to disk.
    throwIfCacheOnlyRead(isReadCacheOnly);
  }
  final FileSystem fs = file.getFileSystem(configuration);
  final FileStatus stat = fs.getFileStatus(file);
  if (cacheKey == null || metadataCache == null) {
    return readFooterFromFile(file, fs, stat, filter);
  }
  // Parquet calls protobuf methods directly on the stream and we can't get bytes after the fact.
  try (SeekableInputStream stream = HadoopStreams.wrap(fs.open(file))) {
    long footerLengthIndex = stat.getLen()
        - ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length;
    stream.seek(footerLengthIndex);
    int footerLength = BytesUtils.readIntLittleEndian(stream);
    stream.seek(footerLengthIndex - footerLength);
    LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);
    // Note: we don't pass in isStopped here - this is not on an IO thread.
    footerData = metadataCache.putFileMetadata(cacheKey, footerLength, stream, tag, null);
    try {
      return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
    } finally {
      metadataCache.decRefBuffer(footerData);
    }
  }
}
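Both Hive variants bypass the usual entry point so the footer bytes can be captured for the cache. When no cache is involved, the same metadata is normally obtained by letting Parquet manage the stream itself; a minimal sketch of that standard path (the method name and file path are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;

// Opens a Parquet file the standard way and sums row counts from the footer.
static long countRows(Configuration conf, Path path) throws IOException {
  HadoopInputFile inputFile = HadoopInputFile.fromPath(path, conf);
  try (ParquetFileReader reader = ParquetFileReader.open(inputFile)) {
    ParquetMetadata footer = reader.getFooter();
    return footer.getBlocks().stream().mapToLong(b -> b.getRowCount()).sum();
  }
}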