use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.
the class CachingStripeMetadataSource method getRowIndexes.
/**
 * Returns the row group indexes for the given stripe stream, consulting the optional
 * row group index cache before falling back to the delegate.
 *
 * <p>On a cache hit, {@code OrcRowGroupIndexCacheHit} is recorded as 1 together with the
 * summed retained size of the cached indexes. On a miss, the hit metric is recorded as 0,
 * the retained size of {@code inputStream} is recorded as storage bytes read, and the
 * result obtained from the delegate is stored in the cache.
 *
 * @throws IOException if the delegate fails to read the indexes
 */
@Override
public List<RowGroupIndex> getRowIndexes(MetadataReader metadataReader, HiveWriterVersion hiveWriterVersion, StripeId stripId, StreamId streamId, OrcInputStream inputStream, List<HiveBloomFilter> bloomFilters, RuntimeStats runtimeStats) throws IOException {
    // Without a configured cache this method is a plain pass-through.
    if (!rowGroupIndexCache.isPresent()) {
        return delegate.getRowIndexes(metadataReader, hiveWriterVersion, stripId, streamId, inputStream, bloomFilters, runtimeStats);
    }

    StripeStreamId cacheKey = new StripeStreamId(stripId, streamId);
    List<RowGroupIndex> cachedIndexes = rowGroupIndexCache.get().getIfPresent(cacheKey);
    if (cachedIndexes != null) {
        runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", 1);
        long retainedBytes = 0;
        for (RowGroupIndex index : cachedIndexes) {
            retainedBytes += index.getRetainedSizeInBytes();
        }
        runtimeStats.addMetricValue("OrcRowGroupIndexInMemoryBytesRead", retainedBytes);
        return cachedIndexes;
    }

    // Record the miss before the delegate consumes the input stream.
    runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", 0);
    runtimeStats.addMetricValue("OrcRowGroupIndexStorageBytesRead", inputStream.getRetainedSizeInBytes());

    List<RowGroupIndex> loadedIndexes = delegate.getRowIndexes(metadataReader, hiveWriterVersion, stripId, streamId, inputStream, bloomFilters, runtimeStats);
    rowGroupIndexCache.get().put(cacheKey, loadedIndexes);
    return loadedIndexes;
}
use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.
the class CachingStripeMetadataSource method getInputs.
/**
 * Resolves the inputs for the requested disk ranges of a stripe, serving cacheable
 * stream kinds from the stripe stream cache when they are present.
 *
 * <p>When {@code cacheable} is false the call is forwarded to the delegate unchanged.
 * Otherwise, streams found in the cache are returned directly; every other range is read
 * through the delegate, and the streams whose kind is cached are copied into the cache
 * before being returned.
 *
 * <p>NOTE(review): cache hits pass {@code length()} as the second
 * {@code OrcDataSourceInput} argument while freshly read streams pass
 * {@code getRetainedSize()} - confirm whether this difference is intended.
 *
 * @throws IOException if the delegate fails to read the uncached ranges
 */
@Override
public Map<StreamId, OrcDataSourceInput> getInputs(OrcDataSource orcDataSource, StripeId stripeId, Map<StreamId, DiskRange> diskRanges, boolean cacheable) throws IOException {
    if (!cacheable) {
        return delegate.getInputs(orcDataSource, stripeId, diskRanges, cacheable);
    }

    ImmutableMap.Builder<StreamId, OrcDataSourceInput> resolvedInputs = ImmutableMap.builder();
    ImmutableMap.Builder<StreamId, DiskRange> rangesToRead = ImmutableMap.builder();

    // First pass: take whatever is already cached, queue the rest for reading.
    for (Entry<StreamId, DiskRange> rangeEntry : diskRanges.entrySet()) {
        StreamId streamId = rangeEntry.getKey();
        Slice cachedSlice = isCachedStream(streamId.getStreamKind())
                ? stripeStreamCache.getIfPresent(new StripeStreamId(stripeId, streamId))
                : null;
        if (cachedSlice == null) {
            rangesToRead.put(rangeEntry);
            continue;
        }
        resolvedInputs.put(streamId, new OrcDataSourceInput(new BasicSliceInput(cachedSlice), cachedSlice.length()));
    }

    // Second pass: read the remaining ranges and populate the cache.
    Map<StreamId, OrcDataSourceInput> freshInputs = delegate.getInputs(orcDataSource, stripeId, rangesToRead.build(), cacheable);
    for (Entry<StreamId, OrcDataSourceInput> inputEntry : freshInputs.entrySet()) {
        StreamId streamId = inputEntry.getKey();
        OrcDataSourceInput freshInput = inputEntry.getValue();
        if (!isCachedStream(streamId.getStreamKind())) {
            resolvedInputs.put(streamId, freshInput);
            continue;
        }
        // The delegate's input is read eagerly to fill the cache, so callers get a
        // new input over the copied bytes rather than the already-read one.
        Slice copiedSlice = Slices.wrappedBuffer(freshInput.getInput().readSlice(toIntExact(freshInput.getInput().length())).getBytes());
        stripeStreamCache.put(new StripeStreamId(stripeId, streamId), copiedSlice);
        resolvedInputs.put(streamId, new OrcDataSourceInput(new BasicSliceInput(copiedSlice), toIntExact(copiedSlice.getRetainedSize())));
    }
    return resolvedInputs.build();
}
use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.
the class AbstractTestOrcReader method testCaching.
/**
 * Verifies that the caching file tail source and caching stripe metadata source serve a
 * second reader over the same file from cache.
 *
 * <p>Two readers are created over one temp file. The first populates each cache (misses
 * only); the second is expected to produce exactly one hit for every earlier miss. The
 * assertions read cumulative cache statistics, so the order of reader creation and
 * {@code nextBatch()} calls matters.
 */
@Test
public void testCaching() throws Exception {
// File tail cache: 1 MB max weight, weighed by footer size plus metadata size, 10 minute expire-after-access.
Cache<OrcDataSourceId, OrcFileTail> orcFileTailCache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
OrcFileTailSource orcFileTailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), orcFileTailCache);
// Stripe footer and stripe stream caches: 1 MB max weight each, weighed by slice length.
Cache<StripeId, Slice> stripeFootercache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, footer) -> ((Slice) footer).length()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, stream) -> ((Slice) stream).length()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
// Row group index cache: weighed by the summed retained size of the cached indexes.
Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build());
StripeMetadataSource stripeMetadataSource = new CachingStripeMetadataSource(new StorageStripeMetadataSource(), stripeFootercache, stripeStreamCache, rowGroupIndexCache);
// NOTE(review): 10001 is presumably the number of rows written to the temp file - confirm against createTempFile.
try (TempFile tempFile = createTempFile(10001)) {
// Creating the first reader is expected to miss the file tail cache once.
OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
assertEquals(orcFileTailCache.stats().missCount(), 1);
assertEquals(orcFileTailCache.stats().hitCount(), 0);
// Creating a second reader over the same file is expected to hit the file tail cache.
OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
assertEquals(orcFileTailCache.stats().missCount(), 1);
assertEquals(orcFileTailCache.stats().hitCount(), 1);
// Both readers must report the same retained size, row count and split length.
assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());
// First batch on the first reader: every stripe-level cache records misses only.
storageReader.nextBatch();
assertEquals(stripeFootercache.stats().missCount(), 1);
assertEquals(stripeFootercache.stats().hitCount(), 0);
assertEquals(stripeStreamCache.stats().missCount(), 2);
assertEquals(stripeStreamCache.stats().hitCount(), 0);
assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
assertEquals(rowGroupIndexCache.get().stats().hitCount(), 0);
// First batch on the second reader: miss counts stay put and each earlier miss is matched by one hit.
cacheReader.nextBatch();
assertEquals(stripeFootercache.stats().missCount(), 1);
assertEquals(stripeFootercache.stats().hitCount(), 1);
assertEquals(stripeStreamCache.stats().missCount(), 2);
assertEquals(stripeStreamCache.stats().hitCount(), 2);
assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
assertEquals(rowGroupIndexCache.get().stats().hitCount(), 1);
// The first value of block 0 must be the same for both readers.
assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
}
}
use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.
the class HiveClientModule method createStripeMetadataSourceFactory.
/**
 * Provides the singleton {@code StripeMetadataSourceFactory}.
 *
 * <p>Starts from a {@code StorageStripeMetadataSource}. When the stripe metadata cache is
 * enabled, stripe footer and stripe stream caches are built from {@code orcCacheConfig},
 * their statistics are exported through {@code exporter}, and the source is wrapped in a
 * {@code CachingStripeMetadataSource}; a row group index cache is added only when that
 * option is enabled as well. When the DWRF stripe cache is enabled, the resulting factory
 * is wrapped in a {@code DwrfAwareStripeMetadataSourceFactory}.
 *
 * <p>NOTE(review): the row group index cache expires using the stripe stream TTL setting -
 * confirm this is intended rather than a dedicated row group index TTL.
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource metadataSource = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        // Both slice caches are weighed by the retained size of the cached slice.
        Cache<StripeId, Slice> footerSliceCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        Cache<StripeStreamId, Slice> streamSliceCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();

        CacheStatsMBean footerStats = new CacheStatsMBean(footerSliceCache);
        CacheStatsMBean streamStats = new CacheStatsMBean(streamSliceCache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerStats);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamStats);

        // The row group index cache is optional and stays empty unless explicitly enabled.
        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> indexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            indexCache = Optional.of(CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((key, value) -> toIntExact(((List<RowGroupIndex>) value).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build());
            CacheStatsMBean indexStats = new CacheStatsMBean(indexCache.get());
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), indexStats);
        }

        metadataSource = new CachingStripeMetadataSource(metadataSource, footerSliceCache, streamSliceCache, indexCache);
    }

    StripeMetadataSourceFactory plainFactory = StripeMetadataSourceFactory.of(metadataSource);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return plainFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(plainFactory);
}
use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.
the class StorageModule method createStripeMetadataSourceFactory.
/**
 * Provides the singleton {@code StripeMetadataSourceFactory}.
 *
 * <p>The base source is a {@code StorageStripeMetadataSource}. If the stripe metadata cache
 * is enabled, stripe footer and stripe stream caches (weighed by slice length) are created
 * from {@code orcCacheConfig}, their statistics are exported through {@code exporter}, and
 * the source is wrapped in a {@code CachingStripeMetadataSource}, optionally with a row
 * group index cache. If the DWRF stripe cache is enabled, the factory is wrapped in a
 * {@code DwrfAwareStripeMetadataSourceFactory}.
 *
 * <p>NOTE(review): the row group index cache expires using the stripe stream TTL setting -
 * confirm this is intended rather than a dedicated row group index TTL.
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource metadataSource = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        Cache<StripeId, Slice> footerSliceCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((key, value) -> ((Slice) value).length())
                .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS)
                .recordStats()
                .build();
        Cache<StripeStreamId, Slice> streamSliceCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((key, value) -> ((Slice) value).length())
                .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS)
                .recordStats()
                .build();

        // Export statistics for both slice caches under connector-specific names.
        CacheStatsMBean footerStats = new CacheStatsMBean(footerSliceCache);
        CacheStatsMBean streamStats = new CacheStatsMBean(streamSliceCache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerStats);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamStats);

        // The row group index cache is created and exported only when enabled.
        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> indexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            indexCache = Optional.of(CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((key, value) -> toIntExact(((List<RowGroupIndex>) value).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build());
            CacheStatsMBean indexStats = new CacheStatsMBean(indexCache.get());
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), indexStats);
        }

        metadataSource = new CachingStripeMetadataSource(metadataSource, footerSliceCache, streamSliceCache, indexCache);
    }

    StripeMetadataSourceFactory plainFactory = StripeMetadataSourceFactory.of(metadataSource);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return plainFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(plainFactory);
}
Aggregations