Search in sources :

Example 1 with StripeStreamId

use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.

In class CachingStripeMetadataSource, method getRowIndexes.

/**
 * Returns the row group indexes for one stream of a stripe, serving them from the
 * optional row group index cache when an entry exists and otherwise loading them
 * through the delegate.
 *
 * <p>When the cache is configured, a lookup records the {@code OrcRowGroupIndexCacheHit}
 * metric (1 on a hit, 0 on a miss). A hit additionally records the summed retained size
 * of the cached indexes as {@code OrcRowGroupIndexInMemoryBytesRead}; a miss records the
 * retained size of {@code inputStream} as {@code OrcRowGroupIndexStorageBytesRead} and
 * stores the delegate's result in the cache before returning it.
 *
 * @param stripId identifies the stripe; combined with {@code streamId} to form the cache key
 * @param streamId identifies the stream within the stripe
 * @param runtimeStats receives the cache hit/miss and bytes-read metrics
 * @return the cached indexes on a hit, otherwise the indexes produced by the delegate
 * @throws IOException if the delegate fails to read the indexes
 */
@Override
public List<RowGroupIndex> getRowIndexes(MetadataReader metadataReader, HiveWriterVersion hiveWriterVersion, StripeId stripId, StreamId streamId, OrcInputStream inputStream, List<HiveBloomFilter> bloomFilters, RuntimeStats runtimeStats) throws IOException {
    // Without a configured cache this method is a plain pass-through and records no metrics.
    if (!rowGroupIndexCache.isPresent()) {
        return delegate.getRowIndexes(metadataReader, hiveWriterVersion, stripId, streamId, inputStream, bloomFilters, runtimeStats);
    }

    StripeStreamId cacheKey = new StripeStreamId(stripId, streamId);
    List<RowGroupIndex> cachedIndexes = rowGroupIndexCache.get().getIfPresent(cacheKey);
    if (cachedIndexes != null) {
        runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", 1);
        long inMemoryBytes = 0;
        for (RowGroupIndex cachedIndex : cachedIndexes) {
            inMemoryBytes += cachedIndex.getRetainedSizeInBytes();
        }
        runtimeStats.addMetricValue("OrcRowGroupIndexInMemoryBytesRead", inMemoryBytes);
        return cachedIndexes;
    }

    // Cache miss: record it, load through the delegate, and remember the result.
    runtimeStats.addMetricValue("OrcRowGroupIndexCacheHit", 0);
    runtimeStats.addMetricValue("OrcRowGroupIndexStorageBytesRead", inputStream.getRetainedSizeInBytes());
    List<RowGroupIndex> loadedIndexes = delegate.getRowIndexes(metadataReader, hiveWriterVersion, stripId, streamId, inputStream, bloomFilters, runtimeStats);
    rowGroupIndexCache.get().put(cacheKey, loadedIndexes);
    return loadedIndexes;
}
Also used : StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex)

Example 2 with StripeStreamId

use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.

In class CachingStripeMetadataSource, method getInputs.

/**
 * Resolves the inputs for the requested stream disk ranges of a stripe, answering
 * cacheable stream kinds from the stripe stream cache and reading everything else
 * through the delegate.
 *
 * <p>When {@code cacheable} is false the call is forwarded to the delegate untouched.
 * Otherwise each requested stream whose kind passes {@code isCachedStream} is looked up
 * in the cache; all remaining ranges are read from the delegate in a single call, and
 * the freshly read streams of a cached kind are copied into the cache.
 *
 * @param stripeId identifies the stripe; combined with each stream id to form the cache key
 * @param diskRanges the disk range to read for each requested stream
 * @param cacheable whether the stripe stream cache may be used for this request
 * @return one input per resolved stream, drawn from the cache or from the delegate
 * @throws IOException if the delegate fails to read the uncached ranges
 */
@Override
public Map<StreamId, OrcDataSourceInput> getInputs(OrcDataSource orcDataSource, StripeId stripeId, Map<StreamId, DiskRange> diskRanges, boolean cacheable) throws IOException {
    if (!cacheable) {
        return delegate.getInputs(orcDataSource, stripeId, diskRanges, cacheable);
    }

    ImmutableMap.Builder<StreamId, OrcDataSourceInput> resolvedInputs = ImmutableMap.builder();
    ImmutableMap.Builder<StreamId, DiskRange> rangesToRead = ImmutableMap.builder();

    // Pass 1: serve what the cache already holds and collect every other range for reading.
    for (Entry<StreamId, DiskRange> rangeEntry : diskRanges.entrySet()) {
        StreamId streamId = rangeEntry.getKey();
        Slice cachedSlice = null;
        if (isCachedStream(streamId.getStreamKind())) {
            cachedSlice = stripeStreamCache.getIfPresent(new StripeStreamId(stripeId, streamId));
        }
        if (cachedSlice == null) {
            rangesToRead.put(rangeEntry);
            continue;
        }
        // NOTE(review): a cache hit reports the slice length as its size, while a fresh read
        // below reports the retained size - confirm whether the difference is intentional.
        resolvedInputs.put(streamId, new OrcDataSourceInput(new BasicSliceInput(cachedSlice), cachedSlice.length()));
    }

    // Pass 2: read the remaining ranges and populate the cache for cacheable stream kinds.
    Map<StreamId, OrcDataSourceInput> freshInputs = delegate.getInputs(orcDataSource, stripeId, rangesToRead.build(), cacheable);
    for (Entry<StreamId, OrcDataSourceInput> inputEntry : freshInputs.entrySet()) {
        StreamId streamId = inputEntry.getKey();
        OrcDataSourceInput freshInput = inputEntry.getValue();
        if (!isCachedStream(streamId.getStreamKind())) {
            resolvedInputs.put(streamId, freshInput);
            continue;
        }
        // The stream is read eagerly in full here, so the caller is handed a new input
        // positioned at the start of the copied bytes instead of the delegate's input.
        byte[] streamBytes = freshInput.getInput().readSlice(toIntExact(freshInput.getInput().length())).getBytes();
        Slice copiedSlice = Slices.wrappedBuffer(streamBytes);
        stripeStreamCache.put(new StripeStreamId(stripeId, streamId), copiedSlice);
        resolvedInputs.put(streamId, new OrcDataSourceInput(new BasicSliceInput(copiedSlice), toIntExact(copiedSlice.getRetainedSize())));
    }
    return resolvedInputs.build();
}
Also used : StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) Slice(io.airlift.slice.Slice) ImmutableMap(com.google.common.collect.ImmutableMap) BasicSliceInput(io.airlift.slice.BasicSliceInput)

Example 3 with StripeStreamId

use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.

In class AbstractTestOrcReader, method testCaching.

/**
 * Exercises the file tail, stripe footer, stripe stream and row group index caches with
 * two readers over the same temp file that share one tail source and one stripe metadata
 * source.
 *
 * <p>Expected counts: creating the first reader misses the file tail cache once and
 * creating the second hits it once. The first reader's {@code nextBatch()} produces one
 * stripe footer miss, two stripe stream misses and one row group index miss; the second
 * reader's {@code nextBatch()} produces the same number of hits with no additional misses.
 * Both readers must also agree on retained size, row count, split length and the first
 * value of block 0.
 */
@Test
public void testCaching() throws Exception {
    long maxCacheWeight = new DataSize(1, MEGABYTE).toBytes();
    long ttlMillis = new Duration(10, MINUTES).toMillis();

    Cache<OrcDataSourceId, OrcFileTail> fileTailCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    OrcFileTailSource tailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), fileTailCache);

    Cache<StripeId, Slice> footerCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, footer) -> ((Slice) footer).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Cache<StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, stream) -> ((Slice) stream).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Optional<Cache<StripeStreamId, List<RowGroupIndex>>> indexCache = Optional.of(CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build());
    StripeMetadataSource metadataSource = new CachingStripeMetadataSource(new StorageStripeMetadataSource(), footerCache, streamCache, indexCache);

    try (TempFile tempFile = createTempFile(10001)) {
        // The first reader has to load the file tail from storage.
        OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, tailSource, metadataSource, true, ImmutableMap.of(), false);
        assertCacheCounts(fileTailCache, 1, 0);

        // The second reader over the same file is served from the file tail cache.
        OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, tailSource, metadataSource, true, ImmutableMap.of(), false);
        assertCacheCounts(fileTailCache, 1, 1);

        assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
        assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
        assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());

        // Reading the first batch populates the stripe-level caches.
        storageReader.nextBatch();
        assertCacheCounts(footerCache, 1, 0);
        assertCacheCounts(streamCache, 2, 0);
        assertCacheCounts(indexCache.get(), 1, 0);

        // The second reader finds every stripe-level entry already cached.
        cacheReader.nextBatch();
        assertCacheCounts(footerCache, 1, 1);
        assertCacheCounts(streamCache, 2, 2);
        assertCacheCounts(indexCache.get(), 1, 1);

        assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
    }
}

// Asserts the recorded miss count, then the recorded hit count, of the given cache.
private static void assertCacheCounts(Cache<?, ?> cache, long expectedMisses, long expectedHits) {
    assertEquals(cache.stats().missCount(), expectedMisses);
    assertEquals(cache.stats().hitCount(), expectedHits);
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) DateTimeZone(org.joda.time.DateTimeZone) StripeId(com.facebook.presto.orc.StripeReader.StripeId) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) Random(java.util.Random) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Duration(io.airlift.units.Duration) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Range(com.google.common.collect.Range) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) Iterables.limit(com.google.common.collect.Iterables.limit) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) 
CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Builder(com.google.common.collect.ImmutableList.Builder) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) MINUTES(java.util.concurrent.TimeUnit.MINUTES) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlDate(com.facebook.presto.common.type.SqlDate) ImmutableList(com.google.common.collect.ImmutableList) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) 
StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) Serializer(org.apache.hadoop.hive.serde2.Serializer) Cache(com.google.common.cache.Cache) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) Collections(java.util.Collections) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Duration(io.airlift.units.Duration) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) StripeId(com.facebook.presto.orc.StripeReader.StripeId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Cache(com.google.common.cache.Cache) Test(org.testng.annotations.Test)

Example 4 with StripeStreamId

use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.

In class HiveClientModule, method createStripeMetadataSourceFactory.

/**
 * Provides the singleton {@link StripeMetadataSourceFactory}, layering caches over the
 * storage-backed stripe metadata source according to {@code orcCacheConfig}.
 *
 * <p>When the stripe metadata cache is enabled, a stripe footer cache and a stripe stream
 * cache are created (both weighed by the retained size of each slice) and their statistics
 * are exported as MBeans named after {@code connectorId}. A row group index cache is added
 * and exported only when it is enabled as well. When the DWRF stripe cache is enabled the
 * resulting factory is wrapped in a {@link DwrfAwareStripeMetadataSourceFactory}.
 *
 * @param orcCacheConfig supplies the enable flags, sizes and TTLs of the caches
 * @param exporter used to export the statistics MBean of every cache that is created
 * @return the factory for the (possibly caching) stripe metadata source
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource metadataSource = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        Cache<StripeId, Slice> stripeFooterCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), new CacheStatsMBean(stripeFooterCache));
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), new CacheStatsMBean(stripeStreamCache));

        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            // NOTE(review): this cache expires entries using the stripe stream TTL setting -
            // confirm whether a dedicated row group index TTL is intended here.
            Cache<StripeStreamId, List<RowGroupIndex>> indexCache = CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build();
            rowGroupIndexCache = Optional.of(indexCache);
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), new CacheStatsMBean(indexCache));
        }

        metadataSource = new CachingStripeMetadataSource(metadataSource, stripeFooterCache, stripeStreamCache, rowGroupIndexCache);
    }

    StripeMetadataSourceFactory baseFactory = StripeMetadataSourceFactory.of(metadataSource);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return baseFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(baseFactory);
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) 
MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) 
ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) 
StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 5 with StripeStreamId

use of com.facebook.presto.orc.StripeReader.StripeStreamId in project presto by prestodb.

In class StorageModule, method createStripeMetadataSourceFactory.

/**
 * Provides the singleton {@link StripeMetadataSourceFactory}, layering caches over the
 * storage-backed stripe metadata source according to {@code orcCacheConfig}.
 *
 * <p>When the stripe metadata cache is enabled, a stripe footer cache and a stripe stream
 * cache are created (both weighed by the length of each slice) and their statistics are
 * exported as MBeans named after {@code connectorId}. A row group index cache is added and
 * exported only when it is enabled as well. When the DWRF stripe cache is enabled the
 * resulting factory is wrapped in a {@link DwrfAwareStripeMetadataSourceFactory}.
 *
 * @param orcCacheConfig supplies the enable flags, sizes and TTLs of the caches
 * @param exporter used to export the statistics MBean of every cache that is created
 * @return the factory for the (possibly caching) stripe metadata source
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource source = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        long footerTtlMillis = orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis();
        Cache<StripeId, Slice> footers = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((id, footer) -> ((Slice) footer).length())
                .expireAfterAccess(footerTtlMillis, MILLISECONDS)
                .recordStats()
                .build();

        long streamTtlMillis = orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis();
        Cache<StripeStreamId, Slice> streams = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((id, stream) -> ((Slice) stream).length())
                .expireAfterAccess(streamTtlMillis, MILLISECONDS)
                .recordStats()
                .build();

        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), new CacheStatsMBean(footers));
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), new CacheStatsMBean(streams));

        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexes = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            // NOTE(review): entries expire using the stripe stream TTL setting, read again
            // here from the config - confirm whether a dedicated TTL is intended.
            Cache<StripeStreamId, List<RowGroupIndex>> indexes = CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build();
            rowGroupIndexes = Optional.of(indexes);
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), new CacheStatsMBean(indexes));
        }

        source = new CachingStripeMetadataSource(source, footers, streams, rowGroupIndexes);
    }

    StripeMetadataSourceFactory plainFactory = StripeMetadataSourceFactory.of(source);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return plainFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(plainFactory);
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) ShardOrganizer(com.facebook.presto.raptor.storage.organization.ShardOrganizer) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) DatabaseShardRecorder(com.facebook.presto.raptor.metadata.DatabaseShardRecorder) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Ticker(com.google.common.base.Ticker) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) BackupManager(com.facebook.presto.raptor.backup.BackupManager) AssignmentLimiter(com.facebook.presto.raptor.metadata.AssignmentLimiter) ShardCleanerConfig(com.facebook.presto.raptor.metadata.ShardCleanerConfig) Slice(io.airlift.slice.Slice) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Singleton(javax.inject.Singleton) ShardCompactor(com.facebook.presto.raptor.storage.organization.ShardCompactor) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) 
Objects.requireNonNull(java.util.Objects.requireNonNull) ShardCompactionManager(com.facebook.presto.raptor.storage.organization.ShardCompactionManager) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ShardCleaner(com.facebook.presto.raptor.metadata.ShardCleaner) OrganizationJobFactory(com.facebook.presto.raptor.storage.organization.OrganizationJobFactory) Scopes(com.google.inject.Scopes) ShardOrganizationManager(com.facebook.presto.raptor.storage.organization.ShardOrganizationManager) ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) DatabaseShardManager(com.facebook.presto.raptor.metadata.DatabaseShardManager) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) JobFactory(com.facebook.presto.raptor.storage.organization.JobFactory) TemporalFunction(com.facebook.presto.raptor.storage.organization.TemporalFunction) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) 
CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Aggregations

StripeStreamId (com.facebook.presto.orc.StripeReader.StripeStreamId)5 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)4 Slice (io.airlift.slice.Slice)4 StripeId (com.facebook.presto.orc.StripeReader.StripeId)3 CachingOrcFileTailSource (com.facebook.presto.orc.cache.CachingOrcFileTailSource)3 OrcFileTailSource (com.facebook.presto.orc.cache.OrcFileTailSource)3 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)3 OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail)3 Cache (com.google.common.cache.Cache)3 CacheBuilder (com.google.common.cache.CacheBuilder)3 ConfigBinder.configBinder (com.facebook.airlift.configuration.ConfigBinder.configBinder)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 Math.toIntExact (java.lang.Math.toIntExact)2 List (java.util.List)2 Optional (java.util.Optional)2 MILLISECONDS (java.util.concurrent.TimeUnit.MILLISECONDS)2 BoundedExecutor (com.facebook.airlift.concurrent.BoundedExecutor)1 ExecutorServiceAdapter (com.facebook.airlift.concurrent.ExecutorServiceAdapter)1 Threads.daemonThreadsNamed (com.facebook.airlift.concurrent.Threads.daemonThreadsNamed)1 EventClient (com.facebook.airlift.event.client.EventClient)1