Search in sources :

Example 1 with OrcFileTailSource

use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.

the class StorageModule method createOrcFileTailSource.

@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes()).weigher((id, tail) -> ((OrcFileTail) tail).getTotalSize()).expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS).recordStats().build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
Also used : Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) ShardOrganizer(com.facebook.presto.raptor.storage.organization.ShardOrganizer) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) DatabaseShardRecorder(com.facebook.presto.raptor.metadata.DatabaseShardRecorder) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Ticker(com.google.common.base.Ticker) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) BackupManager(com.facebook.presto.raptor.backup.BackupManager) AssignmentLimiter(com.facebook.presto.raptor.metadata.AssignmentLimiter) ShardCleanerConfig(com.facebook.presto.raptor.metadata.ShardCleanerConfig) Slice(io.airlift.slice.Slice) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Singleton(javax.inject.Singleton) ShardCompactor(com.facebook.presto.raptor.storage.organization.ShardCompactor) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Objects.requireNonNull(java.util.Objects.requireNonNull) ShardCompactionManager(com.facebook.presto.raptor.storage.organization.ShardCompactionManager) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ShardCleaner(com.facebook.presto.raptor.metadata.ShardCleaner) OrganizationJobFactory(com.facebook.presto.raptor.storage.organization.OrganizationJobFactory) Scopes(com.google.inject.Scopes) ShardOrganizationManager(com.facebook.presto.raptor.storage.organization.ShardOrganizationManager) ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) DatabaseShardManager(com.facebook.presto.raptor.metadata.DatabaseShardManager) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) JobFactory(com.facebook.presto.raptor.storage.organization.JobFactory) TemporalFunction(com.facebook.presto.raptor.storage.organization.TemporalFunction) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 2 with OrcFileTailSource

use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.

the class AbstractTestOrcReader method testCaching.

@Test
public void testCaching() throws Exception {
    Cache<OrcDataSourceId, OrcFileTail> orcFileTailCache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
    OrcFileTailSource orcFileTailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), orcFileTailCache);
    Cache<StripeId, Slice> stripeFootercache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, footer) -> ((Slice) footer).length()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
    Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, stream) -> ((Slice) stream).length()).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build();
    Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(new DataSize(1, MEGABYTE).toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS).recordStats().build());
    StripeMetadataSource stripeMetadataSource = new CachingStripeMetadataSource(new StorageStripeMetadataSource(), stripeFootercache, stripeStreamCache, rowGroupIndexCache);
    try (TempFile tempFile = createTempFile(10001)) {
        OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(orcFileTailCache.stats().missCount(), 1);
        assertEquals(orcFileTailCache.stats().hitCount(), 0);
        OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(orcFileTailCache.stats().missCount(), 1);
        assertEquals(orcFileTailCache.stats().hitCount(), 1);
        assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
        assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
        assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());
        storageReader.nextBatch();
        assertEquals(stripeFootercache.stats().missCount(), 1);
        assertEquals(stripeFootercache.stats().hitCount(), 0);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 0);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 0);
        cacheReader.nextBatch();
        assertEquals(stripeFootercache.stats().missCount(), 1);
        assertEquals(stripeFootercache.stats().hitCount(), 1);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 2);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 1);
        assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
    }
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) DateTimeZone(org.joda.time.DateTimeZone) StripeId(com.facebook.presto.orc.StripeReader.StripeId) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) Random(java.util.Random) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Duration(io.airlift.units.Duration) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Range(com.google.common.collect.Range) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) Iterables.limit(com.google.common.collect.Iterables.limit) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Builder(com.google.common.collect.ImmutableList.Builder) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) MINUTES(java.util.concurrent.TimeUnit.MINUTES) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlDate(com.facebook.presto.common.type.SqlDate) ImmutableList(com.google.common.collect.ImmutableList) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) Serializer(org.apache.hadoop.hive.serde2.Serializer) Cache(com.google.common.cache.Cache) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) Collections(java.util.Collections) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Duration(io.airlift.units.Duration) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) StripeId(com.facebook.presto.orc.StripeReader.StripeId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Cache(com.google.common.cache.Cache) Test(org.testng.annotations.Test)

Example 3 with OrcFileTailSource

use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.

the class HiveClientModule method createOrcFileTailSource.

@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes()).weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize()).expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
Also used : Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 4 with OrcFileTailSource

use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.

the class IcebergModule method createOrcFileTailSource.

@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes()).weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize()).expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) TestingMBeanServer(org.weakref.jmx.testing.TestingMBeanServer) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) CacheConfig(com.facebook.presto.cache.CacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) DynamicConfigurationProvider(com.facebook.presto.hive.DynamicConfigurationProvider) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) HiveHdfsConfiguration(com.facebook.presto.hive.HiveHdfsConfiguration) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) PartitionMutator(com.facebook.presto.hive.PartitionMutator) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) GcsConfigurationInitializer(com.facebook.presto.hive.gcs.GcsConfigurationInitializer) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) HdfsConfigurationInitializer(com.facebook.presto.hive.HdfsConfigurationInitializer) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) CacheFactory(com.facebook.presto.cache.CacheFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) Procedure(com.facebook.presto.spi.procedure.Procedure) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) MetastoreConfig(com.facebook.presto.hive.metastore.MetastoreConfig) FileMergeCacheConfig(com.facebook.presto.cache.filemerge.FileMergeCacheConfig) Slice(io.airlift.slice.Slice) HiveGcsConfigurationInitializer(com.facebook.presto.hive.gcs.HiveGcsConfigurationInitializer) ForCachingHiveMetastore(com.facebook.presto.hive.ForCachingHiveMetastore) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) Singleton(javax.inject.Singleton) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) HdfsConfiguration(com.facebook.presto.hive.HdfsConfiguration) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) MBeanServer(javax.management.MBeanServer) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) HiveNodePartitioningProvider(com.facebook.presto.hive.HiveNodePartitioningProvider) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) ExecutorService(java.util.concurrent.ExecutorService) ForMetastoreHdfsEnvironment(com.facebook.presto.hive.ForMetastoreHdfsEnvironment) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CacheStats(com.facebook.presto.cache.CacheStats) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) IcebergPlanOptimizer(com.facebook.presto.iceberg.optimizer.IcebergPlanOptimizer) CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) HiveGcsConfig(com.facebook.presto.hive.gcs.HiveGcsConfig) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 5 with OrcFileTailSource

use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.

the class OrcBatchPageSourceFactory method createOrcPageSource.

public static ConnectorPageSource createOrcPageSource(OrcEncoding orcEncoding, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, StandardFunctionResolution functionResolution, DataSize maxBufferSize, DataSize streamBufferSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats, int domainCompactionThreshold, OrcFileTailSource orcFileTailSource, StripeMetadataSourceFactory stripeMetadataSourceFactory, HiveFileContext hiveFileContext, OrcReaderOptions orcReaderOptions, Optional<EncryptionInformation> encryptionInformation, DwrfEncryptionProvider dwrfEncryptionProvider) {
    checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
    OrcDataSource orcDataSource;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(sessionUser, path, configuration).openFile(path, hiveFileContext);
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, orcReaderOptions.getMaxMergeDistance(), maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
    try {
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, columns, useOrcColumnNames, path);
        OrcReader reader = new OrcReader(orcDataSource, orcEncoding, orcFileTailSource, stripeMetadataSourceFactory, new HiveOrcAggregatedMemoryContext(), orcReaderOptions, hiveFileContext.isCacheable(), dwrfEncryptionProvider, dwrfKeyProvider, hiveFileContext.getStats());
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader.getTypes(), path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        if (!physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedOrcPageSource(physicalColumns, reader.getFooter(), typeManager, functionResolution);
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, reader.getOrcDataSource(), physicalColumns, typeManager, systemMemoryUsage, stats, hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) FixedPageSource(com.facebook.presto.spi.FixedPageSource) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) OrcReader(com.facebook.presto.orc.OrcReader) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)

Aggregations

OrcFileTailSource (com.facebook.presto.orc.cache.OrcFileTailSource)7 List (java.util.List)7 Optional (java.util.Optional)7 StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory)6 OrcDataSourceId (com.facebook.presto.orc.OrcDataSourceId)5 CachingOrcFileTailSource (com.facebook.presto.orc.cache.CachingOrcFileTailSource)4 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)4 OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail)4 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)4 Cache (com.google.common.cache.Cache)4 CacheBuilder (com.google.common.cache.CacheBuilder)4 Slice (io.airlift.slice.Slice)4 Math.toIntExact (java.lang.Math.toIntExact)4 MILLISECONDS (java.util.concurrent.TimeUnit.MILLISECONDS)4 ConfigBinder.configBinder (com.facebook.airlift.configuration.ConfigBinder.configBinder)3 Type (com.facebook.presto.common.type.Type)3 TypeManager (com.facebook.presto.common.type.TypeManager)3 CachingStripeMetadataSource (com.facebook.presto.orc.CachingStripeMetadataSource)3 DwrfAwareStripeMetadataSourceFactory (com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory)3 DwrfKeyProvider (com.facebook.presto.orc.DwrfKeyProvider)3