Search in sources :

Example 1 with OrcFileTail

use of com.facebook.presto.orc.metadata.OrcFileTail in project presto by prestodb.

In the class TestStorageOrcFileTailSource, method testReadDwrfStripeCacheIfEnabledButAbsent.

/**
 * Reads a file tail with the "read dwrf stripe cache" option enabled from a file
 * that was written with only a footer and a post script, and checks that the
 * footer round-trips, the metadata size is zero, and no stripe cache data is
 * reported.
 *
 * @throws IOException if writing the test file or reading its tail fails
 */
@Test
public void testReadDwrfStripeCacheIfEnabledButAbsent() throws IOException {
    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder();
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE);
    // write the footer and post script; try-with-resources guarantees the stream
    // is closed even when writeTail throws, so the file handle is never leaked
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        writeTail(footer, postScript, out);
    }
    // read the file tail with the enabled "read dwrf stripe cache" feature
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(FOOTER_READ_SIZE_IN_BYTES, true);
    OrcDataSource orcDataSource = createFileOrcDataSource();
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);
    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());
    // the feature is enabled, but the file doesn't contain a stripe cache
    assertFalse(orcFileTail.getDwrfStripeCacheData().isPresent());
}
Also used : FileOutputStream(java.io.FileOutputStream) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Test(org.testng.annotations.Test)

Example 2 with OrcFileTail

use of com.facebook.presto.orc.metadata.OrcFileTail in project presto by prestodb.

In the class StorageModule, method createOrcFileTailSource.

/**
 * Builds the {@code OrcFileTailSource} handed out by this provider method.
 *
 * <p>A {@code StorageOrcFileTailSource} is always created from the configured
 * expected file tail size and the DWRF stripe cache flag. When the file tail
 * cache is enabled in {@code orcCacheConfig}, that source is wrapped in a
 * {@code CachingOrcFileTailSource} and the cache statistics bean is exported
 * through {@code exporter}.
 *
 * @param orcCacheConfig cache settings read to size and configure the source
 * @param exporter used to export the cache statistics bean when caching is enabled
 * @return the plain storage source, or the caching wrapper around it
 */
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    OrcFileTailSource storageSource = new StorageOrcFileTailSource(
            toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes()),
            orcCacheConfig.isDwrfStripeCacheEnabled());
    if (!orcCacheConfig.isFileTailCacheEnabled()) {
        // caching disabled: hand out the storage-backed source directly
        return storageSource;
    }

    // entries are weighed by the total size reported by the cached file tail
    Cache<OrcDataSourceId, OrcFileTail> tailCache = CacheBuilder.newBuilder()
            .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
            .weigher((id, tail) -> ((OrcFileTail) tail).getTotalSize())
            .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS)
            .recordStats()
            .build();
    CacheStatsMBean tailCacheStats = new CacheStatsMBean(tailCache);
    OrcFileTailSource cachingSource = new CachingOrcFileTailSource(storageSource, tailCache);
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), tailCacheStats);
    return cachingSource;
}
Also used : Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) ShardOrganizer(com.facebook.presto.raptor.storage.organization.ShardOrganizer) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) DatabaseShardRecorder(com.facebook.presto.raptor.metadata.DatabaseShardRecorder) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Ticker(com.google.common.base.Ticker) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) BackupManager(com.facebook.presto.raptor.backup.BackupManager) AssignmentLimiter(com.facebook.presto.raptor.metadata.AssignmentLimiter) ShardCleanerConfig(com.facebook.presto.raptor.metadata.ShardCleanerConfig) Slice(io.airlift.slice.Slice) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Singleton(javax.inject.Singleton) ShardCompactor(com.facebook.presto.raptor.storage.organization.ShardCompactor) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Objects.requireNonNull(java.util.Objects.requireNonNull) ShardCompactionManager(com.facebook.presto.raptor.storage.organization.ShardCompactionManager) Math.toIntExact(java.lang.Math.toIntExact) 
OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ShardCleaner(com.facebook.presto.raptor.metadata.ShardCleaner) OrganizationJobFactory(com.facebook.presto.raptor.storage.organization.OrganizationJobFactory) Scopes(com.google.inject.Scopes) ShardOrganizationManager(com.facebook.presto.raptor.storage.organization.ShardOrganizationManager) ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) DatabaseShardManager(com.facebook.presto.raptor.metadata.DatabaseShardManager) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) JobFactory(com.facebook.presto.raptor.storage.organization.JobFactory) TemporalFunction(com.facebook.presto.raptor.storage.organization.TemporalFunction) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 3 with OrcFileTail

use of com.facebook.presto.orc.metadata.OrcFileTail in project presto by prestodb.

In the class AbstractTestOrcReader, method testCaching.

/**
 * Exercises the caching file-tail and stripe-metadata sources with two readers
 * over the same temporary file: the first reader is expected to populate each
 * cache (misses only), the second is expected to be served from it (hits), and
 * both readers must report the same sizes, row count and first value.
 */
@Test
public void testCaching() throws Exception {
    // every cache below shares the same weight limit and access TTL
    long maxWeightBytes = new DataSize(1, MEGABYTE).toBytes();
    long ttlMillis = new Duration(10, MINUTES).toMillis();

    Cache<OrcDataSourceId, OrcFileTail> fileTailCache = CacheBuilder.newBuilder()
            .maximumWeight(maxWeightBytes)
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    OrcFileTailSource orcFileTailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), fileTailCache);

    Cache<StripeId, Slice> stripeFooterCache = CacheBuilder.newBuilder()
            .maximumWeight(maxWeightBytes)
            .weigher((id, footer) -> ((Slice) footer).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder()
            .maximumWeight(maxWeightBytes)
            .weigher((id, stream) -> ((Slice) stream).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder()
            .maximumWeight(maxWeightBytes)
            .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build());
    StripeMetadataSource stripeMetadataSource = new CachingStripeMetadataSource(
            new StorageStripeMetadataSource(),
            stripeFooterCache,
            stripeStreamCache,
            rowGroupIndexCache);

    try (TempFile tempFile = createTempFile(10001)) {
        // first reader: the file tail is not cached yet, so exactly one miss
        OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(fileTailCache.stats().missCount(), 1);
        assertEquals(fileTailCache.stats().hitCount(), 0);

        // second reader over the same file: the tail now comes from the cache
        OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(fileTailCache.stats().missCount(), 1);
        assertEquals(fileTailCache.stats().hitCount(), 1);

        // both readers must describe the file identically
        assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
        assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
        assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());

        // first batch read: stripe caches record misses only
        storageReader.nextBatch();
        assertEquals(stripeFooterCache.stats().missCount(), 1);
        assertEquals(stripeFooterCache.stats().hitCount(), 0);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 0);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 0);

        // second batch read: miss counts stay put, hit counts catch up
        cacheReader.nextBatch();
        assertEquals(stripeFooterCache.stats().missCount(), 1);
        assertEquals(stripeFooterCache.stats().hitCount(), 1);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 2);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 1);

        // the first value of column 0 must match between the two readers
        assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
    }
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) DateTimeZone(org.joda.time.DateTimeZone) StripeId(com.facebook.presto.orc.StripeReader.StripeId) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) Random(java.util.Random) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Duration(io.airlift.units.Duration) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Range(com.google.common.collect.Range) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) Iterables.limit(com.google.common.collect.Iterables.limit) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) 
CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Builder(com.google.common.collect.ImmutableList.Builder) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) MINUTES(java.util.concurrent.TimeUnit.MINUTES) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlDate(com.facebook.presto.common.type.SqlDate) ImmutableList(com.google.common.collect.ImmutableList) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) 
StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) Serializer(org.apache.hadoop.hive.serde2.Serializer) Cache(com.google.common.cache.Cache) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) Collections(java.util.Collections) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Duration(io.airlift.units.Duration) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) StripeId(com.facebook.presto.orc.StripeReader.StripeId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Cache(com.google.common.cache.Cache) Test(org.testng.annotations.Test)

Example 4 with OrcFileTail

use of com.facebook.presto.orc.metadata.OrcFileTail in project presto by prestodb.

In the class HiveClientModule, method createOrcFileTailSource.

/**
 * Builds the {@code OrcFileTailSource} handed out by this provider method.
 *
 * <p>The base source is a {@code StorageOrcFileTailSource} configured with the
 * expected file tail size and the DWRF stripe cache flag. If the file tail
 * cache is enabled, the base source is wrapped in a
 * {@code CachingOrcFileTailSource} and the cache statistics bean is exported
 * through {@code exporter}.
 *
 * @param orcCacheConfig cache settings read to size and configure the source
 * @param exporter used to export the cache statistics bean when caching is enabled
 * @return the plain storage source, or the caching wrapper around it
 */
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    OrcFileTailSource storageSource = new StorageOrcFileTailSource(
            toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes()),
            orcCacheConfig.isDwrfStripeCacheEnabled());
    if (!orcCacheConfig.isFileTailCacheEnabled()) {
        // caching disabled: hand out the storage-backed source directly
        return storageSource;
    }

    // entries are weighed by footer size plus metadata size of the cached tail
    Cache<OrcDataSourceId, OrcFileTail> tailCache = CacheBuilder.newBuilder()
            .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    CacheStatsMBean tailCacheStats = new CacheStatsMBean(tailCache);
    OrcFileTailSource cachingSource = new CachingOrcFileTailSource(storageSource, tailCache);
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), tailCacheStats);
    return cachingSource;
}
Also used : Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) 
DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) 
ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) 
OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 5 with OrcFileTail

use of com.facebook.presto.orc.metadata.OrcFileTail in project presto by prestodb.

In the class IcebergModule, method createOrcFileTailSource.

/**
 * Builds the {@code OrcFileTailSource} handed out by this provider method.
 *
 * <p>A {@code StorageOrcFileTailSource} is created from the configured expected
 * file tail size and DWRF stripe cache flag. With the file tail cache enabled,
 * it is wrapped in a {@code CachingOrcFileTailSource} and the cache statistics
 * bean is exported through {@code exporter}; otherwise it is returned as is.
 *
 * @param orcCacheConfig cache settings read to size and configure the source
 * @param exporter used to export the cache statistics bean when caching is enabled
 * @return the plain storage source, or the caching wrapper around it
 */
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    OrcFileTailSource baseSource = new StorageOrcFileTailSource(
            toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes()),
            orcCacheConfig.isDwrfStripeCacheEnabled());
    if (!orcCacheConfig.isFileTailCacheEnabled()) {
        // no cache requested: nothing to wrap or export
        return baseSource;
    }

    // entries are weighed by footer size plus metadata size of the cached tail
    Cache<OrcDataSourceId, OrcFileTail> fileTailCache = CacheBuilder.newBuilder()
            .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    CacheStatsMBean statsBean = new CacheStatsMBean(fileTailCache);
    OrcFileTailSource cachedSource = new CachingOrcFileTailSource(baseSource, fileTailCache);
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), statsBean);
    return cachedSource;
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) TestingMBeanServer(org.weakref.jmx.testing.TestingMBeanServer) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) CacheConfig(com.facebook.presto.cache.CacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) DynamicConfigurationProvider(com.facebook.presto.hive.DynamicConfigurationProvider) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) HiveHdfsConfiguration(com.facebook.presto.hive.HiveHdfsConfiguration) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) PartitionMutator(com.facebook.presto.hive.PartitionMutator) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) GcsConfigurationInitializer(com.facebook.presto.hive.gcs.GcsConfigurationInitializer) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) HdfsConfigurationInitializer(com.facebook.presto.hive.HdfsConfigurationInitializer) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) 
CacheFactory(com.facebook.presto.cache.CacheFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) Procedure(com.facebook.presto.spi.procedure.Procedure) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) MetastoreConfig(com.facebook.presto.hive.metastore.MetastoreConfig) FileMergeCacheConfig(com.facebook.presto.cache.filemerge.FileMergeCacheConfig) Slice(io.airlift.slice.Slice) HiveGcsConfigurationInitializer(com.facebook.presto.hive.gcs.HiveGcsConfigurationInitializer) ForCachingHiveMetastore(com.facebook.presto.hive.ForCachingHiveMetastore) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) Singleton(javax.inject.Singleton) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) HdfsConfiguration(com.facebook.presto.hive.HdfsConfiguration) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) MBeanServer(javax.management.MBeanServer) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) HiveNodePartitioningProvider(com.facebook.presto.hive.HiveNodePartitioningProvider) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) ExecutorService(java.util.concurrent.ExecutorService) ForMetastoreHdfsEnvironment(com.facebook.presto.hive.ForMetastoreHdfsEnvironment) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CacheStats(com.facebook.presto.cache.CacheStats) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) IcebergPlanOptimizer(com.facebook.presto.iceberg.optimizer.IcebergPlanOptimizer) 
CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) HiveGcsConfig(com.facebook.presto.hive.gcs.HiveGcsConfig) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Aggregations

OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail)8 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)7 Slice (io.airlift.slice.Slice)5 CachingOrcFileTailSource (com.facebook.presto.orc.cache.CachingOrcFileTailSource)4 OrcFileTailSource (com.facebook.presto.orc.cache.OrcFileTailSource)4 RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)4 Cache (com.google.common.cache.Cache)4 CacheBuilder (com.google.common.cache.CacheBuilder)4 Math.toIntExact (java.lang.Math.toIntExact)4 List (java.util.List)4 Optional (java.util.Optional)4 MILLISECONDS (java.util.concurrent.TimeUnit.MILLISECONDS)4 ConfigBinder.configBinder (com.facebook.airlift.configuration.ConfigBinder.configBinder)3 CachingStripeMetadataSource (com.facebook.presto.orc.CachingStripeMetadataSource)3 DwrfAwareStripeMetadataSourceFactory (com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory)3 OrcDataSourceId (com.facebook.presto.orc.OrcDataSourceId)3 StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)3 StripeMetadataSource (com.facebook.presto.orc.StripeMetadataSource)3 StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory)3 StripeId (com.facebook.presto.orc.StripeReader.StripeId)3