Search in sources :

Example 11 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class IcebergModule method createStripeMetadataSourceFactory.

@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        Cache<StripeReader.StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        Cache<StripeReader.StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
        CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
        Optional<Cache<StripeReader.StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
            CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
        }
        stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
    }
    StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
    if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
        factory = new DwrfAwareStripeMetadataSourceFactory(factory);
    }
    return factory;
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) TestingMBeanServer(org.weakref.jmx.testing.TestingMBeanServer) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) CacheConfig(com.facebook.presto.cache.CacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) DynamicConfigurationProvider(com.facebook.presto.hive.DynamicConfigurationProvider) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) HiveHdfsConfiguration(com.facebook.presto.hive.HiveHdfsConfiguration) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) PartitionMutator(com.facebook.presto.hive.PartitionMutator) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) GcsConfigurationInitializer(com.facebook.presto.hive.gcs.GcsConfigurationInitializer) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) HdfsConfigurationInitializer(com.facebook.presto.hive.HdfsConfigurationInitializer) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) CacheFactory(com.facebook.presto.cache.CacheFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) Procedure(com.facebook.presto.spi.procedure.Procedure) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) MetastoreConfig(com.facebook.presto.hive.metastore.MetastoreConfig) FileMergeCacheConfig(com.facebook.presto.cache.filemerge.FileMergeCacheConfig) Slice(io.airlift.slice.Slice) HiveGcsConfigurationInitializer(com.facebook.presto.hive.gcs.HiveGcsConfigurationInitializer) ForCachingHiveMetastore(com.facebook.presto.hive.ForCachingHiveMetastore) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) Singleton(javax.inject.Singleton) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) HdfsConfiguration(com.facebook.presto.hive.HdfsConfiguration) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) MBeanServer(javax.management.MBeanServer) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) HiveNodePartitioningProvider(com.facebook.presto.hive.HiveNodePartitioningProvider) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) ExecutorService(java.util.concurrent.ExecutorService) ForMetastoreHdfsEnvironment(com.facebook.presto.hive.ForMetastoreHdfsEnvironment) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CacheStats(com.facebook.presto.cache.CacheStats) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) IcebergPlanOptimizer(com.facebook.presto.iceberg.optimizer.IcebergPlanOptimizer) CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) HiveGcsConfig(com.facebook.presto.hive.gcs.HiveGcsConfig) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 12 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testFailForLongVarcharPartitionColumn.

@Test
public void testFailForLongVarcharPartitionColumn() throws Exception {
    TestColumn partitionColumn = new TestColumn("partition_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), "test", utf8Slice("tes"), true);
    TestColumn varcharColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);
    HiveErrorCode expectedErrorCode = HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar\\(3\\) partition key: partition_column";
    assertThatFileFormat(RCTEXT).withColumns(columns).isFailingForPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(RCBINARY).withColumns(columns).isFailingForPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(ORC).withColumns(columns).isFailingForPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET).withColumns(columns).withSession(parquetPageSourceSession).isFailingForPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER), expectedErrorCode, expectedMessage);
    assertThatFileFormat(SEQUENCEFILE).withColumns(columns).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(TEXTFILE).withColumns(columns).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 13 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testDwrf.

@Test(dataProvider = "rowCount")
public void testDwrf(int rowCount) throws Exception {
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)).collect(Collectors.toList());
    assertThatFileFormat(DWRF).withColumns(testColumns).withRowsCount(rowCount).isReadableByPageSource(new DwrfBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HIVE_CLIENT_CONFIG, HDFS_ENVIRONMENT, STATS, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()), NO_ENCRYPTION));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Test(org.testng.annotations.Test)

Example 14 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class StorageModule method createStripeMetadataSourceFactory.

@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        Cache<StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> ((Slice) footer).length()).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS).recordStats().build();
        Cache<StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> ((Slice) stream).length()).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS).recordStats().build();
        CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
        CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
            CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
        }
        stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
    }
    StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
    if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
        factory = new DwrfAwareStripeMetadataSourceFactory(factory);
    }
    return factory;
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) ShardOrganizer(com.facebook.presto.raptor.storage.organization.ShardOrganizer) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) DatabaseShardRecorder(com.facebook.presto.raptor.metadata.DatabaseShardRecorder) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Ticker(com.google.common.base.Ticker) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) BackupManager(com.facebook.presto.raptor.backup.BackupManager) AssignmentLimiter(com.facebook.presto.raptor.metadata.AssignmentLimiter) ShardCleanerConfig(com.facebook.presto.raptor.metadata.ShardCleanerConfig) Slice(io.airlift.slice.Slice) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Singleton(javax.inject.Singleton) ShardCompactor(com.facebook.presto.raptor.storage.organization.ShardCompactor) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Objects.requireNonNull(java.util.Objects.requireNonNull) ShardCompactionManager(com.facebook.presto.raptor.storage.organization.ShardCompactionManager) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ShardCleaner(com.facebook.presto.raptor.metadata.ShardCleaner) OrganizationJobFactory(com.facebook.presto.raptor.storage.organization.OrganizationJobFactory) Scopes(com.google.inject.Scopes) ShardOrganizationManager(com.facebook.presto.raptor.storage.organization.ShardOrganizationManager) ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) DatabaseShardManager(com.facebook.presto.raptor.metadata.DatabaseShardManager) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) JobFactory(com.facebook.presto.raptor.storage.organization.JobFactory) TemporalFunction(com.facebook.presto.raptor.storage.organization.TemporalFunction) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Aggregations

StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)14 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)13 OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory)8 ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)7 RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)7 Test (org.testng.annotations.Test)7 StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory)6 List (java.util.List)6 Optional (java.util.Optional)6 CacheConfig (com.facebook.presto.cache.CacheConfig)5 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)5 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)5 DwrfBatchPageSourceFactory (com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory)4 MetadataReader (com.facebook.presto.parquet.cache.MetadataReader)4 RuntimeStats (com.facebook.presto.common.RuntimeStats)3 ArrayType (com.facebook.presto.common.type.ArrayType)3 OutputStreamDataSinkFactory (com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory)3 ParquetFileWriterFactory (com.facebook.presto.hive.parquet.ParquetFileWriterFactory)3 OrcReader (com.facebook.presto.orc.OrcReader)3 RaptorOrcAggregatedMemoryContext (com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext)3