Search in sources :

Example 1 with ParquetFileMetadata

Use of com.facebook.presto.parquet.cache.ParquetFileMetadata in the presto project by prestodb.

From the class AbstractTestParquetReader, method testCaching.

/**
 * Reads the same single-column Parquet file several times through a
 * {@link CachingParquetMetadataSource} and checks the cache statistics after
 * every read: the first read is counted as a miss, repeated reads are counted
 * as hits, and a read issued right after {@code invalidateAll()} is counted
 * as a miss again.
 */
@Test
public void testCaching() throws Exception {
    Cache<ParquetDataSourceId, ParquetFileMetadata> metadataCache = CacheBuilder.newBuilder()
            .maximumWeight(new DataSize(1, MEGABYTE).toBytes())
            .weigher((id, metadata) -> ((ParquetFileMetadata) metadata).getMetadataSize())
            .expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    ParquetMetadataSource cachingSource = new CachingParquetMetadataSource(metadataCache, new MetadataReader());

    // {expected missCount, expected hitCount} observed after each successive read.
    long[][] expectedCountsAfterRead = { { 1, 0 }, { 1, 1 }, { 1, 2 }, { 2, 2 }, { 2, 3 } };
    // The cache is emptied immediately before the read at this index.
    int readPrecededByInvalidation = 3;

    try (ParquetTester.TempFile tempFile = new ParquetTester.TempFile("test", "parquet")) {
        Iterable<Integer> values = intsBetween(0, 10);
        Iterator<?>[] valueIterators = new Iterator<?>[] { values.iterator() };
        List<String> columnNames = singletonList("column1");
        List<Type> columnTypes = singletonList(INTEGER);
        writeParquetFileFromPresto(tempFile.getFile(), columnTypes, columnNames, valueIterators, 10, CompressionCodecName.GZIP);

        for (int read = 0; read < expectedCountsAfterRead.length; read++) {
            if (read == readPrecededByInvalidation) {
                metadataCache.invalidateAll();
            }
            testSingleRead(new Iterable<?>[] { values }, columnNames, columnTypes, cachingSource, tempFile.getFile());
            assertEquals(metadataCache.stats().missCount(), expectedCountsAfterRead[read][0]);
            assertEquals(metadataCache.stats().hitCount(), expectedCountsAfterRead[read][1]);
        }
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) JavaHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.hive.parquet.ParquetTester.HIVE_STORAGE_TIME_ZONE) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) Collections.singletonList(java.util.Collections.singletonList) BigDecimal(java.math.BigDecimal) Iterables.cycle(com.google.common.collect.Iterables.cycle) Arrays.asList(java.util.Arrays.asList) MessageTypeParser(org.apache.parquet.schema.MessageTypeParser) Map(java.util.Map) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Functions.compose(com.google.common.base.Functions.compose) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) 
PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) Range(com.google.common.collect.Range) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) Statistics(org.apache.parquet.format.Statistics) MAX_PRECISION(com.facebook.presto.common.type.Decimals.MAX_PRECISION) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Stream(java.util.stream.Stream) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) StructuralTestUtil.mapType(com.facebook.presto.tests.StructuralTestUtil.mapType) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ContiguousSet(com.google.common.collect.ContiguousSet) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) MINUTES(java.util.concurrent.TimeUnit.MINUTES) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) SqlDate(com.facebook.presto.common.type.SqlDate) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) RowType.field(com.facebook.presto.common.type.RowType.field) 
ParquetTester.insertNullEvery(com.facebook.presto.hive.parquet.ParquetTester.insertNullEvery) AbstractIterator(com.google.common.collect.AbstractIterator) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) UTC_KEY(com.facebook.presto.common.type.TimeZoneKey.UTC_KEY) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) ParquetTester.testSingleRead(com.facebook.presto.hive.parquet.ParquetTester.testSingleRead) RowType(com.facebook.presto.common.type.RowType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) Iterables.transform(com.google.common.collect.Iterables.transform) Random(java.util.Random) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) BINARY(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY) BeforeClass(org.testng.annotations.BeforeClass) Timestamp(java.sql.Timestamp) ParquetTester.writeParquetFileFromPresto(com.facebook.presto.hive.parquet.ParquetTester.writeParquetFileFromPresto) UUID(java.util.UUID) Iterables.limit(com.google.common.collect.Iterables.limit) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) LocalDate(java.time.LocalDate) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Arrays.stream(java.util.Arrays.stream) 
PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) Assert.assertEquals(org.testng.Assert.assertEquals) Shorts(com.google.common.primitives.Shorts) HashMap(java.util.HashMap) OPTIONAL(org.apache.parquet.schema.Type.Repetition.OPTIONAL) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator) Level(java.util.logging.Level) ImmutableList(com.google.common.collect.ImmutableList) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Type(com.facebook.presto.common.type.Type) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) Date(java.sql.Date) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) 
ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) Duration(io.airlift.units.Duration) BigInteger(java.math.BigInteger) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) StructuralTestUtil.mapType(com.facebook.presto.tests.StructuralTestUtil.mapType) ArrayType(com.facebook.presto.common.type.ArrayType) RowType(com.facebook.presto.common.type.RowType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Type(com.facebook.presto.common.type.Type) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) DataSize(io.airlift.units.DataSize) AbstractIterator(com.google.common.collect.AbstractIterator) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator) Iterator(java.util.Iterator) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) Test(org.testng.annotations.Test)

Example 2 with ParquetFileMetadata

Use of com.facebook.presto.parquet.cache.ParquetFileMetadata in the presto project by prestodb.

From the class HiveClientModule, method createParquetMetadataSource.

/**
 * Provides the {@link ParquetMetadataSource} singleton.
 *
 * <p>When the metadata cache is disabled in {@code parquetCacheConfig}, a plain
 * {@link MetadataReader} is returned. Otherwise the reader is wrapped in a
 * {@link CachingParquetMetadataSource} backed by a stats-recording cache whose
 * maximum weight and expire-after-access time are taken from the config, and a
 * {@link CacheStatsMBean} for that cache is exported through {@code exporter}.
 *
 * @param parquetCacheConfig supplies the cache-enabled flag, maximum size and TTL
 * @param exporter used to export the cache statistics MBean
 * @return the direct reader, or the caching wrapper around it when caching is enabled
 */
@Singleton
@Provides
public ParquetMetadataSource createParquetMetadataSource(ParquetCacheConfig parquetCacheConfig, MBeanExporter exporter) {
    ParquetMetadataSource directReader = new MetadataReader();
    if (!parquetCacheConfig.isMetadataCacheEnabled()) {
        return directReader;
    }

    // Entries are weighed by their metadata size, so maximumWeight is a byte budget.
    Cache<ParquetDataSourceId, ParquetFileMetadata> metadataCache = CacheBuilder.newBuilder()
            .maximumWeight(parquetCacheConfig.getMetadataCacheSize().toBytes())
            .weigher((id, metadata) -> ((ParquetFileMetadata) metadata).getMetadataSize())
            .expireAfterAccess(parquetCacheConfig.getMetadataCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    CacheStatsMBean statsBean = new CacheStatsMBean(metadataCache);
    ParquetMetadataSource cachingSource = new CachingParquetMetadataSource(metadataCache, directReader);
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_ParquetMetadata"), statsBean);
    return cachingSource;
}
Also used : Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) 
DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) 
ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) 
MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Aggregations

ParquetDataSourceId (com.facebook.presto.parquet.ParquetDataSourceId)2 CachingParquetMetadataSource (com.facebook.presto.parquet.cache.CachingParquetMetadataSource)2 MetadataReader (com.facebook.presto.parquet.cache.MetadataReader)2 ParquetFileMetadata (com.facebook.presto.parquet.cache.ParquetFileMetadata)2 ParquetMetadataSource (com.facebook.presto.parquet.cache.ParquetMetadataSource)2 Cache (com.google.common.cache.Cache)2 CacheBuilder (com.google.common.cache.CacheBuilder)2 List (java.util.List)2 Optional (java.util.Optional)2 MILLISECONDS (java.util.concurrent.TimeUnit.MILLISECONDS)2 BoundedExecutor (com.facebook.airlift.concurrent.BoundedExecutor)1 ExecutorServiceAdapter (com.facebook.airlift.concurrent.ExecutorServiceAdapter)1 Threads.daemonThreadsNamed (com.facebook.airlift.concurrent.Threads.daemonThreadsNamed)1 ConfigBinder.configBinder (com.facebook.airlift.configuration.ConfigBinder.configBinder)1 EventClient (com.facebook.airlift.event.client.EventClient)1 JsonCodecBinder.jsonCodecBinder (com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder)1 SmileCodecBinder.smileCodecBinder (com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder)1 ForCachingFileSystem (com.facebook.presto.cache.ForCachingFileSystem)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)1