Search in sources :

Example 6 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestOrcReaderDwrfStripeCaching method getDwrfStripeCache.

/**
 * Constructs a DWRF {@link OrcReader} over {@code orcFile} with a capturing stripe metadata
 * source factory and returns whatever stripe cache that factory reports afterwards.
 *
 * @param orcFile the file to open
 * @return the stripe cache reported by the capturing factory, possibly empty
 * @throws IOException if opening, reading or closing the file fails
 */
private Optional<DwrfStripeCache> getDwrfStripeCache(File orcFile) throws IOException {
    CapturingStripeMetadataSourceFactory stripeMetadataSourceFactory = new CapturingStripeMetadataSourceFactory();
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    // try-with-resources: the data source was previously never closed, leaking the file handle.
    try (OrcDataSource orcDataSource = new FileOrcDataSource(orcFile, oneMegabyte, oneMegabyte, oneMegabyte, true)) {
        // The reader instance itself is discarded; it is created only so that the capturing factory
        // is invoked (presumably during construction -- confirm against OrcReader).
        new OrcReader(orcDataSource, DWRF, new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, true), stripeMetadataSourceFactory, NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, OrcReaderTestingUtils.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        // Query the factory before the data source is closed, matching the original call order.
        return stripeMetadataSourceFactory.getDwrfStripeCache();
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)

Example 7 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestStructBatchStreamReader method read.

/**
 * Reads column 0 of the first batch of the ORC file held by {@code tempFile}, using
 * {@code readerType} as the type of that column, and returns it as a {@link RowBlock}.
 *
 * @param tempFile temporary file wrapper whose underlying file is read
 * @param readerType type requested for column 0
 * @return the block read for column 0, cast to {@link RowBlock}
 * @throws IOException if reading or closing the reader fails
 */
private RowBlock read(TempFile tempFile, Type readerType) throws IOException {
    DataSize dataSize = new DataSize(1, MEGABYTE);
    OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), dataSize, dataSize, dataSize, true);
    OrcReader orcReader = new OrcReader(orcDataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(dataSize, dataSize, dataSize, false), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    Map<Integer, Type> includedColumns = new HashMap<>();
    includedColumns.put(0, readerType);
    // try-with-resources guarantees the reader is closed even when nextBatch()/readBlock() throws;
    // previously close() was only reached on the success path.
    try (OrcBatchRecordReader recordReader = orcReader.createBatchRecordReader(includedColumns, OrcPredicate.TRUE, UTC, new TestingHiveOrcAggregatedMemoryContext(), OrcReader.INITIAL_BATCH_SIZE)) {
        recordReader.nextBatch();
        return (RowBlock) recordReader.readBlock(0);
    }
}
Also used : HashMap(java.util.HashMap) RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) RowBlock(com.facebook.presto.common.block.RowBlock) Type(com.facebook.presto.common.type.Type) DataSize(io.airlift.units.DataSize)

Example 8 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class StorageModule method createOrcFileTailSource.

/**
 * Provides the singleton {@link OrcFileTailSource}.
 *
 * <p>A {@link StorageOrcFileTailSource} is always created from the configured expected tail size and
 * DWRF stripe cache flag. When the file tail cache is enabled in {@code orcCacheConfig}, that source
 * is wrapped in a {@link CachingOrcFileTailSource} backed by a weight-bounded, access-expiring cache,
 * and the cache statistics are exported through {@code exporter}.
 *
 * @param orcCacheConfig cache configuration to read settings from
 * @param exporter exporter used to publish the cache statistics bean
 * @return the plain storage source, or a caching wrapper around it
 */
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    OrcFileTailSource storageSource = new StorageOrcFileTailSource(
            toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes()),
            orcCacheConfig.isDwrfStripeCacheEnabled());

    // Caching disabled: hand out the storage-backed source directly.
    if (!orcCacheConfig.isFileTailCacheEnabled()) {
        return storageSource;
    }

    long maximumWeight = orcCacheConfig.getFileTailCacheSize().toBytes();
    long ttlMillis = orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis();
    Cache<OrcDataSourceId, OrcFileTail> tailCache = CacheBuilder.newBuilder()
            .maximumWeight(maximumWeight)
            // Each entry is weighed by the total size reported by the tail.
            .weigher((id, tail) -> ((OrcFileTail) tail).getTotalSize())
            .expireAfterAccess(ttlMillis, TimeUnit.MILLISECONDS)
            .recordStats()
            .build();

    CacheStatsMBean statsBean = new CacheStatsMBean(tailCache);
    OrcFileTailSource cachingSource = new CachingOrcFileTailSource(storageSource, tailCache);
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), statsBean);
    return cachingSource;
}
Also used : Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) ShardOrganizer(com.facebook.presto.raptor.storage.organization.ShardOrganizer) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) ShardManager(com.facebook.presto.raptor.metadata.ShardManager) DatabaseShardRecorder(com.facebook.presto.raptor.metadata.DatabaseShardRecorder) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Ticker(com.google.common.base.Ticker) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) BackupManager(com.facebook.presto.raptor.backup.BackupManager) AssignmentLimiter(com.facebook.presto.raptor.metadata.AssignmentLimiter) ShardCleanerConfig(com.facebook.presto.raptor.metadata.ShardCleanerConfig) Slice(io.airlift.slice.Slice) MetadataConfig(com.facebook.presto.raptor.metadata.MetadataConfig) Singleton(javax.inject.Singleton) ShardCompactor(com.facebook.presto.raptor.storage.organization.ShardCompactor) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Objects.requireNonNull(java.util.Objects.requireNonNull) ShardCompactionManager(com.facebook.presto.raptor.storage.organization.ShardCompactionManager) Math.toIntExact(java.lang.Math.toIntExact) 
OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ShardCleaner(com.facebook.presto.raptor.metadata.ShardCleaner) OrganizationJobFactory(com.facebook.presto.raptor.storage.organization.OrganizationJobFactory) Scopes(com.google.inject.Scopes) ShardOrganizationManager(com.facebook.presto.raptor.storage.organization.ShardOrganizationManager) ShardRecorder(com.facebook.presto.raptor.metadata.ShardRecorder) TimeUnit(java.util.concurrent.TimeUnit) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) DatabaseShardManager(com.facebook.presto.raptor.metadata.DatabaseShardManager) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) JobFactory(com.facebook.presto.raptor.storage.organization.JobFactory) TemporalFunction(com.facebook.presto.raptor.storage.organization.TemporalFunction) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 9 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class AbstractTestOrcReader method testCaching.

/**
 * Verifies hit/miss accounting of the file tail, stripe footer, stripe stream and row group index
 * caches by reading the same temp file through two readers that share the caching sources: the
 * first reader is expected to produce only misses, the second only hits.
 *
 * <p>Both readers are managed by try-with-resources so they are closed before the temp file is
 * released, even when an assertion fails; previously they were never closed.
 */
@Test
public void testCaching() throws Exception {
    // All caches share the same weight bound (1 MB) and access TTL (10 minutes).
    long maxCacheWeight = new DataSize(1, MEGABYTE).toBytes();
    long ttlMillis = new Duration(10, MINUTES).toMillis();

    Cache<OrcDataSourceId, OrcFileTail> orcFileTailCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    OrcFileTailSource orcFileTailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), orcFileTailCache);

    Cache<StripeId, Slice> stripeFooterCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, footer) -> ((Slice) footer).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, stream) -> ((Slice) stream).length())
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build();
    Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder()
            .maximumWeight(maxCacheWeight)
            .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
            .expireAfterAccess(ttlMillis, MILLISECONDS)
            .recordStats()
            .build());
    StripeMetadataSource stripeMetadataSource = new CachingStripeMetadataSource(new StorageStripeMetadataSource(), stripeFooterCache, stripeStreamCache, rowGroupIndexCache);

    // Resources close in reverse order, so the readers are closed before the temp file.
    try (TempFile tempFile = createTempFile(10001);
            OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false)) {
        // Creating the first reader must miss the file tail cache exactly once.
        assertEquals(orcFileTailCache.stats().missCount(), 1);
        assertEquals(orcFileTailCache.stats().hitCount(), 0);

        try (OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false)) {
            // Creating the second reader must be served from the file tail cache.
            assertEquals(orcFileTailCache.stats().missCount(), 1);
            assertEquals(orcFileTailCache.stats().hitCount(), 1);

            assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
            assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
            assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());

            // First batch through the first reader: stripe-level caches only miss.
            storageReader.nextBatch();
            assertEquals(stripeFooterCache.stats().missCount(), 1);
            assertEquals(stripeFooterCache.stats().hitCount(), 0);
            assertEquals(stripeStreamCache.stats().missCount(), 2);
            assertEquals(stripeStreamCache.stats().hitCount(), 0);
            assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
            assertEquals(rowGroupIndexCache.get().stats().hitCount(), 0);

            // First batch through the second reader: the same lookups now hit.
            cacheReader.nextBatch();
            assertEquals(stripeFooterCache.stats().missCount(), 1);
            assertEquals(stripeFooterCache.stats().hitCount(), 1);
            assertEquals(stripeStreamCache.stats().missCount(), 2);
            assertEquals(stripeStreamCache.stats().hitCount(), 2);
            assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
            assertEquals(rowGroupIndexCache.get().stats().hitCount(), 1);

            assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
        }
    }
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) DateTimeZone(org.joda.time.DateTimeZone) StripeId(com.facebook.presto.orc.StripeReader.StripeId) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) Random(java.util.Random) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Duration(io.airlift.units.Duration) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) Range(com.google.common.collect.Range) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) Iterables.limit(com.google.common.collect.Iterables.limit) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors(java.util.stream.Collectors) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) 
CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Builder(com.google.common.collect.ImmutableList.Builder) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) MINUTES(java.util.concurrent.TimeUnit.MINUTES) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlDate(com.facebook.presto.common.type.SqlDate) ImmutableList(com.google.common.collect.ImmutableList) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) 
StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) Serializer(org.apache.hadoop.hive.serde2.Serializer) Cache(com.google.common.cache.Cache) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) Collections(java.util.Collections) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Duration(io.airlift.units.Duration) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) StripeId(com.facebook.presto.orc.StripeReader.StripeId) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Cache(com.google.common.cache.Cache) Test(org.testng.annotations.Test)

Example 10 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestOrcRecordReaderDwrfStripeCaching method assertFileContentCachingEnabled.

/**
 * Reads {@code orcFile} through a DWRF reader built with a DWRF-aware stripe metadata source
 * factory, checks the record values, and then asserts the forbidden disk ranges against the
 * instrumented data source.
 *
 * @param orcFile the file to read
 * @param forbiddenRanges disk ranges passed to {@code assertForbiddenRanges}
 * @throws IOException if reading or closing the data source fails
 */
private void assertFileContentCachingEnabled(File orcFile, List<DiskRange> forbiddenRanges) throws IOException {
    try (TestingOrcDataSource trackedDataSource = new TestingOrcDataSource(createFileOrcDataSource(orcFile))) {
        DwrfAwareStripeMetadataSourceFactory metadataSourceFactory = new DwrfAwareStripeMetadataSourceFactory(
                StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()));

        // set zeroes to avoid file caching and merging of small disk ranges
        DataSize zeroSize = new DataSize(0, MEGABYTE);
        DataSize oneMegabyte = new DataSize(1, MEGABYTE);
        OrcReaderOptions readerOptions = new OrcReaderOptions(zeroSize, zeroSize, oneMegabyte, false);

        OrcReader reader = new OrcReader(
                trackedDataSource,
                DWRF,
                new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, true),
                metadataSourceFactory,
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                readerOptions,
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());
        assertRecordValues(trackedDataSource, reader);

        // check that the reader used the cache to read stripe indexes and footers
        assertForbiddenRanges(trackedDataSource, forbiddenRanges);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)

Aggregations

StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)32 RuntimeStats (com.facebook.presto.common.RuntimeStats)15 Test (org.testng.annotations.Test)15 StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)13 DataSize (io.airlift.units.DataSize)12 OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory)8 ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)7 RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)7 OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail)7 List (java.util.List)7 Type (com.facebook.presto.common.type.Type)6 ImmutableList (com.google.common.collect.ImmutableList)6 Optional (java.util.Optional)6 CacheConfig (com.facebook.presto.cache.CacheConfig)5 ArrayType (com.facebook.presto.common.type.ArrayType)5 StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory)5 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)4 BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN)4 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)4 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)4