Search in sources :

Example 11 with RowGroupIndex

use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.

The method getIndexStreams of the class StructColumnWriter.

/**
 * Produces the row-index stream for this struct column, followed by the index
 * streams of every nested field writer.
 *
 * <p>One {@link RowGroupIndex} is created per entry of {@code rowGroupColumnStatistics},
 * pairing that row group's statistics with the position list derived from its
 * present-stream checkpoint (when the present stream reports checkpoints at all).
 * Must only be invoked once {@code closed} is true; this is asserted via {@code checkState}.
 *
 * @return this column's {@code ROW_INDEX} stream first, then whatever each writer in
 *         {@code structFields} returns from its own {@code getIndexStreams()}, in iteration order
 * @throws IOException declared by the signature and by the nested writers' {@code getIndexStreams()}
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    Optional<List<BooleanStreamCheckpoint>> allPresentCheckpoints = presentStream.getCheckpoints();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // A lambda can only capture effectively final locals, so snapshot the loop counter.
        final int rowGroupOrdinal = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroupOrdinal);
        Optional<BooleanStreamCheckpoint> presentCheckpoint =
                allPresentCheckpoints.map(perGroup -> perGroup.get(rowGroupOrdinal));
        List<Integer> positionList = createStructColumnPositionList(compressed, presentCheckpoint);
        indexBuilder.add(new RowGroupIndex(positionList, statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexBuilder.build());
    Stream rowIndexStream = new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false);

    // This column's own row index goes first; the nested fields follow.
    ImmutableList.Builder<StreamDataOutput> outputs = ImmutableList.builder();
    outputs.add(new StreamDataOutput(rowIndexSlice, rowIndexStream));
    for (ColumnWriter fieldWriter : structFields) {
        outputs.addAll(fieldWriter.getIndexStreams());
    }
    return outputs.build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) Stream(com.facebook.presto.orc.metadata.Stream)

Example 12 with RowGroupIndex

use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.

The method createStripeMetadataSourceFactory of the class HiveClientModule.

/**
 * Provider method that assembles the {@link StripeMetadataSourceFactory}.
 *
 * <p>The base source is a {@link StorageStripeMetadataSource}. When the stripe metadata
 * cache is enabled it is wrapped in a {@link CachingStripeMetadataSource} backed by a
 * stripe-footer cache, a stripe-stream cache and, if enabled, a row-group-index cache.
 * Each cache is weight-bounded, expires entries after access, records statistics and is
 * exported through {@code exporter} under a name derived from {@code connectorId}. When the
 * DWRF stripe cache is enabled the resulting factory is additionally wrapped in a
 * {@link DwrfAwareStripeMetadataSourceFactory}.
 *
 * @param orcCacheConfig supplies the enable flags, sizes and TTLs read below
 * @param exporter receives one {@code CacheStatsMBean} per cache that is created
 * @return the factory, optionally caching and optionally DWRF-aware
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource source = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        // The builder chain is typed <Object, Object> until build(), hence the casts in the weighers.
        Cache<StripeId, Slice> stripeFooters = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        Cache<StripeStreamId, Slice> stripeStreams = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();

        CacheStatsMBean stripeFooterStats = new CacheStatsMBean(stripeFooters);
        CacheStatsMBean stripeStreamStats = new CacheStatsMBean(stripeStreams);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), stripeFooterStats);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), stripeStreamStats);

        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            // NOTE(review): this cache expires on the stripe *stream* TTL setting; confirm that
            // sharing the TTL is intentional rather than a dedicated row-group-index TTL.
            Cache<StripeStreamId, List<RowGroupIndex>> rowGroupIndexes = CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((key, value) -> toIntExact(((List<RowGroupIndex>) value).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build();
            rowGroupIndexCache = Optional.of(rowGroupIndexes);
            CacheStatsMBean rowGroupIndexStats = new CacheStatsMBean(rowGroupIndexes);
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexStats);
        }

        source = new CachingStripeMetadataSource(source, stripeFooters, stripeStreams, rowGroupIndexCache);
    }

    StripeMetadataSourceFactory baseFactory = StripeMetadataSourceFactory.of(source);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return baseFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(baseFactory);
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) 
MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) 
ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) 
StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 13 with RowGroupIndex

use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.

The method readColumnIndexes of the class StripeReader.

/**
 * Reads the row group indexes of every {@code ROW_INDEX} stream and keys them by column.
 *
 * <p>Streams of any other kind are skipped. When {@code bloomFilterIndexes} holds a
 * non-empty list for the stream's column, each row group index is rebuilt so that its
 * column statistics carry the bloom filter found at the same list position.
 *
 * @param streams stream descriptors keyed by stream id
 * @param streamsData input streams keyed by the same stream ids
 * @param bloomFilterIndexes bloom filters keyed by column; a missing or empty entry leaves
 *        the indexes of that column untouched
 * @return an immutable map from column to its row group indexes
 * @throws IOException declared by the signature (presumably raised while reading the row indexes)
 */
private Map<Integer, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, Map<Integer, List<HiveBloomFilter>> bloomFilterIndexes) throws IOException {
    ImmutableMap.Builder<Integer, List<RowGroupIndex>> indexesByColumn = ImmutableMap.builder();

    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream candidate = streamEntry.getValue();
        if (candidate.getStreamKind() != ROW_INDEX) {
            continue;
        }

        OrcInputStream indexInput = streamsData.get(streamEntry.getKey());
        List<HiveBloomFilter> columnBloomFilters = bloomFilterIndexes.get(candidate.getColumn());
        List<RowGroupIndex> indexes = metadataReader.readRowIndexes(hiveWriterVersion, indexInput);

        boolean hasBloomFilters = columnBloomFilters != null && !columnBloomFilters.isEmpty();
        if (hasBloomFilters) {
            // Bloom filters are matched to row groups purely by list position.
            ImmutableList.Builder<RowGroupIndex> enriched = ImmutableList.builder();
            for (int rowGroup = 0; rowGroup < indexes.size(); rowGroup++) {
                RowGroupIndex plainIndex = indexes.get(rowGroup);
                ColumnStatistics statisticsWithFilter = plainIndex.getColumnStatistics()
                        .withBloomFilter(columnBloomFilters.get(rowGroup));
                enriched.add(new RowGroupIndex(plainIndex.getPositions(), statisticsWithFilter));
            }
            indexes = enriched.build();
        }

        indexesByColumn.put(candidate.getColumn(), indexes);
    }

    return indexesByColumn.build();
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.ColumnStatistics) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) ImmutableList(com.google.common.collect.ImmutableList) ImmutableMap(com.google.common.collect.ImmutableMap) Checkpoints.getDictionaryStreamCheckpoint(com.facebook.presto.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint) StreamCheckpoint(com.facebook.presto.orc.checkpoint.StreamCheckpoint) HiveBloomFilter(com.facebook.presto.orc.metadata.HiveBloomFilter) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ValueStream(com.facebook.presto.orc.stream.ValueStream) OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) Stream(com.facebook.presto.orc.metadata.Stream) InputStream(java.io.InputStream)

Example 14 with RowGroupIndex

use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.

The method createStripeMetadataSourceFactory of the class IcebergModule.

/**
 * Provider method that assembles the {@link StripeMetadataSourceFactory}.
 *
 * <p>Starts from a {@link StorageStripeMetadataSource}. When the stripe metadata cache is
 * enabled, that source is wrapped in a {@link CachingStripeMetadataSource} using a
 * stripe-footer cache, a stripe-stream cache and, if enabled, a row-group-index cache.
 * Every cache created here is weight-bounded, expires entries after access, records
 * statistics and is exported through {@code exporter} under a name derived from
 * {@code connectorId}. When the DWRF stripe cache is enabled the factory is wrapped in a
 * {@link DwrfAwareStripeMetadataSourceFactory}.
 *
 * @param orcCacheConfig supplies the enable flags, sizes and TTLs read below
 * @param exporter receives one {@code CacheStatsMBean} per cache that is created
 * @return the factory, optionally caching and optionally DWRF-aware
 */
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource source = new StorageStripeMetadataSource();

    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        // The builder chain is typed <Object, Object> until build(), hence the casts in the weighers.
        Cache<StripeReader.StripeId, Slice> stripeFooters = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        Cache<StripeReader.StripeStreamId, Slice> stripeStreams = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes())
                .weigher((key, value) -> toIntExact(((Slice) value).getRetainedSize()))
                .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();

        CacheStatsMBean stripeFooterStats = new CacheStatsMBean(stripeFooters);
        CacheStatsMBean stripeStreamStats = new CacheStatsMBean(stripeStreams);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), stripeFooterStats);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), stripeStreamStats);

        Optional<Cache<StripeReader.StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            // NOTE(review): this cache expires on the stripe *stream* TTL setting; confirm that
            // sharing the TTL is intentional rather than a dedicated row-group-index TTL.
            Cache<StripeReader.StripeStreamId, List<RowGroupIndex>> rowGroupIndexes = CacheBuilder.newBuilder()
                    .maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes())
                    .weigher((key, value) -> toIntExact(((List<RowGroupIndex>) value).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
                    .expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                    .recordStats()
                    .build();
            rowGroupIndexCache = Optional.of(rowGroupIndexes);
            CacheStatsMBean rowGroupIndexStats = new CacheStatsMBean(rowGroupIndexes);
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexStats);
        }

        source = new CachingStripeMetadataSource(source, stripeFooters, stripeStreams, rowGroupIndexCache);
    }

    StripeMetadataSourceFactory baseFactory = StripeMetadataSourceFactory.of(source);
    if (!orcCacheConfig.isDwrfStripeCacheEnabled()) {
        return baseFactory;
    }
    return new DwrfAwareStripeMetadataSourceFactory(baseFactory);
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Module(com.google.inject.Module) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) TestingMBeanServer(org.weakref.jmx.testing.TestingMBeanServer) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) CacheConfig(com.facebook.presto.cache.CacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) DynamicConfigurationProvider(com.facebook.presto.hive.DynamicConfigurationProvider) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) HiveHdfsConfiguration(com.facebook.presto.hive.HiveHdfsConfiguration) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) PartitionMutator(com.facebook.presto.hive.PartitionMutator) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) GcsConfigurationInitializer(com.facebook.presto.hive.gcs.GcsConfigurationInitializer) 
MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) HdfsConfigurationInitializer(com.facebook.presto.hive.HdfsConfigurationInitializer) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) CacheFactory(com.facebook.presto.cache.CacheFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) Procedure(com.facebook.presto.spi.procedure.Procedure) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) MetastoreConfig(com.facebook.presto.hive.metastore.MetastoreConfig) FileMergeCacheConfig(com.facebook.presto.cache.filemerge.FileMergeCacheConfig) Slice(io.airlift.slice.Slice) HiveGcsConfigurationInitializer(com.facebook.presto.hive.gcs.HiveGcsConfigurationInitializer) ForCachingHiveMetastore(com.facebook.presto.hive.ForCachingHiveMetastore) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) Singleton(javax.inject.Singleton) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) HdfsConfiguration(com.facebook.presto.hive.HdfsConfiguration) Binder(com.google.inject.Binder) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) MBeanServer(javax.management.MBeanServer) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) HiveNodePartitioningProvider(com.facebook.presto.hive.HiveNodePartitioningProvider) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) ExecutorService(java.util.concurrent.ExecutorService) ForMetastoreHdfsEnvironment(com.facebook.presto.hive.ForMetastoreHdfsEnvironment) 
OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) CacheStats(com.facebook.presto.cache.CacheStats) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) IcebergPlanOptimizer(com.facebook.presto.iceberg.optimizer.IcebergPlanOptimizer) CachingHiveMetastore(com.facebook.presto.hive.metastore.CachingHiveMetastore) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) HiveGcsConfig(com.facebook.presto.hive.gcs.HiveGcsConfig) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeReader(com.facebook.presto.orc.StripeReader) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CacheStatsMBean(com.facebook.presto.hive.CacheStatsMBean) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) 
Provides(com.google.inject.Provides)

Example 15 with RowGroupIndex

use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.

The method getIndexStreams of the class LongColumnWriter.

/**
 * Produces the single row-index stream for this long column.
 *
 * <p>One {@link RowGroupIndex} is created per entry of {@code rowGroupColumnStatistics},
 * pairing that row group's statistics with the position list derived from its data-stream
 * checkpoint and, when the present stream reports checkpoints, its present-stream checkpoint.
 * Must only be invoked once {@code closed} is true; this is asserted via {@code checkState}.
 *
 * @return a one-element list holding this column's {@code ROW_INDEX} stream
 * @throws IOException declared by the signature
 */
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
    checkState(closed);

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    List<LongStreamCheckpoint> allDataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> allPresentCheckpoints = presentStream.getCheckpoints();
    for (int rowGroup = 0; rowGroup < rowGroupColumnStatistics.size(); rowGroup++) {
        // A lambda can only capture effectively final locals, so snapshot the loop counter.
        final int rowGroupOrdinal = rowGroup;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroupOrdinal);
        LongStreamCheckpoint dataCheckpoint = allDataCheckpoints.get(rowGroupOrdinal);
        Optional<BooleanStreamCheckpoint> presentCheckpoint =
                allPresentCheckpoints.map(perGroup -> perGroup.get(rowGroupOrdinal));
        List<Integer> positionList = createLongColumnPositionList(compressed, dataCheckpoint, presentCheckpoint);
        indexBuilder.add(new RowGroupIndex(positionList, statistics));
    }

    Slice rowIndexSlice = metadataWriter.writeRowIndexes(indexBuilder.build());
    Stream rowIndexStream = new Stream(column, StreamKind.ROW_INDEX, rowIndexSlice.length(), false);
    StreamDataOutput rowIndexOutput = new StreamDataOutput(rowIndexSlice, rowIndexStream);
    return ImmutableList.of(rowIndexOutput);
}
Also used : ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) ImmutableList(com.google.common.collect.ImmutableList) StreamDataOutput(com.facebook.presto.orc.stream.StreamDataOutput) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) LongStreamCheckpoint(com.facebook.presto.orc.checkpoint.LongStreamCheckpoint) BooleanStreamCheckpoint(com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) PresentOutputStream(com.facebook.presto.orc.stream.PresentOutputStream) LongOutputStream(com.facebook.presto.orc.stream.LongOutputStream) Stream(com.facebook.presto.orc.metadata.Stream)

Aggregations

RowGroupIndex (com.facebook.presto.orc.metadata.RowGroupIndex)23 List (java.util.List)21 Slice (io.airlift.slice.Slice)19 ImmutableList (com.google.common.collect.ImmutableList)18 Stream (com.facebook.presto.orc.metadata.Stream)17 ArrayList (java.util.ArrayList)17 ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics)15 BooleanStreamCheckpoint (com.facebook.presto.orc.checkpoint.BooleanStreamCheckpoint)12 PresentOutputStream (com.facebook.presto.orc.stream.PresentOutputStream)12 StreamDataOutput (com.facebook.presto.orc.stream.StreamDataOutput)12 LongStreamCheckpoint (com.facebook.presto.orc.checkpoint.LongStreamCheckpoint)7 LongOutputStream (com.facebook.presto.orc.stream.LongOutputStream)7 ImmutableMap (com.google.common.collect.ImmutableMap)7 Math.toIntExact (java.lang.Math.toIntExact)6 Optional (java.util.Optional)6 OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream)5 InputStream (java.io.InputStream)5 CachingOrcFileTailSource (com.facebook.presto.orc.cache.CachingOrcFileTailSource)4 OrcFileTailSource (com.facebook.presto.orc.cache.OrcFileTailSource)4 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)4