use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class StructColumnWriter method getIndexStreams.
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
checkState(closed);
ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();
for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
int groupId = i;
ColumnStatistics columnStatistics = rowGroupColumnStatistics.get(groupId);
Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(checkpoints -> checkpoints.get(groupId));
List<Integer> positions = createStructColumnPositionList(compressed, presentCheckpoint);
rowGroupIndexes.add(new RowGroupIndex(positions, columnStatistics));
}
Slice slice = metadataWriter.writeRowIndexes(rowGroupIndexes.build());
Stream stream = new Stream(column, StreamKind.ROW_INDEX, slice.length(), false);
ImmutableList.Builder<StreamDataOutput> indexStreams = ImmutableList.builder();
indexStreams.add(new StreamDataOutput(slice, stream));
for (ColumnWriter structField : structFields) {
indexStreams.addAll(structField.getIndexStreams());
}
return indexStreams.build();
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class HiveClientModule method createStripeMetadataSourceFactory.
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
Cache<StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
Cache<StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
}
stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
}
StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
factory = new DwrfAwareStripeMetadataSourceFactory(factory);
}
return factory;
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class StripeReader method readColumnIndexes.
private Map<Integer, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, Map<Integer, List<HiveBloomFilter>> bloomFilterIndexes) throws IOException {
ImmutableMap.Builder<Integer, List<RowGroupIndex>> columnIndexes = ImmutableMap.builder();
for (Entry<StreamId, Stream> entry : streams.entrySet()) {
Stream stream = entry.getValue();
if (stream.getStreamKind() == ROW_INDEX) {
OrcInputStream inputStream = streamsData.get(entry.getKey());
List<HiveBloomFilter> bloomFilters = bloomFilterIndexes.get(stream.getColumn());
List<RowGroupIndex> rowGroupIndexes = metadataReader.readRowIndexes(hiveWriterVersion, inputStream);
if (bloomFilters != null && !bloomFilters.isEmpty()) {
ImmutableList.Builder<RowGroupIndex> newRowGroupIndexes = ImmutableList.builder();
for (int i = 0; i < rowGroupIndexes.size(); i++) {
RowGroupIndex rowGroupIndex = rowGroupIndexes.get(i);
ColumnStatistics columnStatistics = rowGroupIndex.getColumnStatistics().withBloomFilter(bloomFilters.get(i));
newRowGroupIndexes.add(new RowGroupIndex(rowGroupIndex.getPositions(), columnStatistics));
}
rowGroupIndexes = newRowGroupIndexes.build();
}
columnIndexes.put(stream.getColumn(), rowGroupIndexes);
}
}
return columnIndexes.build();
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class IcebergModule method createStripeMetadataSourceFactory.
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
Cache<StripeReader.StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
Cache<StripeReader.StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
Optional<Cache<StripeReader.StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
}
stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
}
StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
factory = new DwrfAwareStripeMetadataSourceFactory(factory);
}
return factory;
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class LongColumnWriter method getIndexStreams.
@Override
public List<StreamDataOutput> getIndexStreams() throws IOException {
checkState(closed);
ImmutableList.Builder<RowGroupIndex> rowGroupIndexes = ImmutableList.builder();
List<LongStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();
for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
int groupId = i;
ColumnStatistics columnStatistics = rowGroupColumnStatistics.get(groupId);
LongStreamCheckpoint dataCheckpoint = dataCheckpoints.get(groupId);
Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(checkpoints -> checkpoints.get(groupId));
List<Integer> positions = createLongColumnPositionList(compressed, dataCheckpoint, presentCheckpoint);
rowGroupIndexes.add(new RowGroupIndex(positions, columnStatistics));
}
Slice slice = metadataWriter.writeRowIndexes(rowGroupIndexes.build());
Stream stream = new Stream(column, StreamKind.ROW_INDEX, slice.length(), false);
return ImmutableList.of(new StreamDataOutput(slice, stream));
}
Aggregations