use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class StripeReader method createRowGroup.
@VisibleForTesting
static RowGroup createRowGroup(int groupId, long rowsInStripe, long rowsInRowGroup, Map<StreamId, List<RowGroupIndex>> columnIndexes, Map<StreamId, ValueInputStream<?>> valueStreams, Map<StreamId, StreamCheckpoint> checkpoints) {
long totalRowGroupBytes = columnIndexes.values().stream().mapToLong(e -> e.get(groupId).getColumnStatistics().getTotalValueSizeInBytes()).sum();
long rowOffset = multiplyExact(groupId, rowsInRowGroup);
int rowCount = toIntExact(Math.min(rowsInStripe - rowOffset, rowsInRowGroup));
ImmutableMap.Builder<StreamId, InputStreamSource<?>> builder = ImmutableMap.builder();
for (Entry<StreamId, StreamCheckpoint> entry : checkpoints.entrySet()) {
StreamId streamId = entry.getKey();
StreamCheckpoint checkpoint = entry.getValue();
// skip streams without data
ValueInputStream<?> valueStream = valueStreams.get(streamId);
if (valueStream == null) {
continue;
}
builder.put(streamId, createCheckpointStreamSource(valueStream, checkpoint));
}
InputStreamSources rowGroupStreams = new InputStreamSources(builder.build());
return new RowGroup(groupId, rowOffset, rowCount, totalRowGroupBytes, rowGroupStreams);
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class StripeReader method getRowGroupStatistics.
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) {
requireNonNull(rootStructType, "rootStructType is null");
checkArgument(rootStructType.getOrcTypeKind() == STRUCT);
requireNonNull(columnIndexes, "columnIndexes is null");
checkArgument(rowGroup >= 0, "rowGroup is negative");
Map<Integer, List<ColumnStatistics>> groupedColumnStatistics = new HashMap<>();
for (Entry<StreamId, List<RowGroupIndex>> entry : columnIndexes.entrySet()) {
if (!entry.getValue().isEmpty() && entry.getValue().get(rowGroup) != null) {
groupedColumnStatistics.computeIfAbsent(entry.getKey().getColumn(), key -> new ArrayList<>()).add(entry.getValue().get(rowGroup).getColumnStatistics());
}
}
ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
List<ColumnStatistics> columnStatistics = groupedColumnStatistics.get(rootStructType.getFieldTypeIndex(ordinal));
if (columnStatistics != null) {
if (columnStatistics.size() == 1) {
statistics.put(ordinal, getOnlyElement(columnStatistics));
} else {
// Merge statistics from different streams
// This can happen if map is represented as struct (DWRF only)
statistics.put(ordinal, mergeColumnStatistics(columnStatistics));
}
}
}
return statistics.build();
}
use of com.facebook.presto.orc.metadata.RowGroupIndex in project presto by prestodb.
the class StorageModule method createStripeMetadataSourceFactory.
@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
Cache<StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> ((Slice) footer).length()).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS).recordStats().build();
Cache<StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> ((Slice) stream).length()).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS).recordStats().build();
CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
}
stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
}
StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
factory = new DwrfAwareStripeMetadataSourceFactory(factory);
}
return factory;
}
Aggregations