use of com.facebook.presto.hive.HiveSplit.BucketConversion in project presto by prestodb.
the class HivePageSourceProvider method createSelectivePageSource.
private static Optional<ConnectorPageSource> createSelectivePageSource(Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories, Configuration configuration, ConnectorSession session, HiveSplit split, HiveTableLayoutHandle layout, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache, SplitContext splitContext, Optional<EncryptionInformation> encryptionInformation) {
Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder().addAll(layout.getPredicateColumns().values()).addAll(split.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of())).build();
Set<String> columnNames = columns.stream().map(HiveColumnHandle::getName).collect(toImmutableSet());
List<HiveColumnHandle> allColumns = ImmutableList.<HiveColumnHandle>builder().addAll(columns).addAll(interimColumns.stream().filter(column -> !columnNames.contains(column.getName())).collect(toImmutableList())).build();
Path path = new Path(split.getPath());
List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(split.getPartitionKeys(), allColumns, ImmutableList.of(), split.getTableToPartitionMapping(), path, split.getTableBucketNumber(), split.getFileSize(), split.getFileModifiedTime());
Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion().map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));
Map<Integer, String> prefilledValues = columnMappings.stream().filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
Map<Integer, HiveCoercer> coercers = columnMappings.stream().filter(mapping -> mapping.getCoercionFrom().isPresent()).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());
RowExpression optimizedRemainingPredicate = rowExpressionCache.getUnchecked(new RowExpressionCacheKey(layout.getRemainingPredicate(), session));
if (shouldSkipBucket(layout, split, splitContext)) {
return Optional.of(new HiveEmptySplitPageSource());
}
if (shouldSkipPartition(typeManager, layout, hiveStorageTimeZone, split, splitContext)) {
return Optional.of(new HiveEmptySplitPageSource());
}
CacheQuota cacheQuota = generateCacheQuota(split);
for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(configuration, session, path, split.getStart(), split.getLength(), split.getFileSize(), split.getStorage(), toColumnHandles(columnMappings, true), prefilledValues, coercers, bucketAdaptation, outputColumns, splitContext.getDynamicFilterPredicate().map(filter -> filter.transform(handle -> new Subfield(((HiveColumnHandle) handle).getName())).intersect(layout.getDomainPredicate())).orElse(layout.getDomainPredicate()), optimizedRemainingPredicate, hiveStorageTimeZone, new HiveFileContext(splitContext.isCacheable(), cacheQuota, split.getExtraFileInfo().map(BinaryExtraHiveFileInfo::new), Optional.of(split.getFileSize()), split.getFileModifiedTime(), HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)), encryptionInformation);
if (pageSource.isPresent()) {
return Optional.of(pageSource.get());
}
}
return Optional.empty();
}
Aggregations