Use of com.facebook.presto.common.Subfield in project presto by prestodb.
Class HiveMetadata, method getTableLayouts.
@Override
public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> desiredColumns)
{
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    HivePartitionResult hivePartitionResult;
    if (handle.getAnalyzePartitionValues().isPresent()) {
        verify(constraint.getSummary().isAll(), "There shouldn't be any constraint for ANALYZE operation");
        hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, handle.getAnalyzePartitionValues().get(), session);
    }
    else {
        hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint, session);
    }

    Map<String, HiveColumnHandle> predicateColumns = hivePartitionResult.getEffectivePredicate().getDomains().get().keySet().stream()
            .map(HiveColumnHandle.class::cast)
            .collect(toImmutableMap(HiveColumnHandle::getName, Functions.identity()));

    Optional<HiveBucketHandle> hiveBucketHandle = hivePartitionResult.getBucketHandle();
    int virtualBucketCount = getVirtualBucketCount(session);
    if (!hivePartitionResult.getBucketHandle().isPresent() && virtualBucketCount > 0) {
        hiveBucketHandle = Optional.of(createVirtualBucketHandle(virtualBucketCount));
    }

    TupleDomain<Subfield> domainPredicate = hivePartitionResult.getEffectivePredicate().transform(HiveMetadata::toSubfield);
    Table table = metastore.getTable(getMetastoreContext(session), handle.getSchemaTableName().getSchemaName(), handle.getSchemaTableName().getTableName())
            .orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));

    return ImmutableList.of(new ConnectorTableLayoutResult(
            getTableLayout(
                    session,
                    new HiveTableLayoutHandle(
                            handle.getSchemaTableName(),
                            table.getStorage().getLocation(),
                            hivePartitionResult.getPartitionColumns(),
                            // remove comments to optimize serialization costs
                            pruneColumnComments(hivePartitionResult.getDataColumns()),
                            hivePartitionResult.getTableParameters(),
                            hivePartitionResult.getPartitions(),
                            domainPredicate,
                            TRUE_CONSTANT,
                            predicateColumns,
                            hivePartitionResult.getEnforcedConstraint(),
                            hiveBucketHandle,
                            hivePartitionResult.getBucketFilter(),
                            false,
                            createTableLayoutString(session, handle.getSchemaTableName(), hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), TRUE_CONSTANT, domainPredicate),
                            desiredColumns.map(columns -> columns.stream().map(column -> (HiveColumnHandle) column).collect(toImmutableSet())),
                            false)),
            hivePartitionResult.getUnenforcedConstraint()));
}
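The only Subfield-specific step above is hivePartitionResult.getEffectivePredicate().transform(HiveMetadata::toSubfield), which re-keys the column-level predicate by Subfield. Below is a minimal sketch of that idea, assuming presto-common is on the classpath; toSubfield here is a hypothetical stand-in for the private helper in HiveMetadata.

import com.facebook.presto.common.Subfield;

public class ToSubfieldSketch
{
    // Hypothetical stand-in for HiveMetadata's private helper: a bare column
    // name parses to a Subfield whose root is the column and whose nested
    // path is empty, i.e. it denotes the entire column.
    static Subfield toSubfield(String columnName)
    {
        return new Subfield(columnName);
    }

    public static void main(String[] args)
    {
        Subfield wholeColumn = toSubfield("orders");
        System.out.println(wholeColumn.getRootName());        // orders
        System.out.println(wholeColumn.getPath().isEmpty());  // true

        // The same constructor parses dotted/subscripted paths, which is how
        // predicates on nested fields are addressed elsewhere in the connector.
        Subfield nested = new Subfield("orders.shipinfo[1].address");
        System.out.println(nested.getRootName());  // orders
        System.out.println(nested.getPath().size());  // 3
    }
}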
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
Class HiveMetadata, method getTableStatistics.
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Optional<ConnectorTableLayoutHandle> tableLayoutHandle, List<ColumnHandle> columnHandles, Constraint<ColumnHandle> constraint)
{
    if (!isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    if (!tableLayoutHandle.isPresent() || !((HiveTableLayoutHandle) tableLayoutHandle.get()).isPushdownFilterEnabled()) {
        Map<String, ColumnHandle> columns = columnHandles.stream()
                .map(HiveColumnHandle.class::cast)
                .filter(not(HiveColumnHandle::isHidden))
                .collect(toImmutableMap(HiveColumnHandle::getName, Function.identity()));
        Map<String, Type> columnTypes = columns.entrySet().stream()
                .collect(toImmutableMap(Map.Entry::getKey, entry -> getColumnMetadata(session, tableHandle, entry.getValue()).getType()));
        List<HivePartition> partitions = partitionManager.getPartitions(metastore, tableHandle, constraint, session).getPartitions();
        return hiveStatisticsProvider.getTableStatistics(session, ((HiveTableHandle) tableHandle).getSchemaTableName(), columns, columnTypes, partitions);
    }

    verify(!constraint.predicate().isPresent());
    HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) tableLayoutHandle.get();

    Set<String> columnNames = columnHandles.stream()
            .map(HiveColumnHandle.class::cast)
            .map(HiveColumnHandle::getName)
            .collect(toImmutableSet());
    Set<ColumnHandle> allColumnHandles = ImmutableSet.<ColumnHandle>builder()
            .addAll(columnHandles)
            .addAll(hiveLayoutHandle.getPredicateColumns().values().stream()
                    .filter(column -> !columnNames.contains(column.getName()))
                    .collect(toImmutableList()))
            .build();

    Map<String, ColumnHandle> allColumns = Maps.uniqueIndex(allColumnHandles, column -> ((HiveColumnHandle) column).getName());
    Map<String, Type> allColumnTypes = allColumns.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> getColumnMetadata(session, tableHandle, entry.getValue()).getType()));

    Constraint<ColumnHandle> combinedConstraint = new Constraint<>(constraint.getSummary().intersect(hiveLayoutHandle.getDomainPredicate()
            .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
            .transform(allColumns::get)));

    SubfieldExtractor subfieldExtractor = new SubfieldExtractor(functionResolution, rowExpressionService.getExpressionOptimizer(), session);
    RowExpression domainPredicate = rowExpressionService.getDomainTranslator().toPredicate(hiveLayoutHandle.getDomainPredicate()
            .transform(subfield -> subfieldExtractor.toRowExpression(subfield, allColumnTypes.get(subfield.getRootName()))));
    RowExpression combinedPredicate = binaryExpression(SpecialFormExpression.Form.AND, ImmutableList.of(hiveLayoutHandle.getRemainingPredicate(), domainPredicate));

    List<HivePartition> partitions = partitionManager.getPartitions(metastore, tableHandle, combinedConstraint, session).getPartitions();
    TableStatistics tableStatistics = hiveStatisticsProvider.getTableStatistics(session, ((HiveTableHandle) tableHandle).getSchemaTableName(), allColumns, allColumnTypes, partitions);
    return filterStatsCalculatorService.filterStats(tableStatistics, combinedPredicate, session, ImmutableBiMap.copyOf(allColumns).inverse(), allColumnTypes);
}
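The transform chain above first maps each Subfield to its root column name when it covers the entire column, and to null otherwise; TupleDomain.transform drops entries whose key maps to null, so nested-field domains fall away before column-level statistics are computed. A self-contained sketch of that filtering step follows; isEntireColumn mirrors what the private helper in HiveMetadata is assumed to check (an empty nested path).

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableMap;

import static com.facebook.presto.common.type.BigintType.BIGINT;

public class EntireColumnSketch
{
    // Assumed behavior of HiveMetadata's private helper: an empty path means
    // the Subfield denotes the whole top-level column.
    static boolean isEntireColumn(Subfield subfield)
    {
        return subfield.getPath().isEmpty();
    }

    public static void main(String[] args)
    {
        TupleDomain<Subfield> domainPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                new Subfield("ds"), Domain.singleValue(BIGINT, 20220101L),
                new Subfield("payload.kind"), Domain.singleValue(BIGINT, 3L)));

        // Re-key entire-column entries by column name; transform drops
        // null-mapped entries, so the nested "payload.kind" domain disappears
        // rather than over-constraining column-level statistics.
        TupleDomain<String> columnPredicate = domainPredicate.transform(
                subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null);
        System.out.println(columnPredicate); // only "ds" survives
    }
}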
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
Class HivePageSourceProvider, method createSelectivePageSource.
private static Optional<ConnectorPageSource> createSelectivePageSource(
        Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories,
        Configuration configuration,
        ConnectorSession session,
        HiveSplit split,
        HiveTableLayoutHandle layout,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache,
        SplitContext splitContext,
        Optional<EncryptionInformation> encryptionInformation)
{
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder()
            .addAll(layout.getPredicateColumns().values())
            .addAll(split.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of()))
            .build();

    Set<String> columnNames = columns.stream().map(HiveColumnHandle::getName).collect(toImmutableSet());
    List<HiveColumnHandle> allColumns = ImmutableList.<HiveColumnHandle>builder()
            .addAll(columns)
            .addAll(interimColumns.stream().filter(column -> !columnNames.contains(column.getName())).collect(toImmutableList()))
            .build();

    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(
            split.getPartitionKeys(),
            allColumns,
            ImmutableList.of(),
            split.getTableToPartitionMapping(),
            path,
            split.getTableBucketNumber(),
            split.getFileSize(),
            split.getFileModifiedTime());

    Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion()
            .map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));

    Map<Integer, String> prefilledValues = columnMappings.stream()
            .filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED)
            .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));

    Map<Integer, HiveCoercer> coercers = columnMappings.stream()
            .filter(mapping -> mapping.getCoercionFrom().isPresent())
            .collect(toImmutableMap(
                    mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(),
                    mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));

    List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());

    RowExpression optimizedRemainingPredicate = rowExpressionCache.getUnchecked(new RowExpressionCacheKey(layout.getRemainingPredicate(), session));

    if (shouldSkipBucket(layout, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    if (shouldSkipPartition(typeManager, layout, hiveStorageTimeZone, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }

    CacheQuota cacheQuota = generateCacheQuota(split);
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(
                configuration,
                session,
                path,
                split.getStart(),
                split.getLength(),
                split.getFileSize(),
                split.getStorage(),
                toColumnHandles(columnMappings, true),
                prefilledValues,
                coercers,
                bucketAdaptation,
                outputColumns,
                splitContext.getDynamicFilterPredicate()
                        .map(filter -> filter.transform(handle -> new Subfield(((HiveColumnHandle) handle).getName())).intersect(layout.getDomainPredicate()))
                        .orElse(layout.getDomainPredicate()),
                optimizedRemainingPredicate,
                hiveStorageTimeZone,
                new HiveFileContext(
                        splitContext.isCacheable(),
                        cacheQuota,
                        split.getExtraFileInfo().map(BinaryExtraHiveFileInfo::new),
                        Optional.of(split.getFileSize()),
                        split.getFileModifiedTime(),
                        HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)),
                encryptionInformation);
        if (pageSource.isPresent()) {
            return Optional.of(pageSource.get());
        }
    }
    return Optional.empty();
}
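The Subfield usage here is the dynamic-filter branch: a runtime TupleDomain keyed by column handle is re-keyed by Subfield and intersected with the layout's pushed-down domain predicate, so the page source reads only rows that satisfy both. A hedged sketch of that combination, with String standing in for HiveColumnHandle:

import com.facebook.presto.common.Subfield;
import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.TupleDomain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import static com.facebook.presto.common.type.BigintType.BIGINT;

public class DynamicFilterIntersectSketch
{
    public static void main(String[] args)
    {
        // Stand-in for layout.getDomainPredicate(): the statically pushed-down predicate.
        TupleDomain<Subfield> layoutPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                new Subfield("orderkey"), Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L))));

        // Stand-in for splitContext.getDynamicFilterPredicate(), keyed by column
        // name here where the real code uses HiveColumnHandle.
        TupleDomain<String> dynamicFilter = TupleDomain.withColumnDomains(ImmutableMap.of(
                "orderkey", Domain.multipleValues(BIGINT, ImmutableList.of(2L, 3L, 4L))));

        // Re-key the dynamic filter by Subfield, then intersect with the static
        // predicate; rows must satisfy both to be emitted.
        TupleDomain<Subfield> effective = dynamicFilter
                .transform(Subfield::new)
                .intersect(layoutPredicate);
        System.out.println(effective); // orderkey narrowed to {2, 3}
    }
}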
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
Class HiveSplitManager, method getPartitionSplitInfo.
private Map<String, PartitionSplitInfo> getPartitionSplitInfo(
        ConnectorSession session,
        SemiTransactionalHiveMetastore metastore,
        SchemaTableName tableName,
        List<HivePartition> partitionBatch,
        Map<String, HiveColumnHandle> predicateColumns,
        Optional<Map<Subfield, Domain>> domains)
{
    MetastoreContext metastoreContext = new MetastoreContext(
            session.getIdentity(),
            session.getQueryId(),
            session.getClientInfo(),
            session.getSource(),
            getMetastoreHeaders(session),
            isUserDefinedTypeEncodingEnabled(session),
            metastore.getColumnConverterProvider());
    Map<String, Optional<Partition>> partitions = metastore.getPartitionsByNames(
            metastoreContext,
            tableName.getSchemaName(),
            tableName.getTableName(),
            Lists.transform(partitionBatch, HivePartition::getPartitionId));

    Map<String, PartitionStatistics> partitionStatistics = ImmutableMap.of();
    if (domains.isPresent() && isPartitionStatisticsBasedOptimizationEnabled(session)) {
        partitionStatistics = metastore.getPartitionStatistics(
                metastoreContext,
                tableName.getSchemaName(),
                tableName.getTableName(),
                partitionBatch.stream().map(HivePartition::getPartitionId).collect(toImmutableSet()));
    }

    Map<String, String> partitionNameToLocation = new HashMap<>();
    ImmutableMap.Builder<String, PartitionSplitInfo> partitionSplitInfoBuilder = ImmutableMap.builder();
    for (Map.Entry<String, Optional<Partition>> entry : partitions.entrySet()) {
        ImmutableSet.Builder<ColumnHandle> redundantColumnDomainsBuilder = ImmutableSet.builder();
        if (!entry.getValue().isPresent()) {
            throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Partition no longer exists: " + entry.getKey());
        }
        boolean pruned = false;
        if (partitionStatistics.containsKey(entry.getKey())) {
            Map<String, HiveColumnStatistics> columnStatistics = partitionStatistics.get(entry.getKey()).getColumnStatistics();
            for (Map.Entry<String, HiveColumnHandle> predicateColumnEntry : predicateColumns.entrySet()) {
                if (columnStatistics.containsKey(predicateColumnEntry.getKey())) {
                    Optional<ValueSet> columnsStatisticsValueSet = getColumnStatisticsValueSet(columnStatistics.get(predicateColumnEntry.getKey()), predicateColumnEntry.getValue().getHiveType());
                    Subfield subfield = new Subfield(predicateColumnEntry.getKey());
                    if (columnsStatisticsValueSet.isPresent() && domains.get().containsKey(subfield)) {
                        ValueSet columnPredicateValueSet = domains.get().get(subfield).getValues();
                        if (!columnPredicateValueSet.overlaps(columnsStatisticsValueSet.get())) {
                            pruned = true;
                            break;
                        }
                        if (columnPredicateValueSet.contains(columnsStatisticsValueSet.get())) {
                            redundantColumnDomainsBuilder.add(predicateColumnEntry.getValue());
                        }
                    }
                }
            }
        }
        if (!pruned) {
            partitionNameToLocation.put(entry.getKey(), entry.getValue().get().getStorage().getLocation());
        }
        partitionSplitInfoBuilder.put(entry.getKey(), new PartitionSplitInfo(entry.getValue().get(), pruned, redundantColumnDomainsBuilder.build()));
    }
    metastore.setPartitionLeases(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionNameToLocation, getLeaseDuration(session));
    return partitionSplitInfoBuilder.build();
}
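The pruning decision above reduces to two ValueSet tests per predicate column: overlaps decides whether the partition can match the predicate at all, and contains decides whether the filter is redundant for that partition. A minimal sketch of both tests; the min/max range is a made-up stand-in for what getColumnStatisticsValueSet derives from the metastore statistics.

import com.facebook.presto.common.predicate.Range;
import com.facebook.presto.common.predicate.ValueSet;

import static com.facebook.presto.common.type.BigintType.BIGINT;

public class StatsPruningSketch
{
    public static void main(String[] args)
    {
        // Stand-in for the statistics-derived value set: this partition's
        // column values lie in [0, 10].
        ValueSet statsRange = ValueSet.ofRanges(Range.range(BIGINT, 0L, true, 10L, true));

        // Predicate domain for the same column: col > 20.
        ValueSet predicate = ValueSet.ofRanges(Range.greaterThan(BIGINT, 20L));

        // No overlap: no row in the partition can pass the predicate, so the
        // whole partition is pruned without ever listing its files.
        System.out.println(predicate.overlaps(statsRange)); // false

        // Containment is the dual test: if the predicate covers the entire
        // stats range, the column filter is redundant for this partition and
        // need not be re-evaluated per row.
        ValueSet widePredicate = ValueSet.ofRanges(Range.greaterThan(BIGINT, -1L));
        System.out.println(widePredicate.contains(statsRange)); // true
    }
}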
Use of com.facebook.presto.common.Subfield in project presto by prestodb.
Class OrcSelectivePageSourceFactory, method toFilterFunctions.
/**
 * Split the filter expression into groups of conjuncts that depend on the same set of inputs,
 * then compile each group into a FilterFunction.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler)
{
    ImmutableList.Builder<FilterFunction> filterFunctions = ImmutableList.builder();
    bucketAdapter.map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), true, predicate)).ifPresent(filterFunctions::add);
    if (TRUE_CONSTANT.equals(filter)) {
        return filterFunctions.build();
    }

    DynamicFilterExtractResult extractDynamicFilterResult = extractDynamicFilters(filter);
    // dynamic filter will be added through subfield pushdown
    filter = and(extractDynamicFilterResult.getStaticConjuncts());

    if (!isAdaptiveFilterReorderingEnabled(session)) {
        filterFunctions.add(new FilterFunction(
                session.getSqlFunctionProperties(),
                determinismEvaluator.isDeterministic(filter),
                predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }

    List<RowExpression> conjuncts = extractConjuncts(filter);
    if (conjuncts.size() == 1) {
        filterFunctions.add(new FilterFunction(
                session.getSqlFunctionProperties(),
                determinismEvaluator.isDeterministic(filter),
                predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }

    // Use LinkedHashMap to preserve user-specified order of conjuncts. This will be the initial order in which filters are applied.
    Map<Set<Integer>, List<RowExpression>> inputsToConjuncts = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        inputsToConjuncts.computeIfAbsent(extractInputs(conjunct), k -> new ArrayList<>()).add(conjunct);
    }

    inputsToConjuncts.values().stream()
            .map(expressions -> binaryExpression(AND, expressions))
            .map(predicate -> new FilterFunction(
                    session.getSqlFunctionProperties(),
                    determinismEvaluator.isDeterministic(predicate),
                    predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), predicate).get()))
            .forEach(filterFunctions::add);
    return filterFunctions.build();
}
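The grouping step above needs no Presto machinery to demonstrate: conjuncts are bucketed by the exact set of input channels they read, and a LinkedHashMap keeps the original order, which becomes the initial filter-application order. A plain-JDK sketch, with strings standing in for RowExpression conjuncts and hard-coded input sets standing in for extractInputs:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ConjunctGroupingSketch
{
    public static void main(String[] args)
    {
        // Hypothetical conjuncts paired with the input channels they read.
        Map<String, Set<Integer>> conjuncts = new LinkedHashMap<>();
        conjuncts.put("a > 5", Set.of(0));
        conjuncts.put("a < 100", Set.of(0));
        conjuncts.put("b = 'x'", Set.of(1));
        conjuncts.put("a + c > b", Set.of(0, 1, 2));

        // Group by input set; LinkedHashMap preserves first-seen order, so
        // "a > 5" and "a < 100" land in the same group and stay first.
        Map<Set<Integer>, List<String>> inputsToConjuncts = new LinkedHashMap<>();
        conjuncts.forEach((conjunct, inputs) ->
                inputsToConjuncts.computeIfAbsent(inputs, k -> new ArrayList<>()).add(conjunct));

        // Each group would then be AND-ed and compiled into one FilterFunction.
        inputsToConjuncts.forEach((inputs, group) ->
                System.out.println(inputs + " -> " + String.join(" AND ", group)));
    }
}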