use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
the class PushPredicateIntoTableScan method pushFilterIntoTableScan.
/**
 * Attempts to push the predicate of {@code filterNode} into the table scan {@code node} by offering it
 * to the connector through {@code Metadata.applyFilter}.
 *
 * <p>Outcomes, in the order they are checked:
 * <ul>
 * <li>{@code Optional.empty()} when {@code isAllowPushdownIntoConnectors(session)} is false;</li>
 * <li>the original {@code node}, wrapped in a {@code FilterNode} if any predicate is left, when the constraint
 * carries no predicate function, the translated connector expression is absent or TRUE, and the new domain
 * contains the constraint the scan already enforces;</li>
 * <li>an empty {@code ValuesNode} when the new domain is none;</li>
 * <li>{@code Optional.empty()} when {@code applyFilter} returns no result;</li>
 * <li>an empty {@code ValuesNode} when the predicate reported by the new table's properties is none;</li>
 * <li>otherwise a new {@code TableScanNode} over the handle returned by the connector, wrapped in a
 * {@code FilterNode} if the resulting predicate is not TRUE.</li>
 * </ul>
 *
 * @param filterNode filter whose predicate is pushed; its id is reused for any filter that is produced
 * @param node table scan that receives the pushed predicate
 * @param pruneWithPredicateExpression when true, and part of the predicate could not be turned into a tuple
 *        domain, the constraint additionally carries an evaluator-backed predicate
 * @param session current session
 * @param symbolAllocator source of symbol types
 * @param plannerContext provides metadata access
 * @param typeAnalyzer used to type the remaining expression
 * @param statsProvider used when deriving statistics for the new table scan
 * @param domainTranslator converts tuple domains back into expressions
 * @return the replacement plan node, or empty when no rewrite is produced
 */
public static Optional<PlanNode> pushFilterIntoTableScan(FilterNode filterNode, TableScanNode node, boolean pruneWithPredicateExpression, Session session, SymbolAllocator symbolAllocator, PlannerContext plannerContext, TypeAnalyzer typeAnalyzer, StatsProvider statsProvider, DomainTranslator domainTranslator) {
if (!isAllowPushdownIntoConnectors(session)) {
return Optional.empty();
}
// Split the predicate into its deterministic, non-deterministic and dynamic-filter parts;
// only the deterministic part is decomposed into a tuple domain plus a remaining expression.
SplitExpression splitExpression = splitExpression(plannerContext, filterNode.getPredicate());
DomainTranslator.ExtractionResult decomposedPredicate = DomainTranslator.getExtractionResult(plannerContext, session, splitExpression.getDeterministicPredicate(), symbolAllocator.getTypes());
// Re-key the extracted domain from symbols to column handles and narrow it by what the scan already enforces.
TupleDomain<ColumnHandle> newDomain = decomposedPredicate.getTupleDomain().transformKeys(node.getAssignments()::get).intersect(node.getEnforcedConstraint());
Map<NodeRef<Expression>, Type> remainingExpressionTypes = typeAnalyzer.getTypes(session, symbolAllocator.getTypes(), decomposedPredicate.getRemainingExpression());
// Try to express the part that did not fit into the tuple domain as a ConnectorExpression.
Optional<ConnectorExpression> connectorExpression = new ConnectorExpressionTranslator.SqlToConnectorExpressionTranslator(session, remainingExpressionTypes, plannerContext).process(decomposedPredicate.getRemainingExpression());
// The name -> column handle assignments are only built when the translation above succeeded.
Map<String, ColumnHandle> connectorExpressionAssignments = connectorExpression.map(ignored -> node.getAssignments().entrySet().stream().collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue))).orElse(ImmutableMap.of());
// Inverse lookup (column handle -> symbol), used below to turn column-keyed domains back into symbol-keyed ones.
Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();
Constraint constraint;
// use evaluator only when there is some predicate which could not be translated into tuple domain
if (pruneWithPredicateExpression && !TRUE_LITERAL.equals(decomposedPredicate.getRemainingExpression())) {
LayoutConstraintEvaluator evaluator = new LayoutConstraintEvaluator(plannerContext, typeAnalyzer, session, symbolAllocator.getTypes(), node.getAssignments(), combineConjuncts(plannerContext.getMetadata(), splitExpression.getDeterministicPredicate(), // the tuple domain is simplified first to keep the expression small; a large one would be expensive to evaluate in the call to isCandidate below.
domainTranslator.toPredicate(session, newDomain.simplify().transformKeys(assignments::get))));
constraint = new Constraint(newDomain, connectorExpression.orElse(TRUE), connectorExpressionAssignments, evaluator::isCandidate, evaluator.getArguments());
} else {
// Currently, invoking the expression interpreter is very expensive.
// TODO invoke the interpreter unconditionally when the interpreter becomes cheap enough.
constraint = new Constraint(newDomain, connectorExpression.orElse(TRUE), connectorExpressionAssignments);
}
// check if new domain is wider than domain already provided by table scan
if (constraint.predicate().isEmpty() && // TODO do we need to track enforced ConnectorExpression in TableScanNode?
TRUE.equals(connectorExpression.orElse(TRUE)) && newDomain.contains(node.getEnforcedConstraint())) {
// Nothing new to offer to the connector: keep the scan as is and re-apply only the parts of the
// predicate that are not covered by the tuple domain (hence TRUE_LITERAL in the domain position).
Expression resultingPredicate = createResultingPredicate(plannerContext, session, symbolAllocator, typeAnalyzer, splitExpression.getDynamicFilter(), TRUE_LITERAL, splitExpression.getNonDeterministicPredicate(), decomposedPredicate.getRemainingExpression());
if (!TRUE_LITERAL.equals(resultingPredicate)) {
return Optional.of(new FilterNode(filterNode.getId(), node, resultingPredicate));
}
return Optional.of(node);
}
if (newDomain.isNone()) {
// The new domain is none, so the subtree is replaced with a Values node that has no rows.
return Optional.of(new ValuesNode(node.getId(), node.getOutputSymbols(), ImmutableList.of()));
}
// Offer the constraint to the connector; an empty result leads to no rewrite.
Optional<ConstraintApplicationResult<TableHandle>> result = plannerContext.getMetadata().applyFilter(session, node.getTable(), constraint);
if (result.isEmpty()) {
return Optional.empty();
}
TableHandle newTable = result.get().getHandle();
TableProperties newTableProperties = plannerContext.getMetadata().getTableProperties(session, newTable);
Optional<TablePartitioning> newTablePartitioning = newTableProperties.getTablePartitioning();
// The new table handle reports a none predicate, so the scan is again replaced with an empty Values node.
if (newTableProperties.getPredicate().isNone()) {
return Optional.of(new ValuesNode(node.getId(), node.getOutputSymbols(), ImmutableList.of()));
}
TupleDomain<ColumnHandle> remainingFilter = result.get().getRemainingFilter();
Optional<ConnectorExpression> remainingConnectorExpression = result.get().getRemainingExpression();
boolean precalculateStatistics = result.get().isPrecalculateStatistics();
verifyTablePartitioning(session, plannerContext.getMetadata(), node, newTablePartitioning);
// Same id, outputs and assignments as the original scan, but over the new handle and with the
// enforced constraint computed from what the connector reported as still unenforced.
TableScanNode tableScan = new TableScanNode(node.getId(), newTable, node.getOutputSymbols(), node.getAssignments(), computeEnforced(newDomain, remainingFilter), // TODO (https://github.com/trinodb/trino/issues/8144) distinguish between predicate pushed down and remaining
deriveTableStatisticsForPushdown(statsProvider, session, precalculateStatistics, filterNode), node.isUpdateTarget(), node.getUseConnectorNodePartitioning());
Expression remainingDecomposedPredicate;
// The connector returned no remaining expression, or returned exactly the one it was given:
// keep the original remaining expression.
if (remainingConnectorExpression.isEmpty() || remainingConnectorExpression.equals(connectorExpression)) {
remainingDecomposedPredicate = decomposedPredicate.getRemainingExpression();
} else {
// Translate the connector's remaining expression back into an engine expression over the scan's symbols.
Map<String, Symbol> variableMappings = assignments.values().stream().collect(toImmutableMap(Symbol::getName, Function.identity()));
Expression translatedExpression = ConnectorExpressionTranslator.translate(session, remainingConnectorExpression.get(), plannerContext, variableMappings, new LiteralEncoder(plannerContext));
if (connectorExpression.isEmpty()) {
// No connector expression was sent, so the original remaining expression must still be applied
// together with whatever the connector returned.
remainingDecomposedPredicate = ExpressionUtils.combineConjuncts(plannerContext.getMetadata(), translatedExpression, decomposedPredicate.getRemainingExpression());
} else {
remainingDecomposedPredicate = translatedExpression;
}
}
// Assemble what still has to be evaluated above the scan: dynamic filter, the unenforced part of the
// domain (re-keyed to symbols), the non-deterministic part, and the remaining decomposed predicate.
Expression resultingPredicate = createResultingPredicate(plannerContext, session, symbolAllocator, typeAnalyzer, splitExpression.getDynamicFilter(), domainTranslator.toPredicate(session, remainingFilter.transformKeys(assignments::get)), splitExpression.getNonDeterministicPredicate(), remainingDecomposedPredicate);
if (!TRUE_LITERAL.equals(resultingPredicate)) {
return Optional.of(new FilterNode(filterNode.getId(), tableScan, resultingPredicate));
}
return Optional.of(tableScan);
}
use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
the class PushPredicateIntoTableScan method computeEnforced.
/**
 * Derives the part of {@code predicate} that the connector enforced, given the part it reported as
 * {@code unenforced}.
 *
 * <p>The connector must handle each column's domain as a whole: a column either appears in
 * {@code unenforced} with exactly the domain from {@code predicate}, or does not appear at all (enforced).
 * Consequently {@code unenforced} equals {@code predicate} when nothing was enforced, holds a subset of its
 * column-domain pairs when some columns were enforced, and is {@code TupleDomain.all()} when everything
 * was enforced. It is never {@code TupleDomain.none()}.
 *
 * @param predicate the tuple domain that was offered to the connector
 * @param unenforced the tuple domain the connector reported as not enforced
 * @return a tuple domain holding the column-domain pairs of {@code predicate} absent from {@code unenforced}
 * @throws IllegalArgumentException if {@code unenforced} is none, changes a column's domain, or mentions
 *         columns that are not part of {@code predicate}
 */
public static TupleDomain<ColumnHandle> computeEnforced(TupleDomain<ColumnHandle> predicate, TupleDomain<ColumnHandle> unenforced) {
    checkArgument(!unenforced.isNone());

    Map<ColumnHandle, Domain> requested = predicate.getDomains().get();
    Map<ColumnHandle, Domain> leftover = unenforced.getDomains().get();

    ImmutableMap.Builder<ColumnHandle, Domain> enforcedBuilder = ImmutableMap.builder();
    requested.forEach((column, domain) -> {
        if (!leftover.containsKey(column)) {
            // Column is absent from the unenforced part, so its whole domain was enforced.
            enforcedBuilder.put(column, domain);
            return;
        }
        // Column is still unenforced; its domain must be returned unchanged.
        checkArgument(domain.equals(leftover.get(column)), "Enforced tuple domain cannot be determined. The connector is expected to enforce the respective domain entirely on none, some, or all of the column.");
    });
    Map<ColumnHandle, Domain> enforced = enforcedBuilder.buildOrThrow();

    // Every unenforced column was matched above only if the two parts add up to the full predicate.
    checkArgument(enforced.size() + leftover.size() == requested.size(), "Enforced tuple domain cannot be determined. Connector returned an unenforced TupleDomain that contains columns not in predicate.");
    return TupleDomain.withColumnDomains(enforced);
}
use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
the class PushJoinIntoTableScan method apply.
/**
 * Tries to replace a join over two table scans with a single table scan by delegating the join to the
 * connector through {@code metadata.applyJoin}.
 *
 * <p>The rule does not fire for cross joins, when part of the join filter cannot be pushed, when either
 * side has a none enforced constraint, or when the connector returns no result.
 *
 * @return a projection over the new table scan that exposes the join's output symbols, or an empty result
 */
@Override
public Result apply(JoinNode joinNode, Captures captures, Context context) {
    if (joinNode.isCrossJoin()) {
        return Result.empty();
    }

    TableScanNode left = captures.get(LEFT_TABLE_SCAN);
    TableScanNode right = captures.get(RIGHT_TABLE_SCAN);
    verify(!(left.isUpdateTarget() || right.isUpdateTarget()), "Unexpected Join over for-update table scan");

    FilterSplitResult splitResult = splitFilter(getEffectiveFilter(joinNode), left.getOutputSymbols(), right.getOutputSymbols(), context);
    if (!splitResult.getRemainingFilter().equals(BooleanLiteral.TRUE_LITERAL)) {
        // TODO add extra filter node above join
        return Result.empty();
    }

    if (left.getEnforcedConstraint().isNone() || right.getEnforcedConstraint().isNone()) {
        // One side has a none enforced constraint; skip pushdown instead of handling that case
        // when the enforced constraint of the joined scan is derived below.
        return Result.empty();
    }

    Map<String, ColumnHandle> leftHandlesByName = left.getAssignments().entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue));
    Map<String, ColumnHandle> rightHandlesByName = right.getAssignments().entrySet().stream()
            .collect(toImmutableMap(entry -> entry.getKey().getName(), Map.Entry::getValue));

    /*
     * Estimated statistics for the join and for both table scans are computed (lazily) by the engine
     * and handed to the connector via applyJoin, because:
     * - the engine knows how to estimate a join while the connector may not
     * - the engine may already have cached stats for the table scans (within context.getStatsProvider()),
     *   so it may be able to provide them more cheaply
     * - in the future, the engine may be able to provide table scan stats even when the connector
     *   no longer can (see https://github.com/trinodb/trino/issues/6998)
     * - the pushdown feasibility assessment may differ (or be configured differently) between
     *   connectors/catalogs.
     */
    JoinStatistics statistics = getJoinStatistics(joinNode, left, right, context);

    // TODO we could pass only subset of assignments here, those which are needed to resolve filterSplitResult.getPushableConditions
    Optional<JoinApplicationResult<TableHandle>> applied = metadata.applyJoin(
            context.getSession(),
            getJoinType(joinNode),
            left.getTable(),
            right.getTable(),
            splitResult.getPushableConditions(),
            leftHandlesByName,
            rightHandlesByName,
            statistics);
    if (applied.isEmpty()) {
        return Result.empty();
    }
    JoinApplicationResult<TableHandle> pushedJoin = applied.get();

    TableHandle joinedTable = pushedJoin.getTableHandle();
    Map<ColumnHandle, ColumnHandle> leftHandleMapping = pushedJoin.getLeftColumnHandles();
    Map<ColumnHandle, ColumnHandle> rightHandleMapping = pushedJoin.getRightColumnHandles();

    // Re-point every symbol of both scans at the corresponding column handle of the joined table.
    ImmutableMap.Builder<Symbol, ColumnHandle> joinedAssignments = ImmutableMap.builder();
    joinedAssignments.putAll(left.getAssignments().entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> leftHandleMapping.get(entry.getValue()))));
    joinedAssignments.putAll(right.getAssignments().entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> rightHandleMapping.get(entry.getValue()))));
    Map<Symbol, ColumnHandle> assignments = joinedAssignments.buildOrThrow();

    // Convert the enforced constraints of both sides to the joined table's column handles.
    // NOTE(review): the boolean passed to deriveConstraint presumably marks the side that an outer join
    // can null-extend - confirm against deriveConstraint.
    JoinNode.Type type = joinNode.getType();
    TupleDomain<ColumnHandle> leftEnforced = deriveConstraint(left.getEnforcedConstraint(), leftHandleMapping, type == RIGHT || type == FULL);
    TupleDomain<ColumnHandle> rightEnforced = deriveConstraint(right.getEnforcedConstraint(), rightHandleMapping, type == LEFT || type == FULL);
    TupleDomain<ColumnHandle> joinedEnforced = TupleDomain.withColumnDomains(
            ImmutableMap.<ColumnHandle, Domain>builder()
                    .putAll(leftEnforced.getDomains().orElseThrow())
                    .putAll(rightEnforced.getDomains().orElseThrow())
                    .buildOrThrow());

    // The identity projection keeps the join's output symbols on top of the new scan.
    return Result.ofPlanNode(new ProjectNode(
            context.getIdAllocator().getNextId(),
            new TableScanNode(
                    joinNode.getId(),
                    joinedTable,
                    ImmutableList.copyOf(assignments.keySet()),
                    assignments,
                    joinedEnforced,
                    deriveTableStatisticsForPushdown(context.getStatsProvider(), context.getSession(), pushedJoin.isPrecalculateStatistics(), joinNode),
                    false,
                    Optional.empty()),
            Assignments.identity(joinNode.getOutputSymbols())));
}
use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
the class PushPredicateThroughProjectIntoWindow method extractUpperBound.
/**
 * Extracts the inclusive upper bound that {@code tupleDomain} places on {@code symbol}.
 *
 * @param tupleDomain domain to inspect
 * @param symbol symbol whose upper bound is requested
 * @return the inclusive upper bound when one exists and fits in an {@code int}; empty when the tuple
 *         domain is none, has no domain for the symbol, the value set is all, none or has no ranges,
 *         the span is unbounded above, or the bound is outside the {@code int} range
 */
private static OptionalInt extractUpperBound(TupleDomain<Symbol> tupleDomain, Symbol symbol) {
    if (tupleDomain.isNone()) {
        return OptionalInt.empty();
    }

    Domain symbolDomain = tupleDomain.getDomains().get().get(symbol);
    if (symbolDomain == null) {
        return OptionalInt.empty();
    }

    ValueSet valueSet = symbolDomain.getValues();
    if (valueSet.isAll() || valueSet.isNone() || valueSet.getRanges().getRangeCount() <= 0) {
        return OptionalInt.empty();
    }

    Range span = valueSet.getRanges().getSpan();
    if (span.isHighUnbounded()) {
        return OptionalInt.empty();
    }

    long highValue = (Long) span.getHighBoundedValue();
    // An exclusive bound is turned into the equivalent inclusive one.
    long inclusiveBound = span.isHighInclusive() ? highValue : highValue - 1;

    boolean fitsInInt = inclusiveBound >= Integer.MIN_VALUE && inclusiveBound <= Integer.MAX_VALUE;
    return fitsInInt ? OptionalInt.of(toIntExact(inclusiveBound)) : OptionalInt.empty();
}
use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
the class HiveMetadata method getTableProperties.
/**
 * Builds the {@code ConnectorTableProperties} of a Hive table handle: the predicate and discrete
 * predicates derived from its partitions, the table partitioning derived from its bucketing, and
 * the sorting properties derived from the bucket handle's sorted-by columns.
 *
 * @param session session consulted for the sorting-property and bucket-execution switches
 * @param table table handle, expected to be a {@code HiveTableHandle}
 * @return the properties of the table
 */
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
    HiveTableHandle handle = (HiveTableHandle) table;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(handle.getPartitionColumns());

    TupleDomain<ColumnHandle> predicate = TupleDomain.all();
    Optional<DiscretePredicates> discretePredicates = Optional.empty();

    // Predicate and discrete predicates are only computed when the handle carries no partition names.
    if (handle.getPartitionNames().isEmpty()) {
        Optional<List<HivePartition>> loadedPartitions = handle.getPartitions().or(() -> {
            // The partitions are loaded here to compute the predicates enforced by the table.
            // The result is not stored in the table handle, so this may be repeated many times.
            // TODO: https://github.com/trinodb/trino/issues/10980.
            HivePartitionResult candidates = partitionManager.getPartitions(metastore, table, new Constraint(handle.getEnforcedConstraint()));
            if (!partitionManager.canPartitionsBeLoaded(candidates)) {
                return Optional.empty();
            }
            return Optional.of(partitionManager.getPartitionsAsList(candidates));
        });

        if (loadedPartitions.isPresent()) {
            List<HivePartition> hivePartitions = loadedPartitions.orElseThrow();
            // All partitions are available at this point, so the predicate is derived from them.
            predicate = createPredicate(partitionColumns, hivePartitions);

            // Discrete predicates are only produced when the table has partition columns.
            if (!partitionColumns.isEmpty()) {
                // Tuple domains are produced one at a time through an Iterable: there can be a huge
                // number of partitions and their domains should not all be in memory at once.
                Iterable<TupleDomain<ColumnHandle>> perPartitionDomains = Iterables.transform(
                        hivePartitions,
                        partition -> TupleDomain.fromFixedValues(partition.getKeys()));
                discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, perPartitionDomains));
            }
        }
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    List<LocalProperty<ColumnHandle>> sortingProperties = ImmutableList.of();

    if (handle.getBucketHandle().isPresent()) {
        if (isPropagateTableScanSortingProperties(session) && !handle.getBucketHandle().get().getSortedBy().isEmpty()) {
            // Populating SortingProperty guarantees to the engine that it is reading pre-sorted input.
            // Compatibility between table-level and partition-level sorted_by properties is checked in
            // HiveSplitManager#getPartitionMetadata, which fails the query on a mismatch.
            // Results can still be incorrect if a sorted_by property is defined over unsorted files.
            Map<String, ColumnHandle> handlesByName = getColumnHandles(session, table);
            sortingProperties = handle.getBucketHandle().get().getSortedBy().stream()
                    .map(sortingColumn -> new SortingProperty<>(
                            handlesByName.get(sortingColumn.getColumnName()),
                            sortingColumn.getOrder().getSortOrder()))
                    .collect(toImmutableList());
        }

        if (isBucketExecutionEnabled(session)) {
            tablePartitioning = handle.getBucketHandle().map(bucketing -> {
                HivePartitioningHandle partitioningHandle = new HivePartitioningHandle(
                        bucketing.getBucketingVersion(),
                        bucketing.getReadBucketCount(),
                        bucketing.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                        OptionalInt.empty(),
                        false);
                List<ColumnHandle> bucketColumns = bucketing.getColumns().stream()
                        .map(ColumnHandle.class::cast)
                        .collect(toImmutableList());
                return new ConnectorTablePartitioning(partitioningHandle, bucketColumns);
            });
        }
    }

    return new ConnectorTableProperties(predicate, tablePartitioning, Optional.empty(), discretePredicates, sortingProperties);
}
Aggregations