use of com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT in project presto by prestodb.
the class RewriteAggregationIfToFilter method apply.
@Override
public Result apply(AggregationNode aggregationNode, Captures captures, Context context) {
ProjectNode sourceProject = captures.get(CHILD);
Set<Aggregation> aggregationsToRewrite = aggregationNode.getAggregations().values().stream().filter(aggregation -> shouldRewriteAggregation(aggregation, sourceProject)).collect(toImmutableSet());
if (aggregationsToRewrite.isEmpty()) {
return Result.empty();
}
context.getSession().getRuntimeStats().addMetricValue(REWRITE_AGGREGATION_IF_TO_FILTER_APPLIED, 1);
// Get the corresponding assignments in the input project.
// The aggregationReferences only has the aggregations to rewrite, thus the sourceAssignments only has IF/CAST(IF) expressions with NULL false results.
// Multiple aggregations may reference the same input. We use a map to dedup them based on the VariableReferenceExpression, so that we only do the rewrite once per input
// IF expression.
// The order of sourceAssignments determines the order of generating the new variables for the IF conditions and results. We use a sorted map to get a deterministic
// order based on the name of the VariableReferenceExpressions.
Map<VariableReferenceExpression, RowExpression> sourceAssignments = aggregationsToRewrite.stream().map(aggregation -> (VariableReferenceExpression) aggregation.getArguments().get(0)).collect(toImmutableSortedMap(VariableReferenceExpression::compareTo, identity(), variable -> sourceProject.getAssignments().get(variable), (left, right) -> left));
Assignments.Builder newAssignments = Assignments.builder();
newAssignments.putAll(sourceProject.getAssignments());
// Map from the aggregation reference to the IF condition reference which will be put in the mask.
Map<VariableReferenceExpression, VariableReferenceExpression> aggregationReferenceToConditionReference = new HashMap<>();
// Map from the aggregation reference to the IF result reference. This only contains the aggregates where the IF can be safely unwrapped.
// E.g., SUM(IF(CARDINALITY(array) > 0, array[1])) will not be included in this map as array[1] can return errors if we unwrap the IF.
Map<VariableReferenceExpression, VariableReferenceExpression> aggregationReferenceToIfResultReference = new HashMap<>();
AggregationIfToFilterRewriteStrategy rewriteStrategy = getAggregationIfToFilterRewriteStrategy(context.getSession());
for (Map.Entry<VariableReferenceExpression, RowExpression> entry : sourceAssignments.entrySet()) {
VariableReferenceExpression outputVariable = entry.getKey();
RowExpression rowExpression = entry.getValue();
SpecialFormExpression ifExpression = (SpecialFormExpression) ((rowExpression instanceof CallExpression) ? ((CallExpression) rowExpression).getArguments().get(0) : rowExpression);
RowExpression condition = ifExpression.getArguments().get(0);
VariableReferenceExpression conditionReference = context.getVariableAllocator().newVariable(condition);
newAssignments.put(conditionReference, condition);
aggregationReferenceToConditionReference.put(outputVariable, conditionReference);
if (canUnwrapIf(ifExpression, rewriteStrategy)) {
RowExpression trueResult = ifExpression.getArguments().get(1);
if (rowExpression instanceof CallExpression) {
// Wrap the result with CAST().
trueResult = new CallExpression(((CallExpression) rowExpression).getDisplayName(), ((CallExpression) rowExpression).getFunctionHandle(), rowExpression.getType(), ImmutableList.of(trueResult));
}
VariableReferenceExpression ifResultReference = context.getVariableAllocator().newVariable(trueResult);
newAssignments.put(ifResultReference, trueResult);
aggregationReferenceToIfResultReference.put(outputVariable, ifResultReference);
}
}
// Build new aggregations.
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> aggregations = ImmutableMap.builder();
// Stores the masks used to build the filter predicates. Use set to dedup the predicates.
ImmutableSortedSet.Builder<VariableReferenceExpression> masks = ImmutableSortedSet.naturalOrder();
for (Map.Entry<VariableReferenceExpression, Aggregation> entry : aggregationNode.getAggregations().entrySet()) {
VariableReferenceExpression output = entry.getKey();
Aggregation aggregation = entry.getValue();
if (!aggregationsToRewrite.contains(aggregation)) {
aggregations.put(output, aggregation);
continue;
}
VariableReferenceExpression aggregationReference = (VariableReferenceExpression) aggregation.getArguments().get(0);
CallExpression callExpression = aggregation.getCall();
VariableReferenceExpression ifResultReference = aggregationReferenceToIfResultReference.get(aggregationReference);
if (ifResultReference != null) {
callExpression = new CallExpression(callExpression.getSourceLocation(), callExpression.getDisplayName(), callExpression.getFunctionHandle(), callExpression.getType(), ImmutableList.of(ifResultReference));
}
VariableReferenceExpression mask = aggregationReferenceToConditionReference.get(aggregationReference);
aggregations.put(output, new Aggregation(callExpression, Optional.empty(), aggregation.getOrderBy(), aggregation.isDistinct(), Optional.of(aggregationReferenceToConditionReference.get(aggregationReference))));
masks.add(mask);
}
RowExpression predicate = TRUE_CONSTANT;
if (!aggregationNode.hasNonEmptyGroupingSet() && aggregationsToRewrite.size() == aggregationNode.getAggregations().size()) {
// All aggregations are rewritten by this rule. We can add a filter with all the masks to make the query more efficient.
predicate = or(masks.build());
}
return Result.ofPlanNode(new AggregationNode(aggregationNode.getSourceLocation(), context.getIdAllocator().getNextId(), new FilterNode(aggregationNode.getSourceLocation(), context.getIdAllocator().getNextId(), new ProjectNode(context.getIdAllocator().getNextId(), sourceProject.getSource(), newAssignments.build()), predicate), aggregations.build(), aggregationNode.getGroupingSets(), aggregationNode.getPreGroupedVariables(), aggregationNode.getStep(), aggregationNode.getHashVariable(), aggregationNode.getGroupIdVariable()));
}
use of com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT in project presto by prestodb.
the class HiveFilterPushdown method pushdownFilter.
@VisibleForTesting
public static ConnectorPushdownFilterResult pushdownFilter(ConnectorSession session, ConnectorMetadata metadata, SemiTransactionalHiveMetastore metastore, RowExpressionService rowExpressionService, StandardFunctionResolution functionResolution, HivePartitionManager partitionManager, FunctionMetadataManager functionMetadataManager, ConnectorTableHandle tableHandle, RowExpression filter, Optional<ConnectorTableLayoutHandle> currentLayoutHandle) {
checkArgument(!FALSE_CONSTANT.equals(filter), "Cannot pushdown filter that is always false");
if (TRUE_CONSTANT.equals(filter) && currentLayoutHandle.isPresent()) {
return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, currentLayoutHandle.get()), TRUE_CONSTANT);
}
// Split the filter into 3 groups of conjuncts:
// - range filters that apply to entire columns,
// - range filters that apply to subfields,
// - the rest. Intersect these with possibly pre-existing filters.
DomainTranslator.ExtractionResult<Subfield> decomposedFilter = rowExpressionService.getDomainTranslator().fromPredicate(session, filter, new SubfieldExtractor(functionResolution, rowExpressionService.getExpressionOptimizer(), session).toColumnExtractor());
if (currentLayoutHandle.isPresent()) {
HiveTableLayoutHandle currentHiveLayout = (HiveTableLayoutHandle) currentLayoutHandle.get();
decomposedFilter = intersectExtractionResult(new DomainTranslator.ExtractionResult(currentHiveLayout.getDomainPredicate(), currentHiveLayout.getRemainingPredicate()), decomposedFilter);
}
if (decomposedFilter.getTupleDomain().isNone()) {
return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
}
RowExpression optimizedRemainingExpression = rowExpressionService.getExpressionOptimizer().optimize(decomposedFilter.getRemainingExpression(), OPTIMIZED, session);
if (optimizedRemainingExpression instanceof ConstantExpression) {
ConstantExpression constantExpression = (ConstantExpression) optimizedRemainingExpression;
if (FALSE_CONSTANT.equals(constantExpression) || constantExpression.getValue() == null) {
return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
}
}
Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
TupleDomain<ColumnHandle> entireColumnDomain = decomposedFilter.getTupleDomain().transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null).transform(columnHandles::get);
if (currentLayoutHandle.isPresent()) {
entireColumnDomain = entireColumnDomain.intersect(((HiveTableLayoutHandle) (currentLayoutHandle.get())).getPartitionColumnPredicate());
}
Constraint<ColumnHandle> constraint = new Constraint<>(entireColumnDomain);
// Extract deterministic conjuncts that apply to partition columns and specify these as Constraint#predicate
if (!TRUE_CONSTANT.equals(decomposedFilter.getRemainingExpression())) {
LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
RowExpression deterministicPredicate = logicalRowExpressions.filterDeterministicConjuncts(decomposedFilter.getRemainingExpression());
if (!TRUE_CONSTANT.equals(deterministicPredicate)) {
ConstraintEvaluator evaluator = new ConstraintEvaluator(rowExpressionService, session, columnHandles, deterministicPredicate);
constraint = new Constraint<>(entireColumnDomain, evaluator::isCandidate);
}
}
HivePartitionResult hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint, session);
TupleDomain<Subfield> domainPredicate = withColumnDomains(ImmutableMap.<Subfield, Domain>builder().putAll(hivePartitionResult.getUnenforcedConstraint().transform(HiveFilterPushdown::toSubfield).getDomains().orElse(ImmutableMap.of())).putAll(decomposedFilter.getTupleDomain().transform(subfield -> !isEntireColumn(subfield) ? subfield : null).getDomains().orElse(ImmutableMap.of())).build());
Set<String> predicateColumnNames = new HashSet<>();
domainPredicate.getDomains().get().keySet().stream().map(Subfield::getRootName).forEach(predicateColumnNames::add);
// Include only columns referenced in the optimized expression. Although the expression is sent to the worker node
// unoptimized, the worker is expected to optimize the expression before executing.
extractAll(optimizedRemainingExpression).stream().map(VariableReferenceExpression::getName).forEach(predicateColumnNames::add);
Map<String, HiveColumnHandle> predicateColumns = predicateColumnNames.stream().map(columnHandles::get).map(HiveColumnHandle.class::cast).collect(toImmutableMap(HiveColumnHandle::getName, Functions.identity()));
SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
List<RowExpression> conjuncts = extractConjuncts(decomposedFilter.getRemainingExpression());
RowExpression dynamicFilterExpression = extractDynamicConjuncts(conjuncts, logicalRowExpressions);
RowExpression remainingExpression = extractStaticConjuncts(conjuncts, logicalRowExpressions);
remainingExpression = removeNestedDynamicFilters(remainingExpression);
Table table = metastore.getTable(new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, new HiveTableLayoutHandle(tableName, table.getStorage().getLocation(), hivePartitionResult.getPartitionColumns(), // remove comments to optimize serialization costs
pruneColumnComments(hivePartitionResult.getDataColumns()), hivePartitionResult.getTableParameters(), hivePartitionResult.getPartitions(), domainPredicate, remainingExpression, predicateColumns, hivePartitionResult.getEnforcedConstraint(), hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), true, createTableLayoutString(session, rowExpressionService, tableName, hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), remainingExpression, domainPredicate), currentLayoutHandle.map(layout -> ((HiveTableLayoutHandle) layout).getRequestedColumns()).orElse(Optional.empty()), false)), dynamicFilterExpression);
}
use of com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT in project presto by prestodb.
the class TestHiveFileFormats method testPageSourceFactory.
private void testPageSourceFactory(HiveBatchPageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);
Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(), ImmutableSet.of(sourceFactory), new Configuration(), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), columnHandles, ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
assertTrue(pageSource.isPresent());
checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
use of com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT in project presto by prestodb.
the class TestHiveFileFormats method testCursorProvider.
private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
Configuration configuration = new Configuration();
configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), getColumnHandles(testColumns), ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
checkCursor(cursor, testColumns, rowCount);
}
Aggregations