Search in sources :

Example 1 with HiveTableLayoutHandle

use of com.facebook.presto.hive.HiveTableLayoutHandle in project presto by prestodb.

the class HiveFilterPushdown method pushdownFilter.

@VisibleForTesting
public static ConnectorPushdownFilterResult pushdownFilter(ConnectorSession session, ConnectorMetadata metadata, SemiTransactionalHiveMetastore metastore, RowExpressionService rowExpressionService, StandardFunctionResolution functionResolution, HivePartitionManager partitionManager, FunctionMetadataManager functionMetadataManager, ConnectorTableHandle tableHandle, RowExpression filter, Optional<ConnectorTableLayoutHandle> currentLayoutHandle) {
    checkArgument(!FALSE_CONSTANT.equals(filter), "Cannot pushdown filter that is always false");
    if (TRUE_CONSTANT.equals(filter) && currentLayoutHandle.isPresent()) {
        return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, currentLayoutHandle.get()), TRUE_CONSTANT);
    }
    // Split the filter into 3 groups of conjuncts:
    // - range filters that apply to entire columns,
    // - range filters that apply to subfields,
    // - the rest. Intersect these with possibly pre-existing filters.
    DomainTranslator.ExtractionResult<Subfield> decomposedFilter = rowExpressionService.getDomainTranslator().fromPredicate(session, filter, new SubfieldExtractor(functionResolution, rowExpressionService.getExpressionOptimizer(), session).toColumnExtractor());
    if (currentLayoutHandle.isPresent()) {
        HiveTableLayoutHandle currentHiveLayout = (HiveTableLayoutHandle) currentLayoutHandle.get();
        decomposedFilter = intersectExtractionResult(new DomainTranslator.ExtractionResult(currentHiveLayout.getDomainPredicate(), currentHiveLayout.getRemainingPredicate()), decomposedFilter);
    }
    if (decomposedFilter.getTupleDomain().isNone()) {
        return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
    }
    RowExpression optimizedRemainingExpression = rowExpressionService.getExpressionOptimizer().optimize(decomposedFilter.getRemainingExpression(), OPTIMIZED, session);
    if (optimizedRemainingExpression instanceof ConstantExpression) {
        ConstantExpression constantExpression = (ConstantExpression) optimizedRemainingExpression;
        if (FALSE_CONSTANT.equals(constantExpression) || constantExpression.getValue() == null) {
            return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
        }
    }
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    TupleDomain<ColumnHandle> entireColumnDomain = decomposedFilter.getTupleDomain().transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null).transform(columnHandles::get);
    if (currentLayoutHandle.isPresent()) {
        entireColumnDomain = entireColumnDomain.intersect(((HiveTableLayoutHandle) (currentLayoutHandle.get())).getPartitionColumnPredicate());
    }
    Constraint<ColumnHandle> constraint = new Constraint<>(entireColumnDomain);
    // Extract deterministic conjuncts that apply to partition columns and specify these as Constraint#predicate
    if (!TRUE_CONSTANT.equals(decomposedFilter.getRemainingExpression())) {
        LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
        RowExpression deterministicPredicate = logicalRowExpressions.filterDeterministicConjuncts(decomposedFilter.getRemainingExpression());
        if (!TRUE_CONSTANT.equals(deterministicPredicate)) {
            ConstraintEvaluator evaluator = new ConstraintEvaluator(rowExpressionService, session, columnHandles, deterministicPredicate);
            constraint = new Constraint<>(entireColumnDomain, evaluator::isCandidate);
        }
    }
    HivePartitionResult hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint, session);
    TupleDomain<Subfield> domainPredicate = withColumnDomains(ImmutableMap.<Subfield, Domain>builder().putAll(hivePartitionResult.getUnenforcedConstraint().transform(HiveFilterPushdown::toSubfield).getDomains().orElse(ImmutableMap.of())).putAll(decomposedFilter.getTupleDomain().transform(subfield -> !isEntireColumn(subfield) ? subfield : null).getDomains().orElse(ImmutableMap.of())).build());
    Set<String> predicateColumnNames = new HashSet<>();
    domainPredicate.getDomains().get().keySet().stream().map(Subfield::getRootName).forEach(predicateColumnNames::add);
    // Include only columns referenced in the optimized expression. Although the expression is sent to the worker node
    // unoptimized, the worker is expected to optimize the expression before executing.
    extractAll(optimizedRemainingExpression).stream().map(VariableReferenceExpression::getName).forEach(predicateColumnNames::add);
    Map<String, HiveColumnHandle> predicateColumns = predicateColumnNames.stream().map(columnHandles::get).map(HiveColumnHandle.class::cast).collect(toImmutableMap(HiveColumnHandle::getName, Functions.identity()));
    SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
    LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
    List<RowExpression> conjuncts = extractConjuncts(decomposedFilter.getRemainingExpression());
    RowExpression dynamicFilterExpression = extractDynamicConjuncts(conjuncts, logicalRowExpressions);
    RowExpression remainingExpression = extractStaticConjuncts(conjuncts, logicalRowExpressions);
    remainingExpression = removeNestedDynamicFilters(remainingExpression);
    Table table = metastore.getTable(new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, new HiveTableLayoutHandle(tableName, table.getStorage().getLocation(), hivePartitionResult.getPartitionColumns(), // remove comments to optimize serialization costs
    pruneColumnComments(hivePartitionResult.getDataColumns()), hivePartitionResult.getTableParameters(), hivePartitionResult.getPartitions(), domainPredicate, remainingExpression, predicateColumns, hivePartitionResult.getEnforcedConstraint(), hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), true, createTableLayoutString(session, rowExpressionService, tableName, hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), remainingExpression, domainPredicate), currentLayoutHandle.map(layout -> ((HiveTableLayoutHandle) layout).getRequestedColumns()).orElse(Optional.empty()), false)), dynamicFilterExpression);
}
Also used : LogicalRowExpressions.extractConjuncts(com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts) HiveSessionProperties.isParquetPushdownFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetPushdownFilterEnabled) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) PlanVisitor(com.facebook.presto.spi.plan.PlanVisitor) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) HiveTransactionManager(com.facebook.presto.hive.HiveTransactionManager) ValuesNode(com.facebook.presto.spi.plan.ValuesNode) TupleDomain.withColumnDomains(com.facebook.presto.common.predicate.TupleDomain.withColumnDomains) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Map(java.util.Map) FALSE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.FALSE_CONSTANT) INVALID_FUNCTION_ARGUMENT(com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT) HiveBucketHandle(com.facebook.presto.hive.HiveBucketHandle) BiMap(com.google.common.collect.BiMap) ImmutableSet(com.google.common.collect.ImmutableSet) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyList(java.util.Collections.emptyList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DomainTranslator(com.facebook.presto.spi.relation.DomainTranslator) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ConnectorSession(com.facebook.presto.spi.ConnectorSession) DynamicFilters.extractDynamicConjuncts(com.facebook.presto.expressions.DynamicFilters.extractDynamicConjuncts) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) DefaultRowExpressionTraversalVisitor(com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor) HiveTableHandle(com.facebook.presto.hive.HiveTableHandle) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) HivePartitionResult(com.facebook.presto.hive.HivePartitionResult) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) DynamicFilters.removeNestedDynamicFilters(com.facebook.presto.expressions.DynamicFilters.removeNestedDynamicFilters) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) ConnectorPlanOptimizer(com.facebook.presto.spi.ConnectorPlanOptimizer) Table(com.facebook.presto.hive.metastore.Table) DIVISION_BY_ZERO(com.facebook.presto.spi.StandardErrorCode.DIVISION_BY_ZERO) Column(com.facebook.presto.hive.metastore.Column) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) ConstantExpression(com.facebook.presto.spi.relation.ConstantExpression) Function(java.util.function.Function) HashSet(java.util.HashSet) FilterNode(com.facebook.presto.spi.plan.FilterNode) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) LogicalRowExpressions(com.facebook.presto.expressions.LogicalRowExpressions) VariableAllocator(com.facebook.presto.spi.VariableAllocator) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) TableHandle(com.facebook.presto.spi.TableHandle) FunctionMetadataManager(com.facebook.presto.spi.function.FunctionMetadataManager) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) HivePartitionManager(com.facebook.presto.hive.HivePartitionManager) RowExpression(com.facebook.presto.spi.relation.RowExpression) Functions(com.google.common.base.Functions) PlanNodeIdAllocator(com.facebook.presto.spi.plan.PlanNodeIdAllocator) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) LogicalRowExpressions.and(com.facebook.presto.expressions.LogicalRowExpressions.and) INVALID_CAST_ARGUMENT(com.facebook.presto.spi.StandardErrorCode.INVALID_CAST_ARGUMENT) Constraint(com.facebook.presto.spi.Constraint) HiveBucketing(com.facebook.presto.hive.HiveBucketing) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) OPTIMIZED(com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED) HiveMetadata(com.facebook.presto.hive.HiveMetadata) PlanNode(com.facebook.presto.spi.plan.PlanNode) DynamicFilters.extractStaticConjuncts(com.facebook.presto.expressions.DynamicFilters.extractStaticConjuncts) NUMERIC_VALUE_OUT_OF_RANGE(com.facebook.presto.spi.StandardErrorCode.NUMERIC_VALUE_OUT_OF_RANGE) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) Sets.intersection(com.google.common.collect.Sets.intersection) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) HivePartitionResult(com.facebook.presto.hive.HivePartitionResult) Constraint(com.facebook.presto.spi.Constraint) ConstantExpression(com.facebook.presto.spi.relation.ConstantExpression) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) HiveTableHandle(com.facebook.presto.hive.HiveTableHandle) DomainTranslator(com.facebook.presto.spi.relation.DomainTranslator) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) Subfield(com.facebook.presto.common.Subfield) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashSet(java.util.HashSet) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Table(com.facebook.presto.hive.metastore.Table) LogicalRowExpressions(com.facebook.presto.expressions.LogicalRowExpressions) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) RowExpression(com.facebook.presto.spi.relation.RowExpression) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

Subfield (com.facebook.presto.common.Subfield)1 Domain (com.facebook.presto.common.predicate.Domain)1 NullableValue (com.facebook.presto.common.predicate.NullableValue)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 TupleDomain.withColumnDomains (com.facebook.presto.common.predicate.TupleDomain.withColumnDomains)1 DefaultRowExpressionTraversalVisitor (com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor)1 DynamicFilters.extractDynamicConjuncts (com.facebook.presto.expressions.DynamicFilters.extractDynamicConjuncts)1 DynamicFilters.extractStaticConjuncts (com.facebook.presto.expressions.DynamicFilters.extractStaticConjuncts)1 DynamicFilters.removeNestedDynamicFilters (com.facebook.presto.expressions.DynamicFilters.removeNestedDynamicFilters)1 LogicalRowExpressions (com.facebook.presto.expressions.LogicalRowExpressions)1 FALSE_CONSTANT (com.facebook.presto.expressions.LogicalRowExpressions.FALSE_CONSTANT)1 TRUE_CONSTANT (com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT)1 LogicalRowExpressions.and (com.facebook.presto.expressions.LogicalRowExpressions.and)1 LogicalRowExpressions.extractConjuncts (com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts)1 RowExpressionNodeInliner.replaceExpression (com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression)1 HiveBucketHandle (com.facebook.presto.hive.HiveBucketHandle)1 HiveBucketing (com.facebook.presto.hive.HiveBucketing)1 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 HiveMetadata (com.facebook.presto.hive.HiveMetadata)1 HivePartitionManager (com.facebook.presto.hive.HivePartitionManager)1