Search in sources :

Example 86 with ConnectorSession

use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.

the class HivePageSourceProvider method createSelectivePageSource.

private static Optional<ConnectorPageSource> createSelectivePageSource(Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories, Configuration configuration, ConnectorSession session, HiveSplit split, HiveTableLayoutHandle layout, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache, SplitContext splitContext, Optional<EncryptionInformation> encryptionInformation) {
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder().addAll(layout.getPredicateColumns().values()).addAll(split.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of())).build();
    Set<String> columnNames = columns.stream().map(HiveColumnHandle::getName).collect(toImmutableSet());
    List<HiveColumnHandle> allColumns = ImmutableList.<HiveColumnHandle>builder().addAll(columns).addAll(interimColumns.stream().filter(column -> !columnNames.contains(column.getName())).collect(toImmutableList())).build();
    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(split.getPartitionKeys(), allColumns, ImmutableList.of(), split.getTableToPartitionMapping(), path, split.getTableBucketNumber(), split.getFileSize(), split.getFileModifiedTime());
    Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion().map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));
    Map<Integer, String> prefilledValues = columnMappings.stream().filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
    Map<Integer, HiveCoercer> coercers = columnMappings.stream().filter(mapping -> mapping.getCoercionFrom().isPresent()).collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
    List<Integer> outputColumns = columns.stream().map(HiveColumnHandle::getHiveColumnIndex).collect(toImmutableList());
    RowExpression optimizedRemainingPredicate = rowExpressionCache.getUnchecked(new RowExpressionCacheKey(layout.getRemainingPredicate(), session));
    if (shouldSkipBucket(layout, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    if (shouldSkipPartition(typeManager, layout, hiveStorageTimeZone, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    CacheQuota cacheQuota = generateCacheQuota(split);
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(configuration, session, path, split.getStart(), split.getLength(), split.getFileSize(), split.getStorage(), toColumnHandles(columnMappings, true), prefilledValues, coercers, bucketAdaptation, outputColumns, splitContext.getDynamicFilterPredicate().map(filter -> filter.transform(handle -> new Subfield(((HiveColumnHandle) handle).getName())).intersect(layout.getDomainPredicate())).orElse(layout.getDomainPredicate()), optimizedRemainingPredicate, hiveStorageTimeZone, new HiveFileContext(splitContext.isCacheable(), cacheQuota, split.getExtraFileInfo().map(BinaryExtraHiveFileInfo::new), Optional.of(split.getFileSize()), split.getFileModifiedTime(), HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)), encryptionInformation);
        if (pageSource.isPresent()) {
            return Optional.of(pageSource.get());
        }
    }
    return Optional.empty();
}
Also used : NestedField(com.facebook.presto.common.Subfield.NestedField) RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) LoadingCache(com.google.common.cache.LoadingCache) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ColumnMapping.toColumnHandles(com.facebook.presto.hive.HivePageSourceProvider.ColumnMapping.toColumnHandles) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HiveBucketing.getHiveBucketFilter(com.facebook.presto.hive.HiveBucketing.getHiveBucketFilter) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) BucketConversion(com.facebook.presto.hive.HiveSplit.BucketConversion) ImmutableSet(com.google.common.collect.ImmutableSet) HIVE_UNKNOWN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CacheLoader(com.google.common.cache.CacheLoader) RecordCursor(com.facebook.presto.spi.RecordCursor) DataSize(io.airlift.units.DataSize) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) HiveCoercer.createCoercer(com.facebook.presto.hive.HiveCoercer.createCoercer) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) Column(com.facebook.presto.hive.metastore.Column) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HiveSessionProperties.isUseRecordPageSourceForCustomSplit(com.facebook.presto.hive.HiveSessionProperties.isUseRecordPageSourceForCustomSplit) System.identityHashCode(java.lang.System.identityHashCode) Inject(javax.inject.Inject) HashSet(java.util.HashSet) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Type(com.facebook.presto.common.type.Type) RowExpression(com.facebook.presto.spi.relation.RowExpression) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) PathElement(com.facebook.presto.common.Subfield.PathElement) HiveUtil.getPrefilledColumnValue(com.facebook.presto.hive.HiveUtil.getPrefilledColumnValue) MetastoreUtil.reconstructPartitionSchema(com.facebook.presto.hive.metastore.MetastoreUtil.reconstructPartitionSchema) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) OPTIMIZED(com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) HiveUtil.shouldUseRecordReaderFromInputFormat(com.facebook.presto.hive.HiveUtil.shouldUseRecordReaderFromInputFormat) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Subfield(com.facebook.presto.common.Subfield) Path(org.apache.hadoop.fs.Path) RowExpression(com.facebook.presto.spi.relation.RowExpression)

Example 87 with ConnectorSession

use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.

the class HivePartitionManager method getPartitionsIterator.

public Iterable<HivePartition> getPartitionsIterator(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, ConnectorSession session) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicateColumnHandles = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionTypes = partitionColumns.stream().map(column -> typeManager.getType(column.getTypeSignature())).collect(toList());
    Map<Column, Domain> effectivePredicate = createPartitionPredicates(metastore, session, effectivePredicateColumnHandles, partitionColumns, assumeCanonicalPartitionKeys);
    if (partitionColumns.isEmpty()) {
        return ImmutableList.of(new HivePartition(tableName));
    } else {
        return () -> {
            List<String> filteredPartitionNames = getFilteredPartitionNames(session, metastore, tableName, effectivePredicate);
            return filteredPartitionNames.stream().map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, constraint)).filter(Optional::isPresent).map(Optional::get).iterator();
        };
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) MetastoreUtil.makePartName(com.facebook.presto.hive.metastore.MetastoreUtil.makePartName) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) HiveBucketing.getHiveBucketFilter(com.facebook.presto.hive.HiveBucketing.getHiveBucketFilter) TEMPORARY_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE) HIVE_EXCEEDED_PARTITION_LIMIT(com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.shouldIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreTableBucketing) VarcharType(com.facebook.presto.common.type.VarcharType) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) List(java.util.List) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing) Table(com.facebook.presto.hive.metastore.Table) Column(com.facebook.presto.hive.metastore.Column) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) Predicates(com.google.common.base.Predicates) CharType(com.facebook.presto.common.type.CharType) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) HiveSessionProperties.getMaxBucketsForGroupedExecution(com.facebook.presto.hive.HiveSessionProperties.getMaxBucketsForGroupedExecution) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) Iterator(java.util.Iterator) Constraint.alwaysTrue(com.facebook.presto.spi.Constraint.alwaysTrue) Constraint(com.facebook.presto.spi.Constraint) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) Maps(com.google.common.collect.Maps) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Collectors.toList(java.util.stream.Collectors.toList) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) MetastoreUtil.extractPartitionValues(com.facebook.presto.hive.metastore.MetastoreUtil.extractPartitionValues) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Table(com.facebook.presto.hive.metastore.Table) Optional(java.util.Optional) SchemaTableName(com.facebook.presto.spi.SchemaTableName) VarcharType(com.facebook.presto.common.type.VarcharType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) Column(com.facebook.presto.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain)

Example 88 with ConnectorSession

use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.

the class HivePartitionManager method getPartitions.

public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, List<List<String>> partitionValuesList, ConnectorSession session) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionColumnTypes = partitionColumns.stream().map(column -> typeManager.getType(column.getTypeSignature())).collect(toImmutableList());
    List<HivePartition> partitionList = partitionValuesList.stream().map(partitionValues -> makePartName(table.getPartitionColumns(), partitionValues)).map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionColumnTypes, alwaysTrue())).map(partition -> partition.orElseThrow(() -> new VerifyException("partition must exist"))).collect(toImmutableList());
    Optional<HiveBucketHandle> bucketHandle = shouldIgnoreTableBucketing(session) ? Optional.empty() : getHiveBucketHandle(table);
    return new HivePartitionResult(partitionColumns, table.getDataColumns(), table.getParameters(), partitionList, TupleDomain.all(), TupleDomain.all(), TupleDomain.none(), bucketHandle, Optional.empty());
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) MetastoreUtil.makePartName(com.facebook.presto.hive.metastore.MetastoreUtil.makePartName) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) HiveBucketing.getHiveBucketFilter(com.facebook.presto.hive.HiveBucketing.getHiveBucketFilter) TEMPORARY_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE) HIVE_EXCEEDED_PARTITION_LIMIT(com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.shouldIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreTableBucketing) VarcharType(com.facebook.presto.common.type.VarcharType) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) List(java.util.List) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing) Table(com.facebook.presto.hive.metastore.Table) Column(com.facebook.presto.hive.metastore.Column) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) Predicates(com.google.common.base.Predicates) CharType(com.facebook.presto.common.type.CharType) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) HiveSessionProperties.getMaxBucketsForGroupedExecution(com.facebook.presto.hive.HiveSessionProperties.getMaxBucketsForGroupedExecution) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) Iterator(java.util.Iterator) Constraint.alwaysTrue(com.facebook.presto.spi.Constraint.alwaysTrue) Constraint(com.facebook.presto.spi.Constraint) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) Maps(com.google.common.collect.Maps) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Collectors.toList(java.util.stream.Collectors.toList) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) MetastoreUtil.extractPartitionValues(com.facebook.presto.hive.metastore.MetastoreUtil.extractPartitionValues) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) Table(com.facebook.presto.hive.metastore.Table) SchemaTableName(com.facebook.presto.spi.SchemaTableName) VarcharType(com.facebook.presto.common.type.VarcharType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle)

Example 89 with ConnectorSession

use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.

the class OrcSelectivePageSourceFactory method toFilterFunctions.

/**
 * Split filter expression into groups of conjuncts that depend on the same set of inputs,
 * then compile each group into FilterFunction.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler) {
    ImmutableList.Builder<FilterFunction> filterFunctions = ImmutableList.builder();
    bucketAdapter.map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), true, predicate)).ifPresent(filterFunctions::add);
    if (TRUE_CONSTANT.equals(filter)) {
        return filterFunctions.build();
    }
    DynamicFilterExtractResult extractDynamicFilterResult = extractDynamicFilters(filter);
    // dynamic filter will be added through subfield pushdown
    filter = and(extractDynamicFilterResult.getStaticConjuncts());
    if (!isAdaptiveFilterReorderingEnabled(session)) {
        filterFunctions.add(new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(filter), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }
    List<RowExpression> conjuncts = extractConjuncts(filter);
    if (conjuncts.size() == 1) {
        filterFunctions.add(new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(filter), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }
    // Use LinkedHashMap to preserve user-specified order of conjuncts. This will be the initial order in which filters are applied.
    Map<Set<Integer>, List<RowExpression>> inputsToConjuncts = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        inputsToConjuncts.computeIfAbsent(extractInputs(conjunct), k -> new ArrayList<>()).add(conjunct);
    }
    inputsToConjuncts.values().stream().map(expressions -> binaryExpression(AND, expressions)).map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), determinismEvaluator.isDeterministic(predicate), predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), predicate).get())).forEach(filterFunctions::add);
    return filterFunctions.build();
}
Also used : LogicalRowExpressions.extractConjuncts(com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts) Page(com.facebook.presto.common.Page) HiveCoercer(com.facebook.presto.hive.HiveCoercer) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FixedPageSource(com.facebook.presto.spi.FixedPageSource) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Predicate(com.facebook.presto.common.relation.Predicate) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SqlFunctionProperties(com.facebook.presto.common.function.SqlFunctionProperties) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) DefaultRowExpressionTraversalVisitor(com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) BucketAdaptation(com.facebook.presto.hive.BucketAdaptation) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) HIVE_INVALID_BUCKET_FILES(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) OrcSelectiveRecordReader(com.facebook.presto.orc.OrcSelectiveRecordReader) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) LogicalRowExpressions.and(com.facebook.presto.expressions.LogicalRowExpressions.and) IOException(java.io.IOException) HiveBucketing.getHiveBucket(com.facebook.presto.hive.HiveBucketing.getHiveBucket) OrcReader(com.facebook.presto.orc.OrcReader) DynamicFilters.extractDynamicFilters(com.facebook.presto.expressions.DynamicFilters.extractDynamicFilters) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) AND(com.facebook.presto.spi.relation.SpecialFormExpression.Form.AND) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) CallExpression(com.facebook.presto.spi.relation.CallExpression) SpecialFormExpression(com.facebook.presto.spi.relation.SpecialFormExpression) BiMap(com.google.common.collect.BiMap) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) FileNotFoundException(java.io.FileNotFoundException) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) HiveSessionProperties.isAdaptiveFilterReorderingEnabled(com.facebook.presto.hive.HiveSessionProperties.isAdaptiveFilterReorderingEnabled) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveSelectivePageSourceFactory(com.facebook.presto.hive.HiveSelectivePageSourceFactory) Optional(java.util.Optional) LogicalRowExpressions.binaryExpression(com.facebook.presto.expressions.LogicalRowExpressions.binaryExpression) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) PredicateCompiler(com.facebook.presto.spi.relation.PredicateCompiler) InputReferenceExpression(com.facebook.presto.spi.relation.InputReferenceExpression) IntStream(java.util.stream.IntStream) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) DeterminismEvaluator(com.facebook.presto.spi.relation.DeterminismEvaluator) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HiveType(com.facebook.presto.hive.HiveType) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Inject(javax.inject.Inject) HashSet(java.util.HashSet) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) RowExpression(com.facebook.presto.spi.relation.RowExpression) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) Maps(com.google.common.collect.Maps) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Consumer(java.util.function.Consumer) HiveUtil.typedPartitionKey(com.facebook.presto.hive.HiveUtil.typedPartitionKey) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) FilterFunction(com.facebook.presto.common.predicate.FilterFunction) Set(java.util.Set) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) RowExpression(com.facebook.presto.spi.relation.RowExpression) LinkedHashMap(java.util.LinkedHashMap) DynamicFilterExtractResult(com.facebook.presto.expressions.DynamicFilters.DynamicFilterExtractResult) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList)

Example 90 with ConnectorSession

use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.

the class RcFilePageSourceFactory method createPageSource.

@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation) {
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " + "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) SERIALIZATION_LAST_COLUMN_TAKES_REST(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) SERIALIZATION_NULL_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Unit(io.airlift.units.DataSize.Unit) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ESCAPE_CHAR(org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR) ImmutableMap(com.google.common.collect.ImmutableMap) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) DEFAULT_NULL_SEQUENCE(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) Slice(io.airlift.slice.Slice) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) LazyUtils.getByte(org.apache.hadoop.hive.serde2.lazy.LazyUtils.getByte) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) Inject(javax.inject.Inject) SERIALIZATION_EXTEND_NESTING_LEVELS(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS) MAPKEY_DELIM(org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM) SERIALIZATION_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT) TypeManager(com.facebook.presto.common.type.TypeManager) DEFAULT_SEPARATORS(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_SEPARATORS) Objects.requireNonNull(java.util.Objects.requireNonNull) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) Type(com.facebook.presto.common.type.Type) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) RcFileReader(com.facebook.presto.rcfile.RcFileReader) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Aggregations

ConnectorSession (com.facebook.presto.spi.ConnectorSession)190 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)94 ColumnHandle (com.facebook.presto.spi.ColumnHandle)92 ConnectorMetadata (com.facebook.presto.spi.connector.ConnectorMetadata)80 List (java.util.List)80 ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle)78 SchemaTableName (com.facebook.presto.spi.SchemaTableName)75 Optional (java.util.Optional)74 ImmutableList (com.google.common.collect.ImmutableList)73 Objects.requireNonNull (java.util.Objects.requireNonNull)73 Map (java.util.Map)64 PrestoException (com.facebook.presto.spi.PrestoException)63 ImmutableMap (com.google.common.collect.ImmutableMap)63 Test (org.testng.annotations.Test)63 Path (org.apache.hadoop.fs.Path)62 ConnectorTableLayoutHandle (com.facebook.presto.spi.ConnectorTableLayoutHandle)54 ConnectorTableMetadata (com.facebook.presto.spi.ConnectorTableMetadata)53 Constraint (com.facebook.presto.spi.Constraint)51 Slice (io.airlift.slice.Slice)51 Type (com.facebook.presto.common.type.Type)50