Search in sources :

Example 26 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class HiveMetadata method getTableLayout.

@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle) {
    HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayoutHandle.getPartitionColumns());
    List<HivePartition> partitions = hiveLayoutHandle.getPartitions().get();
    Optional<DiscretePredicates> discretePredicates = Optional.empty();
    if (!partitionColumns.isEmpty()) {
        // Do not create tuple domains for every partition at the same time!
        // There can be a huge number of partitions so use an iterable so
        // all domains do not need to be in memory at the same time.
        Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(partitions, (hivePartition) -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
    }
    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    SchemaTableName tableName = hiveLayoutHandle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    // never ignore table bucketing for temporary tables as those are created such explicitly by the engine request
    boolean bucketExecutionEnabled = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);
    if (bucketExecutionEnabled && hiveLayoutHandle.getBucketHandle().isPresent()) {
        HiveBucketHandle hiveBucketHandle = hiveLayoutHandle.getBucketHandle().get();
        HivePartitioningHandle partitioningHandle;
        int bucketCount = hiveBucketHandle.getReadBucketCount();
        OptionalInt maxCompatibleBucketCount = OptionalInt.empty();
        // Virtually bucketed table does not have table bucket property
        if (hiveBucketHandle.isVirtuallyBucketed()) {
            partitioningHandle = createHiveCompatiblePartitioningHandle(bucketCount, hiveBucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()), maxCompatibleBucketCount);
        } else {
            HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
            switch(bucketProperty.getBucketFunctionType()) {
                case HIVE_COMPATIBLE:
                    partitioningHandle = createHiveCompatiblePartitioningHandle(bucketCount, hiveBucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()), maxCompatibleBucketCount);
                    break;
                case PRESTO_NATIVE:
                    partitioningHandle = createPrestoNativePartitioningHandle(bucketCount, bucketProperty.getTypes().get(), maxCompatibleBucketCount);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
            }
        }
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(partitioningHandle, hiveBucketHandle.getColumns().stream().map(ColumnHandle.class::cast).collect(toImmutableList())));
    }
    TupleDomain<ColumnHandle> predicate;
    if (hiveLayoutHandle.isPushdownFilterEnabled()) {
        predicate = hiveLayoutHandle.getDomainPredicate().transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null).transform(hiveLayoutHandle.getPredicateColumns()::get).transform(ColumnHandle.class::cast).intersect(createPredicate(partitionColumns, partitions));
    } else {
        predicate = createPredicate(partitionColumns, partitions);
    }
    // Expose ordering property of the table.
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    if (table.getStorage().getBucketProperty().isPresent() && !table.getStorage().getBucketProperty().get().getSortedBy().isEmpty()) {
        ImmutableSet.Builder<ColumnHandle> streamPartitionColumnsBuilder = ImmutableSet.builder();
        // streamPartitioningColumns is how we partition the data across splits.
        // localProperty is how we partition the data within a split.
        // 1. add partition columns to streamPartitionColumns
        partitionColumns.forEach(streamPartitionColumnsBuilder::add);
        // 2. add sorted columns to streamPartitionColumns and localProperties
        HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().get();
        Map<String, ColumnHandle> columnHandles = hiveColumnHandles(table).stream().collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        bucketProperty.getSortedBy().forEach(sortingColumn -> {
            ColumnHandle columnHandle = columnHandles.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(columnHandle, sortingColumn.getOrder().getSortOrder()));
            streamPartitionColumnsBuilder.add(columnHandle);
        });
        // We currently only set streamPartitionColumns when it enables streaming aggregation and also it's eligible to enable streaming aggregation
        // 1. When the bucket columns are the same as the prefix of the sort columns
        // 2. When all rows of the same value group are guaranteed to be in the same split. We disable splitting a file when isStreamingAggregationEnabled is true to make sure the property is guaranteed.
        List<String> sortColumns = bucketProperty.getSortedBy().stream().map(SortingColumn::getColumnName).collect(toImmutableList());
        if (bucketProperty.getBucketedBy().size() <= sortColumns.size() && bucketProperty.getBucketedBy().containsAll(sortColumns.subList(0, bucketProperty.getBucketedBy().size())) && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamPartitionColumnsBuilder.build());
        }
    }
    return new ConnectorTableLayout(hiveLayoutHandle, Optional.empty(), predicate, tablePartitioning, streamPartitionColumns, discretePredicates, localProperties.build(), Optional.of(hiveLayoutHandle.getRemainingPredicate()));
}
Also used : ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) HashSet(java.util.HashSet) Set(java.util.Set) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorTablePartitioning(com.facebook.presto.spi.ConnectorTablePartitioning) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SystemTable(com.facebook.presto.spi.SystemTable) DwrfTableEncryptionProperties.forTable(com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable) HiveUtil.translateHiveUnsupportedTypeForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable) Table(com.facebook.presto.hive.metastore.Table) HiveUtil.translateHiveUnsupportedTypesForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable) HiveTableProperties.getEncryptTable(com.facebook.presto.hive.HiveTableProperties.getEncryptTable) HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable(com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable) HiveTableProperties.isExternalTable(com.facebook.presto.hive.HiveTableProperties.isExternalTable) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) OptionalInt(java.util.OptionalInt) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Constraint(com.facebook.presto.spi.Constraint) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) LocalProperty(com.facebook.presto.spi.LocalProperty)

Example 27 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class HivePartitionManager method getPartitionsIterator.

public Iterable<HivePartition> getPartitionsIterator(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, ConnectorSession session) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicateColumnHandles = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionTypes = partitionColumns.stream().map(column -> typeManager.getType(column.getTypeSignature())).collect(toList());
    Map<Column, Domain> effectivePredicate = createPartitionPredicates(metastore, session, effectivePredicateColumnHandles, partitionColumns, assumeCanonicalPartitionKeys);
    if (partitionColumns.isEmpty()) {
        return ImmutableList.of(new HivePartition(tableName));
    } else {
        return () -> {
            List<String> filteredPartitionNames = getFilteredPartitionNames(session, metastore, tableName, effectivePredicate);
            return filteredPartitionNames.stream().map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, constraint)).filter(Optional::isPresent).map(Optional::get).iterator();
        };
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) MetastoreUtil.makePartName(com.facebook.presto.hive.metastore.MetastoreUtil.makePartName) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Predicates.not(com.google.common.base.Predicates.not) Map(java.util.Map) HiveBucketing.getHiveBucketFilter(com.facebook.presto.hive.HiveBucketing.getHiveBucketFilter) TEMPORARY_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE) HIVE_EXCEEDED_PARTITION_LIMIT(com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.shouldIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreTableBucketing) VarcharType(com.facebook.presto.common.type.VarcharType) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) List(java.util.List) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Optional(java.util.Optional) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing(com.facebook.presto.hive.HiveSessionProperties.getMinBucketCountToNotIgnoreTableBucketing) Table(com.facebook.presto.hive.metastore.Table) Column(com.facebook.presto.hive.metastore.Column) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) Predicates(com.google.common.base.Predicates) CharType(com.facebook.presto.common.type.CharType) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) HiveSessionProperties.getMaxBucketsForGroupedExecution(com.facebook.presto.hive.HiveSessionProperties.getMaxBucketsForGroupedExecution) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) Iterator(java.util.Iterator) Constraint.alwaysTrue(com.facebook.presto.spi.Constraint.alwaysTrue) Constraint(com.facebook.presto.spi.Constraint) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) Maps(com.google.common.collect.Maps) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Collectors.toList(java.util.stream.Collectors.toList) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) MetastoreUtil.extractPartitionValues(com.facebook.presto.hive.metastore.MetastoreUtil.extractPartitionValues) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Table(com.facebook.presto.hive.metastore.Table) Optional(java.util.Optional) SchemaTableName(com.facebook.presto.spi.SchemaTableName) VarcharType(com.facebook.presto.common.type.VarcharType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) Column(com.facebook.presto.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain)

Example 28 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class HiveMaterializedViewUtils method getMaterializedDataPredicates.

public static MaterializedDataPredicates getMaterializedDataPredicates(SemiTransactionalHiveMetastore metastore, MetastoreContext metastoreContext, TypeManager typeManager, Table table, DateTimeZone timeZone) {
    List<Column> partitionColumns = table.getPartitionColumns();
    for (Column partitionColumn : partitionColumns) {
        HiveType hiveType = partitionColumn.getType();
        if (!hiveType.isSupportedType()) {
            throw new PrestoException(NOT_SUPPORTED, String.format("Unsupported Hive type %s found in partition keys of table %s.%s", hiveType, table.getDatabaseName(), table.getTableName()));
        }
    }
    List<HiveColumnHandle> partitionKeyColumnHandles = getPartitionKeyColumnHandles(table);
    Map<String, Type> partitionTypes = partitionKeyColumnHandles.stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> typeManager.getType(column.getTypeSignature())));
    List<String> partitionNames = metastore.getPartitionNames(metastoreContext, table.getDatabaseName(), table.getTableName()).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName())));
    ImmutableList.Builder<TupleDomain<String>> partitionNamesAndValues = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        ImmutableMap.Builder<String, NullableValue> partitionNameAndValuesMap = ImmutableMap.builder();
        Map<String, String> partitions = toPartitionNamesAndValues(partitionName);
        if (partitionColumns.size() != partitions.size()) {
            throw new PrestoException(HIVE_INVALID_METADATA, String.format("Expected %d partition key values, but got %d", partitionColumns.size(), partitions.size()));
        }
        partitionTypes.forEach((name, type) -> {
            String value = partitions.get(name);
            if (value == null) {
                throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, String.format("partition key value cannot be null for field: %s", name));
            }
            partitionNameAndValuesMap.put(name, parsePartitionValue(name, value, type, timeZone));
        });
        TupleDomain<String> tupleDomain = TupleDomain.fromFixedValues(partitionNameAndValuesMap.build());
        partitionNamesAndValues.add(tupleDomain);
    }
    return new MaterializedDataPredicates(partitionNamesAndValues.build(), partitionColumns.stream().map(Column::getName).collect(toImmutableList()));
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Table(com.facebook.presto.hive.metastore.Table) Column(com.facebook.presto.hive.metastore.Column) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) MetastoreUtil.toPartitionNamesAndValues(com.facebook.presto.hive.metastore.MetastoreUtil.toPartitionNamesAndValues) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) TupleDomain.extractFixedValues(com.facebook.presto.common.predicate.TupleDomain.extractFixedValues) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) ConnectorMaterializedViewDefinition(com.facebook.presto.spi.ConnectorMaterializedViewDefinition) MaterializedDataPredicates(com.facebook.presto.spi.MaterializedViewStatus.MaterializedDataPredicates) Type(com.facebook.presto.common.type.Type) Collections.emptyMap(java.util.Collections.emptyMap) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors(java.util.stream.Collectors) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) String.format(java.lang.String.format) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Optional(java.util.Optional) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) TupleDomain.toLinkedMap(com.facebook.presto.common.predicate.TupleDomain.toLinkedMap) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) NullableValue(com.facebook.presto.common.predicate.NullableValue) PrestoException(com.facebook.presto.spi.PrestoException) SchemaTableName(com.facebook.presto.spi.SchemaTableName) MaterializedDataPredicates(com.facebook.presto.spi.MaterializedViewStatus.MaterializedDataPredicates) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Type(com.facebook.presto.common.type.Type) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Column(com.facebook.presto.hive.metastore.Column)

Example 29 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class RcFilePageSourceFactory method createPageSource.

@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation) {
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " + "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) SERIALIZATION_LAST_COLUMN_TAKES_REST(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) SERIALIZATION_NULL_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Unit(io.airlift.units.DataSize.Unit) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ESCAPE_CHAR(org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR) ImmutableMap(com.google.common.collect.ImmutableMap) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) DEFAULT_NULL_SEQUENCE(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) Slice(io.airlift.slice.Slice) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) LazyUtils.getByte(org.apache.hadoop.hive.serde2.lazy.LazyUtils.getByte) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) Inject(javax.inject.Inject) SERIALIZATION_EXTEND_NESTING_LEVELS(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS) MAPKEY_DELIM(org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM) SERIALIZATION_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT) TypeManager(com.facebook.presto.common.type.TypeManager) DEFAULT_SEPARATORS(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_SEPARATORS) Objects.requireNonNull(java.util.Objects.requireNonNull) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) Type(com.facebook.presto.common.type.Type) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) RcFileReader(com.facebook.presto.rcfile.RcFileReader) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Example 30 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class ParquetPageSourceFactory method getParquetTupleDomain.

public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<HiveColumnHandle> effectivePredicate) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> predicate = ImmutableMap.builder();
    for (Entry<HiveColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        HiveColumnHandle columnHandle = entry.getKey();
        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        if (!columnHandle.getHiveType().getCategory().equals(PRIMITIVE)) {
            continue;
        }
        RichColumnDescriptor descriptor;
        if (isPushedDownSubfield(columnHandle)) {
            Subfield pushedDownSubfield = getPushedDownSubfield(columnHandle);
            List<String> subfieldPath = columnPathFromSubfield(pushedDownSubfield);
            descriptor = descriptorsByPath.get(subfieldPath);
        } else {
            descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
        }
        if (descriptor != null) {
            predicate.put(descriptor, entry.getValue());
        }
    }
    return TupleDomain.withColumnDomains(predicate.build());
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ImmutableMap(com.google.common.collect.ImmutableMap) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Subfield(com.facebook.presto.common.Subfield)

Aggregations

TupleDomain (com.facebook.presto.common.predicate.TupleDomain)76 Domain (com.facebook.presto.common.predicate.Domain)54 Map (java.util.Map)39 ColumnHandle (com.facebook.presto.spi.ColumnHandle)36 ImmutableList (com.google.common.collect.ImmutableList)33 ImmutableMap (com.google.common.collect.ImmutableMap)31 List (java.util.List)27 Optional (java.util.Optional)26 Objects.requireNonNull (java.util.Objects.requireNonNull)25 Test (org.testng.annotations.Test)25 ConnectorSession (com.facebook.presto.spi.ConnectorSession)22 SchemaTableName (com.facebook.presto.spi.SchemaTableName)22 Set (java.util.Set)21 Type (com.facebook.presto.common.type.Type)20 PrestoException (com.facebook.presto.spi.PrestoException)20 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)19 ImmutableSet (com.google.common.collect.ImmutableSet)19 String.format (java.lang.String.format)19 NullableValue (com.facebook.presto.common.predicate.NullableValue)17 TypeManager (com.facebook.presto.common.type.TypeManager)15