Search in sources :

Example 1 with PARTITION_NOT_READABLE

Use of com.facebook.presto.hive.HiveWarningCode.PARTITION_NOT_READABLE in project presto by prestodb.

The method getPartitionMetadata of the class HiveSplitManager.

/**
 * Builds the {@link HivePartitionMetadata} for the given partitions, batch by batch.
 *
 * <p>An empty partition list yields an empty result. A single partition whose id equals
 * {@code UNPARTITIONED_ID} is returned directly without loading partition split info.
 * Otherwise the partitions are split into batches by {@code partitionExponentially} and each
 * batch is converted through Guava's {@code Iterables.transform}, so the per-batch work
 * (including any exception or warning it produces) happens when the returned iterable is
 * consumed.
 *
 * <p>For each batch, every non-pruned partition is checked for: being online and readable
 * (skipped when offline-data debug mode is enabled for the session), having non-null column
 * lists, and having bucketing compatible with the table when a non-virtual bucket handle is
 * supplied. Unreadable partitions that may be ignored are skipped and reported through a single
 * {@code PARTITION_NOT_READABLE} warning per batch.
 *
 * @param metastore metastore passed through to {@code getPartitionSplitInfo}
 * @param table table whose partitions are being resolved
 * @param tableName name used in error and warning messages
 * @param hivePartitions partitions to resolve
 * @param hiveBucketHandle bucket handle of the table, if any
 * @param session session consulted for the feature toggles used here
 * @param warningCollector receives the unreadable-partition warning
 * @param requestedColumns columns requested by the query, if known
 * @param predicateColumns columns referenced by predicates, keyed by name
 * @param domains subfield domains passed through to {@code getPartitionSplitInfo}
 * @return the metadata of all partitions that were neither pruned nor skipped as unreadable
 * @throws PrestoException if a partition was not loaded, has null columns, or has bucketing
 *         incompatible with the table
 * @throws HiveNotReadableException if a partition is marked unreadable and may not be ignored
 */
private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketHandle> hiveBucketHandle, ConnectorSession session, WarningCollector warningCollector, Optional<Set<HiveColumnHandle>> requestedColumns, Map<String, HiveColumnHandle> predicateColumns, Optional<Map<Subfield, Domain>> domains) {
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }
    Optional<Set<HiveColumnHandle>> allRequestedColumns = mergeRequestedAndPredicateColumns(requestedColumns, ImmutableSet.copyOf(predicateColumns.values()));
    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            // Unpartitioned table: there is no metastore partition to load or validate.
            return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty(), encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns), ImmutableSet.of()));
        }
    }
    StorageFormat storageFormat = table.getStorage().getStorageFormat();
    Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
    Optional<HiveStorageFormat> resolvedHiveStorageFormat;
    if (isUseParquetColumnNames(session)) {
        // Use Hive Storage Format as Parquet if table is of HUDI format
        resolvedHiveStorageFormat = (!hiveStorageFormat.isPresent() && isHudiFormat(storageFormat)) ? Optional.of(PARQUET) : hiveStorageFormat;
    } else {
        resolvedHiveStorageFormat = hiveStorageFormat;
    }
    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, PartitionSplitInfo> partitionSplitInfo = getPartitionSplitInfo(session, metastore, tableName, partitionBatch, predicateColumns, domains);
        if (partitionBatch.size() != partitionSplitInfo.size()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitionSplitInfo.size()));
        }
        Map<String, Partition> partitions = partitionSplitInfo.entrySet().stream().collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getPartition()));
        Optional<Map<String, EncryptionInformation>> encryptionInformationForPartitions = encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns, partitions);
        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        // reason -> sample of partition names that were skipped for that reason
        Map<String, Set<String>> partitionsNotReadable = new HashMap<>();
        int unreadablePartitionsSkipped = 0;
        for (HivePartition hivePartition : partitionBatch) {
            String partitionId = hivePartition.getPartitionId();
            // The size check above compares counts only, so a requested id may still be
            // missing from the map. Fail with the intended error before dereferencing.
            PartitionSplitInfo splitInfo = partitionSplitInfo.get(partitionId);
            if (splitInfo == null) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            if (splitInfo.isPruned()) {
                continue;
            }
            Partition partition = partitions.get(partitionId);
            if (partition == null) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            String partitionName = makePartName(table.getPartitionColumns(), partition.getValues());
            Optional<EncryptionInformation> encryptionInformation = encryptionInformationForPartitions.map(metadata -> metadata.get(partitionId));
            if (!isOfflineDataDebugModeEnabled(session)) {
                // verify partition is online
                verifyOnline(tableName, Optional.of(partitionName), getProtectMode(partition), partition.getParameters());
                // verify partition is not marked as non-readable
                String reason = partition.getParameters().get(OBJECT_NOT_READABLE);
                if (!isNullOrEmpty(reason)) {
                    if (!shouldIgnoreUnreadablePartition(session) || !partition.isEligibleToIgnore()) {
                        throw new HiveNotReadableException(tableName, Optional.of(partitionName), reason);
                    }
                    unreadablePartitionsSkipped++;
                    // Keep only a bounded sample for the warning text. Both limits are tested
                    // before inserting, so up to four reasons and four names per reason are kept.
                    if (partitionsNotReadable.size() <= 3) {
                        Set<String> sampledNames = partitionsNotReadable.computeIfAbsent(reason, key -> new HashSet<>());
                        if (sampledNames.size() <= 3) {
                            sampledNames.add(partitionName);
                        }
                    }
                    continue;
                }
            }
            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partitionName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, resolvedHiveStorageFormat, tableName, partitionName, tableColumns, partitionColumns);
            if (hiveBucketHandle.isPresent() && !hiveBucketHandle.get().isVirtuallyBucketed()) {
                HiveBucketHandle bucketHandle = hiveBucketHandle.get();
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (!partitionBucketProperty.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) is bucketed but partition (%s) is not bucketed", hivePartition.getTableName(), partitionId));
                }
                HiveBucketProperty bucketProperty = partitionBucketProperty.get();
                int tableBucketCount = bucketHandle.getTableBucketCount();
                int partitionBucketCount = bucketProperty.getBucketCount();
                List<String> tableBucketColumns = bucketHandle.getColumns().stream().map(HiveColumnHandle::getName).collect(toImmutableList());
                List<String> partitionBucketColumns = bucketProperty.getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)", hivePartition.getTableName(), tableBucketColumns, tableBucketCount, partitionId, partitionBucketColumns, partitionBucketCount));
                }
            }
            results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping, encryptionInformation, splitInfo.getRedundantColumnDomains()));
        }
        if (unreadablePartitionsSkipped > 0) {
            // One warning per batch; counts refer to this batch, not the whole table.
            StringBuilder warningMessage = new StringBuilder(format("Table '%s' has %s out of %s partitions unreadable: ", tableName, unreadablePartitionsSkipped, partitionBatch.size()));
            for (Entry<String, Set<String>> entry : partitionsNotReadable.entrySet()) {
                warningMessage.append(String.join(", ", entry.getValue())).append("... are due to ").append(entry.getKey()).append(". ");
            }
            warningCollector.add(new PrestoWarning(PARTITION_NOT_READABLE, warningMessage.toString()));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
Also used : WarningCollector(com.facebook.presto.spi.WarningCollector) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) MetastoreUtil.makePartName(com.facebook.presto.hive.metastore.MetastoreUtil.makePartName) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) CounterStat(com.facebook.airlift.stats.CounterStat) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) SERVER_SHUTTING_DOWN(com.facebook.presto.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) ENGLISH(java.util.Locale.ENGLISH) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Decimals.encodeScaledValue(com.facebook.presto.common.type.Decimals.encodeScaledValue) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) HiveSessionProperties.getHiveMaxInitialSplitSize(com.facebook.presto.hive.HiveSessionProperties.getHiveMaxInitialSplitSize) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) 
Stream(java.util.stream.Stream) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) TableToPartitionMapping.mapColumnsByIndex(com.facebook.presto.hive.TableToPartitionMapping.mapColumnsByIndex) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Iterables(com.google.common.collect.Iterables) Table(com.facebook.presto.hive.metastore.Table) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) HiveSessionProperties.shouldIgnoreUnreadablePartition(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreUnreadablePartition) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) HiveSessionProperties.isPartitionStatisticsBasedOptimizationEnabled(com.facebook.presto.hive.HiveSessionProperties.isPartitionStatisticsBasedOptimizationEnabled) Lists(com.google.common.collect.Lists) Managed(org.weakref.jmx.Managed) PrestoWarning(com.facebook.presto.spi.PrestoWarning) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) Executor(java.util.concurrent.Executor) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) AbstractIterator(com.google.common.collect.AbstractIterator) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Collectors.reducing(java.util.stream.Collectors.reducing) Domain(com.facebook.presto.common.predicate.Domain) ColumnHandle(com.facebook.presto.spi.ColumnHandle) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) BucketSplitInfo.createBucketSplitInfo(com.facebook.presto.hive.StoragePartitionLoader.BucketSplitInfo.createBucketSplitInfo) ValueSet(com.facebook.presto.common.predicate.ValueSet) 
Iterables.transform(com.google.common.collect.Iterables.transform) HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) Float.floatToIntBits(java.lang.Float.floatToIntBits) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Iterables.concat(com.google.common.collect.Iterables.concat) HiveType.getPrimitiveType(com.facebook.presto.hive.HiveType.getPrimitiveType) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) GROUPED_SCHEDULING(com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) HiveSessionProperties.getLeaseDuration(com.facebook.presto.hive.HiveSessionProperties.getLeaseDuration) Math.min(java.lang.Math.min) String.format(java.lang.String.format) Range(com.facebook.presto.common.predicate.Range) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) DataSize(io.airlift.units.DataSize) List(java.util.List) Entry(java.util.Map.Entry) Optional(java.util.Optional) Nested(org.weakref.jmx.Nested) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) Column(com.facebook.presto.hive.metastore.Column) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) HIVE_PARTITION_DROPPED_DURING_QUERY(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) 
PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) Partition(com.facebook.presto.hive.metastore.Partition) Inject(javax.inject.Inject) HashSet(java.util.HashSet) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) PARTITION_NOT_READABLE(com.facebook.presto.hive.HiveWarningCode.PARTITION_NOT_READABLE) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) Type(com.facebook.presto.common.type.Type) ExecutorService(java.util.concurrent.ExecutorService) HIVE_TRANSACTION_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_TRANSACTION_NOT_FOUND) Iterator(java.util.Iterator) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Ordering(com.google.common.collect.Ordering) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ValueSet(com.facebook.presto.common.predicate.ValueSet) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
ImmutableList(com.google.common.collect.ImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) Column(com.facebook.presto.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HiveSessionProperties.shouldIgnoreUnreadablePartition(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreUnreadablePartition) Partition(com.facebook.presto.hive.metastore.Partition) PrestoWarning(com.facebook.presto.spi.PrestoWarning) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Aggregations

BoundedExecutor (com.facebook.airlift.concurrent.BoundedExecutor)1 CounterStat (com.facebook.airlift.stats.CounterStat)1 Subfield (com.facebook.presto.common.Subfield)1 Domain (com.facebook.presto.common.predicate.Domain)1 Range (com.facebook.presto.common.predicate.Range)1 SortedRangeSet (com.facebook.presto.common.predicate.SortedRangeSet)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 ValueSet (com.facebook.presto.common.predicate.ValueSet)1 Decimals.encodeScaledValue (com.facebook.presto.common.type.Decimals.encodeScaledValue)1 Decimals.isShortDecimal (com.facebook.presto.common.type.Decimals.isShortDecimal)1 Type (com.facebook.presto.common.type.Type)1 HiveBucketFilter (com.facebook.presto.hive.HiveBucketing.HiveBucketFilter)1 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)1 HiveColumnHandle.isPathColumnHandle (com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle)1 HIVE_INVALID_METADATA (com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA)1 HIVE_PARTITION_DROPPED_DURING_QUERY (com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY)1 HIVE_PARTITION_SCHEMA_MISMATCH (com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH)1 HIVE_TRANSACTION_NOT_FOUND (com.facebook.presto.hive.HiveErrorCode.HIVE_TRANSACTION_NOT_FOUND)1 UNPARTITIONED_ID (com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID)1 HiveSessionProperties.getHiveMaxInitialSplitSize (com.facebook.presto.hive.HiveSessionProperties.getHiveMaxInitialSplitSize)1