
Example 1 with BucketValidation

Use of io.trino.plugin.hive.HiveSplit.BucketValidation in project trino by trinodb.

From class BackgroundHiveSplitLoader, method loadPartition:

private ListenableFuture<Void> loadPartition(HivePartitionMetadata partition) throws IOException {
    HivePartition hivePartition = partition.getHivePartition();
    String partitionName = hivePartition.getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = compactEffectivePredicate.transformKeys(HiveColumnHandle.class::cast);
    BooleanSupplier partitionMatchSupplier = createPartitionMatchSupplier(dynamicFilter, hivePartition, getPartitionKeyColumnHandles(table, typeManager));
    if (!partitionMatchSupplier.getAsBoolean()) {
        // Avoid listing files and creating splits from a partition if it has been pruned due to dynamic filters
        return COMPLETED_FUTURE;
    }
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
    // S3 Select pushdown works at the granularity of individual S3 objects,
    // therefore we must not split files when it is enabled.
    // Files with skipped header/footer lines are not splittable, except for the special case skip.header.line.count=1
    boolean splittable = !s3SelectPushdownEnabled && getFooterCount(schema) == 0 && getHeaderCount(schema) <= 1;
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new TrinoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        InputFormat<?, ?> targetInputFormat = getInputFormat(configuration, schema, true);
        List<Path> targetPaths = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> getTargetPathsFromSymlink(fs, path));
        Set<Path> parents = targetPaths.stream().map(Path::getParent).distinct().collect(toImmutableSet());
        if (optimizeSymlinkListing && parents.size() == 1 && !recursiveDirWalkerEnabled) {
            Optional<Iterator<InternalHiveSplit>> manifestFileIterator = buildManifestFileIterator(targetInputFormat, partitionName, schema, partitionKeys, effectivePredicate, partitionMatchSupplier, s3SelectPushdownEnabled, partition.getTableToPartitionMapping(), getOnlyElement(parents), targetPaths, splittable);
            if (manifestFileIterator.isPresent()) {
                fileIterators.addLast(manifestFileIterator.get());
                return COMPLETED_FUTURE;
            }
        }
        return createHiveSymlinkSplits(partitionName, targetInputFormat, schema, partitionKeys, effectivePredicate, partitionMatchSupplier, s3SelectPushdownEnabled, partition.getTableToPartitionMapping(), targetPaths);
    }
    Optional<BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int readBucketCount = tableBucketInfo.get().getReadBucketCount();
            // TODO can partition's bucketing_version be different from table's?
            BucketingVersion bucketingVersion = partitionBucketProperty.get().getBucketingVersion();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Here, we only check whether a BucketConversion is needed.
            if (readBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new BucketConversion(bucketingVersion, readBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
                if (readBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }
    Optional<BucketValidation> bucketValidation = Optional.empty();
    if (isValidateBucketing(session) && tableBucketInfo.isPresent()) {
        BucketSplitInfo info = tableBucketInfo.get();
        bucketValidation = Optional.of(new BucketValidation(info.getBucketingVersion(), info.getTableBucketCount(), info.getBucketColumns()));
    }
    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
            fs,
            partitionName,
            inputFormat,
            schema,
            partitionKeys,
            effectivePredicate,
            partitionMatchSupplier,
            partition.getTableToPartitionMapping(),
            bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(),
            bucketValidation,
            getMaxInitialSplitSize(session),
            isForceLocalScheduling(session),
            s3SelectPushdownEnabled,
            transaction,
            maxSplitFileSize);
    // To support custom input formats, we call getSplits() on the input format to obtain file splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new TrinoException(NOT_SUPPORTED, "Trino cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
        }
        if (AcidUtils.isTransactionalTable(table.getParameters())) {
            throw new TrinoException(NOT_SUPPORTED, "Hive transactional tables in an input format with UseFileSplitsFromInputFormat annotation are not supported: " + inputFormat.getClass().getSimpleName());
        }
        JobConf jobConf = toJobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        // Pass SerDes and Table parameters into input format configuration
        fromProperties(schema).forEach(jobConf::set);
        InputSplit[] splits = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> inputFormat.getSplits(jobConf, 0));
        return addSplitsToSource(splits, splitFactory);
    }
    List<Path> readPaths;
    List<HdfsFileStatusWithId> fileStatusOriginalFiles = ImmutableList.of();
    AcidInfo.Builder acidInfoBuilder = AcidInfo.builder(path);
    boolean isFullAcid = AcidUtils.isFullAcidTable(table.getParameters());
    if (AcidUtils.isTransactionalTable(table.getParameters())) {
        AcidUtils.Directory directory = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> AcidUtils.getAcidState(path, configuration, validWriteIds.orElseThrow(() -> new IllegalStateException("No validWriteIds present")), false, true));
        if (isFullAcid) {
            // From Hive version >= 3.0, delta/base files will always have file '_orc_acid_version' with value >= '2'.
            Path baseOrDeltaPath = directory.getBaseDirectory() != null ? directory.getBaseDirectory() : (directory.getCurrentDirectories().size() > 0 ? directory.getCurrentDirectories().get(0).getPath() : null);
            if (baseOrDeltaPath != null && AcidUtils.OrcAcidVersion.getAcidVersionFromMetaFile(baseOrDeltaPath, fs) >= 2) {
                // Trino cannot read ORC ACID tables with version < 2 (written by Hive older than 3.0)
                // See https://github.com/trinodb/trino/issues/2790#issuecomment-591901728 for more context
                // We perform initial version check based on _orc_acid_version file here.
                // If we cannot verify the version (the _orc_acid_version file may not exist),
                // we will do extra check based on ORC datafile metadata in OrcPageSourceFactory.
                acidInfoBuilder.setOrcAcidVersionValidated(true);
            }
        }
        readPaths = new ArrayList<>();
        // base
        if (directory.getBaseDirectory() != null) {
            readPaths.add(directory.getBaseDirectory());
        }
        // delta directories
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            if (!delta.isDeleteDelta()) {
                readPaths.add(delta.getPath());
            }
        }
        // Create a registry of delete_delta directories for the partition
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            if (delta.isDeleteDelta()) {
                if (!isFullAcid) {
                    throw new TrinoException(HIVE_BAD_DATA, format("Unexpected delete delta for a non full ACID table '%s'. Would be ignored by the reader: %s", table.getSchemaTableName(), delta.getPath()));
                }
                acidInfoBuilder.addDeleteDelta(delta.getPath());
            }
        }
        // initialize original files status list if present
        fileStatusOriginalFiles = directory.getOriginalFiles();
        for (HdfsFileStatusWithId hdfsFileStatusWithId : fileStatusOriginalFiles) {
            Path originalFilePath = hdfsFileStatusWithId.getFileStatus().getPath();
            long originalFileLength = hdfsFileStatusWithId.getFileStatus().getLen();
            if (originalFileLength == 0) {
                continue;
            }
            // Hive requires "original" files of transactional tables to conform to the bucketed tables naming pattern, to match them with delete deltas.
            int bucketId = getRequiredBucketNumber(originalFilePath);
            acidInfoBuilder.addOriginalFile(originalFilePath, originalFileLength, bucketId);
        }
    } else {
        // TODO https://github.com/trinodb/trino/issues/7603 - we should not reference acidInfoBuilder at all when reading from a non-ACID table
        // no ACID; no further validation needed
        acidInfoBuilder.setOrcAcidVersionValidated(true);
        readPaths = ImmutableList.of(path);
    }
    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        // TODO document in addToQueue() that it is sufficient to hold on to last returned future
        ListenableFuture<Void> lastResult = immediateVoidFuture();
        for (Path readPath : readPaths) {
            // list all files in the partition
            List<LocatedFileStatus> files = new ArrayList<>();
            try {
                Iterators.addAll(files, new HiveFileIterator(table, readPath, fs, directoryLister, namenodeStats, FAIL, ignoreAbsentPartitions));
            } catch (HiveFileIterator.NestedDirectoryNotAllowedException e) {
                // Fail here to be on the safe side. This seems to be the same as what Hive does
                throw new TrinoException(HIVE_INVALID_BUCKET_FILES, format("Hive table '%s' is corrupt. Found sub-directory '%s' in bucket directory for partition: %s", table.getSchemaTableName(), e.getNestedDirectoryPath(), splitFactory.getPartitionName()));
            }
            Optional<AcidInfo> acidInfo = isFullAcid ? acidInfoBuilder.build() : Optional.empty();
            lastResult = hiveSplitSource.addToQueue(getBucketedSplits(files, splitFactory, tableBucketInfo.get(), bucketConversion, splittable, acidInfo));
        }
        for (HdfsFileStatusWithId hdfsFileStatusWithId : fileStatusOriginalFiles) {
            List<LocatedFileStatus> locatedFileStatuses = ImmutableList.of((LocatedFileStatus) hdfsFileStatusWithId.getFileStatus());
            Optional<AcidInfo> acidInfo = isFullAcid ? Optional.of(acidInfoBuilder.buildWithRequiredOriginalFiles(getRequiredBucketNumber(hdfsFileStatusWithId.getFileStatus().getPath()))) : Optional.empty();
            lastResult = hiveSplitSource.addToQueue(getBucketedSplits(locatedFileStatuses, splitFactory, tableBucketInfo.get(), bucketConversion, splittable, acidInfo));
        }
        return lastResult;
    }
    for (Path readPath : readPaths) {
        Optional<AcidInfo> acidInfo = isFullAcid ? acidInfoBuilder.build() : Optional.empty();
        fileIterators.addLast(createInternalHiveSplitIterator(readPath, fs, splitFactory, splittable, acidInfo));
    }
    if (!fileStatusOriginalFiles.isEmpty()) {
        fileIterators.addLast(generateOriginalFilesSplits(splitFactory, fileStatusOriginalFiles, splittable, acidInfoBuilder, isFullAcid));
    }
    return COMPLETED_FUTURE;
}
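
The BucketValidation wiring in loadPartition reduces to a single decision: a validation descriptor is attached only when the session enables bucket validation and the table is bucketed, and it carries the table-level bucketing version, bucket count, and bucket columns. Below is a minimal sketch of that step, extracted for illustration only; the helper name createBucketValidation is hypothetical, while the accessors are the ones visible in the snippet above and the surrounding class's imports are assumed.

private static Optional<BucketValidation> createBucketValidation(ConnectorSession session, Optional<BucketSplitInfo> tableBucketInfo) {
    // Skip validation for unbucketed tables or when the session does not request it
    if (!isValidateBucketing(session) || !tableBucketInfo.isPresent()) {
        return Optional.empty();
    }
    BucketSplitInfo info = tableBucketInfo.get();
    // Carry the table-level bucketing definition so the reader can re-check every file it reads
    return Optional.of(new BucketValidation(info.getBucketingVersion(), info.getTableBucketCount(), info.getBucketColumns()));
}
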
Also used: Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), FileSystem (org.apache.hadoop.fs.FileSystem), HiveFileIterator (io.trino.plugin.hive.util.HiveFileIterator), ConfigurationUtils.toJobConf (io.trino.plugin.hive.util.ConfigurationUtils.toJobConf), JobConf (org.apache.hadoop.mapred.JobConf), InternalHiveSplitFactory (io.trino.plugin.hive.util.InternalHiveSplitFactory), AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils), HdfsFileStatusWithId (org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId), Properties (java.util.Properties), Maps.fromProperties (com.google.common.collect.Maps.fromProperties), SymlinkTextInputFormat (org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat), BucketingVersion (io.trino.plugin.hive.util.HiveBucketing.BucketingVersion), Iterator (java.util.Iterator), BooleanSupplier (java.util.function.BooleanSupplier), InputSplit (org.apache.hadoop.mapred.InputSplit), Path (org.apache.hadoop.fs.Path), LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus), BucketValidation (io.trino.plugin.hive.HiveSplit.BucketValidation), TrinoException (io.trino.spi.TrinoException), BucketConversion (io.trino.plugin.hive.HiveSplit.BucketConversion)

Example 2 with BucketValidation

Use of io.trino.plugin.hive.HiveSplit.BucketValidation in project trino by trinodb.

From class HivePageSourceProvider, method createBucketValidator:

private static Optional<BucketValidator> createBucketValidator(Path path, Optional<BucketValidation> bucketValidation, OptionalInt bucketNumber, List<ColumnMapping> columnMappings) {
    return bucketValidation.flatMap(validation -> {
        Map<Integer, ColumnMapping> baseHiveColumnToBlockIndex = columnMappings.stream()
                .filter(mapping -> mapping.getHiveColumnHandle().isBaseColumn())
                .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getBaseHiveColumnIndex(), identity()));
        int[] bucketColumnIndices = new int[validation.getBucketColumns().size()];
        List<TypeInfo> bucketColumnTypes = new ArrayList<>();
        for (int i = 0; i < validation.getBucketColumns().size(); i++) {
            HiveColumnHandle column = validation.getBucketColumns().get(i);
            ColumnMapping mapping = baseHiveColumnToBlockIndex.get(column.getBaseHiveColumnIndex());
            if (mapping == null) {
                // This bucket column is not read by the query, so the bucket number cannot be
                // recomputed and validated here, even if the bucket file contains wrong data.
                return Optional.empty();
            }
            bucketColumnIndices[i] = mapping.getIndex();
            bucketColumnTypes.add(mapping.getHiveColumnHandle().getHiveType().getTypeInfo());
        }
        return Optional.of(new BucketValidator(path, bucketColumnIndices, bucketColumnTypes, validation.getBucketingVersion(), validation.getBucketCount(), bucketNumber.orElseThrow()));
    });
}
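
The interesting step above is resolving each bucket column to the index of the block the page source will actually produce; if any bucket column is not among the read columns, validation is skipped entirely. The following is a simplified, self-contained restatement of that resolution step, not Trino code: generic String/Integer keys stand in for HiveColumnHandle and ColumnMapping, and the class and method names are illustrative only.

import java.util.List;
import java.util.Map;
import java.util.Optional;

final class BucketColumnIndexResolver {
    private BucketColumnIndexResolver() {}

    // Maps each bucket column to the block index it occupies in the pages produced by the reader.
    // Returns empty when any bucket column is not projected, mirroring the "mapping == null" branch above.
    static Optional<int[]> resolveIndices(List<String> bucketColumns, Map<String, Integer> readColumnToBlockIndex) {
        int[] indices = new int[bucketColumns.size()];
        for (int i = 0; i < bucketColumns.size(); i++) {
            Integer blockIndex = readColumnToBlockIndex.get(bucketColumns.get(i));
            if (blockIndex == null) {
                // The query does not read this bucket column, so the bucket number cannot be recomputed
                return Optional.empty();
            }
            indices[i] = blockIndex;
        }
        return Optional.of(indices);
    }
}

For example, resolveIndices(List.of("custkey"), Map.of("custkey", 0)) yields [0], while a bucket column missing from the map yields Optional.empty().
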
Also used: PARTITION_KEY (io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY), OrcFileWriterFactory (io.trino.plugin.hive.orc.OrcFileWriterFactory), GENERIC_INSUFFICIENT_RESOURCES (io.trino.spi.StandardErrorCode.GENERIC_INSUFFICIENT_RESOURCES), Maps.uniqueIndex (com.google.common.collect.Maps.uniqueIndex), HiveBucketFilter (io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter), Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument), PREFILLED (io.trino.plugin.hive.HivePageSourceProvider.ColumnMappingKind.PREFILLED), ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle), Configuration (org.apache.hadoop.conf.Configuration), Map (java.util.Map), ORIGINAL_FILE_PATH_MATCHER (io.trino.plugin.hive.HiveUpdatablePageSource.ORIGINAL_FILE_PATH_MATCHER), Path (org.apache.hadoop.fs.Path), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), BucketValidation (io.trino.plugin.hive.HiveSplit.BucketValidation), BiMap (com.google.common.collect.BiMap), AcidTransaction (io.trino.plugin.hive.acid.AcidTransaction), ImmutableSet (com.google.common.collect.ImmutableSet), Domain (io.trino.spi.predicate.Domain), ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList), ACID_ROW_STRUCT_COLUMN_ID (io.trino.plugin.hive.HiveUpdatablePageSource.ACID_ROW_STRUCT_COLUMN_ID), Set (java.util.Set), TrinoException (io.trino.spi.TrinoException), String.format (java.lang.String.format), Preconditions.checkState (com.google.common.base.Preconditions.checkState), Objects (java.util.Objects), HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext), List (java.util.List), ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap), ReaderRecordCursorWithProjections (io.trino.plugin.hive.HiveRecordCursorProvider.ReaderRecordCursorWithProjections), BucketingVersion (io.trino.plugin.hive.util.HiveBucketing.BucketingVersion), DynamicFilter (io.trino.spi.connector.DynamicFilter), Function.identity (java.util.function.Function.identity), Optional (java.util.Optional), NullableValue (io.trino.spi.predicate.NullableValue), Type (io.trino.spi.type.Type), HashMap (java.util.HashMap), OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType (io.trino.plugin.hive.orc.OrcTypeToHiveTypeTranslator.fromOrcTypeToHiveType), HiveBucketing.getHiveBucketFilter (io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter), OptionalInt (java.util.OptionalInt), ArrayList (java.util.ArrayList), ImmutableBiMap (com.google.common.collect.ImmutableBiMap), Inject (javax.inject.Inject), HashSet (java.util.HashSet), ImmutableList (com.google.common.collect.ImmutableList), OrcPageSource (io.trino.plugin.hive.orc.OrcPageSource), ColumnMapping.toColumnHandles (io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.toColumnHandles), RecordPageSource (io.trino.spi.connector.RecordPageSource), Objects.requireNonNull (java.util.Objects.requireNonNull), HiveUtil.getPrefilledColumnValue (io.trino.plugin.hive.util.HiveUtil.getPrefilledColumnValue), ColumnHandle (io.trino.spi.connector.ColumnHandle), BucketConversion (io.trino.plugin.hive.HiveSplit.BucketConversion), OrcType (io.trino.orc.metadata.OrcType), RecordCursor (io.trino.spi.connector.RecordCursor), Properties (java.util.Properties), ConnectorSplit (io.trino.spi.connector.ConnectorSplit), ConnectorPageSourceProvider (io.trino.spi.connector.ConnectorPageSourceProvider), ColumnMetadata (io.trino.orc.metadata.ColumnMetadata), ConnectorSession (io.trino.spi.connector.ConnectorSession), TupleDomain (io.trino.spi.predicate.TupleDomain), HiveColumnHandle.isRowIdColumnHandle (io.trino.plugin.hive.HiveColumnHandle.isRowIdColumnHandle), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), SYNTHESIZED (io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED), Collectors.toList (java.util.stream.Collectors.toList), VisibleForTesting (com.google.common.annotations.VisibleForTesting), EmptyPageSource (io.trino.spi.connector.EmptyPageSource), BucketValidator (io.trino.plugin.hive.HivePageSource.BucketValidator), TypeManager (io.trino.spi.type.TypeManager), OrcColumnId (io.trino.orc.metadata.OrcColumnId), ConnectorTransactionHandle (io.trino.spi.connector.ConnectorTransactionHandle), REGULAR (io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR)

Aggregations

BucketConversion (io.trino.plugin.hive.HiveSplit.BucketConversion) ×2
BucketValidation (io.trino.plugin.hive.HiveSplit.BucketValidation) ×2
BucketingVersion (io.trino.plugin.hive.util.HiveBucketing.BucketingVersion) ×2
TrinoException (io.trino.spi.TrinoException) ×2
ArrayList (java.util.ArrayList) ×2
Properties (java.util.Properties) ×2
Configuration (org.apache.hadoop.conf.Configuration) ×2
Path (org.apache.hadoop.fs.Path) ×2
VisibleForTesting (com.google.common.annotations.VisibleForTesting) ×1
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument) ×1
Preconditions.checkState (com.google.common.base.Preconditions.checkState) ×1
BiMap (com.google.common.collect.BiMap) ×1
ImmutableBiMap (com.google.common.collect.ImmutableBiMap) ×1
ImmutableList (com.google.common.collect.ImmutableList) ×1
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) ×1
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap) ×1
ImmutableSet (com.google.common.collect.ImmutableSet) ×1
Maps.fromProperties (com.google.common.collect.Maps.fromProperties) ×1
Maps.uniqueIndex (com.google.common.collect.Maps.uniqueIndex) ×1
ColumnMetadata (io.trino.orc.metadata.ColumnMetadata) ×1