Use of io.trino.plugin.hive.HiveSplit.BucketConversion in project trino by trinodb.
The class HivePageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle tableHandle, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
HiveSplit hiveSplit = (HiveSplit) split;
if (shouldSkipBucket(hiveTable, hiveSplit, dynamicFilter)) {
return new EmptyPageSource();
}
List<HiveColumnHandle> hiveColumns = columns.stream().map(HiveColumnHandle.class::cast).collect(toList());
List<HiveColumnHandle> dependencyColumns = hiveColumns.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
if (hiveTable.isAcidUpdate()) {
hiveColumns = hiveTable.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("update processor not present")).mergeWithNonUpdatedColumns(hiveColumns);
}
Path path = new Path(hiveSplit.getPath());
boolean originalFile = ORIGINAL_FILE_PATH_MATCHER.matcher(path.toString()).matches();
List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(hiveSplit.getPartitionName(), hiveSplit.getPartitionKeys(), hiveColumns, hiveSplit.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of()), hiveSplit.getTableToPartitionMapping(), path, hiveSplit.getBucketNumber(), hiveSplit.getEstimatedFileSize(), hiveSplit.getFileModifiedTime());
// This can happen when dynamic filters are collected after partition splits were listed.
if (shouldSkipSplit(columnMappings, dynamicFilter)) {
return new EmptyPageSource();
}
Configuration configuration = hdfsEnvironment.getConfiguration(new HdfsContext(session), path);
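// Simplify the dynamic filter so each domain stays within the compaction threshold before intersecting it with the table's compact effective predicate below.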
TupleDomain<HiveColumnHandle> simplifiedDynamicFilter = dynamicFilter.getCurrentPredicate().transformKeys(HiveColumnHandle.class::cast).simplify(domainCompactionThreshold);
Optional<ConnectorPageSource> pageSource = createHivePageSource(pageSourceFactories, cursorProviders, configuration, session, path, hiveSplit.getBucketNumber(), hiveSplit.getStart(), hiveSplit.getLength(), hiveSplit.getEstimatedFileSize(), hiveSplit.getSchema(), hiveTable.getCompactEffectivePredicate().intersect(simplifiedDynamicFilter), hiveColumns, typeManager, hiveSplit.getBucketConversion(), hiveSplit.getBucketValidation(), hiveSplit.isS3SelectPushdownEnabled(), hiveSplit.getAcidInfo(), originalFile, hiveTable.getTransaction(), columnMappings);
if (pageSource.isPresent()) {
ConnectorPageSource source = pageSource.get();
if (hiveTable.isAcidDelete() || hiveTable.isAcidUpdate()) {
checkArgument(orcFileWriterFactory.isPresent(), "orcFileWriterFactory not supplied but required for DELETE and UPDATE");
HivePageSource hivePageSource = (HivePageSource) source;
OrcPageSource orcPageSource = (OrcPageSource) hivePageSource.getDelegate();
ColumnMetadata<OrcType> columnMetadata = orcPageSource.getColumnTypes();
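// Original (pre-ACID) files carry no ACID wrapper struct, so the row type is the file's root struct (ORC column 0); ACID files nest the data under the 'row' struct field.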
int acidRowColumnId = originalFile ? 0 : ACID_ROW_STRUCT_COLUMN_ID;
HiveType rowType = fromOrcTypeToHiveType(columnMetadata.get(new OrcColumnId(acidRowColumnId)), columnMetadata);
long currentSplitNumber = hiveSplit.getSplitNumber();
if (currentSplitNumber >= MAX_NUMBER_OF_SPLITS) {
throw new TrinoException(GENERIC_INSUFFICIENT_RESOURCES, format("Number of splits is higher than maximum possible number of splits %d", MAX_NUMBER_OF_SPLITS));
}
long initialRowId = currentSplitNumber << PER_SPLIT_ROW_ID_BITS;
return new HiveUpdatablePageSource(hiveTable, hiveSplit.getPartitionName(), hiveSplit.getStatementId(), source, typeManager, hiveSplit.getBucketNumber(), path, originalFile, orcFileWriterFactory.get(), configuration, session, rowType, dependencyColumns, hiveTable.getTransaction().getOperation(), initialRowId, MAX_NUMBER_OF_ROWS_PER_SPLIT);
}
return source;
}
throw new RuntimeException("Could not find a file reader for split " + hiveSplit);
}
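The DELETE/UPDATE branch above reserves a disjoint block of row IDs for every split by shifting the split number into the high bits. A minimal, self-contained sketch of that arithmetic follows; the bit width and the derived limits are illustrative assumptions, not Trino's actual constants.

// Sketch of the per-split row ID scheme; the constant values are assumptions for illustration only.
public final class RowIdSpaceSketch {
    private static final int PER_SPLIT_ROW_ID_BITS = 42; // assumed width of the per-split row ID space
    private static final long MAX_NUMBER_OF_ROWS_PER_SPLIT = 1L << PER_SPLIT_ROW_ID_BITS;
    private static final long MAX_NUMBER_OF_SPLITS = 1L << (63 - PER_SPLIT_ROW_ID_BITS);

    private RowIdSpaceSketch() {}

    // Returns the first row ID owned by the given split number.
    public static long initialRowId(long splitNumber) {
        if (splitNumber >= MAX_NUMBER_OF_SPLITS) {
            throw new IllegalArgumentException("Number of splits is higher than maximum possible number of splits " + MAX_NUMBER_OF_SPLITS);
        }
        // Each split owns a block of MAX_NUMBER_OF_ROWS_PER_SPLIT consecutive row IDs.
        return splitNumber << PER_SPLIT_ROW_ID_BITS;
    }

    public static void main(String[] args) {
        System.out.println(initialRowId(0)); // 0
        System.out.println(initialRowId(1)); // 4398046511104 (2^42)
    }
}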
Use of io.trino.plugin.hive.HiveSplit.BucketConversion in project trino by trinodb.
The class InternalHiveSplitFactory, method createInternalHiveSplit:
private Optional<InternalHiveSplit> createInternalHiveSplit(Path path, BlockLocation[] blockLocations, long start, long length,
// Estimated because, for example, encrypted S3 files may be padded, so reported size may not reflect actual size
long estimatedFileSize, long fileModificationTime, OptionalInt bucketNumber, boolean splittable, Optional<AcidInfo> acidInfo) {
String pathString = path.toString();
if (!pathMatchesPredicate(pathDomain, pathString)) {
return Optional.empty();
}
// per HIVE-13040, empty files are allowed; they simply produce no splits
if (estimatedFileSize == 0) {
return Optional.empty();
}
// The dynamic filter may not have been ready when the partition was loaded, but it might be ready when splits are enumerated lazily.
if (!partitionMatchSupplier.getAsBoolean()) {
return Optional.empty();
}
if (maxSplitFileSize.isPresent() && estimatedFileSize > maxSplitFileSize.get()) {
return Optional.empty();
}
ImmutableList.Builder<InternalHiveBlock> blockBuilder = ImmutableList.builder();
for (BlockLocation blockLocation : blockLocations) {
// clamp the block range
long blockStart = Math.max(start, blockLocation.getOffset());
long blockEnd = Math.min(start + length, blockLocation.getOffset() + blockLocation.getLength());
if (blockStart > blockEnd) {
// block is outside split range
continue;
}
if (blockStart == blockEnd && !(blockStart == start && blockEnd == start + length)) {
// skip zero-width blocks, except in the special case where the split range itself is empty and the block covers that empty interval
continue;
}
blockBuilder.add(new InternalHiveBlock(blockStart, blockEnd, getHostAddresses(blockLocation)));
}
List<InternalHiveBlock> blocks = blockBuilder.build();
checkBlocks(path, blocks, start, length);
if (!splittable) {
// not splittable, use the hosts from the first block if it exists
blocks = ImmutableList.of(new InternalHiveBlock(start, start + length, blocks.get(0).getAddresses()));
}
int bucketNumberIndex = bucketNumber.orElse(0);
return Optional.of(new InternalHiveSplit(partitionName, pathString, start, start + length, estimatedFileSize, fileModificationTime, schema, partitionKeys, blocks, bucketNumber, () -> bucketStatementCounters.computeIfAbsent(bucketNumberIndex, index -> new AtomicInteger()).getAndIncrement(), splittable, forceLocalScheduling && allBlocksHaveAddress(blocks), tableToPartitionMapping, bucketConversion, bucketValidation, s3SelectPushdownEnabled && S3SelectPushdown.isCompressionCodecSupported(inputFormat, path), acidInfo, partitionMatchSupplier));
}
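The loop above clamps each HDFS block to the split range [start, start + length) and drops zero-width blocks unless the split itself is empty. The standalone sketch below reproduces just that clamping rule, using plain longs in place of Hadoop's BlockLocation.

// Standalone sketch of the block-clamping rule; plain longs stand in for Hadoop's BlockLocation.
public final class BlockClampSketch {
    private BlockClampSketch() {}

    // Clamps a file block to the split range [splitStart, splitStart + splitLength).
    // Returns {blockStart, blockEnd}, or null when the block should be skipped.
    public static long[] clamp(long splitStart, long splitLength, long blockOffset, long blockLength) {
        long blockStart = Math.max(splitStart, blockOffset);
        long blockEnd = Math.min(splitStart + splitLength, blockOffset + blockLength);
        if (blockStart > blockEnd) {
            return null; // block is entirely outside the split range
        }
        if (blockStart == blockEnd && !(blockStart == splitStart && blockEnd == splitStart + splitLength)) {
            return null; // zero-width block that does not cover an empty split
        }
        return new long[] {blockStart, blockEnd};
    }

    public static void main(String[] args) {
        // A 100-byte split starting at offset 50 overlaps a block [0, 128) only in [50, 128).
        long[] clamped = clamp(50, 100, 0, 128);
        System.out.println(clamped[0] + ".." + clamped[1]); // prints 50..128
    }
}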
Use of io.trino.plugin.hive.HiveSplit.BucketConversion in project trino by trinodb.
The class BackgroundHiveSplitLoader, method loadPartition:
private ListenableFuture<Void> loadPartition(HivePartitionMetadata partition) throws IOException {
HivePartition hivePartition = partition.getHivePartition();
String partitionName = hivePartition.getPartitionId();
Properties schema = getPartitionSchema(table, partition.getPartition());
List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
TupleDomain<HiveColumnHandle> effectivePredicate = compactEffectivePredicate.transformKeys(HiveColumnHandle.class::cast);
BooleanSupplier partitionMatchSupplier = createPartitionMatchSupplier(dynamicFilter, hivePartition, getPartitionKeyColumnHandles(table, typeManager));
if (!partitionMatchSupplier.getAsBoolean()) {
// Avoid listing files and creating splits from a partition if it has been pruned due to dynamic filters
return COMPLETED_FUTURE;
}
Path path = new Path(getPartitionLocation(table, partition.getPartition()));
Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
// S3 Select pushdown works at the granularity of individual S3 objects,
// therefore we must not split files when it is enabled.
// Files with skipped header / footer lines are not splittable, except for the special case when skip.header.line.count=1
boolean splittable = !s3SelectPushdownEnabled && getFooterCount(schema) == 0 && getHeaderCount(schema) <= 1;
if (inputFormat instanceof SymlinkTextInputFormat) {
if (tableBucketInfo.isPresent()) {
throw new TrinoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
}
InputFormat<?, ?> targetInputFormat = getInputFormat(configuration, schema, true);
List<Path> targetPaths = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> getTargetPathsFromSymlink(fs, path));
Set<Path> parents = targetPaths.stream().map(Path::getParent).distinct().collect(toImmutableSet());
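// Optimization: when every symlink target lives in a single parent directory and recursive listing is disabled, try to build splits directly from the manifest instead of listing each target separately.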
if (optimizeSymlinkListing && parents.size() == 1 && !recursiveDirWalkerEnabled) {
Optional<Iterator<InternalHiveSplit>> manifestFileIterator = buildManifestFileIterator(targetInputFormat, partitionName, schema, partitionKeys, effectivePredicate, partitionMatchSupplier, s3SelectPushdownEnabled, partition.getTableToPartitionMapping(), getOnlyElement(parents), targetPaths, splittable);
if (manifestFileIterator.isPresent()) {
fileIterators.addLast(manifestFileIterator.get());
return COMPLETED_FUTURE;
}
}
return createHiveSymlinkSplits(partitionName, targetInputFormat, schema, partitionKeys, effectivePredicate, partitionMatchSupplier, s3SelectPushdownEnabled, partition.getTableToPartitionMapping(), targetPaths);
}
Optional<BucketConversion> bucketConversion = Optional.empty();
boolean bucketConversionRequiresWorkerParticipation = false;
if (partition.getPartition().isPresent()) {
Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
int readBucketCount = tableBucketInfo.get().getReadBucketCount();
// TODO can partition's bucketing_version be different from table's?
BucketingVersion bucketingVersion = partitionBucketProperty.get().getBucketingVersion();
int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
// Here, it's just checking whether it needs the BucketConversion.
if (readBucketCount != partitionBucketCount) {
bucketConversion = Optional.of(new BucketConversion(bucketingVersion, readBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
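// Reading with more buckets than the partition was written with means one partition file can hold rows for several read buckets, so workers must filter rows at scan time; the opposite direction is handled purely by split assignment.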
if (readBucketCount > partitionBucketCount) {
bucketConversionRequiresWorkerParticipation = true;
}
}
}
}
Optional<BucketValidation> bucketValidation = Optional.empty();
if (isValidateBucketing(session) && tableBucketInfo.isPresent()) {
BucketSplitInfo info = tableBucketInfo.get();
bucketValidation = Optional.of(new BucketValidation(info.getBucketingVersion(), info.getTableBucketCount(), info.getBucketColumns()));
}
InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(fs, partitionName, inputFormat, schema, partitionKeys, effectivePredicate, partitionMatchSupplier, partition.getTableToPartitionMapping(), bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(), bucketValidation, getMaxInitialSplitSize(session), isForceLocalScheduling(session), s3SelectPushdownEnabled, transaction, maxSplitFileSize);
// To support custom input formats, call getSplits() on the input format to obtain file splits.
if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
if (tableBucketInfo.isPresent()) {
throw new TrinoException(NOT_SUPPORTED, "Trino cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
}
if (AcidUtils.isTransactionalTable(table.getParameters())) {
throw new TrinoException(NOT_SUPPORTED, "Hive transactional tables in an input format with UseFileSplitsFromInputFormat annotation are not supported: " + inputFormat.getClass().getSimpleName());
}
JobConf jobConf = toJobConf(configuration);
FileInputFormat.setInputPaths(jobConf, path);
// Pass SerDes and Table parameters into input format configuration
fromProperties(schema).forEach(jobConf::set);
InputSplit[] splits = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> inputFormat.getSplits(jobConf, 0));
return addSplitsToSource(splits, splitFactory);
}
List<Path> readPaths;
List<HdfsFileStatusWithId> fileStatusOriginalFiles = ImmutableList.of();
AcidInfo.Builder acidInfoBuilder = AcidInfo.builder(path);
boolean isFullAcid = AcidUtils.isFullAcidTable(table.getParameters());
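// For transactional tables, derive the readable base, delta, and delete_delta directories from the ACID directory state.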
if (AcidUtils.isTransactionalTable(table.getParameters())) {
AcidUtils.Directory directory = hdfsEnvironment.doAs(hdfsContext.getIdentity(), () -> AcidUtils.getAcidState(path, configuration, validWriteIds.orElseThrow(() -> new IllegalStateException("No validWriteIds present")), false, true));
if (isFullAcid) {
// From Hive version >= 3.0, delta/base files will always have file '_orc_acid_version' with value >= '2'.
Path baseOrDeltaPath = directory.getBaseDirectory() != null ? directory.getBaseDirectory() : (directory.getCurrentDirectories().size() > 0 ? directory.getCurrentDirectories().get(0).getPath() : null);
if (baseOrDeltaPath != null && AcidUtils.OrcAcidVersion.getAcidVersionFromMetaFile(baseOrDeltaPath, fs) >= 2) {
// Trino cannot read ORC ACID tables with version < 2 (written by Hive older than 3.0)
// See https://github.com/trinodb/trino/issues/2790#issuecomment-591901728 for more context
// We perform initial version check based on _orc_acid_version file here.
// If we cannot verify the version (the _orc_acid_version file may not exist),
// we will do extra check based on ORC datafile metadata in OrcPageSourceFactory.
acidInfoBuilder.setOrcAcidVersionValidated(true);
}
}
readPaths = new ArrayList<>();
// base
if (directory.getBaseDirectory() != null) {
readPaths.add(directory.getBaseDirectory());
}
// delta directories
for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
if (!delta.isDeleteDelta()) {
readPaths.add(delta.getPath());
}
}
// Create a registry of delete_delta directories for the partition
for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
if (delta.isDeleteDelta()) {
if (!isFullAcid) {
throw new TrinoException(HIVE_BAD_DATA, format("Unexpected delete delta for a non full ACID table '%s'. Would be ignored by the reader: %s", table.getSchemaTableName(), delta.getPath()));
}
acidInfoBuilder.addDeleteDelta(delta.getPath());
}
}
// initialize original files status list if present
fileStatusOriginalFiles = directory.getOriginalFiles();
for (HdfsFileStatusWithId hdfsFileStatusWithId : fileStatusOriginalFiles) {
Path originalFilePath = hdfsFileStatusWithId.getFileStatus().getPath();
long originalFileLength = hdfsFileStatusWithId.getFileStatus().getLen();
if (originalFileLength == 0) {
continue;
}
// Hive requires "original" files of transactional tables to conform to the bucketed tables naming pattern, to match them with delete deltas.
int bucketId = getRequiredBucketNumber(originalFilePath);
acidInfoBuilder.addOriginalFile(originalFilePath, originalFileLength, bucketId);
}
} else {
// TODO https://github.com/trinodb/trino/issues/7603 - we should not reference acidInfoBuilder at all when we are not reading from an ACID table
// no ACID; no further validation needed
acidInfoBuilder.setOrcAcidVersionValidated(true);
readPaths = ImmutableList.of(path);
}
// Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
if (tableBucketInfo.isPresent()) {
// TODO document in addToQueue() that it is sufficient to hold on to last returned future
ListenableFuture<Void> lastResult = immediateVoidFuture();
for (Path readPath : readPaths) {
// list all files in the partition
List<LocatedFileStatus> files = new ArrayList<>();
try {
Iterators.addAll(files, new HiveFileIterator(table, readPath, fs, directoryLister, namenodeStats, FAIL, ignoreAbsentPartitions));
} catch (HiveFileIterator.NestedDirectoryNotAllowedException e) {
// Fail here to be on the safe side. This seems to be the same as what Hive does
throw new TrinoException(HIVE_INVALID_BUCKET_FILES, format("Hive table '%s' is corrupt. Found sub-directory '%s' in bucket directory for partition: %s", table.getSchemaTableName(), e.getNestedDirectoryPath(), splitFactory.getPartitionName()));
}
Optional<AcidInfo> acidInfo = isFullAcid ? acidInfoBuilder.build() : Optional.empty();
lastResult = hiveSplitSource.addToQueue(getBucketedSplits(files, splitFactory, tableBucketInfo.get(), bucketConversion, splittable, acidInfo));
}
for (HdfsFileStatusWithId hdfsFileStatusWithId : fileStatusOriginalFiles) {
List<LocatedFileStatus> locatedFileStatuses = ImmutableList.of((LocatedFileStatus) hdfsFileStatusWithId.getFileStatus());
Optional<AcidInfo> acidInfo = isFullAcid ? Optional.of(acidInfoBuilder.buildWithRequiredOriginalFiles(getRequiredBucketNumber(hdfsFileStatusWithId.getFileStatus().getPath()))) : Optional.empty();
lastResult = hiveSplitSource.addToQueue(getBucketedSplits(locatedFileStatuses, splitFactory, tableBucketInfo.get(), bucketConversion, splittable, acidInfo));
}
return lastResult;
}
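// Non-bucketed partitions are enumerated lazily: queue an iterator per read path instead of listing all files up front.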
for (Path readPath : readPaths) {
Optional<AcidInfo> acidInfo = isFullAcid ? acidInfoBuilder.build() : Optional.empty();
fileIterators.addLast(createInternalHiveSplitIterator(readPath, fs, splitFactory, splittable, acidInfo));
}
if (!fileStatusOriginalFiles.isEmpty()) {
fileIterators.addLast(generateOriginalFilesSplits(splitFactory, fileStatusOriginalFiles, splittable, acidInfoBuilder, isFullAcid));
}
return COMPLETED_FUTURE;
}
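loadPartition only builds a BucketConversion when the partition was written with a different bucket count than the table is read with, and only forwards it to the split factory when worker participation is required. A minimal sketch of that decision follows, with a hypothetical PartitionBucketConversion value class standing in for HiveSplit.BucketConversion.

import java.util.Optional;

// Sketch of the bucket-conversion decision; PartitionBucketConversion is a hypothetical stand-in
// for io.trino.plugin.hive.HiveSplit.BucketConversion.
public final class BucketConversionSketch {
    public static final class PartitionBucketConversion {
        private final int readBucketCount;
        private final int partitionBucketCount;

        PartitionBucketConversion(int readBucketCount, int partitionBucketCount) {
            this.readBucketCount = readBucketCount;
            this.partitionBucketCount = partitionBucketCount;
        }

        // Only reading with more buckets than the partition has requires row filtering on workers.
        public boolean requiresWorkerParticipation() {
            return readBucketCount > partitionBucketCount;
        }

        @Override
        public String toString() {
            return "convert " + partitionBucketCount + " partition buckets to " + readBucketCount + " read buckets";
        }
    }

    private BucketConversionSketch() {}

    // Mirrors the check in loadPartition: no conversion when the bucket counts already match.
    public static Optional<PartitionBucketConversion> decide(int readBucketCount, int partitionBucketCount) {
        if (readBucketCount == partitionBucketCount) {
            return Optional.empty();
        }
        return Optional.of(new PartitionBucketConversion(readBucketCount, partitionBucketCount));
    }

    public static void main(String[] args) {
        System.out.println(decide(32, 32)); // Optional.empty
        decide(64, 32).ifPresent(conversion ->
                System.out.println(conversion + ", worker participation: " + conversion.requiresWorkerParticipation()));
    }
}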