Use of io.trino.plugin.hive.HivePageSource.BucketValidator in project trino by trinodb.
From the class HivePageSourceProvider, method createBucketValidator:
private static Optional<BucketValidator> createBucketValidator(Path path, Optional<BucketValidation> bucketValidation, OptionalInt bucketNumber, List<ColumnMapping> columnMappings)
{
    return bucketValidation.flatMap(validation -> {
        Map<Integer, ColumnMapping> baseHiveColumnToBlockIndex = columnMappings.stream()
                .filter(mapping -> mapping.getHiveColumnHandle().isBaseColumn())
                .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getBaseHiveColumnIndex(), identity()));

        int[] bucketColumnIndices = new int[validation.getBucketColumns().size()];
        List<TypeInfo> bucketColumnTypes = new ArrayList<>();
        for (int i = 0; i < validation.getBucketColumns().size(); i++) {
            HiveColumnHandle column = validation.getBucketColumns().get(i);
            ColumnMapping mapping = baseHiveColumnToBlockIndex.get(column.getBaseHiveColumnIndex());
            if (mapping == null) {
                // The bucket column is not read by the query, so invalid bucketing cannot affect the
                // results; filtering on the hidden $bucket column still correctly
                // partitions the table by bucket, even if the bucket has the wrong data.
                return Optional.empty();
            }
            bucketColumnIndices[i] = mapping.getIndex();
            bucketColumnTypes.add(mapping.getHiveColumnHandle().getHiveType().getTypeInfo());
        }

        return Optional.of(new BucketValidator(path, bucketColumnIndices, bucketColumnTypes, validation.getBucketingVersion(), validation.getBucketCount(), bucketNumber.orElseThrow()));
    });
}
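The validator is only constructed when every bucketing column is actually read by the query; as soon as one bucket column has no mapping, the method returns Optional.empty() and validation is skipped. Below is a minimal, self-contained sketch of that same lookup pattern using only JDK collections; the Mapping record, the resolveBucketIndices method, and their field names are illustrative stand-ins, not Trino API.

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;

class BucketLookupSketch {
    // Hypothetical stand-in for ColumnMapping: which base column it covers and where its block lives.
    record Mapping(int baseColumnIndex, int blockIndex) {}

    // Index the mappings by base column index, then resolve each bucket column to a block index.
    // If any bucket column is not read (no mapping), return empty, mirroring the early
    // Optional.empty() in createBucketValidator.
    static Optional<int[]> resolveBucketIndices(List<Integer> bucketColumns, List<Mapping> mappings) {
        Map<Integer, Mapping> byBaseIndex = mappings.stream()
                .collect(Collectors.toMap(Mapping::baseColumnIndex, Function.identity()));
        int[] indices = new int[bucketColumns.size()];
        for (int i = 0; i < bucketColumns.size(); i++) {
            Mapping mapping = byBaseIndex.get(bucketColumns.get(i));
            if (mapping == null) {
                return Optional.empty();
            }
            indices[i] = mapping.blockIndex();
        }
        return Optional.of(indices);
    }
}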
Use of io.trino.plugin.hive.HivePageSource.BucketValidator in project trino by trinodb.
From the class HivePageSourceProvider, method createHivePageSource:
public static Optional<ConnectorPageSource> createHivePageSource(
        Set<HivePageSourceFactory> pageSourceFactories, Set<HiveRecordCursorProvider> cursorProviders,
        Configuration configuration, ConnectorSession session, Path path, OptionalInt bucketNumber,
        long start, long length, long estimatedFileSize, Properties schema,
        TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> columns,
        TypeManager typeManager, Optional<BucketConversion> bucketConversion,
        Optional<BucketValidation> bucketValidation, boolean s3SelectPushdownEnabled,
        Optional<AcidInfo> acidInfo, boolean originalFile, AcidTransaction transaction,
        List<ColumnMapping> columnMappings)
{
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }

    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);

    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(
                configuration, session, path, start, length, estimatedFileSize, schema,
                desiredColumns, effectivePredicate, acidInfo, bucketNumber, originalFile, transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();

            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }

            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }

    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);

        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(
                configuration, session, path, start, length, estimatedFileSize, schema,
                desiredColumns, effectivePredicate, typeManager, s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();

            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }

            checkArgument(acidInfo.isEmpty(), "Acid is not supported");

            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(
                        bucketAdaptation.get().getBucketColumnIndices(),
                        bucketAdaptation.get().getBucketColumnHiveTypes(),
                        bucketAdaptation.get().getBucketingVersion(),
                        bucketAdaptation.get().getTableBucketCount(),
                        bucketAdaptation.get().getPartitionBucketCount(),
                        bucketAdaptation.get().getBucketToKeep(),
                        typeManager,
                        delegate);
            }

            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }

            // bucket adaptation already validates that data is in the right bucket
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }

            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());

            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }

    return Optional.empty();
}
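In the record-cursor branch the base cursor is wrapped step by step: a projections adapter, the bucket adapter, coercion, and finally bucket validation (only when no bucket adaptation is present, since adaptation already validates bucketing). A minimal sketch of that conditional decorator chain, using a hypothetical Cursor interface and a generic decorate helper rather than Trino's RecordCursor API:

import java.util.Optional;
import java.util.function.UnaryOperator;

class CursorDecoratorSketch {
    // Hypothetical stand-in for a record cursor.
    interface Cursor {
        boolean advanceNextPosition();
    }

    // Apply only the decorators that are present; the order mirrors the snippet above:
    // projections adapter, bucket adapter, coercion, then bucket validation.
    static Cursor decorate(
            Cursor delegate,
            Optional<UnaryOperator<Cursor>> projectionsAdapter,
            Optional<UnaryOperator<Cursor>> bucketAdapter,
            Optional<UnaryOperator<Cursor>> coercion,
            Optional<UnaryOperator<Cursor>> bucketValidator) {
        Cursor cursor = delegate;
        if (projectionsAdapter.isPresent()) {
            cursor = projectionsAdapter.get().apply(cursor);
        }
        if (bucketAdapter.isPresent()) {
            cursor = bucketAdapter.get().apply(cursor);
        }
        if (coercion.isPresent()) {
            cursor = coercion.get().apply(cursor);
        }
        // Bucket adaptation already validates bucketing, so validate only when it is absent.
        if (bucketAdapter.isEmpty() && bucketValidator.isPresent()) {
            cursor = bucketValidator.get().apply(cursor);
        }
        return cursor;
    }
}

Each wrapper keeps the Cursor contract and delegates to the cursor it wraps, which is why any subset of the decorators composes without changes to the caller.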