Use of io.trino.spi.connector.EmptyPageSource in project trino by trinodb.
In class TableScanOperator, method addSplit:
@Override
public Supplier<Optional<UpdatablePageSource>> addSplit(Split split)
{
    requireNonNull(split, "split is null");
    checkState(this.split == null, "Table scan split already set");

    if (finished) {
        return Optional::empty;
    }

    this.split = split;

    Object splitInfo = split.getInfo();
    if (splitInfo != null) {
        operatorContext.setInfoSupplier(Suppliers.ofInstance(new SplitOperatorInfo(split.getCatalogName(), splitInfo)));
    }

    blocked.set(null);

    if (split.getConnectorSplit() instanceof EmptySplit) {
        source = new EmptyPageSource();
    }

    return () -> {
        if (source instanceof UpdatablePageSource) {
            return Optional.of((UpdatablePageSource) source);
        }
        return Optional.empty();
    };
}
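A minimal standalone sketch (not part of the Trino codebase) of why EmptyPageSource is a safe stand-in when the connector split is an EmptySplit: it reports itself as finished immediately and never produces a page, so the standard drain loop over a ConnectorPageSource reads zero rows.

import io.trino.spi.Page;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.EmptyPageSource;

public class EmptyPageSourceDemo
{
    public static void main(String[] args)
            throws Exception
    {
        ConnectorPageSource source = new EmptyPageSource();

        long rows = 0;
        // Standard drain loop usable against any ConnectorPageSource
        while (!source.isFinished()) {
            Page page = source.getNextPage();
            if (page != null) {
                rows += page.getPositionCount();
            }
        }
        source.close();

        // EmptyPageSource is finished from the start, so the loop body never runs
        System.out.println("rows read: " + rows); // prints "rows read: 0"
    }
}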
Use of io.trino.spi.connector.EmptyPageSource in project trino by trinodb.
In class DeltaLakePageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(
        ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit,
        ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter)
{
    DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
    DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;

    // We reach here when we could not prune the split using file level stats, table predicate
    // and the dynamic filter in the coordinator during split generation. The file level stats
    // in DeltaLakeSplit#filePredicate could help to prune this split when a more selective dynamic filter
    // is available now, without having to access parquet file footer for row-group stats.
    // We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
    TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(
            table.getNonPartitionConstraint(),
            split.getStatisticsPredicate(),
            dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
    if (filteredSplitPredicate.isNone()) {
        return new EmptyPageSource();
    }

    List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream().map(DeltaLakeColumnHandle.class::cast).collect(toImmutableList());
    Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
    List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream().filter(column -> column.getColumnType() == REGULAR).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList());

    Path path = new Path(split.getPath());
    HdfsContext hdfsContext = new HdfsContext(session);
    TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));

    if (table.getWriteType().isPresent()) {
        return new DeltaLakeUpdatablePageSource(
                table, deltaLakeColumns, partitionKeys, split.getPath(), split.getFileSize(), split.getFileModifiedTime(),
                session, executorService, hdfsEnvironment, hdfsContext, parquetDateTimeZone, parquetReaderOptions,
                parquetPredicate, typeManager, updateResultJsonCodec);
    }

    ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(
            path, split.getStart(), split.getLength(), split.getFileSize(), hiveColumnHandles, parquetPredicate,
            true, hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path), session.getIdentity(),
            parquetDateTimeZone, fileFormatDataSourceStats,
            parquetReaderOptions.withMaxReadBlockSize(getParquetMaxReadBlockSize(session)).withUseColumnIndex(isParquetUseColumnIndex(session)));

    verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");

    return new DeltaLakePageSource(deltaLakeColumns, partitionKeys, pageSource.get(), split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
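The short-circuit above hinges on TupleDomain.isNone(). The standalone sketch below (not from the Trino codebase; the column name "x" is illustrative) shows how intersecting contradictory domains with the same TupleDomain.intersect call used in createPageSource yields a "none" domain, which is exactly the condition that triggers the EmptyPageSource return.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

import static io.trino.spi.type.BigintType.BIGINT;

public class NonePredicateDemo
{
    public static void main(String[] args)
    {
        TupleDomain<String> xEquals1 = TupleDomain.withColumnDomains(
                ImmutableMap.of("x", Domain.singleValue(BIGINT, 1L)));
        TupleDomain<String> xEquals2 = TupleDomain.withColumnDomains(
                ImmutableMap.of("x", Domain.singleValue(BIGINT, 2L)));

        // Same static intersect(List) used to combine the table constraint, split statistics
        // predicate, and dynamic filter in createPageSource above
        TupleDomain<String> combined = TupleDomain.intersect(ImmutableList.of(xEquals1, xEquals2));

        // x = 1 AND x = 2 is unsatisfiable, so the combined domain is "none":
        // no row can match, and the split can be answered with an EmptyPageSource
        System.out.println(combined.isNone()); // true
    }
}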
Use of io.trino.spi.connector.EmptyPageSource in project trino by trinodb.
In class HivePageSourceProvider, method createHivePageSource:
public static Optional<ConnectorPageSource> createHivePageSource(
        Set<HivePageSourceFactory> pageSourceFactories, Set<HiveRecordCursorProvider> cursorProviders,
        Configuration configuration, ConnectorSession session, Path path, OptionalInt bucketNumber,
        long start, long length, long estimatedFileSize, Properties schema,
        TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> columns, TypeManager typeManager,
        Optional<BucketConversion> bucketConversion, Optional<BucketValidation> bucketValidation,
        boolean s3SelectPushdownEnabled, Optional<AcidInfo> acidInfo, boolean originalFile,
        AcidTransaction transaction, List<ColumnMapping> columnMappings)
{
    if (effectivePredicate.isNone()) {
        return Optional.of(new EmptyPageSource());
    }

    List<ColumnMapping> regularAndInterimColumnMappings = ColumnMapping.extractRegularAndInterimColumnMappings(columnMappings);
    Optional<BucketAdaptation> bucketAdaptation = createBucketAdaptation(bucketConversion, bucketNumber, regularAndInterimColumnMappings);
    Optional<BucketValidator> bucketValidator = createBucketValidator(path, bucketValidation, bucketNumber, regularAndInterimColumnMappings);

    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager);
        Optional<ReaderPageSource> readerWithProjections = pageSourceFactory.createPageSource(
                configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns,
                effectivePredicate, acidInfo, bucketNumber, originalFile, transaction);
        if (readerWithProjections.isPresent()) {
            ConnectorPageSource pageSource = readerWithProjections.get().get();
            Optional<ReaderColumns> readerProjections = readerWithProjections.get().getReaderColumns();
            Optional<ReaderProjectionsAdapter> adapter = Optional.empty();
            if (readerProjections.isPresent()) {
                adapter = Optional.of(hiveProjectionsAdapter(desiredColumns, readerProjections.get()));
            }
            return Optional.of(new HivePageSource(columnMappings, bucketAdaptation, bucketValidator, adapter, typeManager, pageSource));
        }
    }

    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        List<HiveColumnHandle> desiredColumns = toColumnHandles(regularAndInterimColumnMappings, doCoercion, typeManager);
        Optional<ReaderRecordCursorWithProjections> readerWithProjections = provider.createRecordCursor(
                configuration, session, path, start, length, estimatedFileSize, schema, desiredColumns,
                effectivePredicate, typeManager, s3SelectPushdownEnabled);
        if (readerWithProjections.isPresent()) {
            RecordCursor delegate = readerWithProjections.get().getRecordCursor();
            Optional<ReaderColumns> projections = readerWithProjections.get().getProjectedReaderColumns();
            if (projections.isPresent()) {
                ReaderProjectionsAdapter projectionsAdapter = hiveProjectionsAdapter(desiredColumns, projections.get());
                delegate = new HiveReaderProjectionsAdaptingRecordCursor(delegate, projectionsAdapter);
            }

            checkArgument(acidInfo.isEmpty(), "Acid is not supported");

            if (bucketAdaptation.isPresent()) {
                delegate = new HiveBucketAdapterRecordCursor(
                        bucketAdaptation.get().getBucketColumnIndices(), bucketAdaptation.get().getBucketColumnHiveTypes(),
                        bucketAdaptation.get().getBucketingVersion(), bucketAdaptation.get().getTableBucketCount(),
                        bucketAdaptation.get().getPartitionBucketCount(), bucketAdaptation.get().getBucketToKeep(),
                        typeManager, delegate);
            }

            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularAndInterimColumnMappings, typeManager, delegate);
            }

            // bucket adaptation already validates that data is in the right bucket
            if (bucketAdaptation.isEmpty() && bucketValidator.isPresent()) {
                delegate = bucketValidator.get().wrapRecordCursor(delegate, typeManager);
            }

            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, delegate);
            List<Type> columnTypes = columns.stream().map(HiveColumnHandle::getType).collect(toList());

            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    return Optional.empty();
}
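For contrast, here is a minimal sketch of the two distinct "nothing to read" outcomes seen at the top and bottom of createHivePageSource: an unsatisfiable predicate still succeeds, returning Optional.of(new EmptyPageSource()) (a valid source with zero rows), whereas Optional.empty() means no page source factory or record cursor provider could handle the file. The class and method names below are hypothetical placeholders, not Trino code.

import java.util.Optional;

import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.EmptyPageSource;
import io.trino.spi.predicate.TupleDomain;

public class PageSourceOutcomes
{
    // Hypothetical stand-in for the decision made at the start of createHivePageSource
    static Optional<ConnectorPageSource> forPredicate(TupleDomain<String> effectivePredicate)
    {
        if (effectivePredicate.isNone()) {
            // The split cannot produce rows: return a valid, already-finished source
            return Optional.of(new EmptyPageSource());
        }
        // Real code would try page source factories and record cursor providers here;
        // an empty Optional means none of them could read the file
        return Optional.empty();
    }

    public static void main(String[] args)
    {
        System.out.println(forPredicate(TupleDomain.none())); // Optional containing an EmptyPageSource
        System.out.println(forPredicate(TupleDomain.all()));  // Optional.empty
    }
}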