Search in sources :

Example 1 with ConnectorPartitionHandle

use of io.trino.spi.connector.ConnectorPartitionHandle in project trino by trinodb.

the class IcebergSplitSource method getNextBatch.

@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
    long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
    if (dynamicFilter.isAwaitable() && timeLeft > 0) {
        return dynamicFilter.isBlocked().thenApply(ignored -> EMPTY_BATCH).completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
    }
    if (combinedScanIterable == null) {
        // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
        this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
        TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
        // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
        TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
        if (!simplifiedPredicate.equals(fullPredicate)) {
            // Pushed down predicate was simplified, always evaluate it against individual splits
            this.pushedDownDynamicFilterPredicate = TupleDomain.all();
        }
        TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
        if (effectivePredicate.isNone()) {
            finish();
            return completedFuture(NO_MORE_SPLITS_BATCH);
        }
        Expression filterExpression = toIcebergExpression(effectivePredicate);
        this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
        this.fileScanIterator = Streams.stream(combinedScanIterable).map(CombinedScanTask::files).flatMap(Collection::stream).iterator();
    }
    TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
    if (dynamicFilterPredicate.isNone()) {
        finish();
        return completedFuture(NO_MORE_SPLITS_BATCH);
    }
    Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (fileScanTasks.hasNext()) {
        FileScanTask scanTask = fileScanTasks.next();
        if (!scanTask.deletes().isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
        }
        if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
            continue;
        }
        IcebergSplit icebergSplit = toIcebergSplit(scanTask);
        Schema fileSchema = scanTask.spec().schema();
        Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream().map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)).collect(toImmutableSet());
        Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
            Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
            for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
                Object partitionValue = deserializePartitionValue(partitionColumn.getType(), icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null), partitionColumn.getName());
                NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
                bindings.put(partitionColumn, bindingValue);
            }
            return bindings;
        });
        if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
            if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
                continue;
            }
            if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
                continue;
            }
        }
        if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
            continue;
        }
        if (recordScannedFiles) {
            scannedFiles.add(scanTask.file());
        }
        splits.add(icebergSplit);
    }
    return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
Also used : IcebergUtil.getPartitionKeys(io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) ByteBuffer(java.nio.ByteBuffer) TypeConverter.toIcebergType(io.trino.plugin.iceberg.TypeConverter.toIcebergType) Duration(io.airlift.units.Duration) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) Expression(org.apache.iceberg.expressions.Expression) ConnectorPartitionHandle(io.trino.spi.connector.ConnectorPartitionHandle) Map(java.util.Map) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) IcebergUtil.getColumnHandle(io.trino.plugin.iceberg.IcebergUtil.getColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Range(io.trino.spi.predicate.Range) Domain(io.trino.spi.predicate.Domain) Collection(java.util.Collection) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) TableScan(org.apache.iceberg.TableScan) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Streams(com.google.common.collect.Streams) Schema(org.apache.iceberg.Schema) CombinedScanTask(org.apache.iceberg.CombinedScanTask) ValueSet(io.trino.spi.predicate.ValueSet) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) DataSize(io.airlift.units.DataSize) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) Constraint(io.trino.spi.connector.Constraint) IcebergUtil.deserializePartitionValue(io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue) NullableValue(io.trino.spi.predicate.NullableValue) Stopwatch(com.google.common.base.Stopwatch) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ExpressionConverter.toIcebergExpression(io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression) Iterators(com.google.common.collect.Iterators) IcebergTypes.convertIcebergValueToTrino(io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Suppliers.memoize(com.google.common.base.Suppliers.memoize) Nullable(javax.annotation.Nullable) Iterator(java.util.Iterator) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) TupleDomain(io.trino.spi.predicate.TupleDomain) Conversions.fromByteBuffer(org.apache.iceberg.types.Conversions.fromByteBuffer) Sets.intersection(com.google.common.collect.Sets.intersection) IcebergUtil.primitiveFieldTypes(io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) IcebergUtil.getColumnHandle(io.trino.plugin.iceberg.IcebergUtil.getColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) NullableValue(io.trino.spi.predicate.NullableValue) Expression(org.apache.iceberg.expressions.Expression) ExpressionConverter.toIcebergExpression(io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression) Collection(java.util.Collection) TrinoException(io.trino.spi.TrinoException) FileScanTask(org.apache.iceberg.FileScanTask) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Stopwatch (com.google.common.base.Stopwatch)1 Suppliers.memoize (com.google.common.base.Suppliers.memoize)1 Verify.verify (com.google.common.base.Verify.verify)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1 Iterators (com.google.common.collect.Iterators)1 Sets.intersection (com.google.common.collect.Sets.intersection)1 Streams (com.google.common.collect.Streams)1 DataSize (io.airlift.units.DataSize)1 Duration (io.airlift.units.Duration)1 ExpressionConverter.toIcebergExpression (io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression)1 ICEBERG_DOMAIN_COMPACTION_THRESHOLD (io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD)1 IcebergTypes.convertIcebergValueToTrino (io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino)1 IcebergUtil.deserializePartitionValue (io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue)1 IcebergUtil.getColumnHandle (io.trino.plugin.iceberg.IcebergUtil.getColumnHandle)1 IcebergUtil.getPartitionKeys (io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys)1 IcebergUtil.primitiveFieldTypes (io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes)1