use of io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD in project trino by trinodb.
the class BaseIcebergConnectorTest method testPredicatePushdown.
@Test
public void testPredicatePushdown() {
QualifiedObjectName tableName = new QualifiedObjectName("iceberg", "tpch", "test_predicate");
assertUpdate(format("CREATE TABLE %s (col1 BIGINT, col2 BIGINT, col3 BIGINT) WITH (partitioning = ARRAY['col2', 'col3'])", tableName));
assertUpdate(format("INSERT INTO %s VALUES (1, 10, 100)", tableName), 1L);
assertUpdate(format("INSERT INTO %s VALUES (2, 20, 200)", tableName), 1L);
assertQuery(format("SELECT * FROM %s WHERE col1 = 1", tableName), "VALUES (1, 10, 100)");
assertFilterPushdown(tableName, ImmutableMap.of("col1", singleValue(BIGINT, 1L)), ImmutableMap.of(), ImmutableMap.of("col1", singleValue(BIGINT, 1L)));
assertQuery(format("SELECT * FROM %s WHERE col2 = 10", tableName), "VALUES (1, 10, 100)");
assertFilterPushdown(tableName, ImmutableMap.of("col2", singleValue(BIGINT, 10L)), ImmutableMap.of("col2", singleValue(BIGINT, 10L)), ImmutableMap.of());
assertQuery(format("SELECT * FROM %s WHERE col1 = 1 AND col2 = 10", tableName), "VALUES (1, 10, 100)");
assertFilterPushdown(tableName, ImmutableMap.of("col1", singleValue(BIGINT, 1L), "col2", singleValue(BIGINT, 10L)), ImmutableMap.of("col2", singleValue(BIGINT, 10L)), ImmutableMap.of("col1", singleValue(BIGINT, 1L)));
// Assert pushdown for an IN predicate with value count above the default compaction threshold
List<Long> values = LongStream.range(1L, 1010L).boxed().filter(index -> index != 20L).collect(toImmutableList());
assertTrue(values.size() > ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
String valuesString = join(",", values.stream().map(Object::toString).collect(toImmutableList()));
String inPredicate = "%s IN (" + valuesString + ")";
assertQuery(format("SELECT * FROM %s WHERE %s AND %s", tableName, format(inPredicate, "col1"), format(inPredicate, "col2")), "VALUES (1, 10, 100)");
assertFilterPushdown(tableName, ImmutableMap.of("col1", multipleValues(BIGINT, values), "col2", multipleValues(BIGINT, values)), ImmutableMap.of("col2", multipleValues(BIGINT, values)), // Unenforced predicate is simplified during split generation, but not reflected here
ImmutableMap.of("col1", multipleValues(BIGINT, values)));
dropTable(tableName.getObjectName());
}
use of io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD in project trino by trinodb.
the class IcebergPageSourceProvider method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
IcebergSplit split = (IcebergSplit) connectorSplit;
IcebergTableHandle table = (IcebergTableHandle) connectorTable;
List<IcebergColumnHandle> icebergColumns = columns.stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList());
Map<Integer, Optional<String>> partitionKeys = split.getPartitionKeys();
List<IcebergColumnHandle> regularColumns = columns.stream().map(IcebergColumnHandle.class::cast).filter(column -> !partitionKeys.containsKey(column.getId())).collect(toImmutableList());
TupleDomain<IcebergColumnHandle> effectivePredicate = table.getUnenforcedPredicate().intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)).simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
HdfsContext hdfsContext = new HdfsContext(session);
ReaderPageSource dataPageSource = createDataPageSource(session, hdfsContext, new Path(split.getPath()), split.getStart(), split.getLength(), split.getFileSize(), split.getFileFormat(), regularColumns, effectivePredicate, table.getNameMappingJson().map(NameMappingParser::fromJson));
Optional<ReaderProjectionsAdapter> projectionsAdapter = dataPageSource.getReaderColumns().map(readerColumns -> new ReaderProjectionsAdapter(regularColumns, readerColumns, column -> ((IcebergColumnHandle) column).getType(), IcebergPageSourceProvider::applyProjection));
return new IcebergPageSource(icebergColumns, partitionKeys, dataPageSource.get(), projectionsAdapter);
}
use of io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD in project trino by trinodb.
the class IcebergSplitSource method getNextBatch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
if (dynamicFilter.isAwaitable() && timeLeft > 0) {
return dynamicFilter.isBlocked().thenApply(ignored -> EMPTY_BATCH).completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
}
if (combinedScanIterable == null) {
// Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
// TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
if (!simplifiedPredicate.equals(fullPredicate)) {
// Pushed down predicate was simplified, always evaluate it against individual splits
this.pushedDownDynamicFilterPredicate = TupleDomain.all();
}
TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
if (effectivePredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Expression filterExpression = toIcebergExpression(effectivePredicate);
this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
this.fileScanIterator = Streams.stream(combinedScanIterable).map(CombinedScanTask::files).flatMap(Collection::stream).iterator();
}
TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
if (dynamicFilterPredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
while (fileScanTasks.hasNext()) {
FileScanTask scanTask = fileScanTasks.next();
if (!scanTask.deletes().isEmpty()) {
throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
}
if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
continue;
}
IcebergSplit icebergSplit = toIcebergSplit(scanTask);
Schema fileSchema = scanTask.spec().schema();
Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream().map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)).collect(toImmutableSet());
Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
Object partitionValue = deserializePartitionValue(partitionColumn.getType(), icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null), partitionColumn.getName());
NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
bindings.put(partitionColumn, bindingValue);
}
return bindings;
});
if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
continue;
}
if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
continue;
}
}
if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
continue;
}
if (recordScannedFiles) {
scannedFiles.add(scanTask.file());
}
splits.add(icebergSplit);
}
return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
Aggregations