Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class DeltaLakeMetadata, method applyFilter.
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) {
DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) handle;
SchemaTableName tableName = tableHandle.getSchemaTableName();
Set<DeltaLakeColumnHandle> partitionColumns = ImmutableSet.copyOf(extractPartitionColumns(tableHandle.getMetadataEntry(), typeManager));
verify(!constraint.getSummary().isNone(), "applyFilter constraint has summary NONE");
Map<ColumnHandle, Domain> constraintDomains = constraint.getSummary().getDomains().orElseThrow();
ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> enforceableDomains = ImmutableMap.builder();
ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> unenforceableDomains = ImmutableMap.builder();
for (Map.Entry<ColumnHandle, Domain> domainEntry : constraintDomains.entrySet()) {
DeltaLakeColumnHandle column = (DeltaLakeColumnHandle) domainEntry.getKey();
if (!partitionColumns.contains(column)) {
unenforceableDomains.put(column, domainEntry.getValue());
} else {
enforceableDomains.put(column, domainEntry.getValue());
}
}
TupleDomain<DeltaLakeColumnHandle> newEnforcedConstraint = TupleDomain.withColumnDomains(enforceableDomains.buildOrThrow());
TupleDomain<DeltaLakeColumnHandle> newUnenforcedConstraint = TupleDomain.withColumnDomains(unenforceableDomains.buildOrThrow());
DeltaLakeTableHandle newHandle = new DeltaLakeTableHandle(
        tableName.getSchemaName(), tableName.getTableName(), tableHandle.getLocation(),
        Optional.of(tableHandle.getMetadataEntry()),
        // The unenforced constraint will still be checked by the engine.
        tableHandle.getEnforcedPartitionConstraint().intersect(newEnforcedConstraint),
        tableHandle.getNonPartitionConstraint().intersect(newUnenforcedConstraint).simplify(domainCompactionThreshold),
        tableHandle.getWriteType(), tableHandle.getProjectedColumns(),
        tableHandle.getUpdatedColumns(), tableHandle.getUpdateRowIdColumns(),
        Optional.empty(), tableHandle.getReadVersion());
if (tableHandle.getEnforcedPartitionConstraint().equals(newHandle.getEnforcedPartitionConstraint()) && tableHandle.getNonPartitionConstraint().equals(newHandle.getNonPartitionConstraint())) {
return Optional.empty();
}
return Optional.of(new ConstraintApplicationResult<>(newHandle, newUnenforcedConstraint.transformKeys(ColumnHandle.class::cast), false));
}
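A minimal, self-contained sketch of the same enforced/unenforced split, assuming trino-spi and Guava on the classpath. It uses String keys instead of DeltaLakeColumnHandle and hypothetical column names (ds as the partition column, id as a regular column): domains on partition columns are folded into the enforced TupleDomain, while the rest stays unenforced for the engine to re-check.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import java.util.Map;
import java.util.Set;

public class PartitionSplitSketch
{
    public static void main(String[] args)
    {
        // Hypothetical columns: "ds" is a partition column, "id" is a regular column.
        Set<String> partitionColumns = Set.of("ds");
        Map<String, Domain> summary = Map.of(
                "ds", Domain.singleValue(BIGINT, 20220101L),
                "id", Domain.singleValue(BIGINT, 42L));

        ImmutableMap.Builder<String, Domain> enforceable = ImmutableMap.builder();
        ImmutableMap.Builder<String, Domain> unenforceable = ImmutableMap.builder();
        for (Map.Entry<String, Domain> entry : summary.entrySet()) {
            // Only partition-column domains can be enforced by partition pruning.
            if (partitionColumns.contains(entry.getKey())) {
                enforceable.put(entry.getKey(), entry.getValue());
            }
            else {
                unenforceable.put(entry.getKey(), entry.getValue());
            }
        }

        TupleDomain<String> enforced = TupleDomain.withColumnDomains(enforceable.buildOrThrow());
        TupleDomain<String> unenforced = TupleDomain.withColumnDomains(unenforceable.buildOrThrow());
        System.out.println("enforced:   " + enforced);
        System.out.println("unenforced: " + unenforced);
    }
}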
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class DeltaLakePageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit connectorSplit, ConnectorTableHandle connectorTable, List<ColumnHandle> columns, DynamicFilter dynamicFilter) {
DeltaLakeSplit split = (DeltaLakeSplit) connectorSplit;
DeltaLakeTableHandle table = (DeltaLakeTableHandle) connectorTable;
// We reach here when we could not prune the split using file level stats, table predicate
// and the dynamic filter in the coordinator during split generation. The file level stats
// in DeltaLakeSplit#filePredicate could help to prune this split when a more selective dynamic filter
// is available now, without having to access parquet file footer for row-group stats.
// We avoid sending DeltaLakeSplit#splitPredicate to workers by using table.getPredicate() here.
TupleDomain<DeltaLakeColumnHandle> filteredSplitPredicate = TupleDomain.intersect(ImmutableList.of(
        table.getNonPartitionConstraint(),
        split.getStatisticsPredicate(),
        dynamicFilter.getCurrentPredicate().transformKeys(DeltaLakeColumnHandle.class::cast)));
if (filteredSplitPredicate.isNone()) {
return new EmptyPageSource();
}
List<DeltaLakeColumnHandle> deltaLakeColumns = columns.stream().map(DeltaLakeColumnHandle.class::cast).collect(toImmutableList());
Map<String, Optional<String>> partitionKeys = split.getPartitionKeys();
List<DeltaLakeColumnHandle> regularColumns = deltaLakeColumns.stream().filter(column -> column.getColumnType() == REGULAR).collect(toImmutableList());
List<HiveColumnHandle> hiveColumnHandles = regularColumns.stream().map(DeltaLakeColumnHandle::toHiveColumnHandle).collect(toImmutableList());
Path path = new Path(split.getPath());
HdfsContext hdfsContext = new HdfsContext(session);
TupleDomain<HiveColumnHandle> parquetPredicate = getParquetTupleDomain(filteredSplitPredicate.simplify(domainCompactionThreshold));
if (table.getWriteType().isPresent()) {
return new DeltaLakeUpdatablePageSource(
        table, deltaLakeColumns, partitionKeys,
        split.getPath(), split.getFileSize(), split.getFileModifiedTime(),
        session, executorService, hdfsEnvironment, hdfsContext,
        parquetDateTimeZone, parquetReaderOptions, parquetPredicate,
        typeManager, updateResultJsonCodec);
}
ReaderPageSource pageSource = ParquetPageSourceFactory.createPageSource(
        path, split.getStart(), split.getLength(), split.getFileSize(),
        hiveColumnHandles, parquetPredicate, true,
        hdfsEnvironment, hdfsEnvironment.getConfiguration(hdfsContext, path), session.getIdentity(),
        parquetDateTimeZone, fileFormatDataSourceStats,
        parquetReaderOptions
                .withMaxReadBlockSize(getParquetMaxReadBlockSize(session))
                .withUseColumnIndex(isParquetUseColumnIndex(session)));
verify(pageSource.getReaderColumns().isEmpty(), "All columns expected to be base columns");
return new DeltaLakePageSource(deltaLakeColumns, partitionKeys, pageSource.get(), split.getPath(), split.getFileSize(), split.getFileModifiedTime());
}
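The split-pruning decision above hinges on TupleDomain.intersect collapsing contradictory predicates to NONE. A small sketch, assuming trino-spi and Guava on the classpath and using String keys with made-up bounds, shows the pattern: when the table predicate and the file-statistics predicate cannot both hold, the intersection isNone() and the split can be answered with an EmptyPageSource.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableList;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import java.util.Map;

public class IntersectSketch
{
    public static void main(String[] args)
    {
        // Table-level predicate: id > 100
        TupleDomain<String> tablePredicate = TupleDomain.withColumnDomains(Map.of(
                "id", Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 100L)), false)));
        // File-level statistics predicate: id < 50 (disjoint from the table predicate)
        TupleDomain<String> statsPredicate = TupleDomain.withColumnDomains(Map.of(
                "id", Domain.create(ValueSet.ofRanges(Range.lessThan(BIGINT, 50L)), false)));

        TupleDomain<String> combined = TupleDomain.intersect(ImmutableList.of(tablePredicate, statsPredicate));
        // Disjoint ranges collapse to NONE, which is how createPageSource decides
        // that the split produces no rows and can be skipped entirely.
        System.out.println("isNone = " + combined.isNone()); // true
    }
}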
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class DefaultQueryBuilder, method toConjuncts.
protected List<String> toConjuncts(JdbcClient client, ConnectorSession session, Connection connection, TupleDomain<ColumnHandle> tupleDomain, Consumer<QueryParameter> accumulator) {
if (tupleDomain.isNone()) {
return ImmutableList.of(ALWAYS_FALSE);
}
ImmutableList.Builder<String> builder = ImmutableList.builder();
for (Map.Entry<ColumnHandle, Domain> entry : tupleDomain.getDomains().get().entrySet()) {
JdbcColumnHandle column = ((JdbcColumnHandle) entry.getKey());
Domain domain = pushDownDomain(client, session, connection, column, entry.getValue());
builder.add(toPredicate(client, session, connection, column, domain, accumulator));
}
return builder.build();
}
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class ElasticsearchMetadata, method applyFilter.
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle table, Constraint constraint) {
ElasticsearchTableHandle handle = (ElasticsearchTableHandle) table;
if (isPassthroughQuery(handle)) {
// filter pushdown currently not supported for passthrough query
return Optional.empty();
}
Map<ColumnHandle, Domain> supported = new HashMap<>();
Map<ColumnHandle, Domain> unsupported = new HashMap<>();
if (constraint.getSummary().getDomains().isPresent()) {
for (Map.Entry<ColumnHandle, Domain> entry : constraint.getSummary().getDomains().get().entrySet()) {
ElasticsearchColumnHandle column = (ElasticsearchColumnHandle) entry.getKey();
if (column.isSupportsPredicates()) {
supported.put(column, entry.getValue());
} else {
unsupported.put(column, entry.getValue());
}
}
}
TupleDomain<ColumnHandle> oldDomain = handle.getConstraint();
TupleDomain<ColumnHandle> newDomain = oldDomain.intersect(TupleDomain.withColumnDomains(supported));
ConnectorExpression oldExpression = constraint.getExpression();
Map<String, String> newRegexes = new HashMap<>(handle.getRegexes());
List<ConnectorExpression> expressions = ConnectorExpressions.extractConjuncts(constraint.getExpression());
List<ConnectorExpression> notHandledExpressions = new ArrayList<>();
for (ConnectorExpression expression : expressions) {
if (expression instanceof Call) {
Call call = (Call) expression;
if (isSupportedLikeCall(call)) {
List<ConnectorExpression> arguments = call.getArguments();
String variableName = ((Variable) arguments.get(0)).getName();
ElasticsearchColumnHandle column = (ElasticsearchColumnHandle) constraint.getAssignments().get(variableName);
verifyNotNull(column, "No assignment for %s", variableName);
String columnName = column.getName();
Object pattern = ((Constant) arguments.get(1)).getValue();
Optional<Slice> escape = Optional.empty();
if (arguments.size() == 3) {
escape = Optional.of((Slice) (((Constant) arguments.get(2)).getValue()));
}
if (!newRegexes.containsKey(columnName) && pattern instanceof Slice) {
IndexMetadata metadata = client.getIndexMetadata(handle.getIndex());
if (metadata.getSchema().getFields().stream()
        .anyMatch(field -> columnName.equals(field.getName())
                && field.getType() instanceof PrimitiveType
                && "keyword".equals(((PrimitiveType) field.getType()).getName()))) {
newRegexes.put(columnName, likeToRegexp(((Slice) pattern), escape));
continue;
}
}
}
}
notHandledExpressions.add(expression);
}
ConnectorExpression newExpression = ConnectorExpressions.and(notHandledExpressions);
if (oldDomain.equals(newDomain) && oldExpression.equals(newExpression)) {
return Optional.empty();
}
handle = new ElasticsearchTableHandle(
        handle.getType(), handle.getSchema(), handle.getIndex(),
        newDomain, newRegexes, handle.getQuery(), handle.getLimit());
return Optional.of(new ConstraintApplicationResult<>(handle, TupleDomain.withColumnDomains(unsupported), newExpression, false));
}
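The key TupleDomain step above is oldDomain.intersect(...) followed by the equality check that decides whether pushdown made progress. A small sketch, assuming trino-spi on the classpath and using String keys with a hypothetical age column, shows why repeated applyFilter calls converge: once the supported domains are absorbed into the handle's constraint, another intersect changes nothing and the method returns Optional.empty().

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import java.util.Map;

public class NarrowingSketch
{
    public static void main(String[] args)
    {
        // First applyFilter call: the handle starts with no constraint.
        TupleDomain<String> oldDomain = TupleDomain.all();
        TupleDomain<String> supported = TupleDomain.withColumnDomains(Map.of(
                "age", Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 18L)), false)));

        TupleDomain<String> newDomain = oldDomain.intersect(supported);
        // The constraint changed, so pushdown made progress and a new handle is returned.
        System.out.println("changed = " + !oldDomain.equals(newDomain)); // true
        // Intersecting again with the same domains is a no-op, so the next call returns Optional.empty().
        System.out.println("converged = " + newDomain.intersect(supported).equals(newDomain)); // true
    }
}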
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class IcebergSplitSource, method getNextBatch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize) {
long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
if (dynamicFilter.isAwaitable() && timeLeft > 0) {
return dynamicFilter.isBlocked().thenApply(ignored -> EMPTY_BATCH).completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
}
if (combinedScanIterable == null) {
// Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
// TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
if (!simplifiedPredicate.equals(fullPredicate)) {
// Pushed down predicate was simplified, always evaluate it against individual splits
this.pushedDownDynamicFilterPredicate = TupleDomain.all();
}
TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
if (effectivePredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Expression filterExpression = toIcebergExpression(effectivePredicate);
this.combinedScanIterable = tableScan.filter(filterExpression).includeColumnStats().planTasks();
this.fileScanIterator = Streams.stream(combinedScanIterable).map(CombinedScanTask::files).flatMap(Collection::stream).iterator();
}
TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
if (dynamicFilterPredicate.isNone()) {
finish();
return completedFuture(NO_MORE_SPLITS_BATCH);
}
Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
while (fileScanTasks.hasNext()) {
FileScanTask scanTask = fileScanTasks.next();
if (!scanTask.deletes().isEmpty()) {
throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
}
if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
continue;
}
IcebergSplit icebergSplit = toIcebergSplit(scanTask);
Schema fileSchema = scanTask.spec().schema();
Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream()
        .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager))
        .collect(toImmutableSet());
Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
Object partitionValue = deserializePartitionValue(
        partitionColumn.getType(),
        icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null),
        partitionColumn.getName());
NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
bindings.put(partitionColumn, bindingValue);
}
return bindings;
});
if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
continue;
}
if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
continue;
}
}
if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
continue;
}
if (recordScannedFiles) {
scannedFiles.add(scanTask.file());
}
splits.add(icebergSplit);
}
return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}