Use of io.trino.spi.connector.Constraint in project trino by trinodb.
From the class TestInformationSchemaMetadata, method testInformationSchemaPredicatePushdownForEmptyNames.
@Test
public void testInformationSchemaPredicatePushdownForEmptyNames()
{
    TransactionId transactionId = transactionManager.beginTransaction(false);
    ConnectorSession session = createNewSession(transactionId);
    ConnectorMetadata metadata = new InformationSchemaMetadata("test_catalog", this.metadata);
    InformationSchemaColumnHandle tableSchemaColumn = new InformationSchemaColumnHandle("table_schema");
    InformationSchemaColumnHandle tableNameColumn = new InformationSchemaColumnHandle("table_name");
    ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName("information_schema", "tables"));

    // Empty schema name
    InformationSchemaTableHandle filtered = metadata.applyFilter(session, tableHandle, new Constraint(TupleDomain.withColumnDomains(
                    ImmutableMap.of(tableSchemaColumn, Domain.singleValue(VARCHAR, Slices.utf8Slice(""))))))
            .map(ConstraintApplicationResult::getHandle)
            .map(InformationSchemaTableHandle.class::cast)
            .orElseThrow(AssertionError::new);

    // "" is a valid schema name, and is (currently) also valid for QualifiedTablePrefix
    assertEquals(filtered.getPrefixes(), ImmutableSet.of(new QualifiedTablePrefix("test_catalog", "")));

    // Empty table name
    filtered = metadata.applyFilter(session, tableHandle, new Constraint(TupleDomain.withColumnDomains(
                    ImmutableMap.of(tableNameColumn, Domain.singleValue(VARCHAR, Slices.utf8Slice(""))))))
            .map(ConstraintApplicationResult::getHandle)
            .map(InformationSchemaTableHandle.class::cast)
            .orElseThrow(AssertionError::new);

    // "" is a valid table name, and is (currently) also valid for QualifiedTablePrefix
    // the filter is blindly applied to all visible schemas, so information_schema must be included
    assertEquals(filtered.getPrefixes(), ImmutableSet.of(
            new QualifiedTablePrefix("test_catalog", "test_schema", ""),
            new QualifiedTablePrefix("test_catalog", "information_schema", "")));
}
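For orientation, here is a minimal sketch of the Constraint construction pattern used in this test: a summary-only Constraint built from a TupleDomain that restricts one VARCHAR column to a single value. The helper name is hypothetical and not part of the test above.

// Hypothetical helper (illustration only): builds a summary-only Constraint that
// restricts a single VARCHAR column to one value, as passed to applyFilter above.
private static Constraint singleVarcharValueConstraint(ColumnHandle column, String value)
{
    return new Constraint(TupleDomain.withColumnDomains(
            ImmutableMap.of(column, Domain.singleValue(VARCHAR, Slices.utf8Slice(value)))));
}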
Use of io.trino.spi.connector.Constraint in project trino by trinodb.
From the class PartitionsSystemTableProvider, method getSystemTable.
@Override
public Optional<SystemTable> getSystemTable(HiveMetadata metadata, ConnectorSession session, SchemaTableName tableName)
{
    if (!PARTITIONS.matches(tableName)) {
        return Optional.empty();
    }
    SchemaTableName sourceTableName = PARTITIONS.getSourceTableName(tableName);
    Table sourceTable = metadata.getMetastore()
            .getTable(sourceTableName.getSchemaName(), sourceTableName.getTableName())
            .orElse(null);
    if (sourceTable == null || isDeltaLakeTable(sourceTable) || isIcebergTable(sourceTable)) {
        return Optional.empty();
    }
    verifyOnline(sourceTableName, Optional.empty(), getProtectMode(sourceTable), sourceTable.getParameters());

    HiveTableHandle sourceTableHandle = new HiveTableHandle(
            sourceTableName.getSchemaName(),
            sourceTableName.getTableName(),
            sourceTable.getParameters(),
            getPartitionKeyColumnHandles(sourceTable, typeManager),
            getRegularColumnHandles(sourceTable, typeManager, getTimestampPrecision(session)),
            getHiveBucketHandle(session, sourceTable, typeManager));

    List<HiveColumnHandle> partitionColumns = sourceTableHandle.getPartitionColumns();
    if (partitionColumns.isEmpty()) {
        return Optional.empty();
    }
    List<Type> partitionColumnTypes = partitionColumns.stream()
            .map(HiveColumnHandle::getType)
            .collect(toImmutableList());
    List<ColumnMetadata> partitionSystemTableColumns = partitionColumns.stream()
            .map(column -> ColumnMetadata.builder()
                    .setName(column.getName())
                    .setType(column.getType())
                    .setComment(column.getComment())
                    .setHidden(column.isHidden())
                    .build())
            .collect(toImmutableList());
    Map<Integer, HiveColumnHandle> fieldIdToColumnHandle = IntStream.range(0, partitionColumns.size())
            .boxed()
            .collect(toImmutableMap(identity(), partitionColumns::get));

    return Optional.of(createSystemTable(
            new ConnectorTableMetadata(tableName, partitionSystemTableColumns),
            constraint -> {
                Constraint targetConstraint = new Constraint(constraint.transformKeys(fieldIdToColumnHandle::get));
                Iterable<List<Object>> records = () -> stream(partitionManager.getPartitions(metadata.getMetastore(), sourceTableHandle, targetConstraint).getPartitions())
                        .map(hivePartition -> IntStream.range(0, partitionColumns.size())
                                .mapToObj(fieldIdToColumnHandle::get)
                                .map(columnHandle -> hivePartition.getKeys().get(columnHandle).getValue())
                                .collect(toList())) // nullable
                        .iterator();
                return new InMemoryRecordSet(partitionColumnTypes, records).cursor();
            }));
}
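As a hedged usage sketch (reusing the partitionManager, metadata, and sourceTableHandle from the method above; not part of the original code), an unconstrained Constraint can be passed to the partition manager to enumerate every partition of the source table:

// Sketch: Constraint.alwaysTrue() carries a TupleDomain.all() summary and no predicate,
// so getPartitions is free to return all partitions of the source table.
Constraint unconstrained = Constraint.alwaysTrue();
HivePartitionResult allPartitions = partitionManager.getPartitions(metadata.getMetastore(), sourceTableHandle, unconstrained);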
Use of io.trino.spi.connector.Constraint in project trino by trinodb.
From the class BaseIcebergConnectorTest, method testStatisticsConstraints.
@Test
public void testStatisticsConstraints()
{
    String tableName = "iceberg.tpch.test_simple_partitioned_table_statistics";
    assertUpdate("CREATE TABLE iceberg.tpch.test_simple_partitioned_table_statistics (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])");
    String insertStart = "INSERT INTO iceberg.tpch.test_simple_partitioned_table_statistics";
    assertUpdate(insertStart + " VALUES (1, 101), (2, 102), (3, 103), (4, 104)", 4);

    TableStatistics tableStatistics = getTableStatistics(tableName, new Constraint(TupleDomain.all()));
    IcebergColumnHandle col1Handle = getColumnHandleFromStatistics(tableStatistics, "col1");
    IcebergColumnHandle col2Handle = getColumnHandleFromStatistics(tableStatistics, "col2");

    // Constraint.predicate is currently not supported, because it's never provided by the engine.
    // TODO add (restore) test coverage when this changes.

    // predicate on a partition column
    assertThatThrownBy(() -> getTableStatistics(tableName, new Constraint(
            TupleDomain.all(),
            new TestRelationalNumberPredicate("col1", 3, i1 -> i1 >= 0),
            Set.of(col1Handle))))
            .isInstanceOf(VerifyException.class)
            .hasMessage("Unexpected Constraint predicate");

    // predicate on a non-partition column
    assertThatThrownBy(() -> getTableStatistics(tableName, new Constraint(
            TupleDomain.all(),
            new TestRelationalNumberPredicate("col2", 102, i -> i >= 0),
            Set.of(col2Handle))))
            .isInstanceOf(VerifyException.class)
            .hasMessage("Unexpected Constraint predicate");

    dropTable(tableName);
}
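The test above exercises the three-argument Constraint constructor (summary, predicate, predicate columns). Since TestRelationalNumberPredicate is a test-local helper, here is a minimal sketch of an equivalent inline predicate; col1Handle is assumed to be the BIGINT partition column handle obtained from the statistics above:

// Sketch: a predicate-carrying Constraint roughly equivalent to
// new TestRelationalNumberPredicate("col1", 3, i -> i >= 0), i.e. col1 >= 3.
Predicate<Map<ColumnHandle, NullableValue>> col1AtLeast3 = bindings -> {
    NullableValue value = bindings.get(col1Handle);
    return value != null && !value.isNull() && (Long) value.getValue() >= 3L;
};
Constraint predicateConstraint = new Constraint(TupleDomain.all(), col1AtLeast3, Set.of(col1Handle));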
Use of io.trino.spi.connector.Constraint in project trino by trinodb.
From the class IcebergSplitSource, method getNextBatch.
@Override
public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHandle partitionHandle, int maxSize)
{
    long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS);
    if (dynamicFilter.isAwaitable() && timeLeft > 0) {
        return dynamicFilter.isBlocked()
                .thenApply(ignored -> EMPTY_BATCH)
                .completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS);
    }

    if (combinedScanIterable == null) {
        // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API
        this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
        TupleDomain<IcebergColumnHandle> fullPredicate = tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate);
        // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify
        TupleDomain<IcebergColumnHandle> simplifiedPredicate = fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD);
        if (!simplifiedPredicate.equals(fullPredicate)) {
            // Pushed down predicate was simplified, always evaluate it against individual splits
            this.pushedDownDynamicFilterPredicate = TupleDomain.all();
        }
        TupleDomain<IcebergColumnHandle> effectivePredicate = tableHandle.getEnforcedPredicate().intersect(simplifiedPredicate);
        if (effectivePredicate.isNone()) {
            finish();
            return completedFuture(NO_MORE_SPLITS_BATCH);
        }
        Expression filterExpression = toIcebergExpression(effectivePredicate);
        this.combinedScanIterable = tableScan
                .filter(filterExpression)
                .includeColumnStats()
                .planTasks();
        this.fileScanIterator = Streams.stream(combinedScanIterable)
                .map(CombinedScanTask::files)
                .flatMap(Collection::stream)
                .iterator();
    }

    TupleDomain<IcebergColumnHandle> dynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast);
    if (dynamicFilterPredicate.isNone()) {
        finish();
        return completedFuture(NO_MORE_SPLITS_BATCH);
    }

    Iterator<FileScanTask> fileScanTasks = Iterators.limit(fileScanIterator, maxSize);
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (fileScanTasks.hasNext()) {
        FileScanTask scanTask = fileScanTasks.next();
        if (!scanTask.deletes().isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Iceberg tables with delete files are not supported: " + tableHandle.getSchemaTableName());
        }
        if (maxScannedFileSizeInBytes.isPresent() && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) {
            continue;
        }

        IcebergSplit icebergSplit = toIcebergSplit(scanTask);
        Schema fileSchema = scanTask.spec().schema();
        Set<IcebergColumnHandle> identityPartitionColumns = icebergSplit.getPartitionKeys().keySet().stream()
                .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager))
                .collect(toImmutableSet());
        Supplier<Map<ColumnHandle, NullableValue>> partitionValues = memoize(() -> {
            Map<ColumnHandle, NullableValue> bindings = new HashMap<>();
            for (IcebergColumnHandle partitionColumn : identityPartitionColumns) {
                Object partitionValue = deserializePartitionValue(
                        partitionColumn.getType(),
                        icebergSplit.getPartitionKeys().get(partitionColumn.getId()).orElse(null),
                        partitionColumn.getName());
                NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue);
                bindings.put(partitionColumn, bindingValue);
            }
            return bindings;
        });

        if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) {
            if (!partitionMatchesPredicate(identityPartitionColumns, partitionValues, dynamicFilterPredicate)) {
                continue;
            }
            if (!fileMatchesPredicate(fieldIdToType, dynamicFilterPredicate, scanTask.file().lowerBounds(), scanTask.file().upperBounds(), scanTask.file().nullValueCounts())) {
                continue;
            }
        }
        if (!partitionMatchesConstraint(identityPartitionColumns, partitionValues, constraint)) {
            continue;
        }
        if (recordScannedFiles) {
            scannedFiles.add(scanTask.file());
        }
        splits.add(icebergSplit);
    }
    return completedFuture(new ConnectorSplitBatch(splits.build(), isFinished()));
}
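The final pruning step delegates to partitionMatchesConstraint. The snippet below is a hedged sketch of what such a check can look like, under the assumption that the Constraint's optional predicate is only evaluated when every column it references is an identity partition column; it is not the verbatim helper from IcebergSplitSource.

// Sketch (assumed behavior): prune a split using the Constraint's optional predicate
// applied to the split's identity-partition values.
static boolean partitionMatchesConstraintSketch(
        Set<IcebergColumnHandle> identityPartitionColumns,
        Supplier<Map<ColumnHandle, NullableValue>> partitionValues,
        Constraint constraint)
{
    // Keep the split if there is no predicate, or if the predicate references columns
    // that are not identity partition columns (it cannot be decided from partition values alone).
    if (constraint.predicate().isEmpty()
            || !identityPartitionColumns.containsAll(constraint.getPredicateColumns().orElseThrow())) {
        return true;
    }
    return constraint.predicate().get().test(partitionValues.get());
}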
Use of io.trino.spi.connector.Constraint in project trino by trinodb.
From the class TableStatisticsMaker, method dataFileMatches.
private boolean dataFileMatches(DataFile dataFile, Constraint constraint, List<PartitionField> partitionFields, Map<Integer, ColumnFieldDetails> fieldDetails)
{
    // Currently this method is used only for IcebergMetadata.getTableStatistics, and there the Constraint never carries a predicate.
    // TODO support pruning with the constraint predicate when this changes.
    verify(constraint.predicate().isEmpty(), "Unexpected Constraint predicate");
    TupleDomain<ColumnHandle> constraintSummary = constraint.getSummary();
    Map<ColumnHandle, Domain> domains = constraintSummary.getDomains().get();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        int fieldId = field.fieldId();
        ColumnFieldDetails details = fieldDetails.get(fieldId);
        IcebergColumnHandle column = details.getColumnHandle();
        Object value = convertIcebergValueToTrino(details.getIcebergType(), dataFile.partition().get(index, details.getJavaClass()));
        Domain allowedDomain = domains.get(column);
        if (allowedDomain != null && !allowedDomain.includesNullableValue(value)) {
            return false;
        }
    }
    return true;
}
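For reference, a small hedged example of the Domain membership check performed in the loop above; the range and values are made up for illustration:

// Sketch: a partition value passes only if the Constraint summary's Domain for that
// column includes it (nulls are handled by includesNullableValue as well).
Domain allowed = Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 10L)), false);
boolean accepted = allowed.includesNullableValue(42L);      // true
boolean rejected = allowed.includesNullableValue(5L);       // false
boolean nullRejected = allowed.includesNullableValue(null); // false, since nullAllowed is false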