use of io.trino.spi.connector.DiscretePredicates in project trino by trinodb.
The class HiveMetadata, method getTableProperties.
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
HiveTableHandle hiveTable = (HiveTableHandle) table;
List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveTable.getPartitionColumns());
TupleDomain<ColumnHandle> predicate = TupleDomain.all();
Optional<DiscretePredicates> discretePredicates = Optional.empty();
// When only partition names (not fully loaded partitions) are present in the handle,
// the computation of predicate and discretePredicates below would not be valid, so it is skipped.
if (hiveTable.getPartitionNames().isEmpty()) {
Optional<List<HivePartition>> partitions = hiveTable.getPartitions().or(() -> {
// We load the partitions to compute the predicates enforced by the table.
// Note that the computation is not persisted in the table handle, so can be redone many times
// TODO: https://github.com/trinodb/trino/issues/10980.
HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, table, new Constraint(hiveTable.getEnforcedConstraint()));
if (partitionManager.canPartitionsBeLoaded(partitionResult)) {
return Optional.of(partitionManager.getPartitionsAsList(partitionResult));
}
return Optional.empty();
});
if (partitions.isPresent()) {
List<HivePartition> hivePartitions = partitions.orElseThrow();
// Since the partitions are fully loaded now, we can compute the predicate enforced by the table
predicate = createPredicate(partitionColumns, hivePartitions);
// Unpartitioned tables are represented by a single synthetic partition, so this check ensures the table is actually partitioned
if (!partitionColumns.isEmpty()) {
// Do not create tuple domains for every partition at the same time!
// There can be a huge number of partitions so use an iterable so
// all domains do not need to be in memory at the same time.
Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(hivePartitions, hivePartition -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
}
}
}
Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
List<LocalProperty<ColumnHandle>> sortingProperties = ImmutableList.of();
if (hiveTable.getBucketHandle().isPresent()) {
if (isPropagateTableScanSortingProperties(session) && !hiveTable.getBucketHandle().get().getSortedBy().isEmpty()) {
// Populating SortingProperty guarantees to the engine that it is reading pre-sorted input.
// We detect compatibility between table and partition level sorted_by properties
// and fail the query if there is a mismatch in HiveSplitManager#getPartitionMetadata.
// This can lead to incorrect results if a sorted_by property is defined over unsorted files.
Map<String, ColumnHandle> columnHandles = getColumnHandles(session, table);
sortingProperties = hiveTable.getBucketHandle().get().getSortedBy().stream()
        .map(sortingColumn -> new SortingProperty<>(
                columnHandles.get(sortingColumn.getColumnName()),
                sortingColumn.getOrder().getSortOrder()))
        .collect(toImmutableList());
}
if (isBucketExecutionEnabled(session)) {
tablePartitioning = hiveTable.getBucketHandle().map(bucketing -> new ConnectorTablePartitioning(
        new HivePartitioningHandle(
                bucketing.getBucketingVersion(),
                bucketing.getReadBucketCount(),
                bucketing.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                OptionalInt.empty(),
                false),
        bucketing.getColumns().stream().map(ColumnHandle.class::cast).collect(toImmutableList())));
}
}
return new ConnectorTableProperties(predicate, tablePartitioning, Optional.empty(), discretePredicates, sortingProperties);
}
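For illustration, this is how a consumer on the engine side might use the DiscretePredicates returned in the ConnectorTableProperties to prune partitions. This is a minimal sketch, not code from the Trino repository; it assumes the query constraint is already available as a TupleDomain<ColumnHandle> and relies only on DiscretePredicates#getPredicates, TupleDomain#intersect, and TupleDomain#isNone from the SPI.
// Minimal sketch (not Trino engine code): keep only the partition domains that can still match the constraint.
static List<TupleDomain<ColumnHandle>> pruneDiscretePredicates(DiscretePredicates discretePredicates, TupleDomain<ColumnHandle> constraint)
{
    ImmutableList.Builder<TupleDomain<ColumnHandle>> matching = ImmutableList.builder();
    for (TupleDomain<ColumnHandle> partitionDomain : discretePredicates.getPredicates()) {
        // A partition survives when intersecting its fixed-value domain with the constraint leaves a non-empty domain
        if (!partitionDomain.intersect(constraint).isNone()) {
            matching.add(partitionDomain);
        }
    }
    return matching.build();
}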
use of io.trino.spi.connector.DiscretePredicates in project trino by trinodb.
The class AbstractTestHive, method assertExpectedTableProperties.
protected void assertExpectedTableProperties(ConnectorTableProperties actualProperties, ConnectorTableProperties expectedProperties) {
assertEquals(actualProperties.getPredicate(), expectedProperties.getPredicate());
assertEquals(actualProperties.getDiscretePredicates().isPresent(), expectedProperties.getDiscretePredicates().isPresent());
actualProperties.getDiscretePredicates().ifPresent(actual -> {
DiscretePredicates expected = expectedProperties.getDiscretePredicates().get();
assertEquals(actual.getColumns(), expected.getColumns());
assertEqualsIgnoreOrder(actual.getPredicates(), expected.getPredicates());
});
assertEquals(actualProperties.getStreamPartitioningColumns(), expectedProperties.getStreamPartitioningColumns());
assertEquals(actualProperties.getLocalProperties(), expectedProperties.getLocalProperties());
}
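A hypothetical call site for this helper (the metadata, session, and tableHandle variables are assumed to be created elsewhere in the test) could look like:
// Hypothetical usage: compare live table properties with the fixture built in setupHive
ConnectorTableProperties actual = metadata.getTableProperties(session, tableHandle);
assertExpectedTableProperties(actual, tablePartitionFormatProperties);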
use of io.trino.spi.connector.DiscretePredicates in project trino by trinodb.
The class AbstractTestHive, method setupHive.
protected void setupHive(String databaseName) {
database = databaseName;
tablePartitionFormat = new SchemaTableName(database, "trino_test_partition_format");
tableUnpartitioned = new SchemaTableName(database, "trino_test_unpartitioned");
tableOffline = new SchemaTableName(database, "trino_test_offline");
tableOfflinePartition = new SchemaTableName(database, "trino_test_offline_partition");
tableNotReadable = new SchemaTableName(database, "trino_test_not_readable");
view = new SchemaTableName(database, "trino_test_view");
invalidTable = new SchemaTableName(database, INVALID_TABLE);
tableBucketedStringInt = new SchemaTableName(database, "trino_test_bucketed_by_string_int");
tableBucketedBigintBoolean = new SchemaTableName(database, "trino_test_bucketed_by_bigint_boolean");
tableBucketedDoubleFloat = new SchemaTableName(database, "trino_test_bucketed_by_double_float");
tablePartitionSchemaChange = new SchemaTableName(database, "trino_test_partition_schema_change");
tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "trino_test_partition_schema_change_non_canonical");
tableBucketEvolution = new SchemaTableName(database, "trino_test_bucket_evolution");
invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
dsColumn = createBaseColumn("ds", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
fileFormatColumn = createBaseColumn("file_format", -1, HIVE_STRING, VARCHAR, PARTITION_KEY, Optional.empty());
dummyColumn = createBaseColumn("dummy", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty());
intColumn = createBaseColumn("t_int", -1, HIVE_INT, INTEGER, PARTITION_KEY, Optional.empty());
invalidColumnHandle = createBaseColumn(INVALID_COLUMN, 0, HIVE_STRING, VARCHAR, REGULAR, Optional.empty());
List<ColumnHandle> partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn);
tablePartitionFormatPartitions = ImmutableList.<HivePartition>builder()
        .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=textfile/dummy=1", ImmutableMap.<ColumnHandle, NullableValue>builder()
                .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile")))
                .put(dummyColumn, NullableValue.of(INTEGER, 1L))
                .buildOrThrow()))
        .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=sequencefile/dummy=2", ImmutableMap.<ColumnHandle, NullableValue>builder()
                .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile")))
                .put(dummyColumn, NullableValue.of(INTEGER, 2L))
                .buildOrThrow()))
        .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rctext/dummy=3", ImmutableMap.<ColumnHandle, NullableValue>builder()
                .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext")))
                .put(dummyColumn, NullableValue.of(INTEGER, 3L))
                .buildOrThrow()))
        .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rcbinary/dummy=4", ImmutableMap.<ColumnHandle, NullableValue>builder()
                .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary")))
                .put(dummyColumn, NullableValue.of(INTEGER, 4L))
                .buildOrThrow()))
        .build();
tableUnpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned));
tablePartitionFormatProperties = new ConnectorTableProperties(
        TupleDomain.withColumnDomains(ImmutableMap.of(
                dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")), Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")), Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")), Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L), Range.equal(INTEGER, 2L), Range.equal(INTEGER, 3L), Range.equal(INTEGER, 4L)), false))),
        Optional.empty(),
        Optional.empty(),
        Optional.of(new DiscretePredicates(partitionColumns, ImmutableList.of(
                TupleDomain.withColumnDomains(ImmutableMap.of(
                        dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                        fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false),
                        dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))),
                TupleDomain.withColumnDomains(ImmutableMap.of(
                        dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                        fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false),
                        dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))),
                TupleDomain.withColumnDomains(ImmutableMap.of(
                        dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                        fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false),
                        dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))),
                TupleDomain.withColumnDomains(ImmutableMap.of(
                        dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                        fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                        dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))),
        ImmutableList.of());
tableUnpartitionedProperties = new ConnectorTableProperties();
}
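The tablePartitionFormatProperties fixture above is dense; structurally, each discrete predicate is just one fixed-value TupleDomain per partition. A smaller sketch of the same shape (illustrative only, assuming a table partitioned solely by the ds column) would be:
// Illustrative sketch: one fixed-value TupleDomain per partition of a table partitioned by "ds"
List<ColumnHandle> dsOnlyPartitionColumns = ImmutableList.of(dsColumn);
Iterable<TupleDomain<ColumnHandle>> dsPartitionDomains = ImmutableList.of(
        TupleDomain.fromFixedValues(ImmutableMap.<ColumnHandle, NullableValue>of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))),
        TupleDomain.fromFixedValues(ImmutableMap.<ColumnHandle, NullableValue>of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-30")))));
DiscretePredicates dsDiscretePredicates = new DiscretePredicates(dsOnlyPartitionColumns, dsPartitionDomains);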
use of io.trino.spi.connector.DiscretePredicates in project trino by trinodb.
The class IcebergMetadata, method getTableProperties.
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle tableHandle) {
IcebergTableHandle table = (IcebergTableHandle) tableHandle;
if (table.getSnapshotId().isEmpty()) {
// A table handle without a snapshot id produces no data, so we can return TupleDomain.none() as the predicate
return new ConnectorTableProperties(TupleDomain.none(), Optional.empty(), Optional.empty(), Optional.empty(), ImmutableList.of());
}
Table icebergTable = catalog.loadTable(session, table.getSchemaTableName());
// Extract identity partition fields that are present in all partition specs, for creating the discrete predicates.
Set<Integer> partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable);
TupleDomain<IcebergColumnHandle> enforcedPredicate = table.getEnforcedPredicate();
DiscretePredicates discretePredicates = null;
if (!partitionSourceIds.isEmpty()) {
// Extract identity partition columns
Map<Integer, IcebergColumnHandle> columns = getColumns(icebergTable.schema(), typeManager).stream()
        .filter(column -> partitionSourceIds.contains(column.getId()))
        .collect(toImmutableMap(IcebergColumnHandle::getId, Function.identity()));
Supplier<List<FileScanTask>> lazyFiles = Suppliers.memoize(() -> {
TableScan tableScan = icebergTable.newScan()
        .useSnapshot(table.getSnapshotId().get())
        .filter(toIcebergExpression(enforcedPredicate))
        .includeColumnStats();
try (CloseableIterable<FileScanTask> iterator = tableScan.planFiles()) {
return ImmutableList.copyOf(iterator);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
Iterable<FileScanTask> files = () -> lazyFiles.get().iterator();
Iterable<TupleDomain<ColumnHandle>> discreteTupleDomain = Iterables.transform(files, fileScan -> {
// Extract partition values in the data file
Map<Integer, Optional<String>> partitionColumnValueStrings = getPartitionKeys(fileScan);
Map<ColumnHandle, NullableValue> partitionValues = partitionSourceIds.stream()
        .filter(partitionColumnValueStrings::containsKey)
        .collect(toImmutableMap(columns::get, columnId -> {
IcebergColumnHandle column = columns.get(columnId);
Object prestoValue = deserializePartitionValue(column.getType(), partitionColumnValueStrings.get(columnId).orElse(null), column.getName());
return NullableValue.of(column.getType(), prestoValue);
}));
return TupleDomain.fromFixedValues(partitionValues);
});
discretePredicates = new DiscretePredicates(columns.values().stream().map(ColumnHandle.class::cast).collect(toImmutableList()), discreteTupleDomain);
}
return new ConnectorTableProperties(
        // The enforced predicate is used directly here to avoid eagerly loading all partition values;
        // it could be tightened further at the cost of iterating over all tableScan.planFiles() and caching partition values in the table handle.
        enforcedPredicate.transformKeys(ColumnHandle.class::cast),
        // TODO: implement table partitioning
        Optional.empty(),
        Optional.empty(),
        Optional.ofNullable(discretePredicates),
        ImmutableList.of());
}
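The lazy-evaluation pattern above, a memoized supplier exposed through an Iterable so that tableScan.planFiles() only runs if the engine actually iterates the predicates, is reusable outside Iceberg. A stripped-down sketch of just that pattern follows; PartitionInfo, listPartitionsExpensively, keyValues, and partitionColumns are hypothetical placeholders here, not Trino or Iceberg APIs.
// Sketch: defer an expensive partition listing until the DiscretePredicates are actually iterated
Supplier<List<PartitionInfo>> lazyPartitions = Suppliers.memoize(() -> listPartitionsExpensively());
Iterable<PartitionInfo> partitions = () -> lazyPartitions.get().iterator();
Iterable<TupleDomain<ColumnHandle>> lazyDomains =
        Iterables.transform(partitions, partition -> TupleDomain.fromFixedValues(partition.keyValues()));
DiscretePredicates lazyPredicates = new DiscretePredicates(partitionColumns, lazyDomains);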