use of io.trino.spi.predicate.Domain in project trino by trinodb.
the class DeltaLakeMetadata method applyFilter.
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) {
DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) handle;
SchemaTableName tableName = tableHandle.getSchemaTableName();
Set<DeltaLakeColumnHandle> partitionColumns = ImmutableSet.copyOf(extractPartitionColumns(tableHandle.getMetadataEntry(), typeManager));
verify(!constraint.getSummary().isNone(), "applyFilter constraint has summary NONE");
Map<ColumnHandle, Domain> constraintDomains = constraint.getSummary().getDomains().orElseThrow();
ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> enforceableDomains = ImmutableMap.builder();
ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> unenforceableDomains = ImmutableMap.builder();
for (Map.Entry<ColumnHandle, Domain> domainEntry : constraintDomains.entrySet()) {
DeltaLakeColumnHandle column = (DeltaLakeColumnHandle) domainEntry.getKey();
if (!partitionColumns.contains(column)) {
unenforceableDomains.put(column, domainEntry.getValue());
} else {
enforceableDomains.put(column, domainEntry.getValue());
}
}
TupleDomain<DeltaLakeColumnHandle> newEnforcedConstraint = TupleDomain.withColumnDomains(enforceableDomains.buildOrThrow());
TupleDomain<DeltaLakeColumnHandle> newUnenforcedConstraint = TupleDomain.withColumnDomains(unenforceableDomains.buildOrThrow());
DeltaLakeTableHandle newHandle = new DeltaLakeTableHandle(tableName.getSchemaName(), tableName.getTableName(), tableHandle.getLocation(), Optional.of(tableHandle.getMetadataEntry()), // The unenforced constraint will still be checked by the engine.
tableHandle.getEnforcedPartitionConstraint().intersect(newEnforcedConstraint), tableHandle.getNonPartitionConstraint().intersect(newUnenforcedConstraint).simplify(domainCompactionThreshold), tableHandle.getWriteType(), tableHandle.getProjectedColumns(), tableHandle.getUpdatedColumns(), tableHandle.getUpdateRowIdColumns(), Optional.empty(), tableHandle.getReadVersion());
if (tableHandle.getEnforcedPartitionConstraint().equals(newHandle.getEnforcedPartitionConstraint()) && tableHandle.getNonPartitionConstraint().equals(newHandle.getNonPartitionConstraint())) {
return Optional.empty();
}
return Optional.of(new ConstraintApplicationResult<>(newHandle, newUnenforcedConstraint.transformKeys(ColumnHandle.class::cast), false));
}
use of io.trino.spi.predicate.Domain in project trino by trinodb.
the class TestHiveMetadata method testCreateMixedPredicate.
@Test
public void testCreateMixedPredicate() {
ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
for (int i = 0; i < 5; i++) {
partitions.add(new HivePartition(new SchemaTableName("test", "test"), Integer.toString(i), ImmutableMap.of(TEST_COLUMN_HANDLE, NullableValue.of(VARCHAR, utf8Slice(Integer.toString(i))))));
}
partitions.add(new HivePartition(new SchemaTableName("test", "test"), "null", ImmutableMap.of(TEST_COLUMN_HANDLE, NullableValue.asNull(VARCHAR))));
Domain domain = createPredicate(ImmutableList.of(TEST_COLUMN_HANDLE), partitions.build()).getDomains().orElseThrow().get(TEST_COLUMN_HANDLE);
assertEquals(domain, Domain.create(ValueSet.of(VARCHAR, utf8Slice("0"), utf8Slice("1"), utf8Slice("2"), utf8Slice("3"), utf8Slice("4")), true));
}
use of io.trino.spi.predicate.Domain in project trino by trinodb.
the class TestHiveMetadata method testCreatePredicateWithNaNAndNull.
@Test
public void testCreatePredicateWithNaNAndNull() {
HiveColumnHandle columnHandle = DOUBLE_COLUMN_HANDLE;
ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p1", ImmutableMap.of(columnHandle, NullableValue.of(DOUBLE, Double.NaN))));
partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p2", ImmutableMap.of(columnHandle, NullableValue.of(DOUBLE, 4.2))));
partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p3", ImmutableMap.of(columnHandle, NullableValue.asNull(DOUBLE))));
Domain domain = createPredicate(ImmutableList.of(columnHandle), partitions.build()).getDomains().orElseThrow().get(columnHandle);
assertNull(domain);
}
use of io.trino.spi.predicate.Domain in project trino by trinodb.
the class TupleDomainParquetPredicate method getDomain.
/**
* Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
* Both arrays must have the same length.
*/
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
if (type.equals(BOOLEAN)) {
boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
if (hasTrueValues && hasFalseValues) {
return Domain.all(type);
}
if (hasTrueValues) {
return Domain.create(ValueSet.of(type, true), hasNullValue);
}
if (hasFalseValues) {
return Domain.create(ValueSet.of(type, false), hasNullValue);
}
// All nulls case is handled earlier
throw new VerifyException("Impossible boolean statistics");
}
if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
long min = asLong(minimums.get(i));
long max = asLong(maximums.get(i));
if (isStatisticsOverflow(type, min, max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof DecimalType) {
DecimalType decimalType = (DecimalType) type;
List<Range> ranges = new ArrayList<>();
if (decimalType.isShort()) {
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);
long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
if (isStatisticsOverflow(type, minValue, maxValue)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, minValue, true, maxValue, true));
}
} else {
for (int i = 0; i < minimums.size(); i++) {
Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
ranges.add(Range.range(type, min, true, max, true));
}
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(REAL)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Float min = (Float) minimums.get(i);
Float max = (Float) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type.equals(DOUBLE)) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Double min = (Double) minimums.get(i);
Double max = (Double) maximums.get(i);
if (min.isNaN() || max.isNaN()) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof VarcharType) {
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
ranges.add(Range.range(type, min, true, max, true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
if (type instanceof TimestampType) {
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
List<Object> values = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
Object min = minimums.get(i);
Object max = maximums.get(i);
// available and valid in that special case
if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
}
return Domain.multipleValues(type, values, hasNullValue);
}
if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
// Invalid statistics. Unit and UTC adjustment are not known
return Domain.create(ValueSet.all(type), hasNullValue);
}
TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
// Bail out if the precision is not known
if (timestampTypeAnnotation.getUnit() == null) {
return Domain.create(ValueSet.all(type), hasNullValue);
}
TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
List<Range> ranges = new ArrayList<>();
for (int i = 0; i < minimums.size(); i++) {
long min = (long) minimums.get(i);
long max = (long) maximums.get(i);
ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
}
return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
}
return Domain.create(ValueSet.all(type), hasNullValue);
}
use of io.trino.spi.predicate.Domain in project trino by trinodb.
the class TupleDomainParquetPredicate method matches.
@Override
public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statistics, ParquetDataSourceId id) throws ParquetCorruptionException {
if (numberOfRows == 0) {
return false;
}
if (effectivePredicate.isNone()) {
return false;
}
Map<ColumnDescriptor, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));
for (RichColumnDescriptor column : columns) {
Domain effectivePredicateDomain = effectivePredicateDomains.get(column);
if (effectivePredicateDomain == null) {
continue;
}
Statistics<?> columnStatistics = statistics.get(column);
if (columnStatistics == null || columnStatistics.isEmpty()) {
// no stats for column
continue;
}
Domain domain = getDomain(column, effectivePredicateDomain.getType(), numberOfRows, columnStatistics, id, timeZone);
if (!effectivePredicateDomain.overlaps(domain)) {
return false;
}
}
return true;
}
Aggregations