Search in sources :

Example 81 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class DeltaLakeMetadata method applyFilter.

@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) {
    DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) handle;
    SchemaTableName tableName = tableHandle.getSchemaTableName();
    Set<DeltaLakeColumnHandle> partitionColumns = ImmutableSet.copyOf(extractPartitionColumns(tableHandle.getMetadataEntry(), typeManager));
    verify(!constraint.getSummary().isNone(), "applyFilter constraint has summary NONE");
    Map<ColumnHandle, Domain> constraintDomains = constraint.getSummary().getDomains().orElseThrow();
    ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> enforceableDomains = ImmutableMap.builder();
    ImmutableMap.Builder<DeltaLakeColumnHandle, Domain> unenforceableDomains = ImmutableMap.builder();
    for (Map.Entry<ColumnHandle, Domain> domainEntry : constraintDomains.entrySet()) {
        DeltaLakeColumnHandle column = (DeltaLakeColumnHandle) domainEntry.getKey();
        if (!partitionColumns.contains(column)) {
            unenforceableDomains.put(column, domainEntry.getValue());
        } else {
            enforceableDomains.put(column, domainEntry.getValue());
        }
    }
    TupleDomain<DeltaLakeColumnHandle> newEnforcedConstraint = TupleDomain.withColumnDomains(enforceableDomains.buildOrThrow());
    TupleDomain<DeltaLakeColumnHandle> newUnenforcedConstraint = TupleDomain.withColumnDomains(unenforceableDomains.buildOrThrow());
    DeltaLakeTableHandle newHandle = new DeltaLakeTableHandle(tableName.getSchemaName(), tableName.getTableName(), tableHandle.getLocation(), Optional.of(tableHandle.getMetadataEntry()), // The unenforced constraint will still be checked by the engine.
    tableHandle.getEnforcedPartitionConstraint().intersect(newEnforcedConstraint), tableHandle.getNonPartitionConstraint().intersect(newUnenforcedConstraint).simplify(domainCompactionThreshold), tableHandle.getWriteType(), tableHandle.getProjectedColumns(), tableHandle.getUpdatedColumns(), tableHandle.getUpdateRowIdColumns(), Optional.empty(), tableHandle.getReadVersion());
    if (tableHandle.getEnforcedPartitionConstraint().equals(newHandle.getEnforcedPartitionConstraint()) && tableHandle.getNonPartitionConstraint().equals(newHandle.getNonPartitionConstraint())) {
        return Optional.empty();
    }
    return Optional.of(new ConstraintApplicationResult<>(newHandle, newUnenforcedConstraint.transformKeys(ColumnHandle.class::cast), false));
}
Also used : DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) SchemaTableName(io.trino.spi.connector.SchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap)

Example 82 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class TestHiveMetadata method testCreateMixedPredicate.

@Test
public void testCreateMixedPredicate() {
    ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
    for (int i = 0; i < 5; i++) {
        partitions.add(new HivePartition(new SchemaTableName("test", "test"), Integer.toString(i), ImmutableMap.of(TEST_COLUMN_HANDLE, NullableValue.of(VARCHAR, utf8Slice(Integer.toString(i))))));
    }
    partitions.add(new HivePartition(new SchemaTableName("test", "test"), "null", ImmutableMap.of(TEST_COLUMN_HANDLE, NullableValue.asNull(VARCHAR))));
    Domain domain = createPredicate(ImmutableList.of(TEST_COLUMN_HANDLE), partitions.build()).getDomains().orElseThrow().get(TEST_COLUMN_HANDLE);
    assertEquals(domain, Domain.create(ValueSet.of(VARCHAR, utf8Slice("0"), utf8Slice("1"), utf8Slice("2"), utf8Slice("3"), utf8Slice("4")), true));
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Domain(io.trino.spi.predicate.Domain) SchemaTableName(io.trino.spi.connector.SchemaTableName) Test(org.testng.annotations.Test)

Example 83 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class TestHiveMetadata method testCreatePredicateWithNaNAndNull.

@Test
public void testCreatePredicateWithNaNAndNull() {
    HiveColumnHandle columnHandle = DOUBLE_COLUMN_HANDLE;
    ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
    partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p1", ImmutableMap.of(columnHandle, NullableValue.of(DOUBLE, Double.NaN))));
    partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p2", ImmutableMap.of(columnHandle, NullableValue.of(DOUBLE, 4.2))));
    partitions.add(new HivePartition(new SchemaTableName("test", "test"), "p3", ImmutableMap.of(columnHandle, NullableValue.asNull(DOUBLE))));
    Domain domain = createPredicate(ImmutableList.of(columnHandle), partitions.build()).getDomains().orElseThrow().get(columnHandle);
    assertNull(domain);
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Domain(io.trino.spi.predicate.Domain) SchemaTableName(io.trino.spi.connector.SchemaTableName) Test(org.testng.annotations.Test)

Example 84 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class TupleDomainParquetPredicate method getDomain.

/**
 * Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
 * Both arrays must have the same length.
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    if (type.equals(BOOLEAN)) {
        boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(type);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(type, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(type, false), hasNullValue);
        }
        // All nulls case is handled earlier
        throw new VerifyException("Impossible boolean statistics");
    }
    if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        List<Range> ranges = new ArrayList<>();
        if (decimalType.isShort()) {
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
                long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
                if (isStatisticsOverflow(type, minValue, maxValue)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                ranges.add(Range.range(type, minValue, true, maxValue, true));
            }
        } else {
            for (int i = 0; i < minimums.size(); i++) {
                Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
                Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
                ranges.add(Range.range(type, min, true, max, true));
            }
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(REAL)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Float min = (Float) minimums.get(i);
            Float max = (Float) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DOUBLE)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Double min = (Double) minimums.get(i);
            Double max = (Double) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof VarcharType) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
            Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof TimestampType) {
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
            List<Object> values = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                // available and valid in that special case
                if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
            }
            return Domain.multipleValues(type, values, hasNullValue);
        }
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
            LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
            if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
                // Invalid statistics. Unit and UTC adjustment are not known
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
            // Bail out if the precision is not known
            if (timestampTypeAnnotation.getUnit() == null) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
            List<Range> ranges = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                long min = (long) minimums.get(i);
                long max = (long) maximums.get(i);
                ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
            }
            return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
        }
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) DateTimeZone(org.joda.time.DateTimeZone) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) INT96(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) TrinoTimestampEncoderFactory.createTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) INTEGER(io.trino.spi.type.IntegerType.INTEGER) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PredicateUtils.isStatisticsOverflow(io.trino.parquet.predicate.PredicateUtils.isStatisticsOverflow) Range(io.trino.spi.predicate.Range) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) Domain(io.trino.spi.predicate.Domain) ParquetTimestampUtils.decodeInt96Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp) Dictionary(io.trino.parquet.dictionary.Dictionary) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) TrinoTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoder) BIGINT(io.trino.spi.type.BigintType.BIGINT) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Optional(java.util.Optional) Operators(org.apache.parquet.filter2.predicate.Operators) DecimalType(io.trino.spi.type.DecimalType) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Function(java.util.function.Function) TimestampType(io.trino.spi.type.TimestampType) ArrayList(java.util.ArrayList) VarcharType(io.trino.spi.type.VarcharType) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) VerifyException(com.google.common.base.VerifyException) Int128(io.trino.spi.type.Int128) Statistics(org.apache.parquet.column.statistics.Statistics) DictionaryPage(io.trino.parquet.DictionaryPage) ParquetTimestampUtils.decodeInt64Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp) ParquetTypeUtils.getShortDecimalValue(io.trino.parquet.ParquetTypeUtils.getShortDecimalValue) TupleDomain(io.trino.spi.predicate.TupleDomain) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TINYINT(io.trino.spi.type.TinyintType.TINYINT) VarcharType(io.trino.spi.type.VarcharType) ArrayList(java.util.ArrayList) Range(io.trino.spi.predicate.Range) VerifyException(com.google.common.base.VerifyException) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Slice(io.airlift.slice.Slice) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) DecimalType(io.trino.spi.type.DecimalType) TimestampType(io.trino.spi.type.TimestampType) Binary(org.apache.parquet.io.api.Binary) Int128(io.trino.spi.type.Int128)

Example 85 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class TupleDomainParquetPredicate method matches.

@Override
public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statistics, ParquetDataSourceId id) throws ParquetCorruptionException {
    if (numberOfRows == 0) {
        return false;
    }
    if (effectivePredicate.isNone()) {
        return false;
    }
    Map<ColumnDescriptor, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));
    for (RichColumnDescriptor column : columns) {
        Domain effectivePredicateDomain = effectivePredicateDomains.get(column);
        if (effectivePredicateDomain == null) {
            continue;
        }
        Statistics<?> columnStatistics = statistics.get(column);
        if (columnStatistics == null || columnStatistics.isEmpty()) {
            // no stats for column
            continue;
        }
        Domain domain = getDomain(column, effectivePredicateDomain.getType(), numberOfRows, columnStatistics, id, timeZone);
        if (!effectivePredicateDomain.overlaps(domain)) {
            return false;
        }
    }
    return true;
}
Also used : RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain)

Aggregations

Domain (io.trino.spi.predicate.Domain)120 TupleDomain (io.trino.spi.predicate.TupleDomain)107 ColumnHandle (io.trino.spi.connector.ColumnHandle)51 Test (org.testng.annotations.Test)45 Map (java.util.Map)38 ImmutableList (com.google.common.collect.ImmutableList)36 ImmutableMap (com.google.common.collect.ImmutableMap)33 List (java.util.List)27 Optional (java.util.Optional)25 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)23 Type (io.trino.spi.type.Type)23 ConnectorSession (io.trino.spi.connector.ConnectorSession)21 SchemaTableName (io.trino.spi.connector.SchemaTableName)21 Objects.requireNonNull (java.util.Objects.requireNonNull)21 Set (java.util.Set)20 Range (io.trino.spi.predicate.Range)19 ValueSet (io.trino.spi.predicate.ValueSet)18 Constraint (io.trino.spi.connector.Constraint)17 String.format (java.lang.String.format)17 ImmutableSet (com.google.common.collect.ImmutableSet)16