Search in sources :

Example 1 with TrinoTimestampEncoder

use of io.trino.plugin.base.type.TrinoTimestampEncoder in project trino by trinodb.

the class TupleDomainParquetPredicate method getDomain.

/**
 * Get a domain for the ranges defined by each pair of elements from {@code minimums} and {@code maximums}.
 * Both arrays must have the same length.
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue, DateTimeZone timeZone) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    if (type.equals(BOOLEAN)) {
        boolean hasTrueValues = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean hasFalseValues = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(type);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(type, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(type, false), hasNullValue);
        }
        // All nulls case is handled earlier
        throw new VerifyException("Impossible boolean statistics");
    }
    if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(DATE) || type.equals(SMALLINT) || type.equals(TINYINT)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            long min = asLong(minimums.get(i));
            long max = asLong(maximums.get(i));
            if (isStatisticsOverflow(type, min, max)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        List<Range> ranges = new ArrayList<>();
        if (decimalType.isShort()) {
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                long minValue = min instanceof Binary ? getShortDecimalValue(((Binary) min).getBytes()) : asLong(min);
                long maxValue = min instanceof Binary ? getShortDecimalValue(((Binary) max).getBytes()) : asLong(max);
                if (isStatisticsOverflow(type, minValue, maxValue)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                ranges.add(Range.range(type, minValue, true, maxValue, true));
            }
        } else {
            for (int i = 0; i < minimums.size(); i++) {
                Int128 min = Int128.fromBigEndian(((Binary) minimums.get(i)).getBytes());
                Int128 max = Int128.fromBigEndian(((Binary) maximums.get(i)).getBytes());
                ranges.add(Range.range(type, min, true, max, true));
            }
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(REAL)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Float min = (Float) minimums.get(i);
            Float max = (Float) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, (long) floatToRawIntBits(min), true, (long) floatToRawIntBits(max), true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type.equals(DOUBLE)) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Double min = (Double) minimums.get(i);
            Double max = (Double) maximums.get(i);
            if (min.isNaN() || max.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof VarcharType) {
        List<Range> ranges = new ArrayList<>();
        for (int i = 0; i < minimums.size(); i++) {
            Slice min = Slices.wrappedBuffer(((Binary) minimums.get(i)).toByteBuffer());
            Slice max = Slices.wrappedBuffer(((Binary) maximums.get(i)).toByteBuffer());
            ranges.add(Range.range(type, min, true, max, true));
        }
        return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
    }
    if (type instanceof TimestampType) {
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT96)) {
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, timeZone);
            List<Object> values = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                Object min = minimums.get(i);
                Object max = maximums.get(i);
                // available and valid in that special case
                if (!(min instanceof Binary) || !(max instanceof Binary) || !min.equals(max)) {
                    return Domain.create(ValueSet.all(type), hasNullValue);
                }
                values.add(timestampEncoder.getTimestamp(decodeInt96Timestamp((Binary) min)));
            }
            return Domain.multipleValues(type, values, hasNullValue);
        }
        if (column.getPrimitiveType().getPrimitiveTypeName().equals(INT64)) {
            LogicalTypeAnnotation logicalTypeAnnotation = column.getPrimitiveType().getLogicalTypeAnnotation();
            if (!(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation)) {
                // Invalid statistics. Unit and UTC adjustment are not known
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TimestampLogicalTypeAnnotation timestampTypeAnnotation = (TimestampLogicalTypeAnnotation) logicalTypeAnnotation;
            // Bail out if the precision is not known
            if (timestampTypeAnnotation.getUnit() == null) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            TrinoTimestampEncoder<?> timestampEncoder = createTimestampEncoder((TimestampType) type, DateTimeZone.UTC);
            List<Range> ranges = new ArrayList<>();
            for (int i = 0; i < minimums.size(); i++) {
                long min = (long) minimums.get(i);
                long max = (long) maximums.get(i);
                ranges.add(Range.range(type, timestampEncoder.getTimestamp(decodeInt64Timestamp(min, timestampTypeAnnotation.getUnit())), true, timestampEncoder.getTimestamp(decodeInt64Timestamp(max, timestampTypeAnnotation.getUnit())), true));
            }
            return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
        }
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) DateTimeZone(org.joda.time.DateTimeZone) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) INT96(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96) TrinoTimestampEncoderFactory.createTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) INTEGER(io.trino.spi.type.IntegerType.INTEGER) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) PredicateUtils.isStatisticsOverflow(io.trino.parquet.predicate.PredicateUtils.isStatisticsOverflow) Range(io.trino.spi.predicate.Range) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) Domain(io.trino.spi.predicate.Domain) ParquetTimestampUtils.decodeInt96Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp) Dictionary(io.trino.parquet.dictionary.Dictionary) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) TrinoTimestampEncoder(io.trino.plugin.base.type.TrinoTimestampEncoder) BIGINT(io.trino.spi.type.BigintType.BIGINT) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Optional(java.util.Optional) Operators(org.apache.parquet.filter2.predicate.Operators) DecimalType(io.trino.spi.type.DecimalType) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Function(java.util.function.Function) TimestampType(io.trino.spi.type.TimestampType) ArrayList(java.util.ArrayList) VarcharType(io.trino.spi.type.VarcharType) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) VerifyException(com.google.common.base.VerifyException) Int128(io.trino.spi.type.Int128) Statistics(org.apache.parquet.column.statistics.Statistics) DictionaryPage(io.trino.parquet.DictionaryPage) ParquetTimestampUtils.decodeInt64Timestamp(io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp) ParquetTypeUtils.getShortDecimalValue(io.trino.parquet.ParquetTypeUtils.getShortDecimalValue) TupleDomain(io.trino.spi.predicate.TupleDomain) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TINYINT(io.trino.spi.type.TinyintType.TINYINT) VarcharType(io.trino.spi.type.VarcharType) ArrayList(java.util.ArrayList) Range(io.trino.spi.predicate.Range) VerifyException(com.google.common.base.VerifyException) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) Slice(io.airlift.slice.Slice) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) DecimalType(io.trino.spi.type.DecimalType) TimestampType(io.trino.spi.type.TimestampType) Binary(org.apache.parquet.io.api.Binary) Int128(io.trino.spi.type.Int128)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 VerifyException (com.google.common.base.VerifyException)1 ImmutableList (com.google.common.collect.ImmutableList)1 Slice (io.airlift.slice.Slice)1 Slices (io.airlift.slice.Slices)1 DictionaryPage (io.trino.parquet.DictionaryPage)1 ParquetCorruptionException (io.trino.parquet.ParquetCorruptionException)1 ParquetDataSourceId (io.trino.parquet.ParquetDataSourceId)1 ParquetTimestampUtils.decodeInt64Timestamp (io.trino.parquet.ParquetTimestampUtils.decodeInt64Timestamp)1 ParquetTimestampUtils.decodeInt96Timestamp (io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp)1 ParquetTypeUtils.getShortDecimalValue (io.trino.parquet.ParquetTypeUtils.getShortDecimalValue)1 RichColumnDescriptor (io.trino.parquet.RichColumnDescriptor)1 Dictionary (io.trino.parquet.dictionary.Dictionary)1 PredicateUtils.isStatisticsOverflow (io.trino.parquet.predicate.PredicateUtils.isStatisticsOverflow)1 TrinoTimestampEncoder (io.trino.plugin.base.type.TrinoTimestampEncoder)1 TrinoTimestampEncoderFactory.createTimestampEncoder (io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder)1 Domain (io.trino.spi.predicate.Domain)1 Range (io.trino.spi.predicate.Range)1 TupleDomain (io.trino.spi.predicate.TupleDomain)1