Search in sources :

Example 1 with UTC_KEY

use of io.trino.spi.type.TimeZoneKey.UTC_KEY in project trino by trinodb.

the class TupleDomainOrcPredicate method getDomain.

@VisibleForTesting
public static Domain getDomain(Type type, long rowCount, ColumnStatistics columnStatistics) {
    if (rowCount == 0) {
        return Domain.none(type);
    }
    if (columnStatistics == null) {
        return Domain.all(type);
    }
    if (columnStatistics.hasNumberOfValues() && columnStatistics.getNumberOfValues() == 0) {
        return Domain.onlyNull(type);
    }
    boolean hasNullValue = columnStatistics.getNumberOfValues() != rowCount;
    if (type instanceof TimeType && columnStatistics.getIntegerStatistics() != null) {
        // This is the representation of TIME used by Iceberg
        return createDomain(type, hasNullValue, columnStatistics.getIntegerStatistics(), value -> ((long) value) * Timestamps.PICOSECONDS_PER_MICROSECOND);
    }
    if (type.getJavaType() == boolean.class && columnStatistics.getBooleanStatistics() != null) {
        BooleanStatistics booleanStatistics = columnStatistics.getBooleanStatistics();
        boolean hasTrueValues = (booleanStatistics.getTrueValueCount() != 0);
        boolean hasFalseValues = (columnStatistics.getNumberOfValues() != booleanStatistics.getTrueValueCount());
        if (hasTrueValues && hasFalseValues) {
            return Domain.all(BOOLEAN);
        }
        if (hasTrueValues) {
            return Domain.create(ValueSet.of(BOOLEAN, true), hasNullValue);
        }
        if (hasFalseValues) {
            return Domain.create(ValueSet.of(BOOLEAN, false), hasNullValue);
        }
    } else if (isShortDecimal(type) && columnStatistics.getDecimalStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> rescale(value, (DecimalType) type).unscaledValue().longValue());
    } else if (isLongDecimal(type) && columnStatistics.getDecimalStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDecimalStatistics(), value -> Int128.valueOf(rescale(value, (DecimalType) type).unscaledValue()));
    } else if (type instanceof CharType && columnStatistics.getStringStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getStringStatistics(), value -> truncateToLengthAndTrimSpaces(value, type));
    } else if (type instanceof VarcharType && columnStatistics.getStringStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getStringStatistics());
    } else if (type instanceof DateType && columnStatistics.getDateStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDateStatistics(), value -> (long) value);
    } else if ((type.equals(TIMESTAMP_MILLIS) || type.equals(TIMESTAMP_MICROS)) && columnStatistics.getTimestampStatistics() != null) {
        // upper bound of the domain we create must be adjusted accordingly, to includes the rounded timestamp.
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> min * MICROSECONDS_PER_MILLISECOND, max -> (max + 1) * MICROSECONDS_PER_MILLISECOND);
    } else if (type.equals(TIMESTAMP_NANOS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> new LongTimestamp(min * MICROSECONDS_PER_MILLISECOND, 0), max -> new LongTimestamp((max + 1) * MICROSECONDS_PER_MILLISECOND, 0));
    } else if (type.equals(TIMESTAMP_TZ_MILLIS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), value -> packDateTimeWithZone(value, UTC_KEY));
    } else if (type.equals(TIMESTAMP_TZ_MICROS) && (columnStatistics.getTimestampStatistics() != null)) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(min, 0, UTC_KEY), max -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(max, 999_000_000, UTC_KEY));
    } else if (type.equals(TIMESTAMP_TZ_NANOS) && columnStatistics.getTimestampStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getTimestampStatistics(), min -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(min, 0, UTC_KEY), max -> LongTimestampWithTimeZone.fromEpochMillisAndFraction(max, 999_999_000, UTC_KEY));
    } else if (type.getJavaType() == long.class && columnStatistics.getIntegerStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getIntegerStatistics());
    } else if (type.getJavaType() == double.class && columnStatistics.getDoubleStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics());
    } else if (REAL.equals(type) && columnStatistics.getDoubleStatistics() != null) {
        return createDomain(type, hasNullValue, columnStatistics.getDoubleStatistics(), value -> (long) floatToRawIntBits(value.floatValue()));
    }
    return Domain.create(ValueSet.all(type), hasNullValue);
}
Also used : MICROSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND) DateType(io.trino.spi.type.DateType) TIMESTAMP_TZ_NANOS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_NANOS) LongTimestampWithTimeZone(io.trino.spi.type.LongTimestampWithTimeZone) Decimals.rescale(io.trino.spi.type.Decimals.rescale) RangeStatistics(io.trino.orc.metadata.statistics.RangeStatistics) INTEGER(io.trino.spi.type.IntegerType.INTEGER) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) Range(io.trino.spi.predicate.Range) UTC_KEY(io.trino.spi.type.TimeZoneKey.UTC_KEY) Domain(io.trino.spi.predicate.Domain) Collection(java.util.Collection) DateTimeEncoding.packDateTimeWithZone(io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone) ValueSet(io.trino.spi.predicate.ValueSet) TIMESTAMP_NANOS(io.trino.spi.type.TimestampType.TIMESTAMP_NANOS) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) DecimalType(io.trino.spi.type.DecimalType) ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Timestamps(io.trino.spi.type.Timestamps) Slice(io.airlift.slice.Slice) TimeType(io.trino.spi.type.TimeType) Decimals.isLongDecimal(io.trino.spi.type.Decimals.isLongDecimal) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) Type(io.trino.spi.type.Type) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) Float.intBitsToFloat(java.lang.Float.intBitsToFloat) Function(java.util.function.Function) ArrayList(java.util.ArrayList) VarcharType(io.trino.spi.type.VarcharType) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) TIMESTAMP_TZ_MILLIS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS) ImmutableList(com.google.common.collect.ImmutableList) Chars.truncateToLengthAndTrimSpaces(io.trino.spi.type.Chars.truncateToLengthAndTrimSpaces) Objects.requireNonNull(java.util.Objects.requireNonNull) Math.floorDiv(java.lang.Math.floorDiv) Decimals.isShortDecimal(io.trino.spi.type.Decimals.isShortDecimal) Int128(io.trino.spi.type.Int128) TIMESTAMP_TZ_MICROS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MICROS) LongTimestamp(io.trino.spi.type.LongTimestamp) BloomFilter(io.trino.orc.metadata.statistics.BloomFilter) ColumnMetadata(io.trino.orc.metadata.ColumnMetadata) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TIMESTAMP_MICROS(io.trino.spi.type.TimestampType.TIMESTAMP_MICROS) VarbinaryType(io.trino.spi.type.VarbinaryType) CharType(io.trino.spi.type.CharType) BooleanStatistics(io.trino.orc.metadata.statistics.BooleanStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TINYINT(io.trino.spi.type.TinyintType.TINYINT) OrcColumnId(io.trino.orc.metadata.OrcColumnId) LongTimestamp(io.trino.spi.type.LongTimestamp) VarcharType(io.trino.spi.type.VarcharType) BooleanStatistics(io.trino.orc.metadata.statistics.BooleanStatistics) DecimalType(io.trino.spi.type.DecimalType) CharType(io.trino.spi.type.CharType) DateType(io.trino.spi.type.DateType) TimeType(io.trino.spi.type.TimeType) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with UTC_KEY

use of io.trino.spi.type.TimeZoneKey.UTC_KEY in project trino by trinodb.

the class TestDeltaLakeCreateTableStatistics method testTimestampMilliSingleRecord.

// int96 timestamp Statistics are only populated if a row group contains a single value
@Test
public void testTimestampMilliSingleRecord() throws IOException {
    String columnName = "t_timestamp";
    DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(columnName, TIMESTAMP_TZ_MILLIS, REGULAR);
    try (TestTable table = new TestTable("test_timestamp_single_record_", ImmutableList.of(columnName), "VALUES timestamp '2012-10-31 04:00:00.123 America/New_York', timestamp '2012-10-31 01:00:00.123 America/Los_Angeles', null")) {
        List<AddFileEntry> addFileEntries = getAddFileEntries(table.getName());
        AddFileEntry entry = getOnlyElement(addFileEntries);
        assertThat(entry.getStats()).isPresent();
        DeltaLakeFileStatistics fileStatistics = entry.getStats().get();
        assertEquals(fileStatistics.getNumRecords(), Optional.of(3L));
        Function<String, Long> timestampValueConverter = valueString -> {
            ZonedDateTime zonedDateTime = ZonedDateTime.parse(valueString);
            Instant instant = zonedDateTime.toInstant();
            return packDateTimeWithZone(instant.toEpochMilli(), UTC_KEY);
        };
        assertEquals(fileStatistics.getMinColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z")));
        assertEquals(fileStatistics.getMaxColumnValue(columnHandle), Optional.of(timestampValueConverter.apply("2012-10-31T08:00:00.123Z")));
        assertEquals(fileStatistics.getNullCount(columnName), Optional.of(1L));
    }
}
Also used : DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) UTC_KEY(io.trino.spi.type.TimeZoneKey.UTC_KEY) ZonedDateTime(java.time.ZonedDateTime) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) DateTimeEncoding.packDateTimeWithZone(io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Instant(java.time.Instant) Function(java.util.function.Function) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) List(java.util.List) TIMESTAMP_TZ_MILLIS(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) QueryRunner(io.trino.testing.QueryRunner) Map(java.util.Map) Optional(java.util.Optional) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) ZonedDateTime(java.time.ZonedDateTime) Instant(java.time.Instant) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Test(org.testng.annotations.Test)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)2 DateTimeEncoding.packDateTimeWithZone (io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone)2 UTC_KEY (io.trino.spi.type.TimeZoneKey.UTC_KEY)2 TIMESTAMP_TZ_MILLIS (io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 MoreObjects.toStringHelper (com.google.common.base.MoreObjects.toStringHelper)1 Verify.verify (com.google.common.base.Verify.verify)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Iterables.getOnlyElement (com.google.common.collect.Iterables.getOnlyElement)1 Slice (io.airlift.slice.Slice)1 ColumnMetadata (io.trino.orc.metadata.ColumnMetadata)1 OrcColumnId (io.trino.orc.metadata.OrcColumnId)1 BloomFilter (io.trino.orc.metadata.statistics.BloomFilter)1 BooleanStatistics (io.trino.orc.metadata.statistics.BooleanStatistics)1 ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics)1 RangeStatistics (io.trino.orc.metadata.statistics.RangeStatistics)1 REGULAR (io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR)1 AddFileEntry (io.trino.plugin.deltalake.transactionlog.AddFileEntry)1 DeltaLakeFileStatistics (io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics)1 ParquetWriterConfig (io.trino.plugin.hive.parquet.ParquetWriterConfig)1