Search in sources:

Example 1 with DecimalStatistics

use of io.trino.orc.metadata.statistics.DecimalStatistics in project trino by trinodb.

the class OrcMetadataReader method toDecimalStatistics.

/**
 * Converts ORC protobuf decimal statistics into a {@code DecimalStatistics} instance.
 * A bound that the protobuf message does not carry is passed on as {@code null}.
 *
 * @param decimalStatistics the protobuf decimal statistics to convert
 * @return statistics holding the parsed minimum and maximum (either may be {@code null})
 */
private static DecimalStatistics toDecimalStatistics(OrcProto.DecimalStatistics decimalStatistics) {
    BigDecimal lowerBound = null;
    if (decimalStatistics.hasMinimum()) {
        lowerBound = new BigDecimal(decimalStatistics.getMinimum());
    }

    BigDecimal upperBound = null;
    if (decimalStatistics.hasMaximum()) {
        upperBound = new BigDecimal(decimalStatistics.getMaximum());
    }

    // The value may be a long (16 bytes) or a short (8 bytes) decimal; the short size is used for estimation.
    return new DecimalStatistics(lowerBound, upperBound, SHORT_DECIMAL_VALUE_BYTES);
}
Also used : DecimalStatistics(io.trino.orc.metadata.statistics.DecimalStatistics) BigDecimal(java.math.BigDecimal)

Example 2 with DecimalStatistics

use of io.trino.orc.metadata.statistics.DecimalStatistics in project trino by trinodb.

the class IcebergOrcFileWriter method toIcebergMinMax.

/**
 * Derives Iceberg min/max bounds from the ORC statistics of a single column.
 *
 * <p>The statistics kinds are probed in a fixed order (boolean, integer, double, string, date,
 * decimal, timestamp) and the first kind that is present decides the result. If that kind has
 * no minimum or no maximum, or if none of the kinds is present, the result is empty.
 *
 * @param orcColumnStats the ORC column statistics to read from
 * @param icebergType the Iceberg type of the column; used to narrow integer and double bounds and to rescale decimals
 * @param metricsModes the metrics mode handed through to the created {@code IcebergMinMax}
 * @return the bounds, or {@code Optional.empty()} when they cannot be derived
 */
private static Optional<IcebergMinMax> toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType, MetricsModes.MetricsMode metricsModes) {
    BooleanStatistics booleanStats = orcColumnStats.getBooleanStatistics();
    if (booleanStats != null) {
        boolean anyTrue = booleanStats.getTrueValueCount() != 0;
        // The lower bound is true only when the true-value count equals the number of values.
        boolean noneFalse = orcColumnStats.getNumberOfValues() == booleanStats.getTrueValueCount();
        return Optional.of(new IcebergMinMax(icebergType, noneFalse, anyTrue, metricsModes));
    }

    IntegerStatistics integerStats = orcColumnStats.getIntegerStatistics();
    if (integerStats != null) {
        Object lower = integerStats.getMin();
        Object upper = integerStats.getMax();
        if (lower != null && upper != null) {
            // Iceberg INTEGER columns take int bounds; toIntExact fails rather than silently truncating.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) {
                lower = toIntExact((Long) lower);
                upper = toIntExact((Long) upper);
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    DoubleStatistics doubleStats = orcColumnStats.getDoubleStatistics();
    if (doubleStats != null) {
        Object lower = doubleStats.getMin();
        Object upper = doubleStats.getMax();
        if (lower != null && upper != null) {
            // Iceberg FLOAT columns take float bounds.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT) {
                lower = ((Double) lower).floatValue();
                upper = ((Double) upper).floatValue();
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    StringStatistics stringStats = orcColumnStats.getStringStatistics();
    if (stringStats != null) {
        Slice lower = stringStats.getMin();
        Slice upper = stringStats.getMax();
        if (lower != null && upper != null) {
            return Optional.of(new IcebergMinMax(icebergType, lower.toStringUtf8(), upper.toStringUtf8(), metricsModes));
        }
        return Optional.empty();
    }

    DateStatistics dateStats = orcColumnStats.getDateStatistics();
    if (dateStats != null) {
        Integer lower = dateStats.getMin();
        Integer upper = dateStats.getMax();
        if (lower != null && upper != null) {
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    DecimalStatistics decimalStats = orcColumnStats.getDecimalStatistics();
    if (decimalStats != null) {
        BigDecimal lower = decimalStats.getMin();
        BigDecimal upper = decimalStats.getMax();
        if (lower != null && upper != null) {
            // Bring both bounds to the scale declared by the Iceberg decimal type.
            int targetScale = ((Types.DecimalType) icebergType).scale();
            lower = lower.setScale(targetScale);
            upper = upper.setScale(targetScale);
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    TimestampStatistics timestampStats = orcColumnStats.getTimestampStatistics();
    if (timestampStats != null) {
        Long lower = timestampStats.getMin();
        Long upper = timestampStats.getMax();
        if (lower != null && upper != null) {
            // Bounds are scaled by MICROSECONDS_PER_MILLISECOND; the upper bound is additionally widened by
            // (MICROSECONDS_PER_MILLISECOND - 1) because the Trino ORC writer truncates timestamps.
            return Optional.of(new IcebergMinMax(
                    icebergType,
                    lower * MICROSECONDS_PER_MILLISECOND,
                    (upper * MICROSECONDS_PER_MILLISECOND) + (MICROSECONDS_PER_MILLISECOND - 1),
                    metricsModes));
        }
        return Optional.empty();
    }

    return Optional.empty();
}
Also used : DateStatistics(io.trino.orc.metadata.statistics.DateStatistics) TimestampStatistics(io.trino.orc.metadata.statistics.TimestampStatistics) BigDecimal(java.math.BigDecimal) StringStatistics(io.trino.orc.metadata.statistics.StringStatistics) DecimalStatistics(io.trino.orc.metadata.statistics.DecimalStatistics) DoubleStatistics(io.trino.orc.metadata.statistics.DoubleStatistics) Slice(io.airlift.slice.Slice) BooleanStatistics(io.trino.orc.metadata.statistics.BooleanStatistics) IntegerStatistics(io.trino.orc.metadata.statistics.IntegerStatistics)

Example 3 with DecimalStatistics

use of io.trino.orc.metadata.statistics.DecimalStatistics in project trino by trinodb.

the class TestTupleDomainOrcPredicate method decimalColumnStats.

/**
 * Builds a {@code ColumnStatistics} whose only populated statistics kind is the decimal one.
 *
 * @param numberOfValues value count passed through to the {@code ColumnStatistics} constructor
 * @param minimum decimal minimum as text, or {@code null} for no minimum
 * @param maximum decimal maximum as text, or {@code null} for no maximum
 * @return column statistics carrying the parsed decimal bounds
 */
private static ColumnStatistics decimalColumnStats(Long numberOfValues, String minimum, String maximum) {
    BigDecimal lowerBound = null;
    if (minimum != null) {
        lowerBound = new BigDecimal(minimum);
    }

    BigDecimal upperBound = null;
    if (maximum != null) {
        upperBound = new BigDecimal(maximum);
    }

    DecimalStatistics decimalStatistics = new DecimalStatistics(lowerBound, upperBound, SHORT_DECIMAL_VALUE_BYTES);
    // Every statistics slot other than the decimal one is left null.
    return new ColumnStatistics(numberOfValues, 9L, null, null, null, null, null, null, decimalStatistics, null, null);
}
Also used : ColumnStatistics(io.trino.orc.metadata.statistics.ColumnStatistics) DecimalStatistics(io.trino.orc.metadata.statistics.DecimalStatistics) BigDecimal(java.math.BigDecimal)

Aggregations

DecimalStatistics (io.trino.orc.metadata.statistics.DecimalStatistics): 3, BigDecimal (java.math.BigDecimal): 3, Slice (io.airlift.slice.Slice): 1, BooleanStatistics (io.trino.orc.metadata.statistics.BooleanStatistics): 1, ColumnStatistics (io.trino.orc.metadata.statistics.ColumnStatistics): 1, DateStatistics (io.trino.orc.metadata.statistics.DateStatistics): 1, DoubleStatistics (io.trino.orc.metadata.statistics.DoubleStatistics): 1, IntegerStatistics (io.trino.orc.metadata.statistics.IntegerStatistics): 1, StringStatistics (io.trino.orc.metadata.statistics.StringStatistics): 1, TimestampStatistics (io.trino.orc.metadata.statistics.TimestampStatistics): 1