Use of io.trino.orc.metadata.statistics.DoubleStatistics in project trino by trinodb.
The class IcebergOrcFileWriter, method toIcebergMinMax.
/**
 * Converts the min/max information carried by ORC column statistics into an {@link IcebergMinMax}.
 *
 * <p>The statistics kinds are probed in a fixed order (boolean, integer, double, string, date,
 * decimal, timestamp) and the first one that is present decides the result.
 *
 * @param orcColumnStats the ORC statistics of a single column
 * @param icebergType the Iceberg type the bounds are produced for
 * @param metricsModes the metrics mode handed through to {@link IcebergMinMax}
 * @return the bounds, or {@code Optional.empty()} when none of the probed statistics is present
 *         or the present one has no min or no max
 */
private static Optional<IcebergMinMax> toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType, MetricsModes.MetricsMode metricsModes) {
    BooleanStatistics boolStats = orcColumnStats.getBooleanStatistics();
    if (boolStats != null) {
        long trueCount = boolStats.getTrueValueCount();
        boolean anyTrue = trueCount != 0;
        // The lower bound is true only when every counted value is true.
        boolean allTrue = orcColumnStats.getNumberOfValues() == trueCount;
        return Optional.of(new IcebergMinMax(icebergType, allTrue, anyTrue, metricsModes));
    }

    IntegerStatistics intStats = orcColumnStats.getIntegerStatistics();
    if (intStats != null) {
        Object lower = intStats.getMin();
        Object upper = intStats.getMax();
        if (lower != null && upper != null) {
            // Iceberg INTEGER bounds are narrowed to int; toIntExact throws on overflow.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) {
                lower = toIntExact((Long) lower);
                upper = toIntExact((Long) upper);
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    DoubleStatistics floatingStats = orcColumnStats.getDoubleStatistics();
    if (floatingStats != null) {
        Object lower = floatingStats.getMin();
        Object upper = floatingStats.getMax();
        if (lower != null && upper != null) {
            // Iceberg FLOAT bounds are narrowed from double to float.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT) {
                lower = ((Double) lower).floatValue();
                upper = ((Double) upper).floatValue();
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    StringStatistics textStats = orcColumnStats.getStringStatistics();
    if (textStats != null) {
        Slice lower = textStats.getMin();
        Slice upper = textStats.getMax();
        if (lower != null && upper != null) {
            return Optional.of(new IcebergMinMax(icebergType, lower.toStringUtf8(), upper.toStringUtf8(), metricsModes));
        }
        return Optional.empty();
    }

    DateStatistics dayStats = orcColumnStats.getDateStatistics();
    if (dayStats != null) {
        Integer lower = dayStats.getMin();
        Integer upper = dayStats.getMax();
        if (lower != null && upper != null) {
            return Optional.of(new IcebergMinMax(icebergType, lower, upper, metricsModes));
        }
        return Optional.empty();
    }

    DecimalStatistics decimalStats = orcColumnStats.getDecimalStatistics();
    if (decimalStats != null) {
        BigDecimal lower = decimalStats.getMin();
        BigDecimal upper = decimalStats.getMax();
        if (lower != null && upper != null) {
            // Re-scale both bounds to the scale declared by the Iceberg decimal type.
            int targetScale = ((Types.DecimalType) icebergType).scale();
            BigDecimal scaledLower = lower.setScale(targetScale);
            BigDecimal scaledUpper = upper.setScale(targetScale);
            return Optional.of(new IcebergMinMax(icebergType, scaledLower, scaledUpper, metricsModes));
        }
        return Optional.empty();
    }

    TimestampStatistics instantStats = orcColumnStats.getTimestampStatistics();
    if (instantStats != null) {
        Long lowerMillis = instantStats.getMin();
        Long upperMillis = instantStats.getMax();
        if (lowerMillis != null && upperMillis != null) {
            // The upper bound is padded by 999 microseconds because the Trino ORC writer truncates timestamps.
            long lowerMicros = lowerMillis * MICROSECONDS_PER_MILLISECOND;
            long upperMicros = (upperMillis * MICROSECONDS_PER_MILLISECOND) + (MICROSECONDS_PER_MILLISECOND - 1);
            return Optional.of(new IcebergMinMax(icebergType, lowerMicros, upperMicros, metricsModes));
        }
        return Optional.empty();
    }

    // None of the probed statistics kinds is present for this column.
    return Optional.empty();
}
Aggregations