use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.
the class TestOrcMetadataReader method testToStringStatistics.
/**
 * Exercises {@code OrcMetadataReader.toStringStatistics} with partial and complete
 * protobuf statistics, then runs the truncation helper over every
 * prefix / code point / suffix combination for both writer versions.
 */
@Test
public void testToStringStatistics() {
    // Statistics that are not row group level are dropped (null) for the ORIGINAL writer version
    OrcProto.StringStatistics originalRolledUp = OrcProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(44)
            .build();
    assertNull(OrcMetadataReader.toStringStatistics(ORIGINAL, originalRolledUp, false));

    // A sum with no bounds is accepted by the current version, whether or not it is row group level
    OrcProto.StringStatistics sumOnly = OrcProto.StringStatistics.newBuilder().setSum(45).build();
    StringStatistics expectedSumOnly = new StringStatistics(null, null, 45);
    for (boolean isRowGroup : new boolean[] {true, false}) {
        assertEquals(OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, sumOnly, isRowGroup), expectedSumOnly);
    }

    // The ORIGINAL version still yields row group statistics (only the rolled up ones are dropped)
    assertEquals(OrcMetadataReader.toStringStatistics(ORIGINAL, sumOnly, true), expectedSumOnly);

    // A lone minimum or a lone maximum is accepted; the absent sum is reported as 0
    OrcProto.StringStatistics minimumOnly = OrcProto.StringStatistics.newBuilder().setMinimum("ant").build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, minimumOnly, true),
            new StringStatistics(utf8Slice("ant"), null, 0));

    OrcProto.StringStatistics maximumOnly = OrcProto.StringStatistics.newBuilder().setMaximum("cat").build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, maximumOnly, true),
            new StringStatistics(null, utf8Slice("cat"), 0));

    // Fully populated statistics
    OrcProto.StringStatistics complete = OrcProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(79)
            .build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, complete, true),
            new StringStatistics(utf8Slice("ant"), utf8Slice("cat"), 79));

    // Truncation check for every prefix + code point + suffix combination, under both writer versions
    for (Slice head : ALL_UTF8_SEQUENCES) {
        for (int rawCodePoint : TEST_CODE_POINTS) {
            Slice middle = codePointToUtf8(rawCodePoint);
            for (Slice tail : ALL_UTF8_SEQUENCES) {
                Slice candidate = concatSlice(head, middle, tail);
                testStringStatisticsTruncation(candidate, ORIGINAL);
                testStringStatisticsTruncation(candidate, ORC_HIVE_8732);
            }
        }
    }
}
use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.
the class TestDwrfMetadataReader method testToStringStatistics.
/**
 * Exercises {@code DwrfMetadataReader.toStringStatistics} with partial and complete
 * protobuf statistics, then runs the truncation helper over every
 * prefix / code point / suffix combination for both writer versions.
 */
@Test
public void testToStringStatistics() {
    // Statistics that are not row group level are dropped (null) for the ORIGINAL writer version
    DwrfProto.StringStatistics originalRolledUp = DwrfProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(44)
            .build();
    assertNull(DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORIGINAL, originalRolledUp, false));

    // A sum with no bounds is accepted by the current version, whether or not it is row group level
    DwrfProto.StringStatistics sumOnly = DwrfProto.StringStatistics.newBuilder().setSum(45).build();
    StringStatistics expectedSumOnly = new StringStatistics(null, null, 45);
    for (boolean isRowGroup : new boolean[] {true, false}) {
        assertEquals(
                DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, sumOnly, isRowGroup),
                expectedSumOnly);
    }

    // The ORIGINAL version still yields row group statistics (only the rolled up ones are dropped)
    assertEquals(DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORIGINAL, sumOnly, true), expectedSumOnly);

    // A lone minimum or a lone maximum is accepted; the absent sum is reported as 0
    DwrfProto.StringStatistics minimumOnly = DwrfProto.StringStatistics.newBuilder().setMinimum("ant").build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, minimumOnly, true),
            new StringStatistics(Slices.utf8Slice("ant"), null, 0));

    DwrfProto.StringStatistics maximumOnly = DwrfProto.StringStatistics.newBuilder().setMaximum("cat").build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, maximumOnly, true),
            new StringStatistics(null, Slices.utf8Slice("cat"), 0));

    // Fully populated statistics
    DwrfProto.StringStatistics complete = DwrfProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(79)
            .build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, complete, true),
            new StringStatistics(Slices.utf8Slice("ant"), Slices.utf8Slice("cat"), 79));

    // Truncation check for every prefix + code point + suffix combination, under both writer versions
    for (Slice head : ALL_UTF8_SEQUENCES) {
        for (int rawCodePoint : TEST_CODE_POINTS) {
            Slice middle = codePointToUtf8(rawCodePoint);
            for (Slice tail : ALL_UTF8_SEQUENCES) {
                Slice candidate = concatSlice(head, middle, tail);
                testStringStatisticsTruncation(candidate, HiveWriterVersion.ORIGINAL);
                testStringStatisticsTruncation(candidate, HiveWriterVersion.ORC_HIVE_8732);
            }
        }
    }
}
use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.
the class OrcMetadataReader method toStringStatistics.
/**
 * Builds a {@link StringStatistics} from the protobuf string statistics.
 *
 * @param hiveWriterVersion writer version; also forwarded to the min/max truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics are row group level
 * @return {@code null} when the writer version is {@code ORIGINAL} and the statistics are not
 *         row group level; otherwise statistics whose minimum/maximum are {@code null} when
 *         absent from the input and whose sum is 0 when absent from the input
 */
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, OrcProto.StringStatistics stringStatistics, boolean isRowGroup) {
    // Only row group level statistics are returned for the ORIGINAL writer version
    boolean discard = !isRowGroup && hiveWriterVersion == ORIGINAL;
    if (discard) {
        return null;
    }

    Slice upperBound = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        upperBound = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice lowerBound = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        lowerBound = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    // An absent sum is reported as 0
    long sumOrZero = 0;
    if (stringStatistics.hasSum()) {
        sumOrZero = stringStatistics.getSum();
    }

    return new StringStatistics(lowerBound, upperBound, sumOrZero);
}
use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.
the class IcebergOrcFileWriter method toIcebergMinMax.
/**
 * Derives Iceberg min/max bounds from ORC column statistics.
 *
 * <p>The statistics kinds are probed in a fixed order (integer, double, string, date,
 * decimal) and the first one that is present decides the result.
 *
 * @param orcColumnStats ORC column statistics to read the bounds from
 * @param icebergType Iceberg type of the column; selects int/float narrowing and the decimal scale
 * @return the bounds, or {@link Optional#empty()} when the first present statistics kind lacks
 *         a minimum or maximum, or when none of the probed kinds is present
 */
private static Optional<IcebergMinMax> toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType) {
    IntegerStatistics integerStatistics = orcColumnStats.getIntegerStatistics();
    if (integerStatistics != null) {
        Object lower = integerStatistics.getMin();
        Object upper = integerStatistics.getMax();
        if (!(lower != null && upper != null)) {
            return Optional.empty();
        }
        // For INTEGER columns the bounds are narrowed from Long to int (toIntExact throws on overflow)
        boolean narrowToInt = icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER;
        if (narrowToInt) {
            lower = toIntExact((Long) lower);
            upper = toIntExact((Long) upper);
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    DoubleStatistics doubleStatistics = orcColumnStats.getDoubleStatistics();
    if (doubleStatistics != null) {
        Object lower = doubleStatistics.getMin();
        Object upper = doubleStatistics.getMax();
        if (!(lower != null && upper != null)) {
            return Optional.empty();
        }
        // For FLOAT columns the bounds are narrowed from Double to float
        boolean narrowToFloat = icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT;
        if (narrowToFloat) {
            lower = ((Double) lower).floatValue();
            upper = ((Double) upper).floatValue();
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    StringStatistics stringStatistics = orcColumnStats.getStringStatistics();
    if (stringStatistics != null) {
        Slice lower = stringStatistics.getMin();
        Slice upper = stringStatistics.getMax();
        if (!(lower != null && upper != null)) {
            return Optional.empty();
        }
        // String bounds are handed over as decoded UTF-8 strings
        String lowerText = lower.toStringUtf8();
        String upperText = upper.toStringUtf8();
        return Optional.of(new IcebergMinMax(icebergType, lowerText, upperText));
    }

    DateStatistics dateStatistics = orcColumnStats.getDateStatistics();
    if (dateStatistics != null) {
        Integer lower = dateStatistics.getMin();
        Integer upper = dateStatistics.getMax();
        if (!(lower != null && upper != null)) {
            return Optional.empty();
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    DecimalStatistics decimalStatistics = orcColumnStats.getDecimalStatistics();
    if (decimalStatistics != null) {
        BigDecimal lower = decimalStatistics.getMin();
        BigDecimal upper = decimalStatistics.getMax();
        if (!(lower != null && upper != null)) {
            return Optional.empty();
        }
        // Both bounds are rescaled to the scale declared by the Iceberg decimal type
        int targetScale = ((DecimalType) icebergType).scale();
        BigDecimal scaledLower = lower.setScale(targetScale);
        BigDecimal scaledUpper = upper.setScale(targetScale);
        return Optional.of(new IcebergMinMax(icebergType, scaledLower, scaledUpper));
    }

    return Optional.empty();
}
use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.
the class TestTupleDomainOrcPredicate method stringColumnStats.
/**
 * Creates string column statistics for the given value count and optional bounds.
 *
 * @param numberOfValues value count passed through to the column statistics
 * @param minimum lower bound, or {@code null} for none
 * @param maximum upper bound, or {@code null} for none
 * @return column statistics wrapping a {@link StringStatistics} with the given bounds
 */
private static ColumnStatistics stringColumnStats(Long numberOfValues, String minimum, String maximum) {
    Slice lowerBound = null;
    if (minimum != null) {
        lowerBound = utf8Slice(minimum);
    }

    Slice upperBound = null;
    if (maximum != null) {
        upperBound = utf8Slice(maximum);
    }

    // This test uses neither sum nor minAverageValueSizeInBytes, so any values would do for them
    StringStatistics stringStatistics = new StringStatistics(lowerBound, upperBound, 100L);
    return new StringColumnStatistics(numberOfValues, null, stringStatistics);
}
Aggregations