Search in sources :

Example 1 with StringStatistics

use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.

the class TestOrcMetadataReader method testToStringStatistics.

/**
 * Exercises {@code OrcMetadataReader.toStringStatistics} with partially and fully populated
 * protobuf string statistics for both the ORIGINAL and ORC_HIVE_8732 writer versions, then
 * runs the truncation checks over combinations of UTF-8 prefix, code point, and suffix.
 */
@Test
public void testToStringStatistics() {
    // ORIGINAL writer version: non row group (rolled up) statistics are expected to be dropped
    OrcProto.StringStatistics rolledUpOriginal = OrcProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(44)
            .build();
    assertNull(OrcMetadataReader.toStringStatistics(ORIGINAL, rolledUpOriginal, false));

    // the current writer version accepts statistics that carry only a sum, at either level
    for (boolean rowGroupLevel : ImmutableList.of(true, false)) {
        OrcProto.StringStatistics sumOnly = OrcProto.StringStatistics.newBuilder().setSum(45).build();
        StringStatistics expectedSumOnly = new StringStatistics(null, null, 45);
        assertEquals(OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, sumOnly, rowGroupLevel), expectedSumOnly);
    }

    // the ORIGINAL writer version accepts the same input for row group stats (but not rolled up stats)
    OrcProto.StringStatistics originalSumOnly = OrcProto.StringStatistics.newBuilder().setSum(45).build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORIGINAL, originalSumOnly, true),
            new StringStatistics(null, null, 45));

    // only a minimum, or only a maximum, is accepted as well
    OrcProto.StringStatistics minimumOnly = OrcProto.StringStatistics.newBuilder().setMinimum("ant").build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, minimumOnly, true),
            new StringStatistics(utf8Slice("ant"), null, 0));
    OrcProto.StringStatistics maximumOnly = OrcProto.StringStatistics.newBuilder().setMaximum("cat").build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, maximumOnly, true),
            new StringStatistics(null, utf8Slice("cat"), 0));

    // fully populated statistics
    OrcProto.StringStatistics complete = OrcProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(79)
            .build();
    assertEquals(
            OrcMetadataReader.toStringStatistics(ORC_HIVE_8732, complete, true),
            new StringStatistics(utf8Slice("ant"), utf8Slice("cat"), 79));

    // truncation checks: every prefix x test code point x suffix combination, for both writer versions
    for (Slice head : ALL_UTF8_SEQUENCES) {
        for (int rawCodePoint : TEST_CODE_POINTS) {
            Slice middle = codePointToUtf8(rawCodePoint);
            for (Slice tail : ALL_UTF8_SEQUENCES) {
                Slice candidate = concatSlice(head, middle, tail);
                testStringStatisticsTruncation(candidate, ORIGINAL);
                testStringStatisticsTruncation(candidate, ORC_HIVE_8732);
            }
        }
    }
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) Slice(io.airlift.slice.Slice) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Test(org.testng.annotations.Test)

Example 2 with StringStatistics

use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.

the class TestDwrfMetadataReader method testToStringStatistics.

/**
 * Exercises {@code DwrfMetadataReader.toStringStatistics} with partially and fully populated
 * protobuf string statistics for both the ORIGINAL and ORC_HIVE_8732 writer versions, then
 * runs the truncation checks over combinations of UTF-8 prefix, code point, and suffix.
 */
@Test
public void testToStringStatistics() {
    // ORIGINAL writer version: non row group (rolled up) statistics are expected to be dropped
    DwrfProto.StringStatistics rolledUpOriginal = DwrfProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(44)
            .build();
    assertNull(DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORIGINAL, rolledUpOriginal, false));

    // the current writer version accepts statistics that carry only a sum, at either level
    for (boolean rowGroupLevel : ImmutableList.of(true, false)) {
        DwrfProto.StringStatistics sumOnly = DwrfProto.StringStatistics.newBuilder().setSum(45).build();
        StringStatistics expectedSumOnly = new StringStatistics(null, null, 45);
        assertEquals(DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, sumOnly, rowGroupLevel), expectedSumOnly);
    }

    // the ORIGINAL writer version accepts the same input for row group stats (but not rolled up stats)
    DwrfProto.StringStatistics originalSumOnly = DwrfProto.StringStatistics.newBuilder().setSum(45).build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORIGINAL, originalSumOnly, true),
            new StringStatistics(null, null, 45));

    // only a minimum, or only a maximum, is accepted as well
    DwrfProto.StringStatistics minimumOnly = DwrfProto.StringStatistics.newBuilder().setMinimum("ant").build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, minimumOnly, true),
            new StringStatistics(Slices.utf8Slice("ant"), null, 0));
    DwrfProto.StringStatistics maximumOnly = DwrfProto.StringStatistics.newBuilder().setMaximum("cat").build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, maximumOnly, true),
            new StringStatistics(null, Slices.utf8Slice("cat"), 0));

    // fully populated statistics
    DwrfProto.StringStatistics complete = DwrfProto.StringStatistics.newBuilder()
            .setMinimum("ant")
            .setMaximum("cat")
            .setSum(79)
            .build();
    assertEquals(
            DwrfMetadataReader.toStringStatistics(HiveWriterVersion.ORC_HIVE_8732, complete, true),
            new StringStatistics(Slices.utf8Slice("ant"), Slices.utf8Slice("cat"), 79));

    // truncation checks: every prefix x test code point x suffix combination, for both writer versions
    for (Slice head : ALL_UTF8_SEQUENCES) {
        for (int rawCodePoint : TEST_CODE_POINTS) {
            Slice middle = codePointToUtf8(rawCodePoint);
            for (Slice tail : ALL_UTF8_SEQUENCES) {
                Slice candidate = concatSlice(head, middle, tail);
                testStringStatisticsTruncation(candidate, HiveWriterVersion.ORIGINAL);
                testStringStatisticsTruncation(candidate, HiveWriterVersion.ORC_HIVE_8732);
            }
        }
    }
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) Slice(io.airlift.slice.Slice) TestOrcMetadataReader.concatSlice(com.facebook.presto.orc.metadata.TestOrcMetadataReader.concatSlice) Test(org.testng.annotations.Test)

Example 3 with StringStatistics

use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.

the class OrcMetadataReader method toStringStatistics.

/**
 * Builds a {@link StringStatistics} from the protobuf string statistics of an ORC file.
 *
 * @param hiveWriterVersion writer version of the file; also forwarded to the truncation helpers
 * @param stringStatistics protobuf statistics to convert
 * @param isRowGroup whether the statistics belong to a row group
 * @return {@code null} when the writer version is ORIGINAL and the statistics are not row group
 *         statistics; otherwise statistics whose minimum and maximum are {@code null} when
 *         absent from the input and whose sum defaults to 0 when absent
 */
static StringStatistics toStringStatistics(HiveWriterVersion hiveWriterVersion, OrcProto.StringStatistics stringStatistics, boolean isRowGroup) {
    boolean rolledUp = !isRowGroup;
    if (rolledUp && hiveWriterVersion == ORIGINAL) {
        return null;
    }

    // maximum is resolved before minimum, as in the original statement order
    Slice upperBound = null;
    if (stringStatistics.hasMaximum()) {
        Slice rawMaximum = byteStringToSlice(stringStatistics.getMaximumBytes());
        upperBound = maxStringTruncateToValidRange(rawMaximum, hiveWriterVersion);
    }

    Slice lowerBound = null;
    if (stringStatistics.hasMinimum()) {
        Slice rawMinimum = byteStringToSlice(stringStatistics.getMinimumBytes());
        lowerBound = minStringTruncateToValidRange(rawMinimum, hiveWriterVersion);
    }

    // a missing sum is reported as zero
    long total = 0;
    if (stringStatistics.hasSum()) {
        total = stringStatistics.getSum();
    }

    return new StringStatistics(lowerBound, upperBound, total);
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) Slice(io.airlift.slice.Slice)

Example 4 with StringStatistics

use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.

the class IcebergOrcFileWriter method toIcebergMinMax.

/**
 * Derives an {@code IcebergMinMax} from ORC column statistics.
 *
 * <p>The statistics kinds are probed in a fixed order (integer, double, string, date, decimal)
 * and the first non-null kind decides the result.
 *
 * @param orcColumnStats ORC statistics of a single column
 * @param icebergType Iceberg type of the column; selects int/float narrowing and the decimal scale
 * @return the min/max pair, or {@link Optional#empty()} when no supported statistics kind is
 *         present or when the selected kind lacks a minimum or a maximum
 */
private static Optional<IcebergMinMax> toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType) {
    IntegerStatistics integerStats = orcColumnStats.getIntegerStatistics();
    if (integerStats != null) {
        Object lower = integerStats.getMin();
        Object upper = integerStats.getMax();
        boolean incomplete = lower == null || upper == null;
        if (incomplete) {
            return Optional.empty();
        }
        // values are narrowed to int when the Iceberg column type is INTEGER
        boolean narrowToInt = icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER;
        if (narrowToInt) {
            lower = toIntExact((Long) lower);
            upper = toIntExact((Long) upper);
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    DoubleStatistics doubleStats = orcColumnStats.getDoubleStatistics();
    if (doubleStats != null) {
        Object lower = doubleStats.getMin();
        Object upper = doubleStats.getMax();
        boolean incomplete = lower == null || upper == null;
        if (incomplete) {
            return Optional.empty();
        }
        // values are narrowed to float when the Iceberg column type is FLOAT
        boolean narrowToFloat = icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT;
        if (narrowToFloat) {
            lower = ((Double) lower).floatValue();
            upper = ((Double) upper).floatValue();
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    StringStatistics stringStats = orcColumnStats.getStringStatistics();
    if (stringStats != null) {
        Slice lower = stringStats.getMin();
        Slice upper = stringStats.getMax();
        boolean incomplete = lower == null || upper == null;
        if (incomplete) {
            return Optional.empty();
        }
        String lowerText = lower.toStringUtf8();
        String upperText = upper.toStringUtf8();
        return Optional.of(new IcebergMinMax(icebergType, lowerText, upperText));
    }

    DateStatistics dateStats = orcColumnStats.getDateStatistics();
    if (dateStats != null) {
        Integer lower = dateStats.getMin();
        Integer upper = dateStats.getMax();
        boolean incomplete = lower == null || upper == null;
        if (incomplete) {
            return Optional.empty();
        }
        return Optional.of(new IcebergMinMax(icebergType, lower, upper));
    }

    DecimalStatistics decimalStats = orcColumnStats.getDecimalStatistics();
    if (decimalStats != null) {
        BigDecimal lower = decimalStats.getMin();
        BigDecimal upper = decimalStats.getMax();
        boolean incomplete = lower == null || upper == null;
        if (incomplete) {
            return Optional.empty();
        }
        // both bounds are rescaled to the scale declared by the Iceberg decimal type
        int targetScale = ((DecimalType) icebergType).scale();
        BigDecimal scaledLower = lower.setScale(targetScale);
        BigDecimal scaledUpper = upper.setScale(targetScale);
        return Optional.of(new IcebergMinMax(icebergType, scaledLower, scaledUpper));
    }

    // none of the supported statistics kinds is present
    return Optional.empty();
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) DecimalStatistics(com.facebook.presto.orc.metadata.statistics.DecimalStatistics) DoubleStatistics(com.facebook.presto.orc.metadata.statistics.DoubleStatistics) Slice(io.airlift.slice.Slice) DateStatistics(com.facebook.presto.orc.metadata.statistics.DateStatistics) DecimalType(org.apache.iceberg.types.Types.DecimalType) BigDecimal(java.math.BigDecimal) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics)

Example 5 with StringStatistics

use of com.facebook.presto.orc.metadata.statistics.StringStatistics in project presto by prestodb.

the class TestTupleDomainOrcPredicate method stringColumnStats.

/**
 * Creates string column statistics for use in predicate tests.
 *
 * @param numberOfValues value count passed to the column statistics
 * @param minimum minimum string value, or {@code null} for no minimum
 * @param maximum maximum string value, or {@code null} for no maximum
 * @return a {@code StringColumnStatistics} wrapping the given bounds
 */
private static ColumnStatistics stringColumnStats(Long numberOfValues, String minimum, String maximum) {
    Slice lowerBound = null;
    if (minimum != null) {
        lowerBound = utf8Slice(minimum);
    }

    Slice upperBound = null;
    if (maximum != null) {
        upperBound = utf8Slice(maximum);
    }

    // sum and minAverageValueSizeInBytes are not used in this test; they could be arbitrary numbers
    StringStatistics bounds = new StringStatistics(lowerBound, upperBound, 100L);
    return new StringColumnStatistics(numberOfValues, null, bounds);
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) StringColumnStatistics(com.facebook.presto.orc.metadata.statistics.StringColumnStatistics) Slice(io.airlift.slice.Slice) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice)

Aggregations

StringStatistics (com.facebook.presto.orc.metadata.statistics.StringStatistics): 7, Slice (io.airlift.slice.Slice): 6, IntegerStatistics (com.facebook.presto.orc.metadata.statistics.IntegerStatistics): 2, Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 2, Test (org.testng.annotations.Test): 2, OrcMetadataReader.byteStringToSlice (com.facebook.presto.orc.metadata.OrcMetadataReader.byteStringToSlice): 1, TestOrcMetadataReader.concatSlice (com.facebook.presto.orc.metadata.TestOrcMetadataReader.concatSlice): 1, DateStatistics (com.facebook.presto.orc.metadata.statistics.DateStatistics): 1, DecimalStatistics (com.facebook.presto.orc.metadata.statistics.DecimalStatistics): 1, DoubleStatistics (com.facebook.presto.orc.metadata.statistics.DoubleStatistics): 1, StringColumnStatistics (com.facebook.presto.orc.metadata.statistics.StringColumnStatistics): 1, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1, BigDecimal (java.math.BigDecimal): 1, DecimalType (org.apache.iceberg.types.Types.DecimalType): 1