Search in sources :

Example 6 with IntegerStatistics

use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.

In the class TestOrcBloomFilters, method testMatches:

// Simulates a query over two columns where only one of them is constrained by the predicate,
// and checks the predicate against statistics with and without a bloom filter.
@Test
public void testMatches() {
    // Domain for the constrained column: COLUMN_0 = 1234
    Domain equalsTarget = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> constrainedColumn = new TupleDomain.ColumnDomain<>(COLUMN_0, equalsTarget);

    // One tuple domain carrying the constraint, one that accepts everything
    TupleDomain<String> constrainedDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(constrainedColumn)));
    TupleDomain<String> unconstrainedDomain = TupleDomain.all();

    // Both columns are registered as references, although only COLUMN_0 is constrained
    ImmutableList.Builder<ColumnReference<String>> referenceBuilder = ImmutableList.builder();
    referenceBuilder.add(new ColumnReference<>(COLUMN_0, 0, BIGINT));
    referenceBuilder.add(new ColumnReference<>(COLUMN_1, 1, BIGINT));
    List<ColumnReference<String>> references = referenceBuilder.build();

    TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(constrainedDomain, references, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(unconstrainedDomain, references, true, Optional.empty());

    // Snapshot the filter while it is still empty, then add 1234 and snapshot it again.
    // The order matters: the first snapshot must be taken before the value is added.
    HiveBloomFilter sourceFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter filterWithoutValue = toOrcBloomFilter(sourceFilter);
    sourceFilter.addLong(1234);
    OrcProto.BloomFilter filterWithValue = toOrcBloomFilter(sourceFilter);

    // Column 0 statistics always cover [10, 2000]; only the attached bloom filter varies
    Map<Integer, ColumnStatistics> matchingStatistics = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, toHiveBloomFilter(filterWithValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> nonMatchingStatistics = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, toHiveBloomFilter(filterWithoutValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> statisticsWithoutBloomFilter = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    // Bloom filter containing the value: match
    assertTrue(predicate.matches(1L, matchingStatistics));
    // No bloom filter at all: match
    assertTrue(predicate.matches(1L, statisticsWithoutBloomFilter));
    // Bloom filter that never saw the value: no match
    assertFalse(predicate.matches(1L, nonMatchingStatistics));
    // Predicate without constraints: match
    assertTrue(emptyPredicate.matches(1L, matchingStatistics));
}
Also used : IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) OrcProto(com.facebook.presto.orc.proto.OrcProto) IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter) BloomFilter(com.facebook.presto.orc.metadata.statistics.BloomFilter) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Example 7 with IntegerStatistics

use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.

In the class OrcWriteValidation, method validateColumnStatisticsEquivalent:

/**
 * Compares the column statistics produced by a write against the expected statistics and
 * fails on the first category that disagrees.
 *
 * <p>Categories are checked in this order: number of values, boolean, integer, double,
 * string, date, decimal, bloom filter. Integer and string statistics are compared leniently
 * (see the inline comments); every other category must be equal per {@code Objects.equals}.
 *
 * @param orcDataSourceId data source identifier passed through to the thrown exception
 * @param name label inserted into the failure message; must not be null
 * @param actualColumnStatistics statistics being validated; must not be null
 * @param expectedColumnStatistics reference statistics; must not be null
 * @throws OrcCorruptionException if any category of statistics does not match
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, ColumnStatistics actualColumnStatistics, ColumnStatistics expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    if (actualColumnStatistics.getNumberOfValues() != expectedColumnStatistics.getNumberOfValues()) {
        throw new OrcCorruptionException(
                orcDataSourceId,
                "Write validation failed: %s in %s statistics",
                format("Actual Values %s does not match expected values %s", actualColumnStatistics, expectedColumnStatistics),
                name);
    }

    boolean booleanStatisticsAgree = Objects.equals(actualColumnStatistics.getBooleanStatistics(), expectedColumnStatistics.getBooleanStatistics());
    if (!booleanStatisticsAgree) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected boolean counts in %s statistics", name);
    }

    IntegerStatistics actualIntegers = actualColumnStatistics.getIntegerStatistics();
    IntegerStatistics expectedIntegers = expectedColumnStatistics.getIntegerStatistics();
    if (!Objects.equals(actualIntegers, expectedIntegers)) {
        // Not strictly equal: a missing side or a differing min/max is always a failure.
        boolean integerMismatch = actualIntegers == null || expectedIntegers == null;
        if (!integerMismatch) {
            integerMismatch = !Objects.equals(actualIntegers.getMin(), expectedIntegers.getMin())
                    || !Objects.equals(actualIntegers.getMax(), expectedIntegers.getMax());
        }
        // The sum is only validated when both sides carry one.
        if (!integerMismatch && actualIntegers.getSum() != null && expectedIntegers.getSum() != null) {
            integerMismatch = !Objects.equals(actualIntegers.getSum(), expectedIntegers.getSum());
        }
        if (integerMismatch) {
            throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected integer range in %s statistics", name);
        }
    }

    boolean doubleStatisticsAgree = Objects.equals(actualColumnStatistics.getDoubleStatistics(), expectedColumnStatistics.getDoubleStatistics());
    if (!doubleStatisticsAgree) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected double range in %s statistics", name);
    }

    // Rebuild the expected string statistics with min/max passed through the truncation helpers
    // before comparing.
    StringStatistics expectedStrings = expectedColumnStatistics.getStringStatistics();
    if (expectedStrings != null) {
        expectedStrings = new StringStatistics(
                minStringTruncateToValidRange(expectedStrings.getMin(), HiveWriterVersion.ORC_HIVE_8732),
                maxStringTruncateToValidRange(expectedStrings.getMax(), HiveWriterVersion.ORC_HIVE_8732),
                expectedStrings.getSum());
    }
    StringStatistics actualStrings = actualColumnStatistics.getStringStatistics();
    // Nothing is validated when there are no expected string statistics.
    if (expectedStrings != null && !Objects.equals(actualStrings, expectedStrings)) {
        // Merging row group stats can produce nulls given we have string stats limit,
        // so a null expected min/max is not compared against the actual value.
        boolean stringMismatch = actualStrings == null || actualStrings.getSum() != expectedStrings.getSum();
        if (!stringMismatch && expectedStrings.getMax() != null) {
            stringMismatch = !Objects.equals(actualStrings.getMax(), expectedStrings.getMax());
        }
        if (!stringMismatch && expectedStrings.getMin() != null) {
            stringMismatch = !Objects.equals(actualStrings.getMin(), expectedStrings.getMin());
        }
        if (stringMismatch) {
            throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected string range in %s statistics", name);
        }
    }

    boolean dateStatisticsAgree = Objects.equals(actualColumnStatistics.getDateStatistics(), expectedColumnStatistics.getDateStatistics());
    if (!dateStatisticsAgree) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected date range in %s statistics", name);
    }

    boolean decimalStatisticsAgree = Objects.equals(actualColumnStatistics.getDecimalStatistics(), expectedColumnStatistics.getDecimalStatistics());
    if (!decimalStatisticsAgree) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected decimal range in %s statistics", name);
    }

    boolean bloomFiltersAgree = Objects.equals(actualColumnStatistics.getBloomFilter(), expectedColumnStatistics.getBloomFilter());
    if (!bloomFiltersAgree) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected bloom filter in %s statistics", name);
    }
}
Also used : StringStatistics(com.facebook.presto.orc.metadata.statistics.StringStatistics) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics)

Aggregations

IntegerStatistics (com.facebook.presto.orc.metadata.statistics.IntegerStatistics)7 Test (org.testng.annotations.Test)5 ImmutableList (com.google.common.collect.ImmutableList)4 Slice (io.airlift.slice.Slice)4 Page (com.facebook.presto.common.Page)3 RuntimeStats (com.facebook.presto.common.RuntimeStats)3 Block (com.facebook.presto.common.block.Block)3 SqlFunctionProperties (com.facebook.presto.common.function.SqlFunctionProperties)3 FilterFunction (com.facebook.presto.common.predicate.FilterFunction)3 Predicate (com.facebook.presto.common.relation.Predicate)3 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)3 VARCHAR (com.facebook.presto.common.type.VarcharType.VARCHAR)3 NO_ENCRYPTION (com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION)3 NOOP_ORC_AGGREGATED_MEMORY_CONTEXT (com.facebook.presto.orc.NoopOrcAggregatedMemoryContext.NOOP_ORC_AGGREGATED_MEMORY_CONTEXT)3 ORC (com.facebook.presto.orc.OrcEncoding.ORC)3 BATCH_SIZE_GROWTH_FACTOR (com.facebook.presto.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR)3 INITIAL_BATCH_SIZE (com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE)3 MAX_BATCH_SIZE (com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE)3 ORC_12 (com.facebook.presto.orc.OrcTester.Format.ORC_12)3 MAX_BLOCK_SIZE (com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE)3