use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class TestOrcBloomFilters method testMatches.
@Test
public // simulate query on a 2 columns where 1 is used as part of the where, with and without bloom filter
void testMatches() {
// stripe column
Domain testingColumnHandleDomain = Domain.singleValue(BIGINT, 1234L);
TupleDomain.ColumnDomain<String> column0 = new TupleDomain.ColumnDomain<>(COLUMN_0, testingColumnHandleDomain);
// predicate consist of the bigint_0 = 1234
TupleDomain<String> effectivePredicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(column0)));
TupleDomain<String> emptyEffectivePredicate = TupleDomain.all();
// predicate column references
List<ColumnReference<String>> columnReferences = ImmutableList.<ColumnReference<String>>builder().add(new ColumnReference<>(COLUMN_0, 0, BIGINT)).add(new ColumnReference<>(COLUMN_1, 1, BIGINT)).build();
TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true, Optional.empty());
TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true, Optional.empty());
// assemble a matching and a non-matching bloom filter
HiveBloomFilter hiveBloomFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
OrcProto.BloomFilter emptyOrcBloomFilter = toOrcBloomFilter(hiveBloomFilter);
hiveBloomFilter.addLong(1234);
OrcProto.BloomFilter orcBloomFilter = toOrcBloomFilter(hiveBloomFilter);
Map<Integer, ColumnStatistics> matchingStatisticsByColumnIndex = ImmutableMap.of(0, new IntegerColumnStatistics(null, toHiveBloomFilter(orcBloomFilter), new IntegerStatistics(10L, 2000L, null)));
Map<Integer, ColumnStatistics> nonMatchingStatisticsByColumnIndex = ImmutableMap.of(0, new IntegerColumnStatistics(null, toHiveBloomFilter(emptyOrcBloomFilter), new IntegerStatistics(10L, 2000L, null)));
Map<Integer, ColumnStatistics> withoutBloomFilterStatisticsByColumnIndex = ImmutableMap.of(0, new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));
assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex));
assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex));
assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex));
assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
}
use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class OrcWriteValidation method validateColumnStatisticsEquivalent.
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, ColumnStatistics actualColumnStatistics, ColumnStatistics expectedColumnStatistics) throws OrcCorruptionException {
requireNonNull(name, "name is null");
requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");
if (actualColumnStatistics.getNumberOfValues() != expectedColumnStatistics.getNumberOfValues()) {
String failureMessage = format("Actual Values %s does not match expected values %s", actualColumnStatistics, expectedColumnStatistics);
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: %s in %s statistics", failureMessage, name);
}
if (!Objects.equals(actualColumnStatistics.getBooleanStatistics(), expectedColumnStatistics.getBooleanStatistics())) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected boolean counts in %s statistics", name);
}
if (!Objects.equals(actualColumnStatistics.getIntegerStatistics(), expectedColumnStatistics.getIntegerStatistics())) {
IntegerStatistics actualIntegerStatistics = actualColumnStatistics.getIntegerStatistics();
IntegerStatistics expectedIntegerStatistics = expectedColumnStatistics.getIntegerStatistics();
// Ignore the validation of sum if one of the two sums is null.
if (actualIntegerStatistics == null || expectedIntegerStatistics == null || !Objects.equals(actualIntegerStatistics.getMin(), expectedIntegerStatistics.getMin()) || !Objects.equals(actualIntegerStatistics.getMax(), expectedIntegerStatistics.getMax()) || (actualIntegerStatistics.getSum() != null && expectedIntegerStatistics.getSum() != null && !Objects.equals(actualIntegerStatistics.getSum(), expectedIntegerStatistics.getSum()))) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected integer range in %s statistics", name);
}
}
if (!Objects.equals(actualColumnStatistics.getDoubleStatistics(), expectedColumnStatistics.getDoubleStatistics())) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected double range in %s statistics", name);
}
StringStatistics expectedStringStatistics = expectedColumnStatistics.getStringStatistics();
if (expectedStringStatistics != null) {
expectedStringStatistics = new StringStatistics(minStringTruncateToValidRange(expectedStringStatistics.getMin(), HiveWriterVersion.ORC_HIVE_8732), maxStringTruncateToValidRange(expectedStringStatistics.getMax(), HiveWriterVersion.ORC_HIVE_8732), expectedStringStatistics.getSum());
}
StringStatistics actualStringStatistics = actualColumnStatistics.getStringStatistics();
if (!Objects.equals(actualColumnStatistics.getStringStatistics(), expectedStringStatistics) && expectedStringStatistics != null) {
// Merging row group stats can produce nulls given we have string stats limit.
if (actualStringStatistics == null || actualStringStatistics.getSum() != expectedStringStatistics.getSum() || (expectedStringStatistics.getMax() != null && !Objects.equals(actualStringStatistics.getMax(), expectedStringStatistics.getMax())) || (expectedStringStatistics.getMin() != null && !Objects.equals(actualStringStatistics.getMin(), expectedStringStatistics.getMin()))) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected string range in %s statistics", name);
}
}
if (!Objects.equals(actualColumnStatistics.getDateStatistics(), expectedColumnStatistics.getDateStatistics())) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected date range in %s statistics", name);
}
if (!Objects.equals(actualColumnStatistics.getDecimalStatistics(), expectedColumnStatistics.getDecimalStatistics())) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected decimal range in %s statistics", name);
}
if (!Objects.equals(actualColumnStatistics.getBloomFilter(), expectedColumnStatistics.getBloomFilter())) {
throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected bloom filter in %s statistics", name);
}
}
Aggregations