Use of com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics in project presto by prestodb.
From the class TestStripeReader, method testRowSize.
/**
 * Checks the minimum average row size reported by a row group built from row-index statistics.
 *
 * <p>Four row-index streams are registered (stream ids 1 through 4): a plain integer column,
 * a map column, and the map's key and value columns. The key and value statistics each report
 * {@code numberOfEntries} values per row, so every row is expected to account for one integer
 * plus {@code 2 * numberOfEntries} integers.
 */
@Test
public void testRowSize() {
    int numberOfEntries = 10_000;
    long numRowsInGroup = MILLION;
    // The map key and map value columns hold the same number of values in total.
    long mapEntryCount = numRowsInGroup * numberOfEntries;

    IntegerStatistics zeroStatistics = new IntegerStatistics(0L, 0L, 0L);
    ColumnStatistics intStats = new IntegerColumnStatistics(numRowsInGroup, null, zeroStatistics);
    ColumnStatistics mapStats = new ColumnStatistics(numRowsInGroup, null);
    ColumnStatistics mapKeyStats = new IntegerColumnStatistics(mapEntryCount, null, zeroStatistics);
    ColumnStatistics mapValueStats = new IntegerColumnStatistics(mapEntryCount, null, zeroStatistics);

    // Register one ROW_INDEX stream per column, in the same order as the statistics above.
    Map<StreamId, List<RowGroupIndex>> columnIndexes = ImmutableMap.<StreamId, List<RowGroupIndex>>builder()
            .put(new StreamId(1, 0, Stream.StreamKind.ROW_INDEX), createRowGroupIndex(intStats))
            .put(new StreamId(2, 0, Stream.StreamKind.ROW_INDEX), createRowGroupIndex(mapStats))
            .put(new StreamId(3, 0, Stream.StreamKind.ROW_INDEX), createRowGroupIndex(mapKeyStats))
            .put(new StreamId(4, 0, Stream.StreamKind.ROW_INDEX), createRowGroupIndex(mapValueStats))
            .build();

    // Each row contains 1 integer plus numberOfEntries keys and numberOfEntries values.
    long mapBytesPerRow = 2 * numberOfEntries * INTEGER_VALUE_BYTES;
    long expectedRowSize = INTEGER_VALUE_BYTES + mapBytesPerRow;

    RowGroup rowGroup = StripeReader.createRowGroup(
            0,
            Long.MAX_VALUE,
            numRowsInGroup,
            columnIndexes,
            ImmutableMap.of(),
            ImmutableMap.of());

    assertEquals(expectedRowSize, rowGroup.getMinAverageRowBytes());
}
Use of com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics in project presto by prestodb.
From the class TestOrcBloomFilters, method testMatches.
/**
 * Simulates a query on two columns where one of them is used in the WHERE clause,
 * with and without a bloom filter attached to the column statistics.
 *
 * <p>Asserts that the predicate matches when the bloom filter contains the searched value or
 * when no bloom filter is present, that it does not match when the bloom filter is empty, and
 * that an unconstrained predicate matches.
 */
@Test
public void testMatches() {
    // Predicate on the stripe column: COLUMN_0 = 1234.
    Domain equals1234 = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> column0Domain = new TupleDomain.ColumnDomain<>(COLUMN_0, equals1234);
    TupleDomain<String> effectivePredicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(column0Domain)));
    // Unconstrained predicate used as the control case.
    TupleDomain<String> emptyEffectivePredicate = TupleDomain.all();

    // Both columns are referenced, but only COLUMN_0 is constrained above.
    List<ColumnReference<String>> columnReferences = ImmutableList.<ColumnReference<String>>of(
            new ColumnReference<>(COLUMN_0, 0, BIGINT),
            new ColumnReference<>(COLUMN_1, 1, BIGINT));

    TupleDomainOrcPredicate<String> predicate =
            new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate =
            new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true, Optional.empty());

    // Snapshot the filter while it is still empty, then add 1234 and snapshot it again.
    // The order matters: the first snapshot must be taken before addLong is called.
    HiveBloomFilter bloomFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter emptyOrcBloomFilter = toOrcBloomFilter(bloomFilter);
    bloomFilter.addLong(1234);
    OrcProto.BloomFilter populatedOrcBloomFilter = toOrcBloomFilter(bloomFilter);

    // Three statistics variants for column index 0, differing only in the bloom filter.
    Map<Integer, ColumnStatistics> matchingStatisticsByColumnIndex = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, toHiveBloomFilter(populatedOrcBloomFilter), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> nonMatchingStatisticsByColumnIndex = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, toHiveBloomFilter(emptyOrcBloomFilter), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> withoutBloomFilterStatisticsByColumnIndex = ImmutableMap.of(
            0,
            new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    // Bloom filter containing 1234: match.
    assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex));
    // No bloom filter at all: match.
    assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex));
    // Empty bloom filter: no match.
    assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex));
    // Unconstrained predicate: match.
    assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
}
Aggregations