Search in sources :

Example 1 with IntegerColumnStatistics

Use of com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics in the project presto by prestodb.

In the class TestStripeReader, method testRowSize.

/**
 * Checks the minimum average row size reported by the row group that
 * {@code StripeReader.createRowGroup} builds from row-index column statistics.
 *
 * The row-group indexes describe one integer column (column 1) and a map column (column 2)
 * whose key (column 3) and value (column 4) streams are both described by integer statistics,
 * with {@code numberOfEntries} key/value entries per row.
 */
@Test
public void testRowSize() {
    int numberOfEntries = 10_000;
    long numRowsInGroup = MILLION;
    IntegerStatistics integerStatistics = new IntegerStatistics(0L, 0L, 0L);
    ColumnStatistics intColumnStatistics = new IntegerColumnStatistics(numRowsInGroup, null, integerStatistics);
    ColumnStatistics mapColumnStatistics = new ColumnStatistics(numRowsInGroup, null);
    // Key and value statistics count numberOfEntries values for every row in the group.
    ColumnStatistics mapKeyColumnStatistics = new IntegerColumnStatistics(numRowsInGroup * numberOfEntries, null, integerStatistics);
    ColumnStatistics mapValueColumnStatistics = new IntegerColumnStatistics(numRowsInGroup * numberOfEntries, null, integerStatistics);
    StreamId intStreamId = new StreamId(1, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapStreamId = new StreamId(2, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapKeyStreamId = new StreamId(3, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapValueStreamId = new StreamId(4, 0, Stream.StreamKind.ROW_INDEX);
    Map<StreamId, List<RowGroupIndex>> columnIndexes = ImmutableMap.of(
            intStreamId, createRowGroupIndex(intColumnStatistics),
            mapStreamId, createRowGroupIndex(mapColumnStatistics),
            mapKeyStreamId, createRowGroupIndex(mapKeyColumnStatistics),
            mapValueStreamId, createRowGroupIndex(mapValueColumnStatistics));
    // Each row contains 1 integer, 2 * numberOfEntries * integer (2 is for key and value).
    long expectedRowSize = INTEGER_VALUE_BYTES + 2 * numberOfEntries * INTEGER_VALUE_BYTES;
    RowGroup rowGroup = StripeReader.createRowGroup(0, Long.MAX_VALUE, numRowsInGroup, columnIndexes, ImmutableMap.of(), ImmutableMap.of());
    // TestNG's assertEquals takes (actual, expected); keeping that order makes failure messages accurate.
    assertEquals(rowGroup.getMinAverageRowBytes(), expectedRowSize);
}
Also used : IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Example 2 with IntegerColumnStatistics

Use of com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics in the project presto by prestodb.

In the class TestOrcBloomFilters, method testMatches.

/**
 * Simulates a query on two columns where only one is used in the WHERE clause,
 * and checks predicate matching both with and without a bloom filter.
 */
@Test
public void testMatches() {
    // Domain for the stripe column: the single BIGINT value 1234
    Domain singleValueDomain = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> firstColumnDomain = new TupleDomain.ColumnDomain<>(COLUMN_0, singleValueDomain);

    // One tuple domain constrains COLUMN_0 to 1234, the other is TupleDomain.all()
    TupleDomain<String> equalityDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(firstColumnDomain)));
    TupleDomain<String> allDomain = TupleDomain.all();

    // Column references handed to both predicates
    ImmutableList.Builder<ColumnReference<String>> referencesBuilder = ImmutableList.builder();
    referencesBuilder.add(new ColumnReference<>(COLUMN_0, 0, BIGINT));
    referencesBuilder.add(new ColumnReference<>(COLUMN_1, 1, BIGINT));
    List<ColumnReference<String>> references = referencesBuilder.build();

    TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(equalityDomain, references, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(allDomain, references, true, Optional.empty());

    // Snapshot the filter before and after adding 1234: the first snapshot is the
    // non-matching filter, the second is the matching one. Order matters here.
    HiveBloomFilter sourceFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter filterWithoutValue = toOrcBloomFilter(sourceFilter);
    sourceFilter.addLong(1234);
    OrcProto.BloomFilter filterWithValue = toOrcBloomFilter(sourceFilter);

    HiveBloomFilter matchingFilter = toHiveBloomFilter(filterWithValue);
    Map<Integer, ColumnStatistics> matchingStatistics = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, matchingFilter, new IntegerStatistics(10L, 2000L, null)));

    HiveBloomFilter nonMatchingFilter = toHiveBloomFilter(filterWithoutValue);
    Map<Integer, ColumnStatistics> nonMatchingStatistics = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, nonMatchingFilter, new IntegerStatistics(10L, 2000L, null)));

    Map<Integer, ColumnStatistics> statisticsWithoutBloomFilter = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    assertTrue(predicate.matches(1L, matchingStatistics));
    assertTrue(predicate.matches(1L, statisticsWithoutBloomFilter));
    assertFalse(predicate.matches(1L, nonMatchingStatistics));
    assertTrue(emptyPredicate.matches(1L, matchingStatistics));
}
Also used : IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) OrcProto(com.facebook.presto.orc.proto.OrcProto) IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter) BloomFilter(com.facebook.presto.orc.metadata.statistics.BloomFilter) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Aggregations

ColumnStatistics (com.facebook.presto.orc.metadata.statistics.ColumnStatistics): 2, IntegerColumnStatistics (com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics): 2, IntegerStatistics (com.facebook.presto.orc.metadata.statistics.IntegerStatistics): 2, Test (org.testng.annotations.Test): 2, Domain (com.facebook.presto.common.predicate.Domain): 1, TupleDomain (com.facebook.presto.common.predicate.TupleDomain): 1, ColumnReference (com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference): 1, TupleDomainOrcPredicate.checkInBloomFilter (com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter): 1, BloomFilter (com.facebook.presto.orc.metadata.statistics.BloomFilter): 1, HiveBloomFilter (com.facebook.presto.orc.metadata.statistics.HiveBloomFilter): 1, OrcProto (com.facebook.presto.orc.proto.OrcProto): 1, ImmutableList (com.google.common.collect.ImmutableList): 1, List (java.util.List): 1