use of com.facebook.presto.orc.metadata.statistics.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testBloomFilterPredicateValuesExisting.
/**
 * Adds every key of {@code TEST_VALUES} to a bloom filter and verifies that
 * {@code checkInBloomFilter} reports each of them as present for its declared type.
 */
@Test
public void testBloomFilterPredicateValuesExisting() {
    BloomFilter bloomFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01);

    // Insert each test value using the add method that corresponds to its Java type.
    for (Object o : TEST_VALUES.keySet()) {
        if (o instanceof Long) {
            bloomFilter.addLong((Long) o);
        } else if (o instanceof Integer) {
            bloomFilter.addLong((Integer) o);
        } else if (o instanceof String) {
            bloomFilter.addString((String) o);
        } else if (o instanceof BigDecimal) {
            bloomFilter.addString(o.toString());
        } else if (o instanceof Slice) {
            bloomFilter.addString(((Slice) o).toStringUtf8());
        } else if (o instanceof Timestamp) {
            bloomFilter.addLong(((Timestamp) o).getTime());
        } else if (o instanceof Double) {
            bloomFilter.addDouble((Double) o);
        } else {
            fail("Unsupported type " + o.getClass());
        }
    }

    for (Map.Entry<Object, Type> testValue : TEST_VALUES.entrySet()) {
        boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue());
        // Report the class of the value under test (the key), not the class of the Map.Entry wrapper.
        assertTrue(matched, "type " + testValue.getKey().getClass());
    }

    // test unsupported type: can be supported by ORC but is not implemented yet
    assertTrue(checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true");
}
use of com.facebook.presto.orc.metadata.statistics.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testBloomFilterPredicateValuesNonExisting.
/**
 * Checks every entry of {@code TEST_VALUES} against a bloom filter to which nothing
 * has been added and expects {@code checkInBloomFilter} to report no match.
 */
@Test
public void testBloomFilterPredicateValuesNonExisting() {
    BloomFilter emptyFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01);

    TEST_VALUES.forEach((value, type) ->
            assertFalse(checkInBloomFilter(emptyFilter, value, type), "type " + value.getClass()));

    // test unsupported type: can be supported by ORC but is not implemented yet
    assertTrue(checkInBloomFilter(emptyFilter, new Date(), DATE), "unsupported type DATE should always return true");
}
use of com.facebook.presto.orc.metadata.statistics.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testOrcHiveBloomFilterSerde.
/**
 * Serializes a bloom filter into an ORC bloom filter index and checks that it can be
 * read back both through {@code OrcMetadataReader} and by parsing the protobuf directly.
 */
@Test
public void testOrcHiveBloomFilterSerde() throws Exception {
    // Build the filter that gets written and sanity-check it before serializing.
    BloomFilter written = new BloomFilter(1000L, 0.05);
    written.addString(TEST_STRING);
    assertTrue(written.testString(TEST_STRING));

    // Convert to the protobuf representation and serialize it into an index.
    OrcProto.BloomFilter protoFilter = OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(written.getBitSet()))
            .setNumHashFunctions(written.getNumHashFunctions())
            .build();
    OrcProto.BloomFilterIndex emptyIndex = OrcProto.BloomFilterIndex.getDefaultInstance();
    byte[] serialized = serializeBloomFilterToIndex(protoFilter, emptyIndex);

    // Read through method
    InputStream serializedStream = new ByteArrayInputStream(serialized);
    OrcMetadataReader reader = new OrcMetadataReader(new RuntimeStats());
    List<HiveBloomFilter> readFilters = reader.readBloomFilterIndexes(serializedStream);
    assertEquals(readFilters.size(), 1);

    HiveBloomFilter readBack = readFilters.get(0);
    assertTrue(readBack.testString(TEST_STRING));
    assertFalse(readBack.testString(TEST_STRING_NOT_WRITTEN));
    assertEquals(written.getBitSize(), readBack.getBitSize());
    assertEquals(written.getNumHashFunctions(), readBack.getNumHashFunctions());

    // Validate bit set
    assertTrue(Arrays.equals(readBack.getBitSet(), written.getBitSet()));

    // Read directly: allows better inspection of the bit sets (helped to fix a lot of bugs)
    OrcProto.BloomFilterIndex parsedIndex = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(serialized));
    List<OrcProto.BloomFilter> parsedFilters = parsedIndex.getBloomFilterList();
    assertEquals(parsedFilters.size(), 1);
    OrcProto.BloomFilter parsedFilter = parsedFilters.get(0);

    // Validate contents of ORC bloom filter bit set
    assertTrue(Arrays.equals(Longs.toArray(parsedFilter.getBitsetList()), written.getBitSet()));

    // hash functions
    assertEquals(written.getNumHashFunctions(), parsedFilter.getNumHashFunctions());

    // number of longs backing the bit set
    assertEquals(written.getBitSet().length, parsedFilter.getBitsetCount());
}
use of com.facebook.presto.orc.metadata.statistics.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testHiveBloomFilterSerde.
/**
 * Populates a {@code BloomFilter}, rebuilds a {@code HiveBloomFilter} from its bit set,
 * bit size and hash function count, and checks both answer membership tests the same way.
 */
@Test
public void testHiveBloomFilterSerde() {
    BloomFilter source = new BloomFilter(1_000_000L, 0.05);

    // String
    source.addString(TEST_STRING);
    assertTrue(source.testString(TEST_STRING));
    assertFalse(source.testString(TEST_STRING_NOT_WRITTEN));

    // Integer
    source.addLong(TEST_INTEGER);
    assertTrue(source.testLong(TEST_INTEGER));
    assertFalse(source.testLong(TEST_INTEGER + 1));

    // Re-construct
    List<Long> bits = ImmutableList.copyOf(Longs.asList(source.getBitSet()));
    HiveBloomFilter rebuilt = new HiveBloomFilter(bits, source.getBitSize(), source.getNumHashFunctions());

    // String
    assertTrue(rebuilt.testString(TEST_STRING));
    assertFalse(rebuilt.testString(TEST_STRING_NOT_WRITTEN));

    // Integer
    assertTrue(rebuilt.testLong(TEST_INTEGER));
    assertFalse(rebuilt.testLong(TEST_INTEGER + 1));
}
use of com.facebook.presto.orc.metadata.statistics.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testMatches.
/**
 * Simulates a query over two columns where only one is used in the WHERE clause,
 * evaluated with a matching bloom filter, a non-matching bloom filter and no bloom filter.
 */
@Test
public void testMatches() {
    // stripe column
    Domain bigintEquals1234 = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> column0Domain = new TupleDomain.ColumnDomain<>(COLUMN_0, bigintEquals1234);

    // predicate consist of the bigint_0 = 1234
    TupleDomain<String> effectivePredicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(column0Domain)));
    TupleDomain<String> allPredicate = TupleDomain.all();

    // predicate column references
    List<ColumnReference<String>> references = ImmutableList.<ColumnReference<String>>of(
            new ColumnReference<>(COLUMN_0, 0, BIGINT),
            new ColumnReference<>(COLUMN_1, 1, BIGINT));
    TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(effectivePredicate, references, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(allPredicate, references, true, Optional.empty());

    // assemble a matching and a non-matching bloom filter;
    // the empty snapshot must be taken before the value is added
    HiveBloomFilter filter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter emptySnapshot = toOrcBloomFilter(filter);
    filter.addLong(1234);
    OrcProto.BloomFilter populatedSnapshot = toOrcBloomFilter(filter);

    Map<Integer, ColumnStatistics> matchingStats = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, toHiveBloomFilter(populatedSnapshot), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> nonMatchingStats = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, toHiveBloomFilter(emptySnapshot), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> statsWithoutBloomFilter = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    assertTrue(predicate.matches(1L, matchingStats));
    assertTrue(predicate.matches(1L, statsWithoutBloomFilter));
    assertFalse(predicate.matches(1L, nonMatchingStats));
    assertTrue(emptyPredicate.matches(1L, matchingStats));
}
Aggregations