use of org.apache.hive.common.util.BloomFilter in project hive by apache.
the class AggrStatsInvalidatorFilter method filterKeyValue.
/**
 * Examines one cell and decides whether the row holding it should be selected.
 *
 * <p>Only the aggregate-stats bloom filter column is inspected; any other column is skipped.
 * For that column the stored entry is selected when it is older than {@code maxCacheEntryLife},
 * passed over when it was aggregated within the last two {@code runEvery} periods, and otherwise
 * selected only if one of the configured {@code entries} has the same db and table and its
 * partition name tests positive against the stored bloom filter.
 *
 * @param cell the cell currently being filtered
 * @return {@code NEXT_COL} for unrelated columns, {@code INCLUDE} for a selected entry,
 *     {@code NEXT_ROW} otherwise
 * @throws IOException declared by the overridden method; presumably also raised when the stored
 *     protobuf value cannot be parsed (confirm against the generated parser)
 */
@Override
public ReturnCode filterKeyValue(Cell cell) throws IOException {
  // Anything other than the bloom filter column is irrelevant here.
  if (!Arrays.equals(CellUtil.cloneQualifier(cell), HBaseReadWrite.AGGR_STATS_BLOOM_COL)) {
    return ReturnCode.NEXT_COL;
  }

  HbaseMetastoreProto.AggrStatsBloomFilter stored =
      HbaseMetastoreProto.AggrStatsBloomFilter.parseFrom(CellUtil.cloneValue(cell));

  // Past its maximum lifetime: select it whether or not an entry asked for it.
  if (now - maxCacheEntryLife > stored.getAggregatedAt()) {
    return ReturnCode.INCLUDE;
  }

  // Aggregated very recently: it may have just been created, so leave it alone.
  if (now - runEvery * 2 <= stored.getAggregatedAt()) {
    return ReturnCode.NEXT_ROW;
  }

  // Built lazily, and at most once, the first time a db/table pair lines up.
  BloomFilter partitionFilter = null;
  for (HbaseMetastoreProto.AggrStatsInvalidatorFilter.Entry candidate : entries) {
    boolean sameTable = candidate.getDbName().equals(stored.getDbName())
        && candidate.getTableName().equals(stored.getTableName());
    if (!sameTable) {
      continue;
    }
    if (partitionFilter == null) {
      partitionFilter = new BloomFilter(
          stored.getBloomFilter().getBitsList(),
          stored.getBloomFilter().getNumBits(),
          stored.getBloomFilter().getNumFuncs());
    }
    // A positive probe is only a probable match, but that is enough to stop searching.
    if (partitionFilter.test(candidate.getPartName().toByteArray())) {
      return ReturnCode.INCLUDE;
    }
  }
  return ReturnCode.NEXT_ROW;
}
use of org.apache.hive.common.util.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testHiveBloomFilterSerde.
/**
 * Populates a Hive {@code BloomFilter} with a string and an integer, rebuilds it as a
 * {@code HiveBloomFilter} from its bit set, bit size and hash-function count, and checks that
 * both filters give the same membership answers.
 */
@Test
public void testHiveBloomFilterSerde() throws Exception {
  BloomFilter source = new BloomFilter(1_000_000L, 0.05);

  // String round trip on the source filter
  source.addString(TEST_STRING);
  assertTrue(source.testString(TEST_STRING));
  assertFalse(source.testString(TEST_STRING_NOT_WRITTEN));

  // Integer round trip on the source filter
  source.addLong(TEST_INTEGER);
  assertTrue(source.testLong(TEST_INTEGER));
  assertFalse(source.testLong(TEST_INTEGER + 1));

  // Rebuild the filter from the raw state of the source
  ImmutableList<Long> bits = ImmutableList.copyOf(Longs.asList(source.getBitSet()));
  HiveBloomFilter rebuilt =
      new HiveBloomFilter(bits, source.getBitSize(), source.getNumHashFunctions());

  // The rebuilt filter must answer exactly like the source: string
  assertTrue(rebuilt.testString(TEST_STRING));
  assertFalse(rebuilt.testString(TEST_STRING_NOT_WRITTEN));

  // ... and integer
  assertTrue(rebuilt.testLong(TEST_INTEGER));
  assertFalse(rebuilt.testLong(TEST_INTEGER + 1));
}
use of org.apache.hive.common.util.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testBloomFilterPredicateValuesExisting.
/**
 * Adds every key of {@code TEST_VALUES} to a bloom filter using the add method matching its
 * runtime type, then checks that {@code checkInBloomFilter} reports each value as present.
 * Finally checks that a {@code DATE} value, which is treated as an unsupported type, is reported
 * as a match.
 */
@Test
public void testBloomFilterPredicateValuesExisting() throws Exception {
  BloomFilter bloomFilter = new BloomFilter(TEST_VALUES.size() * 10, 0.01);

  // Populate the filter, dispatching on the runtime type of each test value.
  for (Object o : TEST_VALUES.keySet()) {
    if (o instanceof Long) {
      bloomFilter.addLong((Long) o);
    } else if (o instanceof Integer) {
      bloomFilter.addLong((Integer) o);
    } else if (o instanceof String) {
      bloomFilter.addString((String) o);
    } else if (o instanceof BigDecimal) {
      bloomFilter.addString(o.toString());
    } else if (o instanceof Slice) {
      bloomFilter.addString(((Slice) o).toStringUtf8());
    } else if (o instanceof Timestamp) {
      bloomFilter.addLong(((Timestamp) o).getTime());
    } else if (o instanceof Double) {
      bloomFilter.addDouble((Double) o);
    } else {
      fail("Unsupported type " + o.getClass());
    }
  }

  for (Map.Entry<Object, Type> testValue : TEST_VALUES.entrySet()) {
    boolean matched = checkInBloomFilter(bloomFilter, testValue.getKey(), testValue.getValue());
    // Report the class of the value under test, not the class of the Map.Entry wrapper.
    assertTrue(matched, "type " + testValue.getKey().getClass());
  }

  // test unsupported type: can be supported by ORC but is not implemented yet
  assertTrue(checkInBloomFilter(bloomFilter, new Date(), DATE), "unsupported type DATE should always return true");
}
use of org.apache.hive.common.util.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testBloomFilterPredicateValuesNonExisting.
/**
 * Checks that an empty bloom filter reports none of the {@code TEST_VALUES} keys as present,
 * while a {@code DATE} value, which is treated as an unsupported type, is still reported as a
 * match.
 */
@Test
public void testBloomFilterPredicateValuesNonExisting() throws Exception {
  int expectedEntries = TEST_VALUES.size() * 10;
  BloomFilter emptyFilter = new BloomFilter(expectedEntries, 0.01);

  // Nothing was added, so every lookup must come back negative.
  for (Map.Entry<Object, Type> candidate : TEST_VALUES.entrySet()) {
    Object value = candidate.getKey();
    Type type = candidate.getValue();
    assertFalse(checkInBloomFilter(emptyFilter, value, type), "type " + value.getClass());
  }

  // Unsupported type: ORC could support it, but the check is not implemented yet.
  boolean dateMatched = checkInBloomFilter(emptyFilter, new Date(), DATE);
  assertTrue(dateMatched, "unsupported type DATE should always return true");
}
use of org.apache.hive.common.util.BloomFilter in project presto by prestodb.
the class TestOrcBloomFilters method testMatches.
/**
 * Simulates a query over two columns where only the first one appears in the WHERE clause, and
 * checks predicate matching with a bloom filter containing the value, with an empty bloom
 * filter, and with no bloom filter at all.
 */
@Test
public void testMatches() throws Exception {
  // Stripe column: bigint_0 = 1234
  Domain singleValueDomain = Domain.singleValue(BIGINT, 1234L);
  TupleDomain.ColumnDomain<String> firstColumn =
      new TupleDomain.ColumnDomain<>(COLUMN_0, singleValueDomain);

  // One predicate that consists of bigint_0 = 1234, and one that accepts everything
  TupleDomain<String> effectivePredicate =
      TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(firstColumn)));
  TupleDomain<String> emptyEffectivePredicate = TupleDomain.all();

  // Column references handed to the predicates
  ImmutableList.Builder<ColumnReference<String>> referenceBuilder = ImmutableList.builder();
  referenceBuilder.add(new ColumnReference<>(COLUMN_0, 0, BIGINT));
  referenceBuilder.add(new ColumnReference<>(COLUMN_1, 1, BIGINT));
  List<ColumnReference<String>> columnReferences = referenceBuilder.build();

  TupleDomainOrcPredicate<String> predicate =
      new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences, true);
  TupleDomainOrcPredicate<String> emptyPredicate =
      new TupleDomainOrcPredicate<>(emptyEffectivePredicate, columnReferences, true);

  // Snapshot the filter while it is still empty, then again once it contains 1234.
  HiveBloomFilter workingFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
  OrcProto.BloomFilter emptyOrcBloomFilter = toOrcBloomFilter(workingFilter);
  workingFilter.addLong(1234);
  OrcProto.BloomFilter populatedOrcBloomFilter = toOrcBloomFilter(workingFilter);

  // Identical integer statistics everywhere; only the attached bloom filter differs.
  ColumnStatistics matchingStatistics = new ColumnStatistics(
      null, null, new IntegerStatistics(10L, 2000L), null, null, null, null,
      toHiveBloomFilter(populatedOrcBloomFilter));
  Map<Integer, ColumnStatistics> matchingStatisticsByColumnIndex =
      ImmutableMap.of(0, matchingStatistics);

  ColumnStatistics nonMatchingStatistics = new ColumnStatistics(
      null, null, new IntegerStatistics(10L, 2000L), null, null, null, null,
      toHiveBloomFilter(emptyOrcBloomFilter));
  Map<Integer, ColumnStatistics> nonMatchingStatisticsByColumnIndex =
      ImmutableMap.of(0, nonMatchingStatistics);

  ColumnStatistics statisticsWithoutBloomFilter = new ColumnStatistics(
      null, null, new IntegerStatistics(10L, 2000L), null, null, null, null, null);
  Map<Integer, ColumnStatistics> withoutBloomFilterStatisticsByColumnIndex =
      ImmutableMap.of(0, statisticsWithoutBloomFilter);

  assertTrue(predicate.matches(1L, matchingStatisticsByColumnIndex));
  assertTrue(predicate.matches(1L, withoutBloomFilterStatisticsByColumnIndex));
  assertFalse(predicate.matches(1L, nonMatchingStatisticsByColumnIndex));
  assertTrue(emptyPredicate.matches(1L, matchingStatisticsByColumnIndex));
}
Aggregations