Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openlookeng.
From class TestDynamicFilterServiceWithBloomFilter, method testRegisterAndMergeDynamicFilters.
/**
 * Registers a PARTITIONED dynamic filter, feeds it values from two mocked tasks, and verifies that:
 * the supplier is empty before any values exist; the fetched bloom filter and the supplier-provided
 * dynamic filter both contain "1".."8" but not "10"; an unknown query id yields an empty result; and
 * the partial/task state collections drop from two entries to zero after the query's filters are cleared.
 */
@Test
public void testRegisterAndMergeDynamicFilters() throws InterruptedException {
    registerDf(filterId, session, PARTITIONED, dynamicFilterService);

    // Shared supplier inputs: one descriptor over a mocked expression called "name", mapped to a mocked column
    VariableReferenceExpression expression = mock(VariableReferenceExpression.class);
    when(expression.getName()).thenReturn("name");
    ColumnHandle columnHandle = mock(ColumnHandle.class);
    List<List<DynamicFilters.Descriptor>> descriptors = ImmutableList.of(ImmutableList.of(new DynamicFilters.Descriptor(filterId, expression)));
    ImmutableMap<Symbol, ColumnHandle> columnHandles = ImmutableMap.of(new Symbol("name"), columnHandle);

    // Test getDynamicFilterSupplier
    Supplier<List<Set<DynamicFilter>>> supplier = DynamicFilterService.getDynamicFilterSupplier(session.getQueryId(), descriptors, columnHandles);
    assertTrue(supplier.get().isEmpty(), "should return empty dynamic filter set when dynamic filters are not available");

    mockLocalDynamicFilter("task1.0", filterId, session.getQueryId().toString(), Arrays.asList("1", "2", "3", "4"));
    mockLocalDynamicFilter("task1.1", filterId, session.getQueryId().toString(), Arrays.asList("5", "6", "7", "8"));
    // presumably gives the service time to merge the two partial filters - TODO confirm
    Thread.sleep(3000);

    BloomFilter fetched = fetchDynamicFilter(filterId, session.getQueryId().toString());
    for (int value = 1; value <= 8; value++) {
        byte[] encoded = String.valueOf(value).getBytes(StandardCharsets.UTF_8);
        assertTrue(fetched.test(encoded));
    }
    assertFalse(fetched.test("10".getBytes(StandardCharsets.UTF_8)));

    // Test getDynamicFilterSupplier
    supplier = DynamicFilterService.getDynamicFilterSupplier(session.getQueryId(), descriptors, columnHandles);
    List<Set<DynamicFilter>> suppliedFilters = supplier.get();
    assertFalse(suppliedFilters == null, "dynamic filters should be ready");
    assertEquals(suppliedFilters.size(), 1, "there should be 1 dynamic filter in supplier");
    DynamicFilter suppliedFilter = suppliedFilters.get(0).iterator().next();
    for (int value = 1; value <= 8; value++) {
        assertTrue(suppliedFilter.contains(String.valueOf(value)));
    }
    assertFalse(suppliedFilter.contains("10"));

    supplier = DynamicFilterService.getDynamicFilterSupplier(new QueryId("invalid"), descriptors, columnHandles);
    assertTrue(supplier.get().isEmpty(), "should return empty dynamic filter set for invalid or non-existing queryId");

    // Both state collections hold one entry per mocked task until the query's filters are cleared
    String queryId = session.getQueryId().getId();
    String partialKey = createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId);
    String tasksKey = createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(partialKey).size(), 2);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(tasksKey).size(), 2);

    dynamicFilterService.clearDynamicFiltersForQuery(queryId);
    Thread.sleep(1000);

    assertEquals(stateStoreProvider.getStateStore().getStateCollection(partialKey).size(), 0);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(tasksKey).size(), 0);
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openlookeng.
From class TestHiveDistributedJoinQueriesWithDynamicFiltering, method createDynamicFilterSupplier.
/**
 * Builds a supplier that hands out a single GLOBAL dynamic filter for {@code columnHandle}.
 * The filter is backed by a bloom filter (sized to {@code values.size()}, 0.01 false-positive
 * probability) populated with {@code values} and passed to the factory in serialized form.
 *
 * @param values the long values to add to the bloom filter
 * @param columnHandle the column the dynamic filter is keyed by
 * @param filterId the id given to the created dynamic filter
 * @return a supplier returning a one-element list containing the column-to-filter map
 * @throws IOException declared because the bloom filter is written to an output stream
 */
private Supplier<List<Map<ColumnHandle, DynamicFilter>>> createDynamicFilterSupplier(List<Long> values, ColumnHandle columnHandle, String filterId) throws IOException {
    BloomFilter bloomFilter = new BloomFilter(values.size(), 0.01);
    values.forEach(value -> bloomFilter.add(value));

    // Serialize the populated filter; the factory receives it as a byte array
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    bloomFilter.writeTo(serialized);
    byte[] filterBytes = serialized.toByteArray();

    DynamicFilter globalFilter = DynamicFilterFactory.create(filterId, columnHandle, filterBytes, DynamicFilter.Type.GLOBAL);
    Map<ColumnHandle, DynamicFilter> filtersByColumn = ImmutableMap.of(columnHandle, globalFilter);
    return () -> ImmutableList.of(filtersByColumn);
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openlookeng.
From class LocalDynamicFilter, method createBloomFilterFromSet.
/**
 * Creates a bloom filter containing every element of {@code values}.
 * Elements are added according to the Java type reported by the channel's type: as {@code long},
 * as {@code double}, as {@code Slice}, or otherwise as the UTF-8 bytes of {@code String.valueOf(value)}.
 *
 * @param channel the channel whose type determines how each value is added
 * @param values the values to add
 * @param bloomFilterFpp the false-positive probability passed to the bloom filter constructor
 * @return the populated bloom filter
 */
private BloomFilter createBloomFilterFromSet(DynamicFilterSourceOperator.Channel channel, Set values, double bloomFilterFpp) {
    BloomFilter result = new BloomFilter(BloomFilterDynamicFilter.DEFAULT_DYNAMIC_FILTER_SIZE, bloomFilterFpp);
    // The Java type is a property of the channel, so resolve it once and dispatch per element
    Class<?> javaType = channel.getType().getJavaType();
    for (Object element : values) {
        if (javaType == long.class) {
            result.add(((Long) element).longValue());
        } else if (javaType == double.class) {
            result.add(((Double) element).doubleValue());
        } else if (javaType == Slice.class) {
            result.add((Slice) element);
        } else {
            // Any other type is added via its string form
            result.add(String.valueOf(element).getBytes(StandardCharsets.UTF_8));
        }
    }
    return result;
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openlookeng.
From class TestCrossRegionDynamicFilterOperator, method addBloomFilter.
/**
 * Builds a bloom filter from {@code values} and stores its byte-array form under {@code column}
 * in the bloom filter map cached for {@code queryId + CROSS_REGION_DYNAMIC_FILTER_COLLECTION}.
 * An existing map for that key is extended; otherwise a new one is created.
 *
 * @param column the key under which the bloom filter bytes are stored
 * @param values the strings to add to the bloom filter (encoded as UTF-8)
 * @param dynamicFilterCacheManager the cache manager that holds the per-query bloom filter map
 * @param queryId the query id used to build the cache key
 * @throws RuntimeException wrapping the {@link IOException} if writing the bloom filter fails
 */
private void addBloomFilter(String column, List<String> values, DynamicFilterCacheManager dynamicFilterCacheManager, String queryId) {
    BloomFilter bloomFilter = new BloomFilter(1024 * 1024, 0.005);
    // Encode explicitly as UTF-8 so the filter contents do not depend on the platform default charset
    values.forEach(value -> bloomFilter.add(value.getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    String cacheKey = queryId + CROSS_REGION_DYNAMIC_FILTER_COLLECTION;
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try {
        // NOTE(review): the bytes written to `out` are never read; the cached bytes come from
        // convertBloomFilterToByteArray below. Kept as-is - confirm whether this write is still needed.
        bloomFilter.writeTo(out);
        Map<String, byte[]> bloomFilters = dynamicFilterCacheManager.getBloomFitler(cacheKey);
        if (bloomFilters == null) {
            bloomFilters = new HashMap<>();
        }
        bloomFilters.put(column, convertBloomFilterToByteArray(bloomFilter));
        dynamicFilterCacheManager.cacheBloomFilters(cacheKey, bloomFilters);
    } catch (IOException e) {
        // Keep the original exception as the cause so the underlying I/O failure is not lost
        throw new RuntimeException("error to write bloom filter into byte", e);
    }
}
Use of io.prestosql.spi.util.BloomFilter in the hetu-core project by openlookeng.
From class DynamicFilterService, method mergeDynamicFilters.
/**
 * Global Dynamic Filter merging, periodically looks for dynamic filters that can be merged and merges them.
 *
 * <p>For every registered query and every not-yet-merged filter that satisfies
 * {@code hasMergeCondition}, the partial results stored under the filter's PARTIALPREFIX key are
 * combined (as a bloom filter or a hash set, depending on the registered data type). The merged
 * filter is cached per query, and GLOBAL filters are additionally written to the
 * MERGED_DYNAMIC_FILTERS state map. A filter that reached the merge step is marked as merged whether
 * or not merging succeeded, so each filter is attempted at most once.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        // Single lookup-or-create instead of containsKey/put/get, so the per-query cache cannot be
        // observed as missing between the check and the read
        Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.computeIfAbsent(queryId, key -> new ConcurrentHashMap<>());
        StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            if (registryInfo.isMerged()) {
                continue;
            }
            final String filterId = columnToDynamicFilterEntry.getKey();
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);
            if (!hasMergeCondition(filterId, queryId)) {
                // Not ready yet; the filter stays unmerged and is looked at again on the next run
                continue;
            }
            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    if (mergedBloomFilter.expectedFpp() > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // Report the offending FPP itself; the element count is kept as extra context
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + mergedBloomFilter.expectedFpp() + ", approximate element count: " + mergedBloomFilter.approximateElementCount());
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // GLOBAL bloom filters are stored in the state map in serialized form
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                } else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                } else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }
                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            } catch (IOException | PrestoException e) {
                // A failed merge is logged and skipped; the filter is simply not cached
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            } finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
Aggregations