Search in sources :

Example 11 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestDynamicFilterServiceWithBloomFilter method testRegisterAndMergeDynamicFilters.

/**
 * Registers a partitioned dynamic filter, publishes two task-local partial filters and verifies
 * that the fetched bloom filter and the filter exposed through the supplier both contain the
 * values 1..8 (and not 10). Finally checks that clearing the query's dynamic filters empties the
 * partial-filter and task collections in the state store.
 *
 * @throws InterruptedException if one of the waits is interrupted
 */
@Test
public void testRegisterAndMergeDynamicFilters() throws InterruptedException {
    registerDf(filterId, session, PARTITIONED, dynamicFilterService);

    // Supplier lookup before any partial filter has been published
    VariableReferenceExpression nameExpression = mock(VariableReferenceExpression.class);
    when(nameExpression.getName()).thenReturn("name");
    ColumnHandle nameColumn = mock(ColumnHandle.class);
    Supplier<List<Set<DynamicFilter>>> supplier = supplierFor(session.getQueryId(), nameExpression, nameColumn);
    assertTrue(supplier.get().isEmpty(), "should return empty dynamic filter set when dynamic filters are not available");

    // Publish the partial filters of two tasks: values 1-4 and 5-8
    mockLocalDynamicFilter("task1.0", filterId, session.getQueryId().toString(), Arrays.asList("1", "2", "3", "4"));
    mockLocalDynamicFilter("task1.1", filterId, session.getQueryId().toString(), Arrays.asList("5", "6", "7", "8"));
    // presumably gives the service time to merge the partial filters in the background
    Thread.sleep(3000);

    BloomFilter mergedBloomFilter = fetchDynamicFilter(filterId, session.getQueryId().toString());
    for (int value = 1; value <= 8; value++) {
        byte[] encoded = String.valueOf(value).getBytes(StandardCharsets.UTF_8);
        assertTrue(mergedBloomFilter.test(encoded));
    }
    assertFalse(mergedBloomFilter.test("10".getBytes(StandardCharsets.UTF_8)));

    // Supplier lookup once the filter is available
    supplier = supplierFor(session.getQueryId(), nameExpression, nameColumn);
    List<Set<DynamicFilter>> suppliedFilters = supplier.get();
    assertFalse(suppliedFilters == null, "dynamic filters should be ready");
    assertEquals(suppliedFilters.size(), 1, "there should be 1 dynamic filter in supplier");
    DynamicFilter suppliedFilter = suppliedFilters.get(0).iterator().next();
    for (int value = 1; value <= 8; value++) {
        assertTrue(suppliedFilter.contains(String.valueOf(value)));
    }
    assertFalse(suppliedFilter.contains("10"));

    // Supplier lookup for a query id that was never registered
    supplier = supplierFor(new QueryId("invalid"), nameExpression, nameColumn);
    assertTrue(supplier.get().isEmpty(), "should return empty dynamic filter set for invalid or non-existing queryId");

    // State store holds two entries per collection before cleanup and none afterwards
    String queryId = session.getQueryId().getId();
    String partialsKey = createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId);
    String tasksKey = createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(partialsKey).size(), 2);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(tasksKey).size(), 2);
    dynamicFilterService.clearDynamicFiltersForQuery(queryId);
    Thread.sleep(1000);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId)).size(), 0);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId)).size(), 0);
}

/**
 * Builds a dynamic filter supplier for the given query with a single descriptor for
 * {@code filterId} and a single {@code "name"} symbol mapped to the given column handle.
 */
private Supplier<List<Set<DynamicFilter>>> supplierFor(QueryId queryId, VariableReferenceExpression expression, ColumnHandle columnHandle) {
    return DynamicFilterService.getDynamicFilterSupplier(
            queryId,
            ImmutableList.of(ImmutableList.of(new DynamicFilters.Descriptor(filterId, expression))),
            ImmutableMap.of(new Symbol("name"), columnHandle));
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) HashSet(java.util.HashSet) Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) Symbol(io.prestosql.spi.plan.Symbol) QueryId(io.prestosql.spi.QueryId) BloomFilter(io.prestosql.spi.util.BloomFilter) DynamicFilters(io.prestosql.sql.DynamicFilters) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 12 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestHiveDistributedJoinQueriesWithDynamicFiltering method createDynamicFilterSupplier.

/**
 * Creates a supplier of a single-entry dynamic filter map for {@code columnHandle}.
 * The filter is a GLOBAL dynamic filter built from the serialized form of a bloom filter
 * that contains every element of {@code values}.
 *
 * @param values values to add to the bloom filter
 * @param columnHandle column the resulting dynamic filter is keyed by
 * @param filterId id passed to the dynamic filter factory
 * @return supplier producing a one-element list holding the column-to-filter map
 * @throws IOException if serializing the bloom filter fails
 */
private Supplier<List<Map<ColumnHandle, DynamicFilter>>> createDynamicFilterSupplier(List<Long> values, ColumnHandle columnHandle, String filterId) throws IOException {
    // Bloom filter sized by the number of values, with a 0.01 false-positive probability argument
    BloomFilter bloomFilter = new BloomFilter(values.size(), 0.01);
    for (Long element : values) {
        bloomFilter.add(element);
    }

    // Serialize the bloom filter; the factory receives the raw bytes
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    bloomFilter.writeTo(buffer);
    byte[] serialized = buffer.toByteArray();

    DynamicFilter globalFilter = DynamicFilterFactory.create(filterId, columnHandle, serialized, DynamicFilter.Type.GLOBAL);
    Map<ColumnHandle, DynamicFilter> filterByColumn = ImmutableMap.of(columnHandle, globalFilter);
    return () -> ImmutableList.of(filterByColumn);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 13 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class LocalDynamicFilter method createBloomFilterFromSet.

/**
 * Builds a bloom filter containing every element of {@code values}.
 * How an element is added depends on the Java type reported by the channel's type:
 * {@code long} and {@code double} values are added as primitives, {@code Slice} values are
 * added as slices, and anything else is added as the UTF-8 bytes of its string form.
 *
 * @param channel channel whose type decides how the elements are added
 * @param values elements to add
 * @param bloomFilterFpp false-positive probability passed to the bloom filter constructor
 * @return the populated bloom filter
 */
private BloomFilter createBloomFilterFromSet(DynamicFilterSourceOperator.Channel channel, Set values, double bloomFilterFpp) {
    final BloomFilter result = new BloomFilter(BloomFilterDynamicFilter.DEFAULT_DYNAMIC_FILTER_SIZE, bloomFilterFpp);
    final Class<?> javaType = channel.getType().getJavaType();

    if (javaType == long.class) {
        for (Object element : values) {
            result.add(((Long) element).longValue());
        }
        return result;
    }

    if (javaType == double.class) {
        for (Object element : values) {
            result.add(((Double) element).doubleValue());
        }
        return result;
    }

    if (javaType == Slice.class) {
        for (Object element : values) {
            result.add((Slice) element);
        }
        return result;
    }

    // Fallback for every other type: hash the UTF-8 bytes of the string representation
    for (Object element : values) {
        byte[] encoded = String.valueOf(element).getBytes(StandardCharsets.UTF_8);
        result.add(encoded);
    }
    return result;
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 14 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestCrossRegionDynamicFilterOperator method addBloomFilter.

/**
 * Builds a bloom filter from {@code values} and stores its byte form under {@code column}
 * in the cross-region bloom filter map cached for {@code queryId}. If no map is cached yet,
 * a new one is created.
 *
 * @param column key under which the bloom filter bytes are stored
 * @param values strings to add to the bloom filter
 * @param dynamicFilterCacheManager cache that is read and then updated
 * @param queryId query id; combined with CROSS_REGION_DYNAMIC_FILTER_COLLECTION to form the cache key
 * @throws RuntimeException wrapping the underlying {@link IOException} if writing the bloom filter fails
 */
private void addBloomFilter(String column, List<String> values, DynamicFilterCacheManager dynamicFilterCacheManager, String queryId) {
    BloomFilter bloomFilter = new BloomFilter(1024 * 1024, 0.005);
    // NOTE(review): getBytes() uses the platform default charset; left unchanged because the
    // code that probes this filter is not visible here — confirm both sides use the same charset.
    values.forEach(value -> bloomFilter.add(value.getBytes()));
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // The same key is used for the lookup and for the write-back
    final String cacheKey = queryId + CROSS_REGION_DYNAMIC_FILTER_COLLECTION;
    try {
        bloomFilter.writeTo(out);
        Map<String, byte[]> bloomFilters = dynamicFilterCacheManager.getBloomFitler(cacheKey);
        if (bloomFilters == null) {
            bloomFilters = new HashMap<>();
        }
        bloomFilters.put(column, convertBloomFilterToByteArray(bloomFilter));
        dynamicFilterCacheManager.cacheBloomFilters(cacheKey, bloomFilters);
    } catch (IOException e) {
        // Keep the original exception as the cause so the failing stack trace is not lost
        throw new RuntimeException("error to write bloom filter into byte", e);
    }
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 15 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class DynamicFilterService method mergeDynamicFilters.

/**
 * Global Dynamic Filter merging, periodically looks for dynamic filters that can be merged and merges them.
 *
 * <p>For every registered query and every not-yet-merged filter that satisfies
 * {@code hasMergeCondition}, the partial results stored in the state store are merged into a
 * single bloom filter or hash set, depending on the registered data type. The merged filter is
 * cached for the query and, for GLOBAL filters, also published to the merged-dynamic-filters
 * state map. A filter is marked as merged after the first attempt, whether or not it succeeded;
 * {@link IOException} and {@link PrestoException} failures are logged as warnings.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        // Single lookup-or-create instead of containsKey + put + get
        Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.computeIfAbsent(queryId, ignored -> new ConcurrentHashMap<>());
        StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            if (registryInfo.isMerged()) {
                continue;
            }
            final String filterId = columnToDynamicFilterEntry.getKey();
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);
            if (!hasMergeCondition(filterId, queryId)) {
                // Not ready yet: leave it unmerged so a later run can pick it up
                continue;
            }
            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    if (mergedBloomFilter.expectedFpp() > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // Report the FPP that tripped the check along with the element count
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + mergedBloomFilter.expectedFpp()
                                + " (approximate element count: " + mergedBloomFilter.approximateElementCount() + ")");
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // GLOBAL bloom filters are published to the state map in serialized form
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                } else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                } else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }
                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            } catch (IOException | PrestoException e) {
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            } finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) CopyOnWriteArraySet(java.util.concurrent.CopyOnWriteArraySet) HashSet(java.util.HashSet) Symbol(io.prestosql.spi.plan.Symbol) StateMap(io.prestosql.spi.statestore.StateMap) PrestoException(io.prestosql.spi.PrestoException) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) StateSet(io.prestosql.spi.statestore.StateSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateStore(io.prestosql.spi.statestore.StateStore) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) Type(io.prestosql.spi.dynamicfilter.DynamicFilter.Type) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) StateMap(io.prestosql.spi.statestore.StateMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Aggregations

BloomFilter (io.prestosql.spi.util.BloomFilter)26 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)9 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)9 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 Test (org.testng.annotations.Test)7 BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 Map (java.util.Map)6 Page (io.prestosql.spi.Page)5 IOException (java.io.IOException)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 Block (io.prestosql.spi.block.Block)3 Symbol (io.prestosql.spi.plan.Symbol)3 StateSet (io.prestosql.spi.statestore.StateSet)3 List (java.util.List)3 ImmutableList (com.google.common.collect.ImmutableList)2 BlockBuilder (io.prestosql.spi.block.BlockBuilder)2 LongArrayBlockBuilder (io.prestosql.spi.block.LongArrayBlockBuilder)2