Search in sources :

Example 6 with StateSet

use of io.prestosql.spi.statestore.StateSet in project hetu-core by openlookeng.

In class DynamicFilterService, method mergeDynamicFilters:

/**
 * Global dynamic filter merging: walks every registered dynamic filter and, for each one that has
 * not been merged yet and for which {@code hasMergeCondition} holds, merges the partial filters
 * read from the state store into a single filter.
 * <p>
 * The merged filter is cached per query in {@code cachedDynamicFilters}. Filters of type
 * {@code GLOBAL} are additionally published to the {@code MERGED_DYNAMIC_FILTERS} state map
 * (bloom filters as a serialized byte array, hash sets as the merged set itself).
 * <p>
 * A merge is attempted at most once per filter: once an attempt has started, the registry entry is
 * flagged as merged whether the attempt succeeded or failed. {@link IOException} and
 * {@link PrestoException} raised during the merge are logged and do not abort the remaining filters.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    if (stateStore == null) {
        // Without a state store there are no partial filters to read and nowhere to publish results.
        return;
    }
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        // Single lookup-or-create instead of a containsKey/put/get sequence.
        final Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.computeIfAbsent(queryId, ignored -> new ConcurrentHashMap<>());
        final StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            final String filterId = columnToDynamicFilterEntry.getKey();
            // Skip filters that were already attempted or are not ready, before doing any other work.
            if (registryInfo.isMerged() || !hasMergeCondition(filterId, queryId)) {
                continue;
            }
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);
            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    if (mergedBloomFilter.expectedFpp() > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // Report the value that actually violated the limit, plus the element count for context.
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + mergedBloomFilter.expectedFpp()
                                + " (approximate element count: " + mergedBloomFilter.approximateElementCount() + ")");
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // Global bloom filters are published in serialized form.
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                } else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                } else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }
                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            } catch (IOException | PrestoException e) {
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            } finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) CopyOnWriteArraySet(java.util.concurrent.CopyOnWriteArraySet) HashSet(java.util.HashSet) Symbol(io.prestosql.spi.plan.Symbol) StateMap(io.prestosql.spi.statestore.StateMap) PrestoException(io.prestosql.spi.PrestoException) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) StateSet(io.prestosql.spi.statestore.StateSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateStore(io.prestosql.spi.statestore.StateStore) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) Type(io.prestosql.spi.dynamicfilter.DynamicFilter.Type) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) StateMap(io.prestosql.spi.statestore.StateMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 7 with StateSet

use of io.prestosql.spi.statestore.StateSet in project hetu-core by openlookeng.

In class LocalDynamicFilter, method addPartialFilterToStateStore:

/**
 * Publishes every collected partial filter in {@code result} to the state store.
 * <p>
 * For each entry the filter values are added to the state set keyed by PARTIALPREFIX/filter id/query id:
 * as a serialized bloom filter when the resolved data type is {@code BLOOM_FILTER} (skipped if the
 * serialization yields {@code null}), otherwise as the value set itself. This task's id is then added
 * to the state set keyed by TASKSPREFIX/filter id/query id. Does nothing when no state store is available.
 */
private void addPartialFilterToStateStore() {
    final StateStore store = stateStoreProvider.getStateStore();
    if (store == null) {
        return;
    }
    final DynamicFilter.DataType resolvedDataType = getDynamicFilterDataType(type, dynamicFilterDataType);
    for (Map.Entry<String, Set> entry : result.entrySet()) {
        final DynamicFilterSourceOperator.Channel sourceChannel = channels.get(entry.getKey());
        final Set values = entry.getValue();
        final String id = sourceChannel.getFilterId();
        final String partialKey = createKey(PARTIALPREFIX, id, sourceChannel.getQueryId());
        if (resolvedDataType != BLOOM_FILTER) {
            // Non-bloom filters are stored as the raw value set.
            ((StateSet) store.getOrCreateStateCollection(partialKey, SET)).add(values);
        } else {
            byte[] serialized = convertBloomFilterToByteArray(createBloomFilterFromSet(sourceChannel, values, bloomFilterFpp));
            if (serialized != null) {
                ((StateSet) store.getOrCreateStateCollection(partialKey, SET)).add(serialized);
            }
        }
        // Record this task under the filter's task set regardless of which branch ran above.
        final String tasksKey = createKey(TASKSPREFIX, id, sourceChannel.getQueryId());
        ((StateSet) store.getOrCreateStateCollection(tasksKey, SET)).add(taskId.toString());
        log.debug("creating new " + resolvedDataType + " dynamic filter for size of: " + result.size() + ", key: " + partialKey + ", taskId: " + taskId);
    }
}
Also used : Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) HashSet(java.util.HashSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateStore(io.prestosql.spi.statestore.StateStore) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) DynamicFilterSourceOperator(io.prestosql.operator.DynamicFilterSourceOperator) StateSet(io.prestosql.spi.statestore.StateSet)

Example 8 with StateSet

use of io.prestosql.spi.statestore.StateSet in project hetu-core by openlookeng.

In class TestDynamicFilterServiceWithHashSet, method mockLocalDynamicFilterHashSet:

/**
 * Test helper that mimics a task publishing a finished hash-set partial filter.
 * <p>
 * Adds a {@link HashSet} copy of {@code values} to the state set keyed by PARTIALPREFIX/filterId/queryId
 * and adds {@code taskId} to the state set keyed by TASKSPREFIX/filterId/queryId. Any exception thrown
 * while touching the state store fails the calling test.
 *
 * @param taskId id of the simulated task
 * @param filterId id of the dynamic filter
 * @param queryId id of the query the filter belongs to
 * @param values values the simulated partial filter contains
 */
private void mockLocalDynamicFilterHashSet(String taskId, String filterId, String queryId, List<String> values) {
    // Typed copy constructor instead of filling a raw HashSet element by element.
    HashSet<String> filter = new HashSet<>(values);
    String key = DynamicFilterUtils.createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId);
    try {
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(key)).add(filter);
        ((StateSet) stateStoreProvider.getStateStore().getStateCollection(DynamicFilterUtils.createKey(DynamicFilterUtils.TASKSPREFIX, filterId, queryId))).add(taskId);
    } catch (Exception e) {
        Assert.fail("could not register finish filter, Exception happened:" + e.getMessage());
    }
}
Also used : StateSet(io.prestosql.spi.statestore.StateSet) HashSet(java.util.HashSet)

Example 9 with StateSet

use of io.prestosql.spi.statestore.StateSet in project hetu-core by openlookeng.

In class TestDynamicFilterSourceOperator, method testGlobalDynamicFilterSourceOperatorBloomFilter:

/**
 * Pushes three pages of BIGINT values through one GLOBAL/BLOOM_FILTER source operator and checks
 * that every partial bloom filter stored under the PARTIALPREFIX key contains 101 and reports a
 * size of 6, and that exactly one entry was recorded under the TASKSPREFIX key.
 */
@Test
public void testGlobalDynamicFilterSourceOperatorBloomFilter() {
    final String filterId = "99";
    DynamicFilterSourceOperatorFactory factory = createOperatorFactory(GLOBAL, BLOOM_FILTER, 1, channel(0, BIGINT, filterId));

    // Only this one operator is used; noMoreOperators() is not called on the factory in this test.
    DynamicFilterSourceOperator operator = createOperator(factory);
    verifyPassthrough(
            operator,
            ImmutableList.of(BIGINT),
            new Page(createLongsBlock(1, 2)),
            new Page(createLongsBlock(99, 101)),
            new Page(createLongsBlock(3, 5)));

    String queryId = TEST_SESSION.getQueryId().toString();
    String partialKey = DynamicFilterUtils.createKey(PARTIALPREFIX, filterId, queryId);
    StateSet partialFilters = (StateSet) stateStoreProvider.getStateStore().getStateCollection(partialKey);
    for (Object serialized : partialFilters.getAll()) {
        // Each stored element is a serialized bloom filter.
        BloomFilterDynamicFilter bloomFilter = new BloomFilterDynamicFilter(filterId, null, (byte[]) serialized, GLOBAL);
        assertTrue(bloomFilter.contains(101L));
        assertEquals(bloomFilter.getSize(), 6);
    }

    String tasksKey = DynamicFilterUtils.createKey(TASKSPREFIX, filterId, queryId);
    assertEquals(stateStoreProvider.getStateStore().getStateCollection(tasksKey).size(), 1);
}
Also used : DynamicFilterSourceOperatorFactory(io.prestosql.operator.DynamicFilterSourceOperator.DynamicFilterSourceOperatorFactory) SequencePageBuilder.createSequencePage(io.prestosql.SequencePageBuilder.createSequencePage) Page(io.prestosql.spi.Page) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateSet(io.prestosql.spi.statestore.StateSet) Test(org.testng.annotations.Test) AfterTest(org.testng.annotations.AfterTest) BeforeTest(org.testng.annotations.BeforeTest)

Example 10 with StateSet

use of io.prestosql.spi.statestore.StateSet in project hetu-core by openlookeng.

In class TestDynamicFilterSourceOperator, method testCollectOnlyFirstColumn:

/**
 * Feeds two-column (BOOLEAN, DOUBLE) pages through a LOCAL/HASHSET source operator configured on
 * channel 0 only, then checks that every partial filter stored under the PARTIALPREFIX key equals
 * the set {true, false}.
 */
@Test
public void testCollectOnlyFirstColumn() {
    final String filterId = "first_column";
    OperatorFactory factory = createOperatorFactory(LOCAL, HASHSET, 1, channel(0, BOOLEAN, filterId));

    DynamicFilterSourceOperator operator = createOperator((DynamicFilterSourceOperatorFactory) factory);
    Page firstPage = new Page(createBooleansBlock(true, 2), createDoublesBlock(1.5, 3.0));
    Page secondPage = new Page(createBooleansBlock(false, 1), createDoublesBlock(4.5));
    verifyPassthrough(operator, ImmutableList.of(BOOLEAN, DOUBLE), firstPage, secondPage);
    factory.noMoreOperators();

    Set<Boolean> expected = new HashSet<>(Arrays.asList(true, false));
    String partialKey = DynamicFilterUtils.createKey(PARTIALPREFIX, filterId, TEST_SESSION.getQueryId().toString());
    StateSet partialFilters = (StateSet) stateStoreProvider.getStateStore().getStateCollection(partialKey);
    for (Object storedValues : partialFilters.getAll()) {
        // Stored elements are value sets here, not serialized bloom filters.
        assertEquals((Set) storedValues, expected);
    }
}
Also used : DynamicFilterSourceOperatorFactory(io.prestosql.operator.DynamicFilterSourceOperator.DynamicFilterSourceOperatorFactory) DynamicFilterSourceOperatorFactory(io.prestosql.operator.DynamicFilterSourceOperator.DynamicFilterSourceOperatorFactory) SequencePageBuilder.createSequencePage(io.prestosql.SequencePageBuilder.createSequencePage) Page(io.prestosql.spi.Page) StateSet(io.prestosql.spi.statestore.StateSet) HashSet(java.util.HashSet) Test(org.testng.annotations.Test) AfterTest(org.testng.annotations.AfterTest) BeforeTest(org.testng.annotations.BeforeTest)

Aggregations

StateSet (io.prestosql.spi.statestore.StateSet): 15; DynamicFilterSourceOperatorFactory (io.prestosql.operator.DynamicFilterSourceOperator.DynamicFilterSourceOperatorFactory): 10; HashSet (java.util.HashSet): 10; AfterTest (org.testng.annotations.AfterTest): 10; BeforeTest (org.testng.annotations.BeforeTest): 10; Test (org.testng.annotations.Test): 10; SequencePageBuilder.createSequencePage (io.prestosql.SequencePageBuilder.createSequencePage): 9; Page (io.prestosql.spi.Page): 9; BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter): 4; StateStore (io.prestosql.spi.statestore.StateStore): 3; ImmutableMap (com.google.common.collect.ImmutableMap): 2; DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter): 2; StateMap (io.prestosql.spi.statestore.StateMap): 2; BloomFilter (io.prestosql.spi.util.BloomFilter): 2; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2; IOException (java.io.IOException): 2; HashMap (java.util.HashMap): 2; Map (java.util.Map): 2; Set (java.util.Set): 2; ImmutableSet (com.google.common.collect.ImmutableSet): 1