Search in sources :

Example 16 with StateMap

use of io.prestosql.spi.statestore.StateMap in project hetu-core by openlookeng.

the class DynamicFilterService method mergeDynamicFilters.

/**
 * Global Dynamic Filter merging, periodically looks for dynamic filters that can be merged and merges them.
 * <p>
 * For every registered filter that has not been marked as merged and for which {@code hasMergeCondition}
 * holds, the partial results are read from the state store and combined according to the filter's data type:
 * <ul>
 * <li>{@code BLOOM_FILTER}: partial bloom filters are merged; the result is rejected when its expected FPP
 * exceeds {@code DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP}.</li>
 * <li>{@code HASHSET}: partial sets are merged into one set.</li>
 * </ul>
 * Filters of type {@code GLOBAL} are additionally published to the {@code MERGED_DYNAMIC_FILTERS} state map
 * (bloom filters in serialized {@code byte[]} form). Every successfully merged filter is stored in the
 * per-query cache. A merge that fails with {@link IOException} or {@link PrestoException} is logged and
 * not retried, because the filter is flagged as merged in either case.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        // A single computeIfAbsent call returns the per-query cache directly. The former
        // containsKey/put/get sequence could observe null from get() if the entry was removed
        // in between, which would surface later as a NullPointerException on put().
        final Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.computeIfAbsent(queryId, ignored -> new ConcurrentHashMap<>());
        StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            if (registryInfo.isMerged()) {
                continue;
            }
            final String filterId = columnToDynamicFilterEntry.getKey();
            if (!hasMergeCondition(filterId, queryId)) {
                // Not ready yet: leave the filter unmerged so a later run can pick it up.
                continue;
            }
            // Only read the remaining registry details once we know a merge will be attempted.
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);
            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    if (mergedBloomFilter.expectedFpp() > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // Report the FPP that caused the rejection together with the element count.
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + mergedBloomFilter.expectedFpp() + ", approximate element count: " + mergedBloomFilter.approximateElementCount());
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // Global bloom filters are published to the state store in serialized form.
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                } else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                } else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }
                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            } catch (IOException | PrestoException e) {
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            } finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) CopyOnWriteArraySet(java.util.concurrent.CopyOnWriteArraySet) HashSet(java.util.HashSet) Symbol(io.prestosql.spi.plan.Symbol) StateMap(io.prestosql.spi.statestore.StateMap) PrestoException(io.prestosql.spi.PrestoException) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) StateSet(io.prestosql.spi.statestore.StateSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateStore(io.prestosql.spi.statestore.StateStore) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) Type(io.prestosql.spi.dynamicfilter.DynamicFilter.Type) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) StateMap(io.prestosql.spi.statestore.StateMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 17 with StateMap

use of io.prestosql.spi.statestore.StateMap in project hetu-core by openlookeng.

the class DynamicFilterService method removeFinishedQuery.

/**
 * Releases everything held for the queries listed in {@code finishedQuery}.
 * <p>
 * For each finished query, while holding the monitor of {@code finishedQuery}: the partial results of every
 * registered filter are cleared, merged filters are removed from the {@code MERGED_DYNAMIC_FILTERS} state map,
 * every state collection whose name contains the query id is cleared, and the query is dropped from both the
 * registry and the local cache. Processed ids are removed from {@code finishedQuery} at the end.
 */
private void removeFinishedQuery() {
    StateStore store = stateStoreProvider.getStateStore();
    StateMap mergedFilters = (StateMap) store.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
    List<String> processedQueries = new ArrayList<>();
    // Clear registered dynamic filter tasks
    synchronized (finishedQuery) {
        for (String queryId : finishedQuery) {
            Map<String, DynamicFilterRegistryInfo> registeredFilters = dynamicFilters.get(queryId);
            if (registeredFilters != null) {
                for (Entry<String, DynamicFilterRegistryInfo> registration : registeredFilters.entrySet()) {
                    String filterId = registration.getKey();
                    clearPartialResults(filterId, queryId);
                    if (!registration.getValue().isMerged()) {
                        continue;
                    }
                    // Only merged filters have an entry in the merged-filters state map.
                    mergedFilters.remove(createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId));
                }
            }

            // Snapshot the matching collection names first, then clear them one by one.
            List<String> queryCollections = store.getStateCollections()
                    .keySet()
                    .stream()
                    .filter(name -> name.contains(queryId))
                    .collect(Collectors.toList());
            queryCollections.forEach(name -> clearStatesInStateStore(store, name));

            dynamicFilters.remove(queryId);
            cachedDynamicFilters.remove(queryId);
            processedQueries.add(queryId);
        }
        finishedQuery.removeAll(processedQueries);
    }
}
Also used : ScheduledFuture(java.util.concurrent.ScheduledFuture) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) Inject(com.google.inject.Inject) StateStore(io.prestosql.spi.statestore.StateStore) DynamicFilterUtils.findFilterNodeInStage(io.prestosql.utils.DynamicFilterUtils.findFilterNodeInStage) CallExpression(io.prestosql.spi.relation.CallExpression) PreDestroy(javax.annotation.PreDestroy) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) FilterNode(io.prestosql.spi.plan.FilterNode) ByteArrayInputStream(java.io.ByteArrayInputStream) Map(java.util.Map) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) StateMap(io.prestosql.spi.statestore.StateMap) Predicate(java.util.function.Predicate) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) DynamicFilters(io.prestosql.sql.DynamicFilters) PlanNode(io.prestosql.spi.plan.PlanNode) StateSet(io.prestosql.spi.statestore.StateSet) CopyOnWriteArraySet(java.util.concurrent.CopyOnWriteArraySet) Threads.threadsNamed(io.airlift.concurrent.Threads.threadsNamed) Collectors(java.util.stream.Collectors) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) Executors(java.util.concurrent.Executors) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Type(io.prestosql.spi.dynamicfilter.DynamicFilter.Type) List(java.util.List) PostConstruct(javax.annotation.PostConstruct) Entry(java.util.Map.Entry) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Optional(java.util.Optional) LOCAL(io.prestosql.spi.dynamicfilter.DynamicFilter.Type.LOCAL) TaskId(io.prestosql.execution.TaskId) 
ByteArrayOutputStream(java.io.ByteArrayOutputStream) Logger(io.airlift.log.Logger) HASHSET(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType.HASHSET) HashMap(java.util.HashMap) Supplier(java.util.function.Supplier) SemiJoinNode(io.prestosql.sql.planner.plan.SemiJoinNode) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BloomFilter(io.prestosql.spi.util.BloomFilter) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) Objects.requireNonNull(java.util.Objects.requireNonNull) Session(io.prestosql.Session) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) QueryId(io.prestosql.spi.QueryId) JoinNode(io.prestosql.spi.plan.JoinNode) Symbol(io.prestosql.spi.plan.Symbol) StageStateMachine(io.prestosql.execution.StageStateMachine) StateCollection(io.prestosql.spi.statestore.StateCollection) InternalNode(io.prestosql.metadata.InternalNode) IOException(java.io.IOException) BLOOM_FILTER(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType.BLOOM_FILTER) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) TimeUnit(java.util.concurrent.TimeUnit) GLOBAL(io.prestosql.spi.dynamicfilter.DynamicFilter.Type.GLOBAL) DynamicFilterUtils(io.prestosql.utils.DynamicFilterUtils) DynamicFilterUtils.createKey(io.prestosql.utils.DynamicFilterUtils.createKey) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) StateStoreProvider(io.prestosql.statestore.StateStoreProvider) RowExpression(io.prestosql.spi.relation.RowExpression) MAP(io.prestosql.spi.statestore.StateCollection.Type.MAP) DynamicFilterFactory(io.prestosql.spi.dynamicfilter.DynamicFilterFactory) Collections(java.util.Collections) SET(io.prestosql.spi.statestore.StateCollection.Type.SET) StateMap(io.prestosql.spi.statestore.StateMap) ArrayList(java.util.ArrayList) StateStore(io.prestosql.spi.statestore.StateStore)

Example 18 with StateMap

use of io.prestosql.spi.statestore.StateMap in project hetu-core by openlookeng.

the class SplitCacheStateInitializer method updateLocal.

/**
 * Copies the split cache metadata held in the state store into the local split cache map.
 * <p>
 * Does nothing (beyond a debug log) when the state store is not available yet or when initialization has
 * already completed. Otherwise the split cache metadata collection is created if absent, every entry in
 * it is deserialized and applied locally, and the initialization state is set to {@code COMPLETED}.
 */
private void updateLocal() {
    final StateStore store = provider.getStateStore();
    if (store == null) {
        log.debug("State store not yet initialized. Will retry after %s milli seconds until %s", delay.toMillis(), timeout.toString(TimeUnit.SECONDS));
        return;
    }

    if (InitializationStatus.COMPLETED == initializationState.get()) {
        log.debug("Split cache map already initialized.");
        return;
    }

    // create state store collection
    boolean collectionMissing = store.getStateCollection(StateStoreConstants.SPLIT_CACHE_METADATA_NAME) == null;
    if (collectionMissing) {
        SplitCacheStateStoreChangesListener listener = new SplitCacheStateStoreChangesListener(SplitCacheMap.getInstance(), mapper);
        store.createStateMap(StateStoreConstants.SPLIT_CACHE_METADATA_NAME, listener);
    }

    StateMap<String, String> splitCacheStates = (StateMap<String, String>) store.getStateCollection(StateStoreConstants.SPLIT_CACHE_METADATA_NAME);
    splitCacheStates.getAll().forEach(this::loadTableCacheInfoIntoLocalMap);
    initializationState.set(InitializationStatus.COMPLETED);
}

/**
 * Deserializes one state store entry and stores it in the local split cache map.
 * A failure for one table is logged and does not propagate to the caller.
 *
 * @param fqTableName key of the state store entry, used as the table name locally
 * @param stateJson serialized {@code TableCacheInfo} for that table
 */
private void loadTableCacheInfoIntoLocalMap(String fqTableName, String stateJson) {
    try {
        TableCacheInfo tableCacheInfo = mapper.readerFor(TableCacheInfo.class).readValue(stateJson);
        log.info("Retrieving cache info for table %s from state store and updating on local copy.", fqTableName);
        splitCacheMap.setTableCacheInfo(fqTableName, tableCacheInfo);
    } catch (Exception e) {
        log.error(e, "Unable to update local split cache map from state store.");
    }
}
Also used : StateMap(io.prestosql.spi.statestore.StateMap) StateStore(io.prestosql.spi.statestore.StateStore)

Example 19 with StateMap

use of io.prestosql.spi.statestore.StateMap in project hetu-core by openlookeng.

the class QueryTracker method removeQueryInStateStore.

/**
 * Removes the given query from the finished-query state collection, if that collection is available.
 * <p>
 * Nothing happens when there is no state store, when the collection does not exist,
 * or when the collection is not of type {@code MAP}.
 *
 * @param queryId id of the query whose entry should be removed
 */
private void removeQueryInStateStore(QueryId queryId) {
    StateStore store = stateStoreProvider.getStateStore();
    if (store == null) {
        return;
    }

    StateCollection finishedQueries = store.getStateCollection(StateStoreConstants.FINISHED_QUERY_STATE_COLLECTION_NAME);
    if (finishedQueries == null) {
        return;
    }
    if (!finishedQueries.getType().equals(StateCollection.Type.MAP)) {
        return;
    }

    StateMap<String, String> finishedQueryMap = (StateMap<String, String>) finishedQueries;
    finishedQueryMap.remove(queryId.getId());
}
Also used : StateCollection(io.prestosql.spi.statestore.StateCollection) StateMap(io.prestosql.spi.statestore.StateMap) StateStore(io.prestosql.spi.statestore.StateStore)

Example 20 with StateMap

use of io.prestosql.spi.statestore.StateMap in project hetu-core by openlookeng.

the class BloomFilterUtils method updateBloomFilter.

/**
 * Update bloom filters from the dynamicFilterCacheManager,
 * and if the table is a dc table, create a new bloomFilter entry for the dc table and put it into the state-store.
 * <p>
 * The method returns without doing anything when any of the optional arguments is absent, or when the cache
 * manager holds no bloom filters for the query. A cached filter whose bytes equal the copy in
 * {@code bloomFiltersBackup} is skipped. Otherwise the backup is refreshed, the filter is matched to the
 * first table scan output symbol mapped to its column, deserialized, and stored in {@code bloomFilters}
 * under that output position. A filter that cannot be deserialized is logged and left out of
 * {@code bloomFilters}.
 *
 * @param queryIdOptional queryId
 * @param isDcTable is this table a dc connector table
 * @param stateStoreProviderOptional stateStoreProvider
 * @param tableScanNodeOptional tableScanNode
 * @param dynamicFilterCacheManagerOptional dynamicFilterCacheManager
 * @param bloomFiltersBackup bloom filters backup, to check whether the bloom filter in stateStore has been updated;
 * updated in place
 * @param bloomFilters bloom filters keyed by output symbol position; updated in place
 */
public static void updateBloomFilter(Optional<QueryId> queryIdOptional, boolean isDcTable, Optional<StateStoreProvider> stateStoreProviderOptional, Optional<TableScanNode> tableScanNodeOptional, Optional<DynamicFilterCacheManager> dynamicFilterCacheManagerOptional, Map<String, byte[]> bloomFiltersBackup, Map<Integer, BloomFilter> bloomFilters) {
    if (!queryIdOptional.isPresent() || !stateStoreProviderOptional.isPresent() || !tableScanNodeOptional.isPresent() || !dynamicFilterCacheManagerOptional.isPresent()) {
        return;
    }
    final String queryId = queryIdOptional.get().getId();
    final DynamicFilterCacheManager cacheManager = dynamicFilterCacheManagerOptional.get();
    final TableScanNode tableScanNode = tableScanNodeOptional.get();

    Map<String, Set<String>> mapping = cacheManager.getMapping(queryId + QUERY_COLUMN_NAME_TO_SYMBOL_MAPPING);
    Map<String, byte[]> bloomFiltersFromStateStoreCache = cacheManager.getBloomFitler(queryId + CROSS_REGION_DYNAMIC_FILTER_COLLECTION);
    // The same lookup is null-checked further down for the cross-layer key, so a missing
    // entry is treated here as "nothing to update" rather than dereferenced.
    if (bloomFiltersFromStateStoreCache == null) {
        return;
    }
    for (Map.Entry<String, byte[]> entry : bloomFiltersFromStateStoreCache.entrySet()) {
        final String columnName = entry.getKey();
        final byte[] serializedFilter = entry.getValue();
        // check the bloom filter exist in backup;
        // maybe the bloom filter has been merged, so the bytes have to be compared as well
        if (bloomFiltersBackup.containsKey(columnName) && Arrays.equals(bloomFiltersBackup.get(columnName), serializedFilter)) {
            continue;
        }
        final byte[] backupCopy = Arrays.copyOf(serializedFilter, serializedFilter.length);
        bloomFiltersBackup.put(columnName, backupCopy);
        int index = -1;
        // query statement column -> outputs symbols
        Set<String> columnToSymbols = mapping.get(columnName);
        if (columnToSymbols != null && columnToSymbols.size() > 0) {
            List<Symbol> outputSymbols = tableScanNode.getOutputSymbols();
            for (int i = 0; i < outputSymbols.size(); i++) {
                // find the position of the output symbol
                if (columnToSymbols.contains(outputSymbols.get(i).getName())) {
                    index = i;
                    break;
                }
            }
        }
        if (index < 0) {
            continue;
        }
        // put the bloom filter into bloomFilters
        try (ByteArrayInputStream input = new ByteArrayInputStream(backupCopy)) {
            bloomFilters.put(index, BloomFilter.readFrom(input));
        } catch (IOException e) {
            // ignore the bloomFilter if broken
            LOGGER.warn("queryId:%s, update BloomFilter error, cause : %s", queryIdOptional.get(), e.getMessage());
        }
        // if this tableScanNode.table is a dc connector table, we should push the bloomFilter to next cluster
        if (isDcTable) {
            // filter keyName is DataCenterColumnHandle.getName(), filter value is entry.getValue()
            ColumnHandle columnHandle = tableScanNode.getAssignments().get(tableScanNode.getOutputSymbols().get(index));
            StateMap<String, Map<String, byte[]>> crossRegionDynamicFilters = (StateMap<String, Map<String, byte[]>>) stateStoreProviderOptional.get().getStateStore().getOrCreateStateCollection(CROSS_REGION_DYNAMIC_FILTERS, StateCollection.Type.MAP);
            Map<String, byte[]> newBloomFilterForNextDCCluster = cacheManager.getBloomFitler(queryId + CROSS_LAYER_DYNAMIC_FILTER);
            if (newBloomFilterForNextDCCluster == null) {
                newBloomFilterForNextDCCluster = new HashMap<>();
            }
            newBloomFilterForNextDCCluster.put(columnHandle.getColumnName(), serializedFilter);
            crossRegionDynamicFilters.put(queryId + CROSS_LAYER_DYNAMIC_FILTER, newBloomFilterForNextDCCluster);
        }
    }
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) HashSet(java.util.HashSet) Set(java.util.Set) Symbol(io.prestosql.spi.plan.Symbol) StateMap(io.prestosql.spi.statestore.StateMap) IOException(java.io.IOException) ByteArrayInputStream(java.io.ByteArrayInputStream) HashMap(java.util.HashMap) Map(java.util.Map) StateMap(io.prestosql.spi.statestore.StateMap)

Aggregations

StateMap (io.prestosql.spi.statestore.StateMap)27 StateStore (io.prestosql.spi.statestore.StateStore)16 HashMap (java.util.HashMap)11 Map (java.util.Map)10 StateCollection (io.prestosql.spi.statestore.StateCollection)8 Test (org.testng.annotations.Test)8 ImmutableSet (com.google.common.collect.ImmutableSet)5 Symbol (io.prestosql.spi.plan.Symbol)5 IOException (java.io.IOException)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 HashSet (java.util.HashSet)4 Set (java.util.Set)4 PrestoException (io.prestosql.spi.PrestoException)3 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)3 StateSet (io.prestosql.spi.statestore.StateSet)3 SimpleDateFormat (java.text.SimpleDateFormat)3 Date (java.util.Date)3 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3 TimeUnit (java.util.concurrent.TimeUnit)3 Collectors (java.util.stream.Collectors)3