Search in sources :

Example 1 with BloomFilterDynamicFilter

use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.

the class TestHiveUtil method testIsPartitionFiltered.

/**
 * Exercises isPartitionFiltered with null inputs, empty inputs, and a bloom-filter dynamic
 * filter on the pt_d column while values are added to the underlying bloom filter.
 */
@Test
public void testIsPartitionFiltered() {
    TypeManager types = new TestingTypeManager();
    List<HivePartitionKey> partitionKeys = new ArrayList<>();
    Set<DynamicFilter> filters = new HashSet<>();
    // The list holds the mutable set itself, so filters added later are visible through it.
    List<Set<DynamicFilter>> filterSets = ImmutableList.of(filters);

    // Null on either side (or both) must never filter the partition.
    assertFalse(isPartitionFiltered(null, null, types), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(partitionKeys, null, types), "Should not filter partition if either partitions or dynamicFilters is null");
    assertFalse(isPartitionFiltered(null, filterSets, types), "Should not filter partition if either partitions or dynamicFilters is null");

    // Both sides present but empty.
    assertFalse(isPartitionFiltered(partitionKeys, filterSets, types), "Should not filter partition if partitions and dynamicFilters are empty");

    // Partition pt_d=0 / app_id=10000, still without any dynamic filter.
    partitionKeys.add(new HivePartitionKey("pt_d", "0"));
    partitionKeys.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(isPartitionFiltered(partitionKeys, filterSets, types), "Should not filter partition if dynamicFilters is empty");

    // Register a bloom filter on pt_d that starts out empty.
    ColumnHandle ptdColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    BloomFilter ptdBloom = new BloomFilter(1024 * 1024, 0.01);
    filters.add(new BloomFilterDynamicFilter("1", ptdColumn, ptdBloom, DynamicFilter.Type.GLOBAL));
    assertTrue(isPartitionFiltered(partitionKeys, filterSets, types), "Should filter partition if any dynamicFilter has 0 element count");

    // The bloom filter holds 1 only; the partition value 0 is absent.
    ptdBloom.add(1L);
    assertTrue(isPartitionFiltered(partitionKeys, filterSets, types), "Should filter partition if partition value not in dynamicFilter");

    // Once 0 is added the partition value is present.
    ptdBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, filterSets, types), "Should not filter partition if partition value is in dynamicFilter");

    // Same check with a separate filter set whose bloom filter only ever receives 0.
    Set<DynamicFilter> otherFilters = new HashSet<>();
    BloomFilter otherBloom = new BloomFilter(1024 * 1024, 0.01);
    otherFilters.add(new BloomFilterDynamicFilter("1", ptdColumn, otherBloom, DynamicFilter.Type.GLOBAL));
    otherBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, ImmutableList.of(otherFilters), types), "Should not filter partition if partition value is in dynamicFilter");
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) HashSetDynamicFilter(io.prestosql.spi.dynamicfilter.HashSetDynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) ArrayList(java.util.ArrayList) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) TypeManager(io.prestosql.spi.type.TypeManager) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) BloomFilter(io.prestosql.spi.util.BloomFilter) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 2 with BloomFilterDynamicFilter

use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.

the class HivePageSource method filterRows.

/**
 * Computes, for every position of {@code page}, whether the row passes the dynamic filters.
 *
 * <p>Each entry of {@code dynamicFilters} is one disjunct. Within a disjunct a row must pass the
 * filter of every eligible column; across disjuncts a row is kept when any disjunct accepts it.
 * A disjunct with no eligible columns accepts no rows.
 *
 * @param dynamicFilters per disjunct, the dynamic filter to apply for each column handle
 * @param page the page whose rows are tested
 * @param eligibleColumns per disjunct, page channel index mapped to the column handle to filter on;
 *        indexed in parallel with {@code dynamicFilters}
 * @param types value types indexed by page channel, used to read native values from a block
 * @return an array with one flag per page position, {@code true} for rows to keep
 */
private static boolean[] filterRows(List<Map<ColumnHandle, DynamicFilter>> dynamicFilters, Page page, List<Map<Integer, ColumnHandle>> eligibleColumns, Type[] types) {
    final int positionCount = page.getPositionCount();
    // a new boolean[] is all false: no row is kept until some disjunct accepts it
    boolean[] result = new boolean[positionCount];
    // loop to handle union of filters if any
    for (int j = 0; j < dynamicFilters.size(); j++) {
        final Map<Integer, ColumnHandle> disjunctColumns = eligibleColumns.get(j);
        if (disjunctColumns.isEmpty()) {
            // nothing to evaluate for this disjunct, so it contributes no rows to the union
            continue;
        }
        final Map<ColumnHandle, DynamicFilter> disjunctFilters = dynamicFilters.get(j);
        boolean[] filterResult = new boolean[positionCount];
        Arrays.fill(filterResult, true);
        for (Map.Entry<Integer, ColumnHandle> column : disjunctColumns.entrySet()) {
            final int columnIndex = column.getKey();
            final DynamicFilter dynamicFilter = disjunctFilters.get(column.getValue());
            final Block block = page.getBlock(columnIndex).getLoadedBlock();
            if (dynamicFilter instanceof BloomFilterDynamicFilter) {
                // the block tests all of its positions against the bloom filter and updates filterResult
                block.filter(((BloomFilterDynamicFilter) dynamicFilter).getBloomFilterDeserialized(), filterResult);
            }
            else {
                for (int i = 0; i < block.getPositionCount(); i++) {
                    // && short-circuits, so rows already rejected are not probed again
                    filterResult[i] = filterResult[i] && dynamicFilter.contains(TypeUtils.readNativeValue(types[columnIndex], block, i));
                }
            }
        }
        // apply the union once per disjunct; repeating it for every column would not change the outcome.
        // NOTE(review): assumes every block of the page has page.getPositionCount() positions - confirm.
        for (int i = 0; i < positionCount; i++) {
            result[i] = result[i] || filterResult[i];
        }
    }
    return result;
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) LazyBlock(io.prestosql.spi.block.LazyBlock) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 3 with BloomFilterDynamicFilter

use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.

the class DynamicFilterService method mergeDynamicFilters.

/**
 * Global Dynamic Filter merging, periodically looks for dynamic filters that can be merged and merges them.
 *
 * <p>For every registered filter that is not yet marked as merged and for which
 * {@code hasMergeCondition} holds, the partial results stored in the state store are merged into
 * a single bloom filter or hash set. The merged filter is cached per query, and for GLOBAL filters
 * it is also written to the merged-filters state map. A merge is attempted only once per filter:
 * the registry entry is marked as merged whether the merge succeeded or failed.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        if (!cachedDynamicFilters.containsKey(queryId)) {
            cachedDynamicFilters.put(queryId, new ConcurrentHashMap<>());
        }
        Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.get(queryId);
        StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);
        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            if (registryInfo.isMerged()) {
                continue;
            }
            final String filterId = columnToDynamicFilterEntry.getKey();
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);
            if (!hasMergeCondition(filterId, queryId)) {
                // not ready yet; the entry stays unmerged so a later run can pick it up
                continue;
            }
            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    if (mergedBloomFilter.expectedFpp() > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // report the FPP that tripped the check; the element count is kept as extra context
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + mergedBloomFilter.expectedFpp() + ", approximate element count: " + mergedBloomFilter.approximateElementCount());
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // GLOBAL bloom filters are stored in the state map in serialized form
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                }
                else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                }
                else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }
                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            }
            catch (IOException | PrestoException e) {
                // a failed merge is logged and the filter is skipped; it is not propagated to the caller
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            }
            finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) StateSet(io.prestosql.spi.statestore.StateSet) CopyOnWriteArraySet(java.util.concurrent.CopyOnWriteArraySet) HashSet(java.util.HashSet) Symbol(io.prestosql.spi.plan.Symbol) StateMap(io.prestosql.spi.statestore.StateMap) PrestoException(io.prestosql.spi.PrestoException) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) StateSet(io.prestosql.spi.statestore.StateSet) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) StateStore(io.prestosql.spi.statestore.StateStore) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) SystemSessionProperties.getDynamicFilteringDataType(io.prestosql.SystemSessionProperties.getDynamicFilteringDataType) DynamicFilterUtils.getDynamicFilterDataType(io.prestosql.utils.DynamicFilterUtils.getDynamicFilterDataType) Type(io.prestosql.spi.dynamicfilter.DynamicFilter.Type) DataType(io.prestosql.spi.dynamicfilter.DynamicFilter.DataType) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) StateMap(io.prestosql.spi.statestore.StateMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Example 4 with BloomFilterDynamicFilter

use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.

the class TestHivePageSource method testFilterRows.

/**
 * Filters a two-column, 1024-row page with one bloom-filter dynamic filter per column and checks
 * how many positions remain.
 *
 * @param columnOffset1 first of the ten consecutive values added to the filter on channel 0
 * @param columnOffset2 first of the ten consecutive values added to the filter on channel 1
 * @param expectedPositionCount number of positions expected in the filtered page
 * @param message assertion message supplied by the data provider
 */
@Test(dataProvider = "data")
public void testFilterRows(int columnOffset1, int columnOffset2, int expectedPositionCount, String message) {
    final int rowCount = 1024;

    // Channel 0 holds 0..1023 and channel 1 holds 10000..11023.
    BlockBuilder dayBuilder = new LongArrayBlockBuilder(null, rowCount);
    BlockBuilder appBuilder = new LongArrayBlockBuilder(null, rowCount);
    for (int row = 0; row < rowCount; row++) {
        dayBuilder.writeLong(row);
        appBuilder.writeLong(10000 + row);
    }
    Block dayValues = dayBuilder.build();
    Block appValues = appBuilder.build();
    Page page = new Page(dayValues, appValues);

    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_INT, parseTypeSignature(INTEGER), 0, REGULAR, Optional.empty());
    ColumnHandle appColumn = new HiveColumnHandle("app_d", HIVE_INT, parseTypeSignature(INTEGER), 1, REGULAR, Optional.empty());

    // Each bloom filter receives ten consecutive values starting at its offset.
    BloomFilter dayBloom = new BloomFilter(1024 * 1024, 0.01);
    BloomFilter appBloom = new BloomFilter(1024 * 1024, 0.01);
    for (int step = 0; step < 10; step++) {
        dayBloom.add(columnOffset1 + step);
        appBloom.add(columnOffset2 + step);
    }

    Map<ColumnHandle, DynamicFilter> filtersByColumn = new HashMap<>();
    filtersByColumn.put(dayColumn, new BloomFilterDynamicFilter("1", dayColumn, dayBloom, DynamicFilter.Type.GLOBAL));
    filtersByColumn.put(appColumn, new BloomFilterDynamicFilter("2", appColumn, appBloom, DynamicFilter.Type.GLOBAL));
    List<Map<ColumnHandle, DynamicFilter>> dynamicFilters = new ArrayList<>();
    dynamicFilters.add(filtersByColumn);

    List<Map<Integer, ColumnHandle>> eligibleColumns = ImmutableList.of(ImmutableMap.of(0, dayColumn, 1, appColumn));
    final Type[] channelTypes = {BigintType.BIGINT, BigintType.BIGINT};

    Page filteredPage = filter(dynamicFilters, page, eligibleColumns, channelTypes);
    assertEquals(filteredPage.getPositionCount(), expectedPositionCount, message);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Page(io.prestosql.spi.Page) BloomFilter(io.prestosql.spi.util.BloomFilter) BigintType(io.prestosql.spi.type.BigintType) Type(io.prestosql.spi.type.Type) Block(io.prestosql.spi.block.Block) LongArrayBlockBuilder(io.prestosql.spi.block.LongArrayBlockBuilder) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(io.prestosql.spi.block.BlockBuilder) LongArrayBlockBuilder(io.prestosql.spi.block.LongArrayBlockBuilder) Test(org.testng.annotations.Test)

Example 5 with BloomFilterDynamicFilter

use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.

the class LocalDynamicFiltersCollector method getDynamicFilters.

/**
 * Collects the dynamic filters that are currently available for the columns of a table scan.
 * To prevent excessive reads from state store, filters that were already fetched or built are
 * taken from caches where possible.
 *
 * <p>One map is built per disjunct index (0 to {@code context.getDisjunctSize()} - 1). For every
 * column assignment of the scan, each filter id is looked up first in {@code cachedDynamicFilters},
 * then in {@code dynamicFilterCacheManager}; if neither has it and {@code predicates} contains the
 * id, a LOCAL filter is created from the predicate and stored in {@code cachedDynamicFilters}.
 * When cross-region dynamic filtering is enabled and the scan targets a DC catalog, bloom filters
 * read from the cache manager are additionally added to every per-disjunct map.
 *
 * @param tableScan TableScanNode that has DynamicFilter applied
 * @return ColumnHandle to DynamicFilter mapping that contains any DynamicFilter that are ready for use,
 *         one map per disjunct; an empty list when the number of non-empty maps differs from the
 *         disjunct count (the early returns in the cross-region section skip that check)
 */
List<Map<ColumnHandle, DynamicFilter>> getDynamicFilters(TableScanNode tableScan) {
    Map<Symbol, ColumnHandle> assignments = tableScan.getAssignments();
    // Column names of this scan; only used by the cross-region section below to skip
    // bloom filters for columns irrelevant to this table scan node.
    Set<String> columnNames = new HashSet<>();
    List<Map<ColumnHandle, DynamicFilter>> resultList = new ArrayList<>();
    for (int i = 0; i < context.getDisjunctSize(); i++) {
        Map<ColumnHandle, DynamicFilter> result = new HashMap<ColumnHandle, DynamicFilter>();
        for (Map.Entry<Symbol, ColumnHandle> entry : assignments.entrySet()) {
            final Symbol columnSymbol = entry.getKey();
            final ColumnHandle columnHandle = entry.getValue();
            try {
                columnNames.add(columnHandle.getColumnName());
            } catch (NotImplementedException e) {
                // ignore this exception, maybe some implementation class does not implement the default method.
                // Such a column is simply not recorded in columnNames.
            }
            final List<String> filterIds = context.getId(columnSymbol, i);
            if (filterIds == null || filterIds.isEmpty()) {
                // no dynamic filter registered for this column in this disjunct
                continue;
            }
            for (String filterId : filterIds) {
                // Try to get dynamic filter from local cache first (keyed by filter id),
                // then from the cache manager (keyed by the filter id / query id cache key)
                String cacheKey = createCacheKey(filterId, queryId);
                DynamicFilter cachedDynamicFilter = cachedDynamicFilters.get(filterId);
                if (cachedDynamicFilter == null) {
                    cachedDynamicFilter = dynamicFilterCacheManager.getDynamicFilter(cacheKey);
                }
                if (cachedDynamicFilter != null) {
                    // Combine multiple dynamic filters for same column handle
                    DynamicFilter dynamicFilter = result.get(columnHandle);
                    // Same dynamic filter might be referred in multiple table scans for different columns due multi table joins.
                    // So clone before setting the columnHandle to avoid race in setting the columnHandle.
                    cachedDynamicFilter = cachedDynamicFilter.clone();
                    cachedDynamicFilter.setColumnHandle(columnHandle);
                    if (dynamicFilter == null) {
                        dynamicFilter = cachedDynamicFilter;
                    } else {
                        dynamicFilter = DynamicFilterFactory.combine(columnHandle, dynamicFilter, cachedDynamicFilter);
                    }
                    dynamicFilter.setColumnHandle(columnHandle);
                    result.put(columnHandle, dynamicFilter);
                    continue;
                }
                // Local dynamic filters: built from the collected predicate and cached under the filter id.
                // Note that this put replaces any filter already stored for the column rather than combining with it.
                if (predicates.containsKey(filterId)) {
                    Optional<RowExpression> filter = context.getFilter(filterId, i);
                    Optional<Predicate<List>> filterPredicate = DynamicFilters.createDynamicFilterPredicate(filter);
                    DynamicFilter dynamicFilter = DynamicFilterFactory.create(filterId, columnHandle, predicates.get(filterId), LOCAL, filterPredicate, filter);
                    cachedDynamicFilters.put(filterId, dynamicFilter);
                    result.put(columnHandle, dynamicFilter);
                }
            }
        }
        // disjuncts without any available filter are left out; the size check at the end detects this
        if (!result.isEmpty()) {
            resultList.add(result);
        }
    }
    if (isCrossRegionDynamicFilterEnabled(session)) {
        if (!metadataOptional.isPresent()) {
            return resultList;
        }
        // check the tableScan is a dc connector table,if a dc table, should consider push down the cross region bloom filter to next cluster
        if (!DataCenterUtility.isDCCatalog(metadataOptional.get(), tableScan.getTable().getCatalogName().getCatalogName())) {
            return resultList;
        }
        // stateMap, key is dc-connector-table column name, value is bloomFilter bytes
        Map<String, byte[]> newBloomFilterFromStateStoreCache = dynamicFilterCacheManager.getBloomFitler(session.getQueryId().getId() + CROSS_LAYER_DYNAMIC_FILTER);
        if (newBloomFilterFromStateStoreCache == null) {
            return resultList;
        }
        // check tableScan contains the stateMap.key, if contains, should push the filter to next cluster
        for (Map.Entry<String, byte[]> entry : newBloomFilterFromStateStoreCache.entrySet()) {
            if (!columnNames.contains(entry.getKey())) {
                continue;
            }
            // synthetic handle that only carries the column name taken from the state map key
            ColumnHandle columnHandle = new ColumnHandle() {

                @Override
                public String getColumnName() {
                    return entry.getKey();
                }
            };
            BloomFilterDynamicFilter newBloomDynamicFilter = new BloomFilterDynamicFilter("", columnHandle, entry.getValue(), GLOBAL);
            for (Map<ColumnHandle, DynamicFilter> result : resultList) {
                // NOTE(review): result is keyed by ColumnHandle but is probed here with the String column
                // name, so this lookup looks like it can never match and the merge branch appears
                // unreachable - confirm whether the lookup was meant to go through the column name.
                if (result.keySet().contains(entry.getKey())) {
                    DynamicFilter existsFilter = result.get(entry.getKey());
                    if (existsFilter instanceof BloomFilterDynamicFilter) {
                        BloomFilter existsBloomFilter = ((BloomFilterDynamicFilter) existsFilter).getBloomFilterDeserialized();
                        existsBloomFilter.merge(newBloomDynamicFilter.getBloomFilterDeserialized());
                        DynamicFilter newDynamicFilter = new BloomFilterDynamicFilter(existsFilter.getFilterId(), columnHandle, existsBloomFilter, GLOBAL);
                        result.put(columnHandle, newDynamicFilter);
                    }
                } else {
                    // adds the cross-region filter under the synthetic handle created above
                    result.put(columnHandle, newBloomDynamicFilter);
                }
            }
        }
    }
    // some disjunct had no filter available: return no filters at all
    if (resultList.size() != context.getDisjunctSize()) {
        return ImmutableList.of();
    }
    return resultList;
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Symbol(io.prestosql.spi.plan.Symbol) NotImplementedException(sun.reflect.generics.reflectiveObjects.NotImplementedException) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashSet(java.util.HashSet)

Aggregations

BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)9 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)7 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)6 Map (java.util.Map)5 BloomFilter (io.prestosql.spi.util.BloomFilter)4 HashMap (java.util.HashMap)4 Test (org.testng.annotations.Test)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 Page (io.prestosql.spi.Page)3 StateSet (io.prestosql.spi.statestore.StateSet)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 SequencePageBuilder.createSequencePage (io.prestosql.SequencePageBuilder.createSequencePage)2 DynamicFilterSourceOperatorFactory (io.prestosql.operator.DynamicFilterSourceOperator.DynamicFilterSourceOperatorFactory)2 PrestoException (io.prestosql.spi.PrestoException)2 Block (io.prestosql.spi.block.Block)2 HashSetDynamicFilter (io.prestosql.spi.dynamicfilter.HashSetDynamicFilter)2 Symbol (io.prestosql.spi.plan.Symbol)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 Predicate (java.util.function.Predicate)2