Search in sources :

Example 16 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project boostkit-bigdata by kunpengcompute.

the class TestHiveUtil method testIsPartitionFiltered.

/**
 * Walks {@code isPartitionFiltered} through its null, empty, miss and hit cases
 * for a partition keyed on {@code pt_d = "0"} and {@code app_id = "10000"}.
 *
 * <p>The same filter set instance is mutated between assertions, so the order of
 * the statements below is significant.
 */
@Test
public void testIsPartitionFiltered() {
    final String nullInputMessage = "Should not filter partition if either partitions or dynamicFilters is null";
    final String valuePresentMessage = "Should not filter partition if partition value is in dynamicFilter";

    TypeManager types = new TestingTypeManager();
    Set<DynamicFilter> filters = new HashSet<>();
    List<HivePartitionKey> partitionKeys = new ArrayList<>();
    // Wraps the live set: later additions to `filters` are visible through this list.
    List<Set<DynamicFilter>> filterList = ImmutableList.of(filters);

    // Null on either side (or both) must never filter.
    assertFalse(isPartitionFiltered(null, null, types), nullInputMessage);
    assertFalse(isPartitionFiltered(partitionKeys, null, types), nullInputMessage);
    assertFalse(isPartitionFiltered(null, filterList, types), nullInputMessage);

    // Both sides empty.
    assertFalse(isPartitionFiltered(partitionKeys, filterList, types), "Should not filter partition if partitions and dynamicFilters are empty");

    // Partition keys present, still no filters.
    partitionKeys.add(new HivePartitionKey("pt_d", "0"));
    partitionKeys.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(isPartitionFiltered(partitionKeys, filterList, types), "Should not filter partition if dynamicFilters is empty");

    // A bloom filter on pt_d that holds nothing yet.
    ColumnHandle ptdColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    BloomFilter ptdBloom = new BloomFilter(1024 * 1024, 0.01);
    filters.add(new BloomFilterDynamicFilter("1", ptdColumn, ptdBloom, DynamicFilter.Type.GLOBAL));
    assertTrue(isPartitionFiltered(partitionKeys, filterList, types), "Should filter partition if any dynamicFilter has 0 element count");

    // Filter holds a value other than the partition's.
    ptdBloom.add(1L);
    assertTrue(isPartitionFiltered(partitionKeys, filterList, types), "Should filter partition if partition value not in dynamicFilter");

    // Filter now holds the partition's value.
    ptdBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, filterList, types), valuePresentMessage);

    // Fresh filter set whose only value is the partition's value.
    Set<DynamicFilter> freshFilters = new HashSet<>();
    BloomFilter freshBloom = new BloomFilter(1024 * 1024, 0.01);
    freshFilters.add(new BloomFilterDynamicFilter("1", ptdColumn, freshBloom, DynamicFilter.Type.GLOBAL));
    freshBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, ImmutableList.of(freshFilters), types), valuePresentMessage);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) HashSetDynamicFilter(io.prestosql.spi.dynamicfilter.HashSetDynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) ArrayList(java.util.ArrayList) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) TypeManager(io.prestosql.spi.type.TypeManager) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) BloomFilter(io.prestosql.spi.util.BloomFilter) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 17 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project boostkit-bigdata by kunpengcompute.

the class TestHiveDistributedJoinQueriesWithDynamicFiltering method createDynamicFilterSupplier.

/**
 * Builds a supplier that yields a single-element list mapping {@code columnHandle}
 * to a GLOBAL dynamic filter created from the serialized bloom filter of {@code values}.
 *
 * @param values       values added to the bloom filter; its size is also used as the
 *                     first constructor argument of the bloom filter
 * @param columnHandle column the resulting dynamic filter is keyed on
 * @param filterId     id passed to {@code DynamicFilterFactory.create}
 * @return supplier returning a one-element list holding the column-to-filter map
 * @throws IOException declared for the bloom filter serialization call
 */
private Supplier<List<Map<ColumnHandle, DynamicFilter>>> createDynamicFilterSupplier(List<Long> values, ColumnHandle columnHandle, String filterId) throws IOException {
    BloomFilter bloom = new BloomFilter(values.size(), 0.01);
    for (Long each : values) {
        bloom.add(each);
    }

    // Serialize the populated filter; the factory takes it as raw bytes.
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    bloom.writeTo(serialized);
    byte[] bloomBytes = serialized.toByteArray();

    DynamicFilter globalFilter = DynamicFilterFactory.create(filterId, columnHandle, bloomBytes, DynamicFilter.Type.GLOBAL);
    Map<ColumnHandle, DynamicFilter> filterByColumn = ImmutableMap.of(columnHandle, globalFilter);
    return () -> ImmutableList.of(filterByColumn);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 18 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class TestHivePageSource method testFilterRows.

/**
 * Filters a two-column page (values {@code 0..1023} and {@code 10000..11023}) with one
 * bloom filter per column and checks how many positions remain.
 *
 * @param columnOffset1         first of the ten consecutive values added to the first column's filter
 * @param columnOffset2         first of the ten consecutive values added to the second column's filter
 * @param expectedPositionCount expected position count of the filtered page
 * @param message               assertion message
 */
@Test(dataProvider = "data")
public void testFilterRows(int columnOffset1, int columnOffset2, int expectedPositionCount, String message) {
    final int rowCount = 1024;
    final Type[] columnTypes = new Type[] { BigintType.BIGINT, BigintType.BIGINT };

    // Build both columns in a single pass; the two builders are independent.
    BlockBuilder dayBuilder = new LongArrayBlockBuilder(null, rowCount);
    BlockBuilder appBuilder = new LongArrayBlockBuilder(null, rowCount);
    for (int row = 0; row < rowCount; row++) {
        dayBuilder.writeLong(row);
        appBuilder.writeLong(10000 + row);
    }
    Page input = new Page(dayBuilder.build(), appBuilder.build());

    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_INT, parseTypeSignature(INTEGER), 0, REGULAR, Optional.empty());
    ColumnHandle appColumn = new HiveColumnHandle("app_d", HIVE_INT, parseTypeSignature(INTEGER), 1, REGULAR, Optional.empty());

    // Each bloom filter receives ten consecutive values starting at its offset.
    BloomFilter dayBloom = new BloomFilter(1024 * 1024, 0.01);
    BloomFilter appBloom = new BloomFilter(1024 * 1024, 0.01);
    for (int step = 0; step < 10; step++) {
        dayBloom.add(columnOffset1 + step);
        appBloom.add(columnOffset2 + step);
    }

    Map<ColumnHandle, DynamicFilter> filterByColumn = new HashMap<>();
    filterByColumn.put(dayColumn, new BloomFilterDynamicFilter("1", dayColumn, dayBloom, DynamicFilter.Type.GLOBAL));
    filterByColumn.put(appColumn, new BloomFilterDynamicFilter("2", appColumn, appBloom, DynamicFilter.Type.GLOBAL));

    List<Map<ColumnHandle, DynamicFilter>> filterList = new ArrayList<>();
    filterList.add(filterByColumn);
    List<Map<Integer, ColumnHandle>> eligibleColumns = ImmutableList.of(ImmutableMap.of(0, dayColumn, 1, appColumn));

    Page result = filter(filterList, input, eligibleColumns, columnTypes);
    assertEquals(result.getPositionCount(), expectedPositionCount, message);
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Page(io.prestosql.spi.Page) BloomFilter(io.prestosql.spi.util.BloomFilter) BigintType(io.prestosql.spi.type.BigintType) Type(io.prestosql.spi.type.Type) Block(io.prestosql.spi.block.Block) LongArrayBlockBuilder(io.prestosql.spi.block.LongArrayBlockBuilder) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(io.prestosql.spi.block.BlockBuilder) LongArrayBlockBuilder(io.prestosql.spi.block.LongArrayBlockBuilder) Test(org.testng.annotations.Test)

Example 19 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class LocalDynamicFiltersCollector method getDynamicFilters.

/**
 * Returns the dynamic filters that are ready to be applied to the given table scan, one map per
 * disjunct. Filters are looked up first in the local {@code cachedDynamicFilters} map, then in
 * {@code dynamicFilterCacheManager}, and finally built from the locally collected {@code predicates}.
 *
 * <p>When cross-region dynamic filtering is enabled and the scanned table belongs to a DC catalog,
 * bloom filters published under the query's cross-layer key are added to every per-disjunct map,
 * merged with an existing bloom filter on the same column where one exists.
 *
 * @param tableScan TableScanNode that has DynamicFilter applied
 * @return ColumnHandle to DynamicFilter mappings that contain any DynamicFilter that is ready for use;
 *         an empty list when the number of non-empty mappings differs from the disjunct count
 */
List<Map<ColumnHandle, DynamicFilter>> getDynamicFilters(TableScanNode tableScan) {
    Map<Symbol, ColumnHandle> assignments = tableScan.getAssignments();
    // Column names seen on this scan; used below to pick the cross-region filters that apply to it.
    Set<String> columnNames = new HashSet<>();
    List<Map<ColumnHandle, DynamicFilter>> resultList = new ArrayList<>();
    for (int i = 0; i < context.getDisjunctSize(); i++) {
        Map<ColumnHandle, DynamicFilter> result = new HashMap<>();
        for (Map.Entry<Symbol, ColumnHandle> entry : assignments.entrySet()) {
            final Symbol columnSymbol = entry.getKey();
            final ColumnHandle columnHandle = entry.getValue();
            try {
                columnNames.add(columnHandle.getColumnName());
            } catch (NotImplementedException ignored) {
                // Deliberately ignored: some ColumnHandle implementations do not implement getColumnName().
            }
            final List<String> filterIds = context.getId(columnSymbol, i);
            // Skip symbols that have no dynamic filter registered for this disjunct.
            if (filterIds == null || filterIds.isEmpty()) {
                continue;
            }
            for (String filterId : filterIds) {
                // Try to get dynamic filter from local cache first
                String cacheKey = createCacheKey(filterId, queryId);
                DynamicFilter cachedDynamicFilter = cachedDynamicFilters.get(filterId);
                if (cachedDynamicFilter == null) {
                    cachedDynamicFilter = dynamicFilterCacheManager.getDynamicFilter(cacheKey);
                }
                if (cachedDynamicFilter != null) {
                    // Combine multiple dynamic filters for same column handle
                    DynamicFilter dynamicFilter = result.get(columnHandle);
                    // Same dynamic filter might be referred in multiple table scans for different columns due multi table joins.
                    // So clone before setting the columnHandle to avoid race in setting the columnHandle.
                    cachedDynamicFilter = cachedDynamicFilter.clone();
                    cachedDynamicFilter.setColumnHandle(columnHandle);
                    if (dynamicFilter == null) {
                        dynamicFilter = cachedDynamicFilter;
                    } else {
                        dynamicFilter = DynamicFilterFactory.combine(columnHandle, dynamicFilter, cachedDynamicFilter);
                    }
                    dynamicFilter.setColumnHandle(columnHandle);
                    result.put(columnHandle, dynamicFilter);
                    continue;
                }
                // Local dynamic filters
                if (predicates.containsKey(filterId)) {
                    Optional<RowExpression> filter = context.getFilter(filterId, i);
                    Optional<Predicate<List>> filterPredicate = DynamicFilters.createDynamicFilterPredicate(filter);
                    DynamicFilter dynamicFilter = DynamicFilterFactory.create(filterId, columnHandle, predicates.get(filterId), LOCAL, filterPredicate, filter);
                    cachedDynamicFilters.put(filterId, dynamicFilter);
                    result.put(columnHandle, dynamicFilter);
                }
            }
        }
        if (!result.isEmpty()) {
            resultList.add(result);
        }
    }
    if (isCrossRegionDynamicFilterEnabled(session)) {
        // NOTE(review): the three early returns below bypass the disjunct-count check at the end of
        // this method; preserved as-is, confirm that this is intended.
        if (!metadataOptional.isPresent()) {
            return resultList;
        }
        // check the tableScan is a dc connector table,if a dc table, should consider push down the cross region bloom filter to next cluster
        if (!DataCenterUtility.isDCCatalog(metadataOptional.get(), tableScan.getTable().getCatalogName().getCatalogName())) {
            return resultList;
        }
        // stateMap, key is dc-connector-table column name, value is bloomFilter bytes
        Map<String, byte[]> newBloomFilterFromStateStoreCache = dynamicFilterCacheManager.getBloomFitler(session.getQueryId().getId() + CROSS_LAYER_DYNAMIC_FILTER);
        if (newBloomFilterFromStateStoreCache == null) {
            return resultList;
        }
        // check tableScan contains the stateMap.key, if contains, should push the filter to next cluster
        for (Map.Entry<String, byte[]> entry : newBloomFilterFromStateStoreCache.entrySet()) {
            final String columnName = entry.getKey();
            if (!columnNames.contains(columnName)) {
                continue;
            }
            ColumnHandle columnHandle = new ColumnHandle() {

                @Override
                public String getColumnName() {
                    return columnName;
                }
            };
            BloomFilterDynamicFilter newBloomDynamicFilter = new BloomFilterDynamicFilter("", columnHandle, entry.getValue(), GLOBAL);
            for (Map<ColumnHandle, DynamicFilter> result : resultList) {
                // The maps are keyed by ColumnHandle, so an existing filter on this column has to be
                // located by column name; looking the map up with the String name can never match.
                Optional<ColumnHandle> existingHandle = findHandleByColumnName(result, columnName);
                if (existingHandle.isPresent()) {
                    DynamicFilter existsFilter = result.get(existingHandle.get());
                    if (existsFilter instanceof BloomFilterDynamicFilter) {
                        BloomFilter existsBloomFilter = ((BloomFilterDynamicFilter) existsFilter).getBloomFilterDeserialized();
                        existsBloomFilter.merge(newBloomDynamicFilter.getBloomFilterDeserialized());
                        // Replace the entry under its existing key so the column keeps a single, merged filter.
                        DynamicFilter newDynamicFilter = new BloomFilterDynamicFilter(existsFilter.getFilterId(), existingHandle.get(), existsBloomFilter, GLOBAL);
                        result.put(existingHandle.get(), newDynamicFilter);
                    }
                } else {
                    result.put(columnHandle, newBloomDynamicFilter);
                }
            }
        }
    }
    // Every disjunct must have produced at least one filter; otherwise no filters are returned at all.
    if (resultList.size() != context.getDisjunctSize()) {
        return ImmutableList.of();
    }
    return resultList;
}

/**
 * Finds the key of {@code filters} whose column name equals {@code columnName}.
 *
 * @param filters    per-disjunct column-to-filter mapping to search
 * @param columnName column name to look for
 * @return the matching handle, or empty when no key reports that column name
 */
private static Optional<ColumnHandle> findHandleByColumnName(Map<ColumnHandle, DynamicFilter> filters, String columnName) {
    for (ColumnHandle candidate : filters.keySet()) {
        try {
            if (columnName.equals(candidate.getColumnName())) {
                return Optional.of(candidate);
            }
        } catch (NotImplementedException ignored) {
            // Handles that do not implement getColumnName() cannot match by name; skip them.
        }
    }
    return Optional.empty();
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Symbol(io.prestosql.spi.plan.Symbol) NotImplementedException(sun.reflect.generics.reflectiveObjects.NotImplementedException) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) BloomFilter(io.prestosql.spi.util.BloomFilter) Predicate(java.util.function.Predicate) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashSet(java.util.HashSet)

Example 20 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class BloomIndex method readFromMmap.

/**
 * Memory-maps the index file read-only and rebuilds the bloom filter from it.
 *
 * <p>The mapped region starts with two ints: the number of hash functions, then a count that is
 * used here as the number of 64-bit words to read. That many longs follow and are passed to the
 * {@code BloomFilter} constructor together with the hash-function count.
 *
 * @return the bloom filter built from the mapped words
 * @throws IOException if the file cannot be opened or mapped
 */
private BloomFilter readFromMmap() throws IOException {
    // A single try-with-resources closes the channel first and then the file, as nested blocks would.
    try (RandomAccessFile indexFile = new RandomAccessFile(getFile(), "r");
            FileChannel indexChannel = indexFile.getChannel()) {
        MappedByteBuffer mapped = indexChannel.map(FileChannel.MapMode.READ_ONLY, 0, 2 * 4 + mmapSizeInByte);
        int hashFunctionCount = mapped.getInt();
        int wordCount = mapped.getInt();
        long[] words = new long[wordCount];
        // Bulk-read the longs that follow the two header ints, from the buffer's current position.
        mapped.asLongBuffer().get(words);
        return new BloomFilter(words, hashFunctionCount);
    }
}
Also used : RandomAccessFile(java.io.RandomAccessFile) MappedByteBuffer(java.nio.MappedByteBuffer) FileChannel(java.nio.channels.FileChannel) BloomFilter(io.prestosql.spi.util.BloomFilter)

Aggregations

BloomFilter (io.prestosql.spi.util.BloomFilter)26 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)9 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)9 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 Test (org.testng.annotations.Test)7 BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 Map (java.util.Map)6 Page (io.prestosql.spi.Page)5 IOException (java.io.IOException)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 Block (io.prestosql.spi.block.Block)3 Symbol (io.prestosql.spi.plan.Symbol)3 StateSet (io.prestosql.spi.statestore.StateSet)3 List (java.util.List)3 ImmutableList (com.google.common.collect.ImmutableList)2 BlockBuilder (io.prestosql.spi.block.BlockBuilder)2 LongArrayBlockBuilder (io.prestosql.spi.block.LongArrayBlockBuilder)2