use of io.prestosql.spi.util.BloomFilter in project boostkit-bigdata by kunpengcompute.
the class TestHiveUtil method testIsPartitionFiltered.
/**
 * Walks {@code isPartitionFiltered} through its cases in order: null arguments, empty
 * inputs, partitions without filters, an empty bloom filter, a bloom filter that lacks the
 * partition value, and bloom filters that contain it.
 */
@Test
public void testIsPartitionFiltered() {
    final String nullInputMessage = "Should not filter partition if either partitions or dynamicFilters is null";
    final String valuePresentMessage = "Should not filter partition if partition value is in dynamicFilter";
    TypeManager types = new TestingTypeManager();

    // Null on either side is never filtered.
    assertFalse(isPartitionFiltered(null, null, types), nullInputMessage);
    Set<DynamicFilter> filterSet = new HashSet<>();
    List<HivePartitionKey> partitionKeys = new ArrayList<>();
    assertFalse(isPartitionFiltered(partitionKeys, null, types), nullInputMessage);
    assertFalse(isPartitionFiltered(null, ImmutableList.of(filterSet), types), nullInputMessage);

    // Both sides present but empty.
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should not filter partition if partitions and dynamicFilters are empty");

    // Partition keys present, still no filters.
    partitionKeys.add(new HivePartitionKey("pt_d", "0"));
    partitionKeys.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should not filter partition if dynamicFilters is empty");

    // A bloom filter on pt_d; it is populated after being wrapped, one value at a time.
    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    BloomFilter dayBloom = new BloomFilter(1024 * 1024, 0.01);
    filterSet.add(new BloomFilterDynamicFilter("1", dayColumn, dayBloom, DynamicFilter.Type.GLOBAL));
    assertTrue(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should filter partition if any dynamicFilter has 0 element count");

    dayBloom.add(1L);
    assertTrue(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should filter partition if partition value not in dynamicFilter");

    dayBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types), valuePresentMessage);

    // A second, independent filter set whose bloom filter only ever holds the partition value.
    Set<DynamicFilter> secondFilterSet = new HashSet<>();
    BloomFilter secondBloom = new BloomFilter(1024 * 1024, 0.01);
    secondFilterSet.add(new BloomFilterDynamicFilter("1", dayColumn, secondBloom, DynamicFilter.Type.GLOBAL));
    secondBloom.add(0L);
    assertFalse(isPartitionFiltered(partitionKeys, ImmutableList.of(secondFilterSet), types), valuePresentMessage);
}
use of io.prestosql.spi.util.BloomFilter in project boostkit-bigdata by kunpengcompute.
the class TestHiveDistributedJoinQueriesWithDynamicFiltering method createDynamicFilterSupplier.
/**
 * Creates a supplier that yields a one-element list holding a single-entry map from
 * {@code columnHandle} to a GLOBAL dynamic filter. The filter is built from the serialized
 * form of a bloom filter populated with {@code values}.
 *
 * @param values values to add to the bloom filter
 * @param columnHandle column the dynamic filter is keyed by
 * @param filterId id passed to the dynamic filter factory
 * @return supplier producing a new immutable list around the same map on every call
 * @throws IOException declared because the bloom filter is written to a stream
 */
private Supplier<List<Map<ColumnHandle, DynamicFilter>>> createDynamicFilterSupplier(List<Long> values, ColumnHandle columnHandle, String filterId) throws IOException {
    BloomFilter bloom = new BloomFilter(values.size(), 0.01);
    values.forEach(value -> bloom.add(value));

    // Round-trip through bytes: the factory is handed the serialized bloom filter.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    bloom.writeTo(buffer);
    byte[] serializedBloom = buffer.toByteArray();

    DynamicFilter globalFilter = DynamicFilterFactory.create(filterId, columnHandle, serializedBloom, DynamicFilter.Type.GLOBAL);
    Map<ColumnHandle, DynamicFilter> filtersByColumn = ImmutableMap.of(columnHandle, globalFilter);
    return () -> ImmutableList.of(filtersByColumn);
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class TestHivePageSource method testFilterRows.
/**
 * Filters a two-column page of 1024 rows (first column 0..1023, second column 10000..11023)
 * with one bloom filter per column, each seeded with ten consecutive values starting at the
 * supplied offset, and checks the number of positions left in the filtered page.
 *
 * @param columnOffset1 first value added to the bloom filter of the first column
 * @param columnOffset2 first value added to the bloom filter of the second column
 * @param expectedPositionCount expected position count of the filtered page
 * @param message assertion message
 */
@Test(dataProvider = "data")
public void testFilterRows(int columnOffset1, int columnOffset2, int expectedPositionCount, String message) {
    final int rowCount = 1024;
    final Type[] columnTypes = {BigintType.BIGINT, BigintType.BIGINT};

    // Fill both columns in a single pass; the builders are independent of each other.
    BlockBuilder dayBuilder = new LongArrayBlockBuilder(null, rowCount);
    BlockBuilder appBuilder = new LongArrayBlockBuilder(null, rowCount);
    for (int row = 0; row < rowCount; row++) {
        dayBuilder.writeLong(row);
        appBuilder.writeLong(10000 + row);
    }
    Page page = new Page(dayBuilder.build(), appBuilder.build());

    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_INT, parseTypeSignature(INTEGER), 0, REGULAR, Optional.empty());
    ColumnHandle appColumn = new HiveColumnHandle("app_d", HIVE_INT, parseTypeSignature(INTEGER), 1, REGULAR, Optional.empty());

    // Each bloom filter receives ten consecutive values starting at its offset.
    BloomFilter dayBloom = new BloomFilter(1024 * 1024, 0.01);
    BloomFilter appBloom = new BloomFilter(1024 * 1024, 0.01);
    for (int delta = 0; delta < 10; delta++) {
        dayBloom.add(columnOffset1 + delta);
        appBloom.add(columnOffset2 + delta);
    }

    Map<ColumnHandle, DynamicFilter> filtersByColumn = new HashMap<>();
    filtersByColumn.put(dayColumn, new BloomFilterDynamicFilter("1", dayColumn, dayBloom, DynamicFilter.Type.GLOBAL));
    filtersByColumn.put(appColumn, new BloomFilterDynamicFilter("2", appColumn, appBloom, DynamicFilter.Type.GLOBAL));
    List<Map<ColumnHandle, DynamicFilter>> dynamicFilters = new ArrayList<>();
    dynamicFilters.add(filtersByColumn);

    // Channel index -> column handle for the columns the filters may be applied to.
    List<Map<Integer, ColumnHandle>> eligibleColumns = ImmutableList.of(ImmutableMap.of(0, dayColumn, 1, appColumn));

    Page filteredPage = filter(dynamicFilters, page, eligibleColumns, columnTypes);
    assertEquals(filteredPage.getPositionCount(), expectedPositionCount, message);
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class LocalDynamicFiltersCollector method getDynamicFilters.
/**
* Collects the dynamic filters currently available for the given table scan, one map per disjunct.
* <p>
* For every disjunct index below {@code context.getDisjunctSize()} and every symbol-to-column
* assignment of the scan, each filter id registered for the symbol is looked up first in
* {@code cachedDynamicFilters} and then in {@code dynamicFilterCacheManager}. A filter found there is
* cloned, bound to the column handle and combined with any filter already collected for that column.
* If no cached filter exists but {@code predicates} contains the filter id, a LOCAL filter is created
* and stored in {@code cachedDynamicFilters} for re-use.
* <p>
* When cross-region dynamic filtering is enabled for the session and the scan targets a DC catalog,
* bloom filters fetched under the query's cross-layer key are added to every collected map for the
* column names that appear in this scan.
*
* @param tableScan TableScanNode that has DynamicFilter applied
* @return one ColumnHandle to DynamicFilter map per disjunct. On the final return path an empty list is
* returned when the number of non-empty maps differs from the disjunct count; the early returns inside
* the cross-region section return the collected list as-is, without that check.
*/
List<Map<ColumnHandle, DynamicFilter>> getDynamicFilters(TableScanNode tableScan) {
Map<Symbol, ColumnHandle> assignments = tableScan.getAssignments();
// Column names seen on this scan's assignments; used further down to match cross-region bloom filters by name.
Set<String> columnNames = new HashSet<>();
List<Map<ColumnHandle, DynamicFilter>> resultList = new ArrayList<>();
for (int i = 0; i < context.getDisjunctSize(); i++) {
// Filters collected for disjunct i, keyed by the scan's column handle.
Map<ColumnHandle, DynamicFilter> result = new HashMap<ColumnHandle, DynamicFilter>();
for (Map.Entry<Symbol, ColumnHandle> entry : assignments.entrySet()) {
final Symbol columnSymbol = entry.getKey();
final ColumnHandle columnHandle = entry.getValue();
try {
columnNames.add(columnHandle.getColumnName());
} catch (NotImplementedException e) {
// Deliberately ignored: some ColumnHandle implementations may not implement getColumnName().
// Such columns simply never take part in the by-name matching below.
}
final List<String> filterIds = context.getId(columnSymbol, i);
if (filterIds == null || filterIds.isEmpty()) {
// No dynamic filter is registered for this symbol in this disjunct.
continue;
}
for (String filterId : filterIds) {
// Try to get dynamic filter from local cache first.
// The local map is keyed by the plain filterId; the cache manager is keyed by cacheKey (filterId + queryId).
String cacheKey = createCacheKey(filterId, queryId);
DynamicFilter cachedDynamicFilter = cachedDynamicFilters.get(filterId);
if (cachedDynamicFilter == null) {
cachedDynamicFilter = dynamicFilterCacheManager.getDynamicFilter(cacheKey);
}
if (cachedDynamicFilter != null) {
// Combine multiple dynamic filters for same column handle
DynamicFilter dynamicFilter = result.get(columnHandle);
// The same dynamic filter might be referred to by multiple table scans for different columns due to multi-table joins.
// So clone before setting the columnHandle to avoid a race in setting the columnHandle.
cachedDynamicFilter = cachedDynamicFilter.clone();
cachedDynamicFilter.setColumnHandle(columnHandle);
if (dynamicFilter == null) {
dynamicFilter = cachedDynamicFilter;
} else {
dynamicFilter = DynamicFilterFactory.combine(columnHandle, dynamicFilter, cachedDynamicFilter);
}
// Set again because combine() may have returned a different instance.
dynamicFilter.setColumnHandle(columnHandle);
result.put(columnHandle, dynamicFilter);
continue;
}
// Local dynamic filters: built from the values held in predicates and cached under the plain filterId.
if (predicates.containsKey(filterId)) {
Optional<RowExpression> filter = context.getFilter(filterId, i);
Optional<Predicate<List>> filterPredicate = DynamicFilters.createDynamicFilterPredicate(filter);
DynamicFilter dynamicFilter = DynamicFilterFactory.create(filterId, columnHandle, predicates.get(filterId), LOCAL, filterPredicate, filter);
cachedDynamicFilters.put(filterId, dynamicFilter);
// Note: unlike the cached path above, this replaces any filter already collected for the column rather than combining.
result.put(columnHandle, dynamicFilter);
}
}
}
// Disjuncts that produced no filter are left out; the size check at the end detects this.
if (!result.isEmpty()) {
resultList.add(result);
}
}
if (isCrossRegionDynamicFilterEnabled(session)) {
// Each early return below hands back resultList directly and bypasses the disjunct-size check at the end.
if (!metadataOptional.isPresent()) {
return resultList;
}
// Check whether the tableScan is a DC connector table; if it is, consider pushing the cross-region bloom filter down to the next cluster.
if (!DataCenterUtility.isDCCatalog(metadataOptional.get(), tableScan.getTable().getCatalogName().getCatalogName())) {
return resultList;
}
// stateMap: key is the dc-connector-table column name, value is the bloom filter bytes
Map<String, byte[]> newBloomFilterFromStateStoreCache = dynamicFilterCacheManager.getBloomFitler(session.getQueryId().getId() + CROSS_LAYER_DYNAMIC_FILTER);
if (newBloomFilterFromStateStoreCache == null) {
return resultList;
}
// Check whether the tableScan contains the stateMap key; if it does, push the filter to the next cluster.
for (Map.Entry<String, byte[]> entry : newBloomFilterFromStateStoreCache.entrySet()) {
if (!columnNames.contains(entry.getKey())) {
continue;
}
// Synthetic handle that only carries the column name.
ColumnHandle columnHandle = new ColumnHandle() {
@Override
public String getColumnName() {
return entry.getKey();
}
};
BloomFilterDynamicFilter newBloomDynamicFilter = new BloomFilterDynamicFilter("", columnHandle, entry.getValue(), GLOBAL);
for (Map<ColumnHandle, DynamicFilter> result : resultList) {
// NOTE(review): result is keyed by ColumnHandle, but entry.getKey() is a String. String.equals() is false
// for any non-String, so this contains()/get() pair cannot match a key of the HashMap built above and the
// merge branch is never taken; every iteration falls through to the else branch. The intent looks like
// "find an existing filter for the same column name" - confirm and, if so, look the handle up by name.
if (result.keySet().contains(entry.getKey())) {
DynamicFilter existsFilter = result.get(entry.getKey());
if (existsFilter instanceof BloomFilterDynamicFilter) {
// Merges the cross-region bloom filter into the existing filter's deserialized bloom filter.
BloomFilter existsBloomFilter = ((BloomFilterDynamicFilter) existsFilter).getBloomFilterDeserialized();
existsBloomFilter.merge(newBloomDynamicFilter.getBloomFilterDeserialized());
DynamicFilter newDynamicFilter = new BloomFilterDynamicFilter(existsFilter.getFilterId(), columnHandle, existsBloomFilter, GLOBAL);
result.put(columnHandle, newDynamicFilter);
}
} else {
// Added under the synthetic handle, i.e. as an extra entry next to any filter keyed by the scan's own handle.
result.put(columnHandle, newBloomDynamicFilter);
}
}
}
}
// Only return filters when every disjunct contributed a map; otherwise return nothing.
if (resultList.size() != context.getDisjunctSize()) {
return ImmutableList.of();
}
return resultList;
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class BloomIndex method readFromMmap.
/**
 * Loads the bloom filter from the index file through a read-only memory mapping.
 * <p>
 * The mapped region is read as two ints (the number of hash functions, then the number of
 * longs that follow) and then that many long values.
 *
 * @return bloom filter constructed from the long words and hash-function count that were read
 * @throws IOException if the file cannot be opened or mapped
 */
private BloomFilter readFromMmap() throws IOException {
    try (RandomAccessFile indexFile = new RandomAccessFile(getFile(), "r");
            FileChannel channel = indexFile.getChannel()) {
        // Two int header fields precede the payload, hence the extra 2 * Integer.BYTES.
        MappedByteBuffer mapped = channel.map(FileChannel.MapMode.READ_ONLY, 0, 2 * Integer.BYTES + mmapSizeInByte);
        int hashFunctionCount = mapped.getInt();
        int wordCount = mapped.getInt();

        // Bulk-read the remaining payload as longs, starting right after the header.
        long[] words = new long[wordCount];
        mapped.asLongBuffer().get(words);
        return new BloomFilter(words, hashFunctionCount);
    }
}
Aggregations