use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.
the class TestHiveUtil method testIsPartitionFiltered.
/**
 * Exercises {@code isPartitionFiltered} with null inputs, empty inputs, and
 * bloom-filter backed dynamic filters on the {@code pt_d} partition column.
 */
@Test
public void testIsPartitionFiltered() {
    final String nullInputMessage = "Should not filter partition if either partitions or dynamicFilters is null";
    final TypeManager types = new TestingTypeManager();

    // Null partitions and/or null dynamic filters must never filter.
    assertFalse(isPartitionFiltered(null, null, types), nullInputMessage);
    final Set<DynamicFilter> filterSet = new HashSet<>();
    final List<HivePartitionKey> partitionKeys = new ArrayList<>();
    assertFalse(isPartitionFiltered(partitionKeys, null, types), nullInputMessage);
    assertFalse(isPartitionFiltered(null, ImmutableList.of(filterSet), types), nullInputMessage);

    // Both sides empty.
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should not filter partition if partitions and dynamicFilters are empty");

    // Partition keys present, but still no dynamic filter registered.
    partitionKeys.add(new HivePartitionKey("pt_d", "0"));
    partitionKeys.add(new HivePartitionKey("app_id", "10000"));
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should not filter partition if dynamicFilters is empty");

    // Register an (initially empty) bloom filter on pt_d. Values are added to the
    // bloom filter AFTER it has been wrapped, so each assertion below observes the
    // state the bloom filter has at that point in the test.
    final ColumnHandle ptdColumn = new HiveColumnHandle("pt_d", HIVE_LONG, parseTypeSignature(BIGINT), 0, PARTITION_KEY, Optional.empty());
    final BloomFilter ptdBloom = new BloomFilter(1024 * 1024, 0.01);
    filterSet.add(new BloomFilterDynamicFilter("1", ptdColumn, ptdBloom, DynamicFilter.Type.GLOBAL));
    assertTrue(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should filter partition if any dynamicFilter has 0 element count");

    ptdBloom.add(1L);
    assertTrue(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should filter partition if partition value not in dynamicFilter");

    ptdBloom.add(0L);
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(filterSet), types),
            "Should not filter partition if partition value is in dynamicFilter");

    // A second, independent filter set that contains only the matching value.
    final Set<DynamicFilter> secondFilterSet = new HashSet<>();
    final BloomFilter secondBloom = new BloomFilter(1024 * 1024, 0.01);
    secondFilterSet.add(new BloomFilterDynamicFilter("1", ptdColumn, secondBloom, DynamicFilter.Type.GLOBAL));
    secondBloom.add(0L);
    assertFalse(
            isPartitionFiltered(partitionKeys, ImmutableList.of(secondFilterSet), types),
            "Should not filter partition if partition value is in dynamicFilter");
}
use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.
the class HivePageSource method filterRows.
/**
 * Evaluates the dynamic filters against every position of {@code page} and returns
 * a keep-mask for the page.
 *
 * <p>Each index {@code j} of {@code dynamicFilters} / {@code eligibleColumns} is one
 * disjunct: within a disjunct a position must pass the filter of every eligible
 * column (AND); across disjuncts the per-disjunct masks are OR-ed together.
 * A disjunct with no eligible columns contributes nothing to the result.
 *
 * @param dynamicFilters per disjunct, the dynamic filter registered for each column handle
 * @param page the page whose positions are tested
 * @param eligibleColumns per disjunct, page channel index mapped to the column handle to filter on
 * @param types value types indexed by page channel, used to read native values for non-bloom filters
 * @return an array of {@code page.getPositionCount()} flags; {@code true} means the position passed
 *         at least one disjunct
 */
private static boolean[] filterRows(List<Map<ColumnHandle, DynamicFilter>> dynamicFilters, Page page, List<Map<Integer, ColumnHandle>> eligibleColumns, Type[] types) {
    final int positionCount = page.getPositionCount();
    // A freshly allocated boolean[] is already all-false, so no explicit fill is needed.
    final boolean[] result = new boolean[positionCount];

    // loop to handle union of filters if any
    for (int j = 0; j < dynamicFilters.size(); j++) {
        final Map<Integer, ColumnHandle> columns = eligibleColumns.get(j);
        if (columns.isEmpty()) {
            // Nothing to evaluate for this disjunct; it must not mark any position as kept.
            continue;
        }
        final Map<ColumnHandle, DynamicFilter> filtersByColumn = dynamicFilters.get(j);

        // Start from "everything passes" and let each column's filter clear positions.
        final boolean[] filterResult = new boolean[positionCount];
        Arrays.fill(filterResult, true);
        for (Map.Entry<Integer, ColumnHandle> column : columns.entrySet()) {
            final int columnIndex = column.getKey();
            final DynamicFilter dynamicFilter = filtersByColumn.get(column.getValue());
            final Block block = page.getBlock(columnIndex).getLoadedBlock();
            if (dynamicFilter instanceof BloomFilterDynamicFilter) {
                // Bloom filters are applied by the block itself, updating filterResult in place.
                block.filter(((BloomFilterDynamicFilter) dynamicFilter).getBloomFilterDeserialized(), filterResult);
            }
            else {
                for (int i = 0; i < block.getPositionCount(); i++) {
                    filterResult[i] = filterResult[i] && dynamicFilter.contains(TypeUtils.readNativeValue(types[columnIndex], block, i));
                }
            }
        }

        // Union this disjunct into the overall result exactly once. The previous code
        // repeated this identical pass once per eligible column.
        // NOTE(review): assumes every block of the page has page.getPositionCount()
        // positions, which the array sizing above already relies on.
        for (int i = 0; i < positionCount; i++) {
            result[i] = result[i] || filterResult[i];
        }
    }
    return result;
}
use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.
the class DynamicFilterService method mergeDynamicFilters.
/**
 * Global Dynamic Filter merging, periodically looks for dynamic filters that can be merged and merges them.
 *
 * <p>For every registered query and every not-yet-merged filter of that query whose
 * merge condition holds, the partial results stored in the state store are merged
 * into a single bloom filter or hash set. The merged filter is cached for the query,
 * and, for {@code GLOBAL} filters only, also published to the merged-filters state map.
 * A filter is marked as merged whether or not the merge succeeded, so each filter is
 * attempted at most once; failures are logged and otherwise ignored.
 */
private void mergeDynamicFilters() {
    final StateStore stateStore = stateStoreProvider.getStateStore();
    for (Map.Entry<String, Map<String, DynamicFilterRegistryInfo>> queryToDynamicFiltersEntry : dynamicFilters.entrySet()) {
        final String queryId = queryToDynamicFiltersEntry.getKey();
        // Single lookup-or-create instead of containsKey/put/get.
        final Map<String, DynamicFilter> cachedDynamicFiltersForQuery = cachedDynamicFilters.computeIfAbsent(queryId, ignored -> new ConcurrentHashMap<>());
        final StateMap mergedDynamicFilters = (StateMap) stateStore.getOrCreateStateCollection(DynamicFilterUtils.MERGED_DYNAMIC_FILTERS, MAP);

        for (Map.Entry<String, DynamicFilterRegistryInfo> columnToDynamicFilterEntry : queryToDynamicFiltersEntry.getValue().entrySet()) {
            final DynamicFilterRegistryInfo registryInfo = columnToDynamicFilterEntry.getValue();
            if (registryInfo.isMerged()) {
                continue;
            }

            final String filterId = columnToDynamicFilterEntry.getKey();
            final Type filterType = registryInfo.getType();
            final DataType filterDataType = registryInfo.getDataType();
            final Optional<Predicate<List>> dfFilter = registryInfo.getFilter();
            final Symbol column = registryInfo.getSymbol();
            final String filterKey = createKey(DynamicFilterUtils.FILTERPREFIX, filterId, queryId);

            if (!hasMergeCondition(filterId, queryId)) {
                // Not ready yet: leave it unmerged so a later run can pick it up.
                continue;
            }

            Collection<Object> results = ((StateSet) stateStore.getStateCollection(createKey(DynamicFilterUtils.PARTIALPREFIX, filterId, queryId))).getAll();
            try {
                DynamicFilter mergedFilter;
                if (filterDataType == BLOOM_FILTER) {
                    BloomFilter mergedBloomFilter = mergeBloomFilters(results);
                    final double expectedFpp = mergedBloomFilter.expectedFpp();
                    if (expectedFpp > DynamicFilterUtils.BLOOM_FILTER_EXPECTED_FPP) {
                        // Report the value that actually tripped the check, plus the element count for context.
                        throw new PrestoException(GENERIC_INTERNAL_ERROR, "FPP too high: " + expectedFpp + ", approximate element count: " + mergedBloomFilter.approximateElementCount());
                    }
                    mergedFilter = new BloomFilterDynamicFilter(filterKey, null, mergedBloomFilter, filterType);
                    if (filterType == GLOBAL) {
                        // Global bloom filters are published to the state store in serialized form.
                        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
                            mergedBloomFilter.writeTo(out);
                            byte[] filter = out.toByteArray();
                            mergedDynamicFilters.put(filterKey, filter);
                        }
                    }
                }
                else if (filterDataType == HASHSET) {
                    Set mergedSet = mergeHashSets(results);
                    mergedFilter = DynamicFilterFactory.create(filterKey, null, mergedSet, filterType, dfFilter, Optional.empty());
                    if (filterType == GLOBAL) {
                        mergedDynamicFilters.put(filterKey, mergedSet);
                    }
                }
                else {
                    throw new PrestoException(GENERIC_INTERNAL_ERROR, "Unsupported filter data type: " + filterDataType);
                }

                log.debug("Merged successfully dynamic filter id: " + filterId + "-" + queryId + " type: " + filterDataType + ", column: " + column + ", item count: " + mergedFilter.getSize());
                cachedDynamicFiltersForQuery.put(filterId, mergedFilter);
            }
            catch (IOException | PrestoException e) {
                // Best effort: a filter that cannot be merged is simply not used.
                log.warn("Could not merge dynamic filter: " + e.getLocalizedMessage());
            }
            finally {
                // for each dynamic filter we only try to merge it once
                registryInfo.setMerged();
            }
        }
    }
}
use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.
the class TestHivePageSource method testFilterRows.
/**
 * Filters a two-column page (column 0 holds 0..1023, column 1 holds 10000..11023)
 * with one bloom filter per column, each seeded with ten consecutive values starting
 * at the supplied offset, and checks how many positions remain.
 */
@Test(dataProvider = "data")
public void testFilterRows(int columnOffset1, int columnOffset2, int expectedPositionCount, String message) {
    final int rowCount = 1024;
    final Type[] types = {BigintType.BIGINT, BigintType.BIGINT};

    // Build both columns in a single pass.
    BlockBuilder dayValues = new LongArrayBlockBuilder(null, rowCount);
    BlockBuilder appValues = new LongArrayBlockBuilder(null, rowCount);
    for (int row = 0; row < rowCount; row++) {
        dayValues.writeLong(row);
        appValues.writeLong(10000 + row);
    }
    Page page = new Page(dayValues.build(), appValues.build());

    ColumnHandle dayColumn = new HiveColumnHandle("pt_d", HIVE_INT, parseTypeSignature(INTEGER), 0, REGULAR, Optional.empty());
    ColumnHandle appColumn = new HiveColumnHandle("app_d", HIVE_INT, parseTypeSignature(INTEGER), 1, REGULAR, Optional.empty());

    // Seed each bloom filter with ten consecutive values from its offset.
    BloomFilter dayBloom = new BloomFilter(1024 * 1024, 0.01);
    BloomFilter appBloom = new BloomFilter(1024 * 1024, 0.01);
    for (int step = 0; step < 10; step++) {
        dayBloom.add(columnOffset1 + step);
        appBloom.add(columnOffset2 + step);
    }

    Map<ColumnHandle, DynamicFilter> filtersByColumn = new HashMap<>();
    filtersByColumn.put(dayColumn, new BloomFilterDynamicFilter("1", dayColumn, dayBloom, DynamicFilter.Type.GLOBAL));
    filtersByColumn.put(appColumn, new BloomFilterDynamicFilter("2", appColumn, appBloom, DynamicFilter.Type.GLOBAL));

    // A single disjunct covering both columns.
    List<Map<ColumnHandle, DynamicFilter>> disjuncts = new ArrayList<>();
    disjuncts.add(filtersByColumn);
    List<Map<Integer, ColumnHandle>> eligibleColumns = ImmutableList.of(ImmutableMap.of(0, dayColumn, 1, appColumn));

    Page filteredPage = filter(disjuncts, page, eligibleColumns, types);
    assertEquals(filteredPage.getPositionCount(), expectedPositionCount, message);
}
use of io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter in project hetu-core by openlookeng.
the class LocalDynamicFiltersCollector method getDynamicFilters.
/**
 * Collects the dynamic filters that are currently available for the columns of a table scan.
 *
 * <p>One map is built per disjunct ({@code context.getDisjunctSize()}). For every assigned
 * column, each filter id of that disjunct is resolved first from the local
 * {@code cachedDynamicFilters}, then from {@code dynamicFilterCacheManager}; if neither has
 * it and a local predicate exists for the id, a {@code LOCAL} filter is created and cached.
 * Several filters on the same column handle are combined into one.
 *
 * <p>When cross-region dynamic filtering is enabled and the scanned table belongs to a DC
 * catalog, bloom filters fetched through {@code dynamicFilterCacheManager} for columns of
 * this scan are added to every per-disjunct map.
 *
 * @param tableScan TableScanNode that has DynamicFilter applied
 * @return one ColumnHandle-to-DynamicFilter map per disjunct, containing the filters that are
 *         ready for use; an empty list when the final check finds that not every disjunct
 *         produced a non-empty map. Note that the early returns in the cross-region section
 *         return the collected list as-is, without that final check.
 */
List<Map<ColumnHandle, DynamicFilter>> getDynamicFilters(TableScanNode tableScan) {
Map<Symbol, ColumnHandle> assignments = tableScan.getAssignments();
// Skips symbols irrelevant to this table scan node.
// Column names of this scan; only consulted by the cross-region section further down.
Set<String> columnNames = new HashSet<>();
List<Map<ColumnHandle, DynamicFilter>> resultList = new ArrayList<>();
// One result map per disjunct.
for (int i = 0; i < context.getDisjunctSize(); i++) {
Map<ColumnHandle, DynamicFilter> result = new HashMap<ColumnHandle, DynamicFilter>();
for (Map.Entry<Symbol, ColumnHandle> entry : assignments.entrySet()) {
final Symbol columnSymbol = entry.getKey();
final ColumnHandle columnHandle = entry.getValue();
try {
columnNames.add(columnHandle.getColumnName());
} catch (NotImplementedException e) {
// ignore this exception, maybe some implementation class not implement the default method.
}
final List<String> filterIds = context.getId(columnSymbol, i);
if (filterIds == null || filterIds.isEmpty()) {
continue;
}
for (String filterId : filterIds) {
// Try to get dynamic filter from local cache first
// The local cache is keyed by filterId, the cache manager by the query-scoped cacheKey.
String cacheKey = createCacheKey(filterId, queryId);
DynamicFilter cachedDynamicFilter = cachedDynamicFilters.get(filterId);
if (cachedDynamicFilter == null) {
cachedDynamicFilter = dynamicFilterCacheManager.getDynamicFilter(cacheKey);
}
if (cachedDynamicFilter != null) {
// Combine multiple dynamic filters for same column handle
DynamicFilter dynamicFilter = result.get(columnHandle);
// Same dynamic filter might be referred in multiple table scans for different columns due multi table joins.
// So clone before setting the columnHandle to avoid race in setting the columnHandle.
cachedDynamicFilter = cachedDynamicFilter.clone();
cachedDynamicFilter.setColumnHandle(columnHandle);
if (dynamicFilter == null) {
dynamicFilter = cachedDynamicFilter;
} else {
dynamicFilter = DynamicFilterFactory.combine(columnHandle, dynamicFilter, cachedDynamicFilter);
}
dynamicFilter.setColumnHandle(columnHandle);
result.put(columnHandle, dynamicFilter);
// A cached filter was found for this id; skip the local-predicate path below.
continue;
}
// Local dynamic filters
if (predicates.containsKey(filterId)) {
Optional<RowExpression> filter = context.getFilter(filterId, i);
Optional<Predicate<List>> filterPredicate = DynamicFilters.createDynamicFilterPredicate(filter);
DynamicFilter dynamicFilter = DynamicFilterFactory.create(filterId, columnHandle, predicates.get(filterId), LOCAL, filterPredicate, filter);
// The same instance is stored in the local cache and in the result map.
// Unlike the cached path above, this put replaces any filter already collected for the column.
cachedDynamicFilters.put(filterId, dynamicFilter);
result.put(columnHandle, dynamicFilter);
}
}
}
// Disjuncts without any ready filter are left out; the size check at the end relies on this.
if (!result.isEmpty()) {
resultList.add(result);
}
}
if (isCrossRegionDynamicFilterEnabled(session)) {
// The three early returns below hand back resultList directly, bypassing the
// disjunct-count check at the end of the method.
if (!metadataOptional.isPresent()) {
return resultList;
}
// check the tableScan is a dc connector table,if a dc table, should consider push down the cross region bloom filter to next cluster
if (!DataCenterUtility.isDCCatalog(metadataOptional.get(), tableScan.getTable().getCatalogName().getCatalogName())) {
return resultList;
}
// stateMap, key is dc-connector-table column name, value is bloomFilter bytes
Map<String, byte[]> newBloomFilterFromStateStoreCache = dynamicFilterCacheManager.getBloomFitler(session.getQueryId().getId() + CROSS_LAYER_DYNAMIC_FILTER);
if (newBloomFilterFromStateStoreCache == null) {
return resultList;
}
// check tableScan contains the stateMap.key, if contains, should push the filter to next cluster
for (Map.Entry<String, byte[]> entry : newBloomFilterFromStateStoreCache.entrySet()) {
if (!columnNames.contains(entry.getKey())) {
continue;
}
// Synthetic handle that only carries the column name; as an anonymous class it is
// presumably equal only to itself, so it never collides with a connector handle in the maps.
ColumnHandle columnHandle = new ColumnHandle() {
@Override
public String getColumnName() {
return entry.getKey();
}
};
BloomFilterDynamicFilter newBloomDynamicFilter = new BloomFilterDynamicFilter("", columnHandle, entry.getValue(), GLOBAL);
for (Map<ColumnHandle, DynamicFilter> result : resultList) {
// NOTE(review): result is keyed by ColumnHandle, but entry.getKey() is a String (column name).
// Unless some ColumnHandle implementation compares equal to a String, this contains() is
// always false and get() below would return null, so the merge branch looks unreachable and
// the else branch always runs. If a lookup by column name was intended, confirm the desired
// merge semantics before changing it, since that would enable a code path that has not been running.
if (result.keySet().contains(entry.getKey())) {
DynamicFilter existsFilter = result.get(entry.getKey());
if (existsFilter instanceof BloomFilterDynamicFilter) {
BloomFilter existsBloomFilter = ((BloomFilterDynamicFilter) existsFilter).getBloomFilterDeserialized();
existsBloomFilter.merge(newBloomDynamicFilter.getBloomFilterDeserialized());
DynamicFilter newDynamicFilter = new BloomFilterDynamicFilter(existsFilter.getFilterId(), columnHandle, existsBloomFilter, GLOBAL);
result.put(columnHandle, newDynamicFilter);
}
} else {
// Adds the cross-region filter as an extra entry under the synthetic handle.
result.put(columnHandle, newBloomDynamicFilter);
}
}
}
}
// Only hand out filters when every disjunct produced a non-empty map.
if (resultList.size() != context.getDisjunctSize()) {
return ImmutableList.of();
}
return resultList;
}
Aggregations