Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
Class BloomDimFilterSqlTest, method testBloomFilters.
/**
 * Runs a SQL query that ORs two {@code bloom_filter_test} calls (on {@code dim1} and
 * {@code dim2}) and passes {@code testQuery} a timeseries query whose filter is an
 * {@link OrDimFilter} over two {@link BloomDimFilter}s, together with a single result row.
 */
@Test
public void testBloomFilters() throws Exception {
  final BloomKFilter dim1Bloom = new BloomKFilter(1500);
  final BloomKFilter dim2Bloom = new BloomKFilter(1500);

  dim1Bloom.addString("def");
  // NOTE(review): "abc" is added to the first filter as well, so the filter used for dim2
  // never receives a value. This may be a typo for the second filter; it is kept as-is
  // because the result row below was written against this setup — confirm intent.
  dim1Bloom.addString("abc");

  // Both filters are embedded in the SQL text as base64-encoded serialized bytes.
  final String dim1Base64 =
      StringUtils.encodeBase64String(BloomFilterSerializersModule.bloomKFilterToBytes(dim1Bloom));
  final String dim2Base64 =
      StringUtils.encodeBase64String(BloomFilterSerializersModule.bloomKFilterToBytes(dim2Bloom));

  final String sql = StringUtils.format(
      "SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(dim1, '%s') OR bloom_filter_test(dim2, '%s')",
      dim1Base64,
      dim2Base64
  );

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(
                    new OrDimFilter(
                        new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(dim1Bloom), null),
                        new BloomDimFilter("dim2", BloomKFilterHolder.fromBloomKFilter(dim2Bloom), null)
                    )
                )
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(new Object[] { 2L })
  );
}
Use of org.apache.druid.query.filter.BloomKFilter in project hive by apache.
Class DruidStorageHandlerUtils, method toDruidFilter.
/**
 * Translates a Hive filter expression into a Druid {@link DimFilter}.
 *
 * <p>Handled expression shapes:
 * <ul>
 *   <li>AND: children are translated recursively; children that yield no filter are skipped
 *       and the rest are combined in an {@link AndDimFilter}.</li>
 *   <li>OR: children are translated recursively and combined in an {@link OrDimFilter}, but
 *       only when <em>every</em> child could be translated. Dropping a branch of a disjunction
 *       would make the resulting filter stricter than the source expression, so in that case
 *       no filter is produced at all.</li>
 *   <li>{@code GenericUDFBetween}: becomes a {@link BoundDimFilter} on the column extracted
 *       from child 1, with bounds evaluated from children 2 and 3.</li>
 *   <li>{@code GenericUDFInBloomFilter}: becomes a {@link BloomDimFilter} on the column
 *       extracted from child 0, using the bloom filter evaluated from child 1.</li>
 * </ul>
 *
 * @param filterExpr           expression to translate; may be {@code null}
 * @param configuration        passed through to the value / bloom-filter evaluation helpers
 * @param virtualColumns       passed through to column-name extraction
 * @param resolveDynamicValues passed through to the value / bloom-filter evaluation helpers
 * @return the translated filter, or {@code null} when {@code filterExpr} is {@code null} or
 *         no filter can be produced for it
 * @throws RuntimeException wrapping a {@code HiveException} or {@code IOException} raised
 *                          while evaluating bounds or the bloom filter
 */
@Nullable
private static DimFilter toDruidFilter(ExprNodeDesc filterExpr, Configuration configuration, List<VirtualColumn> virtualColumns, boolean resolveDynamicValues) {
  if (filterExpr == null) {
    return null;
  }
  Class<? extends GenericUDF> genericUDFClass = getGenericUDFClassFromExprDesc(filterExpr);
  if (FunctionRegistry.isOpAnd(filterExpr)) {
    // For a conjunction it is safe to keep only the translatable children: the result is
    // merely less selective than the source expression.
    List<DimFilter> delegates =
        translateChildFilters(filterExpr.getChildren(), configuration, virtualColumns, resolveDynamicValues);
    if (!delegates.isEmpty()) {
      return new AndDimFilter(delegates);
    }
  }
  if (FunctionRegistry.isOpOr(filterExpr)) {
    List<ExprNodeDesc> children = filterExpr.getChildren();
    List<DimFilter> delegates =
        translateChildFilters(children, configuration, virtualColumns, resolveDynamicValues);
    // A disjunction is only valid if no branch was lost during translation; otherwise rows
    // matching the untranslated branch would be wrongly excluded.
    if (!delegates.isEmpty() && delegates.size() == children.size()) {
      return new OrDimFilter(delegates);
    }
  } else if (GenericUDFBetween.class == genericUDFClass) {
    List<ExprNodeDesc> child = filterExpr.getChildren();
    // NOTE(review): child 0 is never consulted here. If it carries a NOT BETWEEN flag, an
    // inverted range would be translated as a plain range — confirm against callers.
    String col = extractColName(child.get(1), virtualColumns);
    if (col != null) {
      try {
        StringComparator comparator = stringTypeInfos.contains(child.get(1).getTypeInfo()) ? StringComparators.LEXICOGRAPHIC : StringComparators.NUMERIC;
        String lower = evaluate(child.get(2), configuration, resolveDynamicValues);
        String upper = evaluate(child.get(3), configuration, resolveDynamicValues);
        return new BoundDimFilter(col, lower, upper, false, false, null, null, comparator);
      } catch (HiveException e) {
        throw new RuntimeException(e);
      }
    }
  } else if (GenericUDFInBloomFilter.class == genericUDFClass) {
    List<ExprNodeDesc> child = filterExpr.getChildren();
    String col = extractColName(child.get(0), virtualColumns);
    if (col != null) {
      try {
        BloomKFilter bloomFilter = evaluateBloomFilter(child.get(1), configuration, resolveDynamicValues);
        return new BloomDimFilter(col, BloomKFilterHolder.fromBloomKFilter(bloomFilter), null);
      } catch (HiveException | IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
  return null;
}

/**
 * Translates each expression in {@code children} with {@code toDruidFilter} and returns the
 * non-null results in their original order. The returned list is shorter than
 * {@code children} exactly when at least one child produced no filter.
 */
private static List<DimFilter> translateChildFilters(List<ExprNodeDesc> children, Configuration configuration, List<VirtualColumn> virtualColumns, boolean resolveDynamicValues) {
  List<DimFilter> translated = Lists.newArrayList();
  for (ExprNodeDesc childExpr : children) {
    DimFilter childFilter = toDruidFilter(childExpr, configuration, virtualColumns, resolveDynamicValues);
    if (childFilter != null) {
      translated.add(childFilter);
    }
  }
  return translated;
}
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testBloomFilterAggDoubleVirtualColumn.
/**
 * Runs {@code BLOOM_FILTER(d1 * 2, 1000)} over {@code numfoo} and passes {@code testQuery} a
 * timeseries query that aggregates the expression virtual column {@code v0}, plus the JSON
 * form of a bloom filter built here from the doubled {@code d1} value of every row.
 */
@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
  cannotVectorize();

  final BloomKFilter expectedFilter = new BloomKFilter(TEST_NUM_ENTRIES);
  for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    final Object d1 = inputRow.getRaw("d1");
    if (d1 != null) {
      // Mirrors the "d1 * 2" expression of the query.
      expectedFilter.addDouble(2 * ((Number) d1).doubleValue());
    } else if (NullHandling.replaceWithDefault()) {
      expectedFilter.addDouble(NullHandling.defaultDoubleValue());
    } else {
      expectedFilter.addBytes(null, 0, 0);
    }
  }

  testQuery(
      "SELECT\n"
      + "BLOOM_FILTER(d1 * 2, 1000)\n"
      + "FROM numfoo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn("v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE)
                )
                .aggregators(
                    ImmutableList.of(
                        new BloomFilterAggregatorFactory(
                            "a0:agg",
                            new DefaultDimensionSpec("v0", "a0:v0"),
                            TEST_NUM_ENTRIES
                        )
                    )
                )
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expectedFilter) }
      )
  );
}
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testBloomFilterTwoAggs.
/**
 * Runs two {@code BLOOM_FILTER} aggregations ({@code dim1} and {@code dim2}) over
 * {@code numfoo} in one query and passes {@code testQuery} a timeseries query with two
 * bloom filter aggregators, plus the JSON form of two filters built here row by row.
 */
@Test
public void testBloomFilterTwoAggs() throws Exception {
  cannotVectorize();

  final BloomKFilter dim1Expected = new BloomKFilter(TEST_NUM_ENTRIES);
  final BloomKFilter dim2Expected = new BloomKFilter(TEST_NUM_ENTRIES);

  for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    // dim1: a single raw string per row; a null value is recorded via addBytes(null, 0, 0).
    final String dim1Value = NullHandling.emptyToNullIfNeeded((String) inputRow.getRaw("dim1"));
    if (dim1Value != null) {
      dim1Expected.addString(dim1Value);
    } else {
      dim1Expected.addBytes(null, 0, 0);
    }

    // dim2: read as a list of values; an empty list is recorded like a single null.
    final List<String> dim2Values = inputRow.getDimension("dim2");
    if (dim2Values.isEmpty()) {
      dim2Expected.addBytes(null, 0, 0);
    }
    for (String rawValue : dim2Values) {
      final String dim2Value = NullHandling.emptyToNullIfNeeded(rawValue);
      if (dim2Value != null) {
        dim2Expected.addString(dim2Value);
      } else {
        dim2Expected.addBytes(null, 0, 0);
      }
    }
  }

  testQuery(
      "SELECT\n"
      + "BLOOM_FILTER(dim1, 1000),\n"
      + "BLOOM_FILTER(dim2, 1000)\n"
      + "FROM numfoo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(
                    ImmutableList.of(
                        new BloomFilterAggregatorFactory(
                            "a0:agg",
                            new DefaultDimensionSpec("dim1", "a0:dim1"),
                            TEST_NUM_ENTRIES
                        ),
                        new BloomFilterAggregatorFactory(
                            "a1:agg",
                            new DefaultDimensionSpec("dim2", "a1:dim2"),
                            TEST_NUM_ENTRIES
                        )
                    )
                )
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[] {
              CalciteTests.getJsonMapper().writeValueAsString(dim1Expected),
              CalciteTests.getJsonMapper().writeValueAsString(dim2Expected)
          }
      )
  );
}
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testEmptyTimeseriesResults.
/**
 * Runs two {@code BLOOM_FILTER} aggregations ({@code dim1} and {@code l1}) over
 * {@code numfoo} restricted by {@code dim2 = 0}, and passes {@code testQuery} the JSON form
 * of two bloom filters that never had a value added.
 */
@Test
public void testEmptyTimeseriesResults() throws Exception {
  cannotVectorize();

  // makes empty bloom filters: nothing is ever added to either expected filter
  final BloomKFilter emptyDim1Filter = new BloomKFilter(TEST_NUM_ENTRIES);
  final BloomKFilter emptyL1Filter = new BloomKFilter(TEST_NUM_ENTRIES);

  testQuery(
      "SELECT\n"
      + "BLOOM_FILTER(dim1, 1000),\n"
      + "BLOOM_FILTER(l1, 1000)\n"
      + "FROM numfoo where dim2 = 0",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .filters(
                    BaseCalciteQueryTest.bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)
                )
                .aggregators(
                    ImmutableList.of(
                        new BloomFilterAggregatorFactory(
                            "a0:agg",
                            new DefaultDimensionSpec("dim1", "a0:dim1"),
                            TEST_NUM_ENTRIES
                        ),
                        new BloomFilterAggregatorFactory(
                            "a1:agg",
                            new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG),
                            TEST_NUM_ENTRIES
                        )
                    )
                )
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[] {
              CalciteTests.getJsonMapper().writeValueAsString(emptyDim1Filter),
              CalciteTests.getJsonMapper().writeValueAsString(emptyL1Filter)
          }
      )
  );
}
Aggregations