Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
From class BloomFilterSqlAggregatorTest, method testBloomFilterAggExtractionFn.
@Test
public void testBloomFilterAggExtractionFn() throws Exception {
  cannotVectorize();

  BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
  for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
    // empty string extractionFn produces null
    if (raw == null || "".equals(raw)) {
      expected1.addBytes(null, 0, 0);
    } else {
      expected1.addString(raw.substring(0, 1));
    }
  }

  testQuery(
      "SELECT\n"
      + "BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000)\n"
      + "FROM numfoo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(
                    new BloomFilterAggregatorFactory(
                        "a0:agg",
                        new ExtractionDimensionSpec("dim1", "a0:dim1", new SubstringDimExtractionFn(0, 1)),
                        TEST_NUM_ENTRIES
                    )
                ))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(new Object[]{CalciteTests.getJsonMapper().writeValueAsString(expected1)})
  );
}
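The expected value above is computed by applying the same SUBSTRING extraction by hand and serializing the resulting filter. The following is a minimal, self-contained sketch of those BloomKFilter calls outside the test harness; the class name and sample values are only illustrative, and testString is assumed to be the read-side counterpart of addString.

import org.apache.druid.query.filter.BloomKFilter;

public class BloomKFilterSubstringSketch {
  public static void main(String[] args) {
    BloomKFilter filter = new BloomKFilter(1000);

    String[] values = {"abc", "def", "", null};
    for (String value : values) {
      if (value == null || value.isEmpty()) {
        // null or empty extraction results are recorded as a null entry, as in the test above
        filter.addBytes(null, 0, 0);
      } else {
        // add only the first character, mirroring SUBSTRING(dim1, 1, 1)
        filter.addString(value.substring(0, 1));
      }
    }

    System.out.println(filter.testString("a")); // true
    System.out.println(filter.testString("z")); // false, barring a false positive
  }
}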
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
From class BloomDimFilterSqlTest, method testBloomFilterBigNoParam.
@Ignore("this test is really slow and is intended to use for comparisons with testBloomFilterBigParameter")
@Test
public void testBloomFilterBigNoParam() throws Exception {
BloomKFilter filter = new BloomKFilter(5_000_000);
filter.addString("def");
byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
String base64 = StringUtils.encodeBase64String(bytes);
testQuery(StringUtils.format("SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(dim1, '%s')", base64), ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).filters(new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null)).aggregators(aggregators(new CountAggregatorFactory("a0"))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 1L }));
}
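Both this test and the next one pass the filter to the bloom_filter_test SQL function as a base64 string. Below is a sketch of producing that literal on its own, using the same serializer and string-utility calls as above; the import paths are assumptions based on where these classes live in the druid-bloom-filter extension and Druid's common utilities.

import org.apache.druid.guice.BloomFilterSerializersModule; // package assumed
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.filter.BloomKFilter;

import java.io.IOException;

public class BloomFilterSqlLiteralSketch {
  public static void main(String[] args) throws IOException {
    BloomKFilter filter = new BloomKFilter(1500);
    filter.addString("def");

    byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
    String base64 = StringUtils.encodeBase64String(bytes);

    // the literal is then interpolated into the SQL, e.g.
    // SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(dim1, '<base64>')
    System.out.println(base64);
  }
}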
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
From class BloomDimFilterSqlTest, method testBloomFilterVirtualColumn.
@Test
public void testBloomFilterVirtualColumn() throws Exception {
  BloomKFilter filter = new BloomKFilter(1500);
  filter.addString("def-foo");
  byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
  String base64 = StringUtils.encodeBase64String(bytes);

  testQuery(
      StringUtils.format("SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(concat(dim1, '-foo'), '%s')", base64),
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(expressionVirtualColumn("v0", "concat(\"dim1\",'-foo')", ColumnType.STRING))
                .filters(new BloomDimFilter("v0", BloomKFilterHolder.fromBloomKFilter(filter), null))
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(new Object[]{1L})
  );
}
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
From class BloomFilterGroupByQueryTest, method testNestedQuery.
@Test
public void testNestedQuery() throws Exception {
  if (!isV2) {
    return;
  }

  String query = "{"
                 + "\"queryType\": \"groupBy\","
                 + "\"dataSource\": {"
                 + "\"type\": \"query\","
                 + "\"query\": {"
                 + "\"queryType\":\"groupBy\","
                 + "\"dataSource\": \"test_datasource\","
                 + "\"intervals\": [ \"1970/2050\" ],"
                 + "\"granularity\":\"ALL\","
                 + "\"dimensions\":[],"
                 + "\"aggregations\": [{ \"type\":\"longSum\", \"name\":\"innerSum\", \"fieldName\":\"count\"}]"
                 + "}"
                 + "},"
                 + "\"granularity\": \"ALL\","
                 + "\"dimensions\": [],"
                 + "\"aggregations\": ["
                 + " { \"type\": \"bloom\", \"name\": \"bloom\", \"field\": \"innerSum\" }"
                 + "],"
                 + "\"intervals\": [ \"1970/2050\" ]"
                 + "}";

  MapBasedRow row = ingestAndQuery(query);

  BloomKFilter filter = BloomKFilter.deserialize((ByteBuffer) row.getRaw("bloom"));
  Assert.assertTrue(filter.testLong(13L));
  Assert.assertFalse(filter.testLong(5L));
}
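The aggregated bloom column comes back as a ByteBuffer that BloomKFilter.deserialize turns back into a filter for membership checks. The sketch below mirrors that round trip without the test harness: it serializes a locally built filter as a stand-in for row.getRaw("bloom") and reads it back; the BloomFilterSerializersModule import path is an assumption, and addLong/testLong are used as in the test above.

import org.apache.druid.guice.BloomFilterSerializersModule; // package assumed
import org.apache.druid.query.filter.BloomKFilter;

import java.io.IOException;
import java.nio.ByteBuffer;

public class BloomKFilterRoundTripSketch {
  public static void main(String[] args) throws IOException {
    // stand-in for the query result: build a filter locally and serialize it
    BloomKFilter original = new BloomKFilter(1500);
    original.addLong(13L);
    ByteBuffer buffer = ByteBuffer.wrap(BloomFilterSerializersModule.bloomKFilterToBytes(original));

    // deserialize and probe it, as the test does with row.getRaw("bloom")
    BloomKFilter filter = BloomKFilter.deserialize(buffer);
    System.out.println(filter.testLong(13L)); // true
    System.out.println(filter.testLong(5L));  // false, barring a false positive
  }
}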
Use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.
From class BloomFilterGroupByQueryTest, method testQueryFakeDimension.
@Test
public void testQueryFakeDimension() throws Exception {
  String query = "{"
                 + "\"queryType\": \"groupBy\","
                 + "\"dataSource\": \"test_datasource\","
                 + "\"granularity\": \"ALL\","
                 + "\"dimensions\": [],"
                 + "\"filter\":{ \"type\":\"selector\", \"dimension\":\"market\", \"value\":\"upfront\"},"
                 + "\"aggregations\": ["
                 + " { \"type\": \"bloom\", \"name\": \"blooming_quality\", \"field\": \"nope\" }"
                 + "],"
                 + "\"intervals\": [ \"1970/2050\" ]"
                 + "}";

  MapBasedRow row = ingestAndQuery(query);

  // a nil column results in a totally empty bloom filter
  BloomKFilter filter = new BloomKFilter(1500);

  Object val = row.getRaw("blooming_quality");
  String serialized = BloomFilterAggregatorTest.filterToString(BloomKFilter.deserialize((ByteBuffer) val));
  String empty = BloomFilterAggregatorTest.filterToString(filter);
  Assert.assertEquals(empty, serialized);
}
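The test compares the two filters through BloomFilterAggregatorTest.filterToString rather than directly. If that helper is not available, an equivalent check can compare the serialized bytes instead; the sketch below does so for two freshly constructed filters, which serialize identically because both have the same sizing and no entries. The BloomFilterSerializersModule import path is again an assumption.

import org.apache.druid.guice.BloomFilterSerializersModule; // package assumed
import org.apache.druid.query.filter.BloomKFilter;

import java.io.IOException;
import java.util.Arrays;

public class BloomKFilterByteComparisonSketch {
  public static void main(String[] args) throws IOException {
    BloomKFilter empty = new BloomKFilter(1500);
    BloomKFilter alsoEmpty = new BloomKFilter(1500);

    // two same-sized filters with no entries serialize to identical bytes
    byte[] a = BloomFilterSerializersModule.bloomKFilterToBytes(empty);
    byte[] b = BloomFilterSerializersModule.bloomKFilterToBytes(alsoEmpty);

    System.out.println(Arrays.equals(a, b)); // true
  }
}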