Search in sources :

Example 26 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testBloomFilterAggExtractionFn.

@Test
public void testBloomFilterAggExtractionFn() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
        // empty string extractionFn produces null
        if (raw == null || "".equals(raw)) {
            expected1.addBytes(null, 0, 0);
        } else {
            expected1.addString(raw.substring(0, 1));
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new ExtractionDimensionSpec("dim1", "a0:dim1", new SubstringDimExtractionFn(0, 1)), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
Also used : SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 27 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomDimFilterSqlTest method testBloomFilterBigNoParam.

@Ignore("this test is really slow and is intended to use for comparisons with testBloomFilterBigParameter")
@Test
public void testBloomFilterBigNoParam() throws Exception {
    BloomKFilter filter = new BloomKFilter(5_000_000);
    filter.addString("def");
    byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
    String base64 = StringUtils.encodeBase64String(bytes);
    testQuery(StringUtils.format("SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(dim1, '%s')", base64), ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).filters(new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null)).aggregators(aggregators(new CountAggregatorFactory("a0"))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 1L }));
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) Ignore(org.junit.Ignore) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 28 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomDimFilterSqlTest method testBloomFilterVirtualColumn.

@Test
public void testBloomFilterVirtualColumn() throws Exception {
    BloomKFilter filter = new BloomKFilter(1500);
    filter.addString("def-foo");
    byte[] bytes = BloomFilterSerializersModule.bloomKFilterToBytes(filter);
    String base64 = StringUtils.encodeBase64String(bytes);
    testQuery(StringUtils.format("SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(concat(dim1, '-foo'), '%s')", base64), ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).virtualColumns(expressionVirtualColumn("v0", "concat(\"dim1\",'-foo')", ColumnType.STRING)).filters(new BloomDimFilter("v0", BloomKFilterHolder.fromBloomKFilter(filter), null)).aggregators(aggregators(new CountAggregatorFactory("a0"))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 1L }));
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 29 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterGroupByQueryTest method testNestedQuery.

@Test
public void testNestedQuery() throws Exception {
    if (!isV2) {
        return;
    }
    String query = "{" + "\"queryType\": \"groupBy\"," + "\"dataSource\": {" + "\"type\": \"query\"," + "\"query\": {" + "\"queryType\":\"groupBy\"," + "\"dataSource\": \"test_datasource\"," + "\"intervals\": [ \"1970/2050\" ]," + "\"granularity\":\"ALL\"," + "\"dimensions\":[]," + "\"aggregations\": [{ \"type\":\"longSum\", \"name\":\"innerSum\", \"fieldName\":\"count\"}]" + "}" + "}," + "\"granularity\": \"ALL\"," + "\"dimensions\": []," + "\"aggregations\": [" + "  { \"type\": \"bloom\", \"name\": \"bloom\", \"field\": \"innerSum\" }" + "]," + "\"intervals\": [ \"1970/2050\" ]" + "}";
    MapBasedRow row = ingestAndQuery(query);
    BloomKFilter filter = BloomKFilter.deserialize((ByteBuffer) row.getRaw("bloom"));
    Assert.assertTrue(filter.testLong(13L));
    Assert.assertFalse(filter.testLong(5L));
}
Also used : MapBasedRow(org.apache.druid.data.input.MapBasedRow) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 30 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterGroupByQueryTest method testQueryFakeDimension.

@Test
public void testQueryFakeDimension() throws Exception {
    String query = "{" + "\"queryType\": \"groupBy\"," + "\"dataSource\": \"test_datasource\"," + "\"granularity\": \"ALL\"," + "\"dimensions\": []," + "\"filter\":{ \"type\":\"selector\", \"dimension\":\"market\", \"value\":\"upfront\"}," + "\"aggregations\": [" + "  { \"type\": \"bloom\", \"name\": \"blooming_quality\", \"field\": \"nope\" }" + "]," + "\"intervals\": [ \"1970/2050\" ]" + "}";
    MapBasedRow row = ingestAndQuery(query);
    // a nil column results in a totally empty bloom filter
    BloomKFilter filter = new BloomKFilter(1500);
    Object val = row.getRaw("blooming_quality");
    String serialized = BloomFilterAggregatorTest.filterToString(BloomKFilter.deserialize((ByteBuffer) val));
    String empty = BloomFilterAggregatorTest.filterToString(filter);
    Assert.assertEquals(empty, serialized);
}
Also used : MapBasedRow(org.apache.druid.data.input.MapBasedRow) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) ByteBuffer(java.nio.ByteBuffer) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

BloomKFilter (org.apache.druid.query.filter.BloomKFilter)40 Test (org.junit.Test)37 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)20 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)17 CardinalityAggregatorTest (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorTest)12 BloomFilterAggregatorFactory (org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory)9 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)8 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)8 BloomDimFilter (org.apache.druid.query.filter.BloomDimFilter)8 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)8 ByteBuffer (java.nio.ByteBuffer)7 InputRow (org.apache.druid.data.input.InputRow)7 MapBasedRow (org.apache.druid.data.input.MapBasedRow)4 Expr (org.apache.druid.math.expr.Expr)4 ExprEval (org.apache.druid.math.expr.ExprEval)4 GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest)4 DimensionSelector (org.apache.druid.segment.DimensionSelector)3 ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn)3 IOException (java.io.IOException)2 Nullable (javax.annotation.Nullable)2