Search in sources :

Example 21 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomDimFilterSqlTest method testBloomFilters.

/**
 * Runs a SQL query that ORs two {@code bloom_filter_test} calls (on dim1 and dim2) and checks that
 * it plans to a timeseries query with an {@link OrDimFilter} of two {@link BloomDimFilter}s and
 * returns a single row containing the count 2.
 */
@Test
public void testBloomFilters() throws Exception {
    BloomKFilter filter = new BloomKFilter(1500);
    filter.addString("def");
    BloomKFilter filter2 = new BloomKFilter(1500);
    // NOTE(review): this adds "abc" to `filter`, so `filter2` is serialized empty. It may have been
    // meant to be filter2.addString("abc"); confirm before changing, because the expected count
    // below may depend on the current behavior.
    filter.addString("abc");

    // The SQL function receives each filter as a base64 string of its serialized bytes.
    String base64 = StringUtils.encodeBase64String(BloomFilterSerializersModule.bloomKFilterToBytes(filter));
    String base642 = StringUtils.encodeBase64String(BloomFilterSerializersModule.bloomKFilterToBytes(filter2));
    String sql = StringUtils.format(
        "SELECT COUNT(*) FROM druid.foo WHERE bloom_filter_test(dim1, '%s') OR bloom_filter_test(dim2, '%s')",
        base64,
        base642
    );

    BloomDimFilter dim1Filter = new BloomDimFilter("dim1", BloomKFilterHolder.fromBloomKFilter(filter), null);
    BloomDimFilter dim2Filter = new BloomDimFilter("dim2", BloomKFilterHolder.fromBloomKFilter(filter2), null);

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(new OrDimFilter(dim1Filter, dim2Filter))
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { 2L })
    );
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 22 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project hive by apache.

the class DruidStorageHandlerUtils method toDruidFilter.

/**
 * Recursively translates a Hive filter expression into a Druid {@link DimFilter}.
 *
 * <p>Handled shapes: AND and OR (children translated recursively, children that yield
 * {@code null} are left out), BETWEEN (becomes a {@link BoundDimFilter}) and the in-bloom-filter
 * UDF (becomes a {@link BloomDimFilter}).
 *
 * @param filterExpr           expression to translate; may be {@code null}
 * @param configuration        passed through to the value-evaluation helpers
 * @param virtualColumns       passed through to {@code extractColName}
 * @param resolveDynamicValues passed through to the value-evaluation helpers
 * @return the translated filter, or {@code null} when {@code filterExpr} is {@code null}, is not
 *         one of the handled shapes, no column name could be extracted, or an AND/OR had no
 *         translatable children
 * @throws RuntimeException wrapping a {@link HiveException} or {@link IOException} raised while
 *                          evaluating BETWEEN bounds or the bloom filter
 */
@Nullable
private static DimFilter toDruidFilter(ExprNodeDesc filterExpr, Configuration configuration, List<VirtualColumn> virtualColumns, boolean resolveDynamicValues) {
    if (filterExpr == null) {
        return null;
    }
    Class<? extends GenericUDF> genericUDFClass = getGenericUDFClassFromExprDesc(filterExpr);

    if (FunctionRegistry.isOpAnd(filterExpr)) {
        List<DimFilter> conjuncts = Lists.newArrayList();
        for (ExprNodeDesc operand : filterExpr.getChildren()) {
            DimFilter translated = toDruidFilter(operand, configuration, virtualColumns, resolveDynamicValues);
            if (translated != null) {
                conjuncts.add(translated);
            }
        }
        if (!conjuncts.isEmpty()) {
            return new AndDimFilter(conjuncts);
        }
        // An AND with nothing translatable deliberately falls through to the checks below.
    }

    if (FunctionRegistry.isOpOr(filterExpr)) {
        // NOTE(review): operands that translate to null are silently dropped from the OR; confirm
        // that callers still apply the original predicate, otherwise this narrows the result.
        List<DimFilter> disjuncts = Lists.newArrayList();
        for (ExprNodeDesc operand : filterExpr.getChildren()) {
            DimFilter translated = toDruidFilter(operand, configuration, virtualColumns, resolveDynamicValues);
            if (translated != null) {
                disjuncts.add(translated);
            }
        }
        return disjuncts.isEmpty() ? null : new OrDimFilter(disjuncts);
    }

    if (GenericUDFBetween.class == genericUDFClass) {
        // Child 1 is the column operand; children 2 and 3 supply the lower and upper bounds.
        List<ExprNodeDesc> operands = filterExpr.getChildren();
        String column = extractColName(operands.get(1), virtualColumns);
        if (column == null) {
            return null;
        }
        try {
            StringComparator ordering;
            if (stringTypeInfos.contains(operands.get(1).getTypeInfo())) {
                ordering = StringComparators.LEXICOGRAPHIC;
            } else {
                ordering = StringComparators.NUMERIC;
            }
            String lowerBound = evaluate(operands.get(2), configuration, resolveDynamicValues);
            String upperBound = evaluate(operands.get(3), configuration, resolveDynamicValues);
            return new BoundDimFilter(column, lowerBound, upperBound, false, false, null, null, ordering);
        } catch (HiveException e) {
            throw new RuntimeException(e);
        }
    }

    if (GenericUDFInBloomFilter.class == genericUDFClass) {
        // Child 0 is the column operand; child 1 evaluates to the bloom filter itself.
        List<ExprNodeDesc> operands = filterExpr.getChildren();
        String column = extractColName(operands.get(0), virtualColumns);
        if (column == null) {
            return null;
        }
        try {
            BloomKFilter bloom = evaluateBloomFilter(operands.get(1), configuration, resolveDynamicValues);
            return new BloomDimFilter(column, BloomKFilterHolder.fromBloomKFilter(bloom), null);
        } catch (HiveException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    return null;
}
Also used : GenericUDFBetween(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) GenericUDFToString(org.apache.hadoop.hive.ql.udf.generic.GenericUDFToString) IOException(java.io.IOException) StringComparator(org.apache.druid.query.ordering.StringComparator) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) BloomDimFilter(org.apache.druid.query.filter.BloomDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) Nullable(javax.annotation.Nullable)

Example 23 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testBloomFilterAggDoubleVirtualColumn.

/**
 * Checks {@code BLOOM_FILTER} over the expression {@code d1 * 2}: the query is expected to plan to
 * a timeseries query with an expression virtual column feeding a bloom filter aggregator, and the
 * result is compared with a filter built by hand from the test rows.
 */
@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
    cannotVectorize();

    // Build the expected filter: doubled "d1" for each row, with nulls handled per NullHandling mode.
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        Object d1 = row.getRaw("d1");
        if (d1 != null) {
            expected1.addDouble(2 * ((Number) d1).doubleValue());
        } else if (NullHandling.replaceWithDefault()) {
            expected1.addDouble(NullHandling.defaultDoubleValue());
        } else {
            expected1.addBytes(null, 0, 0);
        }
    }

    String sql = "SELECT\n" + "BLOOM_FILTER(d1 * 2, 1000)\n" + "FROM numfoo";
    ExpressionVirtualColumn doubledD1 =
        new ExpressionVirtualColumn("v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE);
    BloomFilterAggregatorFactory bloomAgg =
        new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES);
    String expectedJson = CalciteTests.getJsonMapper().writeValueAsString(expected1);

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(doubledD1)
                .aggregators(ImmutableList.of(bloomAgg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { expectedJson })
    );
}
Also used : ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 24 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testBloomFilterTwoAggs.

/**
 * Checks two {@code BLOOM_FILTER} aggregations (on dim1 and dim2) in one query: the query is
 * expected to plan to a timeseries query with two bloom filter aggregators, and each result column
 * is compared with a filter built by hand from the test rows.
 */
@Test
public void testBloomFilterTwoAggs() throws Exception {
    cannotVectorize();

    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        // dim1 is read as a single raw string.
        String dim1Value = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
        if (dim1Value != null) {
            expected1.addString(dim1Value);
        } else {
            expected1.addBytes(null, 0, 0);
        }

        // dim2 is read as a list of values; a row with no values adds one null entry.
        List<String> dim2Values = row.getDimension("dim2");
        if (dim2Values.isEmpty()) {
            expected2.addBytes(null, 0, 0);
        }
        for (String element : dim2Values) {
            String normalized = NullHandling.emptyToNullIfNeeded(element);
            if (normalized != null) {
                expected2.addString(normalized);
            } else {
                expected2.addBytes(null, 0, 0);
            }
        }
    }

    String sql = "SELECT\n" + "BLOOM_FILTER(dim1, 1000),\n" + "BLOOM_FILTER(dim2, 1000)\n" + "FROM numfoo";
    BloomFilterAggregatorFactory dim1Agg =
        new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES);
    BloomFilterAggregatorFactory dim2Agg =
        new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("dim2", "a1:dim2"), TEST_NUM_ENTRIES);
    String expectedJson1 = CalciteTests.getJsonMapper().writeValueAsString(expected1);
    String expectedJson2 = CalciteTests.getJsonMapper().writeValueAsString(expected2);

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(dim1Agg, dim2Agg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { expectedJson1, expectedJson2 })
    );
}
Also used : InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 25 with BloomKFilter

use of org.apache.druid.query.filter.BloomKFilter in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testEmptyTimeseriesResults.

/**
 * Checks two {@code BLOOM_FILTER} aggregations under the filter {@code dim2 = 0}: the expected
 * result is one row whose two columns are the JSON form of bloom filters that had nothing added.
 */
@Test
public void testEmptyTimeseriesResults() throws Exception {
    cannotVectorize();

    // Nothing is ever added to these, so they serialize as empty bloom filters.
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);

    String sql = "SELECT\n" + "BLOOM_FILTER(dim1, 1000),\n" + "BLOOM_FILTER(l1, 1000)\n" + "FROM numfoo where dim2 = 0";
    BloomFilterAggregatorFactory dim1Agg =
        new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES);
    BloomFilterAggregatorFactory l1Agg =
        new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG), TEST_NUM_ENTRIES);
    String expectedJson1 = CalciteTests.getJsonMapper().writeValueAsString(expected1);
    String expectedJson2 = CalciteTests.getJsonMapper().writeValueAsString(expected2);

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .filters(BaseCalciteQueryTest.bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC))
                .aggregators(ImmutableList.of(dim1Agg, l1Agg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { expectedJson1, expectedJson2 })
    );
}
Also used : BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

BloomKFilter (org.apache.druid.query.filter.BloomKFilter)40 Test (org.junit.Test)37 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)20 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)17 CardinalityAggregatorTest (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorTest)12 BloomFilterAggregatorFactory (org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory)9 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)8 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)8 BloomDimFilter (org.apache.druid.query.filter.BloomDimFilter)8 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)8 ByteBuffer (java.nio.ByteBuffer)7 InputRow (org.apache.druid.data.input.InputRow)7 MapBasedRow (org.apache.druid.data.input.MapBasedRow)4 Expr (org.apache.druid.math.expr.Expr)4 ExprEval (org.apache.druid.math.expr.ExprEval)4 GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest)4 DimensionSelector (org.apache.druid.segment.DimensionSelector)3 ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn)3 IOException (java.io.IOException)2 Nullable (javax.annotation.Nullable)2