
Example 6 with BloomFilterAggregatorFactory

Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.

The class BloomFilterSqlAggregatorTest, method testEmptyTimeseriesResults.

@Test
public void testEmptyTimeseriesResults() throws Exception {
    // makes empty bloom filters
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);
    testQuery("SELECT\n" + "BLOOM_FILTER(dim1, 1000),\n" + "BLOOM_FILTER(l1, 1000)\n" + "FROM numfoo where dim2 = 0", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).filters(BaseCalciteQueryTest.bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC)).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES), new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1), CalciteTests.getJsonMapper().writeValueAsString(expected2) }));
}
Also used : BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
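A minimal illustrative sketch (not part of the test class, using only APIs already shown above): because the WHERE dim2 = 0 filter matches no rows, the query result is simply the serialized form of an empty filter, so constructing a fresh BloomKFilter and serializing it with the test's ObjectMapper reproduces the expected strings.

BloomKFilter emptyFilter = new BloomKFilter(TEST_NUM_ENTRIES);
// Serializing an untouched filter with the same mapper the test uses yields the JSON
// asserted in both result columns above (expected1 and expected2 are built the same way).
String expectedJson = CalciteTests.getJsonMapper().writeValueAsString(emptyFilter);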

Example 7 with BloomFilterAggregatorFactory

Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.

The class BloomFilterSqlAggregatorTest, method testBloomFilterAggExtractionFn.

@Test
public void testBloomFilterAggExtractionFn() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
        // empty string extractionFn produces null
        if (raw == null || "".equals(raw)) {
            expected1.addBytes(null, 0, 0);
        } else {
            expected1.addString(raw.substring(0, 1));
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new ExtractionDimensionSpec("dim1", "a0:dim1", new SubstringDimExtractionFn(0, 1)), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
Also used : SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
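As a hedged aside (illustration only, reusing the SubstringDimExtractionFn from the expected query), this is why the loop adds a null entry for null or empty dim1 values and a one-character string otherwise:

SubstringDimExtractionFn firstChar = new SubstringDimExtractionFn(0, 1);
String nonEmpty = firstChar.apply("druid"); // "d"  -> expected1.addString("d")
String empty = firstChar.apply("");         // null -> expected1.addBytes(null, 0, 0)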

Example 8 with BloomFilterAggregatorFactory

Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.

The class BloomFilterSqlAggregator, method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression input = Expressions.toDruidExpression(plannerContext, rowSignature, inputOperand);
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String aggName = StringUtils.format("%s:agg", name);
    final RexNode maxNumEntriesOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    if (!maxNumEntriesOperand.isA(SqlKind.LITERAL)) {
        // maxNumEntriesOperand must be a literal in order to plan.
        return null;
    }
    final int maxNumEntries = ((Number) RexLiteral.value(maxNumEntriesOperand)).intValue();
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            if (factory instanceof BloomFilterAggregatorFactory) {
                final BloomFilterAggregatorFactory theFactory = (BloomFilterAggregatorFactory) factory;
                // Check input for equivalence.
                final boolean inputMatches;
                final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
                if (virtualInput == null) {
                    if (input.isDirectColumnAccess()) {
                        inputMatches = input.getDirectColumn().equals(theFactory.getField().getDimension());
                    } else {
                        inputMatches = input.getSimpleExtraction().getColumn().equals(theFactory.getField().getDimension()) && input.getSimpleExtraction().getExtractionFn().equals(theFactory.getField().getExtractionFn());
                    }
                } else {
                    inputMatches = virtualInput.equals(input);
                }
                final boolean matches = inputMatches && theFactory.getMaxNumEntries() == maxNumEntries;
                if (matches) {
                    // Found existing one. Use this.
                    return Aggregation.create(theFactory);
                }
            }
        }
    }
    // No existing match found. Create a new one.
    ColumnType valueType = Calcites.getColumnTypeForRelDataType(inputOperand.getType());
    final DimensionSpec spec;
    if (input.isDirectColumnAccess()) {
        spec = new DefaultDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType);
    } else if (input.isSimpleExtraction()) {
        spec = new ExtractionDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType, input.getSimpleExtraction().getExtractionFn());
    } else {
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, inputOperand.getType());
        spec = new DefaultDimensionSpec(virtualColumnName, StringUtils.format("%s:%s", name, virtualColumnName));
    }
    aggregatorFactory = new BloomFilterAggregatorFactory(aggName, spec, maxNumEntries);
    return Aggregation.create(aggregatorFactory);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Aggregation(org.apache.druid.sql.calcite.aggregation.Aggregation) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)
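To make the three branches at the end of the method concrete, here is a hedged sketch of the factory each input shape yields, with names borrowed from the tests on this page (an aggregator named "a0" and a literal limit of 1000); note that if the second argument of BLOOM_FILTER is not a literal, the method returns null and the aggregation cannot be planned.

// Direct column access, e.g. BLOOM_FILTER(dim1, 1000):
new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), 1000);
// Simple extraction, e.g. BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000):
new BloomFilterAggregatorFactory(
    "a0:agg",
    new ExtractionDimensionSpec("dim1", "a0:dim1", new SubstringDimExtractionFn(0, 1)),
    1000);
// Expression input, e.g. BLOOM_FILTER(f1 * 2, 1000), planned against a virtual column "v0":
new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), 1000);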

Example 9 with BloomFilterAggregatorFactory

Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.

The class BloomFilterSqlAggregatorTest, method testBloomFilterAggFloatVirtualColumn.

@Test
public void testBloomFilterAggFloatVirtualColumn() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        Object raw = row.getRaw("f1");
        if (raw == null) {
            if (NullHandling.replaceWithDefault()) {
                expected1.addFloat(NullHandling.defaultFloatValue());
            } else {
                expected1.addBytes(null, 0, 0);
            }
        } else {
            expected1.addFloat(2 * ((Number) raw).floatValue());
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(f1 * 2, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "(\"f1\" * 2)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
Also used : ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
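For clarity, a hedged sketch of the virtual column the planner is expected to create for the f1 * 2 projection (these are the same objects asserted in the expected query above, pulled out and commented):

ExpressionVirtualColumn v0 = new ExpressionVirtualColumn(
    "v0",                        // output name referenced by the aggregator's DimensionSpec
    "(\"f1\" * 2)",              // native Druid expression derived from the SQL projection
    ColumnType.FLOAT,            // output type of the expression
    TestExprMacroTable.INSTANCE  // macro table used by the test fixtures
);
// The bloom aggregator then reads "v0" instead of "f1":
new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES);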

Example 10 with BloomFilterAggregatorFactory

Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.

The class BloomFilterSqlAggregatorTest, method testGroupByAggregatorDefaultValues.

@Test
public void testGroupByAggregatorDefaultValues() throws Exception {
    // makes empty bloom filters
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);
    testQuery("SELECT\n" + "dim2,\n" + "BLOOM_FILTER(dim1, 1000) FILTER(WHERE dim1 = 'nonexistent'),\n" + "BLOOM_FILTER(l1, 1000) FILTER(WHERE dim1 = 'nonexistent')\n" + "FROM numfoo WHERE dim2 = 'a' GROUP BY dim2", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE3).setInterval(querySegmentSpec(Filtration.eternity())).setDimFilter(selector("dim2", "a", null)).setGranularity(Granularities.ALL).setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)).setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING)).setAggregatorSpecs(aggregators(new FilteredAggregatorFactory(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES), selector("dim1", "nonexistent", null)), new FilteredAggregatorFactory(new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG), TEST_NUM_ENTRIES), selector("dim1", "nonexistent", null)))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { "a", CalciteTests.getJsonMapper().writeValueAsString(expected1), CalciteTests.getJsonMapper().writeValueAsString(expected2) }));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
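As a hedged note on the FILTER clause (illustration only, using the test's selector helper): each BLOOM_FILTER(...) FILTER(WHERE dim1 = 'nonexistent') plans to a FilteredAggregatorFactory wrapping the bloom aggregator, and since the selector matches no rows, every group emits the JSON of an empty filter, which is exactly what expected1 and expected2 encode.

// One of the two filtered aggregators asserted above: the wrapped bloom aggregator only
// sees rows where dim1 = 'nonexistent', i.e. none, so its result stays an empty BloomKFilter.
new FilteredAggregatorFactory(
    new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES),
    selector("dim1", "nonexistent", null));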

Aggregations

BloomFilterAggregatorFactory (org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory): 10
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 9
BloomKFilter (org.apache.druid.query.filter.BloomKFilter): 9
BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 9
Test (org.junit.Test): 9
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 8
InputRow (org.apache.druid.data.input.InputRow): 7
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 3
ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec): 2
Nullable (javax.annotation.Nullable): 1
RexNode (org.apache.calcite.rex.RexNode): 1
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 1
FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory): 1
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 1
SubstringDimExtractionFn (org.apache.druid.query.extraction.SubstringDimExtractionFn): 1
ColumnType (org.apache.druid.segment.column.ColumnType): 1
Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation): 1
DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression): 1