Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testEmptyTimeseriesResults.
/**
 * Plans two BLOOM_FILTER aggregations over {@code numfoo} filtered on {@code dim2 = 0} as a
 * timeseries query, and expects each result column to be the JSON form of a bloom filter
 * to which nothing has been added.
 */
@Test
public void testEmptyTimeseriesResults() throws Exception {
  cannotVectorize();

  // Expected results: bloom filters that never had a value added.
  final BloomKFilter emptyDim1Filter = new BloomKFilter(TEST_NUM_ENTRIES);
  final BloomKFilter emptyL1Filter = new BloomKFilter(TEST_NUM_ENTRIES);

  final String sql = "SELECT\n"
                     + "BLOOM_FILTER(dim1, 1000),\n"
                     + "BLOOM_FILTER(l1, 1000)\n"
                     + "FROM numfoo where dim2 = 0";

  final MultipleIntervalSegmentSpec allTime =
      new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()));

  // One aggregator per BLOOM_FILTER call; the long column carries an explicit LONG output type.
  final BloomFilterAggregatorFactory dim1Agg = new BloomFilterAggregatorFactory(
      "a0:agg",
      new DefaultDimensionSpec("dim1", "a0:dim1"),
      TEST_NUM_ENTRIES
  );
  final BloomFilterAggregatorFactory l1Agg = new BloomFilterAggregatorFactory(
      "a1:agg",
      new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG),
      TEST_NUM_ENTRIES
  );

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(allTime)
                .granularity(Granularities.ALL)
                .filters(BaseCalciteQueryTest.bound("dim2", "0", "0", false, false, null, StringComparators.NUMERIC))
                .aggregators(ImmutableList.of(dim1Agg, l1Agg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[]{
              CalciteTests.getJsonMapper().writeValueAsString(emptyDim1Filter),
              CalciteTests.getJsonMapper().writeValueAsString(emptyL1Filter)
          }
      )
  );
}
Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testBloomFilterAggExtractionFn.
/**
 * Plans {@code BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000)} and expects it to become a bloom
 * filter aggregator over an extraction dimension spec using a substring extraction fn.
 * The expected filter is built by hand from the first character of each row's {@code dim1}.
 */
@Test
public void testBloomFilterAggExtractionFn() throws Exception {
  cannotVectorize();

  final BloomKFilter expectedFilter = new BloomKFilter(TEST_NUM_ENTRIES);
  for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    final String dim1Value = NullHandling.emptyToNullIfNeeded((String) inputRow.getRaw("dim1"));
    final boolean hasText = dim1Value != null && !"".equals(dim1Value);
    if (hasText) {
      expectedFilter.addString(dim1Value.substring(0, 1));
    } else {
      // empty string extractionFn produces null
      expectedFilter.addBytes(null, 0, 0);
    }
  }

  final String sql = "SELECT\n"
                     + "BLOOM_FILTER(SUBSTRING(dim1, 1, 1), 1000)\n"
                     + "FROM numfoo";

  // SQL SUBSTRING(dim1, 1, 1) is expected to plan as SubstringDimExtractionFn(0, 1).
  final ExtractionDimensionSpec firstCharOfDim1 = new ExtractionDimensionSpec(
      "dim1",
      "a0:dim1",
      new SubstringDimExtractionFn(0, 1)
  );
  final BloomFilterAggregatorFactory bloomAgg =
      new BloomFilterAggregatorFactory("a0:agg", firstCharOfDim1, TEST_NUM_ENTRIES);

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(bloomAgg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[]{
              CalciteTests.getJsonMapper().writeValueAsString(expectedFilter)
          }
      )
  );
}
Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.
Class BloomFilterSqlAggregator, method toDruidAggregation.
/**
 * Plans a SQL bloom-filter aggregate call as a {@link BloomFilterAggregatorFactory}.
 *
 * <p>The first argument of the call is the expression to aggregate and the second is the
 * maximum number of entries, which must be a literal. If an equivalent bloom filter
 * aggregator (same input and same max entries) already exists in
 * {@code existingAggregations}, it is reused; otherwise a new one named
 * {@code "<name>:agg"} is created.
 *
 * @param plannerContext        planner context used to translate the input expression
 * @param rowSignature          signature of the rows being aggregated
 * @param virtualColumnRegistry registry used to look up or create virtual columns for
 *                              inputs that are neither a direct column nor a simple extraction
 * @param rexBuilder            not used by this method
 * @param name                  base name for the aggregator and its dimension output name
 * @param aggregateCall         the aggregate call being planned
 * @param project               projection underneath the aggregate, used to resolve arguments
 * @param existingAggregations  aggregations already planned, searched for a reusable match
 * @param finalizeAggregations  not used by this method
 * @return the planned aggregation, or {@code null} if the input expression cannot be
 *         translated or the max-entries argument is not a literal
 */
@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
  final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
  final DruidExpression input = Expressions.toDruidExpression(plannerContext, rowSignature, inputOperand);
  if (input == null) {
    return null;
  }

  final String aggName = StringUtils.format("%s:agg", name);
  final RexNode maxNumEntriesOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
  if (!maxNumEntriesOperand.isA(SqlKind.LITERAL)) {
    // maxNumEntriesOperand must be a literal in order to plan.
    return null;
  }
  final int maxNumEntries = ((Number) RexLiteral.value(maxNumEntriesOperand)).intValue();

  // Look for existing matching aggregatorFactory.
  for (final Aggregation existing : existingAggregations) {
    for (AggregatorFactory factory : existing.getAggregatorFactories()) {
      if (!(factory instanceof BloomFilterAggregatorFactory)) {
        continue;
      }
      final BloomFilterAggregatorFactory theFactory = (BloomFilterAggregatorFactory) factory;
      final DimensionSpec field = theFactory.getField();

      // Check input for equivalence.
      final boolean inputMatches;
      final DruidExpression virtualInput = virtualColumnRegistry.findVirtualColumnExpressions(theFactory.requiredFields()).stream().findFirst().orElse(null);
      if (virtualInput != null) {
        inputMatches = virtualInput.equals(input);
      } else if (input.isDirectColumnAccess()) {
        // A plain column only matches a factory that reads the same column WITHOUT an
        // extraction fn; otherwise e.g. BLOOM_FILTER(dim1) would wrongly reuse the
        // aggregator built for BLOOM_FILTER(SUBSTRING(dim1, ...)).
        inputMatches = field.getExtractionFn() == null
                       && input.getDirectColumn().equals(field.getDimension());
      } else if (input.isSimpleExtraction()) {
        inputMatches = input.getSimpleExtraction().getColumn().equals(field.getDimension())
                       && input.getSimpleExtraction().getExtractionFn().equals(field.getExtractionFn());
      } else {
        // The input is a general expression but the existing factory is not backed by a
        // virtual column, so they cannot be the same input. Guarding here avoids
        // dereferencing a simple extraction that does not exist.
        inputMatches = false;
      }

      if (inputMatches && theFactory.getMaxNumEntries() == maxNumEntries) {
        // Found existing one. Use this.
        return Aggregation.create(theFactory);
      }
    }
  }

  // No existing match found. Create a new one.
  final ColumnType valueType = Calcites.getColumnTypeForRelDataType(inputOperand.getType());
  final DimensionSpec spec;
  if (input.isDirectColumnAccess()) {
    spec = new DefaultDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType);
  } else if (input.isSimpleExtraction()) {
    spec = new ExtractionDimensionSpec(input.getSimpleExtraction().getColumn(), StringUtils.format("%s:%s", name, input.getSimpleExtraction().getColumn()), valueType, input.getSimpleExtraction().getExtractionFn());
  } else {
    // General expressions are aggregated through a virtual column registered for them.
    String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, inputOperand.getType());
    spec = new DefaultDimensionSpec(virtualColumnName, StringUtils.format("%s:%s", name, virtualColumnName));
  }
  return Aggregation.create(new BloomFilterAggregatorFactory(aggName, spec, maxNumEntries));
}
Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testBloomFilterAggFloatVirtualColumn.
/**
 * Plans {@code BLOOM_FILTER(f1 * 2, 1000)} and expects the multiplication to become an
 * expression virtual column {@code v0} of type FLOAT that the bloom filter aggregator reads.
 * The expected filter is built by hand from twice each row's {@code f1} value.
 */
@Test
public void testBloomFilterAggFloatVirtualColumn() throws Exception {
  cannotVectorize();

  final BloomKFilter expectedFilter = new BloomKFilter(TEST_NUM_ENTRIES);
  for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    final Object f1Value = inputRow.getRaw("f1");
    if (f1Value != null) {
      expectedFilter.addFloat(2 * ((Number) f1Value).floatValue());
    } else if (NullHandling.replaceWithDefault()) {
      // Missing value: the default float is added when nulls are replaced with defaults.
      expectedFilter.addFloat(NullHandling.defaultFloatValue());
    } else {
      // Missing value: otherwise a null entry is added.
      expectedFilter.addBytes(null, 0, 0);
    }
  }

  final String sql = "SELECT\n"
                     + "BLOOM_FILTER(f1 * 2, 1000)\n"
                     + "FROM numfoo";

  final ExpressionVirtualColumn doubledF1 =
      new ExpressionVirtualColumn("v0", "(\"f1\" * 2)", ColumnType.FLOAT, TestExprMacroTable.INSTANCE);
  final BloomFilterAggregatorFactory bloomAgg = new BloomFilterAggregatorFactory(
      "a0:agg",
      new DefaultDimensionSpec("v0", "a0:v0"),
      TEST_NUM_ENTRIES
  );

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(doubledF1)
                .aggregators(ImmutableList.of(bloomAgg))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(
          new Object[]{
              CalciteTests.getJsonMapper().writeValueAsString(expectedFilter)
          }
      )
  );
}
Use of org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory in project druid by druid-io.
Class BloomFilterSqlAggregatorTest, method testGroupByAggregatorDefaultValues.
/**
 * Plans two BLOOM_FILTER aggregations, each with a {@code FILTER(WHERE dim1 = 'nonexistent')}
 * clause, grouped by {@code dim2}, as a group-by query with filtered aggregators. Expects one
 * row for {@code dim2 = 'a'} whose bloom filter columns are the JSON form of filters to which
 * nothing has been added.
 */
@Test
public void testGroupByAggregatorDefaultValues() throws Exception {
  cannotVectorize();

  // Expected results: bloom filters that never had a value added.
  final BloomKFilter emptyDim1Filter = new BloomKFilter(TEST_NUM_ENTRIES);
  final BloomKFilter emptyL1Filter = new BloomKFilter(TEST_NUM_ENTRIES);

  final String sql = "SELECT\n"
                     + "dim2,\n"
                     + "BLOOM_FILTER(dim1, 1000) FILTER(WHERE dim1 = 'nonexistent'),\n"
                     + "BLOOM_FILTER(l1, 1000) FILTER(WHERE dim1 = 'nonexistent')\n"
                     + "FROM numfoo WHERE dim2 = 'a' GROUP BY dim2";

  // Each bloom aggregator is wrapped with the selector from its FILTER clause.
  final FilteredAggregatorFactory filteredDim1Agg = new FilteredAggregatorFactory(
      new BloomFilterAggregatorFactory(
          "a0:agg",
          new DefaultDimensionSpec("dim1", "a0:dim1"),
          TEST_NUM_ENTRIES
      ),
      selector("dim1", "nonexistent", null)
  );
  final FilteredAggregatorFactory filteredL1Agg = new FilteredAggregatorFactory(
      new BloomFilterAggregatorFactory(
          "a1:agg",
          new DefaultDimensionSpec("l1", "a1:l1", ColumnType.LONG),
          TEST_NUM_ENTRIES
      ),
      selector("dim1", "nonexistent", null)
  );

  testQuery(
      sql,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(CalciteTests.DATASOURCE3)
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setDimFilter(selector("dim2", "a", null))
                      .setGranularity(Granularities.ALL)
                      .setVirtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING))
                      .setDimensions(new DefaultDimensionSpec("v0", "_d0", ColumnType.STRING))
                      .setAggregatorSpecs(aggregators(filteredDim1Agg, filteredL1Agg))
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      ImmutableList.of(
          new Object[]{
              "a",
              CalciteTests.getJsonMapper().writeValueAsString(emptyDim1Filter),
              CalciteTests.getJsonMapper().writeValueAsString(emptyL1Filter)
          }
      )
  );
}
Aggregations