Use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
In the class TimeseriesBenchmark, method setupQueries:
private void setupQueries() {
// queries for the basic schema
Map<String, TimeseriesQuery> basicQueries = new LinkedHashMap<>();
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
TimeseriesQuery queryA = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(descending)
    .build();
basicQueries.put("A", queryA);
}
{
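// basic.timeFilterNumeric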
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.NUMERIC);
queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(descending)
    .build();
basicQueries.put("timeFilterNumeric", timeFilterQuery);
}
{
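// basic.timeFilterAlphanumeric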
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.ALPHANUMERIC);
queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(descending)
    .build();
basicQueries.put("timeFilterAlphanumeric", timeFilterQuery);
}
{
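// basic.timeFilterByInterval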
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.utc(200000, 300000)));
List<AggregatorFactory> queryAggs = new ArrayList<>();
LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
queryAggs.add(lsaf);
TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(descending)
    .build();
basicQueries.put("timeFilterByInterval", timeFilterQuery);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
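The three filtered variants above bound the same 200000–300000 ms range in different ways: the first two evaluate a BoundDimFilter on __time per row inside a FilteredAggregatorFactory (with numeric versus alphanumeric string comparison), while the last pushes the range into the query's interval spec, so the engine can prune rows before any aggregator runs. Below is a minimal sketch of the per-row pattern outside the benchmark harness; the names and arguments are taken from the calls above, and the comments on the BoundDimFilter arguments are our reading of its constructor.

// Sum "sumLongSequential" only over rows whose __time falls within
// [200000, 300000] ms; rows outside the bound still flow through the
// query, they are simply skipped by this one aggregator.
AggregatorFactory delegate = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
DimFilter timeBound = new BoundDimFilter(
    ColumnHolder.TIME_COLUMN_NAME, // filter on the __time column
    "200000", "300000",            // millisecond bounds, passed as strings
    false, false,                  // lowerStrict/upperStrict: both ends inclusive
    null, null,                    // no alphaNumeric flag, no extractionFn
    StringComparators.NUMERIC      // compare the string bounds numerically
);
AggregatorFactory filteredSum = new FilteredAggregatorFactory(delegate, timeBound);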
Use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
In the class TimeCompareBenchmark, method setupQueries:
private void setupQueries() {
// queries for the basic schema
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
long startMillis = basicSchema.getDataInterval().getStartMillis();
long endMillis = basicSchema.getDataInterval().getEndMillis();
long half = (endMillis - startMillis) / 2;
Interval recent = Intervals.utc(half, endMillis);
Interval previous = Intervals.utc(startMillis, half);
log.info("Recent interval: " + recent);
log.info("Previous interval: " + previous);
{
// basic.topNTimeCompare
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new FilteredAggregatorFactory(
    new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
    new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)
));
queryAggs.add(new FilteredAggregatorFactory(
    new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
    new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)
));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension("dimUniform")
    .metric("sumLongSequential")
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .threshold(threshold);
topNQuery = queryBuilderA.build();
topNFactory = new TopNQueryRunnerFactory(
    new StupidPool<>(
        "TopNBenchmark-compute-bufferPool",
        new OffheapBufferGenerator("compute", 250000000),
        0,
        Integer.MAX_VALUE
    ),
    new TopNQueryQueryToolChest(new TopNQueryConfig()),
    QueryBenchmarkUtil.NOOP_QUERYWATCHER
);
}
{
// basic.timeseriesTimeCompare
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new FilteredAggregatorFactory(
    new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
    new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)
));
queryAggs.add(new FilteredAggregatorFactory(
    new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
    new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)
));
Druids.TimeseriesQueryBuilder timeseriesQueryBuilder = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(false);
timeseriesQuery = timeseriesQueryBuilder.build();
timeseriesFactory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
}
}
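Both benchmark queries above share one pattern: a single pass over the rows feeds two copies of the same delegate aggregator, each gated by an IntervalDimFilter over one half of the data interval, so the "recent" and "previous" ("_cmp_"-prefixed) sums arrive in the same result row. A hedged sketch of that pattern as a reusable helper; the helper name timeCompareAggs is hypothetical, while the classes and constructors are the ones used above.

// Build the recent/previous pair of filtered sums used by the
// time-compare queries above.
static List<AggregatorFactory> timeCompareAggs(String name, String fieldName, Interval recent, Interval previous) {
    return Arrays.asList(
        new FilteredAggregatorFactory(
            new LongSumAggregatorFactory(name, fieldName),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)),
        new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("_cmp_" + name, fieldName),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
}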
Use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
In the class FilteredAggregatorBenchmark, method setup:
/**
* Set up everything common to benchmarking both the incremental index and the queryable index.
*/
@Setup
public void setup() {
log.info("SETUP CALLED AT " + System.currentTimeMillis());
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schema);
generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
filter = new OrDimFilter(Arrays.asList(
    new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC),
    new RegexDimFilter("dimSequential", "X", null),
    new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null),
    new InDimFilter("dimSequential", Collections.singletonList("X"), null)
));
filteredMetric = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = Collections.singletonList(filteredMetric);
query = Druids.newTimeseriesQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .aggregators(queryAggs)
    .descending(descending)
    .build();
}
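Note that every branch of the OrDimFilter appears constructed so that it matches nothing in the generated data: the bound (-1, -1) with both ends strict is empty, and the regex, contains, and IN branches all look for the literal "X" against dimSequential's numeric-string values. The benchmark therefore seems aimed at the per-row cost of evaluating the filter rather than at aggregating matches. For contrast, a hypothetical variant whose filter does match; the value "0" is an assumption about the generated dimSequential values, not something taken from the schema.

// Count only rows where dimSequential equals "0" (assumed to occur in
// the generated data; adjust for the real schema).
DimFilter matching = new SelectorDimFilter("dimSequential", "0", null);
AggregatorFactory matchedRows = new FilteredAggregatorFactory(new CountAggregatorFactory("matchedRows"), matching);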
Use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
In the class MovingAverageIterableTest, method testWithFilteredAggregation:
@Test
public void testWithFilteredAggregation() {
Map<String, Object> event1 = new HashMap<>();
Map<String, Object> event2 = new HashMap<>();
List<DimensionSpec> ds = new ArrayList<>();
ds.add(new DefaultDimensionSpec("gender", "gender"));
event1.put("gender", "m");
event1.put("pageViews", 10L);
Row row1 = new MapBasedRow(JAN_1, event1);
event2.put("gender", "m");
event2.put("pageViews", 20L);
Row row2 = new MapBasedRow(JAN_4, event2);
Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(
    new RowBucket(JAN_1, Collections.singletonList(row1)),
    new RowBucket(JAN_2, Collections.emptyList()),
    new RowBucket(JAN_3, Collections.emptyList()),
    new RowBucket(JAN_4, Collections.singletonList(row2))
));
AveragerFactory averagerfactory = new LongMeanAveragerFactory("movingAvgPageViews", 4, 1, "pageViews");
AggregatorFactory aggregatorFactory = new LongSumAggregatorFactory("pageViews", "pageViews");
DimFilter filter = new SelectorDimFilter("gender", "m", null);
FilteredAggregatorFactory filteredAggregatorFactory = new FilteredAggregatorFactory(aggregatorFactory, filter);
Iterator<Row> iter = new MovingAverageIterable(
    seq,
    ds,
    Collections.singletonList(averagerfactory),
    Collections.emptyList(),
    Collections.singletonList(filteredAggregatorFactory)
).iterator();
Assert.assertTrue(iter.hasNext());
Row result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertTrue(iter.hasNext());
result = iter.next();
Assert.assertEquals("m", (result.getDimension("gender")).get(0));
Assert.assertEquals(7.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
Assert.assertFalse(iter.hasNext());
}
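The asserted values are consistent with the averager's 4-bucket window, where empty buckets contribute 0: until JAN_4's row enters the window, only JAN_1's 10 page views are present. A short worked sketch of that arithmetic:

// Worked arithmetic behind the assertions (window size 4, empty buckets = 0):
double firstThreeRows = (10 + 0 + 0 + 0) / 4.0;  // 2.5f for the JAN_1..JAN_3 rows
double fourthRow      = (10 + 0 + 0 + 20) / 4.0; // 7.5f once JAN_4's row arrives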
Use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.
In the class HllSketchSqlAggregatorTest, method testApproxCountDistinctHllSketch:
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
// Can't vectorize due to SUBSTRING expression.
cannotVectorize();
final String sql = "SELECT\n"
    + "  SUM(cnt),\n"
    + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" // uppercase
    + "  approx_count_distinct_ds_hll(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered
    + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn, using generic A.C.D.
    + "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression, using COUNT DISTINCT
    + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" // on native HllSketch column
    + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" // on native HllSketch column
    + "FROM druid.foo";
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
} else {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
}
testQuery(
    sql,
    ImmutableList.of(
        Druids.newTimeseriesQueryBuilder()
            .dataSource(CalciteTests.DATASOURCE1)
            .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
            .granularity(Granularities.ALL)
            .virtualColumns(
                new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
                new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)
            )
            .aggregators(ImmutableList.of(
                new LongSumAggregatorFactory("a0", "cnt"),
                new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND),
                new FilteredAggregatorFactory(
                    new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
                    BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))
                ),
                new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND),
                new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND),
                new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND),
                new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND)
            ))
            .context(QUERY_CONTEXT_DEFAULT)
            .build()
    ),
    expectedResults
);
}
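The FILTER clause on the second DS_HLL call is what produces the FilteredAggregatorFactory in the expected native query: the "a2" HLL build aggregator wrapped in NOT(dim2 = ''). Roughly how that aggregator is assembled, assuming BaseCalciteQueryTest.not(...) and .selector(...) expand to NotDimFilter and SelectorDimFilter as they do in Druid's Calcite test helpers:

// SQL: approx_count_distinct_ds_hll(dim2) FILTER(WHERE dim2 <> '')
AggregatorFactory a2 = new FilteredAggregatorFactory(
    new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
    new NotDimFilter(new SelectorDimFilter("dim2", "", null))
);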