Search in sources :

Example 31 with FilteredAggregatorFactory

use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.

the class TimeseriesBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, TimeseriesQuery> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        TimeseriesQuery queryA = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("A", queryA);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.NUMERIC);
        queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterNumeric", timeFilterQuery);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.ALPHANUMERIC);
        queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterAlphanumeric", timeFilterQuery);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.utc(200000, 300000)));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        queryAggs.add(lsaf);
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterByInterval", timeFilterQuery);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory)

Example 32 with FilteredAggregatorFactory

use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.

the class TimeCompareBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    long startMillis = basicSchema.getDataInterval().getStartMillis();
    long endMillis = basicSchema.getDataInterval().getEndMillis();
    long half = (endMillis - startMillis) / 2;
    Interval recent = Intervals.utc(half, endMillis);
    Interval previous = Intervals.utc(startMillis, half);
    log.info("Recent interval: " + recent);
    log.info("Previous interval: " + previous);
    {
        // basic.topNTimeCompare
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new FilteredAggregatorFactory(// jsAgg1,
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"), new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)));
        queryAggs.add(new FilteredAggregatorFactory(new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"), new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric("sumLongSequential").intervals(intervalSpec).aggregators(queryAggs).threshold(threshold);
        topNQuery = queryBuilderA.build();
        topNFactory = new TopNQueryRunnerFactory(new StupidPool<>("TopNBenchmark-compute-bufferPool", new OffheapBufferGenerator("compute", 250000000), 0, Integer.MAX_VALUE), new TopNQueryQueryToolChest(new TopNQueryConfig()), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    }
    {
        // basic.timeseriesTimeCompare
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new FilteredAggregatorFactory(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"), new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)));
        queryAggs.add(new FilteredAggregatorFactory(new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"), new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
        Druids.TimeseriesQueryBuilder timeseriesQueryBuilder = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(false);
        timeseriesQuery = timeseriesQueryBuilder.build();
        timeseriesFactory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    }
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) OffheapBufferGenerator(org.apache.druid.offheap.OffheapBufferGenerator) TopNQueryConfig(org.apache.druid.query.topn.TopNQueryConfig) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) IntervalDimFilter(org.apache.druid.query.filter.IntervalDimFilter) Interval(org.joda.time.Interval)

Example 33 with FilteredAggregatorFactory

use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.

the class FilteredAggregatorBenchmark method setup.

/**
 * Setup everything common for benchmarking both the incremental-index and the queriable-index.
 */
@Setup
public void setup() {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schema);
    generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
    filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    filteredMetric = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
    factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = Collections.singletonList(filteredMetric);
    query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataGenerator(org.apache.druid.segment.generator.DataGenerator) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) Setup(org.openjdk.jmh.annotations.Setup)

Example 34 with FilteredAggregatorFactory

use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.

the class MovingAverageIterableTest method testWithFilteredAggregation.

@Test
public void testWithFilteredAggregation() {
    Map<String, Object> event1 = new HashMap<>();
    Map<String, Object> event2 = new HashMap<>();
    List<DimensionSpec> ds = new ArrayList<>();
    ds.add(new DefaultDimensionSpec("gender", "gender"));
    event1.put("gender", "m");
    event1.put("pageViews", 10L);
    Row row1 = new MapBasedRow(JAN_1, event1);
    event2.put("gender", "m");
    event2.put("pageViews", 20L);
    Row row2 = new MapBasedRow(JAN_4, event2);
    Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(new RowBucket(JAN_1, Collections.singletonList(row1)), new RowBucket(JAN_2, Collections.emptyList()), new RowBucket(JAN_3, Collections.emptyList()), new RowBucket(JAN_4, Collections.singletonList(row2))));
    AveragerFactory averagerfactory = new LongMeanAveragerFactory("movingAvgPageViews", 4, 1, "pageViews");
    AggregatorFactory aggregatorFactory = new LongSumAggregatorFactory("pageViews", "pageViews");
    DimFilter filter = new SelectorDimFilter("gender", "m", null);
    FilteredAggregatorFactory filteredAggregatorFactory = new FilteredAggregatorFactory(aggregatorFactory, filter);
    Iterator<Row> iter = new MovingAverageIterable(seq, ds, Collections.singletonList(averagerfactory), Collections.emptyList(), Collections.singletonList(filteredAggregatorFactory)).iterator();
    Assert.assertTrue(iter.hasNext());
    Row result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(2.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(7.5f, result.getMetric("movingAvgPageViews").floatValue(), 0.0f);
    Assert.assertFalse(iter.hasNext());
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ConstantAveragerFactory(org.apache.druid.query.movingaverage.averagers.ConstantAveragerFactory) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) LongMeanAveragerFactory(org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory) MapBasedRow(org.apache.druid.data.input.MapBasedRow) LongMeanAveragerFactory(org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 35 with FilteredAggregatorFactory

use of org.apache.druid.query.aggregation.FilteredAggregatorFactory in project druid by druid-io.

the class HllSketchSqlAggregatorTest method testApproxCountDistinctHllSketch.

@Test
public void testApproxCountDistinctHllSketch() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();
    final String sql = "SELECT\n" + "  SUM(cnt),\n" + // uppercase
    "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" + // lowercase; also, filtered
    "  APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn, using generic A.C.D.
    "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression, using COUNT DISTINCT
    "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native HllSketch column
    "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" + // on native HllSketch column
    "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" + "FROM druid.foo";
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
    } else {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
    }
    testQuery(sql, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new LongSumAggregatorFactory("a0", "cnt"), new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND), new FilteredAggregatorFactory(new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND), new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND))).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

FilteredAggregatorFactory (org.apache.druid.query.aggregation.FilteredAggregatorFactory)63 Test (org.junit.Test)52 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)28 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)23 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)23 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)20 QueryDataSource (org.apache.druid.query.QueryDataSource)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 ExpressionLambdaAggregatorFactory (org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory)13 NotDimFilter (org.apache.druid.query.filter.NotDimFilter)11 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)10 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)10 ArithmeticPostAggregator (org.apache.druid.query.aggregation.post.ArithmeticPostAggregator)8 FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator)8 BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter)8 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)8 Result (org.apache.druid.query.Result)7 LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory)7 TableDataSource (org.apache.druid.query.TableDataSource)6 DimFilter (org.apache.druid.query.filter.DimFilter)6