Search in sources :

Example 21 with TopNQueryBuilder

use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.

The method setupQueries of the class TimeCompareBenchmark.

/**
 * Builds the topN and timeseries "time compare" queries, and their runner factories, for the "basic" schema.
 *
 * <p>The schema's data interval is split at its midpoint into a {@code previous} half and a {@code recent} half.
 * Each query carries two filtered long-sum aggregators over {@code sumLongSequential}: one restricted to the
 * recent half, and one (named {@code _cmp_sumLongSequential}) restricted to the previous half.
 *
 * <p>Assigns the {@code topNQuery}, {@code topNFactory}, {@code timeseriesQuery} and {@code timeseriesFactory}
 * fields.
 */
private void setupQueries() {
    // queries for the basic schema
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    long startMillis = basicSchema.getDataInterval().getStartMillis();
    long endMillis = basicSchema.getDataInterval().getEndMillis();
    // Absolute midpoint of the data interval. Half of the interval's length on its own is a duration, not an
    // instant: used as a boundary it only works when the interval starts at epoch 0, and otherwise yields a
    // "previous" interval whose end precedes its start.
    long midMillis = startMillis + (endMillis - startMillis) / 2;
    Interval recent = Intervals.utc(midMillis, endMillis);
    Interval previous = Intervals.utc(startMillis, midMillis);
    log.info("Recent interval: " + recent);
    log.info("Previous interval: " + previous);
    {
        // basic.topNTimeCompare
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        // Sum over the recent half; this is also the metric the topN is ordered by.
        queryAggs.add(new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)));
        // Comparison sum over the previous half.
        queryAggs.add(new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric("sumLongSequential").intervals(intervalSpec).aggregators(queryAggs).threshold(threshold);
        topNQuery = queryBuilderA.build();
        topNFactory = new TopNQueryRunnerFactory(new StupidPool<>("TopNBenchmark-compute-bufferPool", new OffheapBufferGenerator("compute", 250000000), 0, Integer.MAX_VALUE), new TopNQueryQueryToolChest(new TopNQueryConfig()), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    }
    {
        // basic.timeseriesTimeCompare
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        // Same pair of filtered sums as the topN query: recent half, then previous half.
        queryAggs.add(new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(recent), null)));
        queryAggs.add(new FilteredAggregatorFactory(
            new LongSumAggregatorFactory("_cmp_sumLongSequential", "sumLongSequential"),
            new IntervalDimFilter(ColumnHolder.TIME_COLUMN_NAME, Collections.singletonList(previous), null)));
        Druids.TimeseriesQueryBuilder timeseriesQueryBuilder = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(false);
        timeseriesQuery = timeseriesQueryBuilder.build();
        timeseriesFactory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    }
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) OffheapBufferGenerator(org.apache.druid.offheap.OffheapBufferGenerator) TopNQueryConfig(org.apache.druid.query.topn.TopNQueryConfig) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) IntervalDimFilter(org.apache.druid.query.filter.IntervalDimFilter) Interval(org.joda.time.Interval)

Example 22 with TopNQueryBuilder

use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.

The method topNQuery of the class CachingClusteredClientBenchmark.

/**
 * Benchmark body: builds a topN query over {@code dimZipf} ordered by the {@code sumLongSequential} long sum,
 * stores it in {@code query}, runs it through {@code runQuery()} and feeds every result to the blackhole.
 *
 * @param blackhole JMH sink that consumes each result
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void topNQuery(Blackhole blackhole) {
    final DefaultDimensionSpec zipfDimension = new DefaultDimensionSpec("dimZipf", null);
    final LongSumAggregatorFactory sequentialSum = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
    final Granularity granularity = Granularity.fromString(queryGranularity);

    final TopNQueryBuilder builder = new TopNQueryBuilder()
        .dataSource(DATA_SOURCE)
        .intervals(basicSchemaIntervalSpec)
        .dimension(zipfDimension)
        .aggregators(sequentialSum)
        .granularity(granularity)
        .metric("sumLongSequential")
        // we are primarily measuring 'broker' merge time, so collect a significant number of results
        .threshold(10_000)
        .context(ImmutableMap.of(QueryContexts.BROKER_PARALLEL_MERGE_KEY, parallelCombine, QueryContexts.BROKER_PARALLELISM, parallelism));
    query = builder.build();

    final List<Result<TopNResultValue>> topNResults = runQuery();
    for (Result<TopNResultValue> row : topNResults) {
        blackhole.consume(row);
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Result(org.apache.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 23 with TopNQueryBuilder

use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.

The method setupQueries of the class TopNTypeInterfaceBenchmark.

/**
 * Registers the topN query builders for the "basic" schema in {@code SCHEMA_QUERY_MAP}.
 *
 * <p>The builders are stored under the names {@code string}, {@code long}, {@code float}, {@code numericSort}
 * and {@code alphanumericSort}, in that insertion order. Every builder queries the schema's full data interval
 * at {@code Granularities.ALL}.
 */
private void setupQueries() {
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    // queries for the basic schema, keyed by query name
    final Map<String, TopNQueryBuilder> buildersByName = new LinkedHashMap<>();

    {
        // basic.A -- one shared aggregator list, three dimension types
        final QuerySegmentSpec wholeInterval =
            new MultipleIntervalSegmentSpec(Collections.singletonList(schemaInfo.getDataInterval()));

        final List<AggregatorFactory> aggregators = new ArrayList<>();
        aggregators.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        aggregators.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        aggregators.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        aggregators.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        aggregators.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));

        // Use an IdentityExtractionFn to force usage of HeapBasedTopNAlgorithm
        final ExtractionDimensionSpec identityExtraction =
            new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance());
        final TopNQueryBuilder stringBuilder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension(identityExtraction)
            .metric("sumFloatNormal")
            .intervals(wholeInterval)
            .aggregators(aggregators);

        // HeapBasedTopNAlgorithm is always used for numeric columns
        final TopNQueryBuilder longBuilder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("metLongUniform")
            .metric("sumFloatNormal")
            .intervals(wholeInterval)
            .aggregators(aggregators);
        final TopNQueryBuilder floatBuilder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("metFloatNormal")
            .metric("sumFloatNormal")
            .intervals(wholeInterval)
            .aggregators(aggregators);

        buildersByName.put("string", stringBuilder);
        buildersByName.put("long", longBuilder);
        buildersByName.put("float", floatBuilder);
    }

    {
        // basic.numericSort -- order by dimension value using the numeric comparator
        final QuerySegmentSpec wholeInterval =
            new MultipleIntervalSegmentSpec(Collections.singletonList(schemaInfo.getDataInterval()));
        final List<AggregatorFactory> aggregators = new ArrayList<>();
        aggregators.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));

        final TopNQueryBuilder numericSortBuilder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
            .intervals(wholeInterval)
            .aggregators(aggregators);
        buildersByName.put("numericSort", numericSortBuilder);
    }

    {
        // basic.alphanumericSort -- order by dimension value using the alphanumeric comparator
        final QuerySegmentSpec wholeInterval =
            new MultipleIntervalSegmentSpec(Collections.singletonList(schemaInfo.getDataInterval()));
        final List<AggregatorFactory> aggregators = new ArrayList<>();
        aggregators.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));

        final TopNQueryBuilder alphanumericSortBuilder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
            .intervals(wholeInterval)
            .aggregators(aggregators);
        buildersByName.put("alphanumericSort", alphanumericSortBuilder);
    }

    SCHEMA_QUERY_MAP.put("basic", buildersByName);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec)

Example 24 with TopNQueryBuilder

use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.

The method testOptimize of the class DatasourceOptimizerTest.

/**
 * Checks that a topN query on the {@code base} datasource over 2011-04-01/2011-04-06 is optimized into two
 * queries: one on the {@code derivative} datasource for 2011-04-01/2011-04-04 (the days for which derivative
 * segments are announced) and one on {@code base} for the remaining 2011-04-04/2011-04-06.
 *
 * @throws InterruptedException if interrupted while waiting for the derivatives manager to report derivatives
 */
@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException {
    // insert datasource metadata
    String dataSource = "derivative";
    String baseDataSource = "base";
    Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
    Set<String> metrics = Sets.newHashSet("cost");
    DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
    metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);
    // insert base datasource segments
    // Lists.transform only returns a lazy view, so copy it: each segment is then announced exactly once, right
    // here, instead of whenever (and as often as) the result list happens to be traversed.
    List<Boolean> baseResult = ImmutableList.copyOf(Lists.transform(ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04", "2011-04-04/2011-04-05", "2011-04-05/2011-04-06"), interval -> {
        final DataSegment segment = createDataSegment("base", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3", "dim4"), 1024 * 1024);
        try {
            metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
            announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
            // reported through the result list; the assertion below fails the test
            return false;
        }
        return true;
    }));
    // insert derivative segments (eagerly, for the same reason as above)
    List<Boolean> derivativeResult = ImmutableList.copyOf(Lists.transform(ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04"), interval -> {
        final DataSegment segment = createDataSegment("derivative", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3"), 1024);
        try {
            metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
            announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        } catch (IOException e) {
            // reported through the result list; the assertion below fails the test
            return false;
        }
        return true;
    }));
    Assert.assertFalse(baseResult.contains(false));
    Assert.assertFalse(derivativeResult.contains(false));
    derivativesManager.start();
    try {
        // poll until the manager reports at least one derivative; bounded by the test timeout
        while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
            TimeUnit.SECONDS.sleep(1L);
        }
        // build user query
        TopNQuery userQuery = new TopNQueryBuilder().dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals("2011-04-01/2011-04-06").aggregators(new LongSumAggregatorFactory("cost", "cost")).build();
        List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(new TopNQueryBuilder().dataSource("derivative").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04")))).aggregators(new LongSumAggregatorFactory("cost", "cost")).build(), new TopNQueryBuilder().dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06")))).aggregators(new LongSumAggregatorFactory("cost", "cost")).build());
        Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
    } finally {
        // stop the manager even when the wait is interrupted or the assertion fails
        derivativesManager.stop();
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNQuery(org.apache.druid.query.topn.TopNQuery) Query(org.apache.druid.query.Query) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) IOException(java.io.IOException) DataSegment(org.apache.druid.timeline.DataSegment) DerivativeDataSourceMetadata(org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata) TopNQuery(org.apache.druid.query.topn.TopNQuery) Test(org.junit.Test)

Example 25 with TopNQueryBuilder

use of org.apache.druid.query.topn.TopNQueryBuilder in project druid by druid-io.

The method testQuerySerialization of the class MaterializedViewQueryTest.

/**
 * Round-trips a {@code MaterializedViewQuery} wrapping a topN query through {@code JSON_MAPPER} and checks that
 * the deserialized query equals the original, and that the wrapper reports the expected datasource, granularity
 * and intervals.
 *
 * @throws IOException if JSON serialization or deserialization fails
 */
@Test
public void testQuerySerialization() throws IOException {
    final TopNQueryBuilder topNBuilder = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .dimension(QueryRunnerTestHelper.MARKET_DIMENSION)
        .metric(QueryRunnerTestHelper.INDEX_METRIC)
        .threshold(4)
        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        // common double aggregators followed by max/min over "index"
        .aggregators(Lists.newArrayList(Iterables.concat(QueryRunnerTestHelper.COMMON_DOUBLE_AGGREGATORS, Lists.newArrayList(new DoubleMaxAggregatorFactory("maxIndex", "index"), new DoubleMinAggregatorFactory("minIndex", "index")))))
        .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT);
    final TopNQuery wrappedTopN = topNBuilder.build();
    final MaterializedViewQuery viewQuery = new MaterializedViewQuery(wrappedTopN, optimizer);

    // serialize, then read back as the generic Query type
    final String serialized = JSON_MAPPER.writeValueAsString(viewQuery);
    final Query roundTripped = JSON_MAPPER.readValue(serialized, Query.class);

    Assert.assertEquals(viewQuery, roundTripped);
    Assert.assertEquals(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), viewQuery.getDataSource());
    Assert.assertEquals(QueryRunnerTestHelper.ALL_GRAN, viewQuery.getGranularity());
    Assert.assertEquals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC.getIntervals(), viewQuery.getIntervals());
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) TopNQuery(org.apache.druid.query.topn.TopNQuery) Query(org.apache.druid.query.Query) TableDataSource(org.apache.druid.query.TableDataSource) TopNQuery(org.apache.druid.query.topn.TopNQuery) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) Test(org.junit.Test)

Aggregations

TopNQueryBuilder (org.apache.druid.query.topn.TopNQueryBuilder): 55, Test (org.junit.Test): 44, TopNQuery (org.apache.druid.query.topn.TopNQuery): 25, DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 22, LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 16, InvertedTopNMetricSpec (org.apache.druid.query.topn.InvertedTopNMetricSpec): 14, CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 13, TopNQueryQueryToolChest (org.apache.druid.query.topn.TopNQueryQueryToolChest): 13, Result (org.apache.druid.query.Result): 12, TopNQueryConfig (org.apache.druid.query.topn.TopNQueryConfig): 12, TopNResultValue (org.apache.druid.query.topn.TopNResultValue): 11, DoubleMinAggregatorFactory (org.apache.druid.query.aggregation.DoubleMinAggregatorFactory): 9, DimensionTopNMetricSpec (org.apache.druid.query.topn.DimensionTopNMetricSpec): 9, NumericTopNMetricSpec (org.apache.druid.query.topn.NumericTopNMetricSpec): 9, DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 7, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 7, ByteBuffer (java.nio.ByteBuffer): 5, QueryRunner (org.apache.druid.query.QueryRunner): 5, MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 5, HashMap (java.util.HashMap): 4