Search in sources :

Example 81 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class DatasourceOptimizerTest method testOptimize.

/**
 * Checks that optimizing a TopN query over the "base" datasource for
 * 2011-04-01/2011-04-06 yields two queries: one against "derivative" for
 * 2011-04-01/2011-04-04 (the intervals for which derivative segments are announced)
 * and one against "base" for the remaining 2011-04-04/2011-04-06.
 *
 * @throws InterruptedException if interrupted while waiting for derivatives to be loaded
 */
@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException {
    // insert datasource metadata
    String dataSource = "derivative";
    String baseDataSource = "base";
    Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
    Set<String> metrics = Sets.newHashSet("cost");
    DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
    metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);
    // insert base datasource segments; done eagerly so the announcements do not depend on
    // a lazy list view being iterated later
    for (String interval : ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04", "2011-04-04/2011-04-05", "2011-04-05/2011-04-06")) {
        announceOrFail(createDataSegment("base", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3", "dim4"), 1024 * 1024));
    }
    // insert derivative segments
    for (String interval : ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04")) {
        announceOrFail(createDataSegment("derivative", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3"), 1024));
    }
    derivativesManager.start();
    try {
        // poll until at least one derivative is visible; the @Test timeout bounds this loop
        while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
            TimeUnit.SECONDS.sleep(1L);
        }
        // build user query
        TopNQuery userQuery = new TopNQueryBuilder().dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals("2011-04-01/2011-04-06").aggregators(new LongSumAggregatorFactory("cost", "cost")).build();
        List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(new TopNQueryBuilder().dataSource("derivative").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04")))).aggregators(new LongSumAggregatorFactory("cost", "cost")).build(), new TopNQueryBuilder().dataSource("base").granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("dim1").metric("cost").threshold(4).intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06")))).aggregators(new LongSumAggregatorFactory("cost", "cost")).build());
        Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
    } finally {
        // stop the manager even when an assertion above fails
        derivativesManager.stop();
    }
}

/**
 * Registers the segment with the metadata storage coordinator and announces it for the
 * test server, failing the test (with the original cause attached) on an I/O error.
 */
private void announceOrFail(DataSegment segment) {
    try {
        metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
        announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
    } catch (IOException e) {
        throw new AssertionError("Failed to announce segment " + segment, e);
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNQuery(org.apache.druid.query.topn.TopNQuery) Query(org.apache.druid.query.Query) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) IOException(java.io.IOException) DataSegment(org.apache.druid.timeline.DataSegment) DerivativeDataSourceMetadata(org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata) TopNQuery(org.apache.druid.query.topn.TopNQuery) Test(org.junit.Test)

Example 82 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class TopNBenchmark method setupQueries.

/**
 * Registers the TopN query builders for the "basic" schema in SCHEMA_QUERY_MAP,
 * keyed by query name ("A", "numericSort", "alphanumericSort").
 */
private void setupQueries() {
    final Map<String, TopNQueryBuilder> buildersByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");

    // basic.A: several aggregators, ranked by the "sumFloatNormal" metric
    final QuerySegmentSpec specA = new MultipleIntervalSegmentSpec(
        Collections.singletonList(schemaInfo.getDataInterval()));
    final List<AggregatorFactory> aggsA = new ArrayList<>();
    aggsA.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    aggsA.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    aggsA.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    aggsA.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    aggsA.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
    buildersByName.put(
        "A",
        new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimSequential")
            .metric("sumFloatNormal")
            .intervals(specA)
            .aggregators(aggsA));

    // basic.numericSort: dimension-ordered TopN using the NUMERIC comparator
    final QuerySegmentSpec specNumeric = new MultipleIntervalSegmentSpec(
        Collections.singletonList(schemaInfo.getDataInterval()));
    final List<AggregatorFactory> aggsNumeric = new ArrayList<>();
    aggsNumeric.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    buildersByName.put(
        "numericSort",
        new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
            .intervals(specNumeric)
            .aggregators(aggsNumeric));

    // basic.alphanumericSort: dimension-ordered TopN using the ALPHANUMERIC comparator
    final QuerySegmentSpec specAlphanumeric = new MultipleIntervalSegmentSpec(
        Collections.singletonList(schemaInfo.getDataInterval()));
    final List<AggregatorFactory> aggsAlphanumeric = new ArrayList<>();
    aggsAlphanumeric.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    buildersByName.put(
        "alphanumericSort",
        new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
            .intervals(specAlphanumeric)
            .aggregators(aggsAlphanumeric));

    SCHEMA_QUERY_MAP.put("basic", buildersByName);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory)

Example 83 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class GroupByBenchmark method setupQueries.

/**
 * Builds the GroupBy benchmark queries and registers them in SCHEMA_QUERY_MAP, one
 * name-to-query map per generator schema ("basic", "simple", "simpleLong",
 * "simpleFloat", "nulls").
 */
private void setupQueries() {
    // ---- "basic" schema ----
    final Map<String, GroupByQuery> basicByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo basicInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");

    // basic.A
    final QuerySegmentSpec basicASpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicInfo.getDataInterval()));
    final List<AggregatorFactory> basicAAggs = new ArrayList<>();
    basicAAggs.add(new CountAggregatorFactory("cnt"));
    basicAAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    basicByName.put(
        "A",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(basicASpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(basicAAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());

    // basic.sorted: top 100 rows ordered by "sumLongSequential" descending (no query context set)
    final QuerySegmentSpec sortedSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicInfo.getDataInterval()));
    final List<AggregatorFactory> sortedAggs = new ArrayList<>();
    sortedAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    basicByName.put(
        "sorted",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(sortedSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(sortedAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setLimitSpec(
                new DefaultLimitSpec(
                    Collections.singletonList(
                        new OrderByColumnSpec("sumLongSequential", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)),
                    100))
            .build());

    // basic.nested: DAY-granularity inner query used as the datasource of a WEEK-granularity outer query
    final QuerySegmentSpec nestedSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicInfo.getDataInterval()));
    final List<AggregatorFactory> nestedAggs = new ArrayList<>();
    nestedAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    final GroupByQuery innerQuery = GroupByQuery.builder()
        .setDataSource("blah")
        .setQuerySegmentSpec(nestedSpec)
        .setDimensions(new DefaultDimensionSpec("dimSequential", null), new DefaultDimensionSpec("dimZipf", null))
        .setAggregatorSpecs(nestedAggs)
        .setGranularity(Granularities.DAY)
        .setContext(ImmutableMap.of("vectorize", vectorize))
        .build();
    basicByName.put(
        "nested",
        GroupByQuery.builder()
            .setDataSource(innerQuery)
            .setQuerySegmentSpec(nestedSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null))
            .setAggregatorSpecs(nestedAggs)
            .setGranularity(Granularities.WEEK)
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());

    // basic.filter: several aggregators, to observe how the aggregator count affects query performance
    final QuerySegmentSpec filterSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicInfo.getDataInterval()));
    final List<AggregatorFactory> filterAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    basicByName.put(
        "filter",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(filterSpec)
            .setDimensions(new DefaultDimensionSpec("dimUniform", null))
            .setAggregatorSpecs(filterAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());

    // basic.singleZipf: same aggregator set as basic.filter, grouped on "dimZipf" without a filter
    final QuerySegmentSpec zipfSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicInfo.getDataInterval()));
    final List<AggregatorFactory> zipfAggs = ImmutableList.of(
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
        new LongSumAggregatorFactory("rows", "rows"),
        new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
        new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    basicByName.put(
        "singleZipf",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(zipfSpec)
            .setDimensions(new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(zipfAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());

    SCHEMA_QUERY_MAP.put("basic", basicByName);

    // ---- "simple", "simpleLong", "simpleFloat" schemas ----
    // The same single-dimension query, reading "dimSequential" as STRING, LONG and FLOAT respectively.
    final Map<String, GroupByQuery> simpleByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo simpleInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
    // simple.A
    final QuerySegmentSpec simpleSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(simpleInfo.getDataInterval()));
    final List<AggregatorFactory> simpleAggs = new ArrayList<>();
    simpleAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    simpleByName.put(
        "A",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(simpleSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING))
            .setAggregatorSpecs(simpleAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());
    SCHEMA_QUERY_MAP.put("simple", simpleByName);

    final Map<String, GroupByQuery> simpleLongByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo simpleLongInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
    // simpleLong.A
    final QuerySegmentSpec simpleLongSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(simpleLongInfo.getDataInterval()));
    final List<AggregatorFactory> simpleLongAggs = new ArrayList<>();
    simpleLongAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    simpleLongByName.put(
        "A",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(simpleLongSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG))
            .setAggregatorSpecs(simpleLongAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());
    SCHEMA_QUERY_MAP.put("simpleLong", simpleLongByName);

    final Map<String, GroupByQuery> simpleFloatByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo simpleFloatInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
    // simpleFloat.A
    final QuerySegmentSpec simpleFloatSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(simpleFloatInfo.getDataInterval()));
    final List<AggregatorFactory> simpleFloatAggs = new ArrayList<>();
    simpleFloatAggs.add(new LongSumAggregatorFactory("rows", "rows"));
    simpleFloatByName.put(
        "A",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(simpleFloatSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT))
            .setAggregatorSpecs(simpleFloatAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());
    SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatByName);

    // ---- "nulls" schema ----
    final Map<String, GroupByQuery> nullsByName = new LinkedHashMap<>();
    final GeneratorSchemaInfo nullsInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
    // nulls.A: double sum over "doubleZipf", grouped on the string column "stringZipf"
    final QuerySegmentSpec nullsSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(nullsInfo.getDataInterval()));
    final List<AggregatorFactory> nullsAggs = new ArrayList<>();
    nullsAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
    nullsByName.put(
        "A",
        GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(nullsSpec)
            .setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING))
            .setAggregatorSpecs(nullsAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build());
    SCHEMA_QUERY_MAP.put("nulls", nullsByName);
}
Also used : BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) LinkedHashMap(java.util.LinkedHashMap) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList)

Example 84 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class ScanBenchmark method basicD.

/**
 * Creates a scan query builder over the schema's data interval, filtered by a bound
 * filter on "dimUniform" with limits "100" and "10000".
 *
 * @param basicSchema schema whose data interval the query covers
 * @return builder targeting datasource "blah" with the configured ordering
 */
private static Druids.ScanQueryBuilder basicD(final GeneratorSchemaInfo basicSchema) {
    final BoundDimFilter boundFilter = new BoundDimFilter("dimUniform", "100", "10000", true, true, true, null, null);
    final QuerySegmentSpec wholeInterval = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicSchema.getDataInterval()));
    return Druids.newScanQueryBuilder()
        .filters(boundFilter)
        .intervals(wholeInterval)
        .dataSource("blah")
        .order(ordering);
}
Also used : BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec)

Example 85 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class ScanBenchmark method basicC.

/**
 * Creates a scan query builder over the schema's data interval, filtered by a selector
 * on "dimUniform" that compares the StrlenExtractionFn result against "3".
 *
 * @param basicSchema schema whose data interval the query covers
 * @return builder targeting datasource "blah" with the configured ordering
 */
private static Druids.ScanQueryBuilder basicC(final GeneratorSchemaInfo basicSchema) {
    final SelectorDimFilter lengthFilter = new SelectorDimFilter("dimUniform", "3", StrlenExtractionFn.instance());
    final QuerySegmentSpec wholeInterval = new MultipleIntervalSegmentSpec(
        Collections.singletonList(basicSchema.getDataInterval()));
    return Druids.newScanQueryBuilder()
        .filters(lengthFilter)
        .intervals(wholeInterval)
        .dataSource("blah")
        .order(ordering);
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec)

Aggregations

MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 142, Test (org.junit.Test): 115, DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 53, LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 44, BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 39, QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 28, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 25, ResponseContext (org.apache.druid.query.context.ResponseContext): 22, QueryRunner (org.apache.druid.query.QueryRunner): 21, CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 21, QueryPlus (org.apache.druid.query.QueryPlus): 20, TableDataSource (org.apache.druid.query.TableDataSource): 19, FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 18, ArrayList (java.util.ArrayList): 17, QueryDataSource (org.apache.druid.query.QueryDataSource): 15, DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15, OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 15, Sequence (org.apache.druid.java.util.common.guava.Sequence): 14, FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 14, Result (org.apache.druid.query.Result): 13