
Example 31 with MultipleIntervalSegmentSpec

Use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class GroupByBenchmark, method setupQueries:

private void setupQueries() {
    // queries for the basic schema
    Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        basicQueries.put("A", queryA);
    }
    {
        // basic.nested
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery subqueryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.DAY)
            .build();
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource(subqueryA)
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("dimSequential", null)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.WEEK)
            .build();
        basicQueries.put("nested", queryA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
    // simple one column schema, for testing performance difference between querying on numeric values as Strings and
    // directly as longs
    Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleSchema = BenchmarkSchemas.SCHEMA_MAP.get("simple");
    {
        // simple.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.STRING)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simple", simpleQueries);
    Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleLongSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleLong");
    {
        // simpleLong.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleLongSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.LONG)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleLongQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
    Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo simpleFloatSchema = BenchmarkSchemas.SCHEMA_MAP.get("simpleFloat");
    {
        // simpleFloat.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(simpleFloatSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(Lists.<DimensionSpec>newArrayList(
                new DefaultDimensionSpec("dimSequential", "dimSequential", ValueType.FLOAT)))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .build();
        simpleFloatQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
}
Also used : GroupByQuery(io.druid.query.groupby.GroupByQuery) BenchmarkSchemaInfo(io.druid.benchmark.datagen.BenchmarkSchemaInfo) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) QuerySegmentSpec(io.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) LinkedHashMap(java.util.LinkedHashMap)
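
Note that MultipleIntervalSegmentSpec accepts any number of intervals; the benchmark above simply wraps a single one. A minimal sketch (not part of the benchmark) with two disjoint ranges, using the same ISO-8601 interval strings seen throughout these examples:

// Sketch only: a spec spanning two disjoint ranges. Overlapping intervals are
// condensed inside the spec (assumed from this Druid version's behavior).
QuerySegmentSpec twoRanges = new MultipleIntervalSegmentSpec(
    Arrays.asList(
        new Interval("2011-01-01/2011-02-01"),
        new Interval("2011-03-01/2011-04-01")));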

Example 32 with MultipleIntervalSegmentSpec

Use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class SearchBenchmark, method basicD:

private static SearchQueryBuilder basicD(final BenchmarkSchemaInfo basicSchema) {
    final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    final List<String> dimUniformFilterVals = Lists.newArrayList();
    // Sample every step-th value so the IN filter matches roughly 10% of the
    // dimension's 100,000 generated values.
    final int resultNum = (int) (100000 * 0.1);
    final int step = 100000 / resultNum;
    for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) {
        dimUniformFilterVals.add(String.valueOf(i));
    }
    final String dimName = "dimUniform";
    final List<DimFilter> dimFilters = Lists.newArrayList();
    dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null));
    dimFilters.add(new SelectorDimFilter(dimName, "3", null));
    dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null));
    // The repeated IN filters below mirror the original benchmark; they appear
    // intended to exercise AND-filter handling of duplicate clauses.
    dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null));
    dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null));
    dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, null));
    return Druids.newSearchQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(intervalSpec)
        .query("")
        .dimensions(Lists.newArrayList("dimUniform"))
        .filters(new AndDimFilter(dimFilters));
}
Also used : BoundDimFilter(io.druid.query.filter.BoundDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) InDimFilter(io.druid.query.filter.InDimFilter) DimFilter(io.druid.query.filter.DimFilter) QuerySegmentSpec(io.druid.query.spec.QuerySegmentSpec) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec)
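
Since basicD returns the builder rather than a finished query, a caller completes it with build(). A minimal usage sketch (the call site is assumed, not shown in the benchmark):

// Hypothetical call site: finish the builder into a runnable SearchQuery.
SearchQuery query = basicD(basicSchema).build();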

Example 33 with MultipleIntervalSegmentSpec

Use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class GroupByQueryRunnerTest, method testMergedHavingSpec:

@Test
public void testMergedHavingSpec() {
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L));
    GroupByQuery.Builder builder = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setInterval("2011-04-02/2011-04-04")
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
        .setHavingSpec(new OrHavingSpec(ImmutableList.<HavingSpec>of(
            new GreaterThanHavingSpec("rows", 2L),
            new EqualToHavingSpec("idx", 217L))));
    GroupByQuery fullQuery = builder.build();
    QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<Row>() {

        @Override
        public Sequence<Row> run(Query<Row> query, Map<String, Object> responseContext) {
            // simulate two daily segments
            final Query query1 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03"))));
            final Query query2 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-03/2011-04-04"))));
            return new MergeSequence(query.getResultOrdering(), Sequences.simple(Arrays.asList(runner.run(query1, responseContext), runner.run(query2, responseContext))));
        }
    });
    Map<String, Object> context = Maps.newHashMap();
    TestHelper.assertExpectedObjects(expectedResults, mergedRunner.run(fullQuery, context), "merged");
}
Also used : DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) RegexFilteredDimensionSpec(io.druid.query.dimension.RegexFilteredDimensionSpec) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) EqualToHavingSpec(io.druid.query.groupby.having.EqualToHavingSpec) Query(io.druid.query.Query) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) PeriodGranularity(io.druid.java.util.common.granularity.PeriodGranularity) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) MergeSequence(io.druid.java.util.common.guava.MergeSequence) GreaterThanHavingSpec(io.druid.query.groupby.having.GreaterThanHavingSpec) OrHavingSpec(io.druid.query.groupby.having.OrHavingSpec) Period(org.joda.time.Period) Sequence(io.druid.java.util.common.guava.Sequence) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) QueryRunner(io.druid.query.QueryRunner) HavingSpec(io.druid.query.groupby.having.HavingSpec) DimFilterHavingSpec(io.druid.query.groupby.having.DimFilterHavingSpec) BaseHavingSpec(io.druid.query.groupby.having.BaseHavingSpec) DimensionSelectorHavingSpec(io.druid.query.groupby.having.DimensionSelectorHavingSpec) Row(io.druid.data.input.Row) Interval(org.joda.time.Interval) Test(org.junit.Test)
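
The anonymous runner above shows the standard merge idiom: withQuerySegmentSpec returns a copy of the query restricted to one day's interval, and MergeSequence performs an ordered merge of the per-segment results using the query's own result ordering. The same shape extends to any number of simulated segments; a hedged sketch, where perSegmentSequences is a hypothetical List<Sequence<Row>>:

// Sketch: ordered merge of N per-segment result sequences. Assumes each
// sequence is already sorted by query.getResultOrdering().
Sequence<Row> merged = new MergeSequence<>(
    query.getResultOrdering(),
    Sequences.simple(perSegmentSequences));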

Example 34 with MultipleIntervalSegmentSpec

Use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class GroupByQueryRunnerTest, method doTestMergeResultsWithOrderBy:

private void doTestMergeResultsWithOrderBy(LimitSpec orderBySpec, List<Row> expectedResults) {
    GroupByQuery.Builder builder = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setInterval("2011-04-02/2011-04-04")
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
        .setLimitSpec(orderBySpec);
    final GroupByQuery fullQuery = builder.build();
    QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<Row>() {

        @Override
        public Sequence<Row> run(Query<Row> query, Map<String, Object> responseContext) {
            // simulate two daily segments
            final Query query1 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03"))));
            final Query query2 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-03/2011-04-04"))));
            return new MergeSequence(query.getResultOrdering(), Sequences.simple(Arrays.asList(runner.run(query1, responseContext), runner.run(query2, responseContext))));
        }
    });
    Map<String, Object> context = Maps.newHashMap();
    TestHelper.assertExpectedObjects(expectedResults, mergedRunner.run(fullQuery, context), "merged");
}
Also used : DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) RegexFilteredDimensionSpec(io.druid.query.dimension.RegexFilteredDimensionSpec) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) Query(io.druid.query.Query) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) PeriodGranularity(io.druid.java.util.common.granularity.PeriodGranularity) Period(org.joda.time.Period) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(io.druid.java.util.common.guava.Sequence) MergeSequence(io.druid.java.util.common.guava.MergeSequence) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) QueryRunner(io.druid.query.QueryRunner) Row(io.druid.data.input.Row) Interval(org.joda.time.Interval)
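
doTestMergeResultsWithOrderBy is parameterized on the LimitSpec, so each caller controls the ordering applied to the merged result. A hypothetical invocation (argument values are illustrative, not taken from the test class):

// Hypothetical call: order merged rows by "alias" ascending and keep 10.
doTestMergeResultsWithOrderBy(
    new DefaultLimitSpec(
        ImmutableList.of(new OrderByColumnSpec("alias", OrderByColumnSpec.Direction.ASCENDING)),
        10),
    expectedResults);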

Example 35 with MultipleIntervalSegmentSpec

Use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class TopNQueryQueryToolChestTest, method testCacheStrategy:

@Test
public void testCacheStrategy() throws Exception {
    CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = new TopNQueryQueryToolChest(null, null).getCacheStrategy(
        new TopNQuery(
            new TableDataSource("dummy"),
            VirtualColumns.EMPTY,
            new DefaultDimensionSpec("test", "test"),
            new NumericTopNMetricSpec("metric1"),
            3,
            new MultipleIntervalSegmentSpec(ImmutableList.of(new Interval("2015-01-01/2015-01-02"))),
            null,
            Granularities.ALL,
            ImmutableList.<AggregatorFactory>of(new CountAggregatorFactory("metric1")),
            ImmutableList.<PostAggregator>of(new ConstantPostAggregator("post", 10)),
            null));
    // Test timestamps that result in integer-sized millis.
    final Result<TopNResultValue> result = new Result<>(
        new DateTime(123L),
        new TopNResultValue(Arrays.asList(ImmutableMap.<String, Object>of("test", "val1", "metric1", 2))));
    Object preparedValue = strategy.prepareForCache().apply(result);
    ObjectMapper objectMapper = new DefaultObjectMapper();
    Object fromCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromCacheResult = strategy.pullFromCache().apply(fromCacheValue);
    Assert.assertEquals(result, fromCacheResult);
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) TableDataSource(io.druid.query.TableDataSource) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Interval(org.joda.time.Interval) Test(org.junit.Test)
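
For reference, the segment spec constructed in this test also has a stable JSON form. A hedged sketch of reading one back with the same objectMapper (the "intervals" type name is assumed from Druid's QuerySegmentSpec annotations; verify against your version):

// Sketch (assumed JSON shape): read a MultipleIntervalSegmentSpec from query JSON.
QuerySegmentSpec spec = objectMapper.readValue(
    "{\"type\":\"intervals\",\"intervals\":[\"2015-01-01/2015-01-02\"]}",
    QuerySegmentSpec.class);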

Aggregations

MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec): 37 uses
Interval (org.joda.time.Interval): 26 uses
Test (org.junit.Test): 20 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 13 uses
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 12 uses
QuerySegmentSpec (io.druid.query.spec.QuerySegmentSpec): 10 uses
Result (io.druid.query.Result): 9 uses
ArrayList (java.util.ArrayList): 9 uses
Row (io.druid.data.input.Row): 8 uses
List (java.util.List): 8 uses
BenchmarkSchemaInfo (io.druid.benchmark.datagen.BenchmarkSchemaInfo): 7 uses
Sequence (io.druid.java.util.common.guava.Sequence): 7 uses
Query (io.druid.query.Query): 7 uses
QueryRunner (io.druid.query.QueryRunner): 7 uses
TableDataSource (io.druid.query.TableDataSource): 7 uses
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 6 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 6 uses
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 6 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec): 6 uses
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 6 uses