Search in sources :

Example 6 with UnionDataSource

use of org.apache.druid.query.UnionDataSource in project druid by druid-io.

the class ClientQuerySegmentWalkerTest method testGroupByOnUnionOfTwoTables.

@Test
public void testGroupByOnUnionOfTwoTables() {
    final GroupByQuery query = (GroupByQuery) GroupByQuery.builder().setDataSource(new UnionDataSource(ImmutableList.of(new TableDataSource(FOO), new TableDataSource(BAR)))).setGranularity(Granularities.ALL).setInterval(Intervals.ONLY_ETERNITY).setDimensions(DefaultDimensionSpec.of("s")).setAggregatorSpecs(new CountAggregatorFactory("cnt")).build().withId(DUMMY_QUERY_ID);
    testQuery(query, ImmutableList.of(ExpectedQuery.cluster(query.withDataSource(new TableDataSource(FOO)).withSubQueryId("foo.1")), ExpectedQuery.cluster(query.withDataSource(new TableDataSource(BAR)).withSubQueryId("bar.2"))), ImmutableList.of(new Object[] { "a", 2L }, new Object[] { "b", 1L }, new Object[] { "c", 1L }, new Object[] { "x", 2L }, new Object[] { "y", 1L }, new Object[] { "z", 1L }));
    // note: this should really be 1, but in the interim queries that are composed of multiple queries count each
    // invocation of either the cluster or local walker in ClientQuerySegmentWalker
    Assert.assertEquals(2, scheduler.getTotalRun().get());
    Assert.assertEquals(2, scheduler.getTotalPrioritizedAndLaned().get());
    Assert.assertEquals(2, scheduler.getTotalAcquired().get());
    Assert.assertEquals(2, scheduler.getTotalReleased().get());
}
Also used : GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) UnionDataSource(org.apache.druid.query.UnionDataSource) Test(org.junit.Test)

Example 7 with UnionDataSource

use of org.apache.druid.query.UnionDataSource in project druid by druid-io.

the class ClientQuerySegmentWalkerTest method testGroupByOnUnionOfOneTable.

@Test
public void testGroupByOnUnionOfOneTable() {
    final GroupByQuery query = (GroupByQuery) GroupByQuery.builder().setDataSource(new UnionDataSource(ImmutableList.of(new TableDataSource(FOO)))).setGranularity(Granularities.ALL).setInterval(Intervals.ONLY_ETERNITY).setDimensions(DefaultDimensionSpec.of("s")).setAggregatorSpecs(new CountAggregatorFactory("cnt")).build().withId(DUMMY_QUERY_ID);
    testQuery(query, ImmutableList.of(ExpectedQuery.cluster(query.withDataSource(new TableDataSource(FOO)))), ImmutableList.of(new Object[] { "x", 2L }, new Object[] { "y", 1L }, new Object[] { "z", 1L }));
    Assert.assertEquals(1, scheduler.getTotalRun().get());
    Assert.assertEquals(1, scheduler.getTotalPrioritizedAndLaned().get());
    Assert.assertEquals(1, scheduler.getTotalAcquired().get());
    Assert.assertEquals(1, scheduler.getTotalReleased().get());
}
Also used : GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) UnionDataSource(org.apache.druid.query.UnionDataSource) Test(org.junit.Test)

Example 8 with UnionDataSource

use of org.apache.druid.query.UnionDataSource in project druid by druid-io.

the class MovingAverageQueryRunner method run.

@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
    MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
    List<Interval> intervals;
    final Period period;
    // Get the largest bucket from the list of averagers
    Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
    int buckets = opt.orElse(0);
    // Extend the interval beginning by specified bucket - 1
    if (maq.getGranularity() instanceof PeriodGranularity) {
        period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
        int offset = buckets <= 0 ? 0 : (1 - buckets);
        intervals = maq.getIntervals().stream().map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd())).collect(Collectors.toList());
    } else {
        throw new ISE("Only PeriodGranulaity is supported for movingAverage queries");
    }
    Sequence<Row> resultsSeq;
    DataSource dataSource = maq.getDataSource();
    if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource || dataSource instanceof QueryDataSource)) {
        // build groupBy query from movingAverage query
        GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(dataSource).setInterval(intervals).setDimFilter(maq.getFilter()).setGranularity(maq.getGranularity()).setDimensions(maq.getDimensions()).setAggregatorSpecs(maq.getAggregatorSpecs()).setPostAggregatorSpecs(maq.getPostAggregatorSpecs()).setContext(maq.getContext());
        GroupByQuery gbq = builder.build();
        ResponseContext gbqResponseContext = ResponseContext.createEmpty();
        gbqResponseContext.merge(responseContext);
        gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
        Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(gbq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
    } else {
        // no dimensions, so optimize this as a TimeSeries
        TimeseriesQuery tsq = new TimeseriesQuery(dataSource, new MultipleIntervalSegmentSpec(intervals), false, null, maq.getFilter(), maq.getGranularity(), maq.getAggregatorSpecs(), maq.getPostAggregatorSpecs(), 0, maq.getContext());
        ResponseContext tsqResponseContext = ResponseContext.createEmpty();
        tsqResponseContext.merge(responseContext);
        tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
        Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(tsq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
    }
    // Process into period buckets
    Sequence<RowBucket> bucketedMovingAvgResults = Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
    // Apply the windows analysis functions
    Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(bucketedMovingAvgResults, maq.getDimensions(), maq.getAveragerSpecs(), maq.getPostAggregatorSpecs(), maq.getAggregatorSpecs()));
    // Apply any postAveragers
    Sequence<Row> movingAvgResultsWithPostAveragers = Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
    // remove rows outside the reporting window
    List<Interval> reportingIntervals = maq.getIntervals();
    movingAvgResults = Sequences.filter(movingAvgResultsWithPostAveragers, row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp())));
    // Apply any having, sorting, and limits
    movingAvgResults = maq.applyLimit(movingAvgResults);
    return movingAvgResults;
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) MapBasedRow(org.apache.druid.data.input.MapBasedRow) AveragerFactory(org.apache.druid.query.movingaverage.averagers.AveragerFactory) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) Row(org.apache.druid.data.input.Row) QueryStats(org.apache.druid.server.QueryStats) Interval(org.joda.time.Interval) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Period(org.joda.time.Period) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) ResponseContext(org.apache.druid.query.context.ResponseContext) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) Throwables(com.google.common.base.Throwables) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) RequestLogger(org.apache.druid.server.log.RequestLogger) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) Result(org.apache.druid.query.Result) List(java.util.List) UnionDataSource(org.apache.druid.query.UnionDataSource) RequestLogLine(org.apache.druid.server.RequestLogLine) Optional(java.util.Optional) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Result(org.apache.druid.query.Result) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ResponseContext(org.apache.druid.query.context.ResponseContext) ISE(org.apache.druid.java.util.common.ISE) ResultRow(org.apache.druid.query.groupby.ResultRow) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Period(org.joda.time.Period) UnionDataSource(org.apache.druid.query.UnionDataSource) DataSource(org.apache.druid.query.DataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) UnionDataSource(org.apache.druid.query.UnionDataSource) TableDataSource(org.apache.druid.query.TableDataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) QueryStats(org.apache.druid.server.QueryStats) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) ResultRow(org.apache.druid.query.groupby.ResultRow) Interval(org.joda.time.Interval)

Example 9 with UnionDataSource

use of org.apache.druid.query.UnionDataSource in project druid by druid-io.

the class TimeSeriesUnionQueryRunnerTest method testUnionResultMerging.

@Test
public void testUnionResultMerging() {
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource(new UnionDataSource(Lists.newArrayList(new TableDataSource("ds1"), new TableDataSource("ds2")))).granularity(QueryRunnerTestHelper.DAY_GRAN).intervals(QueryRunnerTestHelper.FIRST_TO_THIRD).aggregators(Arrays.asList(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))).descending(descending).build();
    QueryToolChest toolChest = new TimeseriesQueryQueryToolChest();
    final List<Result<TimeseriesResultValue>> ds1 = Lists.newArrayList(new Result<>(DateTimes.of("2011-04-02"), new TimeseriesResultValue(ImmutableMap.of("rows", 1L, "idx", 2L))), new Result<>(DateTimes.of("2011-04-03"), new TimeseriesResultValue(ImmutableMap.of("rows", 3L, "idx", 4L))));
    final List<Result<TimeseriesResultValue>> ds2 = Lists.newArrayList(new Result<>(DateTimes.of("2011-04-01"), new TimeseriesResultValue(ImmutableMap.of("rows", 5L, "idx", 6L))), new Result<>(DateTimes.of("2011-04-02"), new TimeseriesResultValue(ImmutableMap.of("rows", 7L, "idx", 8L))), new Result<>(DateTimes.of("2011-04-04"), new TimeseriesResultValue(ImmutableMap.of("rows", 9L, "idx", 10L))));
    QueryRunner mergingrunner = toolChest.mergeResults(new UnionQueryRunner<>(new QueryRunner<Result<TimeseriesResultValue>>() {

        @Override
        public Sequence<Result<TimeseriesResultValue>> run(QueryPlus<Result<TimeseriesResultValue>> queryPlus, ResponseContext responseContext) {
            if (queryPlus.getQuery().getDataSource().equals(new TableDataSource("ds1"))) {
                return Sequences.simple(descending ? Lists.reverse(ds1) : ds1);
            } else {
                return Sequences.simple(descending ? Lists.reverse(ds2) : ds2);
            }
        }
    }));
    List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(new Result<>(DateTimes.of("2011-04-01"), new TimeseriesResultValue(ImmutableMap.of("rows", 5L, "idx", 6L))), new Result<>(DateTimes.of("2011-04-02"), new TimeseriesResultValue(ImmutableMap.of("rows", 8L, "idx", 10L))), new Result<>(DateTimes.of("2011-04-03"), new TimeseriesResultValue(ImmutableMap.of("rows", 3L, "idx", 4L))), new Result<>(DateTimes.of("2011-04-04"), new TimeseriesResultValue(ImmutableMap.of("rows", 9L, "idx", 10L))));
    Iterable<Result<TimeseriesResultValue>> results = mergingrunner.run(QueryPlus.wrap(query)).toList();
    assertExpectedResults(expectedResults, results);
}
Also used : LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) QueryToolChest(org.apache.druid.query.QueryToolChest) UnionDataSource(org.apache.druid.query.UnionDataSource) UnionQueryRunner(org.apache.druid.query.UnionQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) Result(org.apache.druid.query.Result) TableDataSource(org.apache.druid.query.TableDataSource) ResponseContext(org.apache.druid.query.context.ResponseContext) QueryPlus(org.apache.druid.query.QueryPlus) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 10 with UnionDataSource

use of org.apache.druid.query.UnionDataSource in project druid by druid-io.

the class DataSourceAnalysisTest method testQueryOnUnion.

@Test
public void testQueryOnUnion() {
    final UnionDataSource unionDataSource = new UnionDataSource(ImmutableList.of(TABLE_FOO, TABLE_BAR));
    final QueryDataSource queryDataSource = subquery(unionDataSource);
    final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(queryDataSource);
    Assert.assertTrue(analysis.isConcreteBased());
    Assert.assertTrue(analysis.isConcreteTableBased());
    Assert.assertFalse(analysis.isGlobal());
    Assert.assertTrue(analysis.isQuery());
    Assert.assertEquals(queryDataSource, analysis.getDataSource());
    Assert.assertEquals(unionDataSource, analysis.getBaseDataSource());
    Assert.assertEquals(Optional.empty(), analysis.getBaseTableDataSource());
    Assert.assertEquals(Optional.of(unionDataSource), analysis.getBaseUnionDataSource());
    Assert.assertEquals(Optional.of(queryDataSource.getQuery()), analysis.getBaseQuery());
    Assert.assertEquals(Optional.of(new MultipleIntervalSegmentSpec(MILLENIUM_INTERVALS)), analysis.getBaseQuerySegmentSpec());
    Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
    Assert.assertFalse(analysis.isJoin());
}
Also used : QueryDataSource(org.apache.druid.query.QueryDataSource) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) UnionDataSource(org.apache.druid.query.UnionDataSource) Test(org.junit.Test)

Aggregations

UnionDataSource (org.apache.druid.query.UnionDataSource)10 Test (org.junit.Test)8 TableDataSource (org.apache.druid.query.TableDataSource)7 GlobalTableDataSource (org.apache.druid.query.GlobalTableDataSource)4 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)4 GroupByQuery (org.apache.druid.query.groupby.GroupByQuery)4 QueryDataSource (org.apache.druid.query.QueryDataSource)3 DataSource (org.apache.druid.query.DataSource)2 QueryPlus (org.apache.druid.query.QueryPlus)2 QueryRunner (org.apache.druid.query.QueryRunner)2 Result (org.apache.druid.query.Result)2 ResponseContext (org.apache.druid.query.context.ResponseContext)2 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)2 Function (com.google.common.base.Function)1 Throwables (com.google.common.base.Throwables)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1