Search in sources :

Example 46 with TimeseriesQuery

use of org.apache.druid.query.timeseries.TimeseriesQuery in project druid by druid-io.

the class RealtimeIndexTaskTest method sumMetric.

/**
 * Runs an all-granularity timeseries query over the "test_ds" data source through the given task's
 * query runner and long-sums a single metric.
 *
 * @param task   task whose query runner executes the query
 * @param filter dimension filter applied to the query
 * @param metric metric name, used both as the aggregator's output name and as its input field
 * @return {@code 0} when the query produces no result rows; otherwise the metric value read from the
 *         first result row (declared {@code @Nullable}, so callers should expect a possible null)
 */
@Nullable
public Long sumMetric(final Task task, final DimFilter filter, final String metric) {
    final TimeseriesQuery sumQuery = Druids.newTimeseriesQueryBuilder()
        .dataSource("test_ds")
        .filters(filter)
        .aggregators(ImmutableList.of(new LongSumAggregatorFactory(metric, metric)))
        .granularity(Granularities.ALL)
        .intervals("2000/3000")
        .build();
    final List<Result<TimeseriesResultValue>> rows =
        task.getQueryRunner(sumQuery).run(QueryPlus.wrap(sumQuery)).toList();
    // Only the first row is consulted; no rows at all is reported as a sum of zero.
    if (!rows.isEmpty()) {
        return rows.get(0).getValue().getLongMetric(metric);
    }
    return 0L;
}
Also used : TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) Result(org.apache.druid.query.Result) Nullable(javax.annotation.Nullable)

Example 47 with TimeseriesQuery

use of org.apache.druid.query.timeseries.TimeseriesQuery in project druid by druid-io.

the class SeekableStreamIndexTaskTestBase method countEvents.

/**
 * Long-sums the "rows" metric over the data source named by {@code OLD_DATA_SCHEMA}, using the given
 * task's query runner.
 *
 * @param task task whose query runner executes the query
 * @return {@code 0} when the query produces no result rows; otherwise the first row's "rows" value
 *         after being passed through {@code DimensionHandlerUtils.nullToZero}
 */
protected long countEvents(final Task task) {
    final TimeseriesQuery rowCountQuery = Druids.newTimeseriesQueryBuilder()
        .dataSource(OLD_DATA_SCHEMA.getDataSource())
        .aggregators(ImmutableList.of(new LongSumAggregatorFactory("rows", "rows")))
        .granularity(Granularities.ALL)
        .intervals("0000/3000")
        .build();
    final List<Result<TimeseriesResultValue>> rows =
        task.getQueryRunner(rowCountQuery).run(QueryPlus.wrap(rowCountQuery)).toList();
    if (rows.isEmpty()) {
        return 0L;
    }
    return DimensionHandlerUtils.nullToZero(rows.get(0).getValue().getLongMetric("rows"));
}
Also used : TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) Result(org.apache.druid.query.Result)

Example 48 with TimeseriesQuery

use of org.apache.druid.query.timeseries.TimeseriesQuery in project druid by druid-io.

the class DataSourceOptimizer method optimize.

/**
 * Does the main work of materialized view selection: transforms the user query into one or more sub-queries.
 *
 * In each sub-query, the dataSource is a derivative of the dataSource in the user query, and the union of all
 * sub-queries' intervals equals the intervals of the user query.
 *
 * The derived dataSource with the smallest average data size per segment granularity has the highest priority
 * to replace the dataSource in the user query.
 *
 * The query is returned unchanged (as a singleton list) when it is not an optimizable query type, when its
 * dataSource is not a TableDataSource, when the dataSource has no derivatives, when no derivative contains all
 * required fields, or when no derivative has timeline entries for any of the query intervals.
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with specified derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
    long start = System.currentTimeMillis();
    // only TopNQuery/TimeseriesQuery/GroupByQuery on a TableDataSource can be optimized
    if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery) || !(query.getDataSource() instanceof TableDataSource)) {
        return Collections.singletonList(query);
    }
    String datasourceName = ((TableDataSource) query.getDataSource()).getName();
    // get all derivatives for datasource in query. The derivatives set is sorted by average size of
    // per segment granularity.
    Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
    if (derivatives.isEmpty()) {
        return Collections.singletonList(query);
    }
    lock.readLock().lock();
    try {
        // make sure the per-datasource statistics counters exist before they are updated below
        totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
        hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
        AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
        // get all fields which the query required
        Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
        Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
        for (DerivativeDataSource derivativeDataSource : derivatives) {
            derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
            if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
                derivativesWithRequiredFields.add(derivativeDataSource);
            }
        }
        // if no derivatives contains all required dimensions, this materialized view selection failed.
        if (derivativesWithRequiredFields.isEmpty()) {
            missFields.computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>()).computeIfAbsent(requiredFields, rf -> new AtomicLong(0)).incrementAndGet();
            costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        List<Query> queries = new ArrayList<>();
        List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
        // the candidates were collected into a HashSet, so re-sort them before picking in priority order
        for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
            // intervals of this derivative's timeline entries that are found for the still-uncovered query intervals
            final List<Interval> derivativeIntervals = remainingQueryIntervals.stream()
                .flatMap(interval -> serverView.getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
                    .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
                    .lookup(interval)
                    .stream()
                    .map(TimelineObjectHolder::getInterval))
                .collect(Collectors.toList());
            // if the derivative has no timeline entries for any remaining query interval, it will
            // not be selected.
            if (derivativeIntervals.isEmpty()) {
                continue;
            }
            remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
            queries.add(query.withDataSource(new TableDataSource(derivativeDataSource.getName())).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals)));
            derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
            if (remainingQueryIntervals.isEmpty()) {
                break;
            }
        }
        // no derivative was selected: fall back to the unmodified user query
        if (queries.isEmpty()) {
            costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        // after materialized view selection, any query intervals still remaining are queried against
        // the original datasource.
        if (!remainingQueryIntervals.isEmpty()) {
            queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
        }
        hitCount.get(datasourceName).incrementAndGet();
        costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
        return queries;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Inject(com.google.inject.Inject) HashMap(java.util.HashMap) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Query(org.apache.druid.query.Query) Map(java.util.Map) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimelineServerView(org.apache.druid.client.TimelineServerView) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ImmutableSortedSet(com.google.common.collect.ImmutableSortedSet) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) TableDataSource(org.apache.druid.query.TableDataSource) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Collections(java.util.Collections) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) Query(org.apache.druid.query.Query) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) AtomicLong(java.util.concurrent.atomic.AtomicLong) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) TableDataSource(org.apache.druid.query.TableDataSource) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) List(java.util.List) 
ISE(org.apache.druid.java.util.common.ISE) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)

Example 49 with TimeseriesQuery

use of org.apache.druid.query.timeseries.TimeseriesQuery in project druid by druid-io.

the class MaterializedViewUtils method getRequiredFields.

/**
 * Collects the names of all fields a query needs: the columns required by its filter (if any), the
 * fields extracted from its aggregators, and its dimension(s).
 * Only TopNQuery/TimeseriesQuery/GroupByQuery are supported.
 *
 * @param query the query to inspect
 * @return set of field names required by the query
 * @throws UnsupportedOperationException if the query is none of the supported types
 */
public static Set<String> getRequiredFields(Query query) {
    final Set<String> requiredFields = new HashSet<>();
    // a query without a filter contributes no filter columns
    if (query.getFilter() != null) {
        requiredFields.addAll(query.getFilter().getRequiredColumns());
    }

    if (query instanceof TopNQuery) {
        final TopNQuery topN = (TopNQuery) query;
        requiredFields.addAll(extractFieldsFromAggregations(topN.getAggregatorSpecs()));
        requiredFields.add(topN.getDimensionSpec().getDimension());
        return requiredFields;
    }

    if (query instanceof TimeseriesQuery) {
        final TimeseriesQuery timeseries = (TimeseriesQuery) query;
        requiredFields.addAll(extractFieldsFromAggregations(timeseries.getAggregatorSpecs()));
        return requiredFields;
    }

    if (query instanceof GroupByQuery) {
        final GroupByQuery groupBy = (GroupByQuery) query;
        requiredFields.addAll(extractFieldsFromAggregations(groupBy.getAggregatorSpecs()));
        for (DimensionSpec dimensionSpec : groupBy.getDimensions()) {
            requiredFields.add(dimensionSpec.getDimension());
        }
        return requiredFields;
    }

    throw new UnsupportedOperationException("Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery");
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) HashSet(java.util.HashSet)

Example 50 with TimeseriesQuery

use of org.apache.druid.query.timeseries.TimeseriesQuery in project druid by druid-io.

the class MaterializedViewQueryQueryToolChestTest method testMakePostComputeManipulatorFn.

/**
 * Checks that the post-compute manipulator function obtained from MaterializedViewQueryQueryToolChest for a
 * MaterializedViewQuery wrapping a TimeseriesQuery keeps the result timestamp and existing entries, and adds
 * the manipulated value under the aggregator's name.
 */
@Test
public void testMakePostComputeManipulatorFn() {
    TimeseriesQuery realQuery = Druids.newTimeseriesQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.DAY_GRAN).intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).aggregators(QueryRunnerTestHelper.ROWS_COUNT).descending(true).build();
    MaterializedViewQuery materializedViewQuery = new MaterializedViewQuery(realQuery, null);
    // the warehouse only knows about TimeseriesQuery, which is the type of the wrapped query
    QueryToolChest materializedViewQueryQueryToolChest = new MaterializedViewQueryQueryToolChest(new MapQueryToolChestWarehouse(ImmutableMap.<Class<? extends Query>, QueryToolChest>builder().put(TimeseriesQuery.class, new TimeseriesQueryQueryToolChest()).build()));
    // manipulation function that replaces every metric value with a fixed marker string
    Function postFn = materializedViewQueryQueryToolChest.makePostComputeManipulatorFn(materializedViewQuery, new MetricManipulationFn() {

        @Override
        public Object manipulate(AggregatorFactory factory, Object object) {
            return "metricvalue1";
        }
    });
    Result<TimeseriesResultValue> result = new Result<>(DateTimes.nowUtc(), new TimeseriesResultValue(ImmutableMap.of("dim1", "dimvalue1")));
    Result<TimeseriesResultValue> postResult = (Result<TimeseriesResultValue>) postFn.apply(result);
    Map<String, Object> postResultMap = postResult.getValue().getBaseObject();
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages read correctly
    Assert.assertEquals(result.getTimestamp(), postResult.getTimestamp());
    Assert.assertEquals(2, postResultMap.size());
    Assert.assertEquals("metricvalue1", postResultMap.get(QueryRunnerTestHelper.ROWS_COUNT.getName()));
    Assert.assertEquals("dimvalue1", postResultMap.get("dim1"));
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Query(org.apache.druid.query.Query) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Result(org.apache.druid.query.Result) Function(com.google.common.base.Function) MapQueryToolChestWarehouse(org.apache.druid.query.MapQueryToolChestWarehouse) MetricManipulationFn(org.apache.druid.query.aggregation.MetricManipulationFn) Test(org.junit.Test)

Aggregations

TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery)130 Test (org.junit.Test)109 TimeseriesQueryQueryToolChest (org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest)58 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)50 Result (org.apache.druid.query.Result)39 TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue)35 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)29 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)25 QueryRunner (org.apache.druid.query.QueryRunner)22 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)21 FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator)17 Interval (org.joda.time.Interval)17 TimeseriesQueryEngine (org.apache.druid.query.timeseries.TimeseriesQueryEngine)15 TimeseriesQueryRunnerFactory (org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory)15 ArrayList (java.util.ArrayList)13 FinalizingFieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator)13 DateTime (org.joda.time.DateTime)12 Sequence (org.apache.druid.java.util.common.guava.Sequence)11 IOException (java.io.IOException)10 SpatialDimFilter (org.apache.druid.query.filter.SpatialDimFilter)9