Usage of org.apache.druid.query.timeseries.TimeseriesQuery in the druid-io/druid project: class RealtimeIndexTaskTest, method sumMetric.
@Nullable
public Long sumMetric(final Task task, final DimFilter filter, final String metric) {
  // Issue a LongSum timeseries query against the task's data, filtered as requested,
  // over a wide-open interval so every row is covered.
  final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
                                      .dataSource("test_ds")
                                      .filters(filter)
                                      .aggregators(ImmutableList.of(new LongSumAggregatorFactory(metric, metric)))
                                      .granularity(Granularities.ALL)
                                      .intervals("2000/3000")
                                      .build();
  final List<Result<TimeseriesResultValue>> results =
      task.getQueryRunner(query).run(QueryPlus.wrap(query)).toList();
  // No results means no rows matched; report a sum of zero.
  return results.isEmpty() ? 0L : results.get(0).getValue().getLongMetric(metric);
}
Usage of org.apache.druid.query.timeseries.TimeseriesQuery in the druid-io/druid project: class SeekableStreamIndexTaskTestBase, method countEvents.
protected long countEvents(final Task task) {
  // Count ingested events by summing the "rows" metric over an interval wide
  // enough to contain all data.
  final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
                                      .dataSource(OLD_DATA_SCHEMA.getDataSource())
                                      .aggregators(ImmutableList.of(new LongSumAggregatorFactory("rows", "rows")))
                                      .granularity(Granularities.ALL)
                                      .intervals("0000/3000")
                                      .build();
  final List<Result<TimeseriesResultValue>> results =
      task.getQueryRunner(query).run(QueryPlus.wrap(query)).toList();
  if (results.isEmpty()) {
    return 0L;
  }
  // nullToZero guards against a null metric value from an all-null aggregation.
  return DimensionHandlerUtils.nullToZero(results.get(0).getValue().getLongMetric("rows"));
}
Usage of org.apache.druid.query.timeseries.TimeseriesQuery in the druid-io/druid project: class DataSourceOptimizer, method optimize.
/**
 * Do main work about materialized view selection: transform user query to one or more sub-queries.
 *
 * In the sub-query, the dataSource is the derivative of dataSource in user query, and sum of all sub-queries'
 * intervals equals the interval in user query
 *
 * Derived dataSource with smallest average data size per segment granularity have highest priority to replace the
 * datasource in user query
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with specified derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
  long start = System.currentTimeMillis();
  // only TableDataSource can be optimized
  if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery)
      || !(query.getDataSource() instanceof TableDataSource)) {
    return Collections.singletonList(query);
  }
  String datasourceName = ((TableDataSource) query.getDataSource()).getName();
  // get all derivatives for datasource in query. The derivatives set is sorted by average size of
  // per segment granularity.
  Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
  if (derivatives.isEmpty()) {
    return Collections.singletonList(query);
  }
  lock.readLock().lock();
  try {
    totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
    hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
    // computeIfAbsent returns the mapped AtomicLong, so this local is the same object
    // that costTime.get(datasourceName) would return; reuse it below instead of re-looking up.
    AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
    // get all fields which the query required
    Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
    Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
    for (DerivativeDataSource derivativeDataSource : derivatives) {
      derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
      if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
        derivativesWithRequiredFields.add(derivativeDataSource);
      }
    }
    // if no derivatives contains all required dimensions, this materialized view selection failed.
    if (derivativesWithRequiredFields.isEmpty()) {
      missFields
          .computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>())
          .computeIfAbsent(requiredFields, rf -> new AtomicLong(0))
          .incrementAndGet();
      costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
      return Collections.singletonList(query);
    }
    List<Query> queries = new ArrayList<>();
    List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
    for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
      // Collect the intervals of the remaining query range that this derivative actually has data for.
      final List<Interval> derivativeIntervals = remainingQueryIntervals.stream()
          .flatMap(interval -> serverView
              .getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
              .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
              .lookup(interval)
              .stream()
              .map(TimelineObjectHolder::getInterval))
          .collect(Collectors.toList());
      // if a derivative covers no part of the remaining intervals, it will
      // not be selected.
      if (derivativeIntervals.isEmpty()) {
        continue;
      }
      remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
      queries.add(
          query.withDataSource(new TableDataSource(derivativeDataSource.getName()))
               .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals))
      );
      derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
      if (remainingQueryIntervals.isEmpty()) {
        break;
      }
    }
    if (queries.isEmpty()) {
      costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
      return Collections.singletonList(query);
    }
    // any intervals not covered by derivatives are still answered by
    // the original datasource.
    if (!remainingQueryIntervals.isEmpty()) {
      queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
    }
    hitCount.get(datasourceName).incrementAndGet();
    costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
    return queries;
  } finally {
    lock.readLock().unlock();
  }
}
Usage of org.apache.druid.query.timeseries.TimeseriesQuery in the druid-io/druid project: class MaterializedViewUtils, method getRequiredFields.
/**
 * Extract every dimension (column) a query requires, including filter columns,
 * aggregator inputs, and grouping/topN dimensions.
 * Only TopNQuery/TimeseriesQuery/GroupByQuery are supported.
 *
 * @param query the query to inspect
 * @return the set of dimension names referenced by the query
 */
public static Set<String> getRequiredFields(Query query) {
  final Set<String> fields = new HashSet<>();
  // Filter columns are required regardless of query type.
  if (query.getFilter() != null) {
    fields.addAll(query.getFilter().getRequiredColumns());
  }
  if (query instanceof TopNQuery) {
    TopNQuery topNQuery = (TopNQuery) query;
    fields.addAll(extractFieldsFromAggregations(topNQuery.getAggregatorSpecs()));
    fields.add(topNQuery.getDimensionSpec().getDimension());
  } else if (query instanceof TimeseriesQuery) {
    // Timeseries has no dimensions beyond its aggregator inputs.
    fields.addAll(extractFieldsFromAggregations(((TimeseriesQuery) query).getAggregatorSpecs()));
  } else if (query instanceof GroupByQuery) {
    GroupByQuery groupByQuery = (GroupByQuery) query;
    fields.addAll(extractFieldsFromAggregations(groupByQuery.getAggregatorSpecs()));
    for (DimensionSpec dimensionSpec : groupByQuery.getDimensions()) {
      fields.add(dimensionSpec.getDimension());
    }
  } else {
    throw new UnsupportedOperationException("Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery");
  }
  return fields;
}
Usage of org.apache.druid.query.timeseries.TimeseriesQuery in the druid-io/druid project: class MaterializedViewQueryQueryToolChestTest, method testMakePostComputeManipulatorFn.
@Test
public void testMakePostComputeManipulatorFn() {
  // Build a plain timeseries query and wrap it in a MaterializedViewQuery so the
  // toolchest must delegate manipulator creation to the inner query's toolchest.
  TimeseriesQuery realQuery = Druids.newTimeseriesQueryBuilder()
                                    .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
                                    .granularity(QueryRunnerTestHelper.DAY_GRAN)
                                    .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
                                    .aggregators(QueryRunnerTestHelper.ROWS_COUNT)
                                    .descending(true)
                                    .build();
  MaterializedViewQuery materializedViewQuery = new MaterializedViewQuery(realQuery, null);
  QueryToolChest materializedViewQueryQueryToolChest = new MaterializedViewQueryQueryToolChest(
      new MapQueryToolChestWarehouse(
          ImmutableMap.<Class<? extends Query>, QueryToolChest>builder()
                      .put(TimeseriesQuery.class, new TimeseriesQueryQueryToolChest())
                      .build()
      )
  );
  // The manipulation fn replaces every aggregated metric value with a sentinel string.
  Function postFn = materializedViewQueryQueryToolChest.makePostComputeManipulatorFn(
      materializedViewQuery,
      new MetricManipulationFn() {
        @Override
        public Object manipulate(AggregatorFactory factory, Object object) {
          return "metricvalue1";
        }
      }
  );
  Result<TimeseriesResultValue> result =
      new Result<>(DateTimes.nowUtc(), new TimeseriesResultValue(ImmutableMap.of("dim1", "dimvalue1")));
  Result<TimeseriesResultValue> postResult = (Result<TimeseriesResultValue>) postFn.apply(result);
  Map<String, Object> postResultMap = postResult.getValue().getBaseObject();
  // JUnit's assertEquals takes (expected, actual) — expected values come first
  // so failure messages read correctly.
  Assert.assertEquals(result.getTimestamp(), postResult.getTimestamp());
  Assert.assertEquals(2, postResultMap.size());
  Assert.assertEquals("metricvalue1", postResultMap.get(QueryRunnerTestHelper.ROWS_COUNT.getName()));
  Assert.assertEquals("dimvalue1", postResultMap.get("dim1"));
}
Aggregations