Use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
In class SchemalessTestSimpleTest, method testFullOnTopN:
// @Test TODO: Handling of null values is inconsistent right now, need to make it all consistent and re-enable test
// TODO: Complain to Eric when you see this. It shouldn't be like this...
@Ignore
@SuppressWarnings("unused")
public void testFullOnTopN() {
  TopNQuery query = new TopNQueryBuilder()
      .dataSource(dataSource)
      .granularity(ALL_GRAN)
      .dimension(marketDimension)
      .metric(indexMetric)
      .threshold(3)
      .intervals(fullOnInterval)
      .aggregators(
          Lists.newArrayList(
              Iterables.concat(
                  commonAggregators,
                  Lists.newArrayList(
                      new DoubleMaxAggregatorFactory("maxIndex", "index"),
                      new DoubleMinAggregatorFactory("minIndex", "index")
                  )
              )
          )
      )
      .postAggregators(addRowsIndexConstant)
      .build();

  List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          DateTimes.of("2011-01-12T00:00:00.000Z"),
          new TopNResultValue(
              Arrays.asList(
                  new DimensionAndMetricValueExtractor(
                      ImmutableMap.<String, Object>builder()
                          .put("market", "spot")
                          .put("rows", 4L)
                          .put("index", 400.0D)
                          .put("addRowsIndexConstant", 405.0D)
                          .put("uniques", 1.0002442201269182D)
                          .put("maxIndex", 100.0)
                          .put("minIndex", 100.0)
                          .build()
                  ),
                  new DimensionAndMetricValueExtractor(
                      ImmutableMap.<String, Object>builder()
                          .put("market", "")
                          .put("rows", 2L)
                          .put("index", 200.0D)
                          .put("addRowsIndexConstant", 203.0D)
                          .put("uniques", 0.0)
                          .put("maxIndex", 100.0D)
                          .put("minIndex", 100.0D)
                          .build()
                  ),
                  new DimensionAndMetricValueExtractor(
                      ImmutableMap.<String, Object>builder()
                          .put("market", "total_market")
                          .put("rows", 2L)
                          .put("index", 200.0D)
                          .put("addRowsIndexConstant", 203.0D)
                          .put("uniques", 1.0002442201269182D)
                          .put("maxIndex", 100.0D)
                          .put("minIndex", 100.0D)
                          .build()
                  )
              )
          )
      )
  );

  try (CloseableStupidPool<ByteBuffer> pool = TestQueryRunners.createDefaultNonBlockingPool()) {
    QueryRunner runner = TestQueryRunners.makeTopNQueryRunner(segment, pool);
    TestHelper.assertExpectedResults(expectedResults, runner.run(QueryPlus.wrap(query)));
  }
}
Use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
In class DataSourceOptimizer, method optimize:
/**
 * Do the main work of materialized view selection: transform the user query into one or more sub-queries.
 *
 * In each sub-query, the dataSource is a derivative of the dataSource in the user query, and the union of all
 * sub-queries' intervals equals the interval in the user query.
 *
 * The derived dataSource with the smallest average data size per segment granularity has the highest priority
 * to replace the dataSource in the user query.
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with specified derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
  long start = System.currentTimeMillis();
  // only TableDataSource can be optimized
  if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery)
      || !(query.getDataSource() instanceof TableDataSource)) {
    return Collections.singletonList(query);
  }
  String datasourceName = ((TableDataSource) query.getDataSource()).getName();
  // get all derivatives for the datasource in the query. The derivatives set is sorted by
  // average size per segment granularity.
  Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
  if (derivatives.isEmpty()) {
    return Collections.singletonList(query);
  }
  lock.readLock().lock();
  try {
    totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
    hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
    AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
    // get all fields which the query requires
    Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
    Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
    for (DerivativeDataSource derivativeDataSource : derivatives) {
      derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
      if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
        derivativesWithRequiredFields.add(derivativeDataSource);
      }
    }
    // if no derivative contains all required dimensions, materialized view selection fails.
    if (derivativesWithRequiredFields.isEmpty()) {
      missFields
          .computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>())
          .computeIfAbsent(requiredFields, rf -> new AtomicLong(0))
          .incrementAndGet();
      costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
      return Collections.singletonList(query);
    }
    List<Query> queries = new ArrayList<>();
    List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
    for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
      final List<Interval> derivativeIntervals = remainingQueryIntervals
          .stream()
          .flatMap(
              interval -> serverView
                  .getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
                  .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
                  .lookup(interval)
                  .stream()
                  .map(TimelineObjectHolder::getInterval)
          )
          .collect(Collectors.toList());
      // if a derivative has no segments in the remaining query intervals, it will
      // not be selected.
      if (derivativeIntervals.isEmpty()) {
        continue;
      }
      remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
      queries.add(
          query.withDataSource(new TableDataSource(derivativeDataSource.getName()))
               .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals))
      );
      derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
      if (remainingQueryIntervals.isEmpty()) {
        break;
      }
    }
    if (queries.isEmpty()) {
      costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
      return Collections.singletonList(query);
    }
    // any intervals not covered by derivatives are still queried against
    // the original datasource.
    if (!remainingQueryIntervals.isEmpty()) {
      queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
    }
    hitCount.get(datasourceName).incrementAndGet();
    costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
    return queries;
  } finally {
    lock.readLock().unlock();
  }
}
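For context, a minimal usage sketch of the optimizer (assuming the usual Druid imports; "optimizer", the datasource name "base", and the column names "market" and "index" are hypothetical placeholders, and TopNQueryBuilder is the same builder used in the first example above):

// A hedged sketch: rewrite a TopN query, then inspect the resulting sub-queries.
TopNQuery userQuery = new TopNQueryBuilder()
    .dataSource("base") // hypothetical base datasource
    .granularity(Granularities.ALL)
    .dimension("market") // hypothetical dimension
    .metric("index")
    .threshold(3)
    .intervals("2011-01-01/2011-02-01")
    .aggregators(Collections.singletonList(new LongSumAggregatorFactory("index", "index")))
    .build();

List<Query> rewritten = optimizer.optimize(userQuery);
// Each element of "rewritten" targets either a derivative datasource (for the intervals
// it covers) or the original datasource (for any intervals no derivative covers).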
Use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
In class MaterializedViewUtils, method getRequiredFields:
/**
 * Extract all dimensions required by a query.
 * Only TopNQuery/TimeseriesQuery/GroupByQuery are supported.
 *
 * @param query the query to analyze
 * @return the set of dimensions required by the query
 */
public static Set<String> getRequiredFields(Query query) {
  Set<String> dimsInFilter = null == query.getFilter()
      ? new HashSet<String>()
      : query.getFilter().getRequiredColumns();
  Set<String> dimensions = new HashSet<>(dimsInFilter);
  if (query instanceof TopNQuery) {
    TopNQuery q = (TopNQuery) query;
    dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
    dimensions.add(q.getDimensionSpec().getDimension());
  } else if (query instanceof TimeseriesQuery) {
    TimeseriesQuery q = (TimeseriesQuery) query;
    dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
  } else if (query instanceof GroupByQuery) {
    GroupByQuery q = (GroupByQuery) query;
    dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
    for (DimensionSpec spec : q.getDimensions()) {
      dimensions.add(spec.getDimension());
    }
  } else {
    throw new UnsupportedOperationException(
        "Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery"
    );
  }
  return dimensions;
}
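As a hedged illustration of the result (the datasource and column names below are made up): for a TopNQuery that filters on one column, uses a second as the TopN dimension, and aggregates a third, getRequiredFields returns the union of all three:

TopNQuery q = new TopNQueryBuilder()
    .dataSource("wiki") // hypothetical datasource
    .granularity(Granularities.ALL)
    .filters("country", "US") // the filter requires "country"
    .dimension("market") // the TopN dimension is "market"
    .metric("added")
    .threshold(5)
    .intervals("2011-01-01/2011-02-01")
    .aggregators(Collections.singletonList(new LongSumAggregatorFactory("added", "added")))
    .build();

Set<String> required = MaterializedViewUtils.getRequiredFields(q);
// required contains "country" (filter), "market" (dimension), and "added" (aggregator input)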
Use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
In class MapVirtualColumnTopNTest, method testWithMapColumn:
@Test
public void testWithMapColumn() {
  final TopNQuery query = new TopNQuery(
      new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
      VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))),
      new DefaultDimensionSpec("params", "params"), // params is the map type
      new NumericTopNMetricSpec("count"),
      1,
      new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))),
      null,
      Granularities.ALL,
      ImmutableList.of(new CountAggregatorFactory("count")),
      null,
      null
  );
  expectedException.expect(UnsupportedOperationException.class);
  expectedException.expectMessage("Map column doesn't support getRow()");
  runner.run(QueryPlus.wrap(query)).toList();
}
Use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.
In class MapVirtualColumnTopNTest, method testWithSubColumn:
@Test
public void testWithSubColumn() {
  final TopNQuery query = new TopNQuery(
      new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
      VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))),
      new DefaultDimensionSpec("params.key3", "params.key3"), // params.key3 is string
      new NumericTopNMetricSpec("count"),
      2,
      new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))),
      null,
      Granularities.ALL,
      ImmutableList.of(new CountAggregatorFactory("count")),
      null,
      null
  );
  final List<Result<TopNResultValue>> result = runner.run(QueryPlus.wrap(query)).toList();
  final List<Result<TopNResultValue>> expected = Collections.singletonList(
      new Result<>(
          DateTimes.of("2011-01-12T00:00:00.000Z"),
          new TopNResultValue(
              ImmutableList.of(
                  new DimensionAndMetricValueExtractor(MapVirtualColumnTestBase.mapOf("count", 2L, "params.key3", null)),
                  new DimensionAndMetricValueExtractor(MapVirtualColumnTestBase.mapOf("count", 1L, "params.key3", "value3"))
              )
          )
      )
  );
  Assert.assertEquals(expected, result);
}
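The positional TopNQuery constructor used in these two tests is hard to read; the same query can usually be expressed with TopNQueryBuilder instead. A minimal sketch (assuming the builder exposes a virtualColumns overload that accepts VirtualColumn instances):

// Hedged builder equivalent of the testWithSubColumn query above.
final TopNQuery builderQuery = new TopNQueryBuilder()
    .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .virtualColumns(new MapVirtualColumn("keys", "values", "params"))
    .dimension(new DefaultDimensionSpec("params.key3", "params.key3"))
    .metric(new NumericTopNMetricSpec("count"))
    .threshold(2)
    .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))))
    .granularity(Granularities.ALL)
    .aggregators(ImmutableList.of(new CountAggregatorFactory("count")))
    .build();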