Example 11 with TopNQuery

use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.

the class SchemalessTestSimpleTest method testFullOnTopN.

// @Test TODO: Handling of null values is inconsistent right now, need to make it all consistent and re-enable test
// TODO: Complain to Eric when you see this.  It shouldn't be like this...
@Ignore
@SuppressWarnings("unused")
public void testFullOnTopN() {
    TopNQuery query = new TopNQueryBuilder()
        .dataSource(dataSource)
        .granularity(ALL_GRAN)
        .dimension(marketDimension)
        .metric(indexMetric)
        .threshold(3)
        .intervals(fullOnInterval)
        .aggregators(Lists.newArrayList(Iterables.concat(
            commonAggregators,
            Lists.newArrayList(
                new DoubleMaxAggregatorFactory("maxIndex", "index"),
                new DoubleMinAggregatorFactory("minIndex", "index")))))
        .postAggregators(addRowsIndexConstant)
        .build();
    List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
        new Result<>(
            DateTimes.of("2011-01-12T00:00:00.000Z"),
            new TopNResultValue(Arrays.asList(
                new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                    .put("market", "spot").put("rows", 4L).put("index", 400.0D)
                    .put("addRowsIndexConstant", 405.0D).put("uniques", 1.0002442201269182D)
                    .put("maxIndex", 100.0).put("minIndex", 100.0).build()),
                new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                    .put("market", "").put("rows", 2L).put("index", 200.0D)
                    .put("addRowsIndexConstant", 203.0D).put("uniques", 0.0)
                    .put("maxIndex", 100.0D).put("minIndex", 100.0D).build()),
                new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                    .put("market", "total_market").put("rows", 2L).put("index", 200.0D)
                    .put("addRowsIndexConstant", 203.0D).put("uniques", 1.0002442201269182D)
                    .put("maxIndex", 100.0D).put("minIndex", 100.0D).build())))));
    try (CloseableStupidPool<ByteBuffer> pool = TestQueryRunners.createDefaultNonBlockingPool()) {
        QueryRunner runner = TestQueryRunners.makeTopNQueryRunner(segment, pool);
        TestHelper.assertExpectedResults(expectedResults, runner.run(QueryPlus.wrap(query)));
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) TopNQuery(org.apache.druid.query.topn.TopNQuery) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) ByteBuffer(java.nio.ByteBuffer) DimensionAndMetricValueExtractor(org.apache.druid.query.topn.DimensionAndMetricValueExtractor) QueryRunner(org.apache.druid.query.QueryRunner) Result(org.apache.druid.query.Result) Ignore(org.junit.Ignore)
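
For orientation, the builder chain above reduces to a compact core. Here is a minimal sketch, assuming a hypothetical datasource named "wikipedia" with a "market" dimension; these names are placeholders, not the test's fixtures:

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

// Rank the top 3 "market" values by a "count" metric over one interval.
TopNQuery query = new TopNQueryBuilder()
    .dataSource("wikipedia")            // hypothetical datasource name
    .granularity(Granularities.ALL)     // one bucket spanning the whole interval
    .dimension("market")                // dimension whose values are ranked
    .metric("count")                    // metric that orders the results
    .threshold(3)                       // keep only the top 3 values
    .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011-01-12/2011-01-14"))))
    .aggregators(ImmutableList.of(new CountAggregatorFactory("count")))
    .build();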

Example 12 with TopNQuery

use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.

the class DataSourceOptimizer method optimize.

/**
 * Does the main work of materialized view selection: transforms the user query into one or more sub-queries.
 *
 * In each sub-query, the dataSource is a derivative of the dataSource in the user query, and the union of all
 * sub-queries' intervals equals the interval in the user query.
 *
 * The derived dataSource with the smallest average data size per segment granularity has the highest priority
 * to replace the dataSource in the user query.
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with the selected derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
    long start = System.currentTimeMillis();
    // only TableDataSource can be optimized
    if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery) || !(query.getDataSource() instanceof TableDataSource)) {
        return Collections.singletonList(query);
    }
    String datasourceName = ((TableDataSource) query.getDataSource()).getName();
    // Get all derivatives for the datasource in the query. The derivatives set is
    // sorted by average data size per segment granularity.
    Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
    if (derivatives.isEmpty()) {
        return Collections.singletonList(query);
    }
    lock.readLock().lock();
    try {
        totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
        hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
        AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
        // get all fields which the query required
        Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
        Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
        for (DerivativeDataSource derivativeDataSource : derivatives) {
            derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
            if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
                derivativesWithRequiredFields.add(derivativeDataSource);
            }
        }
        // if no derivatives contains all required dimensions, this materialized view selection failed.
        if (derivativesWithRequiredFields.isEmpty()) {
            missFields.computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>()).computeIfAbsent(requiredFields, rf -> new AtomicLong(0)).incrementAndGet();
            costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        List<Query> queries = new ArrayList<>();
        List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
        for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
            final List<Interval> derivativeIntervals = remainingQueryIntervals.stream()
                .flatMap(interval -> serverView
                    .getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
                    .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
                    .lookup(interval)
                    .stream()
                    .map(TimelineObjectHolder::getInterval))
                .collect(Collectors.toList());
            // if the derivative has no data for any of the remaining intervals, it will
            // not be selected.
            if (derivativeIntervals.isEmpty()) {
                continue;
            }
            remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
            queries.add(query.withDataSource(new TableDataSource(derivativeDataSource.getName())).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals)));
            derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
            if (remainingQueryIntervals.isEmpty()) {
                break;
            }
        }
        if (queries.isEmpty()) {
            costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        // Any intervals still not covered by a derivative are queried against
        // the original datasource.
        if (!remainingQueryIntervals.isEmpty()) {
            queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
        }
        hitCount.get(datasourceName).incrementAndGet();
        costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
        return queries;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Inject(com.google.inject.Inject) HashMap(java.util.HashMap) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Query(org.apache.druid.query.Query) Map(java.util.Map) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimelineServerView(org.apache.druid.client.TimelineServerView) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ImmutableSortedSet(com.google.common.collect.ImmutableSortedSet) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) TableDataSource(org.apache.druid.query.TableDataSource) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Collections(java.util.Collections)
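
The selection loop above is driven by interval subtraction: each chosen derivative claims the intervals its timeline can serve, and the remainder either moves on to the next derivative or falls back to the base table. A minimal sketch of that bookkeeping, assuming (as the call site suggests) that MaterializedViewUtils.minus subtracts the second interval list from the first; the interval values and the extension's import path are assumptions:

import java.util.Collections;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.materializedview.MaterializedViewUtils;
import org.joda.time.Interval;

// Illustrative values: the query spans Q1 2011; one derivative covers January.
List<Interval> remaining = Collections.singletonList(Intervals.of("2011-01-01/2011-04-01"));
List<Interval> covered = Collections.singletonList(Intervals.of("2011-01-01/2011-02-01"));

// Subtract the covered span; the leftover ("2011-02-01/2011-04-01") is either
// claimed by the next derivative or queried against the original datasource.
remaining = MaterializedViewUtils.minus(remaining, covered);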

Example 13 with TopNQuery

use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.

the class MaterializedViewUtils method getRequiredFields.

/**
 * Extracts all dimensions required by the given query.
 * Only TopNQuery/TimeseriesQuery/GroupByQuery are supported.
 *
 * @param query the query to analyze
 * @return the set of dimensions the query requires
 */
public static Set<String> getRequiredFields(Query query) {
    Set<String> dimsInFilter = null == query.getFilter() ? new HashSet<String>() : query.getFilter().getRequiredColumns();
    Set<String> dimensions = new HashSet<>(dimsInFilter);
    if (query instanceof TopNQuery) {
        TopNQuery q = (TopNQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
        dimensions.add(q.getDimensionSpec().getDimension());
    } else if (query instanceof TimeseriesQuery) {
        TimeseriesQuery q = (TimeseriesQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
    } else if (query instanceof GroupByQuery) {
        GroupByQuery q = (GroupByQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
        for (DimensionSpec spec : q.getDimensions()) {
            String dim = spec.getDimension();
            dimensions.add(dim);
        }
    } else {
        throw new UnsupportedOperationException("Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery");
    }
    return dimensions;
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) HashSet(java.util.HashSet)
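
As a usage sketch of getRequiredFields: for a TopNQuery the result is the filter's required columns, the aggregator input fields, and the grouping dimension. The query below is hypothetical; the expected set follows from the TopNQuery branch shown above:

import com.google.common.collect.ImmutableList;
import java.util.Set;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.materializedview.MaterializedViewUtils;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

// Hypothetical query: top 5 "market" values ordered by summed "added".
TopNQuery query = new TopNQueryBuilder()
    .dataSource("base")
    .granularity(Granularities.ALL)
    .dimension("market")                // contributes "market"
    .metric("added")
    .threshold(5)
    .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011-01-01/2011-02-01"))))
    .aggregators(ImmutableList.of(new LongSumAggregatorFactory("added", "added")))  // contributes "added"
    .build();

// No filter is set, so the expected result is {"market", "added"}.
Set<String> required = MaterializedViewUtils.getRequiredFields(query);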

Example 14 with TopNQuery

use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.

the class MapVirtualColumnTopNTest method testWithMapColumn.

@Test
public void testWithMapColumn() {
    final TopNQuery query = new TopNQuery(new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))), // params is the map type
    new DefaultDimensionSpec("params", "params"), new NumericTopNMetricSpec("count"), 1, new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))), null, Granularities.ALL, ImmutableList.of(new CountAggregatorFactory("count")), null, null);
    expectedException.expect(UnsupportedOperationException.class);
    expectedException.expectMessage("Map column doesn't support getRow()");
    runner.run(QueryPlus.wrap(query)).toList();
}
Also used : TableDataSource(org.apache.druid.query.TableDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TopNQuery(org.apache.druid.query.topn.TopNQuery) NumericTopNMetricSpec(org.apache.druid.query.topn.NumericTopNMetricSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 15 with TopNQuery

use of org.apache.druid.query.topn.TopNQuery in project druid by druid-io.

the class MapVirtualColumnTopNTest method testWithSubColumn.

@Test
public void testWithSubColumn() {
    final TopNQuery query = new TopNQuery(
        new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
        VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))),
        new DefaultDimensionSpec("params.key3", "params.key3"),  // params.key3 is string
        new NumericTopNMetricSpec("count"),             // rank by the "count" metric
        2,                                              // threshold: top 2
        new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))),
        null,                                           // no dim filter
        Granularities.ALL,
        ImmutableList.of(new CountAggregatorFactory("count")),
        null,                                           // no post-aggregators
        null);                                          // no query context
    final List<Result<TopNResultValue>> result = runner.run(QueryPlus.wrap(query)).toList();
    final List<Result<TopNResultValue>> expected = Collections.singletonList(
        new Result<>(
            DateTimes.of("2011-01-12T00:00:00.000Z"),
            new TopNResultValue(ImmutableList.of(
                new DimensionAndMetricValueExtractor(MapVirtualColumnTestBase.mapOf("count", 2L, "params.key3", null)),
                new DimensionAndMetricValueExtractor(MapVirtualColumnTestBase.mapOf("count", 1L, "params.key3", "value3"))))));
    Assert.assertEquals(expected, result);
}
Also used : TopNResultValue(org.apache.druid.query.topn.TopNResultValue) TableDataSource(org.apache.druid.query.TableDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TopNQuery(org.apache.druid.query.topn.TopNQuery) NumericTopNMetricSpec(org.apache.druid.query.topn.NumericTopNMetricSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionAndMetricValueExtractor(org.apache.druid.query.topn.DimensionAndMetricValueExtractor) Result(org.apache.druid.query.Result) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
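
Both map-virtual-column tests share the same wiring. A short sketch of the inference (the constructor's parameter meaning is read off the call sites above, not from the class's documentation, and the import path is assumed from the virtual-columns extension):

import org.apache.druid.segment.MapVirtualColumn;

// Inferred reading: expose a map-typed virtual column named "params", built by
// zipping the "keys" array column with the "values" array column, so that
// "params.key3" addresses the value stored under key3.
MapVirtualColumn params = new MapVirtualColumn("keys", "values", "params");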

Aggregations

TopNQuery (org.apache.druid.query.topn.TopNQuery): 38 uses
Test (org.junit.Test): 28 uses
TopNQueryBuilder (org.apache.druid.query.topn.TopNQueryBuilder): 25 uses
Result (org.apache.druid.query.Result): 10 uses
TopNResultValue (org.apache.druid.query.topn.TopNResultValue): 10 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 9 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 9 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 8 uses
QueryRunner (org.apache.druid.query.QueryRunner): 6 uses
DoubleMaxAggregatorFactory (org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory): 6 uses
DoubleMinAggregatorFactory (org.apache.druid.query.aggregation.DoubleMinAggregatorFactory): 6 uses
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 6 uses
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 6 uses
TopNQueryConfig (org.apache.druid.query.topn.TopNQueryConfig): 6 uses
TopNQueryQueryToolChest (org.apache.druid.query.topn.TopNQueryQueryToolChest): 6 uses
HashMap (java.util.HashMap): 5 uses
Map (java.util.Map): 5 uses
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 5 uses
TableDataSource (org.apache.druid.query.TableDataSource): 5 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 4 uses