Example 41 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class VarianceSqlAggregatorTest method testVarPop.

@Test
public void testVarPop() throws Exception {
    VarianceAggregatorCollector holder1 = new VarianceAggregatorCollector();
    VarianceAggregatorCollector holder2 = new VarianceAggregatorCollector();
    VarianceAggregatorCollector holder3 = new VarianceAggregatorCollector();
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        Object raw1 = row.getRaw("d1");
        Object raw2 = row.getRaw("f1");
        Object raw3 = row.getRaw("l1");
        addToHolder(holder1, raw1);
        addToHolder(holder2, raw2);
        addToHolder(holder3, raw3);
    }
    final List<Object[]> expectedResults = ImmutableList.of(new Object[] {
        holder1.getVariance(true),
        holder2.getVariance(true).floatValue(),
        holder3.getVariance(true).longValue()
    });
    testQuery(
        "SELECT\n"
        + "VAR_POP(d1),\n"
        + "VAR_POP(f1),\n"
        + "VAR_POP(l1)\n"
        + "FROM numfoo",
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                  .dataSource(CalciteTests.DATASOURCE3)
                  .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                  .granularity(Granularities.ALL)
                  .aggregators(ImmutableList.of(
                      new VarianceAggregatorFactory("a0:agg", "d1", "population", "double"),
                      new VarianceAggregatorFactory("a1:agg", "f1", "population", "float"),
                      new VarianceAggregatorFactory("a2:agg", "l1", "population", "long")))
                  .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                  .build()),
        expectedResults);
}
Also used: VarianceAggregatorCollector(org.apache.druid.query.aggregation.variance.VarianceAggregatorCollector) InputRow(org.apache.druid.data.input.InputRow) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
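
For context, a minimal sketch (not from the Druid test suite) of what the spec above wraps: MultipleIntervalSegmentSpec simply holds an ordered list of Joda-Time intervals, and getIntervals() hands the list back. Filtration.eternity() used in the test resolves to the all-time interval; the helper method name below is hypothetical.

import com.google.common.collect.ImmutableList;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.joda.time.Interval;

static List<Interval> eternitySpecIntervals() {
    // Wrap a single all-time interval; getIntervals() returns the same list passed in.
    MultipleIntervalSegmentSpec spec =
        new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.ETERNITY));
    return spec.getIntervals();
}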

Example 42 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class SingleTaskBackgroundRunnerTest method testGetQueryRunner.

@Test
public void testGetQueryRunner() throws ExecutionException, InterruptedException {
    runner.run(new NoopTask(null, null, "foo", 500L, 0, null, null, null)).get().getStatusCode();
    final QueryRunner<ScanResultValue> queryRunner = Druids.newScanQueryBuilder()
        .dataSource("foo")
        .intervals(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
        .build()
        .getRunner(runner);
    Assert.assertThat(queryRunner, CoreMatchers.instanceOf(SetAndVerifyContextQueryRunner.class));
}
Also used : SetAndVerifyContextQueryRunner(org.apache.druid.server.SetAndVerifyContextQueryRunner) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) NoopTask(org.apache.druid.indexing.common.task.NoopTask) Test(org.junit.Test)
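
The same builder pattern works with bounded intervals. A hedged sketch (not from the test above), using Druid's Intervals.of helper to parse an ISO-8601 interval string; the method name is hypothetical:

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

static ScanQuery boundedScanQuery() {
    // Scan only January 2020 instead of all time.
    return Druids.newScanQueryBuilder()
        .dataSource("foo")
        .intervals(new MultipleIntervalSegmentSpec(
            ImmutableList.of(Intervals.of("2020-01-01/2020-02-01"))))
        .build();
}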

Example 43 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class GroupByStrategyV1 method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    final Set<AggregatorFactory> aggs = new HashSet<>();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = new HashSet<>();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // The required column is already provided as a dimension of the subquery's
                // output, so it doesn't have the duplicate-column problem described above.
                continue;
            }
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(ImmutableList.copyOf(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(new ArrayList<>())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
        subquery,
        configSupplier.get(),
        subqueryResult
    );
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        null,
        configSupplier.get(),
        Sequences.concat(
            Sequences.map(
                Sequences.simple(outerQuery.getIntervals()),
                new Function<Interval, Sequence<ResultRow>>() {
                    @Override
                    public Sequence<ResultRow> apply(Interval interval) {
                        return process(
                            outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                            new IncrementalIndexStorageAdapter(innerQueryResultIndex)
                        );
                    }
                }
            )
        )
    );
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used: DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)
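
To make the IAE above concrete, a hypothetical sketch (not part of the Druid source) of the conflict it guards against: two outer aggregators of different types reading the same inner column would try to build two incremental-index columns with the same name.

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

static boolean conflictingOuterAggregators() {
    // Both factories require a column named "x", but they are not equal, so the
    // Iterables.any check in processSubqueryResult matches and the IAE is thrown.
    AggregatorFactory longSum = new LongSumAggregatorFactory("x", "x");
    AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("x", "x");
    return longSum.getName().equals(doubleSum.getName()) && !longSum.equals(doubleSum); // true
}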

Example 44 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class GroupByStrategyV2 method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplier = null;
    try {
        final GroupByQuery queryToRun;
        if (wasQueryPushedDown) {
            // If the query was pushed down, filters would have been applied downstream, so skip it here.
            queryToRun = query.withDimFilter(null).withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY));
        } else {
            queryToRun = query;
        }
        resultSupplier = GroupByRowProcessor.process(
            queryToRun,
            wasQueryPushedDown ? queryToRun : subquery,
            subqueryResult,
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes()
        );
        final GroupByRowProcessor.ResultSupplier finalResultSupplier = resultSupplier;
        return Sequences.withBaggage(mergeResults((queryPlus, responseContext) -> finalResultSupplier.results(null), query, null), finalResultSupplier);
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplier);
    }
}
Also used: QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder)
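
For reference, a hedged sketch of the eternity spec used in the pushed-down branch above: Intervals.ONLY_ETERNITY appears to be an immutable singleton list wrapping Intervals.ETERNITY, so the rewritten query scans all time and relies on the downstream servers having already applied the filter and interval. The method name below is hypothetical.

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;

static QuerySegmentSpec eternitySpec() {
    // Equivalent to new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY) above.
    return new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.ETERNITY));
}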

Example 45 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class DataSourceOptimizer method optimize.

/**
 * Does the main work of materialized view selection: transforms the user query into one or more sub-queries.
 *
 * In each sub-query, the dataSource is a derivative of the dataSource in the user query, and the union of all
 * sub-queries' intervals equals the interval in the user query.
 *
 * Derived dataSources with the smallest average data size per segment granularity have the highest priority to
 * replace the dataSource in the user query.
 *
 * @param query only TopNQuery/TimeseriesQuery/GroupByQuery can be optimized
 * @return a list of queries with specified derived dataSources and intervals
 */
public List<Query> optimize(Query query) {
    long start = System.currentTimeMillis();
    // only TableDataSource can be optimized
    if (!(query instanceof TopNQuery || query instanceof TimeseriesQuery || query instanceof GroupByQuery)
            || !(query.getDataSource() instanceof TableDataSource)) {
        return Collections.singletonList(query);
    }
    String datasourceName = ((TableDataSource) query.getDataSource()).getName();
    // Get all derivatives for the datasource in the query. The derivatives set is sorted by
    // average data size per segment granularity.
    Set<DerivativeDataSource> derivatives = DerivativeDataSourceManager.getDerivatives(datasourceName);
    if (derivatives.isEmpty()) {
        return Collections.singletonList(query);
    }
    lock.readLock().lock();
    try {
        totalCount.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0)).incrementAndGet();
        hitCount.putIfAbsent(datasourceName, new AtomicLong(0));
        AtomicLong costTimeOfDataSource = costTime.computeIfAbsent(datasourceName, dsName -> new AtomicLong(0));
        // get all fields which the query required
        Set<String> requiredFields = MaterializedViewUtils.getRequiredFields(query);
        Set<DerivativeDataSource> derivativesWithRequiredFields = new HashSet<>();
        for (DerivativeDataSource derivativeDataSource : derivatives) {
            derivativesHitCount.putIfAbsent(derivativeDataSource.getName(), new AtomicLong(0));
            if (derivativeDataSource.getColumns().containsAll(requiredFields)) {
                derivativesWithRequiredFields.add(derivativeDataSource);
            }
        }
        // if no derivatives contains all required dimensions, this materialized view selection failed.
        if (derivativesWithRequiredFields.isEmpty()) {
            missFields.computeIfAbsent(datasourceName, dsName -> new ConcurrentHashMap<>()).computeIfAbsent(requiredFields, rf -> new AtomicLong(0)).incrementAndGet();
            costTimeOfDataSource.addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        List<Query> queries = new ArrayList<>();
        List<Interval> remainingQueryIntervals = (List<Interval>) query.getIntervals();
        for (DerivativeDataSource derivativeDataSource : ImmutableSortedSet.copyOf(derivativesWithRequiredFields)) {
            final List<Interval> derivativeIntervals = remainingQueryIntervals.stream()
                .flatMap(interval -> serverView
                    .getTimeline(DataSourceAnalysis.forDataSource(new TableDataSource(derivativeDataSource.getName())))
                    .orElseThrow(() -> new ISE("No timeline for dataSource: %s", derivativeDataSource.getName()))
                    .lookup(interval)
                    .stream()
                    .map(TimelineObjectHolder::getInterval))
                .collect(Collectors.toList());
            // If the derivative has no data in the remaining query intervals, it will
            // not be selected.
            if (derivativeIntervals.isEmpty()) {
                continue;
            }
            remainingQueryIntervals = MaterializedViewUtils.minus(remainingQueryIntervals, derivativeIntervals);
            queries.add(query
                .withDataSource(new TableDataSource(derivativeDataSource.getName()))
                .withQuerySegmentSpec(new MultipleIntervalSegmentSpec(derivativeIntervals)));
            derivativesHitCount.get(derivativeDataSource.getName()).incrementAndGet();
            if (remainingQueryIntervals.isEmpty()) {
                break;
            }
        }
        if (queries.isEmpty()) {
            costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
            return Collections.singletonList(query);
        }
        // Query any remaining intervals not covered by a derivative against
        // the original datasource.
        if (!remainingQueryIntervals.isEmpty()) {
            queries.add(query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(remainingQueryIntervals)));
        }
        hitCount.get(datasourceName).incrementAndGet();
        costTime.get(datasourceName).addAndGet(System.currentTimeMillis() - start);
        return queries;
    } finally {
        lock.readLock().unlock();
    }
}
Also used: DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Inject(com.google.inject.Inject) HashMap(java.util.HashMap) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) TopNQuery(org.apache.druid.query.topn.TopNQuery) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Query(org.apache.druid.query.Query) Map(java.util.Map) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimelineServerView(org.apache.druid.client.TimelineServerView) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ImmutableSortedSet(com.google.common.collect.ImmutableSortedSet) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) TableDataSource(org.apache.druid.query.TableDataSource) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Collections(java.util.Collections)
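
A hedged sketch of the interval bookkeeping in optimize(): each selected derivative takes over the sub-intervals its segments cover, MaterializedViewUtils.minus (used above) subtracts them from what remains, and any leftover is queried against the original datasource. The interval values and method name below are hypothetical; the MaterializedViewUtils package is assumed from the materialized-view-selection extension.

import com.google.common.collect.ImmutableList;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.materializedview.MaterializedViewUtils;
import org.joda.time.Interval;

static List<Interval> remainingAfterDerivative() {
    List<Interval> queryIntervals = ImmutableList.of(Intervals.of("2020-01-01/2020-04-01"));
    List<Interval> coveredByDerivative = ImmutableList.of(Intervals.of("2020-01-01/2020-03-01"));
    // Result: [2020-03-01/2020-04-01]; this leftover goes to the original datasource.
    return MaterializedViewUtils.minus(queryIntervals, coveredByDerivative);
}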

Aggregations

MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 142
Test (org.junit.Test): 115
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 53
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 44
BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 39
QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 28
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 25
ResponseContext (org.apache.druid.query.context.ResponseContext): 22
QueryRunner (org.apache.druid.query.QueryRunner): 21
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 21
QueryPlus (org.apache.druid.query.QueryPlus): 20
TableDataSource (org.apache.druid.query.TableDataSource): 19
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 18
ArrayList (java.util.ArrayList): 17
QueryDataSource (org.apache.druid.query.QueryDataSource): 15
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15
OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 15
Sequence (org.apache.druid.java.util.common.guava.Sequence): 14
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 14
Result (org.apache.druid.query.Result): 13