
Example 96 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class TimeseriesQueryRunnerTest, method testTimeseriesWithVaryingGran:

@Test
public void testTimeseriesWithVaryingGran() {
    TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(new PeriodGranularity(new Period("P1M"), null, null))
        .intervals(Arrays.asList(new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")))
        .aggregators(
            Arrays.<AggregatorFactory>asList(
                QueryRunnerTestHelper.rowsCount,
                new LongSumAggregatorFactory("idx", "index"),
                QueryRunnerTestHelper.qualityUniques
            )
        )
        .descending(descending)
        .build();
    List<Result<TimeseriesResultValue>> expectedResults1 = Arrays.asList(
        new Result<>(
            new DateTime("2011-04-01"),
            new TimeseriesResultValue(
                ImmutableMap.<String, Object>of("rows", 13L, "idx", 5827L, "uniques", QueryRunnerTestHelper.UNIQUES_9)
            )
        )
    );
    Iterable<Result<TimeseriesResultValue>> results1 = Sequences.toList(
        runner.run(query1, CONTEXT),
        Lists.<Result<TimeseriesResultValue>>newArrayList()
    );
    assertExpectedResults(expectedResults1, results1);
    TimeseriesQuery query2 = Druids.newTimeseriesQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity("DAY")
        .intervals(Arrays.asList(new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")))
        .aggregators(
            Arrays.<AggregatorFactory>asList(
                QueryRunnerTestHelper.rowsCount,
                new LongSumAggregatorFactory("idx", "index"),
                QueryRunnerTestHelper.qualityUniques
            )
        )
        .build();
    List<Result<TimeseriesResultValue>> expectedResults2 = Arrays.asList(
        new Result<>(
            new DateTime("2011-04-02"),
            new TimeseriesResultValue(
                ImmutableMap.<String, Object>of("rows", 13L, "idx", 5827L, "uniques", QueryRunnerTestHelper.UNIQUES_9)
            )
        )
    );
    Iterable<Result<TimeseriesResultValue>> results2 = Sequences.toList(
        runner.run(query2, CONTEXT),
        Lists.<Result<TimeseriesResultValue>>newArrayList()
    );
    assertExpectedResults(expectedResults2, results2);
}
Also used: PeriodGranularity (io.druid.java.util.common.granularity.PeriodGranularity), Period (org.joda.time.Period), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), DoubleFirstAggregatorFactory (io.druid.query.aggregation.first.DoubleFirstAggregatorFactory), DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory), FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory), DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory), DoubleLastAggregatorFactory (io.druid.query.aggregation.last.DoubleLastAggregatorFactory), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), DateTime (org.joda.time.DateTime), Interval (org.joda.time.Interval), Result (io.druid.query.Result), Test (org.junit.Test)
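
A note on the expected timestamps: query1 is restricted to 2011-04-02/2011-04-03, yet its expected result is stamped 2011-04-01, because the P1M granularity buckets every timestamp to the start of its month. A minimal sketch of that bucketing, not part of the test and assuming the bucketStart accessor of this Druid version's io.druid.java.util.common.granularity.Granularity class:

// Hedged sketch: month granularity maps any April timestamp to April 1st,
// which is why expectedResults1 uses new DateTime("2011-04-01"), while the
// day-grained query2 keeps the 2011-04-02 bucket.
Granularity monthly = new PeriodGranularity(new Period("P1M"), null, null);
DateTime bucket = monthly.bucketStart(new DateTime("2011-04-02T00:00:00.000Z"));
// bucket == 2011-04-01T00:00:00.000Z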

Example 97 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class Aggregation, method filter:

public Aggregation filter(final DimFilter filter) {
    if (filter == null) {
        return this;
    }
    if (postAggregator != null) {
        // Verify that this Aggregation contains all inputs. If not, this "filter" call won't work right.
        final Set<String> dependentFields = postAggregator.getDependentFields();
        final Set<String> aggregatorNames = Sets.newHashSet();
        for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
            aggregatorNames.add(aggregatorFactory.getName());
        }
        for (String field : dependentFields) {
            if (!aggregatorNames.contains(field)) {
                throw new ISE("Cannot filter an Aggregation that does not contain its inputs: %s", this);
            }
        }
    }
    final List<AggregatorFactory> newAggregators = Lists.newArrayList();
    for (AggregatorFactory agg : aggregatorFactories) {
        newAggregators.add(new FilteredAggregatorFactory(agg, filter));
    }
    return new Aggregation(newAggregators, postAggregator, finalizingPostAggregatorFactory);
}
Also used: FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory), ISE (io.druid.java.util.common.ISE), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)
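
At its core, filter() does not rebuild the aggregation; it decorates each underlying factory with a FilteredAggregatorFactory. A minimal standalone sketch of that wrapping, using a made-up SelectorDimFilter (the dimension and value are illustrative only; assumes io.druid.query.filter.DimFilter and io.druid.query.filter.SelectorDimFilter in addition to the imports above):

AggregatorFactory sum = new LongSumAggregatorFactory("idx", "index");
// Hypothetical filter: only aggregate rows where quality = "premium".
DimFilter premiumOnly = new SelectorDimFilter("quality", "premium", null);
AggregatorFactory filteredSum = new FilteredAggregatorFactory(sum, premiumOnly);
// The wrapper keeps the delegate's name ("idx"), so a post-aggregator that
// depends on "idx" still resolves; that is what the dependentFields check
// in filter() is guarding.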

Example 98 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class DruidSchemaTest, method setUp:

@Before
public void setUp() throws Exception {
    Calcites.setSystemProperties();
    final File tmpDir = temporaryFolder.newFolder();
    final QueryableIndex index1 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "1"))
        .indexMerger(TestHelper.getTestIndexMergerV9())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new AggregatorFactory[]{
                        new CountAggregatorFactory("cnt"),
                        new DoubleSumAggregatorFactory("m1", "m1"),
                        new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
                    }
                )
                .withRollup(false)
                .build()
        )
        .rows(ROWS1)
        .buildMMappedIndex();
    final QueryableIndex index2 = IndexBuilder.create()
        .tmpDir(new File(tmpDir, "2"))
        .indexMerger(TestHelper.getTestIndexMergerV9())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(new AggregatorFactory[]{new LongSumAggregatorFactory("m1", "m1")})
                .withRollup(false)
                .build()
        )
        .rows(ROWS2)
        .buildMMappedIndex();
    walker = new SpecificSegmentsQuerySegmentWalker(CalciteTests.queryRunnerFactoryConglomerate())
        .add(
            DataSegment.builder()
                .dataSource(CalciteTests.DATASOURCE1)
                .interval(new Interval("2000/P1Y"))
                .version("1")
                .shardSpec(new LinearShardSpec(0))
                .build(),
            index1
        )
        .add(
            DataSegment.builder()
                .dataSource(CalciteTests.DATASOURCE1)
                .interval(new Interval("2001/P1Y"))
                .version("1")
                .shardSpec(new LinearShardSpec(0))
                .build(),
            index2
        )
        .add(
            DataSegment.builder()
                .dataSource(CalciteTests.DATASOURCE2)
                .interval(index2.getDataInterval())
                .version("1")
                .shardSpec(new LinearShardSpec(0))
                .build(),
            index2
        );
    schema = new DruidSchema(walker, new TestServerInventoryView(walker.getSegments()), PLANNER_CONFIG_DEFAULT);
    schema.start();
    schema.awaitInitialization();
}
Also used: DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory), TestServerInventoryView (io.druid.sql.calcite.util.TestServerInventoryView), LinearShardSpec (io.druid.timeline.partition.LinearShardSpec), IndexBuilder (io.druid.segment.IndexBuilder), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory), SpecificSegmentsQuerySegmentWalker (io.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker), QueryableIndex (io.druid.segment.QueryableIndex), File (java.io.File), Interval (org.joda.time.Interval), Before (org.junit.Before)
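
In this setup the factories are not running a query; they declare the metric columns of the segments being built. Each AggregatorFactory carries the metadata the rest of the engine needs downstream: a column name, a storage type, and a combining form used when partial results or rolled-up rows are merged. A short sketch of that metadata, hedged on the accessor names of this Druid version's AggregatorFactory:

AggregatorFactory cnt = new CountAggregatorFactory("cnt");
String name = cnt.getName();      // "cnt": the metric column in the segment
String type = cnt.getTypeName();  // "long": how the column is stored
// Counts combine by summation, so the combining factory is a long sum over
// the already-materialized "cnt" column rather than a second row count.
AggregatorFactory combining = cnt.getCombiningFactory();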

Example 99 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class GroupByMergingQueryRunnerV2, method run:

@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
    // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
    // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
    // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
    // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
    final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
    final GroupByQuery queryForRunners = query.withOverriddenContext(ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
    if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
        return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
    }
    final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
    final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final int priority = BaseQuery.getContextPriority(query, 0);
    // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
    // query processing together.
    final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
    final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
                resources.add(temporaryStorageHolder);
                final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
                try {
                    // This will potentially block if there are no merge buffers left in the pool.
                    final long timeout = timeoutAt - System.currentTimeMillis();
                    if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
                        throw new TimeoutException();
                    }
                    resources.add(mergeBufferHolder);
                } catch (Exception e) {
                    throw new QueryInterruptedException(e);
                }
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(
                    query,
                    false,
                    null,
                    config,
                    Suppliers.ofInstance(mergeBufferHolder.get()),
                    concurrencyHint,
                    temporaryStorage,
                    spillMapper,
                    combiningAggregatorFactories
                );
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                grouper.init();
                final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
                resources.add(grouperHolder);
                ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(queryables, new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {

                    @Override
                    public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
                        if (input == null) {
                            throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                        }
                        ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {

                            @Override
                            public Boolean call() throws Exception {
                                try (Releaser bufferReleaser = mergeBufferHolder.increment();
                                    Releaser grouperReleaser = grouperHolder.increment()) {
                                    final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
                                    // Return true if OK, false if resources were exhausted.
                                    return retVal == grouper;
                                } catch (QueryInterruptedException e) {
                                    throw e;
                                } catch (Exception e) {
                                    log.error(e, "Exception with one of the sequences!");
                                    throw Throwables.propagate(e);
                                }
                            }
                        });
                        if (isSingleThreaded) {
                            waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
                        }
                        return future;
                    }
                })));
                if (!isSingleThreaded) {
                    waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(resources)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(resources)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), Closeable (java.io.Closeable), ChainedExecutionQueryRunner (io.druid.query.ChainedExecutionQueryRunner), Function (com.google.common.base.Function), GroupByQuery (io.druid.query.groupby.GroupByQuery), Releaser (io.druid.collections.Releaser), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList), ISE (io.druid.java.util.common.ISE), TimeoutException (java.util.concurrent.TimeoutException), QueryInterruptedException (io.druid.query.QueryInterruptedException), Pair (io.druid.java.util.common.Pair), GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig), RowBasedKey (io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), BaseSequence (io.druid.java.util.common.guava.BaseSequence), CancellationException (java.util.concurrent.CancellationException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException), QueryRunner (io.druid.query.QueryRunner), ReferenceCountingResourceHolder (io.druid.collections.ReferenceCountingResourceHolder), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), Row (io.druid.data.input.Row), File (java.io.File)
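
Two details are worth pulling out of this runner. First, the merge operates on combining factories (query.getAggregatorSpecs().get(i).getCombiningFactory()), since each per-segment runner has already produced partial aggregates. Second, every blocking step, both the mergeBufferPool.take and the per-segment futures, is budgeted against one absolute deadline rather than a fresh per-step timeout. A framework-free sketch of that shared-deadline pattern, with java.util.concurrent's BlockingQueue standing in for Druid's merge-buffer pool (names here are illustrative, not Druid API; assumes the java.util.concurrent imports):

static <T> T takeWithDeadline(BlockingQueue<T> pool, long deadlineMillis)
    throws InterruptedException, TimeoutException {
    // Recompute the remaining budget from the shared deadline, just as run()
    // does with timeoutAt - System.currentTimeMillis() before each blocking call.
    final long remaining = deadlineMillis - System.currentTimeMillis();
    final T resource = remaining <= 0 ? null : pool.poll(remaining, TimeUnit.MILLISECONDS);
    if (resource == null) {
        throw new TimeoutException();
    }
    return resource;
}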

Example 100 with AggregatorFactory

Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

From the class DefaultLimitSpec, method build:

@Override
public Function<Sequence<Row>, Sequence<Row>> build(List<DimensionSpec> dimensions, List<AggregatorFactory> aggs, List<PostAggregator> postAggs) {
    // Can avoid re-sorting if the natural ordering is good enough.
    boolean sortingNeeded = false;
    if (dimensions.size() < columns.size()) {
        sortingNeeded = true;
    }
    final Set<String> aggAndPostAggNames = Sets.newHashSet();
    for (AggregatorFactory agg : aggs) {
        aggAndPostAggNames.add(agg.getName());
    }
    for (PostAggregator postAgg : postAggs) {
        aggAndPostAggNames.add(postAgg.getName());
    }
    if (!sortingNeeded) {
        for (int i = 0; i < columns.size(); i++) {
            final OrderByColumnSpec columnSpec = columns.get(i);
            if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
                sortingNeeded = true;
                break;
            }
            final ValueType columnType = getOrderByType(columnSpec, dimensions);
            final StringComparator naturalComparator;
            if (columnType == ValueType.STRING) {
                naturalComparator = StringComparators.LEXICOGRAPHIC;
            } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
                naturalComparator = StringComparators.NUMERIC;
            } else {
                sortingNeeded = true;
                break;
            }
            if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING
                || !columnSpec.getDimensionComparator().equals(naturalComparator)
                || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
                sortingNeeded = true;
                break;
            }
        }
    }
    if (!sortingNeeded) {
        return limit == Integer.MAX_VALUE ? Functions.<Sequence<Row>>identity() : new LimitingFn(limit);
    }
    // Materialize the Comparator first for fast-fail error checking.
    final Ordering<Row> ordering = makeComparator(dimensions, aggs, postAggs);
    if (limit == Integer.MAX_VALUE) {
        return new SortingFn(ordering);
    } else {
        return new TopNFunction(ordering, limit);
    }
}
Also used: PostAggregator (io.druid.query.aggregation.PostAggregator), ValueType (io.druid.segment.column.ValueType), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), StringComparator (io.druid.query.ordering.StringComparator), Row (io.druid.data.input.Row)
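
The payoff of this check is skipping a sort when rows already leave the group-by in the requested order: the orderBy columns must be an ascending prefix of the grouping dimensions, each using its natural comparator, and none may name an aggregator or post-aggregator. A hedged sketch of a spec that takes the no-sort path (constructor signatures assumed from this Druid version; "dim1" and the limit are illustrative):

// With dimensions = [dim1, dim2] (both STRING), this orderBy is a natural
// ascending prefix, so build(...) returns only a limiting function, no sort.
DefaultLimitSpec limitSpec = new DefaultLimitSpec(
    ImmutableList.of(
        new OrderByColumnSpec("dim1", OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC)
    ),
    100
);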

Aggregations

Classes most frequently used together with AggregatorFactory across these examples, with usage counts:

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 148
Test (org.junit.Test): 86
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 82
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 64
Interval (org.joda.time.Interval): 45
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 38
DateTime (org.joda.time.DateTime): 37
FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory): 32
Result (io.druid.query.Result): 31
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory): 27
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 25
Row (io.druid.data.input.Row): 24
PostAggregator (io.druid.query.aggregation.PostAggregator): 24
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 22
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 19
LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory): 18
LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory): 18
LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory): 18
DimensionSpec (io.druid.query.dimension.DimensionSpec): 18
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 17