Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
in the class TimeseriesQueryRunnerTest, method testTimeseriesWithVaryingGran.
@Test
public void testTimeseriesWithVaryingGran() {
  TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(new PeriodGranularity(new Period("P1M"), null, null))
      .intervals(Arrays.asList(new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")))
      .aggregators(Arrays.<AggregatorFactory>asList(
          QueryRunnerTestHelper.rowsCount,
          new LongSumAggregatorFactory("idx", "index"),
          QueryRunnerTestHelper.qualityUniques))
      .descending(descending)
      .build();
  List<Result<TimeseriesResultValue>> expectedResults1 = Arrays.asList(
      new Result<>(
          new DateTime("2011-04-01"),
          new TimeseriesResultValue(ImmutableMap.<String, Object>of("rows", 13L, "idx", 5827L, "uniques", QueryRunnerTestHelper.UNIQUES_9))));
  Iterable<Result<TimeseriesResultValue>> results1 = Sequences.toList(runner.run(query1, CONTEXT), Lists.<Result<TimeseriesResultValue>>newArrayList());
  assertExpectedResults(expectedResults1, results1);
  TimeseriesQuery query2 = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity("DAY")
      .intervals(Arrays.asList(new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")))
      .aggregators(Arrays.<AggregatorFactory>asList(
          QueryRunnerTestHelper.rowsCount,
          new LongSumAggregatorFactory("idx", "index"),
          QueryRunnerTestHelper.qualityUniques))
      .build();
  List<Result<TimeseriesResultValue>> expectedResults2 = Arrays.asList(
      new Result<>(
          new DateTime("2011-04-02"),
          new TimeseriesResultValue(ImmutableMap.<String, Object>of("rows", 13L, "idx", 5827L, "uniques", QueryRunnerTestHelper.UNIQUES_9))));
  Iterable<Result<TimeseriesResultValue>> results2 = Sequences.toList(runner.run(query2, CONTEXT), Lists.<Result<TimeseriesResultValue>>newArrayList());
  assertExpectedResults(expectedResults2, results2);
}
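For orientation, here is a minimal, stand-alone sketch of the same pattern: a timeseries query whose aggregations are supplied as a List<AggregatorFactory>. The datasource, interval, and column names below are illustrative placeholders rather than values from the test above, and the sketch assumes the same builder API used in the test.

import io.druid.query.Druids;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.timeseries.TimeseriesQuery;
import java.util.Arrays;
import org.joda.time.Interval;

public class TimeseriesQuerySketch {
  // Builds a daily timeseries query with a row count and a long sum.
  // "wikipedia", "idx" and "index" are placeholder names for this sketch.
  public static TimeseriesQuery dailySums() {
    return Druids.newTimeseriesQueryBuilder()
        .dataSource("wikipedia")
        .granularity("DAY")
        .intervals(Arrays.asList(new Interval("2011-04-02/2011-04-03")))
        .aggregators(Arrays.<AggregatorFactory>asList(
            new CountAggregatorFactory("rows"),
            new LongSumAggregatorFactory("idx", "index")))
        .build();
  }
}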
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
in the class Aggregation, method filter.
public Aggregation filter(final DimFilter filter) {
  if (filter == null) {
    return this;
  }
  if (postAggregator != null) {
    // Verify that this Aggregation contains all inputs. If not, this "filter" call won't work right.
    final Set<String> dependentFields = postAggregator.getDependentFields();
    final Set<String> aggregatorNames = Sets.newHashSet();
    for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
      aggregatorNames.add(aggregatorFactory.getName());
    }
    for (String field : dependentFields) {
      if (!aggregatorNames.contains(field)) {
        throw new ISE("Cannot filter an Aggregation that does not contain its inputs: %s", this);
      }
    }
  }
  final List<AggregatorFactory> newAggregators = Lists.newArrayList();
  for (AggregatorFactory agg : aggregatorFactories) {
    newAggregators.add(new FilteredAggregatorFactory(agg, filter));
  }
  return new Aggregation(newAggregators, postAggregator, finalizingPostAggregatorFactory);
}
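As a rough illustration of the wrapping performed above, the sketch below applies a DimFilter to a single AggregatorFactory by hand; the dimension, value, and metric names are invented for the example.

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.SelectorDimFilter;

public class FilteredAggregationSketch {
  // Wraps a long-sum aggregator so that it only aggregates rows matching the filter,
  // which is what Aggregation.filter(...) does for every factory in its list.
  public static AggregatorFactory filteredSum() {
    DimFilter onlyRobots = new SelectorDimFilter("isRobot", "true", null);
    return new FilteredAggregatorFactory(new LongSumAggregatorFactory("robotAdded", "added"), onlyRobots);
  }
}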
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
in the class DruidSchemaTest, method setUp.
@Before
public void setUp() throws Exception {
  Calcites.setSystemProperties();
  final File tmpDir = temporaryFolder.newFolder();
  final QueryableIndex index1 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "1"))
      .indexMerger(TestHelper.getTestIndexMergerV9())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] {
              new CountAggregatorFactory("cnt"),
              new DoubleSumAggregatorFactory("m1", "m1"),
              new HyperUniquesAggregatorFactory("unique_dim1", "dim1")
          })
          .withRollup(false)
          .build())
      .rows(ROWS1)
      .buildMMappedIndex();
  final QueryableIndex index2 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "2"))
      .indexMerger(TestHelper.getTestIndexMergerV9())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] { new LongSumAggregatorFactory("m1", "m1") })
          .withRollup(false)
          .build())
      .rows(ROWS2)
      .buildMMappedIndex();
  walker = new SpecificSegmentsQuerySegmentWalker(CalciteTests.queryRunnerFactoryConglomerate())
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2000/P1Y"))
               .version("1").shardSpec(new LinearShardSpec(0)).build(), index1)
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2001/P1Y"))
               .version("1").shardSpec(new LinearShardSpec(0)).build(), index2)
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE2).interval(index2.getDataInterval())
               .version("1").shardSpec(new LinearShardSpec(0)).build(), index2);
  schema = new DruidSchema(walker, new TestServerInventoryView(walker.getSegments()), PLANNER_CONFIG_DEFAULT);
  schema.start();
  schema.awaitInitialization();
}
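The AggregatorFactory-specific part of this setup is the metrics array handed to IncrementalIndexSchema.Builder, where each factory defines one metric column of the index. A trimmed-down sketch of just that piece, with rollup disabled as above (the metric names are placeholders):

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.segment.incremental.IncrementalIndexSchema;

public class IndexSchemaSketch {
  // Each AggregatorFactory in the metrics array becomes a metric column of the built index.
  public static IncrementalIndexSchema countAndSumSchema() {
    return new IncrementalIndexSchema.Builder()
        .withMetrics(new AggregatorFactory[] {
            new CountAggregatorFactory("cnt"),
            new DoubleSumAggregatorFactory("m1", "m1")
        })
        .withRollup(false)
        .build();
  }
}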
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
in the class GroupByMergingQueryRunnerV2, method run.
@Override
public Sequence<Row> run(final Query queryParam, final Map responseContext) {
  final GroupByQuery query = (GroupByQuery) queryParam;
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  // CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION is here because realtime servers use nested mergeRunners calls
  // (one for the entire query and one for each sink). We only want the outer call to actually do merging with a
  // merge buffer, otherwise the query will allocate too many merge buffers. This is potentially sub-optimal as it
  // will involve materializing the results for each sink before starting to feed them into the outer merge buffer.
  // I'm not sure of a better way to do this without tweaking how realtime servers do queries.
  final boolean forceChainedExecution = query.getContextBoolean(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, false);
  final GroupByQuery queryForRunners = query.withOverriddenContext(
      ImmutableMap.<String, Object>of(CTX_KEY_MERGE_RUNNERS_USING_CHAINED_EXECUTION, true));
  if (BaseQuery.getContextBySegment(query, false) || forceChainedExecution) {
    return new ChainedExecutionQueryRunner(exec, queryWatcher, queryables).run(query, responseContext);
  }
  final boolean isSingleThreaded = querySpecificConfig.isSingleThreaded();
  final AggregatorFactory[] combiningAggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
  for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
    combiningAggregatorFactories[i] = query.getAggregatorSpecs().get(i).getCombiningFactory();
  }
  final File temporaryStorageDirectory = new File(
      processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
  final int priority = BaseQuery.getContextPriority(query, 0);
  // Figure out timeoutAt time now, so we can apply the timeout to both the mergeBufferPool.take and the actual
  // query processing together.
  final Number queryTimeout = query.getContextValue(QueryContextKeys.TIMEOUT, null);
  final long timeoutAt = queryTimeout == null ? JodaUtils.MAX_INSTANT : System.currentTimeMillis() + queryTimeout.longValue();
  return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {
    @Override
    public CloseableGrouperIterator<RowBasedKey, Row> make() {
      final List<ReferenceCountingResourceHolder> resources = Lists.newArrayList();
      try {
        final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
        final ReferenceCountingResourceHolder<LimitedTemporaryStorage> temporaryStorageHolder = ReferenceCountingResourceHolder.fromCloseable(temporaryStorage);
        resources.add(temporaryStorageHolder);
        final ReferenceCountingResourceHolder<ByteBuffer> mergeBufferHolder;
        try {
          // This will potentially block if there are no merge buffers left in the pool.
          final long timeout = timeoutAt - System.currentTimeMillis();
          if (timeout <= 0 || (mergeBufferHolder = mergeBufferPool.take(timeout)) == null) {
            throw new TimeoutException();
          }
          resources.add(mergeBufferHolder);
        } catch (Exception e) {
          throw new QueryInterruptedException(e);
        }
        Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(
            query, false, null, config, Suppliers.ofInstance(mergeBufferHolder.get()), concurrencyHint,
            temporaryStorage, spillMapper, combiningAggregatorFactories);
        final Grouper<RowBasedKey> grouper = pair.lhs;
        final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
        grouper.init();
        final ReferenceCountingResourceHolder<Grouper<RowBasedKey>> grouperHolder = ReferenceCountingResourceHolder.fromCloseable(grouper);
        resources.add(grouperHolder);
        ListenableFuture<List<Boolean>> futures = Futures.allAsList(Lists.newArrayList(Iterables.transform(
            queryables,
            new Function<QueryRunner<Row>, ListenableFuture<Boolean>>() {
              @Override
              public ListenableFuture<Boolean> apply(final QueryRunner<Row> input) {
                if (input == null) {
                  throw new ISE("Null queryRunner! Looks to be some segment unmapping action happening");
                }
                ListenableFuture<Boolean> future = exec.submit(new AbstractPrioritizedCallable<Boolean>(priority) {
                  @Override
                  public Boolean call() throws Exception {
                    try (Releaser bufferReleaser = mergeBufferHolder.increment();
                         Releaser grouperReleaser = grouperHolder.increment()) {
                      final Object retVal = input.run(queryForRunners, responseContext).accumulate(grouper, accumulator);
                      // Return true if OK, false if resources were exhausted.
                      return retVal == grouper;
                    } catch (QueryInterruptedException e) {
                      throw e;
                    } catch (Exception e) {
                      log.error(e, "Exception with one of the sequences!");
                      throw Throwables.propagate(e);
                    }
                  }
                });
                if (isSingleThreaded) {
                  waitForFutureCompletion(query, Futures.allAsList(ImmutableList.of(future)), timeoutAt - System.currentTimeMillis());
                }
                return future;
              }
            })));
        if (!isSingleThreaded) {
          waitForFutureCompletion(query, futures, timeoutAt - System.currentTimeMillis());
        }
        return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {
          @Override
          public void close() throws IOException {
            for (Closeable closeable : Lists.reverse(resources)) {
              CloseQuietly.close(closeable);
            }
          }
        });
      } catch (Throwable e) {
        // Exception caught while setting up the iterator; release resources.
        for (Closeable closeable : Lists.reverse(resources)) {
          CloseQuietly.close(closeable);
        }
        throw e;
      }
    }

    @Override
    public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
      iterFromMake.close();
    }
  });
}
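The AggregatorFactory-related step in this runner is the loop that replaces each aggregator with its combining form before merging per-segment results. A stand-alone restatement of that idea follows; the class and method names here are invented for illustration.

import io.druid.query.aggregation.AggregatorFactory;
import java.util.List;

public final class CombiningFactoriesSketch {
  private CombiningFactoriesSketch() {}

  // When merging partial results, each aggregator is swapped for its combining factory,
  // which re-aggregates the already-aggregated column named after the aggregator's output
  // (e.g. a LongSumAggregatorFactory("idx", "index") combines as a long sum over "idx").
  public static AggregatorFactory[] toCombining(List<AggregatorFactory> aggregatorSpecs) {
    final AggregatorFactory[] combining = new AggregatorFactory[aggregatorSpecs.size()];
    for (int i = 0; i < aggregatorSpecs.size(); i++) {
      combining[i] = aggregatorSpecs.get(i).getCombiningFactory();
    }
    return combining;
  }
}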
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
in the class DefaultLimitSpec, method build.
@Override
public Function<Sequence<Row>, Sequence<Row>> build(List<DimensionSpec> dimensions, List<AggregatorFactory> aggs, List<PostAggregator> postAggs) {
  // Can avoid re-sorting if the natural ordering is good enough.
  boolean sortingNeeded = false;
  if (dimensions.size() < columns.size()) {
    sortingNeeded = true;
  }
  final Set<String> aggAndPostAggNames = Sets.newHashSet();
  for (AggregatorFactory agg : aggs) {
    aggAndPostAggNames.add(agg.getName());
  }
  for (PostAggregator postAgg : postAggs) {
    aggAndPostAggNames.add(postAgg.getName());
  }
  if (!sortingNeeded) {
    for (int i = 0; i < columns.size(); i++) {
      final OrderByColumnSpec columnSpec = columns.get(i);
      if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
        sortingNeeded = true;
        break;
      }
      final ValueType columnType = getOrderByType(columnSpec, dimensions);
      final StringComparator naturalComparator;
      if (columnType == ValueType.STRING) {
        naturalComparator = StringComparators.LEXICOGRAPHIC;
      } else if (columnType == ValueType.LONG || columnType == ValueType.FLOAT) {
        naturalComparator = StringComparators.NUMERIC;
      } else {
        sortingNeeded = true;
        break;
      }
      if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING
          || !columnSpec.getDimensionComparator().equals(naturalComparator)
          || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
        sortingNeeded = true;
        break;
      }
    }
  }
  if (!sortingNeeded) {
    return limit == Integer.MAX_VALUE ? Functions.<Sequence<Row>>identity() : new LimitingFn(limit);
  }
  // Materialize the Comparator first for fast-fail error checking.
  final Ordering<Row> ordering = makeComparator(dimensions, aggs, postAggs);
  if (limit == Integer.MAX_VALUE) {
    return new SortingFn(ordering);
  } else {
    return new TopNFunction(ordering, limit);
  }
}
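Here the only role of AggregatorFactory is to contribute output names to aggAndPostAggNames, so that ordering on an aggregated or post-aggregated column can be detected and force a re-sort. A minimal sketch of that check (the class and method names are invented for illustration):

import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.PostAggregator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public final class OrderingColumnsSketch {
  private OrderingColumnsSketch() {}

  // True if the ordering column is produced by an aggregator or post-aggregator,
  // in which case the rows are not naturally ordered by it and sorting is needed.
  public static boolean isAggregatedColumn(String orderByColumn, List<AggregatorFactory> aggs, List<PostAggregator> postAggs) {
    final Set<String> names = new HashSet<>();
    for (AggregatorFactory agg : aggs) {
      names.add(agg.getName());
    }
    for (PostAggregator postAgg : postAggs) {
      names.add(postAgg.getName());
    }
    return names.contains(orderByColumn);
  }
}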