use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class GroupByStrategyV1 method processSubqueryResult.
/**
 * Runs the outer {@code query} of a nested groupBy against the already-computed rows of its {@code subquery}.
 *
 * <p>The subquery rows are loaded into an incremental index whose aggregator columns are the columns required
 * by the outer query's aggregators. The outer query is then processed against that index once per interval,
 * and the resulting rows are collected into a second incremental index from which the returned sequence is built.
 *
 * @param subquery       the inner groupBy query; supplies the dimension names, intervals and limit spec
 * @param query          the outer groupBy query
 * @param resource       not read by this implementation
 * @param subqueryResult the rows produced by the inner query
 * @return the post-aggregated, limited rows of the outer query; the outer result index is handed to
 *         {@code Sequences.withBaggage} together with the sequence
 * @throws IAE if two non-equal aggregators required by the outer query share the same name
 */
@Override
public Sequence<Row> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<Row> subqueryResult) {
  final Set<AggregatorFactory> aggs = Sets.newHashSet();

  // Nested group-bys work by first running the inner query and then materializing the results in an incremental
  // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
  // the aggregators for the outer query to define the column names so that the index will match the query. If
  // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
  // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
  // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
  // subsequent ones) and return an error if the aggregator types are different.
  final Set<String> dimensionNames = Sets.newHashSet();
  for (DimensionSpec dimension : subquery.getDimensions()) {
    dimensionNames.add(dimension.getOutputName());
  }
  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
      if (dimensionNames.contains(transferAgg.getName())) {
        // The subquery already outputs a dimension with this name, so no aggregator column is created for it.
        // NOTE(review): the original explanation here was truncated ("... doesn't have this problem.");
        // confirm the intended rationale against the upstream source.
        continue;
      }
      if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {
        @Override
        public boolean apply(AggregatorFactory agg) {
          return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
        }
      })) {
        throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
      }
      aggs.add(transferAgg);
    }
  }

  // We need the inner incremental index to have all the columns required by the outer query
  final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
      .setAggregatorSpecs(Lists.newArrayList(aggs))
      .setInterval(subquery.getIntervals())
      .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList())
      .build();
  final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
      .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
      .build();

  final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
      innerQuery.withOverriddenContext(ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
      configSupplier.get(),
      bufferPool,
      subqueryResult,
      false
  );

  final IncrementalIndex outerQueryResultIndex;
  try {
    //Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    //is ensured by QuerySegmentSpec.
    //GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    //and concatenate the results.
    outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        configSupplier.get(),
        bufferPool,
        Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<Row>>() {
          @Override
          public Sequence<Row> apply(Interval interval) {
            return process(
                outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                new IncrementalIndexStorageAdapter(innerQueryResultIndex)
            );
          }
        })),
        true
    );
  }
  finally {
    // Release the inner index whether or not building the outer index succeeded; previously a failure
    // while building the outer index skipped this close() call.
    innerQueryResultIndex.close();
  }

  return Sequences.withBaggage(outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class TimewarpOperator method postProcess.
/**
 * Wraps {@code baseRunner} in a runner that shifts the query interval by an offset before running it and
 * shifts the timestamps of the results back by the same offset.
 *
 * <p>The offset comes from {@code computeOffset(now)}. Only the first interval of the incoming query is used;
 * both of its shifted endpoints are capped at {@code now + offset}.
 *
 * @param baseRunner the runner that executes the shifted query
 * @param now        the reference time in milliseconds used for the offset and for capping
 * @return a runner producing the results of {@code baseRunner} with their timestamps moved back by the offset
 */
public QueryRunner<T> postProcess(final QueryRunner<T> baseRunner, final long now) {
  return new QueryRunner<T>() {
    @Override
    public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
      final long offset = computeOffset(now);

      // Shift the first interval of the query, never letting either endpoint pass now + offset.
      final Interval requested = query.getIntervals().get(0);
      final long shiftedStart = Math.min(requested.getStartMillis() + offset, now + offset);
      final long shiftedEnd = Math.min(requested.getEndMillis() + offset, now + offset);
      final Interval modifiedInterval = new Interval(shiftedStart, shiftedEnd);

      final Query<T> shiftedQuery = query.withQuerySegmentSpec(
          new MultipleIntervalSegmentSpec(Arrays.asList(modifiedInterval))
      );
      final Sequence<T> shiftedResults = baseRunner.run(shiftedQuery, responseContext);

      return Sequences.map(shiftedResults, new Function<T, T>() {
        @Override
        public T apply(T input) {
          if (input instanceof Result) {
            final Result result = (Result) input;
            final Object value = result.getValue();

            if (!(value instanceof TimeBoundaryResultValue)) {
              return (T) new Result(result.getTimestamp().minus(offset), value);
            }

            final TimeBoundaryResultValue boundary = (TimeBoundaryResultValue) value;

            // A min time that cannot be read is treated as absent rather than failing the whole result.
            DateTime minTime;
            try {
              minTime = boundary.getMinTime();
            } catch (IllegalArgumentException ignored) {
              minTime = null;
            }
            final DateTime maxTime = boundary.getMaxTime();

            final TimeBoundaryQuery boundaryQuery = (TimeBoundaryQuery) query;
            // The result timestamp and max time are capped at now; the min time is only shifted.
            final DateTime shiftedTimestamp = new DateTime(Math.min(result.getTimestamp().getMillis() - offset, now));
            final DateTime shiftedMinTime = minTime == null ? null : minTime.minus(offset);
            final DateTime shiftedMaxTime =
                maxTime == null ? null : new DateTime(Math.min(maxTime.getMillis() - offset, now));

            return (T) boundaryQuery.buildResult(shiftedTimestamp, shiftedMinTime, shiftedMaxTime).iterator().next();
          }

          if (input instanceof MapBasedRow) {
            final MapBasedRow row = (MapBasedRow) input;
            return (T) new MapBasedRow(row.getTimestamp().minus(offset), row.getEvent());
          }

          // default to noop for unknown result types
          return input;
        }
      });
    }
  };
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testSubqueryWithMultipleIntervalsInOuterQuery.
/**
 * Runs a nested groupBy whose outer query covers two separate intervals and checks the per-day rows
 * returned for each "alias" value.
 */
@Test
public void testSubqueryWithMultipleIntervalsInOuterQuery() {
  final JavaScriptDimFilter acceptAllFilter = new JavaScriptDimFilter(
      "quality",
      "function(dim){ return true; }",
      null,
      JavaScriptConfig.getEnabledInstance()
  );
  GroupByQuery innerQuery = GroupByQuery
      .builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
      .setDimFilter(acceptAllFilter)
      .setAggregatorSpecs(
          Arrays.asList(
              QueryRunnerTestHelper.rowsCount,
              new LongSumAggregatorFactory("idx", "index"),
              new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")
          )
      )
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();

  // Two separate intervals for the outer query.
  final MultipleIntervalSegmentSpec outerIntervals = new MultipleIntervalSegmentSpec(
      ImmutableList.of(
          new Interval("2011-04-01T00:00:00.000Z/2011-04-01T23:58:00.000Z"),
          new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")
      )
  );
  GroupByQuery outerQuery = GroupByQuery
      .builder()
      .setDataSource(innerQuery)
      .setQuerySegmentSpec(outerIntervals)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("alias", "alias")))
      .setAggregatorSpecs(
          Arrays.<AggregatorFactory>asList(
              new LongSumAggregatorFactory("rows", "rows"),
              new LongSumAggregatorFactory("idx", "idx")
          )
      )
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();

  List<Row> expectedRows = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
  );

  // Subqueries are handled by the ToolChest
  Iterable<Row> actualRows = GroupByQueryRunnerTestHelper.runQuery(factory, runner, outerQuery);
  TestHelper.assertExpectedObjects(expectedRows, actualRows, "");
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testSubqueryWithExtractionFnInOuterQuery.
/**
 * Runs a nested groupBy whose outer query applies a regex extraction function to the subquery's "alias"
 * dimension and checks the per-day rows returned.
 */
@Test
public void testSubqueryWithExtractionFnInOuterQuery() {
  //https://github.com/druid-io/druid/issues/2556
  final JavaScriptDimFilter acceptAllFilter = new JavaScriptDimFilter(
      "quality",
      "function(dim){ return true; }",
      null,
      JavaScriptConfig.getEnabledInstance()
  );
  GroupByQuery innerQuery = GroupByQuery
      .builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
      .setDimFilter(acceptAllFilter)
      .setAggregatorSpecs(
          Arrays.asList(
              QueryRunnerTestHelper.rowsCount,
              new LongSumAggregatorFactory("idx", "index"),
              new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen")
          )
      )
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();

  final MultipleIntervalSegmentSpec outerInterval = new MultipleIntervalSegmentSpec(
      ImmutableList.of(new Interval("2011-04-01T00:00:00.000Z/2011-04-03T00:00:00.000Z"))
  );
  GroupByQuery outerQuery = GroupByQuery
      .builder()
      .setDataSource(innerQuery)
      .setQuerySegmentSpec(outerInterval)
      .setDimensions(
          Lists.<DimensionSpec>newArrayList(
              new ExtractionDimensionSpec("alias", "alias", new RegexDimExtractionFn("(a).*", true, "a"))
          )
      )
      .setAggregatorSpecs(
          Arrays.<AggregatorFactory>asList(
              new LongSumAggregatorFactory("rows", "rows"),
              new LongSumAggregatorFactory("idx", "idx")
          )
      )
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();

  List<Row> expectedRows = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "a", "rows", 13L, "idx", 6619L),
      GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "a", "rows", 13L, "idx", 5827L)
  );

  // Subqueries are handled by the ToolChest
  Iterable<Row> actualRows = GroupByQueryRunnerTestHelper.runQuery(factory, runner, outerQuery);
  TestHelper.assertExpectedObjects(expectedRows, actualRows, "");
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class SearchQueryQueryToolChestTest method testCacheStrategy.
/**
 * Checks that a search result prepared for the cache, serialized to bytes, read back and pulled from the
 * cache equals the original result.
 */
@Test
public void testCacheStrategy() throws Exception {
  final SearchQueryQueryToolChest toolChest = new SearchQueryQueryToolChest(null, null);
  final SearchQuery searchQuery = new SearchQuery(
      new TableDataSource("dummy"),
      null,
      Granularities.ALL,
      1,
      new MultipleIntervalSegmentSpec(ImmutableList.of(new Interval("2015-01-01/2015-01-02"))),
      ImmutableList.of(Druids.DIMENSION_IDENTITY.apply("dim1")),
      new FragmentSearchQuerySpec(ImmutableList.of("a", "b")),
      null,
      null
  );
  CacheStrategy<Result<SearchResultValue>, Object, SearchQuery> cacheStrategy = toolChest.getCacheStrategy(searchQuery);

  final Result<SearchResultValue> original = new Result<>(
      new DateTime(123L),
      new SearchResultValue(ImmutableList.of(new SearchHit("dim1", "a")))
  );

  // Round-trip: prepare for cache -> serialize -> deserialize -> pull from cache.
  Object prepared = cacheStrategy.prepareForCache().apply(original);
  ObjectMapper mapper = new DefaultObjectMapper();
  byte[] serialized = mapper.writeValueAsBytes(prepared);
  Object deserialized = mapper.readValue(serialized, cacheStrategy.getCacheObjectClazz());
  Result<SearchResultValue> roundTripped = cacheStrategy.pullFromCache().apply(deserialized);

  Assert.assertEquals(original, roundTripped);
}
Aggregations