
Example 6 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

From the class GroupByRowProcessor, the method process:

public static Sequence<Row> process(final Query queryParam, final Sequence<Row> rows, final Map<String, ValueType> rowSignature, final GroupByQueryConfig config, final GroupByQueryResource resource, final ObjectMapper spillMapper, final String processingTmpDir) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final AggregatorFactory[] aggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        aggregatorFactories[i] = query.getAggregatorSpecs().get(i);
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final SettableSupplier<Row> rowSupplier = new SettableSupplier<>();
    final RowBasedColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature);
    final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
    final FilteredSequence<Row> filteredSequence = new FilteredSequence<>(rows, new Predicate<Row>() {

        @Override
        public boolean apply(Row input) {
            boolean inInterval = false;
            DateTime rowTime = input.getTimestamp();
            for (Interval queryInterval : queryIntervals) {
                if (queryInterval.contains(rowTime)) {
                    inInterval = true;
                    break;
                }
            }
            if (!inInterval) {
                return false;
            }
            rowSupplier.set(input);
            return filterMatcher.matches();
        }
    });
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            // This contains all closeable objects which are closed when the returned iterator iterates all the elements,
            // or an exception is thrown. The objects are closed in reverse order.
            final List<Closeable> closeOnExit = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                closeOnExit.add(temporaryStorage);
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, true, rowSignature, querySpecificConfig, new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                        closeOnExit.add(mergeBufferHolder);
                        return mergeBufferHolder.get();
                    }
                }, -1, temporaryStorage, spillMapper, aggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                closeOnExit.add(grouper);
                final Grouper<RowBasedKey> retVal = filteredSequence.accumulate(grouper, accumulator);
                if (retVal != grouper) {
                    throw GroupByQueryHelper.throwAccumulationResourceLimitExceededException();
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(closeOnExit)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(closeOnExit)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), Closeable (java.io.Closeable), RowBasedColumnSelectorFactory (io.druid.query.groupby.RowBasedColumnSelectorFactory), DateTime (org.joda.time.DateTime), GroupByQuery (io.druid.query.groupby.GroupByQuery), List (java.util.List), Supplier (com.google.common.base.Supplier), SettableSupplier (io.druid.common.guava.SettableSupplier), Pair (io.druid.java.util.common.Pair), BooleanValueMatcher (io.druid.segment.filter.BooleanValueMatcher), ValueMatcher (io.druid.query.filter.ValueMatcher), GroupByQueryConfig (io.druid.query.groupby.GroupByQueryConfig), FilteredSequence (io.druid.java.util.common.guava.FilteredSequence), RowBasedKey (io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), ByteBuffer (java.nio.ByteBuffer), BaseSequence (io.druid.java.util.common.guava.BaseSequence), Filter (io.druid.query.filter.Filter), Row (io.druid.data.input.Row), File (java.io.File), Interval (org.joda.time.Interval)
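
The pattern worth calling out in this method is the single reusable ValueMatcher: a SettableSupplier<Row> is re-pointed at each incoming row, so the matcher built once against the RowBasedColumnSelectorFactory evaluates the filter against whichever row the supplier currently holds, with no per-row allocation. A minimal sketch of that pattern in isolation, assuming a pre-built Filter someFilter, a column-type map signature, and an iterable someRows (all three names are hypothetical):

final SettableSupplier<Row> supplier = new SettableSupplier<>();
final ValueMatcher matcher = someFilter.makeMatcher(RowBasedColumnSelectorFactory.create(supplier, signature));
for (Row row : someRows) {
    // Re-point the selector factory at the current row; the matcher is reused as-is.
    supplier.set(row);
    if (matcher.matches()) {
        // The row passes the filter.
    }
}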

Example 7 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

From the class GroupByStrategyV1, the method processSubqueryResult:

@Override
public Sequence<Row> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<Row> subqueryResult) {
    final Set<AggregatorFactory> aggs = Sets.newHashSet();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = Sets.newHashSet();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // This name is already present as one of the subquery's dimension outputs, so the column will
                // exist in the incremental index anyway; skip it here rather than trying to "transfer" it with
                // an aggregator. (GroupBy v2 doesn't have this problem.)
                continue;
            }
            if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {

                @Override
                public boolean apply(AggregatorFactory agg) {
                    return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
                }
            })) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(Lists.newArrayList(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        innerQuery.withOverriddenContext(ImmutableMap.<String, Object>of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
        configSupplier.get(),
        bufferPool,
        subqueryResult,
        false
    );
    //Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    //is ensured by QuerySegmentSpec.
    //GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    //and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        configSupplier.get(),
        bufferPool,
        Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<Row>>() {

        @Override
        public Sequence<Row> apply(Interval interval) {
            return process(
                outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                new IncrementalIndexStorageAdapter(innerQueryResultIndex)
            );
        }
    })), true);
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.applyLimit(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), PostAggregator (io.druid.query.aggregation.PostAggregator), IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec), Sequence (io.druid.java.util.common.guava.Sequence), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), IAE (io.druid.java.util.common.IAE), GroupByQuery (io.druid.query.groupby.GroupByQuery), IncrementalIndexStorageAdapter (io.druid.segment.incremental.IncrementalIndexStorageAdapter), Interval (org.joda.time.Interval)
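
The restriction enforced by the IAE above means two outer aggregators of different types cannot read the same inner column. A minimal sketch of such a collision, assuming the standard LongSumAggregatorFactory and DoubleSumAggregatorFactory (the latter is an assumed import from io.druid.query.aggregation, not used in the snippet above):

// Both factories name and read the same inner column "innerCount", but with different types.
AggregatorFactory longSum = new LongSumAggregatorFactory("innerCount", "innerCount");
AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("innerCount", "innerCount");
// Same name, different aggregator type: equals() is false, so processSubqueryResult
// would reject an outer query whose required columns contain both of these.
boolean collides = longSum.getName().equals(doubleSum.getName()) && !longSum.equals(doubleSum); // true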

Example 8 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

From the class GroupByStrategyV2, the method mergeResults:

@Override
public Sequence<Row> mergeResults(final QueryRunner<Row> baseRunner, final GroupByQuery query, final Map<String, Object> responseContext) {
    // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
    // involve materialization)
    final ResultMergeQueryRunner<Row> mergingQueryRunner = new ResultMergeQueryRunner<Row>(baseRunner) {

        @Override
        protected Ordering<Row> makeOrdering(Query<Row> queryParam) {
            return ((GroupByQuery) queryParam).getRowOrdering(true);
        }

        @Override
        protected BinaryFn<Row, Row, Row> createMergeFn(Query<Row> queryParam) {
            return new GroupByBinaryFnV2((GroupByQuery) queryParam);
        }
    };
    // Fudge timestamp, maybe.
    final DateTime fudgeTimestamp = getUniversalTimestamp(query);
    return query.applyLimit(Sequences.map(mergingQueryRunner.run(
        new GroupByQuery(
            query.getDataSource(),
            query.getQuerySegmentSpec(),
            query.getVirtualColumns(),
            query.getDimFilter(),
            query.getGranularity(),
            query.getDimensions(),
            query.getAggregatorSpecs(),
            // Don't do post aggs until the end of this method.
            ImmutableList.<PostAggregator>of(),
            // Don't do "having" clause until the end of this method.
            null,
            null,
            query.getContext()
        ).withOverriddenContext(ImmutableMap.<String, Object>of(
            "finalize", false,
            GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2,
            CTX_KEY_FUDGE_TIMESTAMP, fudgeTimestamp == null ? "" : String.valueOf(fudgeTimestamp.getMillis()),
            CTX_KEY_OUTERMOST, false
        )),
        responseContext
    ), new Function<Row, Row>() {

        @Override
        public Row apply(final Row row) {
            if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true)) {
                return row;
            }
            if (query.getPostAggregatorSpecs().isEmpty() && fudgeTimestamp == null) {
                return row;
            }
            final Map<String, Object> newMap;
            if (query.getPostAggregatorSpecs().isEmpty()) {
                newMap = ((MapBasedRow) row).getEvent();
            } else {
                newMap = Maps.newLinkedHashMap(((MapBasedRow) row).getEvent());
                for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
                    newMap.put(postAggregator.getName(), postAggregator.compute(newMap));
                }
            }
            return new MapBasedRow(fudgeTimestamp != null ? fudgeTimestamp : row.getTimestamp(), newMap);
        }
    }));
}
Also used: ResultMergeQueryRunner (io.druid.query.ResultMergeQueryRunner), GroupByBinaryFnV2 (io.druid.query.groupby.epinephelinae.GroupByBinaryFnV2), Query (io.druid.query.Query), GroupByQuery (io.druid.query.groupby.GroupByQuery), PostAggregator (io.druid.query.aggregation.PostAggregator), DateTime (org.joda.time.DateTime), MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), Row (io.druid.data.input.Row)
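
The mapping function above is where post-aggregators finally run: each one reads finished aggregates out of the row's event map and its result is written back under its own name. A minimal sketch of that step in isolation, using the standard FieldAccessPostAggregator and ArithmeticPostAggregator (assumed imports from io.druid.query.aggregation.post; the column names "sum" and "rows" are hypothetical):

// average = sum / rows, computed over the finished aggregates of one row.
PostAggregator average = new ArithmeticPostAggregator(
    "average",
    "/",
    ImmutableList.<PostAggregator>of(
        new FieldAccessPostAggregator("sum", "sum"),
        new FieldAccessPostAggregator("rows", "rows")
    )
);
Map<String, Object> event = Maps.newLinkedHashMap();
event.put("sum", 10.0);
event.put("rows", 4.0);
// Mirrors newMap.put(postAggregator.getName(), postAggregator.compute(newMap)) above.
event.put(average.getName(), average.compute(event)); // adds "average" -> 2.5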

Example 9 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

From the class MultiValuedDimensionTest, the method testGroupByNoFilter:

@Test
public void testGroupByNoFilter() throws Exception {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("tags", "tags")))
        .setAggregatorSpecs(Arrays.asList(new AggregatorFactory[] { new CountAggregatorFactory("count") }))
        .build();
    Sequence<Row> result = helper.runQueryOnSegmentsObjs(ImmutableList.<Segment>of(new QueryableIndexSegment("sid1", queryableIndex), new IncrementalIndexSegment(incrementalIndex, "sid2")), query);
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", null, "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t6", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t7", "count", 2L)
    );
    TestHelper.assertExpectedObjects(expectedResults, Sequences.toList(result, new ArrayList<Row>()), "");
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), GroupByQuery (io.druid.query.groupby.GroupByQuery), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), ArrayList (java.util.ArrayList), Row (io.druid.data.input.Row), LegacySegmentSpec (io.druid.query.spec.LegacySegmentSpec), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest), Test (org.junit.Test)

Example 10 with GroupByQuery

Use of io.druid.query.groupby.GroupByQuery in project druid by druid-io.

From the class MultiValuedDimensionTest, the method testGroupByWithDimFilter:

@Test
public void testGroupByWithDimFilter() throws Exception {
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("tags", "tags")))
        .setAggregatorSpecs(Arrays.asList(new AggregatorFactory[] { new CountAggregatorFactory("count") }))
        .setDimFilter(new SelectorDimFilter("tags", "t3", null))
        .build();
    Sequence<Row> result = helper.runQueryOnSegmentsObjs(ImmutableList.<Segment>of(new QueryableIndexSegment("sid1", queryableIndex), new IncrementalIndexSegment(incrementalIndex, "sid2")), query);
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t1", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t2", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t3", "count", 4L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t4", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "tags", "t5", "count", 2L)
    );
    TestHelper.assertExpectedObjects(expectedResults, Sequences.toList(result, new ArrayList<Row>()), "");
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec), DimensionSpec (io.druid.query.dimension.DimensionSpec), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec), IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), ArrayList (java.util.ArrayList), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), LegacySegmentSpec (io.druid.query.spec.LegacySegmentSpec), GroupByQuery (io.druid.query.groupby.GroupByQuery), SelectorDimFilter (io.druid.query.filter.SelectorDimFilter), Row (io.druid.data.input.Row), GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest), Test (org.junit.Test)
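
Note the expected results: the SelectorDimFilter keeps any row whose multi-valued "tags" list contains "t3", but grouping still emits one output row per tag value occurring in those rows, which is why "t1" through "t5" all appear rather than "t3" alone. To restrict the emitted dimension values themselves, the dimension spec can be filtered as well; a short sketch using the RegexFilteredDimensionSpec already imported above:

// Emits only dimension values matching the pattern "t3" from the multi-valued "tags" column.
DimensionSpec onlyT3 = new RegexFilteredDimensionSpec(new DefaultDimensionSpec("tags", "tags"), "t3");
// Substituting this for the DefaultDimensionSpec in the query above would leave only the "t3" row.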

Aggregations

GroupByQuery (io.druid.query.groupby.GroupByQuery): 26 usages
Row (io.druid.data.input.Row): 19 usages
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 15 usages
Test (org.junit.Test): 13 usages
GroupByQueryRunnerTest (io.druid.query.groupby.GroupByQueryRunnerTest): 12 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 10 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 8 usages
ArrayList (java.util.ArrayList): 7 usages
InputRow (io.druid.data.input.InputRow): 6 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 6 usages
List (java.util.List): 6 usages
QueryRunner (io.druid.query.QueryRunner): 5 usages
PostAggregator (io.druid.query.aggregation.PostAggregator): 5 usages
Interval (org.joda.time.Interval): 5 usages
Function (com.google.common.base.Function): 4 usages
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 4 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 4 usages
MultipleIntervalSegmentSpec (io.druid.query.spec.MultipleIntervalSegmentSpec): 4 usages
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment): 4 usages
ISE (io.druid.java.util.common.ISE): 3 usages