Search in sources :

Example 76 with GroupByQuery

use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

the class DataSourceTest method testQueryDataSource.

@Test
public void testQueryDataSource() throws IOException {
    GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD).setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index")).setGranularity(QueryRunnerTestHelper.DAY_GRAN).build();
    String dataSourceJSON = "{\"type\":\"query\", \"query\":" + JSON_MAPPER.writeValueAsString(query) + "}";
    DataSource dataSource = JSON_MAPPER.readValue(dataSourceJSON, DataSource.class);
    Assert.assertEquals(new QueryDataSource(query), dataSource);
}
Also used : GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 77 with GroupByQuery

use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

the class VarianceGroupByQueryTest method testGroupByZtestPostAgg.

@Test
public void testGroupByZtestPostAgg() {
    // test postaggs from 'teststats' package in here since we've already gone to the trouble of setting up the test
    GroupByQuery query = queryBuilder.setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD).setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, VarianceTestHelper.INDEX_VARIANCE_AGGR, new LongSumAggregatorFactory("idx", "index")).setPostAggregatorSpecs(ImmutableList.of(VarianceTestHelper.STD_DEV_OF_INDEX_POST_AGGR, // these inputs are totally nonsensical, i just want the code path to be executed
    new ZtestPostAggregator("ztest", new FieldAccessPostAggregator("f1", "idx"), new ConstantPostAggregator("f2", 100000L), new FieldAccessPostAggregator("f3", "index_stddev"), new ConstantPostAggregator("f2", 100000L)))).setLimitSpec(new DefaultLimitSpec(OrderByColumnSpec.descending("ztest"), 1)).setGranularity(QueryRunnerTestHelper.DAY_GRAN).build();
    VarianceTestHelper.RowBuilder builder = new VarianceTestHelper.RowBuilder(new String[] { "alias", "rows", "idx", "index_stddev", "index_var", "ztest" });
    List<ResultRow> expectedResults = builder.add("2011-04-01", "premium", 3L, 2900.0, 726.632270328514, 527994.4562827706, 36.54266309285626).build(query);
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "groupBy");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ConstantPostAggregator(org.apache.druid.query.aggregation.post.ConstantPostAggregator) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ZtestPostAggregator(org.apache.druid.query.aggregation.teststats.ZtestPostAggregator) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 78 with GroupByQuery

use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

the class VarianceGroupByQueryTest method testPostAggHavingSpec.

@Test
public void testPostAggHavingSpec() {
    VarianceTestHelper.RowBuilder expect = new VarianceTestHelper.RowBuilder(new String[] { "alias", "rows", "index", "index_var", "index_stddev" });
    GroupByQuery query = queryBuilder.setDataSource(QueryRunnerTestHelper.DATA_SOURCE).setInterval("2011-04-02/2011-04-04").setDimensions(new DefaultDimensionSpec("quality", "alias")).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, QueryRunnerTestHelper.INDEX_LONG_SUM, VarianceTestHelper.INDEX_VARIANCE_AGGR).setPostAggregatorSpecs(ImmutableList.of(VarianceTestHelper.STD_DEV_OF_INDEX_POST_AGGR)).setGranularity(new PeriodGranularity(new Period("P1M"), null, null)).setHavingSpec(new OrHavingSpec(ImmutableList.of(// 3 rows
    new GreaterThanHavingSpec(VarianceTestHelper.STD_DEV_OF_INDEX_METRIC, 15L)))).build();
    List<ResultRow> expectedResults = expect.add("2011-04-01", "automotive", 2L, 269L, 299.0009819048282, 17.29164485827847).add("2011-04-01", "mezzanine", 6L, 4420L, 254083.76447001836, 504.06722217380724).add("2011-04-01", "premium", 6L, 4416L, 252279.2020389339, 502.27403082275106).build(query);
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "havingSpec");
    query = query.withLimitSpec(new DefaultLimitSpec(Collections.singletonList(OrderByColumnSpec.asc(VarianceTestHelper.STD_DEV_OF_INDEX_METRIC)), 2));
    expectedResults = expect.add("2011-04-01", "automotive", 2L, 269L, 299.0009819048282, 17.29164485827847).add("2011-04-01", "premium", 6L, 4416L, 252279.2020389339, 502.27403082275106).build(query);
    results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "limitSpec");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) OrHavingSpec(org.apache.druid.query.groupby.having.OrHavingSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Period(org.joda.time.Period) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) GreaterThanHavingSpec(org.apache.druid.query.groupby.having.GreaterThanHavingSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 79 with GroupByQuery

use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

the class RowBasedGrouperHelper method getResultRowPredicate.

/**
 * Returns a predicate that filters result rows from a particular "subquery" based on the intervals and dim filters
 * from "query".
 *
 * @param query    outer query
 * @param subquery inner query
 */
private static Predicate<ResultRow> getResultRowPredicate(final GroupByQuery query, final GroupByQuery subquery) {
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final SettableSupplier<ResultRow> rowSupplier = new SettableSupplier<>();
    final ColumnSelectorFactory columnSelectorFactory = query.getVirtualColumns().wrap(RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory(subquery, rowSupplier, RowSignature.Finalization.UNKNOWN));
    final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
    if (subquery.getUniversalTimestamp() != null && queryIntervals.stream().noneMatch(itvl -> itvl.contains(subquery.getUniversalTimestamp()))) {
        // By the way, if there's a universal timestamp that _does_ match the query intervals, we do nothing special here.
        return row -> false;
    }
    return row -> {
        if (subquery.getResultRowHasTimestamp()) {
            boolean inInterval = false;
            for (Interval queryInterval : queryIntervals) {
                if (queryInterval.contains(row.getLong(0))) {
                    inInterval = true;
                    break;
                }
            }
            if (!inInterval) {
                return false;
            }
        }
        rowSupplier.set(row);
        return filterMatcher.matches();
    };
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) IntArrayUtils(org.apache.druid.common.utils.IntArrayUtils) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) AllGranularity(org.apache.druid.java.util.common.granularity.AllGranularity) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) Pair(org.apache.druid.java.util.common.Pair) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowAdapter(org.apache.druid.segment.RowAdapter) ColumnSelectorStrategyFactory(org.apache.druid.query.dimension.ColumnSelectorStrategyFactory) JsonValue(com.fasterxml.jackson.annotation.JsonValue) GroupingAggregatorFactory(org.apache.druid.query.aggregation.GroupingAggregatorFactory) BufferComparator(org.apache.druid.query.groupby.epinephelinae.Grouper.BufferComparator) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) ToLongFunction(java.util.function.ToLongFunction) Longs(com.google.common.primitives.Longs) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ResultRow(org.apache.druid.query.groupby.ResultRow) Predicate(java.util.function.Predicate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) List(java.util.List) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BaseDoubleColumnValueSelector(org.apache.druid.segment.BaseDoubleColumnValueSelector) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Accumulator(org.apache.druid.java.util.common.guava.Accumulator) IntStream(java.util.stream.IntStream) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) ComparableList(org.apache.druid.segment.data.ComparableList) Supplier(com.google.common.base.Supplier) BaseQuery(org.apache.druid.query.BaseQuery) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ColumnSelectorStrategy(org.apache.druid.query.dimension.ColumnSelectorStrategy) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) DimensionSelector(org.apache.druid.segment.DimensionSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) ColumnInspector(org.apache.druid.segment.ColumnInspector) StringComparator(org.apache.druid.query.ordering.StringComparator) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) DateTime(org.joda.time.DateTime) Ints(com.google.common.primitives.Ints) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) Closeable(java.io.Closeable) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) BitSet(java.util.BitSet) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) Comparator(java.util.Comparator) Filters(org.apache.druid.segment.filter.Filters) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) Filter(org.apache.druid.query.filter.Filter) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) BooleanValueMatcher(org.apache.druid.segment.filter.BooleanValueMatcher) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) Filter(org.apache.druid.query.filter.Filter) Interval(org.joda.time.Interval)

Example 80 with GroupByQuery

use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

the class DoubleMeanAggregationTest method testBufferAggretatorUsingGroupByQuery.

@Test
@Parameters(method = "doVectorize")
public void testBufferAggretatorUsingGroupByQuery(boolean doVectorize) throws Exception {
    GroupByQuery query = new GroupByQuery.Builder().setDataSource("test").setGranularity(Granularities.ALL).setInterval("1970/2050").setAggregatorSpecs(new DoubleMeanAggregatorFactory("meanOnDouble", SimpleTestIndex.DOUBLE_COL), new DoubleMeanAggregatorFactory("meanOnString", SimpleTestIndex.SINGLE_VALUE_DOUBLE_AS_STRING_DIM), new DoubleMeanAggregatorFactory("meanOnMultiValue", SimpleTestIndex.MULTI_VALUE_DOUBLE_AS_STRING_DIM)).setContext(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, doVectorize)).build();
    // do json serialization and deserialization of query to ensure there are no serde issues
    ObjectMapper jsonMapper = groupByQueryTestHelper.getObjectMapper();
    query = (GroupByQuery) jsonMapper.readValue(jsonMapper.writeValueAsString(query), Query.class);
    Sequence<ResultRow> seq = groupByQueryTestHelper.runQueryOnSegmentsObjs(segments, query);
    Row result = Iterables.getOnlyElement(seq.toList()).toMapBasedRow(query);
    Assert.assertEquals(6.2d, result.getMetric("meanOnDouble").doubleValue(), 0.0001d);
    Assert.assertEquals(6.2d, result.getMetric("meanOnString").doubleValue(), 0.0001d);
    Assert.assertEquals(4.1333d, result.getMetric("meanOnMultiValue").doubleValue(), 0.0001d);
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Row(org.apache.druid.data.input.Row) ResultRow(org.apache.druid.query.groupby.ResultRow) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Parameters(junitparams.Parameters) Test(org.junit.Test)

Aggregations

GroupByQuery (org.apache.druid.query.groupby.GroupByQuery)95 Test (org.junit.Test)68 ResultRow (org.apache.druid.query.groupby.ResultRow)57 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)49 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)37 GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest)37 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)37 List (java.util.List)21 IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment)21 LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec)20 QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment)20 DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec)17 ArrayList (java.util.ArrayList)16 ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn)15 Collectors (java.util.stream.Collectors)13 QueryDataSource (org.apache.druid.query.QueryDataSource)13 GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig)13 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)12 MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)12 MapBasedRow (org.apache.druid.data.input.MapBasedRow)11