
Example 91 with MultipleIntervalSegmentSpec

Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class HllSketchSqlAggregatorTest, method testApproxCountDistinctHllSketch:

@Test
public void testApproxCountDistinctHllSketch() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();
    final String sql = "SELECT\n" + "  SUM(cnt),\n" + // uppercase
    "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" + // lowercase; also, filtered
    "  APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn, using generic A.C.D.
    "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression, using COUNT DISTINCT
    "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native HllSketch column
    "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" + // on native HllSketch column
    "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" + "FROM druid.foo";
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
    } else {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
    }
    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
                    new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE))
                .aggregators(
                    ImmutableList.of(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND),
                        new FilteredAggregatorFactory(
                            new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
                            BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))),
                        new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND),
                        new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND),
                        new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND),
                        new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND)))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
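Every expected query in these examples builds its interval spec the same way: a MultipleIntervalSegmentSpec wrapping a single interval, with Filtration.eternity() standing in for an unbounded scan. The standalone sketch below shows the same construction with a concrete interval instead; the wrapper class, the Intervals.of(...) string, and the printed output are illustrative assumptions, not part of the test above.

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

public class IntervalSpecSketch {
    public static void main(String[] args) {
        // A spec covering a single year; the Calcite tests above pass
        // Filtration.eternity() here to scan all data instead.
        MultipleIntervalSegmentSpec spec = new MultipleIntervalSegmentSpec(
            ImmutableList.of(Intervals.of("2000-01-01/2001-01-01")));

        // The spec simply carries the list of intervals the query should cover.
        System.out.println(spec.getIntervals());
    }
}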

Example 92 with MultipleIntervalSegmentSpec

Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class HllSketchSqlAggregatorTest, method testAvgDailyCountDistinctHllSketch:

@Test
public void testAvgDailyCountDistinctHllSketch() throws Exception {
    // Can't vectorize due to outer query, which runs on an inline datasource.
    cannotVectorize();
    final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1L });
    testQuery("SELECT\n" + "  AVG(u)\n" + "FROM (" + "  SELECT FLOOR(__time TO DAY), APPROX_COUNT_DISTINCT_DS_HLL(cnt) AS u\n" + "  FROM druid.foo\n" + "  GROUP BY 1\n" + ")", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)).aggregators(Collections.singletonList(new HllSketchBuildAggregatorFactory("a0:a", "cnt", null, null, ROUND))).postAggregators(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a"))).context(QUERY_CONTEXT_DEFAULT).build().withOverriddenContext(BaseCalciteQueryTest.getTimeseriesContextWithFloorTime(ImmutableMap.of(TimeseriesQuery.SKIP_EMPTY_BUCKETS, true, BaseQuery.SQL_QUERY_ID, "dummy"), "d0")))).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(NullHandling.replaceWithDefault() ? Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count")) : Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null))))).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
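The expected GroupBy query above implements AVG(u) the way Druid's SQL planner rewrites every AVG: a SUM aggregator, a COUNT aggregator, and a "quotient" ArithmeticPostAggregator dividing one by the other. The sketch below isolates just that post-aggregator; the wrapper class and helper method are illustrative, while the field names mirror the "_a0:sum" / "_a0:count" names the planner generated above.

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

public class AvgAsQuotientSketch {
    // Builds the AVG-as-quotient post-aggregator: sumField / countField.
    public static ArithmeticPostAggregator avgOf(String sumField, String countField) {
        return new ArithmeticPostAggregator(
            "_a0",
            "quotient",
            ImmutableList.of(
                new FieldAccessPostAggregator(null, sumField),
                new FieldAccessPostAggregator(null, countField)));
    }

    public static void main(String[] args) {
        System.out.println(avgOf("_a0:sum", "_a0:count"));
    }
}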

Example 93 with MultipleIntervalSegmentSpec

Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class ThetaSketchSqlAggregatorTest, method testThetaSketchPostAggsPostSort:

@Test
public void testThetaSketchPostAggsPostSort() throws Exception {
    final String sql = "SELECT DS_THETA(dim2) as y FROM druid.foo ORDER BY THETA_SKETCH_ESTIMATE(DS_THETA(dim2)) DESC LIMIT 10";
    final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 2.0d });
    testQuery(
        StringUtils.format("SELECT THETA_SKETCH_ESTIMATE(y) from (%s)", sql),
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .aggregators(ImmutableList.of(new SketchMergeAggregatorFactory("a0", "dim2", null, null, null, null)))
                .postAggregators(
                    new FieldAccessPostAggregator("p0", "a0"),
                    new SketchEstimatePostAggregator("p2", new FieldAccessPostAggregator("p1", "a0"), null),
                    new SketchEstimatePostAggregator("s1", new FieldAccessPostAggregator("s0", "p0"), null))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()),
        expectedResults);
}
Also used : SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) SketchEstimatePostAggregator(org.apache.druid.query.aggregation.datasketches.theta.SketchEstimatePostAggregator) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
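In the expected query above, THETA_SKETCH_ESTIMATE(...) plans into a FieldAccessPostAggregator that reads the merged theta sketch plus a SketchEstimatePostAggregator that turns it into a distinct-count estimate (the third constructor argument, null here, is the optional error-bounds parameter). The sketch below isolates that pairing; the wrapper class and the "estimate"/"access" names are illustrative only.

import org.apache.druid.query.aggregation.datasketches.theta.SketchEstimatePostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

public class ThetaEstimateSketch {
    // Wraps a sketch-valued aggregator output in an estimate post-aggregator.
    public static SketchEstimatePostAggregator estimateOf(String sketchField) {
        return new SketchEstimatePostAggregator(
            "estimate",
            new FieldAccessPostAggregator("access", sketchField),
            null);
    }

    public static void main(String[] args) {
        // "a0" is the name the planner gave DS_THETA(dim2) in the expected query above.
        System.out.println(estimateOf("a0"));
    }
}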

Example 94 with MultipleIntervalSegmentSpec

Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class BloomFilterSqlAggregatorTest, method testBloomFilterAggDoubleVirtualColumn:

@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        Object raw = row.getRaw("d1");
        if (raw == null) {
            if (NullHandling.replaceWithDefault()) {
                expected1.addDouble(NullHandling.defaultDoubleValue());
            } else {
                expected1.addBytes(null, 0, 0);
            }
        } else {
            expected1.addDouble(2 * ((Number) raw).doubleValue());
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(d1 * 2, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
Also used : ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
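The expected result above is built by hand with the same BloomKFilter the aggregator uses at query time: values go in through the add* methods, and nulls are recorded as an empty byte range. The standalone sketch below exercises that API directly; the test* membership probes are assumed to mirror the add* methods (they are not shown in the test above), and the wrapper class is illustrative.

import org.apache.druid.query.filter.BloomKFilter;

public class BloomKFilterSketch {
    public static void main(String[] args) {
        // 1000 matches the maxNumEntries argument passed to BLOOM_FILTER(d1 * 2, 1000) above.
        BloomKFilter filter = new BloomKFilter(1000);
        filter.addDouble(2.0d);
        filter.addString("abc");
        // Nulls are represented as an empty byte range, as in the expected-filter loop above.
        filter.addBytes(null, 0, 0);

        System.out.println(filter.testDouble(2.0d));   // true
        System.out.println(filter.testString("abc"));  // true
        System.out.println(filter.testString("zzz"));  // false, with high probability
    }
}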

Example 95 with MultipleIntervalSegmentSpec

Use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

From class BloomFilterSqlAggregatorTest, method testBloomFilterTwoAggs:

@Test
public void testBloomFilterTwoAggs() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
        if (raw == null) {
            expected1.addBytes(null, 0, 0);
        } else {
            expected1.addString(raw);
        }
        List<String> lst = row.getDimension("dim2");
        if (lst.size() == 0) {
            expected2.addBytes(null, 0, 0);
        }
        for (String s : lst) {
            String val = NullHandling.emptyToNullIfNeeded(s);
            if (val == null) {
                expected2.addBytes(null, 0, 0);
            } else {
                expected2.addString(val);
            }
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(dim1, 1000),\n" + "BLOOM_FILTER(dim2, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES), new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("dim2", "a1:dim2"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1), CalciteTests.getJsonMapper().writeValueAsString(expected2) }));
}
Also used : InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)
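Each BLOOM_FILTER(col, 1000) call in the SQL above plans into one BloomFilterAggregatorFactory over a DefaultDimensionSpec, as the expected query shows. The sketch below factors that construction into a small helper; the helper name and the ":agg"/":out" naming are illustrative assumptions that only loosely follow the planner-generated names in the tests.

import org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;

public class BloomAggregatorSketch {
    // One factory per BLOOM_FILTER(...) call, reading the given column and emitting a serialized filter.
    public static BloomFilterAggregatorFactory bloomOver(String column, int maxNumEntries) {
        return new BloomFilterAggregatorFactory(
            column + ":agg",
            new DefaultDimensionSpec(column, column + ":out"),
            maxNumEntries);
    }

    public static void main(String[] args) {
        System.out.println(bloomOver("dim1", 1000));
        System.out.println(bloomOver("dim2", 1000));
    }
}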

Aggregations

MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 142 usages
Test (org.junit.Test): 115 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 53 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 44 usages
BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 39 usages
QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 28 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 25 usages
ResponseContext (org.apache.druid.query.context.ResponseContext): 22 usages
QueryRunner (org.apache.druid.query.QueryRunner): 21 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 21 usages
QueryPlus (org.apache.druid.query.QueryPlus): 20 usages
TableDataSource (org.apache.druid.query.TableDataSource): 19 usages
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 18 usages
ArrayList (java.util.ArrayList): 17 usages
QueryDataSource (org.apache.druid.query.QueryDataSource): 15 usages
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 15 usages
OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 15 usages
Sequence (org.apache.druid.java.util.common.guava.Sequence): 14 usages
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 14 usages
Result (org.apache.druid.query.Result): 13 usages