Search in sources :

Example 56 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class HllSketchSqlAggregatorTest method testtHllSketchPostAggsPostSort.

@Test
public void testtHllSketchPostAggsPostSort() throws Exception {
    final String sketchSummary = "### HLL SKETCH SUMMARY: \n" + "  Log Config K   : 12\n" + "  Hll Target     : HLL_4\n" + "  Current Mode   : LIST\n" + "  Memory         : false\n" + "  LB             : 2.0\n" + "  Estimate       : 2.000000004967054\n" + "  UB             : 2.000099863468538\n" + "  OutOfOrder Flag: false\n" + "  Coupon Count   : 2\n";
    final String sql = "SELECT DS_HLL(dim2) as y FROM druid.foo ORDER BY HLL_SKETCH_ESTIMATE(DS_HLL(dim2)) DESC LIMIT 10";
    testQuery(StringUtils.format("SELECT HLL_SKETCH_ESTIMATE(y), HLL_SKETCH_TO_STRING(y) from (%s)", sql), ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new HllSketchBuildAggregatorFactory("a0", "dim2", null, null, true))).postAggregators(ImmutableList.of(new FieldAccessPostAggregator("p0", "a0"), new HllSketchToEstimatePostAggregator("p2", new FieldAccessPostAggregator("p1", "a0"), false), new HllSketchToEstimatePostAggregator("s1", new FieldAccessPostAggregator("s0", "p0"), false), new HllSketchToStringPostAggregator("s3", new FieldAccessPostAggregator("s2", "p0")))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 2.000000004967054d, sketchSummary }));
}
Also used : FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) HllSketchToEstimatePostAggregator(org.apache.druid.query.aggregation.datasketches.hll.HllSketchToEstimatePostAggregator) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) HllSketchToStringPostAggregator(org.apache.druid.query.aggregation.datasketches.hll.HllSketchToStringPostAggregator) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 57 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class ThetaSketchSqlAggregatorTest method testThetaSketchPostAggs.

@Test
public void testThetaSketchPostAggs() throws Exception {
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2.0d, 3.0d, "{\"estimate\":2.0,\"highBound\":2.0,\"lowBound\":2.0,\"numStdDev\":10}", "\"AQMDAAA6zJOQxkPsNomrZQ==\"", "\"AgMDAAAazJMGAAAAAACAP1XTBztMIcMJ+HOoBBne1zKQxkPsNomrZUeWbJt3n+VpF8EdUoUHAXvxsLkOSE0lfQ==\"", "\"AQMDAAA6zJMXwR1ShQcBew==\"", "\"AQMDAAA6zJOQxkPsNomrZQ==\"", 1.0d });
    } else {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2.0d, 3.0d, "{\"estimate\":2.0,\"highBound\":2.0,\"lowBound\":2.0,\"numStdDev\":10}", "\"AQMDAAA6zJOQxkPsNomrZQ==\"", "\"AgMDAAAazJMGAAAAAACAP1XTBztMIcMJ+HOoBBne1zKQxkPsNomrZUeWbJt3n+VpF8EdUoUHAXvxsLkOSE0lfQ==\"", "\"AQMDAAA6zJMXwR1ShQcBew==\"", "\"AQMDAAA6zJOQxkPsNomrZQ==\"", 1.0d });
    }
    testQuery("SELECT\n" + "  SUM(cnt),\n" + "  theta_sketch_estimate(DS_THETA(dim2)),\n" + "  theta_sketch_estimate(DS_THETA(CONCAT(dim2, 'hello'))),\n" + "  theta_sketch_estimate_with_error_bounds(DS_THETA(dim2), 10),\n" + "  THETA_SKETCH_INTERSECT(DS_THETA(dim2), DS_THETA(dim1)),\n" + "  THETA_SKETCH_UNION(DS_THETA(dim2), DS_THETA(dim1)),\n" + "  THETA_SKETCH_NOT(DS_THETA(dim2), DS_THETA(dim1)),\n" + "  THETA_SKETCH_INTERSECT(32768, DS_THETA(dim2), DS_THETA(dim1)),\n" + "  theta_sketch_estimate(THETA_SKETCH_INTERSECT(THETA_SKETCH_INTERSECT(DS_THETA(dim2), DS_THETA(dim1)), DS_THETA(dim2)))\n" + "FROM druid.foo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "concat(\"dim2\",'hello')", ColumnType.STRING, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new LongSumAggregatorFactory("a0", "cnt"), new SketchMergeAggregatorFactory("a1", "dim2", null, null, null, null), new SketchMergeAggregatorFactory("a2", "v0", null, null, null, null), new SketchMergeAggregatorFactory("a3", "dim1", null, null, null, null))).postAggregators(new SketchEstimatePostAggregator("p1", new FieldAccessPostAggregator("p0", "a1"), null), new SketchEstimatePostAggregator("p3", new FieldAccessPostAggregator("p2", "a2"), null), new SketchEstimatePostAggregator("p5", new FieldAccessPostAggregator("p4", "a1"), 10), new SketchSetPostAggregator("p8", "INTERSECT", null, ImmutableList.of(new FieldAccessPostAggregator("p6", "a1"), new FieldAccessPostAggregator("p7", "a3"))), new SketchSetPostAggregator("p11", "UNION", null, ImmutableList.of(new FieldAccessPostAggregator("p9", "a1"), new FieldAccessPostAggregator("p10", "a3"))), new SketchSetPostAggregator("p14", "NOT", null, ImmutableList.of(new FieldAccessPostAggregator("p12", "a1"), new FieldAccessPostAggregator("p13", "a3"))), new SketchSetPostAggregator("p17", "INTERSECT", 32768, ImmutableList.of(new FieldAccessPostAggregator("p15", "a1"), new FieldAccessPostAggregator("p16", "a3"))), new SketchEstimatePostAggregator("p23", new SketchSetPostAggregator("p22", "INTERSECT", null, ImmutableList.of(new SketchSetPostAggregator("p20", "INTERSECT", null, ImmutableList.of(new FieldAccessPostAggregator("p18", "a1"), new FieldAccessPostAggregator("p19", "a3"))), new FieldAccessPostAggregator("p21", "a1"))), null)).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) SketchSetPostAggregator(org.apache.druid.query.aggregation.datasketches.theta.SketchSetPostAggregator) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) SketchEstimatePostAggregator(org.apache.druid.query.aggregation.datasketches.theta.SketchEstimatePostAggregator) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 58 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class ThetaSketchSqlAggregatorTest method testAvgDailyCountDistinctThetaSketch.

@Test
public void testAvgDailyCountDistinctThetaSketch() throws Exception {
    // Can't vectorize due to outer query (it operates on an inlined data source, which cannot be vectorized).
    cannotVectorize();
    final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1L });
    testQuery("SELECT\n" + "  AVG(u)\n" + "FROM (SELECT FLOOR(__time TO DAY), APPROX_COUNT_DISTINCT_DS_THETA(cnt) AS u FROM druid.foo GROUP BY 1)", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)).aggregators(Collections.singletonList(new SketchMergeAggregatorFactory("a0:a", "cnt", null, null, null, null))).postAggregators(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a"))).context(TIMESERIES_CONTEXT_BY_GRAN).build().withOverriddenContext(BaseCalciteQueryTest.getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")))).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(NullHandling.replaceWithDefault() ? Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count")) : Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null))))).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 59 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class ThetaSketchSqlAggregatorTest method testApproxCountDistinctThetaSketch.

@Test
public void testApproxCountDistinctThetaSketch() throws Exception {
    // Cannot vectorize due to SUBSTRING.
    cannotVectorize();
    final String sql = "SELECT\n" + "  SUM(cnt),\n" + "  APPROX_COUNT_DISTINCT_DS_THETA(dim2),\n" + // uppercase
    "  APPROX_COUNT_DISTINCT_DS_THETA(dim2) FILTER(WHERE dim2 <> ''),\n" + // lowercase; also, filtered
    "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on extractionFn, using A.C.D.
    "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on expression, using COUNT DISTINCT
    "  APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1, 32768),\n" + // on native theta sketch column
    "  APPROX_COUNT_DISTINCT_DS_THETA(thetasketch_dim1)\n" + // on native theta sketch column
    "FROM druid.foo";
    final List<Object[]> expectedResults;
    if (NullHandling.replaceWithDefault()) {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
    } else {
        expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
    }
    testQuery(sql, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new LongSumAggregatorFactory("a0", "cnt"), new SketchMergeAggregatorFactory("a1", "dim2", null, null, null, null), new FilteredAggregatorFactory(new SketchMergeAggregatorFactory("a2", "dim2", null, null, null, null), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))), new SketchMergeAggregatorFactory("a3", "v0", null, null, null, null), new SketchMergeAggregatorFactory("a4", "v1", null, null, null, null), new SketchMergeAggregatorFactory("a5", "thetasketch_dim1", 32768, null, null, null), new SketchMergeAggregatorFactory("a6", "thetasketch_dim1", null, null, null, null))).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) SketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 60 with MultipleIntervalSegmentSpec

use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testBloomFilterAgg.

@Test
public void testBloomFilterAgg() throws Exception {
    cannotVectorize();
    BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
        if (raw == null) {
            expected1.addBytes(null, 0, 0);
        } else {
            expected1.addString(raw);
        }
    }
    testQuery("SELECT\n" + "BLOOM_FILTER(dim1, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
Also used : InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Aggregations

MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec)142 Test (org.junit.Test)115 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)53 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)44 BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest)39 QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec)28 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)25 ResponseContext (org.apache.druid.query.context.ResponseContext)22 QueryRunner (org.apache.druid.query.QueryRunner)21 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)21 QueryPlus (org.apache.druid.query.QueryPlus)20 TableDataSource (org.apache.druid.query.TableDataSource)19 FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner)18 ArrayList (java.util.ArrayList)17 QueryDataSource (org.apache.druid.query.QueryDataSource)15 DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec)15 OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec)15 Sequence (org.apache.druid.java.util.common.guava.Sequence)14 FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator)14 Result (org.apache.druid.query.Result)13