use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class HllSketchSqlAggregatorTest method testApproxCountDistinctHllSketch.
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
// Can't vectorize due to SUBSTRING expression.
cannotVectorize();
final String sql = "SELECT\n" + " SUM(cnt),\n" + // uppercase
" APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" + // lowercase; also, filtered
" APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn, using generic A.C.D.
" APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression, using COUNT DISTINCT
" COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native HllSketch column
" APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" + // on native HllSketch column
" APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" + "FROM druid.foo";
final List<Object[]> expectedResults;
if (NullHandling.replaceWithDefault()) {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 2L, 5L, 5L });
} else {
expectedResults = ImmutableList.of(new Object[] { 6L, 2L, 2L, 1L, 1L, 5L, 5L });
}
testQuery(sql, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE), new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new LongSumAggregatorFactory("a0", "cnt"), new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND), new FilteredAggregatorFactory(new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))), new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND), new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND), new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND), new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND))).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class HllSketchSqlAggregatorTest method testAvgDailyCountDistinctHllSketch.
@Test
public void testAvgDailyCountDistinctHllSketch() throws Exception {
// Can't vectorize due to outer query, which runs on an inline datasource.
cannotVectorize();
final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1L });
testQuery("SELECT\n" + " AVG(u)\n" + "FROM (" + " SELECT FLOOR(__time TO DAY), APPROX_COUNT_DISTINCT_DS_HLL(cnt) AS u\n" + " FROM druid.foo\n" + " GROUP BY 1\n" + ")", ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(new PeriodGranularity(Period.days(1), null, DateTimeZone.UTC)).aggregators(Collections.singletonList(new HllSketchBuildAggregatorFactory("a0:a", "cnt", null, null, ROUND))).postAggregators(ImmutableList.of(new FinalizingFieldAccessPostAggregator("a0", "a0:a"))).context(QUERY_CONTEXT_DEFAULT).build().withOverriddenContext(BaseCalciteQueryTest.getTimeseriesContextWithFloorTime(ImmutableMap.of(TimeseriesQuery.SKIP_EMPTY_BUCKETS, true, BaseQuery.SQL_QUERY_ID, "dummy"), "d0")))).setInterval(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).setGranularity(Granularities.ALL).setAggregatorSpecs(NullHandling.replaceWithDefault() ? Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new CountAggregatorFactory("_a0:count")) : Arrays.asList(new LongSumAggregatorFactory("_a0:sum", "a0"), new FilteredAggregatorFactory(new CountAggregatorFactory("_a0:count"), BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("a0", null, null))))).setPostAggregatorSpecs(ImmutableList.of(new ArithmeticPostAggregator("_a0", "quotient", ImmutableList.of(new FieldAccessPostAggregator(null, "_a0:sum"), new FieldAccessPostAggregator(null, "_a0:count"))))).setContext(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class ThetaSketchSqlAggregatorTest method testThetaSketchPostAggsPostSort.
@Test
public void testThetaSketchPostAggsPostSort() throws Exception {
final String sql = "SELECT DS_THETA(dim2) as y FROM druid.foo ORDER BY THETA_SKETCH_ESTIMATE(DS_THETA(dim2)) DESC LIMIT 10";
final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 2.0d });
testQuery(StringUtils.format("SELECT THETA_SKETCH_ESTIMATE(y) from (%s)", sql), ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new SketchMergeAggregatorFactory("a0", "dim2", null, null, null, null))).postAggregators(new FieldAccessPostAggregator("p0", "a0"), new SketchEstimatePostAggregator("p2", new FieldAccessPostAggregator("p1", "a0"), null), new SketchEstimatePostAggregator("s1", new FieldAccessPostAggregator("s0", "p0"), null)).context(QUERY_CONTEXT_DEFAULT).build()), expectedResults);
}
use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class BloomFilterSqlAggregatorTest method testBloomFilterAggDoubleVirtualColumn.
@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
cannotVectorize();
BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
Object raw = row.getRaw("d1");
if (raw == null) {
if (NullHandling.replaceWithDefault()) {
expected1.addDouble(NullHandling.defaultDoubleValue());
} else {
expected1.addBytes(null, 0, 0);
}
} else {
expected1.addDouble(2 * ((Number) raw).doubleValue());
}
}
testQuery("SELECT\n" + "BLOOM_FILTER(d1 * 2, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).virtualColumns(new ExpressionVirtualColumn("v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE)).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1) }));
}
use of org.apache.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class BloomFilterSqlAggregatorTest method testBloomFilterTwoAggs.
@Test
public void testBloomFilterTwoAggs() throws Exception {
cannotVectorize();
BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
BloomKFilter expected2 = new BloomKFilter(TEST_NUM_ENTRIES);
for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
String raw = NullHandling.emptyToNullIfNeeded((String) row.getRaw("dim1"));
if (raw == null) {
expected1.addBytes(null, 0, 0);
} else {
expected1.addString(raw);
}
List<String> lst = row.getDimension("dim2");
if (lst.size() == 0) {
expected2.addBytes(null, 0, 0);
}
for (String s : lst) {
String val = NullHandling.emptyToNullIfNeeded(s);
if (val == null) {
expected2.addBytes(null, 0, 0);
} else {
expected2.addString(val);
}
}
}
testQuery("SELECT\n" + "BLOOM_FILTER(dim1, 1000),\n" + "BLOOM_FILTER(dim2, 1000)\n" + "FROM numfoo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("dim1", "a0:dim1"), TEST_NUM_ENTRIES), new BloomFilterAggregatorFactory("a1:agg", new DefaultDimensionSpec("dim2", "a1:dim2"), TEST_NUM_ENTRIES))).context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { CalciteTests.getJsonMapper().writeValueAsString(expected1), CalciteTests.getJsonMapper().writeValueAsString(expected2) }));
}
Aggregations