Use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.
From class HllSketchSqlAggregatorTest, method testApproxCountDistinctHllSketch.
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
  // Can't vectorize due to SUBSTRING expression.
  cannotVectorize();
  final String sql = "SELECT\n"
      + "  SUM(cnt),\n"
      + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n" // uppercase
      + "  approx_count_distinct_ds_hll(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered
      + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn, using generic A.C.D.
      + "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression, using COUNT DISTINCT
      + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n" // on native HllSketch column
      + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n" // on native HllSketch column
      + "FROM druid.foo";
  final List<Object[]> expectedResults;
  if (NullHandling.replaceWithDefault()) {
    expectedResults = ImmutableList.of(new Object[]{6L, 2L, 2L, 1L, 2L, 5L, 5L});
  } else {
    expectedResults = ImmutableList.of(new Object[]{6L, 2L, 2L, 1L, 1L, 5L, 5L});
  }
  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn("v0", "substring(\"dim2\", 0, 1)", ColumnType.STRING, TestExprMacroTable.INSTANCE),
                    new ExpressionVirtualColumn("v1", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING, TestExprMacroTable.INSTANCE)
                )
                .aggregators(
                    ImmutableList.of(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND),
                        new FilteredAggregatorFactory(
                            new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
                            BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))
                        ),
                        new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND),
                        new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND),
                        new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND),
                        new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND)
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
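Note how the SQL planner rewrites expressions into native virtual columns: SUBSTRING(dim2, 1, 1) is 1-based in SQL, but the planned query uses the 0-based native expression substring("dim2", 0, 1) on the virtual column "v0". A minimal sketch of constructing such a virtual column outside the test harness; the use of ExprMacroTable.nil() is an assumption (the tests above pass TestExprMacroTable.INSTANCE):

import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;

// Hypothetical standalone construction; note the 1-based SQL SUBSTRING maps to
// the 0-based native "substring" seen in the planned query above.
ExpressionVirtualColumn firstChar = new ExpressionVirtualColumn(
    "v0",                         // name that aggregators and dimension specs reference
    "substring(\"dim2\", 0, 1)",  // native Druid expression
    ColumnType.STRING,            // declared output type
    ExprMacroTable.nil()          // assumed empty macro table
);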
Use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.
From class BloomFilterSqlAggregatorTest, method testBloomFilterAggDoubleVirtualColumn.
@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
  cannotVectorize();
  BloomKFilter expected1 = new BloomKFilter(TEST_NUM_ENTRIES);
  for (InputRow row : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
    Object raw = row.getRaw("d1");
    if (raw == null) {
      if (NullHandling.replaceWithDefault()) {
        expected1.addDouble(NullHandling.defaultDoubleValue());
      } else {
        expected1.addBytes(null, 0, 0);
      }
    } else {
      // Mirror the query's virtual column expression: (d1 * 2).
      expected1.addDouble(2 * ((Number) raw).doubleValue());
    }
  }
  testQuery(
      "SELECT\n" + "BLOOM_FILTER(d1 * 2, 1000)\n" + "FROM numfoo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(new ExpressionVirtualColumn("v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE))
                .aggregators(ImmutableList.of(new BloomFilterAggregatorFactory("a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), TEST_NUM_ENTRIES)))
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(new Object[]{CalciteTests.getJsonMapper().writeValueAsString(expected1)})
  );
}
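The pattern to note here: the aggregator consumes the virtual column by name, exactly as it would a physical column. A minimal sketch of that wiring, reusing the names from the test above (the literal 1000 stands in for TEST_NUM_ENTRIES and is an assumption):

// Sketch: the dimension spec reads the virtual column "v0", not the physical
// column "d1"; the expression does the per-row doubling.
ExpressionVirtualColumn doubled = new ExpressionVirtualColumn(
    "v0", "(\"d1\" * 2)", ColumnType.DOUBLE, TestExprMacroTable.INSTANCE);
BloomFilterAggregatorFactory agg = new BloomFilterAggregatorFactory(
    "a0:agg", new DefaultDimensionSpec("v0", "a0:v0"), 1000 /* assumed TEST_NUM_ENTRIES */);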
Use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.
From class MultiValuedDimensionTest, method testTopNExpression.
@Test
public void testTopNExpression() {
  TopNQuery query = new TopNQueryBuilder()
      .dataSource("xx")
      .granularity(Granularities.ALL)
      .dimension(new DefaultDimensionSpec("texpr", "texpr"))
      .virtualColumns(
          new ExpressionVirtualColumn("texpr", "map(x -> concat(x, 'foo'), tags)", ColumnType.STRING, TestExprMacroTable.INSTANCE)
      )
      .metric("count")
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(new CountAggregatorFactory("count"))
      .threshold(15)
      .build();
  try (CloseableStupidPool<ByteBuffer> pool = TestQueryRunners.createDefaultNonBlockingPool()) {
    QueryRunnerFactory factory = new TopNQueryRunnerFactory(
        pool,
        new TopNQueryQueryToolChest(new TopNQueryConfig()),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    );
    QueryRunner<Result<TopNResultValue>> runner = QueryRunnerTestHelper.makeQueryRunner(
        factory,
        new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
        null
    );
    Sequence<Result<TopNResultValue>> result = runner.run(QueryPlus.wrap(query));
    List<Map<String, Object>> expected = ImmutableList.<Map<String, Object>>builder()
        .add(ImmutableMap.of("texpr", "t3foo", "count", 2L))
        .add(ImmutableMap.of("texpr", "t5foo", "count", 2L))
        .add(new HashMap<String, Object>() {
          {
            put("texpr", NullHandling.sqlCompatible() ? "foo" : null);
            put("count", 1L);
          }
        })
        .add(ImmutableMap.of("texpr", "t1foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t2foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t4foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t6foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t7foo", "count", 1L))
        .build();
    List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
        new Result<TopNResultValue>(DateTimes.of("2011-01-12T00:00:00.000Z"), new TopNResultValue(expected))
    );
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim");
  }
}
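Here the expression uses a lambda: map(x -> concat(x, 'foo'), tags) applies concat to each element of the multi-value tags column, so the topN treats every transformed tag as its own bucket (hence the counts of 2 for tags appearing in two rows). A minimal sketch of the virtual column in isolation, reusing the constructor arguments from the test above:

// Sketch: "map" runs the lambda per element of the multi-value column, so a row
// with tags [t1, t2, t3] contributes three buckets: t1foo, t2foo, t3foo.
ExpressionVirtualColumn mapped = new ExpressionVirtualColumn(
    "texpr",
    "map(x -> concat(x, 'foo'), tags)",
    ColumnType.STRING,
    TestExprMacroTable.INSTANCE);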
Use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.
From class MultiValuedDimensionTest, method testGroupByExpressionFoldArrayToString.
@Test
public void testGroupByExpressionFoldArrayToString() {
  if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
    expectedException.expect(RuntimeException.class);
    expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
  }
  GroupByQuery query = GroupByQuery.builder()
      .setDataSource("xx")
      .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
      .setGranularity(Granularities.ALL)
      .setDimensions(new DefaultDimensionSpec("tt", "tt"))
      .setVirtualColumns(
          // Fold the multi-value "tags" array into a single concatenated string per row.
          new ExpressionVirtualColumn("tt", "fold((tag, acc) -> concat(acc, tag), tags, '')", ColumnType.STRING, TestExprMacroTable.INSTANCE)
      )
      .setAggregatorSpecs(new CountAggregatorFactory("count"))
      .setContext(context)
      .build();
  Sequence<ResultRow> result = helper.runQueryOnSegmentsObjs(
      ImmutableList.of(
          new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
          new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
      ),
      query
  );
  List<ResultRow> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", "tt", NullHandling.replaceWithDefault() ? null : "", "count", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t1t2t3", "count", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t3t4t5", "count", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t5t6t7", "count", 2L)
  );
  TestHelper.assertExpectedObjects(expectedResults, result.toList(), "expr-arrayfn-auto");
}
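Where map produces one output per array element, fold goes the other way: it reduces the multi-value array to a single string per row ("t1t2t3" and so on). Because the folded output is not backed by a dictionary, its cardinality is unknown up front, which is exactly why this test expects GroupBy v1 to fail. A minimal sketch, with the constructor arguments taken from the test above:

// Sketch: "fold" threads an accumulator over the array, producing one scalar
// per row; the resulting selector has unknown cardinality, which GroupBy v1 rejects.
ExpressionVirtualColumn folded = new ExpressionVirtualColumn(
    "tt",
    "fold((tag, acc) -> concat(acc, tag), tags, '')",
    ColumnType.STRING,
    TestExprMacroTable.INSTANCE);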
Use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.
From class MultiValuedDimensionTest, method testTopNExpressionAutoTransform.
@Test
public void testTopNExpressionAutoTransform() {
  TopNQuery query = new TopNQueryBuilder()
      .dataSource("xx")
      .granularity(Granularities.ALL)
      .dimension(new DefaultDimensionSpec("texpr", "texpr"))
      .virtualColumns(
          new ExpressionVirtualColumn("texpr", "concat(tags, 'foo')", ColumnType.STRING, TestExprMacroTable.INSTANCE)
      )
      .metric("count")
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(new CountAggregatorFactory("count"))
      .threshold(15)
      .build();
  try (CloseableStupidPool<ByteBuffer> pool = TestQueryRunners.createDefaultNonBlockingPool()) {
    QueryRunnerFactory factory = new TopNQueryRunnerFactory(
        pool,
        new TopNQueryQueryToolChest(new TopNQueryConfig()),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    );
    QueryRunner<Result<TopNResultValue>> runner = QueryRunnerTestHelper.makeQueryRunner(
        factory,
        new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
        null
    );
    Sequence<Result<TopNResultValue>> result = runner.run(QueryPlus.wrap(query));
    List<Map<String, Object>> expected = ImmutableList.<Map<String, Object>>builder()
        .add(ImmutableMap.of("texpr", "t3foo", "count", 2L))
        .add(ImmutableMap.of("texpr", "t5foo", "count", 2L))
        .add(ImmutableMap.of("texpr", "foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t1foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t2foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t4foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t6foo", "count", 1L))
        .add(ImmutableMap.of("texpr", "t7foo", "count", 1L))
        .build();
    List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
        new Result<TopNResultValue>(DateTimes.of("2011-01-12T00:00:00.000Z"), new TopNResultValue(expected))
    );
    TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim");
  }
}
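The "auto transform" in the method name refers to Druid's expression layer mapping scalar functions over multi-value inputs implicitly: concat(tags, 'foo') yields essentially the same buckets as the explicit lambda in testTopNExpression above (the two tests differ only in how the null/empty row comes out). Side by side:

// The explicit and implicit forms of the same per-element transform:
String explicitMap = "map(x -> concat(x, 'foo'), tags)"; // testTopNExpression
String autoMapped  = "concat(tags, 'foo')";              // this test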