Search in sources:

Example 51 with ExpressionVirtualColumn

use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.

the class HllSketchSqlAggregatorTest method testApproxCountDistinctHllSketch.

/**
 * Runs one SQL statement that mixes SUM, APPROX_COUNT_DISTINCT_DS_HLL,
 * APPROX_COUNT_DISTINCT and COUNT(DISTINCT ...) and checks that it plans to a
 * single timeseries query with two expression virtual columns and seven
 * aggregators (a0..a6), and that it returns the expected single row.
 *
 * <p>The fifth expected value depends on the null-handling mode: 2 when nulls
 * are replaced with defaults, 1 otherwise.
 */
@Test
public void testApproxCountDistinctHllSketch() throws Exception {
    // Can't vectorize due to SUBSTRING expression.
    cannotVectorize();

    final String sql = "SELECT\n"
        // a0: plain sum of the cnt column
        + "  SUM(cnt),\n"
        // a1: HLL sketch distinct count over dim2
        + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2),\n"
        // a2: same aggregation, restricted by FILTER to dim2 <> ''
        + "  APPROX_COUNT_DISTINCT_DS_HLL(dim2) FILTER(WHERE dim2 <> ''),\n"
        // a3: generic APPROX_COUNT_DISTINCT over a SUBSTRING expression (virtual column v0)
        + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n"
        // a4: COUNT(DISTINCT ...) over a concatenation expression (virtual column v1)
        + "  COUNT(DISTINCT SUBSTRING(dim2, 1, 1) || 'x'),\n"
        // a5: existing sketch column hllsketch_dim1, with explicit arguments 21 and 'HLL_8'
        + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1, 21, 'HLL_8'),\n"
        // a6: existing sketch column hllsketch_dim1, with no extra arguments
        + "  APPROX_COUNT_DISTINCT_DS_HLL(hllsketch_dim1)\n"
        + "FROM druid.foo";

    // Only the COUNT(DISTINCT ... || 'x') result differs between null-handling modes.
    final long distinctConcatenated = NullHandling.replaceWithDefault() ? 2L : 1L;
    final List<Object[]> expectedResults = ImmutableList.of(
        new Object[] { 6L, 2L, 2L, 1L, distinctConcatenated, 5L, 5L }
    );

    // Virtual columns the planner is expected to create for the two SQL expressions.
    final ExpressionVirtualColumn substringColumn = new ExpressionVirtualColumn(
        "v0",
        "substring(\"dim2\", 0, 1)",
        ColumnType.STRING,
        TestExprMacroTable.INSTANCE
    );
    final ExpressionVirtualColumn concatColumn = new ExpressionVirtualColumn(
        "v1",
        "concat(substring(\"dim2\", 0, 1),'x')",
        ColumnType.STRING,
        TestExprMacroTable.INSTANCE
    );

    testQuery(
        sql,
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(substringColumn, concatColumn)
                .aggregators(
                    ImmutableList.of(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new HllSketchBuildAggregatorFactory("a1", "dim2", null, null, ROUND),
                        new FilteredAggregatorFactory(
                            new HllSketchBuildAggregatorFactory("a2", "dim2", null, null, ROUND),
                            BaseCalciteQueryTest.not(BaseCalciteQueryTest.selector("dim2", "", null))
                        ),
                        new HllSketchBuildAggregatorFactory("a3", "v0", null, null, ROUND),
                        new HllSketchBuildAggregatorFactory("a4", "v1", null, null, ROUND),
                        new HllSketchMergeAggregatorFactory("a5", "hllsketch_dim1", 21, "HLL_8", ROUND),
                        new HllSketchMergeAggregatorFactory("a6", "hllsketch_dim1", null, null, ROUND)
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        expectedResults
    );
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) HllSketchMergeAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) HllSketchBuildAggregatorFactory(org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 52 with ExpressionVirtualColumn

use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method testBloomFilterAggDoubleVirtualColumn.

/**
 * Checks that {@code BLOOM_FILTER(d1 * 2, 1000)} plans to a timeseries query
 * whose bloom filter aggregator reads the expression virtual column
 * {@code v0 = ("d1" * 2)}, and that the result equals the JSON form of a
 * filter built by hand from the same rows.
 */
@Test
public void testBloomFilterAggDoubleVirtualColumn() throws Exception {
    cannotVectorize();

    // Build the expected filter directly from the test rows.
    final BloomKFilter expectedFilter = new BloomKFilter(TEST_NUM_ENTRIES);
    for (InputRow inputRow : CalciteTests.ROWS1_WITH_NUMERIC_DIMS) {
        final Object d1 = inputRow.getRaw("d1");
        if (d1 != null) {
            // Mirrors the SQL expression d1 * 2.
            expectedFilter.addDouble(2 * ((Number) d1).doubleValue());
        } else if (NullHandling.replaceWithDefault()) {
            // Missing value in default-value mode: add the default double.
            expectedFilter.addDouble(NullHandling.defaultDoubleValue());
        } else {
            // Missing value otherwise: add a null byte array.
            expectedFilter.addBytes(null, 0, 0);
        }
    }
    final String expectedJson = CalciteTests.getJsonMapper().writeValueAsString(expectedFilter);

    testQuery(
        "SELECT\n"
            + "BLOOM_FILTER(d1 * 2, 1000)\n"
            + "FROM numfoo",
        ImmutableList.of(
            Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity())))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    new ExpressionVirtualColumn(
                        "v0",
                        "(\"d1\" * 2)",
                        ColumnType.DOUBLE,
                        TestExprMacroTable.INSTANCE
                    )
                )
                .aggregators(
                    ImmutableList.of(
                        new BloomFilterAggregatorFactory(
                            "a0:agg",
                            new DefaultDimensionSpec("v0", "a0:v0"),
                            TEST_NUM_ENTRIES
                        )
                    )
                )
                .context(BaseCalciteQueryTest.QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        ImmutableList.of(new Object[] { expectedJson })
    );
}
Also used : ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) InputRow(org.apache.druid.data.input.InputRow) BloomFilterAggregatorFactory(org.apache.druid.query.aggregation.bloom.BloomFilterAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) BloomKFilter(org.apache.druid.query.filter.BloomKFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) BaseCalciteQueryTest(org.apache.druid.sql.calcite.BaseCalciteQueryTest) Test(org.junit.Test)

Example 53 with ExpressionVirtualColumn

use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.

the class MultiValuedDimensionTest method testTopNExpression.

/**
 * Runs a topN query on the virtual column
 * {@code texpr = map(x -> concat(x, 'foo'), tags)} against the queryable
 * index segment and checks the per-value counts.
 *
 * <p>The row with count 1 that is built by hand is expected to carry
 * {@code "foo"} in SQL-compatible null handling and {@code null} otherwise.
 */
@Test
public void testTopNExpression() {
    TopNQuery query = new TopNQueryBuilder()
        .dataSource("xx")
        .granularity(Granularities.ALL)
        .dimension(new DefaultDimensionSpec("texpr", "texpr"))
        .virtualColumns(
            new ExpressionVirtualColumn(
                "texpr",
                "map(x -> concat(x, 'foo'), tags)",
                ColumnType.STRING,
                TestExprMacroTable.INSTANCE
            )
        )
        .metric("count")
        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .aggregators(new CountAggregatorFactory("count"))
        .threshold(15)
        .build();

    try (CloseableStupidPool<ByteBuffer> pool = TestQueryRunners.createDefaultNonBlockingPool()) {
        QueryRunnerFactory factory = new TopNQueryRunnerFactory(
            pool,
            new TopNQueryQueryToolChest(new TopNQueryConfig()),
            QueryRunnerTestHelper.NOOP_QUERYWATCHER
        );
        QueryRunner<Result<TopNResultValue>> runner = QueryRunnerTestHelper.makeQueryRunner(
            factory,
            new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
            null
        );
        Sequence<Result<TopNResultValue>> result = runner.run(QueryPlus.wrap(query));

        // This row may hold a null value, which ImmutableMap rejects, so it needs a
        // HashMap. A plain instance is used rather than double-brace initialization,
        // which would create an anonymous subclass capturing the enclosing test instance.
        Map<String, Object> mode​DependentRow = new HashMap<>();
        mode​DependentRow.put("texpr", NullHandling.sqlCompatible() ? "foo" : null);
        mode​DependentRow.put("count", 1L);

        List<Map<String, Object>> expected = ImmutableList.<Map<String, Object>>builder()
            .add(ImmutableMap.of("texpr", "t3foo", "count", 2L))
            .add(ImmutableMap.of("texpr", "t5foo", "count", 2L))
            .add(mode​DependentRow)
            .add(ImmutableMap.of("texpr", "t1foo", "count", 1L))
            .add(ImmutableMap.of("texpr", "t2foo", "count", 1L))
            .add(ImmutableMap.of("texpr", "t4foo", "count", 1L))
            .add(ImmutableMap.of("texpr", "t6foo", "count", 1L))
            .add(ImmutableMap.of("texpr", "t7foo", "count", 1L))
            .build();

        List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
            new Result<TopNResultValue>(
                DateTimes.of("2011-01-12T00:00:00.000Z"),
                new TopNResultValue(expected)
            )
        );
        TestHelper.assertExpectedObjects(expectedResults, result.toList(), "filteredDim");
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) ByteBuffer(java.nio.ByteBuffer) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) TopNQueryConfig(org.apache.druid.query.topn.TopNQueryConfig) TopNQuery(org.apache.druid.query.topn.TopNQuery) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 54 with ExpressionVirtualColumn

use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.

the class MultiValuedDimensionTest method testGroupByExpressionFoldArrayToString.

/**
 * Groups by the virtual column
 * {@code tt = fold((tag, acc) -> concat(acc, tag), tags, '')} over both the
 * queryable and the incremental index segment and checks the resulting rows.
 *
 * <p>When the configured default strategy is GroupBy v1, the test instead
 * expects a {@link RuntimeException} with the "unknown cardinality" message.
 */
@Test
public void testGroupByExpressionFoldArrayToString() {
    final boolean usingV1 = config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1);
    if (usingV1) {
        expectedException.expect(RuntimeException.class);
        expectedException.expectMessage("GroupBy v1 does not support dimension selectors with unknown cardinality.");
    }

    final GroupByQuery query = GroupByQuery
        .builder()
        .setDataSource("xx")
        .setQuerySegmentSpec(new LegacySegmentSpec("1970/3000"))
        .setGranularity(Granularities.ALL)
        .setDimensions(new DefaultDimensionSpec("tt", "tt"))
        .setVirtualColumns(
            new ExpressionVirtualColumn(
                "tt",
                "fold((tag, acc) -> concat(acc, tag), tags, '')",
                ColumnType.STRING,
                TestExprMacroTable.INSTANCE
            )
        )
        .setAggregatorSpecs(new CountAggregatorFactory("count"))
        .setContext(context)
        .build();

    // Run against one segment of each kind.
    final Sequence<ResultRow> actualRows = helper.runQueryOnSegmentsObjs(
        ImmutableList.of(
            new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
            new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("sid2"))
        ),
        query
    );

    // First expected row: null in default-value mode, the empty string otherwise.
    final String emptyFoldValue = NullHandling.replaceWithDefault() ? null : "";
    final List<ResultRow> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", "tt", emptyFoldValue, "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t1t2t3", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t3t4t5", "count", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970", "tt", "t5t6t7", "count", 2L)
    );

    TestHelper.assertExpectedObjects(expectedResults, actualRows.toList(), "expr-arrayfn-auto");
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) LegacySegmentSpec(org.apache.druid.query.spec.LegacySegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 55 with ExpressionVirtualColumn

use of org.apache.druid.segment.virtual.ExpressionVirtualColumn in project druid by druid-io.

the class MultiValuedDimensionTest method testTopNExpressionAutoTransform.

/**
 * Runs a topN query on the virtual column {@code texpr = concat(tags, 'foo')}
 * against the queryable index segment and checks the per-value counts.
 * The expression passes {@code tags} straight to {@code concat}, without an
 * explicit {@code map(...)}.
 */
@Test
public void testTopNExpressionAutoTransform() {
    final ExpressionVirtualColumn concatColumn = new ExpressionVirtualColumn(
        "texpr",
        "concat(tags, 'foo')",
        ColumnType.STRING,
        TestExprMacroTable.INSTANCE
    );
    final TopNQuery topN = new TopNQueryBuilder()
        .dataSource("xx")
        .granularity(Granularities.ALL)
        .dimension(new DefaultDimensionSpec("texpr", "texpr"))
        .virtualColumns(concatColumn)
        .metric("count")
        .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .aggregators(new CountAggregatorFactory("count"))
        .threshold(15)
        .build();

    try (CloseableStupidPool<ByteBuffer> bufferPool = TestQueryRunners.createDefaultNonBlockingPool()) {
        QueryRunnerFactory runnerFactory = new TopNQueryRunnerFactory(
            bufferPool,
            new TopNQueryQueryToolChest(new TopNQueryConfig()),
            QueryRunnerTestHelper.NOOP_QUERYWATCHER
        );
        QueryRunner<Result<TopNResultValue>> queryRunner = QueryRunnerTestHelper.makeQueryRunner(
            runnerFactory,
            new QueryableIndexSegment(queryableIndex, SegmentId.dummy("sid1")),
            null
        );
        Sequence<Result<TopNResultValue>> actual = queryRunner.run(QueryPlus.wrap(topN));

        // Expected rows, in the order the assertion compares them.
        ImmutableList.Builder<Map<String, Object>> rows = ImmutableList.builder();
        rows.add(ImmutableMap.of("texpr", "t3foo", "count", 2L));
        rows.add(ImmutableMap.of("texpr", "t5foo", "count", 2L));
        rows.add(ImmutableMap.of("texpr", "foo", "count", 1L));
        rows.add(ImmutableMap.of("texpr", "t1foo", "count", 1L));
        rows.add(ImmutableMap.of("texpr", "t2foo", "count", 1L));
        rows.add(ImmutableMap.of("texpr", "t4foo", "count", 1L));
        rows.add(ImmutableMap.of("texpr", "t6foo", "count", 1L));
        rows.add(ImmutableMap.of("texpr", "t7foo", "count", 1L));
        List<Map<String, Object>> expected = rows.build();

        Result<TopNResultValue> expectedResult = new Result<TopNResultValue>(
            DateTimes.of("2011-01-12T00:00:00.000Z"),
            new TopNResultValue(expected)
        );
        List<Result<TopNResultValue>> expectedResults = Collections.singletonList(expectedResult);

        TestHelper.assertExpectedObjects(expectedResults, actual.toList(), "filteredDim");
    }
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) ByteBuffer(java.nio.ByteBuffer) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) TopNQueryConfig(org.apache.druid.query.topn.TopNQueryConfig) TopNQuery(org.apache.druid.query.topn.TopNQuery) TopNQueryRunnerFactory(org.apache.druid.query.topn.TopNQueryRunnerFactory) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) GroupByQueryRunnerTest(org.apache.druid.query.groupby.GroupByQueryRunnerTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

- ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 86
- Test (org.junit.Test): 73
- DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 58
- InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 45
- LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 26
- CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 24
- QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment): 18
- GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 17
- GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 16
- LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec): 15
- IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 15
- ResultRow (org.apache.druid.query.groupby.ResultRow): 13
- MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 13
- BaseCalciteQueryTest (org.apache.druid.sql.calcite.BaseCalciteQueryTest): 11
- VirtualColumns (org.apache.druid.segment.VirtualColumns): 10
- ImmutableList (com.google.common.collect.ImmutableList): 8
- TimeUnit (java.util.concurrent.TimeUnit): 8
- NullHandling (org.apache.druid.common.config.NullHandling): 8
- Closer (org.apache.druid.java.util.common.io.Closer): 8
- QueryDataSource (org.apache.druid.query.QueryDataSource): 8