Search in sources :

Example 1 with ExtractionDimensionSpec

use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class ColumnComparisonFilterTest method testSelectorWithLookupExtractionFn.

@Test
public void testSelectorWithLookupExtractionFn() {
    final Map<String, String> stringMap = ImmutableMap.of("a", "7");
    LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false);
    LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, true, null, false, true);
    assertFilterMatchesSkipVectorize(new ColumnComparisonDimFilter(ImmutableList.of(new ExtractionDimensionSpec("dim0", "dim0", lookupFn), new ExtractionDimensionSpec("dim1", "dim1", lookupFn))), ImmutableList.of("2", "5", "7", "8"));
}
Also used : LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) ColumnComparisonDimFilter(org.apache.druid.query.filter.ColumnComparisonDimFilter) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) LookupExtractor(org.apache.druid.query.lookup.LookupExtractor) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 2 with ExtractionDimensionSpec

use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class CalciteQueryTest method testRegexpExtract.

@Test
public void testRegexpExtract() throws Exception {
    // Cannot vectorize due to extractionFn in dimension spec.
    cannotVectorize();
    testQuery("SELECT DISTINCT\n" + "  REGEXP_EXTRACT(dim1, '^.'),\n" + "  REGEXP_EXTRACT(dim1, '^(.)', 1)\n" + "FROM foo\n" + "WHERE REGEXP_EXTRACT(dim1, '^(.)', 1) <> 'x'", ImmutableList.of(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimFilter(not(selector("dim1", "x", new RegexDimExtractionFn("^(.)", 1, true, null)))).setDimensions(dimensions(new ExtractionDimensionSpec("dim1", "d0", new RegexDimExtractionFn("^.", 0, true, null)), new ExtractionDimensionSpec("dim1", "d1", new RegexDimExtractionFn("^(.)", 1, true, null)))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { NULL_STRING, NULL_STRING }, new Object[] { "1", "1" }, new Object[] { "2", "2" }, new Object[] { "a", "a" }, new Object[] { "d", "d" }));
}
Also used : RegexDimExtractionFn(org.apache.druid.query.extraction.RegexDimExtractionFn) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 3 with ExtractionDimensionSpec

use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class CalciteQueryTest method testApproxCountDistinct.

@Test
public void testApproxCountDistinct() throws Exception {
    // Cannot vectorize due to virtual columns.
    cannotVectorize();
    testQuery("SELECT\n" + "  SUM(cnt),\n" + // uppercase
    "  APPROX_COUNT_DISTINCT(dim2),\n" + // lowercase; also, filtered
    "  approx_count_distinct(dim2) FILTER(WHERE dim2 <> ''),\n" + // on extractionFn
    "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" + // on expression
    "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1) || 'x'),\n" + // on native hyperUnique column
    "  approx_count_distinct(unique_dim1)\n" + "FROM druid.foo", ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).virtualColumns(expressionVirtualColumn("v0", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING)).aggregators(aggregators(new LongSumAggregatorFactory("a0", "cnt"), new CardinalityAggregatorFactory("a1", null, dimensions(new DefaultDimensionSpec("dim2", "dim2")), false, true), new FilteredAggregatorFactory(new CardinalityAggregatorFactory("a2", null, dimensions(new DefaultDimensionSpec("dim2", "dim2")), false, true), not(selector("dim2", "", null))), new CardinalityAggregatorFactory("a3", null, dimensions(new ExtractionDimensionSpec("dim2", "dim2", ColumnType.STRING, new SubstringDimExtractionFn(0, 1))), false, true), new CardinalityAggregatorFactory("a4", null, dimensions(new DefaultDimensionSpec("v0", "v0", ColumnType.STRING)), false, true), new HyperUniquesAggregatorFactory("a5", "unique_dim1", false, true))).context(QUERY_CONTEXT_DEFAULT).build()), NullHandling.replaceWithDefault() ? ImmutableList.of(new Object[] { 6L, 3L, 2L, 2L, 2L, 6L }) : ImmutableList.of(new Object[] { 6L, 3L, 2L, 1L, 1L, 6L }));
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 4 with ExtractionDimensionSpec

use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class CalciteQueryTest method testRequireTimeConditionPositive.

@Test
public void testRequireTimeConditionPositive() throws Exception {
    // simple timeseries
    testQuery(PLANNER_CONFIG_REQUIRE_TIME_CONDITION, "SELECT SUM(cnt), gran FROM (\n" + "  SELECT __time as t, floor(__time TO month) AS gran,\n" + "  cnt FROM druid.foo\n" + ") AS x\n" + "WHERE t >= '2000-01-01' and t < '2002-01-01'" + "GROUP BY gran\n" + "ORDER BY gran", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01"))).granularity(Granularities.MONTH).aggregators(aggregators(new LongSumAggregatorFactory("a0", "cnt"))).context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0")).build()), ImmutableList.of(new Object[] { 3L, timestamp("2000-01-01") }, new Object[] { 3L, timestamp("2001-01-01") }));
    // nested groupby only requires time condition for inner most query
    testQuery(PLANNER_CONFIG_REQUIRE_TIME_CONDITION, "SELECT\n" + "  SUM(cnt),\n" + "  COUNT(*)\n" + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo WHERE __time >= '2000-01-01' GROUP BY dim2)", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(GroupByQuery.builder().setDataSource(new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(querySegmentSpec(Intervals.utc(DateTimes.of("2000-01-01").getMillis(), JodaUtils.MAX_INSTANT))).setGranularity(Granularities.ALL).setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0"))).setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))).setContext(QUERY_CONTEXT_DEFAULT).build())).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("_a0", "a0"), new CountAggregatorFactory("_a1"))).setContext(QUERY_CONTEXT_DEFAULT).build()), NullHandling.replaceWithDefault() ? ImmutableList.of(new Object[] { 6L, 3L }) : ImmutableList.of(new Object[] { 6L, 4L }));
    // Cannot vectorize next test due to extraction dimension spec.
    cannotVectorize();
    // semi-join requires time condition on both left and right query
    testQuery(PLANNER_CONFIG_REQUIRE_TIME_CONDITION, "SELECT COUNT(*) FROM druid.foo\n" + "WHERE __time >= '2000-01-01' AND SUBSTRING(dim2, 1, 1) IN (\n" + "  SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo\n" + "  WHERE dim1 <> '' AND __time >= '2000-01-01'\n" + ")", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(Druids.newTimeseriesQueryBuilder().dataSource(join(new TableDataSource(CalciteTests.DATASOURCE1), new QueryDataSource(GroupByQuery.builder().setDataSource(CalciteTests.DATASOURCE1).setInterval(querySegmentSpec(Intervals.utc(DateTimes.of("2000-01-01").getMillis(), JodaUtils.MAX_INSTANT))).setDimFilter(not(selector("dim1", NullHandling.sqlCompatible() ? "" : null, null))).setGranularity(Granularities.ALL).setDimensions(new ExtractionDimensionSpec("dim1", "d0", ColumnType.STRING, new SubstringDimExtractionFn(0, 1))).setContext(QUERY_CONTEXT_DEFAULT).build()), "j0.", equalsCondition(makeExpression("substring(\"dim2\", 0, 1)"), DruidExpression.ofColumn(ColumnType.STRING, "j0.d0")), JoinType.INNER)).intervals(querySegmentSpec(Intervals.utc(DateTimes.of("2000-01-01").getMillis(), JodaUtils.MAX_INSTANT))).granularity(Granularities.ALL).aggregators(aggregators(new CountAggregatorFactory("a0"))).context(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { 3L }));
}
Also used : SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) QueryDataSource(org.apache.druid.query.QueryDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TableDataSource(org.apache.druid.query.TableDataSource) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 5 with ExtractionDimensionSpec

use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class CalciteQueryTest method testGroupByExpressionFromLookup.

@Test
public void testGroupByExpressionFromLookup() throws Exception {
    // Cannot vectorize direct queries on lookup tables.
    cannotVectorize();
    testQuery("SELECT SUBSTRING(v, 1, 1), COUNT(*) FROM lookup.lookyloo GROUP BY 1", ImmutableList.of(GroupByQuery.builder().setDataSource(new LookupDataSource("lookyloo")).setInterval(querySegmentSpec(Filtration.eternity())).setGranularity(Granularities.ALL).setDimensions(dimensions(new ExtractionDimensionSpec("v", "d0", new SubstringDimExtractionFn(0, 1)))).setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))).setContext(QUERY_CONTEXT_DEFAULT).build()), ImmutableList.of(new Object[] { "m", 1L }, new Object[] { "x", 3L }));
}
Also used : SubstringDimExtractionFn(org.apache.druid.query.extraction.SubstringDimExtractionFn) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) LookupDataSource(org.apache.druid.query.LookupDataSource) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Aggregations

ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec)87 Test (org.junit.Test)82 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)62 LookupExtractionFn (org.apache.druid.query.lookup.LookupExtractionFn)40 RegexDimExtractionFn (org.apache.druid.query.extraction.RegexDimExtractionFn)32 Result (org.apache.druid.query.Result)30 TimeFormatExtractionFn (org.apache.druid.query.extraction.TimeFormatExtractionFn)29 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)27 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)26 JavaScriptExtractionFn (org.apache.druid.query.extraction.JavaScriptExtractionFn)22 SubstringDimExtractionFn (org.apache.druid.query.extraction.SubstringDimExtractionFn)22 StrlenExtractionFn (org.apache.druid.query.extraction.StrlenExtractionFn)21 ExtractionFn (org.apache.druid.query.extraction.ExtractionFn)20 MapLookupExtractor (org.apache.druid.query.extraction.MapLookupExtractor)20 StringFormatExtractionFn (org.apache.druid.query.extraction.StringFormatExtractionFn)20 DimExtractionFn (org.apache.druid.query.extraction.DimExtractionFn)19 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)13 CascadeExtractionFn (org.apache.druid.query.extraction.CascadeExtractionFn)10 SearchQuerySpecDimExtractionFn (org.apache.druid.query.extraction.SearchQuerySpecDimExtractionFn)10 HashMap (java.util.HashMap)8