Use of org.apache.druid.query.extraction.SubstringDimExtractionFn in project druid by druid-io.
Class LikeFilterTest, method testNewlineMatchWithExtractionFn.
/**
 * Runs three LIKE patterns against {@code dim1} with a {@code SubstringDimExtractionFn(1, 100)}
 * applied, and expects every pattern to produce the single-element match list {@code ["6"]}.
 */
@Test
public void testNewlineMatchWithExtractionFn() {
  final ImmutableList<String> expectedMatches = ImmutableList.of("6");
  // Prefix wildcard, suffix wildcard, and single-character wildcard, in that order.
  final String[] likePatterns = {"e%", "%ine", "ew_line"};

  for (String likePattern : likePatterns) {
    assertFilterMatches(
        new LikeDimFilter("dim1", likePattern, null, new SubstringDimExtractionFn(1, 100)),
        expectedMatches
    );
  }
}
Use of org.apache.druid.query.extraction.SubstringDimExtractionFn in project druid by druid-io.
Class CalciteQueryTest, method testApproxCountDistinct.
/**
 * Plans one SQL statement that uses several spellings and argument shapes of
 * APPROX_COUNT_DISTINCT, then compares the outcome with the expected native timeseries query
 * and result row. The expected row depends on {@code NullHandling.replaceWithDefault()}.
 */
@Test
public void testApproxCountDistinct() throws Exception {
  // Cannot vectorize due to virtual columns.
  cannotVectorize();

  final String sql =
      "SELECT\n"
      + " SUM(cnt),\n"
      + " APPROX_COUNT_DISTINCT(dim2),\n" // uppercase
      + " approx_count_distinct(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered
      + " APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn
      + " APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression
      + " approx_count_distinct(unique_dim1)\n" // on native hyperUnique column
      + "FROM druid.foo";

  // Only the two substring-based counts (4th and 5th columns) differ between the two modes.
  final ImmutableList<Object[]> expectedResults;
  if (NullHandling.replaceWithDefault()) {
    expectedResults = ImmutableList.of(new Object[] {6L, 3L, 2L, 2L, 2L, 6L});
  } else {
    expectedResults = ImmutableList.of(new Object[] {6L, 3L, 2L, 1L, 1L, 6L});
  }

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    expressionVirtualColumn(
                        "v0",
                        "concat(substring(\"dim2\", 0, 1),'x')",
                        ColumnType.STRING
                    )
                )
                .aggregators(
                    aggregators(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new CardinalityAggregatorFactory(
                            "a1",
                            null,
                            dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                            false,
                            true
                        ),
                        new FilteredAggregatorFactory(
                            new CardinalityAggregatorFactory(
                                "a2",
                                null,
                                dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                                false,
                                true
                            ),
                            not(selector("dim2", "", null))
                        ),
                        new CardinalityAggregatorFactory(
                            "a3",
                            null,
                            dimensions(
                                new ExtractionDimensionSpec(
                                    "dim2",
                                    "dim2",
                                    ColumnType.STRING,
                                    new SubstringDimExtractionFn(0, 1)
                                )
                            ),
                            false,
                            true
                        ),
                        new CardinalityAggregatorFactory(
                            "a4",
                            null,
                            dimensions(new DefaultDimensionSpec("v0", "v0", ColumnType.STRING)),
                            false,
                            true
                        ),
                        new HyperUniquesAggregatorFactory("a5", "unique_dim1", false, true)
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedResults
  );
}
Use of org.apache.druid.query.extraction.SubstringDimExtractionFn in project druid by druid-io.
Class CalciteQueryTest, method testRequireTimeConditionPositive.
/**
 * Runs three queries under {@code PLANNER_CONFIG_REQUIRE_TIME_CONDITION}, each of which filters
 * on {@code __time}, and checks the expected native query and result rows for every one:
 * a simple timeseries, a nested group-by, and a semi-join.
 */
@Test
public void testRequireTimeConditionPositive() throws Exception {
  // simple timeseries
  testQuery(
      PLANNER_CONFIG_REQUIRE_TIME_CONDITION,
      "SELECT SUM(cnt), gran FROM (\n"
      + " SELECT __time as t, floor(__time TO month) AS gran,\n"
      + " cnt FROM druid.foo\n"
      + ") AS x\n"
      // Trailing newline keeps the timestamp literal from running straight into GROUP BY.
      + "WHERE t >= '2000-01-01' and t < '2002-01-01'\n"
      + "GROUP BY gran\n"
      + "ORDER BY gran",
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01")))
                .granularity(Granularities.MONTH)
                .aggregators(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                .context(getTimeseriesContextWithFloorTime(TIMESERIES_CONTEXT_BY_GRAN, "d0"))
                .build()
      ),
      ImmutableList.of(
          new Object[] {3L, timestamp("2000-01-01")},
          new Object[] {3L, timestamp("2001-01-01")}
      )
  );

  // nested groupby only requires time condition for inner most query
  testQuery(
      PLANNER_CONFIG_REQUIRE_TIME_CONDITION,
      "SELECT\n"
      + " SUM(cnt),\n"
      + " COUNT(*)\n"
      + "FROM (SELECT dim2, SUM(cnt) AS cnt FROM druid.foo WHERE __time >= '2000-01-01' GROUP BY dim2)",
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(
          GroupByQuery.builder()
                      .setDataSource(
                          new QueryDataSource(
                              GroupByQuery.builder()
                                          .setDataSource(CalciteTests.DATASOURCE1)
                                          .setInterval(
                                              querySegmentSpec(
                                                  Intervals.utc(
                                                      DateTimes.of("2000-01-01").getMillis(),
                                                      JodaUtils.MAX_INSTANT
                                                  )
                                              )
                                          )
                                          .setGranularity(Granularities.ALL)
                                          .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0")))
                                          .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
                                          .setContext(QUERY_CONTEXT_DEFAULT)
                                          .build()
                          )
                      )
                      .setInterval(querySegmentSpec(Filtration.eternity()))
                      .setGranularity(Granularities.ALL)
                      .setAggregatorSpecs(
                          aggregators(
                              new LongSumAggregatorFactory("_a0", "a0"),
                              new CountAggregatorFactory("_a1")
                          )
                      )
                      .setContext(QUERY_CONTEXT_DEFAULT)
                      .build()
      ),
      // The expected COUNT(*) differs between the two null-handling modes.
      NullHandling.replaceWithDefault()
      ? ImmutableList.of(new Object[] {6L, 3L})
      : ImmutableList.of(new Object[] {6L, 4L})
  );

  // Cannot vectorize next test due to extraction dimension spec.
  cannotVectorize();

  // semi-join requires time condition on both left and right query
  testQuery(
      PLANNER_CONFIG_REQUIRE_TIME_CONDITION,
      "SELECT COUNT(*) FROM druid.foo\n"
      + "WHERE __time >= '2000-01-01' AND SUBSTRING(dim2, 1, 1) IN (\n"
      + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo\n"
      + " WHERE dim1 <> '' AND __time >= '2000-01-01'\n"
      + ")",
      CalciteTests.REGULAR_USER_AUTH_RESULT,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(
                    join(
                        new TableDataSource(CalciteTests.DATASOURCE1),
                        new QueryDataSource(
                            GroupByQuery.builder()
                                        .setDataSource(CalciteTests.DATASOURCE1)
                                        .setInterval(
                                            querySegmentSpec(
                                                Intervals.utc(
                                                    DateTimes.of("2000-01-01").getMillis(),
                                                    JodaUtils.MAX_INSTANT
                                                )
                                            )
                                        )
                                        .setDimFilter(
                                            not(selector("dim1", NullHandling.sqlCompatible() ? "" : null, null))
                                        )
                                        .setGranularity(Granularities.ALL)
                                        .setDimensions(
                                            new ExtractionDimensionSpec(
                                                "dim1",
                                                "d0",
                                                ColumnType.STRING,
                                                new SubstringDimExtractionFn(0, 1)
                                            )
                                        )
                                        .setContext(QUERY_CONTEXT_DEFAULT)
                                        .build()
                        ),
                        "j0.",
                        equalsCondition(
                            makeExpression("substring(\"dim2\", 0, 1)"),
                            DruidExpression.ofColumn(ColumnType.STRING, "j0.d0")
                        ),
                        JoinType.INNER
                    )
                )
                .intervals(
                    querySegmentSpec(
                        Intervals.utc(
                            DateTimes.of("2000-01-01").getMillis(),
                            JodaUtils.MAX_INSTANT
                        )
                    )
                )
                .granularity(Granularities.ALL)
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      ImmutableList.of(new Object[] {3L})
  );
}
Use of org.apache.druid.query.extraction.SubstringDimExtractionFn in project druid by druid-io.
Class CalciteQueryTest, method testGroupByExpressionFromLookup.
/**
 * Groups the {@code lookup.lookyloo} table by {@code SUBSTRING(v, 1, 1)} and checks that the
 * planner emits a group-by over a {@code LookupDataSource} whose single dimension uses a
 * {@code SubstringDimExtractionFn(0, 1)}.
 */
@Test
public void testGroupByExpressionFromLookup() throws Exception {
  // Cannot vectorize direct queries on lookup tables.
  cannotVectorize();

  final String sql = "SELECT SUBSTRING(v, 1, 1), COUNT(*) FROM lookup.lookyloo GROUP BY 1";

  final ExtractionDimensionSpec firstCharOfValue =
      new ExtractionDimensionSpec("v", "d0", new SubstringDimExtractionFn(0, 1));

  final GroupByQuery expectedQuery =
      GroupByQuery.builder()
                  .setDataSource(new LookupDataSource("lookyloo"))
                  .setInterval(querySegmentSpec(Filtration.eternity()))
                  .setGranularity(Granularities.ALL)
                  .setDimensions(dimensions(firstCharOfValue))
                  .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
                  .setContext(QUERY_CONTEXT_DEFAULT)
                  .build();

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[] {"m", 1L},
      new Object[] {"x", 3L}
  );

  testQuery(sql, ImmutableList.of(expectedQuery), expectedRows);
}
Use of org.apache.druid.query.extraction.SubstringDimExtractionFn in project druid by druid-io.
Class CalciteQueryTest, method testCountDistinctOfSubstring.
/**
 * Checks that {@code COUNT(DISTINCT SUBSTRING(dim1, 1, 1))} with a {@code dim1 <> ''} filter is
 * planned as a timeseries query with one cardinality aggregator over an extraction dimension
 * spec, and that it yields the single row {@code [4]}.
 */
@Test
public void testCountDistinctOfSubstring() throws Exception {
  // Cannot vectorize due to extraction dimension spec.
  cannotVectorize();

  final String sql =
      "SELECT COUNT(DISTINCT SUBSTRING(dim1, 1, 1)) FROM druid.foo WHERE dim1 <> ''";

  final ExtractionDimensionSpec firstCharOfDim1 =
      new ExtractionDimensionSpec("dim1", null, new SubstringDimExtractionFn(0, 1));
  final CardinalityAggregatorFactory distinctFirstChars =
      new CardinalityAggregatorFactory("a0", null, dimensions(firstCharOfDim1), false, true);

  final ImmutableList<Object[]> expectedRows = ImmutableList.of(new Object[] {4L});

  testQuery(
      sql,
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .filters(not(selector("dim1", "", null)))
                .granularity(Granularities.ALL)
                .aggregators(aggregators(distinctFirstChars))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      expectedRows
  );
}
Aggregations