Use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
From the class NestedQueryPushDownTest, the method testSubqueryWithExtractionFnInOuterQuery:
@Test
public void testSubqueryWithExtractionFnInOuterQuery() {
  QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(
      Collections.singletonList(Intervals.utc(1500000000000L, 1600000000000L))
  );
  GroupByQuery query = GroupByQuery.builder()
      .setDataSource("blah")
      .setDimensions(new DefaultDimensionSpec("dimA", "dimA"), new DefaultDimensionSpec("dimB", "dimB"))
      .setAggregatorSpecs(new LongSumAggregatorFactory("metASum", "metA"), new LongSumAggregatorFactory("metBSum", "metB"))
      .setGranularity(Granularities.ALL)
      .setQuerySegmentSpec(intervalSpec)
      .build();
  GroupByQuery nestedQuery = GroupByQuery.builder()
      .setDataSource(query)
      .setDimensions(new ExtractionDimensionSpec("dimA", "extractedDimA", new RegexDimExtractionFn("^(p)", true, "replacement")))
      .setAggregatorSpecs(new LongSumAggregatorFactory("finalSum", "metASum"))
      .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_PUSH_DOWN_NESTED_QUERY, true))
      .setGranularity(Granularities.ALL)
      .setQuerySegmentSpec(intervalSpec)
      .build();
  ResultRow expectedRow0 = GroupByQueryRunnerTestHelper.createExpectedRow(nestedQuery, "2017-07-14T02:40:00.000Z", "finalSum", 4000L, "extractedDimA", "p");
  ResultRow expectedRow1 = GroupByQueryRunnerTestHelper.createExpectedRow(nestedQuery, "2017-07-14T02:40:00.000Z", "finalSum", 4000L, "extractedDimA", "replacement");
  Sequence<ResultRow> queryResult = runNestedQueryWithForcePushDown(nestedQuery);
  List<ResultRow> results = queryResult.toList();
  Assert.assertEquals(2, results.size());
  Assert.assertEquals(expectedRow0, results.get(0));
  Assert.assertEquals(expectedRow1, results.get(1));
}
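The two expected rows follow from how RegexDimExtractionFn behaves: a value matching "^(p)" is replaced by its first capture group, and a non-matching value becomes the configured replacement string because replaceMissingValue is true. A minimal sketch, using hypothetical input values ("pony", "alpha") rather than the test's actual data:

  // Sketch only: "pony" and "alpha" are hypothetical values, not rows from the test datasource.
  RegexDimExtractionFn fn = new RegexDimExtractionFn("^(p)", true, "replacement");
  fn.apply("pony");   // matches "^(p)", so the captured group "p" is returned
  fn.apply("alpha");  // no match, so the replacement value "replacement" is returned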
Use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
From the class CalciteQueryTest, the method testExactCountDistinctOfSemiJoinResult:
@Test
public void testExactCountDistinctOfSemiJoinResult() throws Exception {
  // Cannot vectorize due to extraction dimension spec.
  cannotVectorize();
  testQuery(
      "SELECT COUNT(*)\n"
      + "FROM (\n"
      + " SELECT DISTINCT dim2\n"
      + " FROM druid.foo\n"
      + " WHERE SUBSTRING(dim2, 1, 1) IN (\n"
      + " SELECT SUBSTRING(dim1, 1, 1) FROM druid.foo WHERE dim1 <> ''\n"
      + " ) AND __time >= '2000-01-01' AND __time < '2002-01-01'\n"
      + ")",
      ImmutableList.of(
          GroupByQuery.builder()
              .setDataSource(
                  new QueryDataSource(
                      GroupByQuery.builder()
                          .setDataSource(
                              join(
                                  new TableDataSource(CalciteTests.DATASOURCE1),
                                  new QueryDataSource(
                                      GroupByQuery.builder()
                                          .setDataSource(CalciteTests.DATASOURCE1)
                                          .setInterval(querySegmentSpec(Filtration.eternity()))
                                          .setGranularity(Granularities.ALL)
                                          .setDimFilter(not(selector("dim1", "", null)))
                                          .setDimensions(dimensions(new ExtractionDimensionSpec("dim1", "d0", new SubstringDimExtractionFn(0, 1))))
                                          .setContext(QUERY_CONTEXT_DEFAULT)
                                          .build()
                                  ),
                                  "j0.",
                                  equalsCondition(
                                      makeExpression("substring(\"dim2\", 0, 1)"),
                                      DruidExpression.ofColumn(ColumnType.STRING, "j0.d0")
                                  ),
                                  JoinType.INNER
                              )
                          )
                          .setInterval(querySegmentSpec(Intervals.of("2000-01-01/2002-01-01")))
                          .setGranularity(Granularities.ALL)
                          .setDimensions(dimensions(new DefaultDimensionSpec("dim2", "d0")))
                          .setContext(QUERY_CONTEXT_DEFAULT)
                          .build()
                  )
              )
              .setInterval(querySegmentSpec(Filtration.eternity()))
              .setGranularity(Granularities.ALL)
              .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
              .setContext(QUERY_CONTEXT_DEFAULT)
              .build()
      ),
      ImmutableList.of(new Object[] { 2L })
  );
}
Use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
From the class CalciteQueryTest, the method testCountDistinctOfLookup:
@Test
public void testCountDistinctOfLookup() throws Exception {
  // Cannot vectorize due to extraction dimension spec.
  cannotVectorize();
  final RegisteredLookupExtractionFn extractionFn = new RegisteredLookupExtractionFn(null, "lookyloo", false, null, null, true);
  testQuery(
      "SELECT COUNT(DISTINCT LOOKUP(dim1, 'lookyloo')) FROM foo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
              .dataSource(CalciteTests.DATASOURCE1)
              .intervals(querySegmentSpec(Filtration.eternity()))
              .granularity(Granularities.ALL)
              .aggregators(aggregators(new CardinalityAggregatorFactory("a0", null, ImmutableList.of(new ExtractionDimensionSpec("dim1", null, extractionFn)), false, true)))
              .context(QUERY_CONTEXT_DEFAULT)
              .build()
      ),
      ImmutableList.of(new Object[] { NullHandling.replaceWithDefault() ? 2L : 1L })
  );
}
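Here COUNT(DISTINCT LOOKUP(...)) is planned as a cardinality aggregator whose single input is the lookup-wrapped extraction dimension spec. A sketch of the same aggregator with its constructor arguments labelled; the parameter names shown are an assumption based on CardinalityAggregatorFactory's JSON constructor, not something stated by this test:

  // Sketch only: argument names are assumed from the factory's JSON properties.
  new CardinalityAggregatorFactory(
      "a0",                                                                        // name
      null,                                                                        // fieldNames (deprecated string form, unused here)
      ImmutableList.of(new ExtractionDimensionSpec("dim1", null, extractionFn)),   // fields
      false,                                                                       // byRow
      true                                                                         // round
  );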
Use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
From the class LookupSerdeModuleTest, the method testExtractionDimensionSerde:
@Test
public void testExtractionDimensionSerde() throws Exception {
  final ExtractionDimensionSpec dimensionSpec = new ExtractionDimensionSpec(
      "xxx",
      "d",
      new RegisteredLookupExtractionFn(null, "beep", false, null, null, null)
  );
  Assert.assertEquals(dimensionSpec, objectMapper.readValue(objectMapper.writeValueAsBytes(dimensionSpec), DimensionSpec.class));
}
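The round trip relies on the ObjectMapper having the lookup module's Jackson modules registered; otherwise readValue could not resolve the registered-lookup extraction function subtype. A minimal sketch of such a setup, assuming registration via LookupSerdeModule.getJacksonModules() (illustrative; the test's actual setup may differ):

  // Sketch only: one way to register the lookup serde Jackson modules on a mapper.
  ObjectMapper mapper = new DefaultObjectMapper();
  for (com.fasterxml.jackson.databind.Module jacksonModule : new LookupSerdeModule().getJacksonModules()) {
    mapper.registerModule(jacksonModule);
  }
  // After registration, the spec (and its lookup extraction fn) can be round-tripped.
  byte[] bytes = mapper.writeValueAsBytes(dimensionSpec);
  DimensionSpec roundTripped = mapper.readValue(bytes, DimensionSpec.class);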
Use of org.apache.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
From the class CalciteJoinQueryTest, the method testTopNFilterJoinWithProjection:
@Test
@Parameters(source = QueryContextForJoinProvider.class)
public void testTopNFilterJoinWithProjection(Map<String, Object> queryContext) throws Exception {
  // Cannot vectorize JOIN operator.
  cannotVectorize();
  // Filters on top N values of some dimension by using an inner join. Also projects the outer dimension.
  testQuery(
      "SELECT SUBSTRING(t1.dim1, 1, 10), SUM(t1.cnt)\n"
      + "FROM druid.foo t1\n"
      + " INNER JOIN (\n"
      + " SELECT\n"
      + " SUM(cnt) AS sum_cnt,\n"
      + " dim2\n"
      + " FROM druid.foo\n"
      + " GROUP BY dim2\n"
      + " ORDER BY 1 DESC\n"
      + " LIMIT 2\n"
      + ") t2 ON (t1.dim2 = t2.dim2)\n"
      + "GROUP BY SUBSTRING(t1.dim1, 1, 10)",
      queryContext,
      ImmutableList.of(
          GroupByQuery.builder()
              .setDataSource(
                  join(
                      new TableDataSource(CalciteTests.DATASOURCE1),
                      new QueryDataSource(
                          new TopNQueryBuilder()
                              .dataSource(CalciteTests.DATASOURCE1)
                              .intervals(querySegmentSpec(Filtration.eternity()))
                              .granularity(Granularities.ALL)
                              .dimension(new DefaultDimensionSpec("dim2", "d0"))
                              .aggregators(new LongSumAggregatorFactory("a0", "cnt"))
                              .metric("a0")
                              .threshold(2)
                              .context(QUERY_CONTEXT_DEFAULT)
                              .build()
                      ),
                      "j0.",
                      equalsCondition(makeColumnExpression("dim2"), makeColumnExpression("j0.d0")),
                      JoinType.INNER
                  )
              )
              .setInterval(querySegmentSpec(Filtration.eternity()))
              .setGranularity(Granularities.ALL)
              .setDimensions(dimensions(new ExtractionDimensionSpec("dim1", "d0", ColumnType.STRING, new SubstringDimExtractionFn(0, 10))))
              .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt")))
              .setContext(queryContext)
              .build()
      ),
      ImmutableList.of(new Object[] { NULL_STRING, 1L }, new Object[] { "1", 1L })
  );
}
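The projection SUBSTRING(t1.dim1, 1, 10) becomes SubstringDimExtractionFn(0, 10) because SQL SUBSTRING is 1-based while the native extraction function takes a 0-based index and a length. A minimal sketch with hypothetical values:

  // Sketch only: illustrative values, not rows from druid.foo.
  SubstringDimExtractionFn substring = new SubstringDimExtractionFn(0, 10);
  substring.apply("abcdefghijkl");  // -> "abcdefghij" (first 10 characters)
  substring.apply("abc");           // -> "abc" (shorter than 10 characters, returned unchanged)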