use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.
the class CalciteQueryTest method testQueryContextOuterLimit.
@Test
public void testQueryContextOuterLimit() throws Exception {
Map<String, Object> outerLimitContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
outerLimitContext.put(PlannerContext.CTX_SQL_OUTER_LIMIT, 4);
TopNQueryBuilder baseBuilder = new TopNQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(querySegmentSpec(Filtration.eternity())).granularity(Granularities.ALL).dimension(new DefaultDimensionSpec("dim1", "d0")).metric(new InvertedTopNMetricSpec(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))).context(outerLimitContext);
List<Object[]> results1;
if (NullHandling.replaceWithDefault()) {
results1 = ImmutableList.of(new Object[] { "" }, new Object[] { "def" }, new Object[] { "abc" }, new Object[] { "2" });
} else {
results1 = ImmutableList.of(new Object[] { "def" }, new Object[] { "abc" }, new Object[] { "2" }, new Object[] { "10.1" });
}
// no existing limit
testQuery(PLANNER_CONFIG_DEFAULT, outerLimitContext, "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(baseBuilder.threshold(4).build()), results1);
// existing limit greater than context limit, override existing limit
testQuery(PLANNER_CONFIG_DEFAULT, outerLimitContext, "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC LIMIT 9", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(baseBuilder.threshold(4).build()), results1);
List<Object[]> results2;
if (NullHandling.replaceWithDefault()) {
results2 = ImmutableList.of(new Object[] { "" }, new Object[] { "def" });
} else {
results2 = ImmutableList.of(new Object[] { "def" }, new Object[] { "abc" });
}
// existing limit less than context limit, keep existing limit
testQuery(PLANNER_CONFIG_DEFAULT, outerLimitContext, "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC LIMIT 2", CalciteTests.REGULAR_USER_AUTH_RESULT, ImmutableList.of(baseBuilder.threshold(2).build()), results2);
}
use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.
the class DruidQuery method toTopNQuery.
/**
* Return this query as a TopN query, or null if this query is not compatible with TopN.
*
* @return query or null
*/
@Nullable
private TopNQuery toTopNQuery(final QueryFeatureInspector queryFeatureInspector) {
// Must be allowed by the QueryMaker.
if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TOPN)) {
return null;
}
// Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, LIMIT > 0 and ≤ maxTopNLimit,
// no OFFSET, no HAVING.
final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) && sorting != null && (sorting.getOrderBys().size() <= 1 && sorting.getOffsetLimit().hasLimit() && sorting.getOffsetLimit().getLimit() > 0 && sorting.getOffsetLimit().getLimit() <= plannerContext.getPlannerConfig().getMaxTopNLimit() && !sorting.getOffsetLimit().hasOffset()) && grouping.getHavingFilter() == null;
if (!topNOk) {
return null;
}
final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec();
// grouping col cannot be type array
if (dimensionSpec.getOutputType().isArray()) {
return null;
}
final OrderByColumnSpec limitColumn;
if (sorting.getOrderBys().isEmpty()) {
limitColumn = new OrderByColumnSpec(dimensionSpec.getOutputName(), OrderByColumnSpec.Direction.ASCENDING, Calcites.getStringComparatorForValueType(dimensionSpec.getOutputType()));
} else {
limitColumn = Iterables.getOnlyElement(sorting.getOrderBys());
}
final TopNMetricSpec topNMetricSpec;
if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
// DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? baseMetricSpec : new InvertedTopNMetricSpec(baseMetricSpec);
} else if (plannerContext.getPlannerConfig().isUseApproximateTopN()) {
// ORDER BY metric
final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? new InvertedTopNMetricSpec(baseMetricSpec) : baseMetricSpec;
} else {
return null;
}
final Pair<DataSource, Filtration> dataSourceFiltrationPair = getFiltration(dataSource, filter, virtualColumnRegistry);
final DataSource newDataSource = dataSourceFiltrationPair.lhs;
final Filtration filtration = dataSourceFiltrationPair.rhs;
final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
if (sorting.getProjection() != null) {
postAggregators.addAll(sorting.getProjection().getPostAggregators());
}
return new TopNQuery(newDataSource, getVirtualColumns(true), dimensionSpec, topNMetricSpec, Ints.checkedCast(sorting.getOffsetLimit().getLimit()), filtration.getQuerySegmentSpec(), filtration.getDimFilter(), Granularities.ALL, grouping.getAggregatorFactories(), postAggregators, ImmutableSortedMap.copyOf(plannerContext.getQueryContext()));
}
use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.
the class TopNTypeInterfaceBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
// Use an IdentityExtractionFn to force usage of HeapBasedTopNAlgorithm
TopNQueryBuilder queryBuilderString = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance())).metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
// HeapBasedTopNAlgorithm is always used for numeric columns
TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metLongUniform").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metFloatNormal").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("string", queryBuilderString);
basicQueries.put("long", queryBuilderLong);
basicQueries.put("float", queryBuilderFloat);
}
{
// basic.numericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("numericSort", queryBuilderA);
}
{
// basic.alphanumericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("alphanumericSort", queryBuilderA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.
the class TopNBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimSequential").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("A", queryBuilderA);
}
{
// basic.numericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("numericSort", queryBuilderA);
}
{
// basic.alphanumericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("alphanumericSort", queryBuilderA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.
the class CalciteQueryTest method testNullDoubleTopN.
@Test
public void testNullDoubleTopN() throws Exception {
List<Object[]> expected;
if (useDefault) {
expected = ImmutableList.of(new Object[] { 1.7, 1L }, new Object[] { 1.0, 1L }, new Object[] { 0.0, 4L });
} else {
expected = ImmutableList.of(new Object[] { null, 3L }, new Object[] { 1.7, 1L }, new Object[] { 1.0, 1L }, new Object[] { 0.0, 1L });
}
testQuery("SELECT d1, COUNT(*) FROM druid.numfoo GROUP BY d1 ORDER BY d1 DESC LIMIT 10", QUERY_CONTEXT_DEFAULT, ImmutableList.of(new TopNQueryBuilder().dataSource(CalciteTests.DATASOURCE3).intervals(querySegmentSpec(Filtration.eternity())).dimension(new DefaultDimensionSpec("d1", "_d0", ColumnType.DOUBLE)).threshold(10).aggregators(aggregators(new CountAggregatorFactory("a0"))).metric(new InvertedTopNMetricSpec(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))).context(QUERY_CONTEXT_DEFAULT).build()), expected);
}
Aggregations