use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class ResultRow method fromLegacyRow.
/**
 * Converts a legacy {@link Row} that was produced for the given {@link GroupByQuery} into a {@link ResultRow}.
 * Intended for rows that arrive off the wire in the older format, from the time when GroupBy results were
 * sequences of {@link Row} rather than ResultRow.
 *
 * Values are written in this order: the timestamp (only when the query's result rows carry one), then one
 * value per dimension, per aggregator and per post-aggregator, each looked up in the legacy row by output name.
 *
 * @param row   legacy row to convert
 * @param query query corresponding to the output ResultRow
 * @return a new ResultRow sized to include post-aggregators
 */
public static ResultRow fromLegacyRow(Row row, final GroupByQuery query) {
  // The incoming row may or may not carry post-aggregations, so always reserve room for them.
  final ResultRow converted = ResultRow.create(query.getResultRowSizeWithPostAggregators());
  int position = 0;

  if (query.getResultRowHasTimestamp()) {
    converted.set(position, row.getTimestamp().getMillis());
    position++;
  }

  for (DimensionSpec dimension : query.getDimensions()) {
    converted.set(position, row.getRaw(dimension.getOutputName()));
    position++;
  }

  for (AggregatorFactory aggregator : query.getAggregatorSpecs()) {
    converted.set(position, row.getRaw(aggregator.getName()));
    position++;
  }

  for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
    converted.set(position, row.getRaw(postAggregator.getName()));
    position++;
  }

  return converted;
}
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class GroupByQueryHelper method toResultRow.
/**
 * Copies the values of a {@link Row} into a new {@link ResultRow} laid out for the given query.
 *
 * The result row is sized to include post-aggregators. Slots are filled in order: the row's epoch timestamp
 * (only when the query's result rows carry a timestamp), then the raw value of each dimension, aggregator and
 * post-aggregator, looked up in the row by output name.
 *
 * @param query query that defines the result row layout
 * @param row   row to read values from
 * @return the populated ResultRow
 */
public static ResultRow toResultRow(final GroupByQuery query, final Row row) {
  final ResultRow target = ResultRow.create(query.getResultRowSizeWithPostAggregators());
  int slot = 0;

  if (query.getResultRowHasTimestamp()) {
    target.set(slot, row.getTimestampFromEpoch());
    slot += 1;
  }

  for (DimensionSpec dim : query.getDimensions()) {
    target.set(slot, row.getRaw(dim.getOutputName()));
    slot += 1;
  }

  for (AggregatorFactory agg : query.getAggregatorSpecs()) {
    target.set(slot, row.getRaw(agg.getName()));
    slot += 1;
  }

  for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
    target.set(slot, row.getRaw(postAgg.getName()));
    slot += 1;
  }

  return target;
}
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class GroupByStrategyV1 method processSubqueryResult.
/**
 * Runs the outer {@code query} against the materialized results of {@code subquery}.
 *
 * The subquery results are loaded into an inner incremental index whose aggregator columns are the columns
 * required by the outer query's aggregators. The outer query is then processed once per interval against that
 * index, and the concatenated output is accumulated into a second (outer) incremental index that backs the
 * returned sequence.
 *
 * The inner index is always closed once the outer index has been built or has failed to build. The outer index
 * is attached to the returned sequence as baggage.
 *
 * @param subquery             inner query whose results are being consumed
 * @param query                outer query to run on top of the subquery results
 * @param resource             not used by this implementation
 * @param subqueryResult       results of the inner query
 * @param wasQueryPushedDown   not used by this implementation
 * @return post-aggregated and post-processed results of the outer query
 * @throws IAE if two different aggregator factories with the same name are required by the outer aggregators
 */
@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
  final Set<AggregatorFactory> aggs = new HashSet<>();
  // Nested group-bys work by first running the inner query and then materializing the results in an incremental
  // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
  // the aggregators for the outer query to define the column names so that the index will match the query. If
  // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
  // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
  // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
  // subsequent ones) and return an error if the aggregator types are different.
  final Set<String> dimensionNames = new HashSet<>();
  for (DimensionSpec dimension : subquery.getDimensions()) {
    dimensionNames.add(dimension.getOutputName());
  }
  for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
    for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
      if (dimensionNames.contains(transferAgg.getName())) {
        // The required column has the same name as one of the subquery's output dimensions, so no transfer
        // aggregator is registered for it.
        // NOTE(review): the original rationale comment here was truncated; confirm the intent with the authors.
        continue;
      }
      if (Iterables.any(aggs, new Predicate<AggregatorFactory>() {
        @Override
        public boolean apply(AggregatorFactory agg) {
          return agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg);
        }
      })) {
        throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
      }
      aggs.add(transferAgg);
    }
  }
  // We need the inner incremental index to have all the columns required by the outer query
  final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery).setAggregatorSpecs(ImmutableList.copyOf(aggs)).setInterval(subquery.getIntervals()).setPostAggregatorSpecs(new ArrayList<>()).build();
  final GroupByQuery outerQuery = new GroupByQuery.Builder(query).setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec())).build();
  final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)), subquery, configSupplier.get(), subqueryResult);
  final IncrementalIndex outerQueryResultIndex;
  try {
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(outerQuery, null, configSupplier.get(), Sequences.concat(Sequences.map(Sequences.simple(outerQuery.getIntervals()), new Function<Interval, Sequence<ResultRow>>() {
      @Override
      public Sequence<ResultRow> apply(Interval interval) {
        return process(outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))), new IncrementalIndexStorageAdapter(innerQueryResultIndex));
      }
    })));
  }
  finally {
    // Release the inner index even when building the outer index fails; previously it was only closed on success.
    innerQueryResultIndex.close();
  }
  return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class DefaultLimitSpec method build.
/**
 * Builds the function that applies this limit spec to the results of the given query: an optional sort,
 * followed by a limit, followed by an optional offset.
 *
 * The bulk of the method decides whether an explicit sort can be skipped because the results' natural
 * ordering already matches the requested order-by columns. It relies on the {@code columns}, {@code limit}
 * and {@code offset} members and on {@code isLimited()}, {@code isOffset()}, {@code getOrderByType(...)} and
 * {@code makeComparator(...)}, all of which are declared outside this block.
 *
 * @param query the group-by query whose dimensions, aggregators, post-aggregators, granularity and context
 *              drive the sorting decision
 * @return a function that maps a sequence of result rows to the sorted / limited / offset sequence
 */
@Override
public Function<Sequence<ResultRow>, Sequence<ResultRow>> build(final GroupByQuery query) {
final List<DimensionSpec> dimensions = query.getDimensions();
// Can avoid re-sorting if the natural ordering is good enough.
// With more order-by columns than dimensions, the per-position comparison against dimensions below cannot
// succeed for every column, so sorting is required up front.
boolean sortingNeeded = dimensions.size() < columns.size();
// Names of all aggregators and post-aggregators; ordering by any of these forces a sort.
final Set<String> aggAndPostAggNames = new HashSet<>();
for (AggregatorFactory agg : query.getAggregatorSpecs()) {
aggAndPostAggNames.add(agg.getName());
}
for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
aggAndPostAggNames.add(postAgg.getName());
}
if (!sortingNeeded) {
// Sorting is skipped only if every order-by column i is ascending, uses the natural comparator for its
// type, and names the i-th dimension's output name.
for (int i = 0; i < columns.size(); i++) {
final OrderByColumnSpec columnSpec = columns.get(i);
if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
sortingNeeded = true;
break;
}
final ColumnType columnType = getOrderByType(columnSpec, dimensions);
// Pick the comparator treated as "natural" for the column type: lexicographic for strings, numeric for
// numeric types, and for arrays the choice follows the element type. Any other type forces a sort.
final StringComparator naturalComparator;
if (columnType.is(ValueType.STRING)) {
naturalComparator = StringComparators.LEXICOGRAPHIC;
} else if (columnType.isNumeric()) {
naturalComparator = StringComparators.NUMERIC;
} else if (columnType.isArray()) {
if (columnType.getElementType().isNumeric()) {
naturalComparator = StringComparators.NUMERIC;
} else {
naturalComparator = StringComparators.LEXICOGRAPHIC;
}
} else {
sortingNeeded = true;
break;
}
if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING || !columnSpec.getDimensionComparator().equals(naturalComparator) || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
sortingNeeded = true;
break;
}
}
}
if (!sortingNeeded) {
// If granularity is ALL, sortByDimsFirst doesn't change the sorting order.
sortingNeeded = !query.getGranularity().equals(Granularities.ALL) && query.getContextSortByDimsFirst();
}
if (!sortingNeeded) {
// When the query context names a timestamp result field, sorting can only be skipped if that field sits
// at the last dimension position (sortByDimsFirst) or at position 0 (otherwise).
String timestampField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
if (timestampField != null && !timestampField.isEmpty()) {
// NOTE(review): this unboxes the context value directly; presumably the index is always set whenever the
// timestamp result field is set - confirm against the code that populates the context.
int timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
sortingNeeded = query.getContextSortByDimsFirst() ? timestampResultFieldIndex != query.getDimensions().size() - 1 : timestampResultFieldIndex != 0;
}
}
final Function<Sequence<ResultRow>, Sequence<ResultRow>> sortAndLimitFn;
if (sortingNeeded) {
// Materialize the Comparator first for fast-fail error checking.
final Ordering<ResultRow> ordering = makeComparator(query.getResultRowSignature(), query.getResultRowHasTimestamp(), query.getDimensions(), query.getAggregatorSpecs(), query.getPostAggregatorSpecs(), query.getContextSortByDimsFirst());
// NOTE(review): the beginning of the original comment here was lost. Presumably both branches below are
// expected to sort stably, so that consistent results are returned from query to query when the
// underlying data isn't changing. (Useful for query reproducibility and offset-based pagination.)
// In every limited case, limit + offset rows are kept so that the offset can be skipped afterwards.
if (isLimited()) {
sortAndLimitFn = results -> new TopNSequence<>(results, ordering, limit + offset);
} else {
sortAndLimitFn = results -> Sequences.sort(results, ordering).limit(limit + offset);
}
} else {
if (isLimited()) {
sortAndLimitFn = results -> results.limit(limit + offset);
} else {
sortAndLimitFn = Functions.identity();
}
}
// Finally, apply offset after sorting and limiting.
if (isOffset()) {
return results -> sortAndLimitFn.apply(results).skip(offset);
} else {
return sortAndLimitFn;
}
}
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class MaterializedViewUtils method getRequiredFields.
/**
 * Collects the column names a query depends on.
 *
 * The result always starts with the columns required by the query's filter, if it has one. Depending on the
 * query type, it then adds:
 * <ul>
 *   <li>TopNQuery: the fields extracted from its aggregations plus its single dimension;</li>
 *   <li>TimeseriesQuery: the fields extracted from its aggregations;</li>
 *   <li>GroupByQuery: the fields extracted from its aggregations plus every dimension.</li>
 * </ul>
 *
 * @param query the query to inspect; only TopNQuery/TimeseriesQuery/GroupByQuery are supported
 * @return a new mutable set of the collected column names
 * @throws UnsupportedOperationException if the query is of any other type
 */
public static Set<String> getRequiredFields(Query query) {
  final Set<String> requiredFields = new HashSet<>();
  if (query.getFilter() != null) {
    requiredFields.addAll(query.getFilter().getRequiredColumns());
  }

  if (query instanceof TopNQuery) {
    final TopNQuery topN = (TopNQuery) query;
    requiredFields.addAll(extractFieldsFromAggregations(topN.getAggregatorSpecs()));
    requiredFields.add(topN.getDimensionSpec().getDimension());
    return requiredFields;
  }

  if (query instanceof TimeseriesQuery) {
    final TimeseriesQuery timeseries = (TimeseriesQuery) query;
    requiredFields.addAll(extractFieldsFromAggregations(timeseries.getAggregatorSpecs()));
    return requiredFields;
  }

  if (query instanceof GroupByQuery) {
    final GroupByQuery groupBy = (GroupByQuery) query;
    requiredFields.addAll(extractFieldsFromAggregations(groupBy.getAggregatorSpecs()));
    for (DimensionSpec dimensionSpec : groupBy.getDimensions()) {
      requiredFields.add(dimensionSpec.getDimension());
    }
    return requiredFields;
  }

  throw new UnsupportedOperationException("Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery");
}
Aggregations