Example 16 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class ResultRow method fromLegacyRow.

/**
 * Create a row based on a legacy {@link Row} that was generated by a given {@link GroupByQuery}. This is useful
 * for deserializing rows that have come off the wire in the older format. (In the past, GroupBy query results
 * were sequences of {@link Row}, not ResultRow.)
 *
 * @param row   legacy row
 * @param query query corresponding to the output ResultRow
 */
public static ResultRow fromLegacyRow(Row row, final GroupByQuery query) {
    // Can't be sure if we'll get result rows with or without postaggregations, so be safe.
    final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithPostAggregators());
    int i = 0;
    if (query.getResultRowHasTimestamp()) {
        resultRow.set(i++, row.getTimestamp().getMillis());
    }
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        resultRow.set(i++, row.getRaw(dimensionSpec.getOutputName()));
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        resultRow.set(i++, row.getRaw(aggregatorFactory.getName()));
    }
    for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
        resultRow.set(i++, row.getRaw(postAggregator.getName()));
    }
    return resultRow;
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
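
For orientation, here is a minimal usage sketch of the conversion above. It is not from the Druid source; the data source, dimension, and aggregator names are illustrative, and it assumes the usual builder and helper classes (GroupByQuery.builder(), DefaultDimensionSpec, LongSumAggregatorFactory, MapBasedRow, DateTimes, Intervals, Granularities, ImmutableMap).

// Hypothetical query: one dimension ("page"), one aggregator ("edits"), DAY granularity.
final GroupByQuery query = GroupByQuery.builder()
    .setDataSource("wikipedia")
    .setInterval(Intervals.of("2020-01-01/2020-01-02"))
    .setGranularity(Granularities.DAY)
    .setDimensions(new DefaultDimensionSpec("page", "page"))
    .setAggregatorSpecs(new LongSumAggregatorFactory("edits", "edits"))
    .build();

// A legacy row as it would have arrived off the wire in the older format.
final Row legacyRow = new MapBasedRow(
    DateTimes.of("2020-01-01"),
    ImmutableMap.of("page", "Main_Page", "edits", 42L));

// Resulting positional layout: [timestampMillis, "Main_Page", 42L]. The timestamp slot is
// present because the query's granularity is not ALL.
final ResultRow resultRow = ResultRow.fromLegacyRow(legacyRow, query);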

Example 17 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class GroupByQueryHelper method toResultRow.

public static ResultRow toResultRow(final GroupByQuery query, final Row row) {
    final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithPostAggregators());
    int i = 0;
    if (query.getResultRowHasTimestamp()) {
        resultRow.set(i++, row.getTimestampFromEpoch());
    }
    for (DimensionSpec dimensionSpec : query.getDimensions()) {
        resultRow.set(i++, row.getRaw(dimensionSpec.getOutputName()));
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        resultRow.set(i++, row.getRaw(aggregatorFactory.getName()));
    }
    for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
        resultRow.set(i++, row.getRaw(postAggregator.getName()));
    }
    return resultRow;
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
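
Both helpers write fields in the fixed positional order that GroupByQuery exposes through its getResultRow*Start accessors. A small sketch of reading a value back out by position (the helper method name is made up):

// Positions: [optional timestamp][dimensions][aggregators][post-aggregators].
// getResultRowDimensionStart() is 1 when the row carries a timestamp, 0 otherwise.
static Object getDimensionValue(GroupByQuery query, ResultRow resultRow, int dimIndex) {
    return resultRow.get(query.getResultRowDimensionStart() + dimIndex);
}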

Example 18 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class GroupByStrategyV1 method processSubqueryResult.

@Override
public Sequence<ResultRow> processSubqueryResult(GroupByQuery subquery, GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> subqueryResult, boolean wasQueryPushedDown) {
    final Set<AggregatorFactory> aggs = new HashSet<>();
    // Nested group-bys work by first running the inner query and then materializing the results in an incremental
    // index which the outer query is then run against. To build the incremental index, we use the fieldNames from
    // the aggregators for the outer query to define the column names so that the index will match the query. If
    // there are multiple types of aggregators in the outer query referencing the same fieldName, we will try to build
    // multiple columns of the same name using different aggregator types and will fail. Here, we permit multiple
    // aggregators of the same type referencing the same fieldName (and skip creating identical columns for the
    // subsequent ones) and return an error if the aggregator types are different.
    final Set<String> dimensionNames = new HashSet<>();
    for (DimensionSpec dimension : subquery.getDimensions()) {
        dimensionNames.add(dimension.getOutputName());
    }
    for (AggregatorFactory aggregatorFactory : query.getAggregatorSpecs()) {
        for (final AggregatorFactory transferAgg : aggregatorFactory.getRequiredColumns()) {
            if (dimensionNames.contains(transferAgg.getName())) {
                // This column is already present as one of the subquery's dimensions, so the inner
                // incremental index will contain it; skip creating a duplicate aggregator column.
                continue;
            }
            if (Iterables.any(
                    aggs,
                    agg -> agg.getName().equals(transferAgg.getName()) && !agg.equals(transferAgg))) {
                throw new IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator" + " for '%s'", transferAgg.getName());
            }
            aggs.add(transferAgg);
        }
    }
    // We need the inner incremental index to have all the columns required by the outer query
    final GroupByQuery innerQuery = new GroupByQuery.Builder(subquery)
        .setAggregatorSpecs(ImmutableList.copyOf(aggs))
        .setInterval(subquery.getIntervals())
        .setPostAggregatorSpecs(new ArrayList<>())
        .build();
    final GroupByQuery outerQuery = new GroupByQuery.Builder(query)
        .setLimitSpec(query.getLimitSpec().merge(subquery.getLimitSpec()))
        .build();
    final IncrementalIndex innerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        innerQuery.withOverriddenContext(ImmutableMap.of(GroupByQueryHelper.CTX_KEY_SORT_RESULTS, true)),
        subquery,
        configSupplier.get(),
        subqueryResult);
    // Outer query might have multiple intervals, but they are expected to be non-overlapping and sorted which
    // is ensured by QuerySegmentSpec.
    // GroupByQueryEngine can only process one interval at a time, so we need to call it once per interval
    // and concatenate the results.
    final IncrementalIndex outerQueryResultIndex = GroupByQueryHelper.makeIncrementalIndex(
        outerQuery,
        null,
        configSupplier.get(),
        Sequences.concat(Sequences.map(
            Sequences.simple(outerQuery.getIntervals()),
            interval -> process(
                outerQuery.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(interval))),
                new IncrementalIndexStorageAdapter(innerQueryResultIndex)))));
    innerQueryResultIndex.close();
    return Sequences.withBaggage(outerQuery.postProcess(GroupByQueryHelper.postAggregate(query, outerQueryResultIndex)), outerQueryResultIndex);
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ArrayList(java.util.ArrayList) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)
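
To make the collision check concrete, consider the failure case it guards against: two outer aggregators of different types reading the same inner column. The names below are made up, and the getRequiredColumns() results are shown as the pattern the sum factories typically follow.

// Both outer aggregators read the inner column "x".
final AggregatorFactory longSum = new LongSumAggregatorFactory("sumX", "x");
final AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("dblSumX", "x");

// getRequiredColumns() yields a factory named after the input field, e.g.
// LongSumAggregatorFactory("x", "x") vs. DoubleSumAggregatorFactory("x", "x"):
// same name, different type, so the Iterables.any(...) check above throws
// IAE("Inner aggregator can currently only be referenced by a single type of outer aggregator for 'x'").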

Example 19 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class DefaultLimitSpec method build.

@Override
public Function<Sequence<ResultRow>, Sequence<ResultRow>> build(final GroupByQuery query) {
    final List<DimensionSpec> dimensions = query.getDimensions();
    // Can avoid re-sorting if the natural ordering is good enough.
    boolean sortingNeeded = dimensions.size() < columns.size();
    final Set<String> aggAndPostAggNames = new HashSet<>();
    for (AggregatorFactory agg : query.getAggregatorSpecs()) {
        aggAndPostAggNames.add(agg.getName());
    }
    for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
        aggAndPostAggNames.add(postAgg.getName());
    }
    if (!sortingNeeded) {
        for (int i = 0; i < columns.size(); i++) {
            final OrderByColumnSpec columnSpec = columns.get(i);
            if (aggAndPostAggNames.contains(columnSpec.getDimension())) {
                sortingNeeded = true;
                break;
            }
            final ColumnType columnType = getOrderByType(columnSpec, dimensions);
            final StringComparator naturalComparator;
            if (columnType.is(ValueType.STRING)) {
                naturalComparator = StringComparators.LEXICOGRAPHIC;
            } else if (columnType.isNumeric()) {
                naturalComparator = StringComparators.NUMERIC;
            } else if (columnType.isArray()) {
                if (columnType.getElementType().isNumeric()) {
                    naturalComparator = StringComparators.NUMERIC;
                } else {
                    naturalComparator = StringComparators.LEXICOGRAPHIC;
                }
            } else {
                sortingNeeded = true;
                break;
            }
            if (columnSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING || !columnSpec.getDimensionComparator().equals(naturalComparator) || !columnSpec.getDimension().equals(dimensions.get(i).getOutputName())) {
                sortingNeeded = true;
                break;
            }
        }
    }
    if (!sortingNeeded) {
        // If granularity is ALL, sortByDimsFirst doesn't change the sorting order.
        sortingNeeded = !query.getGranularity().equals(Granularities.ALL) && query.getContextSortByDimsFirst();
    }
    if (!sortingNeeded) {
        String timestampField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
        if (timestampField != null && !timestampField.isEmpty()) {
            int timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
            sortingNeeded = query.getContextSortByDimsFirst() ? timestampResultFieldIndex != query.getDimensions().size() - 1 : timestampResultFieldIndex != 0;
        }
    }
    final Function<Sequence<ResultRow>, Sequence<ResultRow>> sortAndLimitFn;
    if (sortingNeeded) {
        // Materialize the Comparator first for fast-fail error checking.
        final Ordering<ResultRow> ordering = makeComparator(query.getResultRowSignature(), query.getResultRowHasTimestamp(), query.getDimensions(), query.getAggregatorSpecs(), query.getPostAggregatorSpecs(), query.getContextSortByDimsFirst());
        // Both branches use stable sorts; important so consecutive queries return the same results if the
        // underlying data isn't changing. (Useful for query reproducibility and offset-based pagination.)
        if (isLimited()) {
            sortAndLimitFn = results -> new TopNSequence<>(results, ordering, limit + offset);
        } else {
            sortAndLimitFn = results -> Sequences.sort(results, ordering).limit(limit + offset);
        }
    } else {
        if (isLimited()) {
            sortAndLimitFn = results -> results.limit(limit + offset);
        } else {
            sortAndLimitFn = Functions.identity();
        }
    }
    // Finally, apply offset after sorting and limiting.
    if (isOffset()) {
        return results -> sortAndLimitFn.apply(results).skip(offset);
    } else {
        return sortAndLimitFn;
    }
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ComparableList(org.apache.druid.segment.data.ComparableList) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) Rows(org.apache.druid.data.input.Rows) HashMap(java.util.HashMap) ByteBuffer(java.nio.ByteBuffer) HashSet(java.util.HashSet) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) Functions(com.google.common.base.Functions) Sequence(org.apache.druid.java.util.common.guava.Sequence) Longs(com.google.common.primitives.Longs) Function(com.google.common.base.Function) StringComparator(org.apache.druid.query.ordering.StringComparator) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Objects(java.util.Objects) List(java.util.List) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) ColumnType(org.apache.druid.segment.column.ColumnType) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) TopNSequence(org.apache.druid.java.util.common.guava.TopNSequence) Collections(java.util.Collections)
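
As an illustration of the fast path above: an ordering that matches the natural dimension order lets build() return a limit-only (or identity) function, while ordering on an aggregator output always forces a sort, since rows arrive sorted by dimensions rather than by aggregated values. The column names here are hypothetical.

// Matches the natural order (first dimension, ascending, lexicographic): no re-sort needed.
final DefaultLimitSpec naturalOrder = new DefaultLimitSpec(
    ImmutableList.of(
        new OrderByColumnSpec("page", OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC)),
    10);

// Orders on an aggregator output: sortingNeeded becomes true, so a TopNSequence (when
// limited) or a full stable sort is applied before the limit.
final DefaultLimitSpec byAggregator = new DefaultLimitSpec(
    ImmutableList.of(
        new OrderByColumnSpec("edits", OrderByColumnSpec.Direction.DESCENDING, StringComparators.NUMERIC)),
    10);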

Example 20 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class MaterializedViewUtils method getRequiredFields.

/**
 * Extract all fields required by a query: filter columns, dimensions, and aggregator inputs.
 * Only TopNQuery, TimeseriesQuery, and GroupByQuery are supported.
 *
 * @param query the query to inspect
 * @return the set of field names the query requires
 */
public static Set<String> getRequiredFields(Query query) {
    Set<String> dimsInFilter = null == query.getFilter() ? new HashSet<String>() : query.getFilter().getRequiredColumns();
    Set<String> dimensions = new HashSet<>(dimsInFilter);
    if (query instanceof TopNQuery) {
        TopNQuery q = (TopNQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
        dimensions.add(q.getDimensionSpec().getDimension());
    } else if (query instanceof TimeseriesQuery) {
        TimeseriesQuery q = (TimeseriesQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
    } else if (query instanceof GroupByQuery) {
        GroupByQuery q = (GroupByQuery) query;
        dimensions.addAll(extractFieldsFromAggregations(q.getAggregatorSpecs()));
        for (DimensionSpec spec : q.getDimensions()) {
            String dim = spec.getDimension();
            dimensions.add(dim);
        }
    } else {
        throw new UnsupportedOperationException("Method getRequiredFields only supports TopNQuery/TimeseriesQuery/GroupByQuery");
    }
    return dimensions;
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) TopNQuery(org.apache.druid.query.topn.TopNQuery) HashSet(java.util.HashSet)
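
A short usage sketch (hypothetical column names), showing that filter columns are collected alongside dimensions and aggregator inputs; it assumes the sum aggregator reports its input field as a required column:

final GroupByQuery q = GroupByQuery.builder()
    .setDataSource("wikipedia")
    .setInterval(Intervals.of("2020-01-01/P1D"))
    .setGranularity(Granularities.ALL)
    .setDimFilter(new SelectorDimFilter("country", "US", null))
    .setDimensions(new DefaultDimensionSpec("page", "page"))
    .setAggregatorSpecs(new LongSumAggregatorFactory("editCount", "edits"))
    .build();

final Set<String> fields = MaterializedViewUtils.getRequiredFields(q);
// fields == {"country", "page", "edits"}:
// "country" from the filter, "page" from the dimension spec, and "edits" from the
// aggregator's input field (not its output name "editCount").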

Aggregations

DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 53 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 27 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 20 usages
ArrayList (java.util.ArrayList): 19 usages
HashMap (java.util.HashMap): 16 usages
Nullable (javax.annotation.Nullable): 15 usages
Test (org.junit.Test): 15 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 14 usages
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 12 usages
Row (org.apache.druid.data.input.Row): 12 usages
ISE (org.apache.druid.java.util.common.ISE): 12 usages
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 11 usages
Map (java.util.Map): 10 usages
ColumnType (org.apache.druid.segment.column.ColumnType): 10 usages
List (java.util.List): 9 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 9 usages
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9 usages
HashSet (java.util.HashSet): 8 usages
Function (com.google.common.base.Function): 7 usages
ImmutableList (com.google.common.collect.ImmutableList): 7 usages