Search in sources :

Example 46 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class GroupByBinaryFnV2 method apply.

/**
 * Combines two partial result rows into a single new row.
 *
 * <p>If either argument is null the other one is returned as-is (which may itself be null).
 * Otherwise a fresh row sized without post-aggregators is created: the timestamp (when the
 * query's rows carry one) and all dimension values are taken from {@code arg1}, and every
 * aggregator slot holds the result of {@link AggregatorFactory#combine} applied to the
 * corresponding values of both rows.
 *
 * @param arg1 first row, may be null
 * @param arg2 second row, may be null
 * @return the combined row, or the non-null argument when one side is missing
 */
@Override
@Nullable
public ResultRow apply(@Nullable final ResultRow arg1, @Nullable final ResultRow arg2) {
    // Nothing to merge when one side is absent.
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }

    final ResultRow merged = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());

    // Timestamp lives in slot 0 when present.
    if (query.getResultRowHasTimestamp()) {
        merged.set(0, adjustTimestamp(arg1));
    }

    // Dimension values are copied from the first row only.
    // NOTE(review): presumably both rows share the same grouping key - confirm against callers.
    final int firstDimensionSlot = query.getResultRowDimensionStart();
    final int dimensionSlotEnd = firstDimensionSlot + query.getDimensions().size();
    for (int slot = firstDimensionSlot; slot < dimensionSlotEnd; slot++) {
        merged.set(slot, arg1.get(slot));
    }

    // Aggregator slots follow in the order of the query's aggregator specs.
    int aggregatorSlot = query.getResultRowAggregatorStart();
    for (final AggregatorFactory factory : query.getAggregatorSpecs()) {
        merged.set(aggregatorSlot, factory.combine(arg1.get(aggregatorSlot), arg2.get(aggregatorSlot)));
        aggregatorSlot++;
    }
    return merged;
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Nullable(javax.annotation.Nullable)

Example 47 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class GroupByQueryHelper method createIndexAccumulatorPair.

/**
 * Creates an on-heap {@link IncrementalIndex} for the given query together with an
 * {@link Accumulator} that adds incoming rows to that index.
 *
 * <p>When {@code subquery} is null the query's aggregators are replaced by their combining
 * factories and {@link ResultRow}s are interpreted using {@code query}; otherwise the query's
 * aggregators are used unchanged and {@link ResultRow}s are interpreted using {@code subquery}.
 *
 * @param query    the groupBy query whose dimensions, aggregators, granularity and context are used
 * @param subquery optional subquery; null selects the combining mode described above
 * @param config   base configuration; query-level overrides are applied to it
 * @return a pair of the created index and the accumulator feeding it
 * @throws UnsupportedOperationException if the query context sets {@code useOffheap} to true
 */
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, @Nullable final GroupByQuery subquery, final GroupByQueryConfig config) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    final boolean combine = subquery == null;

    // With ALL granularity the raw interval start is used; otherwise it is aligned to its bucket.
    final long granTimeStart = Granularities.ALL.equals(gran) ? timeStart : gran.bucketStart(timeStart);

    final List<AggregatorFactory> aggs = combine
        ? Lists.transform(query.getAggregatorSpecs(), AggregatorFactory::getCombiningFactory)
        : query.getAggregatorSpecs();
    final List<String> dimensions = Lists.transform(query.getDimensions(), DimensionSpec::getOutputName);

    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (final DimensionSpec spec : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(spec.getOutputName()));
    }

    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
        .withMetrics(aggs.toArray(new AggregatorFactory[0]))
        .withQueryGranularity(gran)
        .withMinTimestamp(granTimeStart)
        .build();

    if (query.getContextValue("useOffheap", false)) {
        throw new UnsupportedOperationException("The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, " + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime " + "properties and 'groupByStrategy' query context parameters that you have set.");
    }
    final AppendableIndexBuilder indexBuilder = new OnheapIncrementalIndex.Builder();

    final IncrementalIndex index = indexBuilder
        .setIndexSchema(indexSchema)
        .setDeserializeComplexMetrics(false)
        .setConcurrentEventAdd(true)
        .setSortFacts(sortResults)
        .setMaxRowCount(querySpecificConfig.getMaxResults())
        .build();

    // Query used to turn a ResultRow into a MapBasedRow inside the accumulator.
    final GroupByQuery rowQuery = combine ? query : subquery;

    final Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            final MapBasedRow asMapRow;
            if (in instanceof MapBasedRow) {
                asMapRow = (MapBasedRow) in;
            } else if (in instanceof ResultRow) {
                asMapRow = ((ResultRow) in).toMapBasedRow(rowQuery);
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }

            final MapBasedInputRow inputRow = new MapBasedInputRow(asMapRow.getTimestamp(), dimensions, asMapRow.getEvent());
            try {
                accumulated.add(inputRow);
            } catch (IndexSizeExceededException e) {
                // Surface the index size limit as a resource-limit failure.
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AppendableIndexBuilder(org.apache.druid.segment.incremental.AppendableIndexBuilder) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Granularity(org.apache.druid.java.util.common.granularity.Granularity) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Pair(org.apache.druid.java.util.common.Pair) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)

Example 48 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class DefaultLimitSpec method makeComparator.

/**
 * Builds the row ordering described by this limit spec's order-by columns.
 *
 * <p>Each order-by column is resolved by name, in this priority: post-aggregator, aggregator,
 * dimension. The per-column orderings are compounded in declaration order, each reversed when
 * its direction is descending. When {@code hasTimestamp} is true a comparison on the long in
 * slot 0 is combined with the column ordering: after it when {@code sortByDimsFirst} is true,
 * before it otherwise. If nothing contributes an ordering, all rows compare as equal.
 *
 * @param rowSignature    signature used to map column names to row positions
 * @param hasTimestamp    whether rows carry a timestamp in slot 0
 * @param dimensions      dimension specs of the query
 * @param aggs            aggregator factories of the query
 * @param postAggs        post-aggregators of the query
 * @param sortByDimsFirst whether the column ordering takes precedence over the time ordering
 * @return the resulting ordering, never null
 * @throws ISE if an order-by column cannot be resolved
 */
private Ordering<ResultRow> makeComparator(RowSignature rowSignature, boolean hasTimestamp, List<DimensionSpec> dimensions, List<AggregatorFactory> aggs, List<PostAggregator> postAggs, boolean sortByDimsFirst) {
    Ordering<ResultRow> timeOrdering = null;
    if (hasTimestamp) {
        timeOrdering = new Ordering<ResultRow>() {

            @Override
            public int compare(ResultRow lhs, ResultRow rhs) {
                return Longs.compare(lhs.getLong(0), rhs.getLong(0));
            }
        };
    }

    // Name-based lookups for every kind of column that may appear in the order clause.
    final Map<String, DimensionSpec> dimensionsMap = new HashMap<>();
    dimensions.forEach(spec -> dimensionsMap.put(spec.getOutputName(), spec));
    final Map<String, AggregatorFactory> aggregatorsMap = new HashMap<>();
    aggs.forEach(agg -> aggregatorsMap.put(agg.getName(), agg));
    final Map<String, PostAggregator> postAggregatorsMap = new HashMap<>();
    postAggs.forEach(postAgg -> postAggregatorsMap.put(postAgg.getName(), postAgg));

    Ordering<ResultRow> ordering = null;
    for (final OrderByColumnSpec columnSpec : columns) {
        final String columnName = columnSpec.getDimension();
        final int columnIndex = rowSignature.indexOf(columnName);

        Ordering<ResultRow> nextOrdering = null;
        if (columnIndex >= 0) {
            if (postAggregatorsMap.containsKey(columnName)) {
                // noinspection unchecked
                nextOrdering = metricOrdering(columnIndex, postAggregatorsMap.get(columnName).getComparator());
            } else if (aggregatorsMap.containsKey(columnName)) {
                // noinspection unchecked
                nextOrdering = metricOrdering(columnIndex, aggregatorsMap.get(columnName).getComparator());
            } else if (dimensionsMap.containsKey(columnName)) {
                // The first spec with a matching output name determines the output type.
                final DimensionSpec matchingSpec = dimensions.stream()
                    .filter(ds -> ds.getOutputName().equals(columnName))
                    .findFirst()
                    .orElseThrow(() -> new ISE("Could not find the dimension spec for ordering column %s", columnName));
                nextOrdering = dimensionOrdering(columnIndex, matchingSpec.getOutputType(), columnSpec.getDimensionComparator());
            }
        }
        if (nextOrdering == null) {
            throw new ISE("Unknown column in order clause[%s]", columnSpec);
        }

        if (columnSpec.getDirection() == OrderByColumnSpec.Direction.DESCENDING) {
            nextOrdering = nextOrdering.reverse();
        }

        if (ordering == null) {
            ordering = nextOrdering;
        } else {
            ordering = ordering.compound(nextOrdering);
        }
    }

    // Merge the column ordering with the time ordering, whichever of them exist.
    final Ordering<ResultRow> combined;
    if (ordering == null) {
        combined = timeOrdering;
    } else if (timeOrdering == null) {
        combined = ordering;
    } else if (sortByDimsFirst) {
        combined = ordering.compound(timeOrdering);
    } else {
        combined = timeOrdering.compound(ordering);
    }

    if (combined != null) {
        return combined;
    }
    // noinspection unchecked
    return (Ordering) Ordering.allEqual();
}
Also used : ResultRow(org.apache.druid.query.groupby.ResultRow) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) ComparableList(org.apache.druid.segment.data.ComparableList) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) Rows(org.apache.druid.data.input.Rows) HashMap(java.util.HashMap) ByteBuffer(java.nio.ByteBuffer) HashSet(java.util.HashSet) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) StringComparators(org.apache.druid.query.ordering.StringComparators) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) Nullable(javax.annotation.Nullable) Functions(com.google.common.base.Functions) Sequence(org.apache.druid.java.util.common.guava.Sequence) Longs(com.google.common.primitives.Longs) Function(com.google.common.base.Function) StringComparator(org.apache.druid.query.ordering.StringComparator) ResultRow(org.apache.druid.query.groupby.ResultRow) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) Collectors(java.util.stream.Collectors) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Objects(java.util.Objects) List(java.util.List) Ordering(com.google.common.collect.Ordering) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) ColumnType(org.apache.druid.segment.column.ColumnType) Optional(java.util.Optional) 
Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) TopNSequence(org.apache.druid.java.util.common.guava.TopNSequence) Collections(java.util.Collections) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) HashMap(java.util.HashMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) ISE(org.apache.druid.java.util.common.ISE)

Example 49 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class QueryableIndexVectorColumnSelectorFactory method makeSingleValueDimensionSelector.

/**
 * Returns a single-value dimension vector selector for the given spec, creating and caching
 * it on first request.
 *
 * <p>Virtual columns are asked first (index-based selector, falling back to the factory-based
 * one). For physical columns, a missing column or one that is not a dictionary-encoded string
 * yields a nil selector, and a column that may be multi-valued is rejected.
 *
 * @param dimensionSpec the dimension to select; must be vectorizable
 * @return the cached or newly created selector
 * @throws ISE if the spec cannot be vectorized or the column may hold multiple values
 */
@Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final DimensionSpec dimensionSpec) {
    if (!dimensionSpec.canVectorize()) {
        throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
    }

    final SingleValueDimensionVectorSelector cached = singleValueDimensionSelectorCache.get(dimensionSpec);
    if (cached != null) {
        return cached;
    }

    final Function<DimensionSpec, SingleValueDimensionVectorSelector> selectorFactory = spec -> {
        if (virtualColumns.exists(spec.getDimension())) {
            final SingleValueDimensionVectorSelector fromIndex = virtualColumns.makeSingleValueDimensionVectorSelector(spec, index, offset);
            return fromIndex != null ? fromIndex : virtualColumns.makeSingleValueDimensionVectorSelector(spec, this);
        }

        final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
        if (holder == null) {
            return NilVectorSelector.create(offset);
        }
        final boolean dictionaryEncodedString = holder.getCapabilities().isDictionaryEncoded().isTrue() && holder.getCapabilities().is(ValueType.STRING);
        if (!dictionaryEncodedString) {
            // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
            return NilVectorSelector.create(offset);
        }
        if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
            // Asking for a single-value dimension selector on a multi-value column gets you an error.
            throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension());
        }

        @SuppressWarnings("unchecked") final DictionaryEncodedColumn<String> column = (DictionaryEncodedColumn<String>) getCachedColumn(spec.getDimension());
        // column is not null because of the holder null check above
        assert column != null;
        return spec.decorate(column.makeSingleValueDimensionVectorSelector(offset));
    };

    // computeIfAbsent() is deliberately avoided: building the selector may modify the cache itself through
    // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above.
    final SingleValueDimensionVectorSelector created = selectorFactory.apply(dimensionSpec);
    singleValueDimensionSelectorCache.put(dimensionSpec, created);
    return created;
}
Also used : QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn) Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) HashMap(java.util.HashMap) Function(java.util.function.Function) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) Map(java.util.Map) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Nullable(javax.annotation.Nullable) BaseColumn(org.apache.druid.segment.column.BaseColumn) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ISE(org.apache.druid.java.util.common.ISE) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn)

Example 50 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class QueryableIndexVectorColumnSelectorFactory method makeMultiValueDimensionSelector.

/**
 * Returns a multi-value dimension vector selector for the given spec, creating and caching
 * it on first request.
 *
 * <p>Virtual columns are asked first (index-based selector, falling back to the factory-based
 * one). Physical columns must exist, be string typed, and must not be definitely
 * non-dictionary-encoded or definitely single-valued; anything else is rejected.
 *
 * @param dimensionSpec the dimension to select; must be vectorizable
 * @return the cached or newly created selector
 * @throws ISE if the spec cannot be vectorized or the column is not a multi-value string column
 */
@Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final DimensionSpec dimensionSpec) {
    if (!dimensionSpec.canVectorize()) {
        throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec);
    }

    final MultiValueDimensionVectorSelector cached = multiValueDimensionSelectorCache.get(dimensionSpec);
    if (cached != null) {
        return cached;
    }

    final Function<DimensionSpec, MultiValueDimensionVectorSelector> selectorFactory = spec -> {
        if (virtualColumns.exists(spec.getDimension())) {
            final MultiValueDimensionVectorSelector fromIndex = virtualColumns.makeMultiValueDimensionVectorSelector(spec, index, offset);
            return fromIndex != null ? fromIndex : virtualColumns.makeMultiValueDimensionVectorSelector(spec, this);
        }

        final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
        final boolean multiValueString = holder != null
            && !holder.getCapabilities().isDictionaryEncoded().isFalse()
            && holder.getCapabilities().is(ValueType.STRING)
            && !holder.getCapabilities().hasMultipleValues().isFalse();
        if (!multiValueString) {
            throw new ISE("Column[%s] is not a multi-value string column, do not ask for a multi-value selector", spec.getDimension());
        }

        @SuppressWarnings("unchecked") final DictionaryEncodedColumn<String> column = (DictionaryEncodedColumn<String>) getCachedColumn(spec.getDimension());
        // column is not null because of the holder null check above
        assert column != null;
        return spec.decorate(column.makeMultiValueDimensionVectorSelector(offset));
    };

    // computeIfAbsent() is deliberately avoided: building the selector may modify the cache itself through
    // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above.
    final MultiValueDimensionVectorSelector created = selectorFactory.apply(dimensionSpec);
    multiValueDimensionSelectorCache.put(dimensionSpec, created);
    return created;
}
Also used : QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn) Closer(org.apache.druid.java.util.common.io.Closer) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) ValueType(org.apache.druid.segment.column.ValueType) HashMap(java.util.HashMap) Function(java.util.function.Function) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) Map(java.util.Map) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Nullable(javax.annotation.Nullable) BaseColumn(org.apache.druid.segment.column.BaseColumn) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ISE(org.apache.druid.java.util.common.ISE) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn)

Aggregations

DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)53 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)27 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)20 ArrayList (java.util.ArrayList)19 HashMap (java.util.HashMap)16 Nullable (javax.annotation.Nullable)15 Test (org.junit.Test)15 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 MapBasedRow (org.apache.druid.data.input.MapBasedRow)12 Row (org.apache.druid.data.input.Row)12 ISE (org.apache.druid.java.util.common.ISE)12 PostAggregator (org.apache.druid.query.aggregation.PostAggregator)11 Map (java.util.Map)10 ColumnType (org.apache.druid.segment.column.ColumnType)10 List (java.util.List)9 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)9 LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory)9 HashSet (java.util.HashSet)8 Function (com.google.common.base.Function)7 ImmutableList (com.google.common.collect.ImmutableList)7