use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class GroupByStrategyV2 method processSubtotalsSpec.
@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
  // Overview:
  //  1. The rows of the top-level query (queryResult) are accumulated into one "base" result supplier.
  //  2. For every subtotal spec:
  //     - if the spec is a prefix of the top-level dimensions, the base rows are already sorted by the spec, so
  //       they are stream-merged directly;
  //     - otherwise the base rows, reduced to the dimensions of the spec, are fed into a second result supplier
  //       which yields them sorted by the spec, and those rows are stream-merged.
  //
  // The base supplier is declared outside the "try" so that it can be closed if building the sequence fails.
  GroupByRowProcessor.ResultSupplier baseResults = null;
  try {
    // The original query rewritten to run over its own *results* instead of its *inputs*: dimensions read the
    // output names, aggregators are replaced by their combining factories, and virtual columns and the dim filter
    // are dropped because they only make sense on inputs. The subtotals spec is removed too, since each spec is
    // handled individually below.
    final GroupByQuery resultLevelQuery = query
        .withDimensionSpecs(
            query.getDimensions()
                 .stream()
                 .map(dimSpec -> new DefaultDimensionSpec(dimSpec.getOutputName(), dimSpec.getOutputName(), dimSpec.getOutputType()))
                 .collect(Collectors.toList())
        )
        .withAggregatorSpecs(
            query.getAggregatorSpecs()
                 .stream()
                 .map(AggregatorFactory::getCombiningFactory)
                 .collect(Collectors.toList())
        )
        .withVirtualColumns(VirtualColumns.EMPTY)
        .withDimFilter(null)
        .withSubtotalsSpec(null)
        .withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));

    baseResults = GroupByRowProcessor.process(
        resultLevelQuery,
        resultLevelQuery,
        queryResult,
        configSupplier.get(),
        resource,
        spillMapper,
        processingConfig.getTmpDir(),
        processingConfig.intermediateComputeSizeBytes()
    );
    // Final alias so the lambdas below can capture the supplier.
    final GroupByRowProcessor.ResultSupplier sharedBaseResults = baseResults;

    final List<String> baseDimensionNames = resultLevelQuery.getDimensions()
                                                            .stream()
                                                            .map(DimensionSpec::getOutputName)
                                                            .collect(Collectors.toList());

    // The aggregator/post-aggregator names are only required for LimitSpec.filterColumns(..), i.e. only when the
    // base query carries something other than the no-op limit spec.
    final boolean hasRealLimit = !(resultLevelQuery.getLimitSpec() instanceof NoopLimitSpec);
    final Set<String> metricNames = hasRealLimit ? getAggregatorAndPostAggregatorNames(resultLevelQuery) : null;

    final List<List<String>> subtotalSpecs = query.getSubtotalsSpec();
    final List<Sequence<ResultRow>> perSpecResults = new ArrayList<>(subtotalSpecs.size());

    // Build the result sequence of each subtotal spec, in spec order.
    for (List<String> spec : subtotalSpecs) {
      final ImmutableSet<String> specDims = ImmutableSet.copyOf(spec);

      // Dimension specs of the original query (dimension name and output name) that are part of this spec.
      final List<DimensionSpec> specDimensionSpecs = query.getDimensions()
                                                          .stream()
                                                          .filter(dimensionSpec -> specDims.contains(dimensionSpec.getOutputName()))
                                                          .collect(Collectors.toCollection(ArrayList::new));

      // Limit spec of the subtotal query, restricted to the columns this spec produces.
      final LimitSpec specLimit;
      if (hasRealLimit) {
        final Set<String> visibleColumns = new HashSet<>(metricNames);
        visibleColumns.addAll(spec);
        specLimit = resultLevelQuery.getLimitSpec().filterColumns(visibleColumns);
      } else {
        specLimit = NoopLimitSpec.instance();
      }
      final GroupByQuery specQuery = resultLevelQuery.withLimitSpec(specLimit);

      final Sequence<ResultRow> specResults;
      if (Utils.isPrefix(spec, baseDimensionNames)) {
        // The spec is a prefix of the base dimensions, so the base rows are already in the order that stream
        // merging needs. The shared supplier must not be closed per spec (last argument is false).
        specResults = processSubtotalsResultAndOptionallyClose(() -> sharedBaseResults, specDimensionSpecs, specQuery, false);
      } else {
        // The spec is not a prefix of the base dimensions, so the base rows are not sorted by it. They are first
        // pushed through another result supplier, which yields them sorted by the spec, and then stream-merged.
        // The supplier is deliberately created lazily: merge buffers for a subtotal must not be allocated eagerly.
        final Supplier<GroupByRowProcessor.ResultSupplier> resortedResults = () -> GroupByRowProcessor.process(
            resultLevelQuery,
            specQuery,
            sharedBaseResults.results(specDimensionSpecs),
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes()
        );
        specResults = processSubtotalsResultAndOptionallyClose(resortedResults, specDimensionSpecs, specQuery, true);
      }
      perSpecResults.add(specResults);
    }

    // Attaching the base supplier as baggage closes its resources once the returned sequence has been read.
    return Sequences.withBaggage(query.postProcess(Sequences.concat(perSpecResults)), baseResults);
  } catch (Throwable e) {
    throw CloseableUtils.closeAndWrapInCatch(e, baseResults);
  }
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class SegmentMetadataQueryQueryToolChest method mergeAnalyses.
/**
 * Combines two segment analyses into one.
 *
 * <p>If either argument is null the other one is returned as-is (so null is returned when both are null).
 * Otherwise intervals are concatenated, columns are folded by name, sizes and row counts are summed, and
 * aggregators, timestamp spec and query granularity are merged. The id and the rollup flag are kept only when
 * both sides carry the same non-null value; otherwise the id becomes "merged" and rollup becomes null.
 *
 * @param arg1                   first analysis, may be null
 * @param arg2                   second analysis, may be null
 * @param lenientAggregatorMerge when true, aggregators are merged one name at a time and an aggregator that
 *                               cannot be merged is stored as null; when false, all aggregators are merged in
 *                               one call to {@code AggregatorFactory.mergeAggregators}
 * @return the merged analysis
 */
@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
  if (arg1 == null) {
    return arg2;
  }
  if (arg2 == null) {
    return arg1;
  }

  // Intervals: arg1's followed by arg2's; stays null when neither side has any interval list.
  List<Interval> mergedIntervals = null;
  for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
    if (analysis.getIntervals() != null) {
      if (mergedIntervals == null) {
        mergedIntervals = new ArrayList<>();
      }
      mergedIntervals.addAll(analysis.getIntervals());
    }
  }

  // Columns: every left column is folded with its right counterpart (null when the right side lacks it),
  // then the columns that exist only on the right are copied over unchanged.
  final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
  final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
  final Map<String, ColumnAnalysis> mergedColumns = new TreeMap<>();
  leftColumns.forEach(
      (columnName, leftAnalysis) -> mergedColumns.put(columnName, leftAnalysis.fold(rightColumns.get(columnName)))
  );
  for (Map.Entry<String, ColumnAnalysis> rightEntry : rightColumns.entrySet()) {
    if (!leftColumns.containsKey(rightEntry.getKey())) {
      mergedColumns.put(rightEntry.getKey(), rightEntry.getValue());
    }
  }

  final Map<String, AggregatorFactory> mergedAggregators = new HashMap<>();
  if (lenientAggregatorMerge) {
    // Merge aggregator by aggregator, skipping analyses that carry no aggregator map at all.
    for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
      final Map<String, AggregatorFactory> segmentAggregators = analysis.getAggregators();
      if (segmentAggregators == null) {
        continue;
      }
      for (Map.Entry<String, AggregatorFactory> entry : segmentAggregators.entrySet()) {
        final String aggregatorName = entry.getKey();
        final AggregatorFactory incoming = entry.getValue();
        final AggregatorFactory existing = mergedAggregators.get(aggregatorName);

        AggregatorFactory result;
        if (existing == null) {
          // Nothing (or only a null) recorded under this name so far: take the incoming value as-is.
          result = incoming;
        } else {
          try {
            result = existing.getMergingFactory(incoming);
          } catch (AggregatorFactoryNotMergeableException e) {
            // Not mergeable: record null under this name.
            result = null;
          }
        }
        mergedAggregators.put(aggregatorName, result);
      }
    }
  } else {
    final AggregatorFactory[] leftAggs = arg1.getAggregators() == null
                                         ? null
                                         : arg1.getAggregators().values().toArray(new AggregatorFactory[0]);
    final AggregatorFactory[] rightAggs = arg2.getAggregators() == null
                                          ? null
                                          : arg2.getAggregators().values().toArray(new AggregatorFactory[0]);
    final AggregatorFactory[] combined = AggregatorFactory.mergeAggregators(Arrays.asList(leftAggs, rightAggs));
    if (combined != null) {
      for (AggregatorFactory aggregator : combined) {
        mergedAggregators.put(aggregator.getName(), aggregator);
      }
    }
  }

  final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(
      Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec())
  );
  final Granularity queryGranularity = Granularity.mergeGranularities(
      Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity())
  );

  // Keep the id / rollup flag only when both sides agree on a non-null value.
  final boolean sameId = arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId());
  final String mergedId = sameId ? arg1.getId() : "merged";

  final boolean sameRollup = arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup());
  final Boolean rollup = sameRollup ? arg1.isRollup() : null;

  return new SegmentAnalysis(
      mergedId,
      mergedIntervals,
      mergedColumns,
      arg1.getSize() + arg2.getSize(),
      arg1.getNumRows() + arg2.getNumRows(),
      mergedAggregators.isEmpty() ? null : mergedAggregators,
      timestampSpec,
      queryGranularity,
      rollup
  );
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class SegmentMetadataQueryRunnerFactory method createRunner.
/**
 * Creates a runner that analyzes the given segment and emits exactly one {@link SegmentAnalysis} for it.
 *
 * <p>The segment metadata is fetched lazily: it is only requested when the query asks for aggregators,
 * timestamp spec, query granularity or rollup, and a non-null result is reused by the later sections.
 *
 * @param segment the segment to analyze
 * @return a runner producing a single-element sequence with the analysis of {@code segment}
 */
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
  return new QueryRunner<SegmentAnalysis>() {
    @Override
    public Sequence<SegmentAnalysis> run(QueryPlus<SegmentAnalysis> inQ, ResponseContext responseContext) {
      final SegmentMetadataQuery updatedQuery =
          ((SegmentMetadataQuery) inQ.getQuery()).withFinalizedAnalysisTypes(toolChest.getConfig());
      final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes());
      final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
      final long numRows = analyzer.numRows(segment);

      // When size analysis is on, start from one byte per column per row (the original comment attributes
      // this to whitespace); otherwise start from zero.
      long totalSize = analyzer.analyzingSize() ? analyzedColumns.size() * numRows : 0;

      // Every non-error column counts towards the size, but only included columns are reported.
      final Map<String, ColumnAnalysis> reportedColumns = new TreeMap<>();
      final ColumnIncluderator includerator = updatedQuery.getToInclude();
      for (Map.Entry<String, ColumnAnalysis> analyzed : analyzedColumns.entrySet()) {
        final ColumnAnalysis analysis = analyzed.getValue();
        if (!analysis.isError()) {
          totalSize += analysis.getSize();
        }
        if (includerator.include(analyzed.getKey())) {
          reportedColumns.put(analyzed.getKey(), analysis);
        }
      }

      final List<Interval> retIntervals =
          updatedQuery.analyzingInterval() ? Collections.singletonList(segment.getDataInterval()) : null;

      Metadata metadata = null;

      // Aggregators keyed by name; null when not requested or when the metadata has none.
      Map<String, AggregatorFactory> aggregators = null;
      if (updatedQuery.hasAggregators()) {
        metadata = segment.asStorageAdapter().getMetadata();
        if (metadata != null && metadata.getAggregators() != null) {
          aggregators = new HashMap<>();
          for (AggregatorFactory aggregator : metadata.getAggregators()) {
            aggregators.put(aggregator.getName(), aggregator);
          }
        }
      }

      TimestampSpec timestampSpec = null;
      if (updatedQuery.hasTimestampSpec()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        if (metadata != null) {
          timestampSpec = metadata.getTimestampSpec();
        }
      }

      Granularity queryGranularity = null;
      if (updatedQuery.hasQueryGranularity()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        if (metadata != null) {
          queryGranularity = metadata.getQueryGranularity();
        }
      }

      Boolean rollup = null;
      if (updatedQuery.hasRollup()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        if (metadata != null) {
          rollup = metadata.isRollup();
        }
        if (rollup == null) {
          // No rollup information available: per the original note, such a segment was built before the
          // no-rollup option existed, so it is treated as rolled up.
          rollup = Boolean.TRUE;
        }
      }

      final SegmentAnalysis analysis = new SegmentAnalysis(
          segment.getId().toString(),
          retIntervals,
          reportedColumns,
          totalSize,
          numRows,
          aggregators,
          timestampSpec,
          queryGranularity,
          rollup
      );
      return Sequences.simple(Collections.singletonList(analysis));
    }
  };
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class TimeseriesQueryQueryToolChest method getCacheStrategy.
/**
 * Builds the cache strategy for a timeseries query.
 *
 * <p>A cached entry is a flat list: the timestamp in millis first, then one value per aggregator of
 * {@code query} in declaration order and, for the result-level cache only, one value per post-aggregator.
 *
 * @param query the query whose aggregators and post-aggregators define the cached row layout
 * @return the cache strategy for {@code query}
 */
@Override
public CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) {
  return new CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery>() {
    private final List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();

    @Override
    public boolean isCacheable(TimeseriesQuery query, boolean willMergeRunners) {
      return true;
    }

    @Override
    public byte[] computeCacheKey(TimeseriesQuery query) {
      return new CacheKeyBuilder(TIMESERIES_QUERY)
          .appendBoolean(query.isDescending())
          .appendBoolean(query.isSkipEmptyBuckets())
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimensionsFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheable(query.getVirtualColumns())
          .appendInt(query.getLimit())
          .build();
    }

    @Override
    public byte[] computeResultLevelCacheKey(TimeseriesQuery query) {
      // Same components as computeCacheKey, followed by the inputs that only affect final results.
      return new CacheKeyBuilder(TIMESERIES_QUERY)
          .appendBoolean(query.isDescending())
          .appendBoolean(query.isSkipEmptyBuckets())
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimensionsFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheable(query.getVirtualColumns())
          .appendCacheables(query.getPostAggregatorSpecs())
          .appendInt(query.getLimit())
          .appendString(query.getTimestampResultField())
          .appendBoolean(query.isGrandTotal())
          .build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz() {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<Result<TimeseriesResultValue>, Object> prepareForCache(boolean isResultLevelCache) {
      return input -> {
        final TimeseriesResultValue value = input.getValue();
        final List<Object> cached = Lists.newArrayListWithCapacity(1 + aggregatorSpecs.size());

        // A null timestamp is possible when grandTotal is true, which is tolerated only at result level.
        if (!isResultLevelCache) {
          cached.add(Preconditions.checkNotNull(input.getTimestamp(), "timestamp of input[%s]", input).getMillis());
        } else if (input.getTimestamp() == null) {
          cached.add(null);
        } else {
          cached.add(input.getTimestamp().getMillis());
        }

        for (AggregatorFactory aggregator : aggregatorSpecs) {
          cached.add(value.getMetric(aggregator.getName()));
        }
        if (isResultLevelCache) {
          for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) {
            cached.add(value.getMetric(postAggregator.getName()));
          }
        }
        return cached;
      };
    }

    @Override
    public Function<Object, Result<TimeseriesResultValue>> pullFromCache(boolean isResultLevelCache) {
      // Read once per returned function.
      final Granularity granularity = query.getGranularity();

      return input -> {
        final List<Object> cached = (List<Object>) input;
        final Iterator<Object> cachedValues = cached.iterator();
        final Map<String, Object> metrics = Maps.newLinkedHashMap();

        // First element is the timestamp; it may only be null in the result-level cache.
        final Number timestampNumber = (Number) cachedValues.next();
        final DateTime timestamp;
        if (!isResultLevelCache) {
          timestamp = granularity.toDateTime(Preconditions.checkNotNull(timestampNumber, "timestamp").longValue());
        } else if (timestampNumber == null) {
          timestamp = null;
        } else {
          timestamp = granularity.toDateTime(timestampNumber.longValue());
        }

        CacheStrategy.fetchAggregatorsFromCache(
            aggregatorSpecs,
            cachedValues,
            isResultLevelCache,
            (aggName, aggPosition, aggValueObject) -> metrics.put(aggName, aggValueObject)
        );

        if (isResultLevelCache) {
          // Remaining values pair up with the post-aggregators; stop as soon as either side runs out.
          for (Iterator<PostAggregator> postAggregators = query.getPostAggregatorSpecs().iterator();
               postAggregators.hasNext() && cachedValues.hasNext(); ) {
            metrics.put(postAggregators.next().getName(), cachedValues.next());
          }
        }

        return new Result<>(timestamp, new TimeseriesResultValue(metrics));
      };
    }
  };
}
use of org.apache.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
the class QueriesTest method testVerifyAggregationsMissingVal.
/**
 * Verifies that {@code Queries.prepareAggregations} throws an {@link IllegalArgumentException} when a
 * post-aggregator reads a field ("idx2") that none of the aggregators provides.
 */
@Test
public void testVerifyAggregationsMissingVal() {
  final List<AggregatorFactory> aggregators = Arrays.asList(
      new CountAggregatorFactory("count"),
      new DoubleSumAggregatorFactory("idx", "index"),
      new DoubleSumAggregatorFactory("rev", "revenue")
  );

  // "idx2" is intentionally not among the aggregator names above.
  final PostAggregator addStuff = new ArithmeticPostAggregator(
      "addStuff",
      "+",
      Arrays.asList(new FieldAccessPostAggregator("idx", "idx2"), new FieldAccessPostAggregator("count", "count"))
  );
  final List<PostAggregator> postAggregators = Collections.singletonList(addStuff);

  boolean rejected;
  try {
    Queries.prepareAggregations(ImmutableList.of(), aggregators, postAggregators);
    rejected = false;
  } catch (IllegalArgumentException e) {
    rejected = true;
  }

  Assert.assertTrue(rejected);
}
Aggregations