Use of org.apache.druid.query.CacheStrategy in project druid by druid-io.
The class GroupByQueryQueryToolChest, method getCacheStrategy:
@Override
public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) {
  return new CacheStrategy<ResultRow, Object, GroupByQuery>() {
    private static final byte CACHE_STRATEGY_VERSION = 0x1;
    private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();
    private final List<DimensionSpec> dims = query.getDimensions();

    @Override
    public boolean isCacheable(GroupByQuery query, boolean willMergeRunners) {
      return strategySelector.strategize(query).isCacheable(willMergeRunners);
    }

    @Override
    public byte[] computeCacheKey(GroupByQuery query) {
      CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
          .appendByte(CACHE_STRATEGY_VERSION)
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheables(query.getDimensions())
          .appendCacheable(query.getVirtualColumns());
      if (query.isApplyLimitPushDown()) {
        builder.appendCacheable(query.getLimitSpec());
      }
      return builder.build();
    }

    @Override
    public byte[] computeResultLevelCacheKey(GroupByQuery query) {
      final CacheKeyBuilder builder = new CacheKeyBuilder(GROUPBY_QUERY)
          .appendByte(CACHE_STRATEGY_VERSION)
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheables(query.getDimensions())
          .appendCacheable(query.getVirtualColumns())
          .appendCacheable(query.getHavingSpec())
          .appendCacheable(query.getLimitSpec())
          .appendCacheables(query.getPostAggregatorSpecs());
      if (query.getSubtotalsSpec() != null && !query.getSubtotalsSpec().isEmpty()) {
        for (List<String> subTotalSpec : query.getSubtotalsSpec()) {
          builder.appendStrings(subTotalSpec);
        }
      }
      return builder.build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz() {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<ResultRow, Object> prepareForCache(boolean isResultLevelCache) {
      final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();
      return new Function<ResultRow, Object>() {
        @Override
        public Object apply(ResultRow resultRow) {
          final List<Object> retVal = new ArrayList<>(1 + dims.size() + aggs.size());
          int inPos = 0;
          if (resultRowHasTimestamp) {
            retVal.add(resultRow.getLong(inPos++));
          } else {
            retVal.add(query.getUniversalTimestamp().getMillis());
          }
          for (int i = 0; i < dims.size(); i++) {
            retVal.add(resultRow.get(inPos++));
          }
          for (int i = 0; i < aggs.size(); i++) {
            retVal.add(resultRow.get(inPos++));
          }
          if (isResultLevelCache) {
            for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
              retVal.add(resultRow.get(inPos++));
            }
          }
          return retVal;
        }
      };
    }

    @Override
    public Function<Object, ResultRow> pullFromCache(boolean isResultLevelCache) {
      final boolean resultRowHasTimestamp = query.getResultRowHasTimestamp();
      final int dimensionStart = query.getResultRowDimensionStart();
      final int aggregatorStart = query.getResultRowAggregatorStart();
      final int postAggregatorStart = query.getResultRowPostAggregatorStart();
      return new Function<Object, ResultRow>() {
        private final Granularity granularity = query.getGranularity();

        @Override
        public ResultRow apply(Object input) {
          Iterator<Object> results = ((List<Object>) input).iterator();
          DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());
          final int size = isResultLevelCache
              ? query.getResultRowSizeWithPostAggregators()
              : query.getResultRowSizeWithoutPostAggregators();
          final ResultRow resultRow = ResultRow.create(size);
          if (resultRowHasTimestamp) {
            resultRow.set(0, timestamp.getMillis());
          }
          final Iterator<DimensionSpec> dimsIter = dims.iterator();
          int dimPos = 0;
          while (dimsIter.hasNext() && results.hasNext()) {
            final DimensionSpec dimensionSpec = dimsIter.next();
            // Must convert generic Jackson-deserialized type into the proper type.
            resultRow.set(
                dimensionStart + dimPos,
                DimensionHandlerUtils.convertObjectToType(results.next(), dimensionSpec.getOutputType())
            );
            dimPos++;
          }
          CacheStrategy.fetchAggregatorsFromCache(
              aggs,
              results,
              isResultLevelCache,
              (aggName, aggPosition, aggValueObject) -> {
                resultRow.set(aggregatorStart + aggPosition, aggValueObject);
              }
          );
          if (isResultLevelCache) {
            Iterator<PostAggregator> postItr = query.getPostAggregatorSpecs().iterator();
            int postPos = 0;
            while (postItr.hasNext() && results.hasNext()) {
              // Advance the post-aggregator iterator and the output position in lockstep
              // so each cached post-aggregator value lands in its own slot
              // (the original loop advanced neither, overwriting the first slot).
              postItr.next();
              resultRow.set(postAggregatorStart + postPos, results.next());
              postPos++;
            }
          }
          if (dimsIter.hasNext() || results.hasNext()) {
            throw new ISE("Found left over objects while reading from cache!! dimsIter[%s] results[%s]", dimsIter.hasNext(), results.hasNext());
          }
          return resultRow;
        }
      };
    }
  };
}
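
The two functions above are intended to be inverses: prepareForCache flattens a ResultRow into a plain List<Object> laid out as [timestamp millis, dimension values..., aggregator values... (, post-aggregator values when result-level caching is on)], and pullFromCache rebuilds a typed ResultRow from that list after Jackson deserialization. A minimal round-trip sketch under those assumptions (the tool chest, query, mapper, and row are passed in as assumptions; this is illustrative, not code from Druid):

// Illustrative sketch only, not Druid code. Assumes an existing GroupByQueryQueryToolChest,
// a GroupByQuery, a Jackson ObjectMapper, and a single ResultRow to round-trip.
static ResultRow cacheRoundTrip(
    GroupByQueryQueryToolChest toolChest,
    GroupByQuery query,
    ObjectMapper mapper,
    ResultRow row
) throws IOException {
  CacheStrategy<ResultRow, Object, GroupByQuery> strategy = toolChest.getCacheStrategy(query);
  // Write path: flatten the row to [timestamp, dims..., aggs...] and serialize that list.
  byte[] cachedBytes = mapper.writeValueAsBytes(strategy.prepareForSegmentLevelCache().apply(row));
  // Read path: deserialize to the generic cache type, then rebuild a typed ResultRow.
  Object generic = mapper.readValue(cachedBytes, strategy.getCacheObjectClazz());
  return strategy.pullFromSegmentLevelCache().apply(generic);
}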
Use of org.apache.druid.query.CacheStrategy in project druid by druid-io.
The class TopNQueryQueryToolChest, method getCacheStrategy:
@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
  return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {
    private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());
    private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(
        query.getPostAggregatorSpecs(),
        query.getTopNMetricSpec().getMetricName(query.getDimensionSpec())
    );

    @Override
    public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
      return true;
    }

    @Override
    public byte[] computeCacheKey(TopNQuery query) {
      final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
          .appendCacheable(query.getDimensionSpec())
          .appendCacheable(query.getTopNMetricSpec())
          .appendInt(query.getThreshold())
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimensionsFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheable(query.getVirtualColumns());
      final List<PostAggregator> postAggregators = prunePostAggregators(query);
      if (!postAggregators.isEmpty()) {
        // Append post aggregators only when they are used as sort keys.
        // Note that appending an empty list produces a different cache key from not appending it.
        builder.appendCacheablesIgnoringOrder(postAggregators);
      }
      return builder.build();
    }

    @Override
    public byte[] computeResultLevelCacheKey(TopNQuery query) {
      final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
          .appendCacheable(query.getDimensionSpec())
          .appendCacheable(query.getTopNMetricSpec())
          .appendInt(query.getThreshold())
          .appendCacheable(query.getGranularity())
          .appendCacheable(query.getDimensionsFilter())
          .appendCacheables(query.getAggregatorSpecs())
          .appendCacheable(query.getVirtualColumns())
          .appendCacheables(query.getPostAggregatorSpecs());
      return builder.build();
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz() {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<Result<TopNResultValue>, Object> prepareForCache(boolean isResultLevelCache) {
      return new Function<Result<TopNResultValue>, Object>() {
        private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

        @Override
        public Object apply(final Result<TopNResultValue> input) {
          List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
          final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);
          // make sure to preserve timezone information when caching results
          retVal.add(input.getTimestamp().getMillis());
          for (DimensionAndMetricValueExtractor result : results) {
            List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
            vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
            for (String aggName : aggFactoryNames) {
              vals.add(result.getMetric(aggName));
            }
            if (isResultLevelCache) {
              for (PostAggregator postAgg : query.getPostAggregatorSpecs()) {
                vals.add(result.getMetric(postAgg.getName()));
              }
            }
            retVal.add(vals);
          }
          return retVal;
        }
      };
    }

    @Override
    public Function<Object, Result<TopNResultValue>> pullFromCache(boolean isResultLevelCache) {
      return new Function<Object, Result<TopNResultValue>>() {
        private final Granularity granularity = query.getGranularity();

        @Override
        public Result<TopNResultValue> apply(Object input) {
          List<Object> results = (List<Object>) input;
          List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());
          Iterator<Object> inputIter = results.iterator();
          DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());
          while (inputIter.hasNext()) {
            List<Object> result = (List<Object>) inputIter.next();
            final Map<String, Object> vals = Maps.newLinkedHashMap();
            Iterator<Object> resultIter = result.iterator();
            // Must convert generic Jackson-deserialized type into the proper type.
            vals.put(
                query.getDimensionSpec().getOutputName(),
                DimensionHandlerUtils.convertObjectToType(resultIter.next(), query.getDimensionSpec().getOutputType())
            );
            CacheStrategy.fetchAggregatorsFromCache(
                aggs,
                resultIter,
                isResultLevelCache,
                (aggName, aggPos, aggValueObject) -> {
                  vals.put(aggName, aggValueObject);
                }
            );
            if (isResultLevelCache) {
              Iterator<PostAggregator> postItr = query.getPostAggregatorSpecs().iterator();
              while (postItr.hasNext() && resultIter.hasNext()) {
                vals.put(postItr.next().getName(), resultIter.next());
              }
            } else {
              for (PostAggregator postAgg : postAggs) {
                vals.put(postAgg.getName(), postAgg.compute(vals));
              }
            }
            retVal.add(vals);
          }
          return new Result<>(timestamp, new TopNResultValue(retVal));
        }
      };
    }
  };
}
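
As with the groupBy strategy, the cached form is a plain list: the first element is the result timestamp in epoch millis, and each subsequent element is one row laid out as [dimension value, aggregator values in factory order (, post-aggregator values when result-level caching is on)]. Segment-level caching recomputes post-aggregators on read via postAgg.compute(vals), while result-level caching stores them alongside the aggregators. A hypothetical usage sketch (toolChest, query, and someResult are assumed variables; illustrative only, not Druid code):

// Illustrative only: obtain both directions of the segment-level mapping.
CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = toolChest.getCacheStrategy(query);
Function<Result<TopNResultValue>, Object> toCache = strategy.prepareForCache(false);
Function<Object, Result<TopNResultValue>> fromCache = strategy.pullFromCache(false);
// For one dimension and two aggregators the flattened value looks roughly like
// [1546300800000, ["US", 12, 34.0], ["FR", 7, 21.0]] (hypothetical data).
Object cacheValue = toCache.apply(someResult);
Result<TopNResultValue> restored = fromCache.apply(cacheValue);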
Use of org.apache.druid.query.CacheStrategy in project druid by druid-io.
The class CachingQueryRunner, method run:
@Override
public Sequence<T> run(QueryPlus<T> queryPlus, ResponseContext responseContext) {
  Query<T> query = queryPlus.getQuery();
  final CacheStrategy strategy = toolChest.getCacheStrategy(query);
  final boolean populateCache = canPopulateCache(query, strategy);
  final boolean useCache = canUseCache(query, strategy);
  final Cache.NamedKey key;
  if (useCache || populateCache) {
    key = CacheUtil.computeSegmentCacheKey(
        cacheId,
        alignToActualDataInterval(segmentDescriptor),
        Bytes.concat(cacheKeyPrefix.get(), strategy.computeCacheKey(query))
    );
  } else {
    key = null;
  }
  if (useCache) {
    final Function cacheFn = strategy.pullFromSegmentLevelCache();
    final byte[] cachedResult = cache.get(key);
    if (cachedResult != null) {
      final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz();
      return Sequences.map(
          new BaseSequence<>(
              new BaseSequence.IteratorMaker<T, Iterator<T>>() {
                @Override
                public Iterator<T> make() {
                  try {
                    if (cachedResult.length == 0) {
                      return Collections.emptyIterator();
                    }
                    return mapper.readValues(mapper.getFactory().createParser(cachedResult), cacheObjectClazz);
                  } catch (IOException e) {
                    throw new RuntimeException(e);
                  }
                }

                @Override
                public void cleanup(Iterator<T> iterFromMake) {
                }
              }
          ),
          cacheFn
      );
    }
  }
  if (populateCache) {
    final Function cacheFn = strategy.prepareForSegmentLevelCache();
    return cachePopulator.wrap(base.run(queryPlus, responseContext), value -> cacheFn.apply(value), cache, key);
  } else {
    return base.run(queryPlus, responseContext);
  }
}
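
The per-segment cache key combines three things: the segment's identity, an operator-controlled key prefix, and the strategy's fingerprint of the query, so a change to any of them yields a distinct entry. A hypothetical sketch of just the key construction (cacheId, segmentDescriptor, a byte[] cacheKeyPrefix, strategy, query, and cache are assumed variables; illustrative only):

// Illustrative only: the same composition used in run() above, pulled out for clarity.
byte[] queryFingerprint = strategy.computeCacheKey(query);
Cache.NamedKey key = CacheUtil.computeSegmentCacheKey(
    cacheId,
    segmentDescriptor,
    Bytes.concat(cacheKeyPrefix, queryFingerprint)
);
byte[] cachedResult = cache.get(key); // null signals a miss; run() then falls through to base.run(...)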
Use of org.apache.druid.query.CacheStrategy in project druid by druid-io.
The class CachingQueryRunnerTest, method testUseCache:
private void testUseCache(List<Result> expectedResults, Query query, QueryToolChest toolchest) throws IOException {
  byte[] cacheKeyPrefix = RandomUtils.nextBytes(10);
  CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query);
  Cache.NamedKey cacheKey = CacheUtil.computeSegmentCacheKey(
      CACHE_ID,
      SEGMENT_DESCRIPTOR,
      Bytes.concat(cacheKeyPrefix, cacheStrategy.computeCacheKey(query))
  );
  Cache cache = MapCache.create(1024 * 1024);
  cache.put(cacheKey, toByteArray(Iterables.transform(expectedResults, cacheStrategy.prepareForSegmentLevelCache())));
  CachingQueryRunner runner = makeCachingQueryRunner(cacheKeyPrefix, cache, toolchest, Sequences.empty());
  Assert.assertTrue(runner.canUseCache(query, toolchest.getCacheStrategy(query)));
  List<Result> results = runner.run(QueryPlus.wrap(query)).toList();
  Assert.assertEquals(expectedResults.toString(), results.toString());
}
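
The test primes the cache directly under the same key the runner will compute, backs the runner with an empty base sequence, and then asserts that the returned results can only have come from the cache. The toByteArray helper is not shown above; a hypothetical version that matches how CachingQueryRunner.run reads the bytes back (JSON values written one after another by the same ObjectMapper) might look like this (illustrative, not the project's actual helper):

// Hypothetical helper, shown only to illustrate the byte layout that
// mapper.readValues(...) in CachingQueryRunner.run expects. The objectMapper
// field is assumed to exist on the test class.
private byte[] toByteArray(Iterable<Object> preparedResults) throws IOException {
  final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  try (JsonGenerator gen = objectMapper.getFactory().createGenerator(bytes)) {
    for (Object prepared : preparedResults) {
      objectMapper.writeValue(gen, prepared);
    }
  }
  return bytes.toByteArray();
}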