
Example 1 with CacheStrategy

Use of io.druid.query.CacheStrategy in project druid by druid-io.

From class GroupByQueryQueryToolChest, method getCacheStrategy:

@Override
public CacheStrategy<Row, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) {
    return new CacheStrategy<Row, Object, GroupByQuery>() {

        private static final byte CACHE_STRATEGY_VERSION = 0x1;

        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        private final List<DimensionSpec> dims = query.getDimensions();

        @Override
        public boolean isCacheable(GroupByQuery query, boolean willMergeRunners) {
            return strategySelector.strategize(query).isCacheable(willMergeRunners);
        }

        @Override
        public byte[] computeCacheKey(GroupByQuery query) {
            return new CacheKeyBuilder(GROUPBY_QUERY)
                .appendByte(CACHE_STRATEGY_VERSION)
                .appendCacheable(query.getGranularity())
                .appendCacheable(query.getDimFilter())
                .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
                .appendCacheablesIgnoringOrder(query.getDimensions())
                .appendCacheable(query.getVirtualColumns())
                .build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Row, Object> prepareForCache() {
            return new Function<Row, Object>() {

                @Override
                public Object apply(Row input) {
                    if (input instanceof MapBasedRow) {
                        final MapBasedRow row = (MapBasedRow) input;
                        final List<Object> retVal = Lists.newArrayListWithCapacity(1 + dims.size() + aggs.size());
                        retVal.add(row.getTimestamp().getMillis());
                        Map<String, Object> event = row.getEvent();
                        for (DimensionSpec dim : dims) {
                            retVal.add(event.get(dim.getOutputName()));
                        }
                        for (AggregatorFactory agg : aggs) {
                            retVal.add(event.get(agg.getName()));
                        }
                        return retVal;
                    }
                    throw new ISE("Don't know how to cache input rows of type[%s]", input.getClass());
                }
            };
        }

        @Override
        public Function<Object, Row> pullFromCache() {
            return new Function<Object, Row>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Row apply(Object input) {
                    Iterator<Object> results = ((List<Object>) input).iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) results.next()).longValue());
                    Map<String, Object> event = Maps.newLinkedHashMap();
                    Iterator<DimensionSpec> dimsIter = dims.iterator();
                    while (dimsIter.hasNext() && results.hasNext()) {
                        final DimensionSpec factory = dimsIter.next();
                        event.put(factory.getOutputName(), results.next());
                    }
                    Iterator<AggregatorFactory> aggsIter = aggs.iterator();
                    while (aggsIter.hasNext() && results.hasNext()) {
                        final AggregatorFactory factory = aggsIter.next();
                        event.put(factory.getName(), factory.deserialize(results.next()));
                    }
                    if (dimsIter.hasNext() || aggsIter.hasNext() || results.hasNext()) {
                        throw new ISE("Found left over objects while reading from cache!! dimsIter[%s] aggsIter[%s] results[%s]", dimsIter.hasNext(), aggsIter.hasNext(), results.hasNext());
                    }
                    return new MapBasedRow(timestamp, event);
                }
            };
        }
    };
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec), CacheKeyBuilder (io.druid.query.cache.CacheKeyBuilder), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), Granularity (io.druid.java.util.common.granularity.Granularity), DateTime (org.joda.time.DateTime), MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), ArrayList (java.util.ArrayList), List (java.util.List), ISE (io.druid.java.util.common.ISE), Row (io.druid.data.input.Row), CacheStrategy (io.druid.query.CacheStrategy)
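
The group-by strategy flattens each MapBasedRow into a positional list: the row timestamp in millis first, then one value per dimension, then one per aggregator, all in query order. pullFromCache() walks the same order (deserializing aggregator values through each factory) and fails fast if anything is left over. A minimal round-trip sketch, assuming a toolChest, query, and row already exist (this driver code is illustrative, not part of Druid):

CacheStrategy<Row, Object, GroupByQuery> strategy = toolChest.getCacheStrategy(query);
// Flatten for storage: [timestampMillis, dim values..., agg values...]
Object cacheable = strategy.prepareForCache().apply(row);
// Rebuild the row; the list must line up exactly with dims and aggs,
// or pullFromCache() throws the "Found left over objects" ISE.
Row roundTripped = strategy.pullFromCache().apply(cacheable);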

Example 2 with CacheStrategy

Use of io.druid.query.CacheStrategy in project druid by druid-io.

From class TimeseriesQueryQueryToolChest, method getCacheStrategy:

@Override
public CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery> getCacheStrategy(final TimeseriesQuery query) {
    return new CacheStrategy<Result<TimeseriesResultValue>, Object, TimeseriesQuery>() {

        private final List<AggregatorFactory> aggs = query.getAggregatorSpecs();

        @Override
        public boolean isCacheable(TimeseriesQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(TimeseriesQuery query) {
            return new CacheKeyBuilder(TIMESERIES_QUERY)
                .appendBoolean(query.isDescending())
                .appendBoolean(query.isSkipEmptyBuckets())
                .appendCacheable(query.getGranularity())
                .appendCacheable(query.getDimensionsFilter())
                .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
                .appendCacheable(query.getVirtualColumns())
                .build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<TimeseriesResultValue>, Object> prepareForCache() {
            return new Function<Result<TimeseriesResultValue>, Object>() {

                @Override
                public Object apply(final Result<TimeseriesResultValue> input) {
                    TimeseriesResultValue results = input.getValue();
                    final List<Object> retVal = Lists.newArrayListWithCapacity(1 + aggs.size());
                    retVal.add(input.getTimestamp().getMillis());
                    for (AggregatorFactory agg : aggs) {
                        retVal.add(results.getMetric(agg.getName()));
                    }
                    return retVal;
                }
            };
        }

        @Override
        public Function<Object, Result<TimeseriesResultValue>> pullFromCache() {
            return new Function<Object, Result<TimeseriesResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<TimeseriesResultValue> apply(@Nullable Object input) {
                    List<Object> results = (List<Object>) input;
                    Map<String, Object> retVal = Maps.newLinkedHashMap();
                    Iterator<AggregatorFactory> aggsIter = aggs.iterator();
                    Iterator<Object> resultIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
                    while (aggsIter.hasNext() && resultIter.hasNext()) {
                        final AggregatorFactory factory = aggsIter.next();
                        retVal.put(factory.getName(), factory.deserialize(resultIter.next()));
                    }
                    return new Result<TimeseriesResultValue>(timestamp, new TimeseriesResultValue(retVal));
                }
            };
        }
    };
}
Also used: CacheKeyBuilder (io.druid.query.cache.CacheKeyBuilder), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), Granularity (io.druid.java.util.common.granularity.Granularity), DateTime (org.joda.time.DateTime), Result (io.druid.query.Result), Function (com.google.common.base.Function), List (java.util.List), CacheStrategy (io.druid.query.CacheStrategy), Nullable (javax.annotation.Nullable)
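
With no dimensions to record, the timeseries layout is simply [timestampMillis, agg values...]. A worked example, assuming two aggregators named "rows" and "total" (hypothetical names, not from the source):

// Hypothetical cached entry: timestamp, then one value per aggregator.
List<Object> cached = Lists.<Object>newArrayList(1325376000000L, 42L, 1234.5);
// pullFromCache() iterates aggs and the list in lockstep, so the rebuilt
// Result maps "rows" -> deserialize(42L) and "total" -> deserialize(1234.5).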

Example 3 with CacheStrategy

Use of io.druid.query.CacheStrategy in project druid by druid-io.

From class CachingQueryRunner, method run:

@Override
public Sequence<T> run(Query<T> query, Map<String, Object> responseContext) {
    final CacheStrategy strategy = toolChest.getCacheStrategy(query);
    final boolean populateCache = CacheUtil.populateCacheOnDataNodes(query, strategy, cacheConfig);
    final boolean useCache = CacheUtil.useCacheOnDataNodes(query, strategy, cacheConfig);
    final Cache.NamedKey key;
    if (strategy != null && (useCache || populateCache)) {
        key = CacheUtil.computeSegmentCacheKey(segmentIdentifier, segmentDescriptor, strategy.computeCacheKey(query));
    } else {
        key = null;
    }
    if (useCache) {
        final Function cacheFn = strategy.pullFromCache();
        final byte[] cachedResult = cache.get(key);
        if (cachedResult != null) {
            final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz();
            return Sequences.map(new BaseSequence<>(new BaseSequence.IteratorMaker<T, Iterator<T>>() {

                @Override
                public Iterator<T> make() {
                    try {
                        if (cachedResult.length == 0) {
                            return Iterators.emptyIterator();
                        }
                        return mapper.readValues(mapper.getFactory().createParser(cachedResult), cacheObjectClazz);
                    } catch (IOException e) {
                        throw Throwables.propagate(e);
                    }
                }

                @Override
                public void cleanup(Iterator<T> iterFromMake) {
                }
            }), cacheFn);
        }
    }
    final Collection<ListenableFuture<?>> cacheFutures = Collections.synchronizedList(Lists.<ListenableFuture<?>>newLinkedList());
    if (populateCache) {
        final Function cacheFn = strategy.prepareForCache();
        return Sequences.withEffect(Sequences.map(base.run(query, responseContext), new Function<T, T>() {

            @Override
            public T apply(final T input) {
                final SettableFuture<Object> future = SettableFuture.create();
                cacheFutures.add(future);
                backgroundExecutorService.submit(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            future.set(cacheFn.apply(input));
                        } catch (Exception e) {
                            // if there is exception, should setException to quit the caching processing
                            future.setException(e);
                        }
                    }
                });
                return input;
            }
        }), new Runnable() {

            @Override
            public void run() {
                try {
                    CacheUtil.populate(cache, mapper, key, Futures.allAsList(cacheFutures).get());
                } catch (Exception e) {
                    log.error(e, "Error while getting future for cache task");
                    throw Throwables.propagate(e);
                }
            }
        }, backgroundExecutorService);
    } else {
        return base.run(query, responseContext);
    }
}
Also used: IOException (java.io.IOException), Function (com.google.common.base.Function), Iterator (java.util.Iterator), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), TypeReference (com.fasterxml.jackson.core.type.TypeReference), CacheStrategy (io.druid.query.CacheStrategy), Cache (io.druid.client.cache.Cache)
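
run() has three outcomes: on a cache hit the stored bytes are streamed back through pullFromCache(); when populating, each result is flattened on the background executor and CacheUtil.populate() runs only after the sequence is fully consumed (the effect attached via Sequences.withEffect); otherwise the call passes straight through to the base runner. A wiring sketch, with the constructor argument order inferred from the fields used above (treat it as an assumption, not the definitive signature):

// Assumed constructor shape matching the fields referenced in run():
QueryRunner<Row> runner = new CachingQueryRunner<>(
    segmentIdentifier, segmentDescriptor, mapper, cache,
    toolChest, baseRunner, backgroundExecutorService, cacheConfig);
Sequence<Row> results = runner.run(query, responseContext);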

Example 4 with CacheStrategy

Use of io.druid.query.CacheStrategy in project druid by druid-io.

From class SearchQueryQueryToolChest, method getCacheStrategy:

@Override
public CacheStrategy<Result<SearchResultValue>, Object, SearchQuery> getCacheStrategy(final SearchQuery query) {
    return new CacheStrategy<Result<SearchResultValue>, Object, SearchQuery>() {

        private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();

        private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {

            @Override
            public String apply(DimensionSpec input) {
                return input.getOutputName();
            }
        }) : Collections.<String>emptyList();

        @Override
        public boolean isCacheable(SearchQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(SearchQuery query) {
            final DimFilter dimFilter = query.getDimensionsFilter();
            final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
            final byte[] querySpecBytes = query.getQuery().getCacheKey();
            final byte[] granularityBytes = query.getGranularity().getCacheKey();
            final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
            final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
            int dimensionsBytesSize = 0;
            int index = 0;
            for (DimensionSpec dimensionSpec : dimensionSpecs) {
                dimensionsBytes[index] = dimensionSpec.getCacheKey();
                dimensionsBytesSize += dimensionsBytes[index].length;
                ++index;
            }
            final byte[] sortSpecBytes = query.getSort().getCacheKey();
            final ByteBuffer queryCacheKey = ByteBuffer
                .allocate(
                    1 + 4 + granularityBytes.length + filterBytes.length
                        + querySpecBytes.length + dimensionsBytesSize + sortSpecBytes.length)
                .put(SEARCH_QUERY)
                .put(Ints.toByteArray(query.getLimit()))
                .put(granularityBytes)
                .put(filterBytes)
                .put(querySpecBytes)
                .put(sortSpecBytes);
            for (byte[] bytes : dimensionsBytes) {
                queryCacheKey.put(bytes);
            }
            return queryCacheKey.array();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<SearchResultValue>, Object> prepareForCache() {
            return new Function<Result<SearchResultValue>, Object>() {

                @Override
                public Object apply(Result<SearchResultValue> input) {
                    return dimensionSpecs.size() > 0 ? Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue(), dimOutputNames) : Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue());
                }
            };
        }

        @Override
        public Function<Object, Result<SearchResultValue>> pullFromCache() {
            return new Function<Object, Result<SearchResultValue>>() {

                @Override
                @SuppressWarnings("unchecked")
                public Result<SearchResultValue> apply(Object input) {
                    List<Object> result = (List<Object>) input;
                    boolean needsRename = false;
                    final Map<String, String> outputNameMap = Maps.newHashMap();
                    if (hasOutputName(result)) {
                        List<String> cachedOutputNames = (List) result.get(2);
                        Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "cache hit, but number of dimensions mismatch");
                        needsRename = false;
                        for (int idx = 0; idx < cachedOutputNames.size(); idx++) {
                            String cachedOutputName = cachedOutputNames.get(idx);
                            String outputName = dimOutputNames.get(idx);
                            if (!cachedOutputName.equals(outputName)) {
                                needsRename = true;
                            }
                            outputNameMap.put(cachedOutputName, outputName);
                        }
                    }
                    return !needsRename ? new Result<>(new DateTime(((Number) result.get(0)).longValue()), new SearchResultValue(Lists.transform((List) result.get(1), new Function<Object, SearchHit>() {

                        @Override
                        public SearchHit apply(@Nullable Object input) {
                            if (input instanceof Map) {
                                return new SearchHit((String) ((Map) input).get("dimension"), (String) ((Map) input).get("value"), (Integer) ((Map) input).get("count"));
                            } else if (input instanceof SearchHit) {
                                return (SearchHit) input;
                            } else {
                                throw new IAE("Unknown format [%s]", input.getClass());
                            }
                        }
                    }))) : new Result<>(new DateTime(((Number) result.get(0)).longValue()), new SearchResultValue(Lists.transform((List) result.get(1), new Function<Object, SearchHit>() {

                        @Override
                        public SearchHit apply(@Nullable Object input) {
                            String dim = null;
                            String val = null;
                            Integer cnt = null;
                            if (input instanceof Map) {
                                dim = outputNameMap.get((String) ((Map) input).get("dimension"));
                                val = (String) ((Map) input).get("value");
                                cnt = (Integer) ((Map) input).get("count");
                            } else if (input instanceof SearchHit) {
                                SearchHit cached = (SearchHit) input;
                                dim = outputNameMap.get(cached.getDimension());
                                val = cached.getValue();
                                cnt = cached.getCount();
                            } else {
                                throw new IAE("Unknown format [%s]", input.getClass());
                            }
                            return new SearchHit(dim, val, cnt);
                        }
                    })));
                }
            };
        }

        private boolean hasOutputName(List<Object> cachedEntry) {
            /*
             * A cached entry is a list of two or three objects:
             *  1. timestamp
             *  2. SearchResultValue
             *  3. outputName of each dimension (optional)
             *
             * If a cached entry has three objects, the dimension names in the
             * SearchResultValue should be checked to see if a rename is needed.
             */
            return cachedEntry.size() == 3;
        }
    };
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), SearchHit (io.druid.query.search.search.SearchHit), DateTime (org.joda.time.DateTime), Result (io.druid.query.Result), Function (com.google.common.base.Function), List (java.util.List), SearchQuery (io.druid.query.search.search.SearchQuery), IAE (io.druid.java.util.common.IAE), ByteBuffer (java.nio.ByteBuffer), DimFilter (io.druid.query.filter.DimFilter), Map (java.util.Map), CacheStrategy (io.druid.query.CacheStrategy), Nullable (javax.annotation.Nullable)
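
The optional third element is what lets a cached search result survive a query that aliases the same underlying dimension differently: on a hit, the recorded names are compared index by index, and each SearchHit is rewritten through outputNameMap only when some name actually changed. A hedged illustration with made-up values:

// Entry written by a query whose dimension output name was "page"; the
// current query aliases that dimension to "page_alias".
List<Object> cachedEntry = Lists.<Object>newArrayList(
    1325376000000L,                                         // timestamp millis
    ImmutableList.of(ImmutableMap.of(
        "dimension", "page", "value", "foo", "count", 3)),  // SearchResultValue payload
    ImmutableList.of("page"));                              // output names at write time
// hasOutputName() sees three elements, "page" != "page_alias", so
// pullFromCache() emits new SearchHit("page_alias", "foo", 3).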

Example 5 with CacheStrategy

Use of io.druid.query.CacheStrategy in project druid by druid-io.

From class SelectQueryQueryToolChest, method getCacheStrategy:

@Override
public CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> getCacheStrategy(final SelectQuery query) {
    return new CacheStrategy<Result<SelectResultValue>, Object, SelectQuery>() {

        private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();

        private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {

            @Override
            public String apply(DimensionSpec input) {
                return input.getOutputName();
            }
        }) : Collections.<String>emptyList();

        @Override
        public boolean isCacheable(SelectQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(SelectQuery query) {
            final DimFilter dimFilter = query.getDimensionsFilter();
            final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
            final byte[] granularityBytes = query.getGranularity().getCacheKey();
            final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
            final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
            int dimensionsBytesSize = 0;
            int index = 0;
            for (DimensionSpec dimension : dimensionSpecs) {
                dimensionsBytes[index] = dimension.getCacheKey();
                dimensionsBytesSize += dimensionsBytes[index].length;
                ++index;
            }
            final Set<String> metrics = Sets.newTreeSet();
            if (query.getMetrics() != null) {
                metrics.addAll(query.getMetrics());
            }
            final byte[][] metricBytes = new byte[metrics.size()][];
            int metricBytesSize = 0;
            index = 0;
            for (String metric : metrics) {
                metricBytes[index] = StringUtils.toUtf8(metric);
                metricBytesSize += metricBytes[index].length;
                ++index;
            }
            final byte[] virtualColumnsCacheKey = query.getVirtualColumns().getCacheKey();
            final ByteBuffer queryCacheKey = ByteBuffer
                .allocate(
                    1 + granularityBytes.length + filterBytes.length
                        + query.getPagingSpec().getCacheKey().length
                        + dimensionsBytesSize + metricBytesSize + virtualColumnsCacheKey.length)
                .put(SELECT_QUERY)
                .put(granularityBytes)
                .put(filterBytes)
                .put(query.getPagingSpec().getCacheKey());
            for (byte[] dimensionsByte : dimensionsBytes) {
                queryCacheKey.put(dimensionsByte);
            }
            for (byte[] metricByte : metricBytes) {
                queryCacheKey.put(metricByte);
            }
            queryCacheKey.put(virtualColumnsCacheKey);
            return queryCacheKey.array();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<SelectResultValue>, Object> prepareForCache() {
            return new Function<Result<SelectResultValue>, Object>() {

                @Override
                public Object apply(final Result<SelectResultValue> input) {
                    if (!dimOutputNames.isEmpty()) {
                        return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents(), dimOutputNames);
                    }
                    return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents());
                }
            };
        }

        @Override
        public Function<Object, Result<SelectResultValue>> pullFromCache() {
            return new Function<Object, Result<SelectResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<SelectResultValue> apply(Object input) {
                    List<Object> results = (List<Object>) input;
                    Iterator<Object> resultIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
                    Map<String, Integer> pageIdentifier = jsonMapper.convertValue(resultIter.next(), new TypeReference<Map<String, Integer>>() {
                    });
                    Set<String> dimensionSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
                    });
                    Set<String> metricSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
                    });
                    List<EventHolder> eventHolders = jsonMapper.convertValue(resultIter.next(), new TypeReference<List<EventHolder>>() {
                    });
                    // check the condition that outputName of cached result should be updated
                    if (resultIter.hasNext()) {
                        List<String> cachedOutputNames = (List<String>) resultIter.next();
                        Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "Cache hit but different number of dimensions??");
                        for (int idx = 0; idx < dimOutputNames.size(); idx++) {
                            if (!cachedOutputNames.get(idx).equals(dimOutputNames.get(idx))) {
                                // rename outputName in the EventHolder
                                for (EventHolder eventHolder : eventHolders) {
                                    Object obj = eventHolder.getEvent().remove(cachedOutputNames.get(idx));
                                    if (obj != null) {
                                        eventHolder.getEvent().put(dimOutputNames.get(idx), obj);
                                    }
                                }
                            }
                        }
                    }
                    return new Result<>(timestamp, new SelectResultValue(pageIdentifier, dimensionSet, metricSet, eventHolders));
                }
            };
        }
    };
}
Also used: DimensionSpec (io.druid.query.dimension.DimensionSpec), Set (java.util.Set), Granularity (io.druid.java.util.common.granularity.Granularity), DateTime (org.joda.time.DateTime), Result (io.druid.query.Result), Function (com.google.common.base.Function), List (java.util.List), ByteBuffer (java.nio.ByteBuffer), DimFilter (io.druid.query.filter.DimFilter), Map (java.util.Map), TreeMap (java.util.TreeMap), CacheStrategy (io.druid.query.CacheStrategy)
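
Select uses the same optional-trailing-names trick as the search strategy, but the cached list carries more positions, and the rename is applied in place on each EventHolder's event map rather than by rebuilding the hits. A hypothetical cached entry, assuming one dimension written as "dim0" (eventHolders stands in for the serialized List<EventHolder>):

// Layout written by prepareForCache(); the last element is present only
// when the query has dimensions:
List<Object> cached = Arrays.<Object>asList(
    1325376000000L,                      // timestamp millis
    ImmutableMap.of("segment_2012", 9),  // paging identifiers
    ImmutableSet.of("dim0"),             // dimension names
    ImmutableSet.of("metric0"),          // metric names
    eventHolders,                        // the page of events
    ImmutableList.of("dim0"));           // output names at write time
// If the current query aliases the dimension as "dim0_alias", pullFromCache()
// moves each event's "dim0" entry to the "dim0_alias" key before building
// the Result.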

Aggregations

CacheStrategy (io.druid.query.CacheStrategy): 8 usages
Function (com.google.common.base.Function): 6 usages
Result (io.druid.query.Result): 6 usages
List (java.util.List): 5 usages
Map (java.util.Map): 5 usages
DateTime (org.joda.time.DateTime): 5 usages
Granularity (io.druid.java.util.common.granularity.Granularity): 4 usages
Cache (io.druid.client.cache.Cache): 3 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 usages
CacheKeyBuilder (io.druid.query.cache.CacheKeyBuilder): 3 usages
DimensionSpec (io.druid.query.dimension.DimensionSpec): 3 usages
ArrayList (java.util.ArrayList): 3 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 2 usages
CacheConfig (io.druid.client.cache.CacheConfig): 2 usages
MapCache (io.druid.client.cache.MapCache): 2 usages
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 2 usages
Sequence (io.druid.java.util.common.guava.Sequence): 2 usages
Query (io.druid.query.Query): 2 usages
QueryRunner (io.druid.query.QueryRunner): 2 usages
SegmentDescriptor (io.druid.query.SegmentDescriptor): 2 usages