
Example 16 with Granularity

Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.

In class SegmentMetadataQueryQueryToolChest, method mergeAnalyses.

@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
    if (arg1 == null) {
        return arg2;
    }
    if (arg2 == null) {
        return arg1;
    }
    List<Interval> newIntervals = null;
    if (arg1.getIntervals() != null) {
        newIntervals = Lists.newArrayList();
        newIntervals.addAll(arg1.getIntervals());
    }
    if (arg2.getIntervals() != null) {
        if (newIntervals == null) {
            newIntervals = Lists.newArrayList();
        }
        newIntervals.addAll(arg2.getIntervals());
    }
    final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
    final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
    Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
    Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
    for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
        final String columnName = entry.getKey();
        columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
        rightColumnNames.remove(columnName);
    }
    for (String columnName : rightColumnNames) {
        columns.put(columnName, rightColumns.get(columnName));
    }
    final Map<String, AggregatorFactory> aggregators = Maps.newHashMap();
    if (lenientAggregatorMerge) {
        // Merge each aggregator individually, ignoring nulls
        for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
            if (analysis.getAggregators() != null) {
                for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
                    final String aggregatorName = entry.getKey();
                    final AggregatorFactory aggregator = entry.getValue();
                    AggregatorFactory merged = aggregators.get(aggregatorName);
                    if (merged != null) {
                        try {
                            merged = merged.getMergingFactory(aggregator);
                        } catch (AggregatorFactoryNotMergeableException e) {
                            merged = null;
                        }
                    } else {
                        merged = aggregator;
                    }
                    aggregators.put(aggregatorName, merged);
                }
            }
        }
    } else {
        final AggregatorFactory[] aggs1 = arg1.getAggregators() != null ? arg1.getAggregators().values().toArray(new AggregatorFactory[arg1.getAggregators().size()]) : null;
        final AggregatorFactory[] aggs2 = arg2.getAggregators() != null ? arg2.getAggregators().values().toArray(new AggregatorFactory[arg2.getAggregators().size()]) : null;
        final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
        if (merged != null) {
            for (AggregatorFactory aggregator : merged) {
                aggregators.put(aggregator.getName(), aggregator);
            }
        }
    }
    final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
    final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
    final String mergedId;
    if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
        mergedId = arg1.getId();
    } else {
        mergedId = "merged";
    }
    final Boolean rollup;
    if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
        rollup = arg1.isRollup();
    } else {
        rollup = null;
    }
    return new SegmentAnalysis(
        mergedId,
        newIntervals,
        columns,
        arg1.getSize() + arg2.getSize(),
        arg1.getNumRows() + arg2.getNumRows(),
        aggregators.isEmpty() ? null : aggregators,
        timestampSpec,
        queryGranularity,
        rollup
    );
}
Also used : AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) AggregatorFactoryNotMergeableException(io.druid.query.aggregation.AggregatorFactoryNotMergeableException) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
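
A minimal usage sketch of the two merge modes above, assuming two analyses that both carry a "rows" aggregator of incompatible types (a long sum versus a double sum). The SegmentAnalysis constructor arguments mirror the return statement in the snippet; the variable names and the particular aggregator factories are illustrative assumptions, not part of the original example.

Map<String, ColumnAnalysis> noColumns = Maps.newTreeMap();

// Hypothetical inputs: same aggregator name, different factory types, so
// getMergingFactory is assumed to throw AggregatorFactoryNotMergeableException.
SegmentAnalysis analysisA = new SegmentAnalysis(
    "segmentA", null, noColumns, 0, 0,
    ImmutableMap.<String, AggregatorFactory>of("rows", new LongSumAggregatorFactory("rows", "rows")),
    null, null, null
);
SegmentAnalysis analysisB = new SegmentAnalysis(
    "segmentB", null, noColumns, 0, 0,
    ImmutableMap.<String, AggregatorFactory>of("rows", new DoubleSumAggregatorFactory("rows", "rows")),
    null, null, null
);

// Lenient merge keeps the "rows" key but maps it to null once the pairwise merge fails.
SegmentAnalysis lenient = SegmentMetadataQueryQueryToolChest.mergeAnalyses(analysisA, analysisB, true);
// Strict merge delegates to AggregatorFactory.mergeAggregators, which is expected to
// reject the whole set, leaving the merged analysis with no aggregators at all.
SegmentAnalysis strict = SegmentMetadataQueryQueryToolChest.mergeAnalyses(analysisA, analysisB, false);

System.out.println(lenient.getAggregators()); // {rows=null} under the assumptions above
System.out.println(strict.getAggregators());  // null under the assumptions above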

Example 17 with Granularity

Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.

In class SegmentMetadataQueryRunnerFactory, method createRunner.

@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
    return new QueryRunner<SegmentAnalysis>() {

        @Override
        public Sequence<SegmentAnalysis> run(Query<SegmentAnalysis> inQ, Map<String, Object> responseContext) {
            SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
            final SegmentAnalyzer analyzer = new SegmentAnalyzer(query.getAnalysisTypes());
            final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
            final long numRows = analyzer.numRows(segment);
            long totalSize = 0;
            if (analyzer.analyzingSize()) {
                // Initialize with the size of the whitespace: 1 byte per column per row
                totalSize = analyzedColumns.size() * numRows;
            }
            Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
            ColumnIncluderator includerator = query.getToInclude();
            for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
                final String columnName = entry.getKey();
                final ColumnAnalysis column = entry.getValue();
                if (!column.isError()) {
                    totalSize += column.getSize();
                }
                if (includerator.include(columnName)) {
                    columns.put(columnName, column);
                }
            }
            List<Interval> retIntervals = query.analyzingInterval() ? Arrays.asList(segment.getDataInterval()) : null;
            final Map<String, AggregatorFactory> aggregators;
            Metadata metadata = null;
            if (query.hasAggregators()) {
                metadata = segment.asStorageAdapter().getMetadata();
                if (metadata != null && metadata.getAggregators() != null) {
                    aggregators = Maps.newHashMap();
                    for (AggregatorFactory aggregator : metadata.getAggregators()) {
                        aggregators.put(aggregator.getName(), aggregator);
                    }
                } else {
                    aggregators = null;
                }
            } else {
                aggregators = null;
            }
            final TimestampSpec timestampSpec;
            if (query.hasTimestampSpec()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
            } else {
                timestampSpec = null;
            }
            final Granularity queryGranularity;
            if (query.hasQueryGranularity()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
            } else {
                queryGranularity = null;
            }
            Boolean rollup = null;
            if (query.hasRollup()) {
                if (metadata == null) {
                    metadata = segment.asStorageAdapter().getMetadata();
                }
                rollup = metadata != null ? metadata.isRollup() : null;
                if (rollup == null) {
                    // This segment was built before the no-rollup feature existed,
                    // so it must have been built with rollup enabled.
                    rollup = Boolean.TRUE;
                }
            }
            return Sequences.simple(
                Arrays.asList(
                    new SegmentAnalysis(
                        segment.getIdentifier(),
                        retIntervals,
                        columns,
                        totalSize,
                        numRows,
                        aggregators,
                        timestampSpec,
                        queryGranularity,
                        rollup
                    )
                )
            );
        }
    };
}
Also used : BaseQuery(io.druid.query.BaseQuery) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) Query(io.druid.query.Query) Metadata(io.druid.segment.Metadata) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Granularity(io.druid.java.util.common.granularity.Granularity) ColumnIncluderator(io.druid.query.metadata.metadata.ColumnIncluderator) QueryRunner(io.druid.query.QueryRunner) ConcatQueryRunner(io.druid.query.ConcatQueryRunner) SegmentMetadataQuery(io.druid.query.metadata.metadata.SegmentMetadataQuery) ColumnAnalysis(io.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(io.druid.data.input.impl.TimestampSpec) SegmentAnalysis(io.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) Interval(org.joda.time.Interval)
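
A short, hypothetical driver for the runner returned above. The factory, segment, and metadataQuery variables are assumed to be a pre-built SegmentMetadataQueryRunnerFactory, a queryable Segment, and a SegmentMetadataQuery; the Sequences and Lists helpers are the standard ones used throughout this project.

QueryRunner<SegmentAnalysis> runner = factory.createRunner(segment);

// The runner works synchronously: run() analyzes the segment and wraps the single
// SegmentAnalysis in a Sequence, so materializing it yields exactly one element.
Sequence<SegmentAnalysis> results = runner.run(metadataQuery, Maps.<String, Object>newHashMap());
SegmentAnalysis analysis = Sequences.toList(results, Lists.<SegmentAnalysis>newArrayList()).get(0);

System.out.println(analysis.getNumRows() + " rows, " + analysis.getSize() + " bytes analyzed");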

Example 18 with Granularity

Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.

In class SelectQueryQueryToolChest, method filterSegments.

@Override
public <T extends LogicalSegment> List<T> filterSegments(SelectQuery query, List<T> segments) {
    // at the point where this code is called, only one datasource should exist.
    String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
    PagingSpec pagingSpec = query.getPagingSpec();
    Map<String, Integer> paging = pagingSpec.getPagingIdentifiers();
    if (paging == null || paging.isEmpty()) {
        return segments;
    }
    final Granularity granularity = query.getGranularity();
    List<Interval> intervals = Lists.newArrayList(Iterables.transform(paging.keySet(), DataSegmentUtils.INTERVAL_EXTRACTOR(dataSource)));
    Collections.sort(intervals, query.isDescending() ? Comparators.intervalsByEndThenStart() : Comparators.intervalsByStartThenEnd());
    TreeMap<Long, Long> granularThresholds = Maps.newTreeMap();
    for (Interval interval : intervals) {
        if (query.isDescending()) {
            long granularEnd = granularity.bucketStart(interval.getEnd()).getMillis();
            Long currentEnd = granularThresholds.get(granularEnd);
            if (currentEnd == null || interval.getEndMillis() > currentEnd) {
                granularThresholds.put(granularEnd, interval.getEndMillis());
            }
        } else {
            long granularStart = granularity.bucketStart(interval.getStart()).getMillis();
            Long currentStart = granularThresholds.get(granularStart);
            if (currentStart == null || interval.getStartMillis() < currentStart) {
                granularThresholds.put(granularStart, interval.getStartMillis());
            }
        }
    }
    List<T> queryIntervals = Lists.newArrayList(segments);
    Iterator<T> it = queryIntervals.iterator();
    if (query.isDescending()) {
        while (it.hasNext()) {
            Interval interval = it.next().getInterval();
            Map.Entry<Long, Long> ceiling = granularThresholds.ceilingEntry(granularity.bucketStart(interval.getEnd()).getMillis());
            if (ceiling == null || interval.getStartMillis() >= ceiling.getValue()) {
                it.remove();
            }
        }
    } else {
        while (it.hasNext()) {
            Interval interval = it.next().getInterval();
            Map.Entry<Long, Long> floor = granularThresholds.floorEntry(granularity.bucketStart(interval.getStart()).getMillis());
            if (floor == null || interval.getEndMillis() <= floor.getValue()) {
                it.remove();
            }
        }
    }
    return queryIntervals;
}
Also used : Granularity(io.druid.java.util.common.granularity.Granularity) Map(java.util.Map) TreeMap(java.util.TreeMap) Interval(org.joda.time.Interval)
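
A small illustration of the bucketing that drives the pruning above, under assumed Druid/Joda semantics: bucketStart is expected to truncate a timestamp to the start of its granular bucket, and filterSegments compares segments by those bucket keys rather than by exact millisecond. The literal values and the use of Granularity.fromString are illustrative assumptions.

// Granularity.fromString is assumed to resolve the standard granularity names.
Granularity day = Granularity.fromString("day");

DateTime pagedTimestamp = new DateTime("2017-03-15T13:45:00Z");
DateTime bucket = day.bucketStart(pagedTimestamp); // expected: start of the UTC day, 2017-03-15T00:00:00.000Z

// filterSegments keys its TreeMap on bucket-start millis like this one. In the ascending
// case a segment is dropped when its interval's start bucket precedes every paged bucket,
// or when the interval ends at or before the paged offset recorded for that bucket.
long thresholdKey = bucket.getMillis();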

Example 19 with Granularity

Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.

In class SelectQueryQueryToolChest, method getCacheStrategy.

@Override
public CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> getCacheStrategy(final SelectQuery query) {
    return new CacheStrategy<Result<SelectResultValue>, Object, SelectQuery>() {

        private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();

        private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {

            @Override
            public String apply(DimensionSpec input) {
                return input.getOutputName();
            }
        }) : Collections.<String>emptyList();

        @Override
        public boolean isCacheable(SelectQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(SelectQuery query) {
            final DimFilter dimFilter = query.getDimensionsFilter();
            final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
            final byte[] granularityBytes = query.getGranularity().getCacheKey();
            final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
            final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
            int dimensionsBytesSize = 0;
            int index = 0;
            for (DimensionSpec dimension : dimensionSpecs) {
                dimensionsBytes[index] = dimension.getCacheKey();
                dimensionsBytesSize += dimensionsBytes[index].length;
                ++index;
            }
            final Set<String> metrics = Sets.newTreeSet();
            if (query.getMetrics() != null) {
                metrics.addAll(query.getMetrics());
            }
            final byte[][] metricBytes = new byte[metrics.size()][];
            int metricBytesSize = 0;
            index = 0;
            for (String metric : metrics) {
                metricBytes[index] = StringUtils.toUtf8(metric);
                metricBytesSize += metricBytes[index].length;
                ++index;
            }
            final byte[] virtualColumnsCacheKey = query.getVirtualColumns().getCacheKey();
            final ByteBuffer queryCacheKey = ByteBuffer
                .allocate(
                    1
                        + granularityBytes.length
                        + filterBytes.length
                        + query.getPagingSpec().getCacheKey().length
                        + dimensionsBytesSize
                        + metricBytesSize
                        + virtualColumnsCacheKey.length
                )
                .put(SELECT_QUERY)
                .put(granularityBytes)
                .put(filterBytes)
                .put(query.getPagingSpec().getCacheKey());
            for (byte[] dimensionsByte : dimensionsBytes) {
                queryCacheKey.put(dimensionsByte);
            }
            for (byte[] metricByte : metricBytes) {
                queryCacheKey.put(metricByte);
            }
            queryCacheKey.put(virtualColumnsCacheKey);
            return queryCacheKey.array();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<SelectResultValue>, Object> prepareForCache() {
            return new Function<Result<SelectResultValue>, Object>() {

                @Override
                public Object apply(final Result<SelectResultValue> input) {
                    if (!dimOutputNames.isEmpty()) {
                        return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents(), dimOutputNames);
                    }
                    return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents());
                }
            };
        }

        @Override
        public Function<Object, Result<SelectResultValue>> pullFromCache() {
            return new Function<Object, Result<SelectResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<SelectResultValue> apply(Object input) {
                    List<Object> results = (List<Object>) input;
                    Iterator<Object> resultIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
                    Map<String, Integer> pageIdentifier = jsonMapper.convertValue(resultIter.next(), new TypeReference<Map<String, Integer>>() {
                    });
                    Set<String> dimensionSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
                    });
                    Set<String> metricSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
                    });
                    List<EventHolder> eventHolders = jsonMapper.convertValue(resultIter.next(), new TypeReference<List<EventHolder>>() {
                    });
                    // check the condition that outputName of cached result should be updated
                    if (resultIter.hasNext()) {
                        List<String> cachedOutputNames = (List<String>) resultIter.next();
                        Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "Cache hit but different number of dimensions??");
                        for (int idx = 0; idx < dimOutputNames.size(); idx++) {
                            if (!cachedOutputNames.get(idx).equals(dimOutputNames.get(idx))) {
                                // rename outputName in the EventHolder
                                for (EventHolder eventHolder : eventHolders) {
                                    Object obj = eventHolder.getEvent().remove(cachedOutputNames.get(idx));
                                    if (obj != null) {
                                        eventHolder.getEvent().put(dimOutputNames.get(idx), obj);
                                    }
                                }
                            }
                        }
                    }
                    return new Result<>(timestamp, new SelectResultValue(pageIdentifier, dimensionSet, metricSet, eventHolders));
                }
            };
        }
    };
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) Set(java.util.Set) Granularity(io.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) ByteBuffer(java.nio.ByteBuffer) DimFilter(io.druid.query.filter.DimFilter) Map(java.util.Map) TreeMap(java.util.TreeMap) CacheStrategy(io.druid.query.CacheStrategy)
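
A brief, hypothetical check of the key layout above: the paging spec's cache key is written into the buffer, so two otherwise identical SelectQuery instances that page to different offsets should map to different cache entries. Here toolChest, queryPage1, and queryPage2 are assumed to be a SelectQueryQueryToolChest and two such queries.

CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> strategy = toolChest.getCacheStrategy(queryPage1);

byte[] keyForPage1 = strategy.computeCacheKey(queryPage1);
byte[] keyForPage2 = strategy.computeCacheKey(queryPage2);

// Expected to be false: the PagingSpec bytes differ, so a cached first page
// can never be returned for a later page of the same select query.
boolean sameKey = Arrays.equals(keyForPage1, keyForPage2);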

Example 20 with Granularity

Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.

In class TopNQueryQueryToolChest, method getCacheStrategy.

@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
    return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {

        private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());

        private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(query.getPostAggregatorSpecs(), query.getTopNMetricSpec().getMetricName(query.getDimensionSpec()));

        @Override
        public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
            return true;
        }

        @Override
        public byte[] computeCacheKey(TopNQuery query) {
            final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
                .appendCacheable(query.getDimensionSpec())
                .appendCacheable(query.getTopNMetricSpec())
                .appendInt(query.getThreshold())
                .appendCacheable(query.getGranularity())
                .appendCacheable(query.getDimensionsFilter())
                .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
                .appendCacheable(query.getVirtualColumns());
            final List<PostAggregator> postAggregators = prunePostAggregators(query);
            if (!postAggregators.isEmpty()) {
                // Append post aggregators only when they are used as sort keys.
                // Note that appending an empty list produces a different cache key from not appending it.
                builder.appendCacheablesIgnoringOrder(postAggregators);
            }
            return builder.build();
        }

        @Override
        public TypeReference<Object> getCacheObjectClazz() {
            return OBJECT_TYPE_REFERENCE;
        }

        @Override
        public Function<Result<TopNResultValue>, Object> prepareForCache() {
            return new Function<Result<TopNResultValue>, Object>() {

                private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());

                @Override
                public Object apply(final Result<TopNResultValue> input) {
                    List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
                    final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);
                    // make sure to preserve timezone information when caching results
                    retVal.add(input.getTimestamp().getMillis());
                    for (DimensionAndMetricValueExtractor result : results) {
                        List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
                        vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
                        for (String aggName : aggFactoryNames) {
                            vals.add(result.getMetric(aggName));
                        }
                        retVal.add(vals);
                    }
                    return retVal;
                }
            };
        }

        @Override
        public Function<Object, Result<TopNResultValue>> pullFromCache() {
            return new Function<Object, Result<TopNResultValue>>() {

                private final Granularity granularity = query.getGranularity();

                @Override
                public Result<TopNResultValue> apply(Object input) {
                    List<Object> results = (List<Object>) input;
                    List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());
                    Iterator<Object> inputIter = results.iterator();
                    DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());
                    while (inputIter.hasNext()) {
                        List<Object> result = (List<Object>) inputIter.next();
                        Map<String, Object> vals = Maps.newLinkedHashMap();
                        Iterator<AggregatorFactory> aggIter = aggs.iterator();
                        Iterator<Object> resultIter = result.iterator();
                        vals.put(query.getDimensionSpec().getOutputName(), resultIter.next());
                        while (aggIter.hasNext() && resultIter.hasNext()) {
                            final AggregatorFactory factory = aggIter.next();
                            vals.put(factory.getName(), factory.deserialize(resultIter.next()));
                        }
                        for (PostAggregator postAgg : postAggs) {
                            vals.put(postAgg.getName(), postAgg.compute(vals));
                        }
                        retVal.add(vals);
                    }
                    return new Result<>(timestamp, new TopNResultValue(retVal));
                }
            };
        }
    };
}
Also used : PostAggregator(io.druid.query.aggregation.PostAggregator) CacheKeyBuilder(io.druid.query.cache.CacheKeyBuilder) Granularity(io.druid.java.util.common.granularity.Granularity) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) Function(com.google.common.base.Function) List(java.util.List) Map(java.util.Map) CacheStrategy(io.druid.query.CacheStrategy)
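
A hypothetical round trip through the strategy above, highlighting a design point visible in the code: only the dimension value and raw aggregator values are written to the cache, while post-aggregators are recomputed in pullFromCache. The strategy and topNResult variables are assumed to be the CacheStrategy returned above and a Result<TopNResultValue> to cache.

// Serialize for the cache: the timestamp millis plus, per row, the dimension value
// and the aggregator values in factory order (post-aggregator values are not stored).
Object cachePayload = strategy.prepareForCache().apply(topNResult);

// Deserialize: aggregators are restored through AggregatorFactory.deserialize and the
// pruned post-aggregators are recomputed from them, so cached rows always reflect the
// post-aggregator definitions of the query being served.
Result<TopNResultValue> restored = strategy.pullFromCache().apply(cachePayload);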

Aggregations

Granularity (io.druid.java.util.common.granularity.Granularity): 34
DateTime (org.joda.time.DateTime): 20
Interval (org.joda.time.Interval): 12
Test (org.junit.Test): 11
Map (java.util.Map): 9
Result (io.druid.query.Result): 8
Function (com.google.common.base.Function): 7
PeriodGranularity (io.druid.java.util.common.granularity.PeriodGranularity): 7
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 7
List (java.util.List): 7
Period (org.joda.time.Period): 7
CacheStrategy (io.druid.query.CacheStrategy): 4
DimensionSpec (io.druid.query.dimension.DimensionSpec): 4
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3
DurationGranularity (io.druid.java.util.common.granularity.DurationGranularity): 3
JsonMappingException (com.fasterxml.jackson.databind.JsonMappingException): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 2
MapBasedRow (io.druid.data.input.MapBasedRow): 2
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 2
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 2