Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.
In the class SegmentMetadataQueryQueryToolChest, the method mergeAnalyses:
@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
if (arg1 == null) {
return arg2;
}
if (arg2 == null) {
return arg1;
}
List<Interval> newIntervals = null;
if (arg1.getIntervals() != null) {
newIntervals = Lists.newArrayList();
newIntervals.addAll(arg1.getIntervals());
}
if (arg2.getIntervals() != null) {
if (newIntervals == null) {
newIntervals = Lists.newArrayList();
}
newIntervals.addAll(arg2.getIntervals());
}
final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
final String columnName = entry.getKey();
columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
rightColumnNames.remove(columnName);
}
for (String columnName : rightColumnNames) {
columns.put(columnName, rightColumns.get(columnName));
}
final Map<String, AggregatorFactory> aggregators = Maps.newHashMap();
if (lenientAggregatorMerge) {
// Merge each aggregator individually, ignoring nulls
for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
if (analysis.getAggregators() != null) {
for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
final String aggregatorName = entry.getKey();
final AggregatorFactory aggregator = entry.getValue();
AggregatorFactory merged = aggregators.get(aggregatorName);
if (merged != null) {
try {
merged = merged.getMergingFactory(aggregator);
} catch (AggregatorFactoryNotMergeableException e) {
merged = null;
}
} else {
merged = aggregator;
}
aggregators.put(aggregatorName, merged);
}
}
}
} else {
final AggregatorFactory[] aggs1 = arg1.getAggregators() != null
    ? arg1.getAggregators().values().toArray(new AggregatorFactory[arg1.getAggregators().size()])
    : null;
final AggregatorFactory[] aggs2 = arg2.getAggregators() != null
    ? arg2.getAggregators().values().toArray(new AggregatorFactory[arg2.getAggregators().size()])
    : null;
final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
if (merged != null) {
for (AggregatorFactory aggregator : merged) {
aggregators.put(aggregator.getName(), aggregator);
}
}
}
final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
final String mergedId;
if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
mergedId = arg1.getId();
} else {
mergedId = "merged";
}
final Boolean rollup;
if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
rollup = arg1.isRollup();
} else {
rollup = null;
}
return new SegmentAnalysis(
    mergedId,
    newIntervals,
    columns,
    arg1.getSize() + arg2.getSize(),
    arg1.getNumRows() + arg2.getNumRows(),
    aggregators.isEmpty() ? null : aggregators,
    timestampSpec,
    queryGranularity,
    rollup
);
}
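For a concrete sense of what the merge produces, here is a minimal usage sketch. It calls the static mergeAnalyses with two hand-built analyses, using the same nine-argument SegmentAnalysis constructor that appears in the return statement above; the import paths, the Granularities.DAY constant, and all field values are illustrative assumptions rather than anything taken from a real segment.

import io.druid.java.util.common.granularity.Granularities;
import io.druid.query.metadata.SegmentMetadataQueryQueryToolChest;
import io.druid.query.metadata.metadata.ColumnAnalysis;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import java.util.HashMap;
import java.util.Map;

public class MergeAnalysesSketch {
  public static void main(String[] args) {
    // No column or aggregator metadata, to keep the example small.
    Map<String, ColumnAnalysis> noColumns = new HashMap<String, ColumnAnalysis>();
    // Arguments: id, intervals, columns, size, numRows, aggregators, timestampSpec, queryGranularity, rollup.
    // Granularities.DAY is assumed to be the DAY constant living alongside Granularity.
    SegmentAnalysis left = new SegmentAnalysis(
        "segment_a", null, noColumns, 3000L, 500L, null, null, Granularities.DAY, true);
    SegmentAnalysis right = new SegmentAnalysis(
        "segment_b", null, noColumns, 7000L, 700L, null, null, Granularities.DAY, true);
    // lenientAggregatorMerge = true keeps aggregators present on only one side
    // instead of collapsing a failed merge to null.
    SegmentAnalysis merged = SegmentMetadataQueryQueryToolChest.mergeAnalyses(left, right, true);
    System.out.println(merged.getId());       // "merged", because the two ids differ
    System.out.println(merged.getNumRows());  // 1200 = 500 + 700
    System.out.println(merged.getSize());     // 10000 = 3000 + 7000
  }
}

Note that sizes and row counts are simply summed, while identity-like fields (id, rollup, query granularity) only survive the merge when both sides agree.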
Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.
In the class SegmentMetadataQueryRunnerFactory, the method createRunner:
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
return new QueryRunner<SegmentAnalysis>() {
@Override
public Sequence<SegmentAnalysis> run(Query<SegmentAnalysis> inQ, Map<String, Object> responseContext) {
SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
final SegmentAnalyzer analyzer = new SegmentAnalyzer(query.getAnalysisTypes());
final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
final long numRows = analyzer.numRows(segment);
long totalSize = 0;
if (analyzer.analyzingSize()) {
// Initialize with the size of the whitespace: 1 byte per value (one per column per row)
totalSize = analyzedColumns.size() * numRows;
}
Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
ColumnIncluderator includerator = query.getToInclude();
for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
final String columnName = entry.getKey();
final ColumnAnalysis column = entry.getValue();
if (!column.isError()) {
totalSize += column.getSize();
}
if (includerator.include(columnName)) {
columns.put(columnName, column);
}
}
List<Interval> retIntervals = query.analyzingInterval() ? Arrays.asList(segment.getDataInterval()) : null;
final Map<String, AggregatorFactory> aggregators;
Metadata metadata = null;
if (query.hasAggregators()) {
metadata = segment.asStorageAdapter().getMetadata();
if (metadata != null && metadata.getAggregators() != null) {
aggregators = Maps.newHashMap();
for (AggregatorFactory aggregator : metadata.getAggregators()) {
aggregators.put(aggregator.getName(), aggregator);
}
} else {
aggregators = null;
}
} else {
aggregators = null;
}
final TimestampSpec timestampSpec;
if (query.hasTimestampSpec()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
} else {
timestampSpec = null;
}
final Granularity queryGranularity;
if (query.hasQueryGranularity()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
} else {
queryGranularity = null;
}
Boolean rollup = null;
if (query.hasRollup()) {
if (metadata == null) {
metadata = segment.asStorageAdapter().getMetadata();
}
rollup = metadata != null ? metadata.isRollup() : null;
if (rollup == null) {
// in this case, the segment was built before the no-rollup feature existed,
// so it was built with rollup
rollup = Boolean.TRUE;
}
}
return Sequences.simple(
    Arrays.asList(
        new SegmentAnalysis(
            segment.getIdentifier(),
            retIntervals,
            columns,
            totalSize,
            numRows,
            aggregators,
            timestampSpec,
            queryGranularity,
            rollup
        )
    )
);
}
};
}
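The size accounting above starts from a non-zero base: when size analysis is enabled, totalSize is seeded with one byte per column per row (the "whitespace" estimate) before the per-column sizes are added, and columns that reported an error are excluded. The sketch below replays that arithmetic with plain Java collections and made-up column sizes; it uses no Druid classes.

import java.util.LinkedHashMap;
import java.util.Map;

public class SizeAccountingSketch {
  public static void main(String[] args) {
    long numRows = 1000L;
    // Hypothetical per-column byte sizes as a SegmentAnalyzer might report them.
    Map<String, Long> columnSizes = new LinkedHashMap<String, Long>();
    columnSizes.put("__time", 8000L);
    columnSizes.put("page", 12345L);
    columnSizes.put("count", 4000L);
    // Seed with one byte of whitespace per value: columns * rows.
    long totalSize = columnSizes.size() * numRows;
    for (long size : columnSizes.values()) {
      totalSize += size;
    }
    System.out.println(totalSize);  // 27345 = 3 * 1000 + 8000 + 12345 + 4000
  }
}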
Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.
In the class SelectQueryQueryToolChest, the method filterSegments:
@Override
public <T extends LogicalSegment> List<T> filterSegments(SelectQuery query, List<T> segments) {
// at the point where this code is called, only one datasource should exist.
String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
PagingSpec pagingSpec = query.getPagingSpec();
Map<String, Integer> paging = pagingSpec.getPagingIdentifiers();
if (paging == null || paging.isEmpty()) {
return segments;
}
final Granularity granularity = query.getGranularity();
List<Interval> intervals = Lists.newArrayList(Iterables.transform(paging.keySet(), DataSegmentUtils.INTERVAL_EXTRACTOR(dataSource)));
Collections.sort(intervals, query.isDescending() ? Comparators.intervalsByEndThenStart() : Comparators.intervalsByStartThenEnd());
TreeMap<Long, Long> granularThresholds = Maps.newTreeMap();
for (Interval interval : intervals) {
if (query.isDescending()) {
long granularEnd = granularity.bucketStart(interval.getEnd()).getMillis();
Long currentEnd = granularThresholds.get(granularEnd);
if (currentEnd == null || interval.getEndMillis() > currentEnd) {
granularThresholds.put(granularEnd, interval.getEndMillis());
}
} else {
long granularStart = granularity.bucketStart(interval.getStart()).getMillis();
Long currentStart = granularThresholds.get(granularStart);
if (currentStart == null || interval.getStartMillis() < currentStart) {
granularThresholds.put(granularStart, interval.getStartMillis());
}
}
}
List<T> queryIntervals = Lists.newArrayList(segments);
Iterator<T> it = queryIntervals.iterator();
if (query.isDescending()) {
while (it.hasNext()) {
Interval interval = it.next().getInterval();
Map.Entry<Long, Long> ceiling = granularThresholds.ceilingEntry(granularity.bucketStart(interval.getEnd()).getMillis());
if (ceiling == null || interval.getStartMillis() >= ceiling.getValue()) {
it.remove();
}
}
} else {
while (it.hasNext()) {
Interval interval = it.next().getInterval();
Map.Entry<Long, Long> floor = granularThresholds.floorEntry(granularity.bucketStart(interval.getStart()).getMillis());
if (floor == null || interval.getEndMillis() <= floor.getValue()) {
it.remove();
}
}
}
return queryIntervals;
}
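To see what the pruning buys, consider the ascending branch with DAY granularity. Each segment id already present in the paging identifiers contributes its interval's start as a threshold for its granular bucket; a candidate segment in the same bucket that ends at or before that threshold has already been paged past and can be dropped. The sketch below replays that comparison for a single bucket. Granularities.DAY (assumed to be the DAY constant alongside Granularity) and the interval values are illustrative, and the real code looks the threshold up through a TreeMap floorEntry rather than an equality check.

import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.granularity.Granularity;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class FilterSegmentsSketch {
  public static void main(String[] args) {
    Granularity granularity = Granularities.DAY;
    // A segment interval extracted from the paging identifiers: the scan has reached 06:00.
    Interval paged = new Interval(
        new DateTime("2017-01-01T06:00:00Z"), new DateTime("2017-01-01T12:00:00Z"));
    long bucket = granularity.bucketStart(paged.getStart()).getMillis();
    long threshold = paged.getStartMillis();
    // A candidate segment covering 00:00-03:00 falls in the same DAY bucket
    // and ends before the threshold, so the ascending scan can skip it.
    Interval candidate = new Interval(
        new DateTime("2017-01-01T00:00:00Z"), new DateTime("2017-01-01T03:00:00Z"));
    boolean skip = granularity.bucketStart(candidate.getStart()).getMillis() == bucket
        && candidate.getEndMillis() <= threshold;
    System.out.println(skip);  // true
  }
}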
Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.
In the class SelectQueryQueryToolChest, the method getCacheStrategy:
@Override
public CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> getCacheStrategy(final SelectQuery query) {
return new CacheStrategy<Result<SelectResultValue>, Object, SelectQuery>() {
private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {
@Override
public String apply(DimensionSpec input) {
return input.getOutputName();
}
}) : Collections.<String>emptyList();
@Override
public boolean isCacheable(SelectQuery query, boolean willMergeRunners) {
return true;
}
@Override
public byte[] computeCacheKey(SelectQuery query) {
final DimFilter dimFilter = query.getDimensionsFilter();
final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
final byte[] granularityBytes = query.getGranularity().getCacheKey();
final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
int dimensionsBytesSize = 0;
int index = 0;
for (DimensionSpec dimension : dimensionSpecs) {
dimensionsBytes[index] = dimension.getCacheKey();
dimensionsBytesSize += dimensionsBytes[index].length;
++index;
}
final Set<String> metrics = Sets.newTreeSet();
if (query.getMetrics() != null) {
metrics.addAll(query.getMetrics());
}
final byte[][] metricBytes = new byte[metrics.size()][];
int metricBytesSize = 0;
index = 0;
for (String metric : metrics) {
metricBytes[index] = StringUtils.toUtf8(metric);
metricBytesSize += metricBytes[index].length;
++index;
}
final byte[] virtualColumnsCacheKey = query.getVirtualColumns().getCacheKey();
final ByteBuffer queryCacheKey = ByteBuffer
    .allocate(
        1
        + granularityBytes.length
        + filterBytes.length
        + query.getPagingSpec().getCacheKey().length
        + dimensionsBytesSize
        + metricBytesSize
        + virtualColumnsCacheKey.length
    )
    .put(SELECT_QUERY)
    .put(granularityBytes)
    .put(filterBytes)
    .put(query.getPagingSpec().getCacheKey());
for (byte[] dimensionsByte : dimensionsBytes) {
queryCacheKey.put(dimensionsByte);
}
for (byte[] metricByte : metricBytes) {
queryCacheKey.put(metricByte);
}
queryCacheKey.put(virtualColumnsCacheKey);
return queryCacheKey.array();
}
@Override
public TypeReference<Object> getCacheObjectClazz() {
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function<Result<SelectResultValue>, Object> prepareForCache() {
return new Function<Result<SelectResultValue>, Object>() {
@Override
public Object apply(final Result<SelectResultValue> input) {
if (!dimOutputNames.isEmpty()) {
return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents(), dimOutputNames);
}
return Arrays.asList(input.getTimestamp().getMillis(), input.getValue().getPagingIdentifiers(), input.getValue().getDimensions(), input.getValue().getMetrics(), input.getValue().getEvents());
}
};
}
@Override
public Function<Object, Result<SelectResultValue>> pullFromCache() {
return new Function<Object, Result<SelectResultValue>>() {
private final Granularity granularity = query.getGranularity();
@Override
public Result<SelectResultValue> apply(Object input) {
List<Object> results = (List<Object>) input;
Iterator<Object> resultIter = results.iterator();
DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
Map<String, Integer> pageIdentifier = jsonMapper.convertValue(resultIter.next(), new TypeReference<Map<String, Integer>>() {
});
Set<String> dimensionSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
});
Set<String> metricSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
});
List<EventHolder> eventHolders = jsonMapper.convertValue(resultIter.next(), new TypeReference<List<EventHolder>>() {
});
// check the condition that outputName of cached result should be updated
if (resultIter.hasNext()) {
List<String> cachedOutputNames = (List<String>) resultIter.next();
Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "Cache hit but different number of dimensions??");
for (int idx = 0; idx < dimOutputNames.size(); idx++) {
if (!cachedOutputNames.get(idx).equals(dimOutputNames.get(idx))) {
// rename outputName in the EventHolder
for (EventHolder eventHolder : eventHolders) {
Object obj = eventHolder.getEvent().remove(cachedOutputNames.get(idx));
if (obj != null) {
eventHolder.getEvent().put(dimOutputNames.get(idx), obj);
}
}
}
}
}
return new Result<>(timestamp, new SelectResultValue(pageIdentifier, dimensionSet, metricSet, eventHolders));
}
};
}
};
}
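The cache key above follows a common pattern: compute the byte length of every component first, allocate a single exactly-sized ByteBuffer, and append the components in a fixed order (query type marker, granularity, filter, paging spec, dimensions, metrics, virtual columns). The sketch below demonstrates the pattern with plain Java and made-up byte arrays; the 0x13 marker and the component values are illustrative, not the real SELECT_QUERY constant.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class CacheKeySketch {
  // Hypothetical query-type marker; the real code uses the SELECT_QUERY constant.
  private static final byte QUERY_TYPE_MARKER = 0x13;

  public static void main(String[] args) {
    byte[] granularityBytes = "day".getBytes(StandardCharsets.UTF_8);
    byte[] filterBytes = new byte[0];  // no filter
    byte[][] dimensionsBytes = {
        "page".getBytes(StandardCharsets.UTF_8),
        "user".getBytes(StandardCharsets.UTF_8)
    };
    int dimensionsBytesSize = 0;
    for (byte[] b : dimensionsBytes) {
      dimensionsBytesSize += b.length;
    }
    // Allocate exactly the space needed, then append components in a fixed order.
    ByteBuffer key = ByteBuffer
        .allocate(1 + granularityBytes.length + filterBytes.length + dimensionsBytesSize)
        .put(QUERY_TYPE_MARKER)
        .put(granularityBytes)
        .put(filterBytes);
    for (byte[] b : dimensionsBytes) {
      key.put(b);
    }
    System.out.println(key.array().length);  // 12 = 1 + 3 + 0 + 4 + 4
  }
}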
Use of io.druid.java.util.common.granularity.Granularity in project druid by druid-io.
In the class TopNQueryQueryToolChest, the method getCacheStrategy:
@Override
public CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> getCacheStrategy(final TopNQuery query) {
return new CacheStrategy<Result<TopNResultValue>, Object, TopNQuery>() {
private final List<AggregatorFactory> aggs = Lists.newArrayList(query.getAggregatorSpecs());
private final List<PostAggregator> postAggs = AggregatorUtil.pruneDependentPostAgg(query.getPostAggregatorSpecs(), query.getTopNMetricSpec().getMetricName(query.getDimensionSpec()));
@Override
public boolean isCacheable(TopNQuery query, boolean willMergeRunners) {
return true;
}
@Override
public byte[] computeCacheKey(TopNQuery query) {
final CacheKeyBuilder builder = new CacheKeyBuilder(TOPN_QUERY)
    .appendCacheable(query.getDimensionSpec())
    .appendCacheable(query.getTopNMetricSpec())
    .appendInt(query.getThreshold())
    .appendCacheable(query.getGranularity())
    .appendCacheable(query.getDimensionsFilter())
    .appendCacheablesIgnoringOrder(query.getAggregatorSpecs())
    .appendCacheable(query.getVirtualColumns());
final List<PostAggregator> postAggregators = prunePostAggregators(query);
if (!postAggregators.isEmpty()) {
// Append post aggregators only when they are used as sort keys.
// Note that appending an empty list produces a different cache key from not appending it.
builder.appendCacheablesIgnoringOrder(postAggregators);
}
return builder.build();
}
@Override
public TypeReference<Object> getCacheObjectClazz() {
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function<Result<TopNResultValue>, Object> prepareForCache() {
return new Function<Result<TopNResultValue>, Object>() {
private final String[] aggFactoryNames = extractFactoryName(query.getAggregatorSpecs());
@Override
public Object apply(final Result<TopNResultValue> input) {
List<DimensionAndMetricValueExtractor> results = Lists.newArrayList(input.getValue());
final List<Object> retVal = Lists.newArrayListWithCapacity(results.size() + 1);
// make sure to preserve timezone information when caching results
retVal.add(input.getTimestamp().getMillis());
for (DimensionAndMetricValueExtractor result : results) {
List<Object> vals = Lists.newArrayListWithCapacity(aggFactoryNames.length + 2);
vals.add(result.getDimensionValue(query.getDimensionSpec().getOutputName()));
for (String aggName : aggFactoryNames) {
vals.add(result.getMetric(aggName));
}
retVal.add(vals);
}
return retVal;
}
};
}
@Override
public Function<Object, Result<TopNResultValue>> pullFromCache() {
return new Function<Object, Result<TopNResultValue>>() {
private final Granularity granularity = query.getGranularity();
@Override
public Result<TopNResultValue> apply(Object input) {
List<Object> results = (List<Object>) input;
List<Map<String, Object>> retVal = Lists.newArrayListWithCapacity(results.size());
Iterator<Object> inputIter = results.iterator();
DateTime timestamp = granularity.toDateTime(((Number) inputIter.next()).longValue());
while (inputIter.hasNext()) {
List<Object> result = (List<Object>) inputIter.next();
Map<String, Object> vals = Maps.newLinkedHashMap();
Iterator<AggregatorFactory> aggIter = aggs.iterator();
Iterator<Object> resultIter = result.iterator();
vals.put(query.getDimensionSpec().getOutputName(), resultIter.next());
while (aggIter.hasNext() && resultIter.hasNext()) {
final AggregatorFactory factory = aggIter.next();
vals.put(factory.getName(), factory.deserialize(resultIter.next()));
}
for (PostAggregator postAgg : postAggs) {
vals.put(postAgg.getName(), postAgg.compute(vals));
}
retVal.add(vals);
}
return new Result<>(timestamp, new TopNResultValue(retVal));
}
};
}
};
}
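The cached representation that prepareForCache writes and pullFromCache reads is a plain list: the first element is the result timestamp in millis (kept so the timestamp can be reconstructed through the query granularity), and each following element is a list of [dimension value, agg1, agg2, ...] in aggregator declaration order, with post-aggregators recomputed on the way out. The sketch below decodes such a list with plain Java; the dimension and metric names and all values are made up.

import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class TopNCacheLayoutSketch {
  public static void main(String[] args) {
    // What a cached TopN result might look like: timestamp first, then one list per row.
    List<Object> cached = Arrays.asList(
        (Object) 1483228800000L,                   // 2017-01-01T00:00:00Z
        Arrays.asList("front_page", 120L, 4567L),  // [dimension value, count, sum]
        Arrays.asList("login", 80L, 1234L));

    Iterator<Object> it = cached.iterator();
    long timestampMillis = ((Number) it.next()).longValue();
    List<String> metricNames = Arrays.asList("count", "sum");  // aggregator declaration order matters
    while (it.hasNext()) {
      List<?> row = (List<?>) it.next();
      Iterator<?> rowIter = row.iterator();
      Map<String, Object> vals = new LinkedHashMap<String, Object>();
      vals.put("page", rowIter.next());  // dimension output name comes first
      for (String metric : metricNames) {
        vals.put(metric, rowIter.next());
      }
      System.out.println(timestampMillis + " -> " + vals);
    }
  }
}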