Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
From the class ITAutoCompactionTest, the method testAutoCompactionDutyWithSegmentGranularityAndWithDropExistingFalse.
@Test
public void testAutoCompactionDutyWithSegmentGranularityAndWithDropExistingFalse() throws Exception {
  loadData(INDEX_TASK);
  try (final Closeable ignored = unloader(fullDatasourceName)) {
    final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName);
    intervalsBeforeCompaction.sort(null);
    // 4 segments across 2 days
    verifySegmentsCount(4);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    Granularity newGranularity = Granularities.YEAR;
    // Set dropExisting to false
    submitCompactionConfig(1000, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), false);
    LOG.info("Auto compaction test with YEAR segment granularity");
    List<String> expectedIntervalAfterCompaction = new ArrayList<>();
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(1);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(1, 1000);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
    newGranularity = Granularities.DAY;
    // Set dropExisting to false
    submitCompactionConfig(1000, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), false);
    LOG.info("Auto compaction test with DAY segment granularity");
    // Since dropExisting is false, the earlier segment with YEAR granularity is kept, so after DAY compaction
    // we expect three intervals: 2013-08-31 to 2013-09-01, 2013-09-01 to 2013-09-02 and 2013-01-01 to 2014-01-01.
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(3);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(3, 1000);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
  }
}
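The expected intervals in this test come from Granularity.getIterable(Interval), which buckets an interval by the chosen segment granularity. Below is a minimal, self-contained sketch of that bucketing; the two-day interval string is an assumption chosen to match the test data above and is not taken from ITAutoCompactionTest.

// Standalone sketch (not part of the test) of how Granularity.getIterable() buckets an interval.
import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class GranularityBucketingSketch
{
  public static void main(String[] args)
  {
    // Assumed two-day span matching the test data (2013-08-31 and 2013-09-01).
    final Interval twoDays = new Interval("2013-08-31/2013-09-02", ISOChronology.getInstanceUTC());

    // YEAR granularity covers both days with a single bucket (2013-01-01/2014-01-01),
    // which is why the test expects one compacted segment after the first compaction.
    for (Interval bucket : Granularities.YEAR.getIterable(twoDays)) {
      System.out.println("YEAR bucket: " + bucket);
    }

    // DAY granularity yields one bucket per day: 2013-08-31/2013-09-01 and 2013-09-01/2013-09-02.
    for (Interval bucket : Granularities.DAY.getIterable(twoDays)) {
      System.out.println("DAY bucket: " + bucket);
    }
  }
}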
Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
From the class ITAutoCompactionTest, the method testAutoCompactionDutyWithSegmentGranularityAndWithDropExistingTrue.
@Test
public void testAutoCompactionDutyWithSegmentGranularityAndWithDropExistingTrue() throws Exception {
  loadData(INDEX_TASK);
  try (final Closeable ignored = unloader(fullDatasourceName)) {
    final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName);
    intervalsBeforeCompaction.sort(null);
    // 4 segments across 2 days
    verifySegmentsCount(4);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    Granularity newGranularity = Granularities.YEAR;
    // Set dropExisting to true
    submitCompactionConfig(1000, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
    LOG.info("Auto compaction test with YEAR segment granularity");
    List<String> expectedIntervalAfterCompaction = new ArrayList<>();
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(1);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(1, 1000);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
    newGranularity = Granularities.DAY;
    // Set dropExisting to true
    submitCompactionConfig(1000, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
    LOG.info("Auto compaction test with DAY segment granularity");
    // Since dropExisting is set to true, the earlier segment with YEAR granularity is dropped post-compaction.
    // Hence, we will only have 2013-08-31 to 2013-09-01 and 2013-09-01 to 2013-09-02.
    expectedIntervalAfterCompaction = new ArrayList<>();
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(2);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(2, 1000);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
  }
}
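Both tests repeat the same expected-interval computation. The hypothetical helper below (expectedIntervals is not part of ITAutoCompactionTest) isolates that computation and makes the difference between the two cases easier to see.

// Hypothetical helper: maps existing segment intervals to the intervals expected after
// compaction with the given segment granularity.
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class ExpectedCompactionIntervals
{
  public static List<String> expectedIntervals(List<String> intervalsBeforeCompaction, Granularity granularity)
  {
    final List<String> expected = new ArrayList<>();
    for (String interval : intervalsBeforeCompaction) {
      // Each pre-compaction interval is re-bucketed by the new segment granularity.
      for (Interval bucket : granularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expected.add(bucket.toString());
      }
    }
    return expected;
  }
}

With such a helper, the dropExisting=false test would assert the YEAR intervals plus the DAY intervals, while the dropExisting=true test would assert only expectedIntervals(intervalsBeforeCompaction, Granularities.DAY).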
Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
From the class GroupByStrategyV2, the method mergeResults.
@Override
public Sequence<ResultRow> mergeResults(final QueryRunner<ResultRow> baseRunner, final GroupByQuery query, final ResponseContext responseContext) {
  // Merge streams using ResultMergeQueryRunner, then apply postaggregators, then apply limit (which may
  // involve materialization).
  final ResultMergeQueryRunner<ResultRow> mergingQueryRunner = new ResultMergeQueryRunner<>(baseRunner, this::createResultComparator, this::createMergeFn);
  // Set up downstream context.
  final ImmutableMap.Builder<String, Object> context = ImmutableMap.builder();
  context.put("finalize", false);
  context.put(GroupByQueryConfig.CTX_KEY_STRATEGY, GroupByStrategySelector.STRATEGY_V2);
  context.put(CTX_KEY_OUTERMOST, false);
  Granularity granularity = query.getGranularity();
  List<DimensionSpec> dimensionSpecs = query.getDimensions();
  // CTX_TIMESTAMP_RESULT_FIELD is set in DruidQuery.java.
  final String timestampResultField = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD);
  final boolean hasTimestampResultField = (timestampResultField != null && !timestampResultField.isEmpty())
                                          && query.getContextBoolean(CTX_KEY_OUTERMOST, true)
                                          && !query.isApplyLimitPushDown();
  int timestampResultFieldIndex = 0;
  if (hasTimestampResultField) {
    // For SQL like "GROUP BY city_id, TIME_FLOOR(__time TO DAY)", the originally translated query has
    // granularity=ALL and dimensions [d0, d1]. The better plan uses granularity=DAY and dimensions [d0],
    // but then the ResultRow structure changes from [d0, d1] to [__time, d0]; it has to be restored to
    // [d0, d1] (actually [d0, __time]) before the postAggs are applied.
    //
    // That is the general idea of this optimization. From a coding perspective, however, granularity=ALL
    // and the "d0" dimension are referenced in many places (subtotals, having, grouping sets, post aggs,
    // etc.), so removing "d0" from query.dimensions, or changing the granularity, would require touching
    // many of them. To keep the change contained, the optimization is implemented as an inner, engine-level
    // change of the groupBy processing: most of the code lives in GroupByStrategyV2 and adjusts the work
    // split between the broker and the compute nodes. Basic logic such as nested queries and subtotals is
    // unchanged and still sees granularity=ALL and the "d0" dimension.
    //
    // The tradeoff is that GroupByStrategyV2 behaves differently depending on the query context set by
    // DruidQuery. In other words, the query produced by "EXPLAIN PLAN FOR SELECT ..." does not match the
    // native query that is actually executed: the granularity and dimensions differ slightly. Part of the
    // query planning logic is now handled in GroupByStrategyV2, not only in DruidQuery.toGroupByQuery().
    final Granularity timestampResultFieldGranularity = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY);
    dimensionSpecs = query.getDimensions()
                          .stream()
                          .filter(dimensionSpec -> !dimensionSpec.getOutputName().equals(timestampResultField))
                          .collect(Collectors.toList());
    granularity = timestampResultFieldGranularity;
    // When timestampResultField is the last dimension, set sortByDimsFirst=true; otherwise the downstream
    // result is sorted by the row's timestamp first, which breaks the expected final ordering.
    timestampResultFieldIndex = query.getContextValue(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX);
    if (!query.getContextSortByDimsFirst() && timestampResultFieldIndex == query.getDimensions().size() - 1) {
      context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, true);
    }
    // When timestampResultField is the first dimension, sortByDimsFirst=true is effectively the same as
    // sortByDimsFirst=false, so unset it downstream.
    if (query.getContextSortByDimsFirst() && timestampResultFieldIndex == 0) {
      context.put(GroupByQuery.CTX_KEY_SORT_BY_DIMS_FIRST, false);
    }
    // When hasTimestampResultField=true and timestampResultField is neither the first nor the last
    // dimension, the DefaultLimitSpec will always do the reordering.
  }
  final int timestampResultFieldIndexInOriginalDimensions = timestampResultFieldIndex;
  if (query.getUniversalTimestamp() != null && !hasTimestampResultField) {
    // universalTimestamp works only when granularity is ALL, and so does hasTimestampResultField;
    // fudgeTimestamp must not be used when hasTimestampResultField=true because the row's actual timestamp is used.
    context.put(CTX_KEY_FUDGE_TIMESTAMP, String.valueOf(query.getUniversalTimestamp().getMillis()));
  }
  // The having spec shouldn't be passed down, so we need to convey the existing limit push down status.
  context.put(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, query.isApplyLimitPushDown());
  // Always request array result rows when passing the query downstream.
  context.put(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, true);
  final GroupByQuery newQuery = new GroupByQuery(
      query.getDataSource(),
      query.getQuerySegmentSpec(),
      query.getVirtualColumns(),
      query.getDimFilter(),
      granularity,
      dimensionSpecs,
      query.getAggregatorSpecs(),
      // Don't apply postaggregators on compute nodes.
      ImmutableList.of(),
      // Don't do the "having" clause until the end of this method.
      null,
      // If the limit is pushed down, fold the offset into the limit (the offset is applied higher up).
      query.isApplyLimitPushDown() ? ((DefaultLimitSpec) query.getLimitSpec()).withOffsetToLimit() : null,
      query.getSubtotalsSpec(),
      query.getContext()
  ).withOverriddenContext(context.build());
  final Sequence<ResultRow> mergedResults = mergingQueryRunner.run(QueryPlus.wrap(newQuery), responseContext);
  if (!query.getContextBoolean(CTX_KEY_OUTERMOST, true) || query.getContextBoolean(GroupByQueryConfig.CTX_KEY_EXECUTING_NESTED_QUERY, false)) {
    return mergedResults;
  } else if (query.getPostAggregatorSpecs().isEmpty()) {
    if (!hasTimestampResultField) {
      return mergedResults;
    }
    return Sequences.map(mergedResults, row -> {
      final ResultRow resultRow = ResultRow.create(query.getResultRowSizeWithoutPostAggregators());
      moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, resultRow);
      return resultRow;
    });
  } else {
    return Sequences.map(mergedResults, row -> {
      // This function's purpose is to apply PostAggregators.
      final ResultRow rowWithPostAggregations = ResultRow.create(query.getResultRowSizeWithPostAggregators());
      // Copy everything that comes before the postaggregations.
      if (hasTimestampResultField) {
        moveOrReplicateTimestampInRow(query, timestampResultFieldIndexInOriginalDimensions, row, rowWithPostAggregations);
      } else {
        for (int i = 0; i < query.getResultRowPostAggregatorStart(); i++) {
          rowWithPostAggregations.set(i, row.get(i));
        }
      }
      // Compute postaggregations. We need to do this with a result-row map because PostAggregator.compute
      // expects a map. Some further design adjustment may eliminate the need for it, and speed up this function.
      final Map<String, Object> mapForPostAggregationComputation = rowWithPostAggregations.toMap(query);
      for (int i = 0; i < query.getPostAggregatorSpecs().size(); i++) {
        final PostAggregator postAggregator = query.getPostAggregatorSpecs().get(i);
        final Object value = postAggregator.compute(mapForPostAggregationComputation);
        rowWithPostAggregations.set(query.getResultRowPostAggregatorStart() + i, value);
        mapForPostAggregationComputation.put(postAggregator.getName(), value);
      }
      return rowWithPostAggregations;
    });
  }
}
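The long comment above is easier to follow with a toy illustration of the row-shape fix-up it describes. The sketch below is not GroupByStrategyV2.moveOrReplicateTimestampInRow; it only shows the "move" case for an outermost query with granularity ALL, using plain Object[] rows and an assumed timestamp dimension index.

// Illustrative sketch only: rebuild a downstream row [__time, d0, ..., aggs...] into the outermost
// layout [d0, ..., __time at its original dimension index, ..., aggs...].
public class TimestampDimensionRestoreSketch
{
  static Object[] restoreTimestampDimension(Object[] downstreamRow, int timestampDimIndex)
  {
    final Object[] outerRow = new Object[downstreamRow.length];
    final Object timestamp = downstreamRow[0];   // position 0 holds the bucketed __time
    int outerIdx = 0;
    for (int i = 1; i < downstreamRow.length; i++) {
      if (outerIdx == timestampDimIndex) {
        outerRow[outerIdx++] = timestamp;        // put __time back at its original dimension position
      }
      outerRow[outerIdx++] = downstreamRow[i];
    }
    if (outerIdx < outerRow.length) {
      outerRow[outerIdx] = timestamp;            // the timestamp dimension was the last column
    }
    return outerRow;
  }

  public static void main(String[] args)
  {
    // Downstream: [__time=day bucket, d0=city, agg=count]; original dimensions were [d0, __time].
    Object[] downstream = new Object[]{1377907200000L, "SF", 10L};
    Object[] outer = restoreTimestampDimension(downstream, 1);
    System.out.println(java.util.Arrays.toString(outer)); // [SF, 1377907200000, 10]
  }
}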
Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
From the class SegmentMetadataQueryQueryToolChest, the method mergeAnalyses.
@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(final SegmentAnalysis arg1, final SegmentAnalysis arg2, boolean lenientAggregatorMerge) {
  if (arg1 == null) {
    return arg2;
  }
  if (arg2 == null) {
    return arg1;
  }
  List<Interval> newIntervals = null;
  if (arg1.getIntervals() != null) {
    newIntervals = new ArrayList<>(arg1.getIntervals());
  }
  if (arg2.getIntervals() != null) {
    if (newIntervals == null) {
      newIntervals = new ArrayList<>();
    }
    newIntervals.addAll(arg2.getIntervals());
  }
  final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
  final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
  Map<String, ColumnAnalysis> columns = new TreeMap<>();
  Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
  for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
    final String columnName = entry.getKey();
    columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
    rightColumnNames.remove(columnName);
  }
  for (String columnName : rightColumnNames) {
    columns.put(columnName, rightColumns.get(columnName));
  }
  final Map<String, AggregatorFactory> aggregators = new HashMap<>();
  if (lenientAggregatorMerge) {
    // Merge each aggregator individually, ignoring nulls.
    for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
      if (analysis.getAggregators() != null) {
        for (Map.Entry<String, AggregatorFactory> entry : analysis.getAggregators().entrySet()) {
          final String aggregatorName = entry.getKey();
          final AggregatorFactory aggregator = entry.getValue();
          AggregatorFactory merged = aggregators.get(aggregatorName);
          if (merged != null) {
            try {
              merged = merged.getMergingFactory(aggregator);
            } catch (AggregatorFactoryNotMergeableException e) {
              merged = null;
            }
          } else {
            merged = aggregator;
          }
          aggregators.put(aggregatorName, merged);
        }
      }
    }
  } else {
    final AggregatorFactory[] aggs1 = arg1.getAggregators() != null ? arg1.getAggregators().values().toArray(new AggregatorFactory[0]) : null;
    final AggregatorFactory[] aggs2 = arg2.getAggregators() != null ? arg2.getAggregators().values().toArray(new AggregatorFactory[0]) : null;
    final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
    if (merged != null) {
      for (AggregatorFactory aggregator : merged) {
        aggregators.put(aggregator.getName(), aggregator);
      }
    }
  }
  final TimestampSpec timestampSpec = TimestampSpec.mergeTimestampSpec(Lists.newArrayList(arg1.getTimestampSpec(), arg2.getTimestampSpec()));
  final Granularity queryGranularity = Granularity.mergeGranularities(Lists.newArrayList(arg1.getQueryGranularity(), arg2.getQueryGranularity()));
  final String mergedId;
  if (arg1.getId() != null && arg2.getId() != null && arg1.getId().equals(arg2.getId())) {
    mergedId = arg1.getId();
  } else {
    mergedId = "merged";
  }
  final Boolean rollup;
  if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) {
    rollup = arg1.isRollup();
  } else {
    rollup = null;
  }
  return new SegmentAnalysis(mergedId, newIntervals, columns, arg1.getSize() + arg2.getSize(), arg1.getNumRows() + arg2.getNumRows(), aggregators.isEmpty() ? null : aggregators, timestampSpec, queryGranularity, rollup);
}
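mergeAnalyses resolves the merged queryGranularity with Granularity.mergeGranularities. The sketch below shows that call in isolation; the printed expectations reflect my reading of the merge contract (equal inputs are kept, disagreeing inputs collapse to null) rather than verified output.

// Small sketch of the query-granularity merge used above.
import com.google.common.collect.Lists;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class MergeGranularitiesSketch
{
  public static void main(String[] args)
  {
    // Both segments report DAY: the merged analysis keeps DAY.
    Granularity same = Granularity.mergeGranularities(Lists.newArrayList(Granularities.DAY, Granularities.DAY));
    // Segments disagree (DAY vs. HOUR): no single query granularity can be reported.
    Granularity mixed = Granularity.mergeGranularities(Lists.newArrayList(Granularities.DAY, Granularities.HOUR));

    System.out.println("same  = " + same);   // expected: DAY
    System.out.println("mixed = " + mixed);  // expected: null
  }
}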
Use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
From the class SegmentMetadataQueryRunnerFactory, the method createRunner.
@Override
public QueryRunner<SegmentAnalysis> createRunner(final Segment segment) {
  return new QueryRunner<SegmentAnalysis>() {
    @Override
    public Sequence<SegmentAnalysis> run(QueryPlus<SegmentAnalysis> inQ, ResponseContext responseContext) {
      SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) inQ.getQuery()).withFinalizedAnalysisTypes(toolChest.getConfig());
      final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes());
      final Map<String, ColumnAnalysis> analyzedColumns = analyzer.analyze(segment);
      final long numRows = analyzer.numRows(segment);
      long totalSize = 0;
      if (analyzer.analyzingSize()) {
        // Initialize with the size of the whitespace, 1 byte per column per row.
        totalSize = analyzedColumns.size() * numRows;
      }
      Map<String, ColumnAnalysis> columns = new TreeMap<>();
      ColumnIncluderator includerator = updatedQuery.getToInclude();
      for (Map.Entry<String, ColumnAnalysis> entry : analyzedColumns.entrySet()) {
        final String columnName = entry.getKey();
        final ColumnAnalysis column = entry.getValue();
        if (!column.isError()) {
          totalSize += column.getSize();
        }
        if (includerator.include(columnName)) {
          columns.put(columnName, column);
        }
      }
      List<Interval> retIntervals = updatedQuery.analyzingInterval() ? Collections.singletonList(segment.getDataInterval()) : null;
      final Map<String, AggregatorFactory> aggregators;
      Metadata metadata = null;
      if (updatedQuery.hasAggregators()) {
        metadata = segment.asStorageAdapter().getMetadata();
        if (metadata != null && metadata.getAggregators() != null) {
          aggregators = new HashMap<>();
          for (AggregatorFactory aggregator : metadata.getAggregators()) {
            aggregators.put(aggregator.getName(), aggregator);
          }
        } else {
          aggregators = null;
        }
      } else {
        aggregators = null;
      }
      final TimestampSpec timestampSpec;
      if (updatedQuery.hasTimestampSpec()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        timestampSpec = metadata != null ? metadata.getTimestampSpec() : null;
      } else {
        timestampSpec = null;
      }
      final Granularity queryGranularity;
      if (updatedQuery.hasQueryGranularity()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        queryGranularity = metadata != null ? metadata.getQueryGranularity() : null;
      } else {
        queryGranularity = null;
      }
      Boolean rollup = null;
      if (updatedQuery.hasRollup()) {
        if (metadata == null) {
          metadata = segment.asStorageAdapter().getMetadata();
        }
        rollup = metadata != null ? metadata.isRollup() : null;
        if (rollup == null) {
          // This segment was built before the no-rollup feature existed, so it was built with rollup.
          rollup = Boolean.TRUE;
        }
      }
      return Sequences.simple(Collections.singletonList(new SegmentAnalysis(segment.getId().toString(), retIntervals, columns, totalSize, numRows, aggregators, timestampSpec, queryGranularity, rollup)));
    }
  };
}
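For context, the analysis-type checks in this runner (hasAggregators, hasTimestampSpec, hasQueryGranularity, hasRollup) correspond to the analysisTypes requested in the segmentMetadata query. Below is a hedged sketch of building such a query; the builder methods follow the pattern seen around Druids.newSegmentMetadataQueryBuilder() in Druid's own tests, and the datasource name and interval are assumptions, not values from this snippet.

// Hedged sketch: a segmentMetadata query asking for the fields this runner fills in.
import org.apache.druid.query.Druids;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;

public class SegmentMetadataQuerySketch
{
  public static SegmentMetadataQuery buildQuery()
  {
    return Druids.newSegmentMetadataQueryBuilder()
                 .dataSource("wikipedia")                 // assumed datasource name
                 .intervals("2013-08-31/2013-09-02")      // assumed interval
                 .analysisTypes(
                     SegmentMetadataQuery.AnalysisType.QUERYGRANULARITY,
                     SegmentMetadataQuery.AnalysisType.ROLLUP,
                     SegmentMetadataQuery.AnalysisType.AGGREGATORS,
                     SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC
                 )
                 .merge(true)                             // per-segment results merged via mergeAnalyses(...)
                 .build();
  }
}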